py/emitnative: Access qstr values using indirection table qstr_table.

This changes the native emitter to access qstr values using the qstr
indirection table qstr_table, but only when generating native code that
will be saved to a .mpy file.  This makes the resulting native code fully
static, ie it does not require any fix-ups or rewriting when it is
imported.

The performance of native code is more or less unchanged.  Benchmark
results on PYBv1.0 (using --via-mpy and --emit native) are:

N=100 M=100          baseline -> this-commit     diff      diff% (error%)
bm_chaos.py            407.16 ->     411.85 :   +4.69 =  +1.152% (+/-0.01%)
bm_fannkuch.py         100.89 ->     101.20 :   +0.31 =  +0.307% (+/-0.01%)
bm_fft.py             3521.17 ->    3441.72 :  -79.45 =  -2.256% (+/-0.00%)
bm_float.py           6707.29 ->    6644.83 :  -62.46 =  -0.931% (+/-0.00%)
bm_hexiom.py            55.91 ->      55.41 :   -0.50 =  -0.894% (+/-0.00%)
bm_nqueens.py         5343.54 ->    5326.17 :  -17.37 =  -0.325% (+/-0.00%)
bm_pidigits.py         603.89 ->     632.79 :  +28.90 =  +4.786% (+/-0.33%)
core_qstr.py            64.18 ->      64.09 :   -0.09 =  -0.140% (+/-0.01%)
core_yield_from.py     313.61 ->     311.11 :   -2.50 =  -0.797% (+/-0.03%)
misc_aes.py            654.29 ->     659.75 :   +5.46 =  +0.834% (+/-0.02%)
misc_mandel.py        4205.10 ->    4272.08 :  +66.98 =  +1.593% (+/-0.01%)
misc_pystone.py       3077.79 ->    3128.39 :  +50.60 =  +1.644% (+/-0.01%)
misc_raytrace.py       388.45 ->     393.71 :   +5.26 =  +1.354% (+/-0.01%)
viper_call0.py         576.83 ->     566.76 :  -10.07 =  -1.746% (+/-0.05%)
viper_call1a.py        550.39 ->     540.12 :  -10.27 =  -1.866% (+/-0.11%)
viper_call1b.py        438.32 ->     432.09 :   -6.23 =  -1.421% (+/-0.11%)
viper_call1c.py        442.96 ->     436.11 :   -6.85 =  -1.546% (+/-0.08%)
viper_call2a.py        536.31 ->     527.37 :   -8.94 =  -1.667% (+/-0.04%)
viper_call2b.py        378.99 ->     377.50 :   -1.49 =  -0.393% (+/-0.08%)

Signed-off-by: Damien George <damien@micropython.org>
This commit is contained in:
Damien George
2022-05-20 14:31:56 +10:00
parent 94955e8e3d
commit d4d53e9e11
12 changed files with 75 additions and 186 deletions

View File

@@ -839,7 +839,7 @@ class RawCode(object):
print("};")
print()
def freeze_raw_code(self, prelude_ptr=None, qstr_links=(), type_sig=0):
def freeze_raw_code(self, prelude_ptr=None, type_sig=0):
# Generate mp_raw_code_t.
print("static const mp_raw_code_t raw_code_%s = {" % self.escaped_name)
print(" .kind = %s," % RawCode.code_kind_str[self.code_kind])
@@ -879,8 +879,6 @@ class RawCode(object):
print(" #endif")
print(" #if MICROPY_EMIT_MACHINE_CODE")
print(" .prelude_offset = %u," % self.prelude_offset)
print(" .n_qstr = %u," % len(qstr_links))
print(" .qstr_link = NULL,") # TODO
print(" #endif")
print(" #endif")
print(" #if MICROPY_EMIT_MACHINE_CODE")
@@ -1038,47 +1036,6 @@ class RawCodeNative(RawCode):
ip += sz
self.disassemble_children()
def _asm_thumb_rewrite_mov(self, pc, val):
print(" (%u & 0xf0) | (%s >> 12)," % (self.fun_data[pc], val), end="")
print(" (%u & 0xfb) | (%s >> 9 & 0x04)," % (self.fun_data[pc + 1], val), end="")
print(" (%s & 0xff)," % (val,), end="")
print(" (%u & 0x07) | (%s >> 4 & 0x70)," % (self.fun_data[pc + 3], val))
def _link_qstr(self, pc, kind, qst):
if kind == 0:
# Generic 16-bit link
print(" %s & 0xff, %s >> 8," % (qst, qst))
return 2
else:
# Architecture-specific link
is_obj = kind == 2
if is_obj:
qst = "((uintptr_t)MP_OBJ_NEW_QSTR(%s))" % qst
if config.native_arch in (
MP_NATIVE_ARCH_X86,
MP_NATIVE_ARCH_X64,
MP_NATIVE_ARCH_ARMV6,
MP_NATIVE_ARCH_XTENSA,
MP_NATIVE_ARCH_XTENSAWIN,
):
print(
" %s & 0xff, (%s >> 8) & 0xff, (%s >> 16) & 0xff, %s >> 24,"
% (qst, qst, qst, qst)
)
return 4
elif MP_NATIVE_ARCH_ARMV6M <= config.native_arch <= MP_NATIVE_ARCH_ARMV7EMDP:
if is_obj:
# qstr object, movw and movt
self._asm_thumb_rewrite_mov(pc, qst)
self._asm_thumb_rewrite_mov(pc + 4, "(%s >> 16)" % qst)
return 8
else:
# qstr number, movw instruction
self._asm_thumb_rewrite_mov(pc, qst)
return 4
else:
assert 0
def freeze(self):
if self.scope_flags & ~0x0F:
raise FreezeError("unable to freeze code with relocations")
@@ -1098,21 +1055,13 @@ class RawCodeNative(RawCode):
i = 0
qi = 0
while i < i_top:
if qi < len(self.qstr_links) and i == self.qstr_links[qi][0]:
# link qstr
qi_off, qi_kind, qi_val = self.qstr_links[qi]
i += self._link_qstr(i, qi_kind, qi_val.qstr_id)
qi += 1
else:
# copy machine code (max 16 bytes)
i16 = min(i + 16, i_top)
if qi < len(self.qstr_links):
i16 = min(i16, self.qstr_links[qi][0])
print(" ", end="")
for ii in range(i, i16):
print(" 0x%02x," % self.fun_data[ii], end="")
print()
i = i16
# copy machine code (max 16 bytes)
i16 = min(i + 16, i_top)
print(" ", end="")
for ii in range(i, i16):
print(" 0x%02x," % self.fun_data[ii], end="")
print()
i = i16
print("};")
@@ -1134,7 +1083,7 @@ class RawCodeNative(RawCode):
print("#endif")
self.freeze_children(prelude_ptr)
self.freeze_raw_code(prelude_ptr, self.qstr_links, self.type_sig)
self.freeze_raw_code(prelude_ptr, self.type_sig)
class MPYSegment: