From bfc0d7b0b97cbf52db892775b0f371dd06d2ba08 Mon Sep 17 00:00:00 2001
From: Alessandro Gatti
Date: Thu, 23 Jan 2025 23:55:57 +0100
Subject: [PATCH] py/emitnative: Optimise Viper register offset load/stores on
 Xtensa.

This commit improves the emitted code sequences for address generation
in the Viper subsystem when loading/storing 16-bit and 32-bit values
via a register offset.

The Xtensa opcodes ADDX2 and ADDX4 are used to avoid performing the
extra shifts to align the final operation offset.  Those opcodes are
available on both the xtensa and xtensawin MicroPython architectures.

Signed-off-by: Alessandro Gatti
---
 py/asmxtensa.h  |  8 ++++++++
 py/emitnative.c | 17 +++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/py/asmxtensa.h b/py/asmxtensa.h
index f226624a8..d2f37bf82 100644
--- a/py/asmxtensa.h
+++ b/py/asmxtensa.h
@@ -143,6 +143,14 @@ static inline void asm_xtensa_op_addi(asm_xtensa_t *as, uint reg_dest, uint reg_
     asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRI8(2, 12, reg_src, reg_dest, imm8 & 0xff));
 }
 
+static inline void asm_xtensa_op_addx2(asm_xtensa_t *as, uint reg_dest, uint reg_src_a, uint reg_src_b) {
+    asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRR(0, 0, 9, reg_dest, reg_src_a, reg_src_b));
+}
+
+static inline void asm_xtensa_op_addx4(asm_xtensa_t *as, uint reg_dest, uint reg_src_a, uint reg_src_b) {
+    asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRR(0, 0, 10, reg_dest, reg_src_a, reg_src_b));
+}
+
 static inline void asm_xtensa_op_and(asm_xtensa_t *as, uint reg_dest, uint reg_src_a, uint reg_src_b) {
     asm_xtensa_op24(as, ASM_XTENSA_ENCODE_RRR(0, 0, 1, reg_dest, reg_src_a, reg_src_b));
 }
diff --git a/py/emitnative.c b/py/emitnative.c
index 66c345b23..7d856e13f 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -1625,6 +1625,11 @@ static void emit_native_load_subscr(emit_t *emit) {
                 }
                 case VTYPE_PTR16: {
                     // pointer to 16-bit memory
+                    #if N_XTENSA || N_XTENSAWIN
+                    asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1); // base += 2*index
+                    asm_xtensa_op_l16ui(emit->as, REG_RET, REG_ARG_1, 0); // load from (base+2*index)
+                    break;
+                    #endif
                     ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                     ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                     ASM_LOAD16_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+2*index)
@@ -1637,6 +1642,10 @@ static void emit_native_load_subscr(emit_t *emit) {
                     asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2);
                     asm_rv32_opcode_lw(emit->as, REG_RET, REG_ARG_1, 0);
                     break;
+                    #elif N_XTENSA || N_XTENSAWIN
+                    asm_xtensa_op_addx4(emit->as, REG_ARG_1, reg_index, REG_ARG_1); // base += 4*index
+                    asm_xtensa_op_l32i_n(emit->as, REG_RET, REG_ARG_1, 0); // load from (base+4*index)
+                    break;
                     #endif
                     ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                     ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
@@ -1900,6 +1909,10 @@ static void emit_native_store_subscr(emit_t *emit) {
                     #if N_ARM
                     asm_arm_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
                     break;
+                    #elif N_XTENSA || N_XTENSAWIN
+                    asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1); // base += 2*index
+                    asm_xtensa_op_s16i(emit->as, reg_value, REG_ARG_1, 0); // store to (base+2*index)
+                    break;
                     #endif
                     ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                     ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
@@ -1916,6 +1929,10 @@ static void emit_native_store_subscr(emit_t *emit) {
                     asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2);
                     asm_rv32_opcode_sw(emit->as, reg_value, REG_ARG_1, 0);
                     break;
+                    #elif N_XTENSA || N_XTENSAWIN
+                    asm_xtensa_op_addx4(emit->as, REG_ARG_1, reg_index, REG_ARG_1); // base += 4*index
+                    asm_xtensa_op_s32i_n(emit->as, reg_value, REG_ARG_1, 0); // store to (base+4*index)
+                    break;
                     #endif
                     ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                     ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
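
Editor's note, not part of the patch: the optimised paths above are taken when
Viper code subscripts a ptr16/ptr32 with a runtime (register) index. A minimal
sketch of such code follows; the function and parameter names are illustrative
only, but ptr16/ptr32 and @micropython.viper are the standard Viper API.

    import micropython

    @micropython.viper
    def scale16(buf: ptr16, n: int):
        # 16-bit load + store with a runtime index: on xtensa/xtensawin
        # each access now computes the address with ADDX2 instead of two ADDs.
        for i in range(n):
            buf[i] = buf[i] << 1

    @micropython.viper
    def sum32(buf: ptr32, n: int) -> int:
        # 32-bit load with a runtime index: address computed with ADDX4.
        total = 0
        for i in range(n):
            total += buf[i]
        return total

Passing, say, a bytearray to these functions drives them through
emit_native_store_subscr and emit_native_load_subscr with a non-immediate
index, which is exactly where the new N_XTENSA/N_XTENSAWIN branches emit
ADDX2/ADDX4 in place of the generic double-ADD sequence.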