py/emitnative: Refactor Viper register-indexed load/stores.

This commit cleans up the Viper code generation blocks for
register-indexed load and store operations.

The common code generator block is simplified by moving
architecture-specific code into the appropriate native generation
backends wherever possible.  This should make that part of the Viper
generator clearer and easier to maintain in the long term.

To achieve this, six generic assembler meta-opcodes have been
introduced, named `ASM_{LOAD,STORE}{8,16,32}_REG_REG_REG`.  A
platform-independent implementation for those operations is provided, so
backends that cannot emit a shorter sequence for the requested operation
or are fine with the platform-independent implementation can just not
provide said meta-opcodes.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
This commit is contained in:
Alessandro Gatti
2025-05-07 22:41:33 +02:00
parent 04c6b99cb9
commit b6d269ee32
6 changed files with 98 additions and 95 deletions

View File

@@ -221,6 +221,13 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src);
#define ASM_STORE16_REG_REG(as, reg_value, reg_base) asm_arm_strh_reg_reg((as), (reg_value), (reg_base))
#define ASM_STORE32_REG_REG(as, reg_value, reg_base) asm_arm_str_reg_reg((as), (reg_value), (reg_base), 0)
// Register-indexed load/store meta-opcodes (8, 16 and 32 bit).
// Each macro forwards its arguments unchanged to the asm_arm_*_reg_reg_reg
// emitter of the same width.  No index scaling is done at this level, so
// presumably the emitter itself scales reg_index by the element size for the
// 16-bit and 32-bit variants -- confirm against the emitter definitions.
#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldrb_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldrh_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldr_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
#define ASM_STORE8_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_strb_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_strh_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_str_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
#endif // GENERIC_ASM_API
#endif // MICROPY_INCLUDED_PY_ASMARM_H

View File

@@ -758,6 +758,30 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
#define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs)
#define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs)
// Expands to nothing on this backend.
#define ASM_CLR_REG(state, rd)
// Register-indexed load/store meta-opcodes for 16-bit and 32-bit elements.
// Each sequence scales the index held in rs2 by the element size with a shift
// (slli is passed rs2 as both of its register operands, i.e. in place), adds
// the result into rs1 with cadd, and then accesses memory through rs1 at
// offset 0.  Both rs1 and rs2 therefore appear to be overwritten by the
// emitted code (assuming the asm_rv32_opcode_* helpers take the destination
// register first -- confirm), so callers should not rely on either afterwards.

// Load the 16-bit element at rs1 + 2 * rs2 into rd (emitted with lhu).
#define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) \
do { \
asm_rv32_opcode_slli(state, rs2, rs2, 1); \
asm_rv32_opcode_cadd(state, rs1, rs2); \
asm_rv32_opcode_lhu(state, rd, rs1, 0); \
} while (0)
// Load the 32-bit element at rs1 + 4 * rs2 into rd (emitted with lw).
#define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) \
do { \
asm_rv32_opcode_slli(state, rs2, rs2, 2); \
asm_rv32_opcode_cadd(state, rs1, rs2); \
asm_rv32_opcode_lw(state, rd, rs1, 0); \
} while (0)
// Store to the 16-bit element at rs1 + 2 * rs2 (emitted with sh).
// Note: despite its name, rd is the register holding the value to store.
#define ASM_STORE16_REG_REG_REG(state, rd, rs1, rs2) \
do { \
asm_rv32_opcode_slli(state, rs2, rs2, 1); \
asm_rv32_opcode_cadd(state, rs1, rs2); \
asm_rv32_opcode_sh(state, rd, rs1, 0); \
} while (0)
// Store to the 32-bit element at rs1 + 4 * rs2 (emitted with sw).
// Note: despite its name, rd is the register holding the value to store.
#define ASM_STORE32_REG_REG_REG(state, rd, rs1, rs2) \
do { \
asm_rv32_opcode_slli(state, rs2, rs2, 2); \
asm_rv32_opcode_cadd(state, rs1, rs2); \
asm_rv32_opcode_sw(state, rd, rs1, 0); \
} while (0)
#endif

View File

@@ -491,26 +491,6 @@ void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint r
}
}
// Emit a register-indexed 16-bit load into reg_dest from reg_base + 2 * reg_index.
// The shift helper is called with reg_index as both of its register operands,
// so the index register is scaled in place and does not keep its original value.
// NOTE(review): the *_rlo_* helpers presumably accept low registers only -- confirm.
void asm_thumb_ldrh_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index) {
asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 1);
asm_thumb_ldrh_rlo_rlo_rlo(as, reg_dest, reg_base, reg_index);
}
// Emit a register-indexed 32-bit load into reg_dest from reg_base + 4 * reg_index.
// The shift helper is called with reg_index as both of its register operands,
// so the index register is scaled in place and does not keep its original value.
// NOTE(review): the *_rlo_* helpers presumably accept low registers only -- confirm.
void asm_thumb_ldr_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index) {
asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 2);
asm_thumb_ldr_rlo_rlo_rlo(as, reg_dest, reg_base, reg_index);
}
// Emit a register-indexed 16-bit store of reg_val to reg_base + 2 * reg_index.
// The shift helper is called with reg_index as both of its register operands,
// so the index register is scaled in place and does not keep its original value.
// NOTE(review): the *_rlo_* helpers presumably accept low registers only -- confirm.
void asm_thumb_strh_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index) {
asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 1);
asm_thumb_strh_rlo_rlo_rlo(as, reg_val, reg_base, reg_index);
}
// Emit a register-indexed 32-bit store of reg_val to reg_base + 4 * reg_index.
// The shift helper is called with reg_index as both of its register operands,
// so the index register is scaled in place and does not keep its original value.
// NOTE(review): the *_rlo_* helpers presumably accept low registers only -- confirm.
void asm_thumb_str_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index) {
asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 2);
asm_thumb_str_rlo_rlo_rlo(as, reg_val, reg_base, reg_index);
}
// this could be wrong, because it should have a range of +/- 16MiB...
// OP_BW_HI keeps bits 12..22 of byte_offset under the 0xf000 prefix and
// OP_BW_LO keeps bits 1..11 under the 0xb800 prefix; bit 0 and every bit above
// bit 22 are dropped by the shifts and the 0x07ff masks.
#define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff))
#define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff))

View File

@@ -385,11 +385,6 @@ void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label);
void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset); // convenience
void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset); // convenience
// convenience: register-indexed 16-bit/32-bit loads and stores; reg_index is
// an element index that the emitted code scales in place by the element size
void asm_thumb_ldrh_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index);
void asm_thumb_ldr_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index);
void asm_thumb_strh_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index);
void asm_thumb_str_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index);
void asm_thumb_b_label(asm_thumb_t *as, uint label); // convenience: picks narrow or wide branch
void asm_thumb_bcc_label(asm_thumb_t *as, int cc, uint label); // convenience: picks narrow or wide branch
void asm_thumb_bl_ind(asm_thumb_t *as, uint fun_id, uint reg_temp); // convenience
@@ -480,6 +475,29 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel);
#define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_thumb_strh_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
// Register-indexed load/store meta-opcodes.
// The 8-bit variants map directly onto a single register-offset instruction.
// The 16-bit and 32-bit variants first shift reg_index left by 1 or 2 (the
// shift helper receives reg_index as both register operands, so the index is
// scaled in place and loses its original value) and then emit the
// register-offset access.
// NOTE(review): the *_rlo_* helpers presumably accept low registers only -- confirm.
#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_thumb_ldrb_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index))
// Load the 16-bit element at reg_base + 2 * reg_index into reg_dest.
#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
do { \
asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 1); \
asm_thumb_ldrh_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)); \
} while (0)
// Load the 32-bit element at reg_base + 4 * reg_index into reg_dest.
#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
do { \
asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 2); \
asm_thumb_ldr_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)); \
} while (0)
#define ASM_STORE8_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_thumb_strb_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index))
// Store reg_val to the 16-bit element at reg_base + 2 * reg_index.
#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) \
do { \
asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 1); \
asm_thumb_strh_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \
} while (0)
// Store reg_val to the 32-bit element at reg_base + 4 * reg_index.
#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) \
do { \
asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 2); \
asm_thumb_str_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \
} while (0)
#endif // GENERIC_ASM_API
#endif // MICROPY_INCLUDED_PY_ASMTHUMB_H

View File

@@ -411,12 +411,32 @@ void asm_xtensa_call_ind_win(asm_xtensa_t *as, uint idx);
// Load/store meta-opcodes.  The *_REG_REG forms access memory at reg_base
// with offset 0.  The *_REG_REG_REG forms first fold the scaled index into
// reg_base with addx2/addx4 and then access memory through reg_base at
// offset 0.  reg_base is passed to addx2/addx4 as both the first and the last
// register operand, so it presumably ends up holding
// reg_base + {2,4} * reg_index and loses its original value -- confirm the
// operand order against the asm_xtensa_op_addx* definitions.
#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l8ui((as), (reg_dest), (reg_base), 0)
#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), 0)
#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), (uint16_offset))
// Load the 16-bit element indexed by reg_index from reg_base into reg_dest.
#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
do { \
asm_xtensa_op_addx2((as), (reg_base), (reg_index), (reg_base)); \
asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), 0); \
} while (0)
#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l32i_n((as), (reg_dest), (reg_base), 0)
// Load the 32-bit element indexed by reg_index from reg_base into reg_dest.
#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
do { \
asm_xtensa_op_addx4((as), (reg_base), (reg_index), (reg_base)); \
asm_xtensa_op_l32i_n((as), (reg_dest), (reg_base), 0); \
} while (0)
#define ASM_STORE_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_xtensa_s32i_optimised((as), (reg_dest), (reg_base), (word_offset))
#define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s8i((as), (reg_src), (reg_base), 0)
#define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s16i((as), (reg_src), (reg_base), 0)
// Store reg_val to the 16-bit element indexed by reg_index from reg_base.
#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) \
do { \
asm_xtensa_op_addx2((as), (reg_base), (reg_index), (reg_base)); \
asm_xtensa_op_s16i((as), (reg_val), (reg_base), 0); \
} while (0)
#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s32i_n((as), (reg_src), (reg_base), 0)
// Store reg_val to the 32-bit element indexed by reg_index from reg_base.
#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) \
do { \
asm_xtensa_op_addx4((as), (reg_base), (reg_index), (reg_base)); \
asm_xtensa_op_s32i_n((as), (reg_val), (reg_base), 0); \
} while (0)
#endif // GENERIC_ASM_API

View File

@@ -1638,59 +1638,36 @@ static void emit_native_load_subscr(emit_t *emit) {
switch (vtype_base) {
case VTYPE_PTR8: {
// pointer to 8-bit memory
#if N_ARM
asm_arm_ldrb_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
break;
#elif N_THUMB
asm_thumb_ldrb_rlo_rlo_rlo(emit->as, REG_RET, REG_ARG_1, reg_index);
break;
#endif
// TODO optimise to use thumb ldrb r1, [r2, r3]
#ifdef ASM_LOAD8_REG_REG_REG
ASM_LOAD8_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index);
#else
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_LOAD8_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+index)
#endif
break;
}
case VTYPE_PTR16: {
// pointer to 16-bit memory
#if N_ARM
asm_arm_ldrh_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
break;
#elif N_THUMB
asm_thumb_ldrh_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
break;
#elif N_XTENSA || N_XTENSAWIN
asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
asm_xtensa_op_l16ui(emit->as, REG_RET, REG_ARG_1, 0);
break;
#endif
#ifdef ASM_LOAD16_REG_REG_REG
ASM_LOAD16_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index);
#else
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_LOAD16_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+2*index)
#endif
break;
}
case VTYPE_PTR32: {
// pointer to word-size memory
#if N_ARM
asm_arm_ldr_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
break;
#elif N_THUMB
asm_thumb_ldr_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
break;
#elif N_RV32
asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2);
asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2);
asm_rv32_opcode_lw(emit->as, REG_RET, REG_ARG_1, 0);
break;
#elif N_XTENSA || N_XTENSAWIN
asm_xtensa_op_addx4(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
asm_xtensa_op_l32i_n(emit->as, REG_RET, REG_ARG_1, 0);
break;
#endif
#ifdef ASM_LOAD32_REG_REG_REG
ASM_LOAD32_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index);
#else
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_LOAD32_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+4*index)
#endif
break;
}
default:
@@ -1949,59 +1926,36 @@ static void emit_native_store_subscr(emit_t *emit) {
switch (vtype_base) {
case VTYPE_PTR8: {
// pointer to 8-bit memory
// TODO optimise to use thumb strb r1, [r2, r3]
#if N_ARM
asm_arm_strb_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#elif N_THUMB
asm_thumb_strb_rlo_rlo_rlo(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#endif
#ifdef ASM_STORE8_REG_REG_REG
ASM_STORE8_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index);
#else
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_STORE8_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+index)
#endif
break;
}
case VTYPE_PTR16: {
// pointer to 16-bit memory
#if N_ARM
asm_arm_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#elif N_THUMB
asm_thumb_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#elif N_XTENSA || N_XTENSAWIN
asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
asm_xtensa_op_s16i(emit->as, reg_value, REG_ARG_1, 0);
break;
#endif
#ifdef ASM_STORE16_REG_REG_REG
ASM_STORE16_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index);
#else
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_STORE16_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+2*index)
#endif
break;
}
case VTYPE_PTR32: {
// pointer to 32-bit memory
#if N_ARM
asm_arm_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#elif N_THUMB
asm_thumb_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#elif N_RV32
asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2);
asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2);
asm_rv32_opcode_sw(emit->as, reg_value, REG_ARG_1, 0);
break;
#elif N_XTENSA || N_XTENSAWIN
asm_xtensa_op_addx4(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
asm_xtensa_op_s32i_n(emit->as, reg_value, REG_ARG_1, 0);
break;
#endif
#ifdef ASM_STORE32_REG_REG_REG
ASM_STORE32_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index);
#else
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_STORE32_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+4*index)
#endif
break;
}
default: