diff --git a/src/lj_asm.c b/src/lj_asm.c index 231e76fc..7ebde7b8 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1515,6 +1515,124 @@ static void asm_loop(ASMState *as) #error "Missing assembler for target CPU" #endif +/* -- Instruction dispatch ------------------------------------------------ */ + +/* Assemble a single instruction. */ +static void asm_ir(ASMState *as, IRIns *ir) +{ + switch ((IROp)ir->o) { + /* Miscellaneous ops. */ + case IR_LOOP: asm_loop(as); break; + case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; + case IR_USE: + ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; + case IR_PHI: asm_phi(as, ir); break; + case IR_HIOP: asm_hiop(as, ir); break; + case IR_GCSTEP: asm_gcstep(as, ir); break; + + /* Guarded assertions. */ + case IR_LT: case IR_GE: case IR_LE: case IR_GT: + case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: + case IR_ABC: + asm_comp(as, ir); + break; + case IR_EQ: case IR_NE: + if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { + as->curins--; + asm_href(as, ir-1, (IROp)ir->o); + } else { + asm_equal(as, ir); + } + break; + + case IR_RETF: asm_retf(as, ir); break; + + /* Bit ops. */ + case IR_BNOT: asm_bnot(as, ir); break; + case IR_BSWAP: asm_bswap(as, ir); break; + case IR_BAND: asm_band(as, ir); break; + case IR_BOR: asm_bor(as, ir); break; + case IR_BXOR: asm_bxor(as, ir); break; + case IR_BSHL: asm_bshl(as, ir); break; + case IR_BSHR: asm_bshr(as, ir); break; + case IR_BSAR: asm_bsar(as, ir); break; + case IR_BROL: asm_brol(as, ir); break; + case IR_BROR: asm_bror(as, ir); break; + + /* Arithmetic ops. 
*/ + case IR_ADD: asm_add(as, ir); break; + case IR_SUB: asm_sub(as, ir); break; + case IR_MUL: asm_mul(as, ir); break; + case IR_DIV: asm_div(as, ir); break; + case IR_MOD: asm_mod(as, ir); break; + case IR_POW: asm_pow(as, ir); break; + case IR_NEG: asm_neg(as, ir); break; + case IR_ABS: asm_abs(as, ir); break; + case IR_ATAN2: asm_atan2(as, ir); break; + case IR_LDEXP: asm_ldexp(as, ir); break; + case IR_MIN: asm_min(as, ir); break; + case IR_MAX: asm_max(as, ir); break; + case IR_FPMATH: asm_fpmath(as, ir); break; + + /* Overflow-checking arithmetic ops. */ + case IR_ADDOV: asm_addov(as, ir); break; + case IR_SUBOV: asm_subov(as, ir); break; + case IR_MULOV: asm_mulov(as, ir); break; + + /* Memory references. */ + case IR_AREF: asm_aref(as, ir); break; + case IR_HREF: asm_href(as, ir, 0); break; + case IR_HREFK: asm_hrefk(as, ir); break; + case IR_NEWREF: asm_newref(as, ir); break; + case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; + case IR_FREF: asm_fref(as, ir); break; + case IR_STRREF: asm_strref(as, ir); break; + + /* Loads and stores. */ + case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + asm_ahuvload(as, ir); + break; + case IR_FLOAD: asm_fload(as, ir); break; + case IR_XLOAD: asm_xload(as, ir); break; + case IR_SLOAD: asm_sload(as, ir); break; + + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; + case IR_FSTORE: asm_fstore(as, ir); break; + case IR_XSTORE: asm_xstore(as, ir); break; + + /* Allocations. */ + case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; + case IR_TNEW: asm_tnew(as, ir); break; + case IR_TDUP: asm_tdup(as, ir); break; + case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; + + /* Buffer operations. */ + case IR_BUFHDR: asm_bufhdr(as, ir); break; + case IR_BUFPUT: asm_bufput(as, ir); break; + case IR_BUFSTR: asm_bufstr(as, ir); break; + + /* Write barriers. */ + case IR_TBAR: asm_tbar(as, ir); break; + case IR_OBAR: asm_obar(as, ir); break; + + /* Type conversions. 
*/ + case IR_TOBIT: asm_tobit(as, ir); break; + case IR_CONV: asm_conv(as, ir); break; + case IR_TOSTR: asm_tostr(as, ir); break; + case IR_STRTO: asm_strto(as, ir); break; + + /* Calls. */ + case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; + case IR_CALLXS: asm_callx(as, ir); break; + case IR_CARG: break; + + default: + setintV(&as->J->errinfo, ir->o); + lj_trace_err_info(as->J, LJ_TRERR_NYIIR); + break; + } +} + /* -- Head of trace ------------------------------------------------------- */ /* Head of a root trace. */ diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 25a28bd7..039a2a9a 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -519,6 +519,8 @@ static void asm_tobit(ASMState *as, IRIns *ir) emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); } +#else +#define asm_tobit(as, ir) lua_assert(0) #endif static void asm_conv(ASMState *as, IRIns *ir) @@ -1038,7 +1040,7 @@ static void asm_xload(ASMState *as, IRIns *ir) asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) { if (ir->r != RID_SINK) { Reg src = ra_alloc1(as, ir->op2, @@ -1048,6 +1050,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) } } +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + static void asm_ahuvload(ASMState *as, IRIns *ir) { int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); @@ -1324,6 +1328,42 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); emit_dm(as, ai, (dest & 15), (left & 15)); } + +static void asm_callround(ASMState *as, IRIns *ir, int id) +{ + /* The modified regs must match with the *.dasc implementation. 
*/ + RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| + RID2RSET(RID_R3)|RID2RSET(RID_R12); + RegSet of; + Reg dest, src; + ra_evictset(as, drop); + dest = ra_dest(as, ir, RSET_FPR); + emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); + emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : + id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : + (void *)lj_vm_trunc_sf); + /* Workaround to protect argument GPRs from being used for remat. */ + of = as->freeset; + as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); + as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); + src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ + as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); + emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) + return; + if (ir->op2 <= IRFPM_TRUNC) + asm_callround(as, ir, ir->op2); + else if (ir->op2 == IRFPM_SQRT) + asm_fpunary(as, ir, ARMI_VSQRT_D); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); +} +#else +#define asm_fpmath(as, ir) lua_assert(0) #endif static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) @@ -1373,32 +1413,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) asm_intop(as, ir, ai); } -static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) -{ - if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ - uint32_t cc = (as->mcp[1] >> 28); - as->flagmcp = NULL; - if (cc <= CC_NE) { - as->mcp++; - ai |= ARMI_S; - } else if (cc == CC_GE) { - *++as->mcp ^= ((CC_GE^CC_PL) << 28); - ai |= ARMI_S; - } else if (cc == CC_LT) { - *++as->mcp ^= ((CC_LT^CC_MI) << 28); - ai |= ARMI_S; - } /* else: other conds don't work with bit ops. 
*/ - } - if (ir->op2 == 0) { - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); - emit_d(as, ai^m, dest); - } else { - /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ - asm_intop(as, ir, ai); - } -} - static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1464,6 +1478,26 @@ static void asm_mul(ASMState *as, IRIns *ir) asm_intmul(as, ir); } +#define asm_addov(as, ir) asm_add(as, ir) +#define asm_subov(as, ir) asm_sub(as, ir) +#define asm_mulov(as, ir) asm_mul(as, ir) + +#if LJ_SOFTFP +#define asm_div(as, ir) lua_assert(0) +#define asm_pow(as, ir) lua_assert(0) +#define asm_abs(as, ir) lua_assert(0) +#define asm_atan2(as, ir) lua_assert(0) +#define asm_ldexp(as, ir) lua_assert(0) +#else +#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) +#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) +#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) +#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) +#endif + +#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) + static void asm_neg(ASMState *as, IRIns *ir) { #if !LJ_SOFTFP @@ -1475,31 +1509,35 @@ static void asm_neg(ASMState *as, IRIns *ir) asm_intneg(as, ir, ARMI_RSB); } -#if !LJ_SOFTFP -static void asm_callround(ASMState *as, IRIns *ir, int id) +static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) { - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| - RID2RSET(RID_R3)|RID2RSET(RID_R12); - RegSet of; - Reg dest, src; - ra_evictset(as, drop); - dest = ra_dest(as, ir, RSET_FPR); - emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); - emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : - id == IRFPM_CEIL ? 
(void *)lj_vm_ceil_sf : - (void *)lj_vm_trunc_sf); - /* Workaround to protect argument GPRs from being used for remat. */ - of = as->freeset; - as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); - as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); - src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); - emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); + if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ + uint32_t cc = (as->mcp[1] >> 28); + as->flagmcp = NULL; + if (cc <= CC_NE) { + as->mcp++; + ai |= ARMI_S; + } else if (cc == CC_GE) { + *++as->mcp ^= ((CC_GE^CC_PL) << 28); + ai |= ARMI_S; + } else if (cc == CC_LT) { + *++as->mcp ^= ((CC_LT^CC_MI) << 28); + ai |= ARMI_S; + } /* else: other conds don't work with bit ops. */ + } + if (ir->op2 == 0) { + Reg dest = ra_dest(as, ir, RSET_GPR); + uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); + emit_d(as, ai^m, dest); + } else { + /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ + asm_intop(as, ir, ai); + } } -#endif -static void asm_bitswap(ASMState *as, IRIns *ir) +#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) + +static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR); @@ -1516,6 +1554,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir) } } +#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) +#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) +#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) + static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) { if (irref_isk(ir->op2)) { /* Constant shifts. 
*/ @@ -1533,6 +1575,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) } } +#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) +#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) +#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) +#define asm_brol(as, ir) lua_assert(0) + static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) { uint32_t kcmp = 0, kmov = 0; @@ -1606,6 +1654,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) asm_intmin_max(as, ir, cc); } +#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) +#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) + /* -- Comparisons --------------------------------------------------------- */ /* Map of comparisons to flags. ORDER IR. */ @@ -1721,6 +1772,18 @@ notst: as->flagmcp = as->mcp; /* Allow elimination of the compare. */ } +static void asm_comp(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else +#endif + asm_intcomp(as, ir); +} + +#define asm_equal(as, ir) asm_comp(as, ir) + #if LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_int64comp(ASMState *as, IRIns *ir) @@ -1795,7 +1858,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) #endif } else if ((ir-1)->o == IR_XSTORE) { if ((ir-1)->r != RID_SINK) - asm_xstore(as, ir, 4); + asm_xstore_(as, ir, 4); return; } if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ @@ -2064,141 +2127,6 @@ static void asm_tail_prep(ASMState *as) *p = 0; /* Prevent load/store merging. */ } -/* -- Instruction dispatch ------------------------------------------------ */ - -/* Assemble a single instruction. */ -static void asm_ir(ASMState *as, IRIns *ir) -{ - switch ((IROp)ir->o) { - /* Miscellaneous ops. */ - case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; - case IR_USE: - ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); break; - case IR_PHI: asm_phi(as, ir); break; - case IR_HIOP: asm_hiop(as, ir); break; - case IR_GCSTEP: asm_gcstep(as, ir); break; - - /* Guarded assertions. */ - case IR_EQ: case IR_NE: - if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { - as->curins--; - asm_href(as, ir-1, (IROp)ir->o); - break; - } - /* fallthrough */ - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: - case IR_ABC: -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; } -#endif - asm_intcomp(as, ir); - break; - - case IR_RETF: asm_retf(as, ir); break; - - /* Bit ops. */ - case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break; - case IR_BSWAP: asm_bitswap(as, ir); break; - - case IR_BAND: asm_bitop(as, ir, ARMI_AND); break; - case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break; - case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break; - - case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break; - case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break; - case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break; - case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break; - case IR_BROL: lua_assert(0); break; - - /* Arithmetic ops. */ - case IR_ADD: case IR_ADDOV: asm_add(as, ir); break; - case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break; - case IR_MUL: case IR_MULOV: asm_mul(as, ir); break; - case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; - case IR_NEG: asm_neg(as, ir); break; - -#if LJ_SOFTFP - case IR_DIV: case IR_POW: case IR_ABS: - case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: - lua_assert(0); /* Unused for LJ_SOFTFP. 
*/ - break; -#else - case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break; - case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; - case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break; - case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; - case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; - case IR_FPMATH: - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - break; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, ARMI_VSQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - break; - case IR_TOBIT: asm_tobit(as, ir); break; -#endif - - case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break; - case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break; - - /* Memory references. */ - case IR_AREF: asm_aref(as, ir); break; - case IR_HREF: asm_href(as, ir, 0); break; - case IR_HREFK: asm_hrefk(as, ir); break; - case IR_NEWREF: asm_newref(as, ir); break; - case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; - case IR_FREF: asm_fref(as, ir); break; - case IR_STRREF: asm_strref(as, ir); break; - - /* Loads and stores. */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - asm_ahuvload(as, ir); - break; - case IR_FLOAD: asm_fload(as, ir); break; - case IR_XLOAD: asm_xload(as, ir); break; - case IR_SLOAD: asm_sload(as, ir); break; - - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir, 0); break; - - /* Allocations. */ - case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; - case IR_TNEW: asm_tnew(as, ir); break; - case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; - - /* Buffer operations. */ - case IR_BUFHDR: asm_bufhdr(as, ir); break; - case IR_BUFPUT: asm_bufput(as, ir); break; - case IR_BUFSTR: asm_bufstr(as, ir); break; - - /* Write barriers. 
*/ - case IR_TBAR: asm_tbar(as, ir); break; - case IR_OBAR: asm_obar(as, ir); break; - - /* Type conversions. */ - case IR_CONV: asm_conv(as, ir); break; - case IR_TOSTR: asm_tostr(as, ir); break; - case IR_STRTO: asm_strto(as, ir); break; - - /* Calls. */ - case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; - case IR_CALLXS: asm_callx(as, ir); break; - case IR_CARG: break; - - default: - setintV(&as->J->errinfo, ir->o); - lj_trace_err_info(as->J, LJ_TRERR_NYIIR); - break; - } -} - /* -- Trace setup --------------------------------------------------------- */ /* Ensure there are enough stack slots for call arguments. */ diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index cbbd2966..122e5ecd 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -849,7 +849,7 @@ static void asm_xload(ASMState *as, IRIns *ir) asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) { if (ir->r != RID_SINK) { Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); @@ -858,6 +858,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) } } +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + static void asm_ahuvload(ASMState *as, IRIns *ir) { IRType1 t = ir->t; @@ -1083,6 +1085,18 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) emit_fg(as, mi, dest, left); } +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) + return; + if (ir->op2 <= IRFPM_TRUNC) + asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); + else if (ir->op2 == IRFPM_SQRT) + asm_fpunary(as, ir, MIPSI_SQRT_D); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); +} + static void asm_add(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) { @@ -1126,6 +1140,10 @@ static void asm_mul(ASMState *as, IRIns *ir) } } +#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D) +#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) +#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) + static void asm_neg(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) { @@ -1137,6 +1155,10 @@ static void asm_neg(ASMState *as, IRIns *ir) } } +#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) +#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) + static void asm_arithov(ASMState *as, IRIns *ir) { Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); @@ -1170,6 +1192,9 @@ static void asm_arithov(ASMState *as, IRIns *ir) emit_move(as, RID_TMP, dest == left ? 
left : right); } +#define asm_addov(as, ir) asm_arithov(as, ir) +#define asm_subov(as, ir) asm_arithov(as, ir) + static void asm_mulov(ASMState *as, IRIns *ir) { #if LJ_DUALNUM @@ -1263,7 +1288,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) } #endif -static void asm_bitnot(ASMState *as, IRIns *ir) +static void asm_bnot(ASMState *as, IRIns *ir) { Reg left, right, dest = ra_dest(as, ir, RSET_GPR); IRIns *irl = IR(ir->op1); @@ -1277,7 +1302,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir) emit_dst(as, MIPSI_NOR, dest, left, right); } -static void asm_bitswap(ASMState *as, IRIns *ir) +static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR); @@ -1313,6 +1338,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) emit_dst(as, mi, dest, left, right); } +#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI) +#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI) +#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI) + static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1326,7 +1355,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) } } -static void asm_bitror(ASMState *as, IRIns *ir) +#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) +#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) +#define asm_brol(as, ir) lua_assert(0) + +static void asm_bror(ASMState *as, IRIns *ir) { if ((as->flags & JIT_F_MIPS32R2)) { asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); @@ -1375,6 +1409,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) } } +#define asm_min(as, ir) asm_min_max(as, ir, 0) +#define asm_max(as, ir) asm_min_max(as, ir, 1) + /* -- Comparisons --------------------------------------------------------- */ static void 
asm_comp(ASMState *as, IRIns *ir) @@ -1412,7 +1449,7 @@ static void asm_comp(ASMState *as, IRIns *ir) } } -static void asm_compeq(ASMState *as, IRIns *ir) +static void asm_equal(ASMState *as, IRIns *ir) { Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); right = (left >> 8); left &= 255; @@ -1486,8 +1523,8 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_XSTORE) { as->curins--; /* Handle both stores here. */ if ((ir-1)->r != RID_SINK) { - asm_xstore(as, ir, LJ_LE ? 4 : 0); - asm_xstore(as, ir-1, LJ_LE ? 0 : 4); + asm_xstore_(as, ir, LJ_LE ? 4 : 0); + asm_xstore_(as, ir-1, LJ_LE ? 0 : 4); } return; } @@ -1683,136 +1720,6 @@ static void asm_tail_prep(ASMState *as) as->invmcp = as->loopref ? as->mcp : NULL; } -/* -- Instruction dispatch ------------------------------------------------ */ - -/* Assemble a single instruction. */ -static void asm_ir(ASMState *as, IRIns *ir) -{ - switch ((IROp)ir->o) { - /* Miscellaneous ops. */ - case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; - case IR_USE: - ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; - case IR_PHI: asm_phi(as, ir); break; - case IR_HIOP: asm_hiop(as, ir); break; - case IR_GCSTEP: asm_gcstep(as, ir); break; - - /* Guarded assertions. */ - case IR_EQ: case IR_NE: - if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { - as->curins--; - asm_href(as, ir-1, (IROp)ir->o); - break; - } - asm_compeq(as, ir); - break; - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: - case IR_ABC: - asm_comp(as, ir); - break; - - case IR_RETF: asm_retf(as, ir); break; - - /* Bit ops. 
*/ - case IR_BNOT: asm_bitnot(as, ir); break; - case IR_BSWAP: asm_bitswap(as, ir); break; - - case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break; - case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break; - case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break; - - case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break; - case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break; - case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break; - case IR_BROL: lua_assert(0); break; - case IR_BROR: asm_bitror(as, ir); break; - - /* Arithmetic ops. */ - case IR_ADD: asm_add(as, ir); break; - case IR_SUB: asm_sub(as, ir); break; - case IR_MUL: asm_mul(as, ir); break; - case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break; - case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; - case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; - case IR_NEG: asm_neg(as, ir); break; - - case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break; - case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; - case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; - case IR_MIN: asm_min_max(as, ir, 0); break; - case IR_MAX: asm_min_max(as, ir, 1); break; - case IR_FPMATH: - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - break; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, MIPSI_SQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - break; - - /* Overflow-checking arithmetic ops. */ - case IR_ADDOV: asm_arithov(as, ir); break; - case IR_SUBOV: asm_arithov(as, ir); break; - case IR_MULOV: asm_mulov(as, ir); break; - - /* Memory references. 
*/ - case IR_AREF: asm_aref(as, ir); break; - case IR_HREF: asm_href(as, ir, 0); break; - case IR_HREFK: asm_hrefk(as, ir); break; - case IR_NEWREF: asm_newref(as, ir); break; - case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; - case IR_FREF: asm_fref(as, ir); break; - case IR_STRREF: asm_strref(as, ir); break; - - /* Loads and stores. */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - asm_ahuvload(as, ir); - break; - case IR_FLOAD: asm_fload(as, ir); break; - case IR_XLOAD: asm_xload(as, ir); break; - case IR_SLOAD: asm_sload(as, ir); break; - - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir, 0); break; - - /* Allocations. */ - case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; - case IR_TNEW: asm_tnew(as, ir); break; - case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; - - /* Buffer operations. */ - case IR_BUFHDR: asm_bufhdr(as, ir); break; - case IR_BUFPUT: asm_bufput(as, ir); break; - case IR_BUFSTR: asm_bufstr(as, ir); break; - - /* Write barriers. */ - case IR_TBAR: asm_tbar(as, ir); break; - case IR_OBAR: asm_obar(as, ir); break; - - /* Type conversions. */ - case IR_CONV: asm_conv(as, ir); break; - case IR_TOBIT: asm_tobit(as, ir); break; - case IR_TOSTR: asm_tostr(as, ir); break; - case IR_STRTO: asm_strto(as, ir); break; - - /* Calls. */ - case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; - case IR_CALLXS: asm_callx(as, ir); break; - case IR_CARG: break; - - default: - setintV(&as->J->errinfo, ir->o); - lj_trace_err_info(as->J, LJ_TRERR_NYIIR); - break; - } -} - /* -- Trace setup --------------------------------------------------------- */ /* Ensure there are enough stack slots for call arguments. 
*/ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 9c9c3ea4..d9174e7d 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -840,7 +840,7 @@ static void asm_xload(ASMState *as, IRIns *ir) asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) { IRIns *irb; if (ir->r == RID_SINK) @@ -857,6 +857,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) } } +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + static void asm_ahuvload(ASMState *as, IRIns *ir) { IRType1 t = ir->t; @@ -1120,6 +1122,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) emit_fb(as, pi, dest, left); } +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) + return; + if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) + asm_fpunary(as, ir, PPCI_FSQRT); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); +} + static void asm_add(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) { @@ -1217,6 +1229,10 @@ static void asm_mul(ASMState *as, IRIns *ir) } } +#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) +#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) +#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) + static void asm_neg(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) { @@ -1235,6 +1251,10 @@ static void asm_neg(ASMState *as, IRIns *ir) } } +#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) +#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) + static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) { Reg dest, left, right; @@ -1250,6 +1270,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) emit_tab(as, pi|PPCF_DOT, dest, left, right); } +#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) +#define asm_subov(as, ir) asm_arithov(as, ir, 
PPCI_SUBFO) +#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO) + #if LJ_HASFFI static void asm_add64(ASMState *as, IRIns *ir) { @@ -1329,7 +1353,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) } #endif -static void asm_bitnot(ASMState *as, IRIns *ir) +static void asm_bnot(ASMState *as, IRIns *ir) { Reg dest, left, right; PPCIns pi = PPCI_NOR; @@ -1356,7 +1380,7 @@ nofuse: emit_asb(as, pi, dest, left, right); } -static void asm_bitswap(ASMState *as, IRIns *ir) +static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); IRIns *irx; @@ -1377,32 +1401,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir) } } -static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - Reg tmp = left; - if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { - if (!checku16(k)) { - emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); - if ((k & 0xffff) == 0) return; - } - emit_asi(as, pik, dest, left, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, pi, dest, left, right); -} - /* Fuse BAND with contiguous bitmask and a shift to rlwinm. 
*/ static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) { @@ -1433,7 +1431,7 @@ nofuse: *--as->mcp = pi | PPCF_T(left); } -static void asm_bitand(ASMState *as, IRIns *ir) +static void asm_band(ASMState *as, IRIns *ir) { Reg dest, left, right; IRRef lref = ir->op1; @@ -1488,6 +1486,35 @@ static void asm_bitand(ASMState *as, IRIns *ir) emit_asb(as, PPCI_AND ^ dot, dest, left, right); } +static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (irref_isk(ir->op2)) { + int32_t k = IR(ir->op2)->i; + Reg tmp = left; + if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { + if (!checku16(k)) { + emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); + if ((k & 0xffff) == 0) return; + } + emit_asi(as, pik, dest, left, k); + return; + } + } + /* May fail due to spills/restores above, but simplifies the logic. */ + if (as->flagmcp == as->mcp) { + as->flagmcp = NULL; + as->mcp++; + pi |= PPCF_DOT; + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_asb(as, pi, dest, left, right); +} + +#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) +#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) + static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) { Reg dest, left; @@ -1513,6 +1540,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) } } +#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) +#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) +#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) +#define asm_brol(as, ir) \ + asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ + PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) +#define asm_bror(as, ir) lua_assert(0) + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) { if (irt_isnum(ir->t)) { @@ -1543,6 +1578,9 @@ static void 
asm_min_max(ASMState *as, IRIns *ir, int ismax) } } +#define asm_min(as, ir) asm_min_max(as, ir, 0) +#define asm_max(as, ir) asm_min_max(as, ir, 1) + /* -- Comparisons --------------------------------------------------------- */ #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ @@ -1619,6 +1657,8 @@ static void asm_comp(ASMState *as, IRIns *ir) } } +#define asm_equal(as, ir) asm_comp(as, ir) + #if LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_comp64(ASMState *as, IRIns *ir) @@ -1664,8 +1704,8 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_XSTORE) { as->curins--; /* Handle both stores here. */ if ((ir-1)->r != RID_SINK) { - asm_xstore(as, ir, 0); - asm_xstore(as, ir-1, 4); + asm_xstore_(as, ir, 0); + asm_xstore_(as, ir-1, 4); } return; } @@ -1871,134 +1911,6 @@ static void asm_tail_prep(ASMState *as) } } -/* -- Instruction dispatch ------------------------------------------------ */ - -/* Assemble a single instruction. */ -static void asm_ir(ASMState *as, IRIns *ir) -{ - switch ((IROp)ir->o) { - /* Miscellaneous ops. */ - case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; - case IR_USE: - ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; - case IR_PHI: asm_phi(as, ir); break; - case IR_HIOP: asm_hiop(as, ir); break; - case IR_GCSTEP: asm_gcstep(as, ir); break; - - /* Guarded assertions. */ - case IR_EQ: case IR_NE: - if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { - as->curins--; - asm_href(as, ir-1, (IROp)ir->o); - break; - } - /* fallthrough */ - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: - case IR_ABC: - asm_comp(as, ir); - break; - - case IR_RETF: asm_retf(as, ir); break; - - /* Bit ops. 
*/ - case IR_BNOT: asm_bitnot(as, ir); break; - case IR_BSWAP: asm_bitswap(as, ir); break; - - case IR_BAND: asm_bitand(as, ir); break; - case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break; - case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break; - - case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break; - case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break; - case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break; - case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), - PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break; - case IR_BROR: lua_assert(0); break; - - /* Arithmetic ops. */ - case IR_ADD: asm_add(as, ir); break; - case IR_SUB: asm_sub(as, ir); break; - case IR_MUL: asm_mul(as, ir); break; - case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break; - case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; - case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; - case IR_NEG: asm_neg(as, ir); break; - - case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break; - case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; - case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; - case IR_MIN: asm_min_max(as, ir, 0); break; - case IR_MAX: asm_min_max(as, ir, 1); break; - case IR_FPMATH: - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - break; - if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) - asm_fpunary(as, ir, PPCI_FSQRT); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - break; - - /* Overflow-checking arithmetic ops. */ - case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break; - case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break; - case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break; - - /* Memory references. 
*/ - case IR_AREF: asm_aref(as, ir); break; - case IR_HREF: asm_href(as, ir, 0); break; - case IR_HREFK: asm_hrefk(as, ir); break; - case IR_NEWREF: asm_newref(as, ir); break; - case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; - case IR_FREF: asm_fref(as, ir); break; - case IR_STRREF: asm_strref(as, ir); break; - - /* Loads and stores. */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - asm_ahuvload(as, ir); - break; - case IR_FLOAD: asm_fload(as, ir); break; - case IR_XLOAD: asm_xload(as, ir); break; - case IR_SLOAD: asm_sload(as, ir); break; - - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir, 0); break; - - /* Allocations. */ - case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; - case IR_TNEW: asm_tnew(as, ir); break; - case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; - - /* Buffer operations. */ - case IR_BUFHDR: asm_bufhdr(as, ir); break; - case IR_BUFPUT: asm_bufput(as, ir); break; - case IR_BUFSTR: asm_bufstr(as, ir); break; - - /* Write barriers. */ - case IR_TBAR: asm_tbar(as, ir); break; - case IR_OBAR: asm_obar(as, ir); break; - - /* Type conversions. */ - case IR_CONV: asm_conv(as, ir); break; - case IR_TOBIT: asm_tobit(as, ir); break; - case IR_TOSTR: asm_tostr(as, ir); break; - case IR_STRTO: asm_strto(as, ir); break; - - /* Calls. */ - case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; - case IR_CALLXS: asm_callx(as, ir); break; - case IR_CARG: break; - - default: - setintV(&as->J->errinfo, ir->o); - lj_trace_err_info(as->J, LJ_TRERR_NYIIR); - break; - } -} - /* -- Trace setup --------------------------------------------------------- */ /* Ensure there are enough stack slots for call arguments. 
*/ diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 45fc7e85..2ab1dbf5 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1218,6 +1218,9 @@ static void asm_fxload(ASMState *as, IRIns *ir) emit_mrm(as, xo, dest, RID_MRM); } +#define asm_fload(as, ir) asm_fxload(as, ir) +#define asm_xload(as, ir) asm_fxload(as, ir) /* IR_FLOAD and IR_XLOAD share asm_fxload. */ + static void asm_fxstore(ASMState *as, IRIns *ir) { RegSet allow = RSET_GPR; @@ -1281,6 +1284,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir) } } +#define asm_fstore(as, ir) asm_fxstore(as, ir) +#define asm_xstore(as, ir) asm_fxstore(as, ir) /* IR_FSTORE and IR_XSTORE share asm_fxstore. */ + #if LJ_64 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) { @@ -1666,6 +1672,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir) } } +#define asm_atan2(as, ir) asm_fpmath(as, ir) +#define asm_ldexp(as, ir) asm_fpmath(as, ir) /* IR_ATAN2 and IR_LDEXP are routed to asm_fpmath. */ + static void asm_fppowi(ASMState *as, IRIns *ir) { /* The modified regs must match with the *.dasc implementation. */ @@ -1679,6 +1688,17 @@ static void asm_fppowi(ASMState *as, IRIns *ir) ra_left(as, RID_EAX, ir->op2); } +static void asm_pow(ASMState *as, IRIns *ir) +{ /* Assemble IR_POW: with LJ_64 && LJ_HASFFI, non-number types call lj_carith_powi64/powu64; otherwise asm_fppowi. */ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else +#endif + asm_fppowi(as, ir); +} + static int asm_swapops(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); @@ -1855,6 +1875,44 @@ static void asm_add(ASMState *as, IRIns *ir) asm_intarith(as, ir, XOg_ADD); } +static void asm_sub(ASMState *as, IRIns *ir) +{ /* Assemble IR_SUB: numbers via asm_fparith (XO_SUBSD), everything else via asm_intarith (XOg_SUB). */ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_SUBSD); + else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ + asm_intarith(as, ir, XOg_SUB); +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ /* Assemble IR_MUL: numbers via asm_fparith (XO_MULSD), everything else via asm_intarith (XOg_X_IMUL). */ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_MULSD); + else + asm_intarith(as, ir, XOg_X_IMUL); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ /* Assemble IR_DIV: with LJ_64 && LJ_HASFFI, non-number types call lj_carith_divi64/divu64; numbers use asm_fparith (XO_DIVSD). */ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else +#endif + asm_fparith(as, ir, XO_DIVSD); +} + +static void asm_mod(ASMState *as, IRIns *ir) +{ /* Assemble IR_MOD: with LJ_64 && LJ_HASFFI, non-int types call lj_carith_modi64/modu64; ints call lj_vm_modi. */ +#if LJ_64 && LJ_HASFFI + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1862,7 +1920,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) ra_left(as, dest, ir->op1); } -static void asm_min_max(ASMState *as, IRIns *ir, int cc) +static void asm_neg(ASMState *as, IRIns *ir) +{ /* Assemble IR_NEG: numbers via asm_fparith (XO_XORPS), everything else via asm_neg_not (XOg_NEG). */ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_XORPS); + else + asm_neg_not(as, ir, XOg_NEG); +} + +#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS) /* IR_ABS has no integer path here. */ + +static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) /* Helper for the non-number case of asm_min/asm_max; they pass CC_G or CC_L as cc. */ { Reg right, dest = ra_dest(as, ir, RSET_GPR); IRRef lref = ir->op1, rref = ir->op2; @@ -1873,7 +1941,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc) ra_left(as, dest, lref); } -static void asm_bitswap(ASMState *as, IRIns *ir) +static void asm_min(ASMState *as, IRIns *ir) +{ /* Assemble IR_MIN: numbers via asm_fparith (XO_MINSD), everything else via asm_intmin_max with CC_G. */ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_MINSD); + else + asm_intmin_max(as, ir, CC_G); +} + +static void asm_max(ASMState *as, IRIns *ir) +{ /* Assemble IR_MAX: numbers via asm_fparith (XO_MAXSD), everything else via asm_intmin_max with CC_L. */ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_MAXSD); + else + asm_intmin_max(as, ir, CC_L); +} + +/* Note: don't use LEA for overflow-checking arithmetic! 
*/ +#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD) +#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB) +#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL) /* All three overflow-checking ops go through asm_intarith. */ + +#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT) /* IR_BNOT: asm_neg_not with XOg_NOT. */ + +static void asm_bswap(ASMState *as, IRIns *ir) /* Assembles IR_BSWAP; the name matches the generic asm_ir dispatch. */ { Reg dest = ra_dest(as, ir, RSET_GPR); as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), @@ -1881,6 +1972,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir) ra_left(as, dest, ir->op1); } +#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND) +#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR) +#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR) /* IR_BAND/IR_BOR/IR_BXOR all go through asm_intarith. */ + static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) { IRRef rref = ir->op2; @@ -1920,6 +2015,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) */ } +#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR) +#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR) +#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL) +#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR) /* All five shift/rotate ops go through asm_bitshift. */ + /* -- Comparisons --------------------------------------------------------- */ /* Virtual flags for unordered FP comparisons. */ @@ -1946,8 +2047,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = { }; /* FP and integer comparisons. */ -static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) +static void asm_comp(ASMState *as, IRIns *ir) { + uint32_t cc = asm_compmap[ir->o]; /* Look up cc by IR opcode; asm_compmap is declared with IR_ABC+1 entries. */ if (irt_isnum(ir->t)) { IRRef lref = ir->op1; IRRef rref = ir->op2; @@ -2102,6 +2204,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) } } +#define asm_equal(as, ir) asm_comp(as, ir) /* IR_EQ/IR_NE use the ordinary comparison path unless asm_ir fuses them with HREF. */ + #if LJ_32 && LJ_HASFFI /* 64 bit integer comparisons in 32 bit mode. */ static void asm_comp_int64(ASMState *as, IRIns *ir) @@ -2484,175 +2588,6 @@ static void asm_tail_prep(ASMState *as) } } -/* -- Instruction dispatch ------------------------------------------------ */ - -/* Assemble a single instruction. 
*/ -static void asm_ir(ASMState *as, IRIns *ir) -{ - switch ((IROp)ir->o) { - /* Miscellaneous ops. */ - case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; - case IR_USE: - ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; - case IR_PHI: asm_phi(as, ir); break; - case IR_HIOP: asm_hiop(as, ir); break; - case IR_GCSTEP: asm_gcstep(as, ir); break; - - /* Guarded assertions. */ - case IR_EQ: case IR_NE: - if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { - as->curins--; - asm_href(as, ir-1, (IROp)ir->o); - break; - } - /* fallthrough */ - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: - case IR_ABC: - asm_comp(as, ir, asm_compmap[ir->o]); - break; - - case IR_RETF: asm_retf(as, ir); break; - - /* Bit ops. */ - case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break; - case IR_BSWAP: asm_bitswap(as, ir); break; - - case IR_BAND: asm_intarith(as, ir, XOg_AND); break; - case IR_BOR: asm_intarith(as, ir, XOg_OR); break; - case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break; - - case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break; - case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break; - case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break; - case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break; - case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break; - - /* Arithmetic ops. */ - case IR_ADD: asm_add(as, ir); break; - case IR_SUB: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_SUBSD); - else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ - asm_intarith(as, ir, XOg_SUB); - break; - case IR_MUL: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_MULSD); - else - asm_intarith(as, ir, XOg_X_IMUL); - break; - case IR_DIV: -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, XO_DIVSD); - break; - case IR_MOD: -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); - break; - - case IR_NEG: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_XORPS); - else - asm_neg_not(as, ir, XOg_NEG); - break; - case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break; - - case IR_MIN: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_MINSD); - else - asm_min_max(as, ir, CC_G); - break; - case IR_MAX: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_MAXSD); - else - asm_min_max(as, ir, CC_L); - break; - - case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: - asm_fpmath(as, ir); - break; - case IR_POW: -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_fppowi(as, ir); - break; - - /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ - case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; - case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break; - case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break; - - /* Memory references. */ - case IR_AREF: asm_aref(as, ir); break; - case IR_HREF: asm_href(as, ir, 0); break; - case IR_HREFK: asm_hrefk(as, ir); break; - case IR_NEWREF: asm_newref(as, ir); break; - case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; - case IR_FREF: asm_fref(as, ir); break; - case IR_STRREF: asm_strref(as, ir); break; - - /* Loads and stores. 
*/ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - asm_ahuvload(as, ir); - break; - case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; - case IR_SLOAD: asm_sload(as, ir); break; - - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break; - - /* Allocations. */ - case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; - case IR_TNEW: asm_tnew(as, ir); break; - case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; - - /* Buffer operations. */ - case IR_BUFHDR: asm_bufhdr(as, ir); break; - case IR_BUFPUT: asm_bufput(as, ir); break; - case IR_BUFSTR: asm_bufstr(as, ir); break; - - /* Write barriers. */ - case IR_TBAR: asm_tbar(as, ir); break; - case IR_OBAR: asm_obar(as, ir); break; - - /* Type conversions. */ - case IR_TOBIT: asm_tobit(as, ir); break; - case IR_CONV: asm_conv(as, ir); break; - case IR_TOSTR: asm_tostr(as, ir); break; - case IR_STRTO: asm_strto(as, ir); break; - - /* Calls. */ - case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; - case IR_CALLXS: asm_callx(as, ir); break; - case IR_CARG: break; - - default: - setintV(&as->J->errinfo, ir->o); - lj_trace_err_info(as->J, LJ_TRERR_NYIIR); - break; - } -} - /* -- Trace setup --------------------------------------------------------- */ /* Ensure there are enough stack slots for call arguments. */