diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 5890b54f..74a3a927 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1110,6 +1110,16 @@ static void asm_intmul(ASMState *as, IRIns *ir) if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); } +/* Integer modulo: calls asm_setupresult and asm_gencall for IRCALL_lj_vm_modi with op1/op2 as the two arguments. */ static void asm_intmod(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi]; + IRRef args[2]; + args[0] = ir->op1; + args[1] = ir->op2; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); +} + static void asm_bitswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1652,6 +1662,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_ADD: case IR_ADDOV: asm_arithop(as, ir, ARMI_ADD); break; case IR_SUB: case IR_SUBOV: asm_arithop(as, ir, ARMI_SUB); break; case IR_MUL: case IR_MULOV: asm_intmul(as, ir); break; + case IR_MOD: asm_intmod(as, ir); break; case IR_NEG: asm_intneg(as, ir, ARMI_RSB); break; @@ -1659,7 +1670,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_MAX: asm_intmin_max(as, ir, CC_LT); break; case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: - case IR_DIV: case IR_MOD: case IR_POW: case IR_ABS: case IR_TOBIT: + case IR_DIV: case IR_POW: case IR_ABS: case IR_TOBIT: lua_assert(0); /* Unused for LJ_SOFTFP. */ break; diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index c89bc521..e5bc4a0e 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1651,6 +1651,16 @@ static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id) } #endif +/* Integer modulo: calls asm_setupresult and asm_gencall for IRCALL_lj_vm_modi with op1/op2 as the two arguments. */ static void asm_intmod(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi]; + IRRef args[2]; + args[0] = ir->op1; + args[1] = ir->op2; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); +} + static int asm_swapops(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); @@ -2499,11 +2509,12 @@ static void asm_ir(ASMState *as, IRIns *ir) break; case IR_MOD: #if LJ_64 && LJ_HASFFI - asm_arith64(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); -#else - lua_assert(0); + /* Non-int result types go through asm_arith64 (modi64/modu64); int results use asm_intmod. */ if (!irt_isint(ir->t)) + asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else #endif + asm_intmod(as, ir); break; case IR_NEG: diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 1cbee91c..3b0a54b6 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -143,6 +143,7 @@ typedef struct CCallInfo { _(lj_gc_barrieruv, 2, FS, NIL, 0) \ _(lj_mem_newgco, 2, FS, P32, CCI_L) \ _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ + _(lj_vm_modi, 2, FN, INT, 0) \ IRCALLDEF_SOFTFP(_) \ IRCALLDEF_FPMATH(_) \ IRCALLDEF_FFI(_) \ diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 7ab42b7a..3059fb9e 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -141,7 +141,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key); LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op); LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); -LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); +LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc); LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 0fc73683..2ecac2d9 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -231,6 +231,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; case IR_MUL: k1 *= k2; break; + case IR_MOD: k1 = lj_vm_modi(k1, k2); break; case IR_BAND: k1 &= k2; break; case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; @@ -249,6 +250,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) LJFOLD(ADD KINT KINT) LJFOLD(SUB KINT KINT) LJFOLD(MUL KINT KINT) +LJFOLD(MOD KINT KINT) LJFOLD(BAND KINT KINT) LJFOLD(BOR KINT KINT) LJFOLD(BXOR KINT KINT) 
@@ -1148,7 +1150,6 @@ LJFOLDF(simplify_intmul_k32) LJFOLD(MUL any KINT64) LJFOLDF(simplify_intmul_k64) - { if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ return INT64FOLD(0); @@ -1160,6 +1161,27 @@ LJFOLDF(simplify_intmul_k64) return NEXTFOLD; } +LJFOLD(MOD any KINT) +LJFOLDF(simplify_intmod_k) +{ + int32_t k = fright->i; + lua_assert(k != 0); + if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ + fins->o = IR_BAND; + fins->op2 = lj_ir_kint(J, k-1); + return RETRYFOLD; + } + return NEXTFOLD; /* k is not a positive power of two: no rewrite here. */ +} + +LJFOLD(MOD KINT any) +LJFOLDF(simplify_intmod_kleft) +{ + if (fleft->i == 0) /* 0 % i ==> 0 */ + return INTFOLD(0); + return NEXTFOLD; +} + LJFOLD(SUB any any) LJFOLD(SUBOV any any) LJFOLDF(simplify_intsub) diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index e7f280ec..40696c02 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -551,16 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) } /* Narrowing of modulo operator. */ -TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) +TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc) { TRef tmp; - if ((J->flags & JIT_F_OPT_NARROW) && - tref_isk(rc) && tref_isint(rc)) { /* Optimize x % k. */ - int32_t k = IR(tref_ref(rc))->i; - if (k > 0 && (k & (k-1)) == 0) { /* i % 2^k ==> band(i, 2^k-1) */ - if (tref_isinteger(rb)) - return emitir(IRTI(IR_BAND), rb, lj_ir_kint(J, k-1)); - } + if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) /* String divisor: error out when lj_str_tonum() returns zero. */ + lj_trace_err(J, LJ_TRERR_BADTYPE); + if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) && + tref_isinteger(rb) && tref_isinteger(rc) && + (tvisint(vc) ? 
intV(vc) != 0 : !tviszero(vc))) { /* Integer path is only taken when the divisor value vc is non-zero. */ + emitir(IRTGI(IR_NE), rc, lj_ir_kint(J, 0)); /* Emit rc != 0 ahead of the integer MOD. */ + return emitir(IRTI(IR_MOD), rb, rc); } /* b % c ==> b - floor(b/c)*c */ rb = lj_ir_tonum(J, rb); diff --git a/src/lj_record.c b/src/lj_record.c index 689d7b80..fe79832a 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1715,7 +1715,7 @@ void lj_record_ins(jit_State *J) case BC_MODVN: case BC_MODVV: recmod: if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) - rc = lj_opt_narrow_mod(J, rb, rc); + rc = lj_opt_narrow_mod(J, rb, rc, rcv); else rc = rec_mm_arith(J, &ix, MM_mod); break; diff --git a/src/lj_vm.h b/src/lj_vm.h index 3941dad9..ba624dad 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -71,6 +71,7 @@ LJ_ASMF double lj_vm_exp2(double); #define lj_vm_exp2 exp2 #endif #endif +LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); #endif /* Continuations for metamethods. */ diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index d357b4e3..970bb5ad 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -39,6 +39,20 @@ double lj_vm_exp2(double a) } #endif +#if !LJ_TARGET_ARM +/* Integer modulo of a by b, computed on the operand magnitudes; a non-zero result ends up with the sign of b. */ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) +{ + uint32_t y, ua, ub; + lua_assert(b != 0); /* This must be checked before using this function. */ + ua = a < 0 ? -(uint32_t)a : (uint32_t)a; + ub = b < 0 ? -(uint32_t)b : (uint32_t)b; + y = ua % ub; /* Remainder of the magnitudes. */ + if (y != 0 && (a^b) < 0) y = y - ub; /* Operand signs differ and the remainder is non-zero. */ + if (((int32_t)y^b) < 0) y = -y; /* Sign of y differs from sign of b. */ + return (int32_t)y; +} +#endif + #if !LJ_TARGET_X86ORX64 /* Unsigned x^k. */ static double lj_vm_powui(double x, uint32_t k)