Add support for integer IR_MUL.
commit 86fd2289f0
parent b56b83487f

src/lj_asm.c | 32
@@ -2482,6 +2482,31 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
     ra_left(as, dest, lref);
 }
 
+static void asm_intmul(ASMState *as, IRIns *ir)
+{
+  IRRef lref = ir->op1;
+  IRRef rref = ir->op2;
+  int32_t k = 0;
+  if (asm_isk32(as, rref, &k)) {
+    /* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = asm_fuseload(as, lref, RSET_GPR);
+    x86Op xo;
+    if (checki8(k)) {
+      emit_i8(as, k);
+      xo = XO_IMULi8;
+    } else {
+      emit_i32(as, k);
+      xo = XO_IMULi;
+    }
+    emit_rr(as, xo, REX_64IR(ir, dest), left);
+  } else {
+    /* NYI: integer multiply of non-constant operands. */
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+  }
+}
+
 /* LEA is really a 4-operand ADD with an independent destination register,
 ** up to two source registers and an immediate. One register can be scaled
 ** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several
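Note on the imm8/imm32 split in asm_intmul() above: x86 IMUL has two immediate forms, and the narrower one is preferred when the constant fits in a sign-extended byte. A minimal sketch of that selection test, assuming checki8() behaves like the helper below (the helper is illustrative, not LuaJIT's actual macro):

#include <stdint.h>

/* Illustrative stand-in for a checki8()-style test: does k survive a round
** trip through a signed 8-bit value? If so, the imm8 form (XO_IMULi8 plus
** emit_i8) is enough; otherwise fall back to the imm32 form (XO_IMULi plus
** emit_i32). */
static int fits_simm8(int32_t k)
{
  return k == (int32_t)(int8_t)k;  /* e.g. 100 -> yes, 300 -> no */
}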
@@ -3445,7 +3470,12 @@ static void asm_ir(ASMState *as, IRIns *ir)
     else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
       asm_intarith(as, ir, XOg_SUB);
     break;
-  case IR_MUL: asm_fparith(as, ir, XO_MULSD); break;
+  case IR_MUL:
+    if (irt_isnum(ir->t))
+      asm_fparith(as, ir, XO_MULSD);
+    else
+      asm_intmul(as, ir);
+    break;
   case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break;
 
   case IR_NEG: asm_fparith(as, ir, XO_XORPS); break;
@@ -197,6 +197,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
   switch (op) {
   case IR_ADD: k1 += k2; break;
   case IR_SUB: k1 -= k2; break;
+  case IR_MUL: k1 *= k2; break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
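The kfold_intop() switch above is part of LuaJIT's constant-folding pass (src/lj_opt_fold.c); adding IR_MUL there lets a multiply of two integer constants be evaluated at fold time instead of being emitted on the trace. A rough illustration of the arithmetic, assuming 2's-complement wraparound for the 32-bit product (illustrative helper, not LuaJIT code):

#include <stdint.h>

/* Illustrative: fold two int32 constants the way k1 *= k2 is intended to,
** with well-defined wraparound via unsigned arithmetic. */
static int32_t kfold_mul_sketch(int32_t k1, int32_t k2)
{
  return (int32_t)((uint32_t)k1 * (uint32_t)k2);  /* 6 * 7 ==> 42 */
}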
@@ -212,6 +213,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
 
 LJFOLD(ADD KINT KINT)
 LJFOLD(SUB KINT KINT)
+LJFOLD(MUL KINT KINT)
 LJFOLD(BAND KINT KINT)
 LJFOLD(BOR KINT KINT)
 LJFOLD(BXOR KINT KINT)
@@ -680,6 +682,43 @@ LJFOLDF(simplify_intsub_k64)
   return RETRYFOLD;
 }
 
+static TRef simplify_intmul_k(jit_State *J, int32_t k)
+{
+  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
+  ** But this is mainly intended for simple address arithmetic.
+  ** Also it's easier for the backend to optimize the original multiplies.
+  */
+  if (k == 1) {  /* i * 1 ==> i */
+    return LEFTFOLD;
+  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
+    fins->o = IR_BSHL;
+    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
+    return RETRYFOLD;
+  }
+  return NEXTFOLD;
+}
+
+LJFOLD(MUL any KINT)
+LJFOLDF(simplify_intmul_k32)
+{
+  if (fright->i == 0)  /* i * 0 ==> 0 */
+    return INTFOLD(0);
+  else if (fright->i > 0)
+    return simplify_intmul_k(J, fright->i);
+  return NEXTFOLD;
+}
+
+LJFOLD(MUL any KINT64)
+LJFOLDF(simplify_intmul_k64)
+{
+  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
+    return lj_ir_kint64(J, 0);
+  else if (ir_kint64(fright)->u64 < 0x80000000u)
+    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
+  return NEXTFOLD;
+}
+
 LJFOLD(SUB any any)
 LJFOLD(SUBOV any any)
 LJFOLDF(simplify_intsub)
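The simplify_intmul_k() rule above strength-reduces a multiply by a power of two into a left shift; it relies on (k & (k-1)) == 0 detecting powers of two and on lj_fls() returning the index of the highest set bit. A self-contained sketch of the same transformation, with fls_sketch() standing in for lj_fls() (assumption: lj_fls(8) == 3):

#include <stdint.h>
#include <stdio.h>

static uint32_t fls_sketch(uint32_t x)  /* index of the highest set bit */
{
  uint32_t n = 0;
  while (x >>= 1) n++;
  return n;
}

int main(void)
{
  int32_t i = 7, k = 8;
  if (k != 0 && (k & (k-1)) == 0)  /* power of two, as in the fold rule */
    printf("%d\n", i << fls_sketch((uint32_t)k));  /* 7 << 3 = 56 == 7*8 */
  return 0;
}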
@@ -816,16 +855,17 @@ LJFOLD(BROL any KINT)
 LJFOLD(BROR any KINT)
 LJFOLDF(simplify_shift_ik)
 {
-  int32_t k = (fright->i & 31);
+  int32_t mask = irt_is64(fins->t) ? 63 : 31;
+  int32_t k = (fright->i & mask);
   if (k == 0)  /* i o 0 ==> i */
     return LEFTFOLD;
-  if (k != fright->i) {  /* i o k ==> i o (k & 31) */
+  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
     fins->op2 = (IRRef1)lj_ir_kint(J, k);
     return RETRYFOLD;
   }
-  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&31) */
+  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
     fins->o = IR_BROL;
-    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31);
+    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
     return RETRYFOLD;
   }
   return NEXTFOLD;
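The new mask variable generalizes the shift folds to 64-bit operands: x86 uses only the low 5 bits of a shift count for 32-bit operands and the low 6 bits for 64-bit operands, so constant counts can be canonicalized with the matching mask. A small sketch of that masking behavior (illustrative, not LuaJIT code):

#include <stdint.h>

/* Illustrative: shift counts reduce modulo the operand width, as on x86. */
static uint32_t shl32(uint32_t i, int32_t k) { return i << (k & 31); }
static uint64_t shl64(uint64_t i, int32_t k) { return i << (k & 63); }
/* shl32(1, 35) == 8 (same as a shift by 3), while shl64(1, 35) == 1ull << 35. */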
@@ -841,9 +881,10 @@ LJFOLDF(simplify_shift_andk)
   IRIns *irk = IR(fright->op2);
   PHIBARRIER(fright);
   if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
-      irk->o == IR_KINT) {  /* i o (j & 31) ==> i o j */
-    int32_t k = irk->i & 31;
-    if (k == 31) {
+      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
+    int32_t mask = irt_is64(fins->t) ? 63 : 31;
+    int32_t k = irk->i & mask;
+    if (k == mask) {
       fins->op2 = fright->op1;
       return RETRYFOLD;
     }
@@ -870,9 +911,29 @@ LJFOLDF(simplify_shift2_ki)
   return NEXTFOLD;
 }
 
+LJFOLD(BSHL KINT64 any)
+LJFOLD(BSHR KINT64 any)
+LJFOLDF(simplify_shift1_ki64)
+{
+  if (ir_kint64(fleft)->u64 == 0)  /* 0 o i ==> 0 */
+    return LEFTFOLD;
+  return NEXTFOLD;
+}
+
+LJFOLD(BSAR KINT64 any)
+LJFOLD(BROL KINT64 any)
+LJFOLD(BROR KINT64 any)
+LJFOLDF(simplify_shift2_ki64)
+{
+  if (ir_kint64(fleft)->u64 == 0 || (int64_t)ir_kint64(fleft)->u64 == -1)
+    return LEFTFOLD;  /* 0 o i ==> 0; -1 o i ==> -1 */
+  return NEXTFOLD;
+}
+
 /* -- Reassociation ------------------------------------------------------- */
 
 LJFOLD(ADD ADD KINT)
+LJFOLD(MUL MUL KINT)
 LJFOLD(BAND BAND KINT)
 LJFOLD(BOR BOR KINT)
 LJFOLD(BXOR BXOR KINT)
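Adding MUL to the reassociation group above means (i * k1) * k2 can be re-folded into a single multiply by k1*k2, which then feeds back into the multiply-by-constant simplifications. A one-line sketch of the identity being exploited, with the wraparound made explicit (illustrative only):

#include <stdint.h>

/* Illustrative: (i * k1) * k2 == i * (k1 * k2) under 2's-complement wraparound. */
static int32_t reassoc_mul_sketch(int32_t i, int32_t k1, int32_t k2)
{
  return (int32_t)((uint32_t)i * ((uint32_t)k1 * (uint32_t)k2));
}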
@@ -924,14 +985,15 @@ LJFOLDF(reassoc_shift)
   IRIns *irk = IR(fleft->op2);
   PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
   if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
-    int32_t k = (irk->i & 31) + (fright->i & 31);
-    if (k > 31) {  /* Combined shift too wide? */
+    int32_t mask = irt_is64(fins->t) ? 63 : 31;
+    int32_t k = (irk->i & mask) + (fright->i & mask);
+    if (k > mask) {  /* Combined shift too wide? */
       if (fins->o == IR_BSHL || fins->o == IR_BSHR)
-        return INTFOLD(0);
+        return mask == 31 ? INTFOLD(0) : lj_ir_kint64(J, 0);
       else if (fins->o == IR_BSAR)
-        k = 31;
+        k = mask;
       else
-        k &= 31;
+        k &= mask;
     }
     fins->op1 = fleft->op1;
     fins->op2 = (IRRef1)lj_ir_kint(J, k);
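In reassoc_shift() the overflow handling now depends on the operand width: when the combined count exceeds the mask, a logical shift of a 32-bit value folds to the 32-bit constant 0 while a 64-bit value needs a KINT64 zero, and BSAR clamps the count to the mask instead. A tiny sketch of the 32-bit logical case (illustrative only):

#include <stdint.h>

/* Illustrative: two 20-bit shifts move a 32-bit value by 40 bits in total,
** so the reassociated BSHL/BSHR result is the constant 0; BSAR would clamp
** the count to 31 (or 63 for 64-bit operands) instead. */
static uint32_t combined_shl32(uint32_t i) { return (i << 20) << 20; }  /* always 0 */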
@@ -218,6 +218,7 @@ typedef enum {
   XO_SHIFTi =   XO_(c1),
   XO_SHIFT1 =   XO_(d1),
   XO_SHIFTcl =  XO_(d3),
+  XO_IMULi =    XO_(69),
   XO_IMULi8 =   XO_(6b),
   XO_CMP =      XO_(3b),
   XO_TEST =     XO_(85),
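For reference, the opcode added here matches the immediate forms of IMUL in the x86 manuals; the x86Op enum itself sits in the x86 target header (src/lj_target_x86.h in current LuaJIT trees). Summary, for reference only and not part of the patch:

  69 /r id    IMUL r32, r/m32, imm32
  6B /r ib    IMUL r32, r/m32, imm8 (sign-extended)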