Add support for integer IR_MUL.

Mike Pall 2010-12-06 12:54:00 +01:00
parent b56b83487f
commit 86fd2289f0
3 changed files with 106 additions and 13 deletions

View File

@@ -2482,6 +2482,31 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   ra_left(as, dest, lref);
 }
 
+static void asm_intmul(ASMState *as, IRIns *ir)
+{
+  IRRef lref = ir->op1;
+  IRRef rref = ir->op2;
+  int32_t k = 0;
+  if (asm_isk32(as, rref, &k)) {
+    /* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = asm_fuseload(as, lref, RSET_GPR);
+    x86Op xo;
+    if (checki8(k)) {
+      emit_i8(as, k);
+      xo = XO_IMULi8;
+    } else {
+      emit_i32(as, k);
+      xo = XO_IMULi;
+    }
+    emit_rr(as, xo, REX_64IR(ir, dest), left);
+  } else {
+    /* NYI: integer multiply of non-constant operands. */
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+  }
+}
+
 /* LEA is really a 4-operand ADD with an independent destination register,
 ** up to two source registers and an immediate. One register can be scaled
 ** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several
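
Aside, not part of the commit: the choice between XO_IMULi8 and XO_IMULi above is only about how the constant is encoded. IMUL with an 8-bit immediate is opcode 0x6b and covers constants in [-128, 127]; everything else needs the 32-bit-immediate form, opcode 0x69. A minimal standalone sketch of that decision (the helper below is illustrative; LuaJIT's own test is checki8()):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in for checki8(): does the constant fit a signed byte? */
    static const char *imul_form(int32_t k)
    {
      return (k >= -128 && k <= 127) ? "imul r, r/m, imm8  (opcode 0x6b)"
                                     : "imul r, r/m, imm32 (opcode 0x69)";
    }

    int main(void)
    {
      printf("x * 7   -> %s\n", imul_form(7));    /* small constant: imm8 form */
      printf("x * 300 -> %s\n", imul_form(300));  /* needs the imm32 form */
      return 0;
    }

The "NYI: use lea/shl/add/sub" comment points at a further strength reduction (e.g. multiplies by 3 or 5 via LEA) that is left for later; FOLD already rewrites exact powers of two into shifts, so the backend normally only sees the remaining constants.
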
@@ -3445,7 +3470,12 @@ static void asm_ir(ASMState *as, IRIns *ir)
     else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
       asm_intarith(as, ir, XOg_SUB);
     break;
-  case IR_MUL: asm_fparith(as, ir, XO_MULSD); break;
+  case IR_MUL:
+    if (irt_isnum(ir->t))
+      asm_fparith(as, ir, XO_MULSD);
+    else
+      asm_intmul(as, ir);
+    break;
   case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break;
   case IR_NEG: asm_fparith(as, ir, XO_XORPS); break;

View File

@@ -197,6 +197,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
   switch (op) {
   case IR_ADD: k1 += k2; break;
   case IR_SUB: k1 -= k2; break;
+  case IR_MUL: k1 *= k2; break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
@@ -212,6 +213,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
 LJFOLD(ADD KINT KINT)
 LJFOLD(SUB KINT KINT)
+LJFOLD(MUL KINT KINT)
 LJFOLD(BAND KINT KINT)
 LJFOLD(BOR KINT KINT)
 LJFOLD(BXOR KINT KINT)
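
Aside, not from the commit: together these two additions let the fold engine evaluate an integer MUL of two constants at recording time. A rough standalone illustration of the arithmetic (the cast through uint32_t just makes the two's-complement wraparound explicit and well-defined in ISO C; it is not how lj_opt_fold.c spells it):

    #include <stdint.h>
    #include <assert.h>

    /* Illustration of folding MUL KINT KINT: multiply the 32-bit constants and
    ** let the result wrap modulo 2^32. */
    static int32_t fold_mul_kint(int32_t k1, int32_t k2)
    {
      return (int32_t)((uint32_t)k1 * (uint32_t)k2);
    }

    int main(void)
    {
      assert(fold_mul_kint(6, 7) == 42);             /* MUL 6 7 folds to 42 */
      assert(fold_mul_kint(0x10000, 0x10000) == 0);  /* 2^32 wraps to 0 */
      return 0;
    }
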
@@ -680,6 +682,43 @@ LJFOLDF(simplify_intsub_k64)
   return RETRYFOLD;
 }
 
+static TRef simplify_intmul_k(jit_State *J, int32_t k)
+{
+  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
+  ** But this is mainly intended for simple address arithmetic.
+  ** Also it's easier for the backend to optimize the original multiplies.
+  */
+  if (k == 1) {  /* i * 1 ==> i */
+    return LEFTFOLD;
+  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
+    fins->o = IR_BSHL;
+    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
+    return RETRYFOLD;
+  }
+  return NEXTFOLD;
+}
+
+LJFOLD(MUL any KINT)
+LJFOLDF(simplify_intmul_k32)
+{
+  if (fright->i == 0)  /* i * 0 ==> 0 */
+    return INTFOLD(0);
+  else if (fright->i > 0)
+    return simplify_intmul_k(J, fright->i);
+  return NEXTFOLD;
+}
+
+LJFOLD(MUL any KINT64)
+LJFOLDF(simplify_intmul_k64)
+{
+  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
+    return lj_ir_kint64(J, 0);
+  else if (ir_kint64(fright)->u64 < 0x80000000u)
+    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
+  return NEXTFOLD;
+}
+
 LJFOLD(SUB any any)
 LJFOLD(SUBOV any any)
 LJFOLDF(simplify_intsub)
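
Another aside, not from the commit: the i * 2^k rewrite above rests on two small facts, sketched here with a portable stand-in for lj_fls() (the loop is illustrative, not LuaJIT's implementation):

    #include <stdint.h>
    #include <assert.h>

    /* (k & (k-1)) == 0 holds for k == 0 and for every power of two; the callers
    ** pass k > 0 and k == 1 is handled first, so in context it is a pure
    ** power-of-two test. */
    static int is_pow2(uint32_t k)
    {
      return (k & (k - 1)) == 0;
    }

    /* Portable stand-in for lj_fls(): index of the highest set bit (x != 0). */
    static uint32_t fls32(uint32_t x)
    {
      uint32_t n = 0;
      while (x >>= 1) n++;
      return n;
    }

    int main(void)
    {
      assert(is_pow2(8) && fls32(8) == 3);  /* MUL i 8  ==>  BSHL i 3 */
      assert(!is_pow2(6));                  /* MUL i 6 is left for the backend */
      return 0;
    }

The KINT64 variant only reuses this path when the constant fits in a non-negative int32, which matches the stated goal of simple address arithmetic.
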
@@ -816,16 +855,17 @@ LJFOLD(BROL any KINT)
 LJFOLD(BROR any KINT)
 LJFOLDF(simplify_shift_ik)
 {
-  int32_t k = (fright->i & 31);
+  int32_t mask = irt_is64(fins->t) ? 63 : 31;
+  int32_t k = (fright->i & mask);
   if (k == 0)  /* i o 0 ==> i */
     return LEFTFOLD;
-  if (k != fright->i) {  /* i o k ==> i o (k & 31) */
+  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
     fins->op2 = (IRRef1)lj_ir_kint(J, k);
     return RETRYFOLD;
   }
-  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&31) */
+  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
     fins->o = IR_BROL;
-    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31);
+    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
     return RETRYFOLD;
   }
   return NEXTFOLD;
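
Background, not part of the diff: x86 shift instructions use only the low 5 bits of the count for 32-bit operands and the low 6 bits for 64-bit operands, and these folds now apply the matching mask (31 or 63) based on the result type instead of hard-coding 31. A small model of the behaviour being assumed (plain C, not LuaJIT code):

    #include <stdint.h>
    #include <assert.h>

    /* Model of a left shift with a masked count, as the fold assumes the target
    ** behaves: the operand width decides whether 31 or 63 masks the count. */
    static uint64_t shl_masked(uint64_t v, int count, int is64)
    {
      int mask = is64 ? 63 : 31;
      return is64 ? v << (count & mask)
                  : (uint64_t)((uint32_t)v << (count & mask));
    }

    int main(void)
    {
      assert(shl_masked(1, 35, 0) == 8);   /* 32-bit: BSHL i 35 ==> BSHL i 3 */
      assert(shl_masked(1, 70, 1) == 64);  /* 64-bit: 70 & 63 == 6 */
      return 0;
    }
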
@@ -841,9 +881,10 @@ LJFOLDF(simplify_shift_andk)
   IRIns *irk = IR(fright->op2);
   PHIBARRIER(fright);
   if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
-      irk->o == IR_KINT) {  /* i o (j & 31) ==> i o j */
-    int32_t k = irk->i & 31;
-    if (k == 31) {
+      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
+    int32_t mask = irt_is64(fins->t) ? 63 : 31;
+    int32_t k = irk->i & mask;
+    if (k == mask) {
       fins->op2 = fright->op1;
       return RETRYFOLD;
     }
@@ -870,9 +911,29 @@ LJFOLDF(simplify_shift2_ki)
   return NEXTFOLD;
 }
 
+LJFOLD(BSHL KINT64 any)
+LJFOLD(BSHR KINT64 any)
+LJFOLDF(simplify_shift1_ki64)
+{
+  if (ir_kint64(fleft)->u64 == 0)  /* 0 o i ==> 0 */
+    return LEFTFOLD;
+  return NEXTFOLD;
+}
+
+LJFOLD(BSAR KINT64 any)
+LJFOLD(BROL KINT64 any)
+LJFOLD(BROR KINT64 any)
+LJFOLDF(simplify_shift2_ki64)
+{
+  if (ir_kint64(fleft)->u64 == 0 || (int64_t)ir_kint64(fleft)->u64 == -1)
+    return LEFTFOLD;  /* 0 o i ==> 0; -1 o i ==> -1 */
+  return NEXTFOLD;
+}
+
 /* -- Reassociation ------------------------------------------------------- */
 
 LJFOLD(ADD ADD KINT)
+LJFOLD(MUL MUL KINT)
 LJFOLD(BAND BAND KINT)
 LJFOLD(BOR BOR KINT)
 LJFOLD(BXOR BXOR KINT)
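
A short aside, not from the commit: the two new KINT64 folds exploit simple identities on the constant left operand. Zero shifted or rotated by any count is still zero, and an all-ones value (-1) is unchanged by an arithmetic right shift or by any rotation. The rotation identity, checked in plain C (the masked form avoids an undefined shift by 64; this is an illustration only):

    #include <stdint.h>
    #include <assert.h>

    /* 64-bit rotate right with the count reduced mod 64. */
    static uint64_t ror64(uint64_t x, unsigned k)
    {
      k &= 63;
      return k ? (x >> k) | (x << (64 - k)) : x;
    }

    int main(void)
    {
      for (unsigned k = 0; k < 64; k++) {
        assert((UINT64_C(0) >> k) == 0);                 /* 0 o i ==> 0 */
        assert(ror64(~UINT64_C(0), k) == ~UINT64_C(0));  /* -1 rotated ==> -1 */
      }
      return 0;
    }
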
@@ -924,14 +985,15 @@ LJFOLDF(reassoc_shift)
   IRIns *irk = IR(fleft->op2);
   PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
   if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
-    int32_t k = (irk->i & 31) + (fright->i & 31);
-    if (k > 31) {  /* Combined shift too wide? */
+    int32_t mask = irt_is64(fins->t) ? 63 : 31;
+    int32_t k = (irk->i & mask) + (fright->i & mask);
+    if (k > mask) {  /* Combined shift too wide? */
       if (fins->o == IR_BSHL || fins->o == IR_BSHR)
-        return INTFOLD(0);
+        return mask == 31 ? INTFOLD(0) : lj_ir_kint64(J, 0);
       else if (fins->o == IR_BSAR)
-        k = 31;
+        k = mask;
       else
-        k &= 31;
+        k &= mask;
     }
     fins->op1 = fleft->op1;
     fins->op2 = (IRRef1)lj_ir_kint(J, k);
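
One more aside, not part of the diff: the width-aware handling of over-wide reassociated shifts can be checked directly. For logical shifts, a combined count above the mask means the result is a constant zero of the matching width; for arithmetic right shifts the count saturates at the mask. A tiny check in plain C (assuming the usual arithmetic behaviour of >> on signed values, which mainstream compilers provide):

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
      uint32_t u = 0xdeadbeefu;
      int32_t s = -123456789;
      /* (u >> 20) >> 20: combined count 40 > 31, so the fold yields constant 0. */
      assert(((u >> 20) >> 20) == 0);
      /* Arithmetic right shift saturates: (s >> 20) >> 20 equals s >> 31 (== -1). */
      assert(((s >> 20) >> 20) == (s >> 31));
      return 0;
    }
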

View File

@@ -218,6 +218,7 @@ typedef enum {
   XO_SHIFTi = XO_(c1),
   XO_SHIFT1 = XO_(d1),
   XO_SHIFTcl = XO_(d3),
+  XO_IMULi = XO_(69),
   XO_IMULi8 = XO_(6b),
   XO_CMP = XO_(3b),
   XO_TEST = XO_(85),