x86: Generate BMI2 shifts and rotates, if available.

Contributed by Peter Cawley.
This commit is contained in:
Mike Pall 2016-03-28 23:05:20 +02:00
parent 6801e7165c
commit 892887e584
5 changed files with 51 additions and 7 deletions

View File

@ -244,6 +244,7 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
--Fx
[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
[0xf7] = "|sarxVrmv|shlxVrmv|shrxVrmv",
},
["3a"] = { -- [66] 0f 3a xx
@ -273,6 +274,8 @@ nil,nil,nil,nil,
[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
[0xdf] = "||aeskeygenassistXrmu",
--Fx
[0xf0] = "|||rorxVrmu",
},
}

View File

@ -2150,7 +2150,10 @@ static void asm_setup_regsp(ASMState *as)
#endif
#if LJ_TARGET_X86ORX64
/* Non-constant shift counts need to be in RID_ECX on x86/x64. */
case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
case IR_BSHL: case IR_BSHR: case IR_BSAR:
if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
break;
case IR_BROL: case IR_BROR:
if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
IR(ir->op2)->r = REGSP_HINT(RID_ECX);
if (inloop)

View File

@ -1956,7 +1956,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
{
IRRef rref = ir->op2;
IRIns *irr = IR(rref);
@ -1965,11 +1965,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
int shift;
dest = ra_dest(as, ir, RSET_GPR);
shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
if (!xv && shift && (as->flags & JIT_F_BMI2)) {
Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
if (left != dest) { /* BMI2 rotate right by constant. */
emit_i8(as, xs == XOg_ROL ? -shift : shift);
emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
return;
}
}
switch (shift) {
case 0: break;
case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
}
} else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
Reg left, right;
dest = ra_dest(as, ir, RSET_GPR);
right = ra_alloc1(as, rref, RSET_GPR);
left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
irt_is64(ir->t));
emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
return;
} else { /* Variable shifts implicitly use register cl (i.e. ecx). */
Reg right;
dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
@ -1995,11 +2011,11 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
*/
}
#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
/* Per-opcode entry points for asm_bitshift(). The third argument is the
** x86Shift group value (XOg_*) used by the non-BMI2 encodings. The fourth is
** the VEX opcode (XV_*) that asm_bitshift() uses for variable shift counts
** when JIT_F_BMI2 is set. Rotates pass 0 here: asm_bitshift() only uses BMI2
** for them with a constant count, via XV_RORX.
*/
#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
/* -- Comparisons --------------------------------------------------------- */

View File

@ -13,10 +13,12 @@
if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX 0x200
#define REX_64 (FORCE_REX|0x080000)
#define VEX_64 0x800000
#else
#define REXRB(p, rr, rb) ((void)0)
#define FORCE_REX 0
#define REX_64 0
#define VEX_64 0
#endif
/* Emit one immediate byte: pre-decrement the machine code pointer as->mcp
** and store (MCode)(i) at the new position. The `as` argument is
** parenthesized so that any pointer expression (e.g. &state), not just a
** plain identifier, expands correctly.
*/
#define emit_i8(as, i) (*--(as)->mcp = (MCode)(i))
@ -31,6 +33,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
MCode *p, int delta)
{
int n = (int8_t)xo;
if (n == -60) { /* VEX-encoded instruction */
#if LJ_64
xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
#endif
*(uint32_t *)(p+delta-5) = (uint32_t)xo;
return p+delta-5;
}
#if defined(__GNUC__)
if (__builtin_constant_p(xo) && n == -2)
p[delta-2] = (MCode)(xo >> 24);
@ -412,8 +421,10 @@ static void emit_call_(ASMState *as, MCode *target)
/* Select the 64 bit form of a value for IR instructions with a 64 bit type:
** REX_64IR adds REX_64 to r, VEX_64IR adds VEX_64 to r. For other IR types,
** and on builds without LJ_64, r is passed through unchanged.
*/
#if !LJ_64
#define REX_64IR(ir, r) (r)
#define VEX_64IR(ir, r) (r)
#else
#define REX_64IR(ir, r) (irt_is64((ir)->t) ? (r) + REX_64 : (r))
#define VEX_64IR(ir, r) (irt_is64((ir)->t) ? (r) + VEX_64 : (r))
#endif
/* Generic move between two regs. */

View File

@ -189,6 +189,11 @@ typedef struct {
#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
/* Constructors for VEX-encoded opcode words. The opcode byte `o` is placed in
** bits 24-31 above a fixed 24 bit prefix. The low byte of every prefix is
** 0xc4, i.e. -60 as int8_t, which is the value emit_op() tests for to
** recognize a VEX-encoded instruction.
** NOTE(review): the macro names appear to spell the prefix/opcode-map
** combination being encoded (66/f2/f3 + 0f38/0f3a) -- confirm against the
** VEX encoding tables.
** The opcode byte is converted to uint32_t before shifting: 0x##o has type
** int, and shifting a value >= 0x80 (e.g. 0xf7) left by 24 overflows it.
*/
#define XV_660f38(o) ((uint32_t)(0x79e2c4u + ((uint32_t)0x##o << 24)))
#define XV_f20f38(o) ((uint32_t)(0x7be2c4u + ((uint32_t)0x##o << 24)))
#define XV_f20f3a(o) ((uint32_t)(0x7be3c4u + ((uint32_t)0x##o << 24)))
#define XV_f30f38(o) ((uint32_t)(0x7ae2c4u + ((uint32_t)0x##o << 24)))
/* This list of x86 opcodes is not intended to be complete. Opcodes are only
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
** whole mess.
@ -231,6 +236,12 @@ typedef enum {
XI_FSCALE = 0xfdd9,
XI_FYL2X = 0xf1d9,
/* VEX-encoded instructions. XV_* prefix. */
XV_RORX = XV_f20f3a(f0),
XV_SARX = XV_f30f38(f7),
XV_SHLX = XV_660f38(f7),
XV_SHRX = XV_f20f38(f7),
/* Variable-length opcodes. XO_* prefix. */
XO_MOV = XO_(8b),
XO_MOVto = XO_(89),