mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-07 23:24:09 +00:00
x86: Generate BMI2 shifts and rotates, if available.
Contributed by Peter Cawley.
This commit is contained in:
parent
6801e7165c
commit
892887e584
@ -244,6 +244,7 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
|
|||||||
[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
|
[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
|
||||||
--Fx
|
--Fx
|
||||||
[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
|
[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
|
||||||
|
[0xf7] = "|sarxVrmv|shlxVrmv|shrxVrmv",
|
||||||
},
|
},
|
||||||
|
|
||||||
["3a"] = { -- [66] 0f 3a xx
|
["3a"] = { -- [66] 0f 3a xx
|
||||||
@ -273,6 +274,8 @@ nil,nil,nil,nil,
|
|||||||
[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
|
[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
|
||||||
[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
|
[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
|
||||||
[0xdf] = "||aeskeygenassistXrmu",
|
[0xdf] = "||aeskeygenassistXrmu",
|
||||||
|
--Fx
|
||||||
|
[0xf0] = "|||rorxVrmu",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2150,7 +2150,10 @@ static void asm_setup_regsp(ASMState *as)
|
|||||||
#endif
|
#endif
|
||||||
#if LJ_TARGET_X86ORX64
|
#if LJ_TARGET_X86ORX64
|
||||||
/* Non-constant shift counts need to be in RID_ECX on x86/x64. */
|
/* Non-constant shift counts need to be in RID_ECX on x86/x64. */
|
||||||
case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
|
case IR_BSHL: case IR_BSHR: case IR_BSAR:
|
||||||
|
if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
|
||||||
|
break;
|
||||||
|
case IR_BROL: case IR_BROR:
|
||||||
if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
|
if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
|
||||||
IR(ir->op2)->r = REGSP_HINT(RID_ECX);
|
IR(ir->op2)->r = REGSP_HINT(RID_ECX);
|
||||||
if (inloop)
|
if (inloop)
|
||||||
|
@ -1956,7 +1956,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
|
|||||||
#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
|
#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
|
||||||
#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
|
#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
|
||||||
|
|
||||||
static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
|
static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
|
||||||
{
|
{
|
||||||
IRRef rref = ir->op2;
|
IRRef rref = ir->op2;
|
||||||
IRIns *irr = IR(rref);
|
IRIns *irr = IR(rref);
|
||||||
@ -1965,11 +1965,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
|
|||||||
int shift;
|
int shift;
|
||||||
dest = ra_dest(as, ir, RSET_GPR);
|
dest = ra_dest(as, ir, RSET_GPR);
|
||||||
shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
|
shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
|
||||||
|
if (!xv && shift && (as->flags & JIT_F_BMI2)) {
|
||||||
|
Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
|
||||||
|
if (left != dest) { /* BMI2 rotate right by constant. */
|
||||||
|
emit_i8(as, xs == XOg_ROL ? -shift : shift);
|
||||||
|
emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
switch (shift) {
|
switch (shift) {
|
||||||
case 0: break;
|
case 0: break;
|
||||||
case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
|
case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
|
||||||
default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
|
default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
|
||||||
}
|
}
|
||||||
|
} else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
|
||||||
|
Reg left, right;
|
||||||
|
dest = ra_dest(as, ir, RSET_GPR);
|
||||||
|
right = ra_alloc1(as, rref, RSET_GPR);
|
||||||
|
left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
|
||||||
|
irt_is64(ir->t));
|
||||||
|
emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
|
||||||
|
return;
|
||||||
} else { /* Variable shifts implicitly use register cl (i.e. ecx). */
|
} else { /* Variable shifts implicitly use register cl (i.e. ecx). */
|
||||||
Reg right;
|
Reg right;
|
||||||
dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
|
dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
|
||||||
@ -1995,11 +2011,11 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
|
|||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
|
/* Shift/rotate entry points, all routed through asm_bitshift. The third
** argument is the legacy x86 shift group, the fourth is the BMI2 opcode
** used for variable shift counts. Rotates pass 0 there: with a variable
** count they take the cl-based path, and with a non-zero constant count
** they may use RORX when BMI2 is available.
*/
#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
|
||||||
#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
|
#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
|
||||||
#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
|
#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
|
||||||
#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
|
#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
|
||||||
#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
|
#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
|
||||||
|
|
||||||
/* -- Comparisons --------------------------------------------------------- */
|
/* -- Comparisons --------------------------------------------------------- */
|
||||||
|
|
||||||
|
@ -13,10 +13,12 @@
|
|||||||
if (rex != 0x40) *--(p) = rex; }
|
if (rex != 0x40) *--(p) = rex; }
|
||||||
#define FORCE_REX 0x200
|
#define FORCE_REX 0x200
|
||||||
#define REX_64 (FORCE_REX|0x080000)
|
#define REX_64 (FORCE_REX|0x080000)
|
||||||
|
/* Added to an XV_* opcode by VEX_64IR() when the IR type is 64 bit. */
#define VEX_64 0x800000
|
||||||
#else
|
#else
|
||||||
#define REXRB(p, rr, rb) ((void)0)
|
#define REXRB(p, rr, rb) ((void)0)
|
||||||
#define FORCE_REX 0
|
#define FORCE_REX 0
|
||||||
#define REX_64 0
|
#define REX_64 0
|
||||||
|
/* Zero in this configuration, so adding it leaves an opcode unchanged. */
#define VEX_64 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
|
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
|
||||||
@ -31,6 +33,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
|
|||||||
MCode *p, int delta)
|
MCode *p, int delta)
|
||||||
{
|
{
|
||||||
int n = (int8_t)xo;
|
int n = (int8_t)xo;
|
||||||
|
if (n == -60) { /* VEX-encoded instruction */
|
||||||
|
#if LJ_64
|
||||||
|
xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
|
||||||
|
#endif
|
||||||
|
*(uint32_t *)(p+delta-5) = (uint32_t)xo;
|
||||||
|
return p+delta-5;
|
||||||
|
}
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
if (__builtin_constant_p(xo) && n == -2)
|
if (__builtin_constant_p(xo) && n == -2)
|
||||||
p[delta-2] = (MCode)(xo >> 24);
|
p[delta-2] = (MCode)(xo >> 24);
|
||||||
@ -412,8 +421,10 @@ static void emit_call_(ASMState *as, MCode *target)
|
|||||||
/* Use 64 bit operations to handle 64 bit IR types. */
|
/* Use 64 bit operations to handle 64 bit IR types. */
|
||||||
#if LJ_64
|
#if LJ_64
|
||||||
#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
|
#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
|
||||||
|
/* Counterpart of REX_64IR for VEX-encoded (XV_*) opcodes: adds VEX_64 to
** the opcode r when the type of ir is 64 bit.
*/
#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
|
||||||
#else
|
#else
|
||||||
#define REX_64IR(ir, r) (r)
|
#define REX_64IR(ir, r) (r)
|
||||||
|
/* Without LJ_64 the VEX opcode r is passed through unchanged. */
#define VEX_64IR(ir, r) (r)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Generic move between two regs. */
|
/* Generic move between two regs. */
|
||||||
|
@ -189,6 +189,11 @@ typedef struct {
|
|||||||
#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
|
#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
|
||||||
#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
|
#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
|
||||||
|
|
||||||
|
/* Builders for VEX-encoded opcode words (XV_* prefix). The low byte is 0xc4,
** which emit_op recognizes via (int8_t)xo == -60, and the opcode byte o is
** placed in bits 24-31.
** NOTE(review): the two middle bytes presumably carry the remaining VEX
** prefix fields for the 66/f2/f3 and 0f38/0f3a forms named by each macro --
** confirm against the Intel SDM description of the 3-byte VEX prefix.
*/
#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24)))
|
||||||
|
#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24)))
|
||||||
|
#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24)))
|
||||||
|
#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24)))
|
||||||
|
|
||||||
/* This list of x86 opcodes is not intended to be complete. Opcodes are only
|
/* This list of x86 opcodes is not intended to be complete. Opcodes are only
|
||||||
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
|
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
|
||||||
** whole mess.
|
** whole mess.
|
||||||
@ -231,6 +236,12 @@ typedef enum {
|
|||||||
XI_FSCALE = 0xfdd9,
|
XI_FSCALE = 0xfdd9,
|
||||||
XI_FYL2X = 0xf1d9,
|
XI_FYL2X = 0xf1d9,
|
||||||
|
|
||||||
|
/* VEX-encoded instructions. XV_* prefix. */
|
||||||
|
XV_RORX = XV_f20f3a(f0),
|
||||||
|
XV_SARX = XV_f30f38(f7),
|
||||||
|
XV_SHLX = XV_660f38(f7),
|
||||||
|
XV_SHRX = XV_f20f38(f7),
|
||||||
|
|
||||||
/* Variable-length opcodes. XO_* prefix. */
|
/* Variable-length opcodes. XO_* prefix. */
|
||||||
XO_MOV = XO_(8b),
|
XO_MOV = XO_(8b),
|
||||||
XO_MOVto = XO_(89),
|
XO_MOVto = XO_(89),
|
||||||
|
Loading…
Reference in New Issue
Block a user