mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-08 15:34:09 +00:00
Added flags for opcodes with non-memory-store side effects and enabled DCE of intrinsics
Intrinsics are now assumed to have no side effects unless flagged with either memory side effects (S) or non-memory side effects (s).
This commit is contained in:
parent
275d0dcdd7
commit
a374e90531
@ -866,12 +866,13 @@ static void asm_intrin_opcode(ASMState *as, IRIns *ir, IntrinsInfo *ininfo)
|
|||||||
checkmclim(as);
|
checkmclim(as);
|
||||||
}
|
}
|
||||||
|
|
||||||
void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo)
|
int asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo)
|
||||||
{
|
{
|
||||||
IRRef results[LJ_INTRINS_MAXREG];
|
IRRef results[LJ_INTRINS_MAXREG];
|
||||||
RegSet evict = 0, outset = 0, aout = 0;
|
RegSet evict = 0, outset = 0, aout = 0;
|
||||||
int32_t i = intrin_regmode(intrins) ? intrins->dyninsz : 0;
|
int32_t i = intrin_regmode(intrins) ? intrins->dyninsz : 0;
|
||||||
int32_t dynout = intrin_dynrout(intrins) ? 1 : 0;
|
int32_t dynout = intrin_dynrout(intrins) ? 1 : 0;
|
||||||
|
int used = 0;
|
||||||
|
|
||||||
/* Gather the output register IR instructions */
|
/* Gather the output register IR instructions */
|
||||||
if (intrins->outsz > 0) {
|
if (intrins->outsz > 0) {
|
||||||
@ -883,6 +884,7 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf
|
|||||||
results[n] = (IRRef)(irret - as->ir);
|
results[n] = (IRRef)(irret - as->ir);
|
||||||
|
|
||||||
if (ra_used(irret)) {
|
if (ra_used(irret)) {
|
||||||
|
used++;
|
||||||
if (n >= dynout && irret->r == reg_rid(ininfo->inregs[n])) {
|
if (n >= dynout && irret->r == reg_rid(ininfo->inregs[n])) {
|
||||||
rset_set(aout, irret->r);
|
rset_set(aout, irret->r);
|
||||||
}
|
}
|
||||||
@ -895,6 +897,10 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!used && !intrin_sideeff(intrins)) {
|
||||||
|
/* IR is dead code */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
evict = ininfo->modset;
|
evict = ininfo->modset;
|
||||||
|
|
||||||
/* Check what registers need evicting for fixed input registers */
|
/* Check what registers need evicting for fixed input registers */
|
||||||
@ -938,6 +944,8 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf
|
|||||||
ra_destreg(as, irret, r);
|
ra_destreg(as, irret, r);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
|
static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
|
||||||
@ -983,7 +991,10 @@ static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
|
|||||||
}
|
}
|
||||||
lua_assert(n == 0);
|
lua_assert(n == 0);
|
||||||
|
|
||||||
asm_intrin_results(as, ir, intrins, &ininfo);
|
/* If there is no users of our results skip emitting */
|
||||||
|
if (!asm_intrin_results(as, ir, intrins, &ininfo)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
|
||||||
if (intrin_regmode(intrins)) {
|
if (intrin_regmode(intrins)) {
|
||||||
asm_intrin_opcode(as, ir, &ininfo);
|
asm_intrin_opcode(as, ir, &ininfo);
|
||||||
@ -1004,6 +1015,7 @@ static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
|
|||||||
}
|
}
|
||||||
|
|
||||||
asm_asmsetupargs(as, &ininfo);
|
asm_asmsetupargs(as, &ininfo);
|
||||||
|
exit:
|
||||||
if (ininfo.asmend) {
|
if (ininfo.asmend) {
|
||||||
/* Skip over our IR_INTRN since were emitting from the tail */
|
/* Skip over our IR_INTRN since were emitting from the tail */
|
||||||
as->curins = (IRRef)(ir - as->ir);
|
as->curins = (IRRef)(ir - as->ir);
|
||||||
|
@ -1398,8 +1398,9 @@ void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *func)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Intrinsics are assumed to always have side effects */
|
if (intrin_sideeff(intrins)) {
|
||||||
J->needsnap = 1;
|
J->needsnap = 1;
|
||||||
|
}
|
||||||
rd->nres = intrins->outsz;
|
rd->nres = intrins->outsz;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -310,6 +310,11 @@ static int parse_opmode(const char *op, MSize len)
|
|||||||
case 'U':
|
case 'U':
|
||||||
flags |= INTRINSFLAG_IMMB;
|
flags |= INTRINSFLAG_IMMB;
|
||||||
break;
|
break;
|
||||||
|
case 'S':
|
||||||
|
flags |= INTRINSFLAG_MEMORYSIDE;
|
||||||
|
case 's':
|
||||||
|
flags |= INTRINSFLAG_HASSIDE;
|
||||||
|
break;
|
||||||
case 'C':
|
case 'C':
|
||||||
flags |= INTRINSFLAG_CALLED;
|
flags |= INTRINSFLAG_CALLED;
|
||||||
break;
|
break;
|
||||||
|
@ -61,6 +61,8 @@ typedef enum INTRINSFLAGS {
|
|||||||
INTRINSFLAG_LARGEOP = 0x800,
|
INTRINSFLAG_LARGEOP = 0x800,
|
||||||
/* Opcode is commutative allowing the input registers to be swapped to allow better fusing */
|
/* Opcode is commutative allowing the input registers to be swapped to allow better fusing */
|
||||||
INTRINSFLAG_ISCOMM = 0x1000,
|
INTRINSFLAG_ISCOMM = 0x1000,
|
||||||
|
/* Instruction has non obvious side effects */
|
||||||
|
INTRINSFLAG_HASSIDE = 0x2000,
|
||||||
|
|
||||||
/* Opcode uses ymm registers */
|
/* Opcode uses ymm registers */
|
||||||
INTRINSFLAG_VEX256 = 0x4000,
|
INTRINSFLAG_VEX256 = 0x4000,
|
||||||
@ -89,6 +91,8 @@ typedef struct AsmHeader {
|
|||||||
#define intrin_setregmode(intrins, mode) \
|
#define intrin_setregmode(intrins, mode) \
|
||||||
(intrins)->flags = ((intrins)->flags & ~INTRINSFLAG_REGMODEMASK)|(mode)
|
(intrins)->flags = ((intrins)->flags & ~INTRINSFLAG_REGMODEMASK)|(mode)
|
||||||
#define intrin_iscomm(intrins) ((intrins)->flags & INTRINSFLAG_ISCOMM)
|
#define intrin_iscomm(intrins) ((intrins)->flags & INTRINSFLAG_ISCOMM)
|
||||||
|
/* Has side effects that may not be to memory */
|
||||||
|
#define intrin_sideeff(intrins) ((intrins)->flags & INTRINSFLAG_HASSIDE)
|
||||||
|
|
||||||
#define intrin_getopextb(intrins) ((intrins)->out[3])
|
#define intrin_getopextb(intrins) ((intrins)->out[3])
|
||||||
#define intrin_setopextb(intrins, opext) \
|
#define intrin_setopextb(intrins, opext) \
|
||||||
|
@ -478,7 +478,7 @@ context("__mcode", function()
|
|||||||
end)
|
end)
|
||||||
|
|
||||||
it("prefix byte", function()
|
it("prefix byte", function()
|
||||||
assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIP", 0xF0);]], "atomicadd")
|
assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd")
|
||||||
|
|
||||||
local sum = 0
|
local sum = 0
|
||||||
local function checker(i, jsum)
|
local function checker(i, jsum)
|
||||||
@ -498,7 +498,7 @@ context("__mcode", function()
|
|||||||
|
|
||||||
if ffi.arch == "x64" then
|
if ffi.arch == "x64" then
|
||||||
it("prefix64", function()
|
it("prefix64", function()
|
||||||
assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIP", 0xF0);]], "atomicadd64")
|
assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd64")
|
||||||
|
|
||||||
local sum = 0
|
local sum = 0
|
||||||
local function checker(i, jsum)
|
local function checker(i, jsum)
|
||||||
@ -516,7 +516,7 @@ context("__mcode", function()
|
|||||||
end
|
end
|
||||||
|
|
||||||
it("prefix and imm byte", function()
|
it("prefix and imm byte", function()
|
||||||
assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUP", 0xF0, 0x01);]], "atomicadd1")
|
assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUPS", 0xF0, 0x01);]], "atomicadd1")
|
||||||
|
|
||||||
local function checker(i, jsum)
|
local function checker(i, jsum)
|
||||||
if(jsum ~= i) then
|
if(jsum ~= i) then
|
||||||
@ -565,11 +565,39 @@ context("__mcode", function()
|
|||||||
assert_exit(10, test_idiv, 10, 5)
|
assert_exit(10, test_idiv, 10, 5)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it("side effects(mode)", function()
|
||||||
|
assert_cdef([[void add1_noside(int32_t* nptr) __mcode("830mIU", 0x01);]], "add1_noside")
|
||||||
|
assert_cdef([[void add1_side(int32_t* nptr) __mcode("830mIUs", 0x01);]], "add1_side")
|
||||||
|
|
||||||
|
local numptr = ffi.new("int32_t[2]", 0)
|
||||||
|
|
||||||
|
local function checker(i, n)
|
||||||
|
assert(n == i)
|
||||||
|
assert(numptr[0] >= numptr[1])
|
||||||
|
end
|
||||||
|
|
||||||
|
local function test_sideff(i)
|
||||||
|
ffi.C.add1_side(numptr)
|
||||||
|
ffi.C.add1_noside(numptr+1)
|
||||||
|
return numptr[0]
|
||||||
|
end
|
||||||
|
|
||||||
|
assert_jitchecker(checker, test_sideff)
|
||||||
|
assert_greater_than(numptr[0], numptr[1])
|
||||||
|
|
||||||
|
numptr[0] = 0
|
||||||
|
numptr[1] = 0
|
||||||
|
--test directly as JIT'ed
|
||||||
|
test_sideff()
|
||||||
|
assert_equal(numptr[0], 1)
|
||||||
|
assert_equal(numptr[1], 0)
|
||||||
|
end)
|
||||||
|
|
||||||
it("prefetch", function()
|
it("prefetch", function()
|
||||||
assert_cdef([[void prefetch0(void* mem) __mcode("0F181mI")]], "prefetch0")
|
assert_cdef([[void prefetch0(void* mem) __mcode("0F181mIs")]], "prefetch0")
|
||||||
assert_cdef([[void prefetch1(void* mem) __mcode("0F182mI")]], "prefetch1")
|
assert_cdef([[void prefetch1(void* mem) __mcode("0F182mIs")]], "prefetch1")
|
||||||
assert_cdef([[void prefetch2(void* mem) __mcode("0F183mI")]], "prefetch2")
|
assert_cdef([[void prefetch2(void* mem) __mcode("0F183mIs")]], "prefetch2")
|
||||||
assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mI")]], "prefetchnta")
|
assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mIs")]], "prefetchnta")
|
||||||
|
|
||||||
local asm = ffi.C
|
local asm = ffi.C
|
||||||
local kmem = ffi.new("int[4]")
|
local kmem = ffi.new("int[4]")
|
||||||
|
Loading…
Reference in New Issue
Block a user