Added a flag (s) for opcodes with non-memory-store side effects and enabled DCE of intrinsics

Intrinsics are now assumed to have no side effects unless flagged as having either memory side effects (S) or non-memory side effects (s).
This commit is contained in:
fsfod 2016-03-29 11:20:01 +01:00
parent 275d0dcdd7
commit a374e90531
5 changed files with 61 additions and 11 deletions

View File

@ -866,12 +866,13 @@ static void asm_intrin_opcode(ASMState *as, IRIns *ir, IntrinsInfo *ininfo)
checkmclim(as); checkmclim(as);
} }
void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo) int asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo)
{ {
IRRef results[LJ_INTRINS_MAXREG]; IRRef results[LJ_INTRINS_MAXREG];
RegSet evict = 0, outset = 0, aout = 0; RegSet evict = 0, outset = 0, aout = 0;
int32_t i = intrin_regmode(intrins) ? intrins->dyninsz : 0; int32_t i = intrin_regmode(intrins) ? intrins->dyninsz : 0;
int32_t dynout = intrin_dynrout(intrins) ? 1 : 0; int32_t dynout = intrin_dynrout(intrins) ? 1 : 0;
int used = 0;
/* Gather the output register IR instructions */ /* Gather the output register IR instructions */
if (intrins->outsz > 0) { if (intrins->outsz > 0) {
@ -883,6 +884,7 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf
results[n] = (IRRef)(irret - as->ir); results[n] = (IRRef)(irret - as->ir);
if (ra_used(irret)) { if (ra_used(irret)) {
used++;
if (n >= dynout && irret->r == reg_rid(ininfo->inregs[n])) { if (n >= dynout && irret->r == reg_rid(ininfo->inregs[n])) {
rset_set(aout, irret->r); rset_set(aout, irret->r);
} }
@ -895,6 +897,10 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf
} }
} }
if (!used && !intrin_sideeff(intrins)) {
/* IR is dead code */
return 0;
}
evict = ininfo->modset; evict = ininfo->modset;
/* Check what registers need evicting for fixed input registers */ /* Check what registers need evicting for fixed input registers */
@ -938,6 +944,8 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf
ra_destreg(as, irret, r); ra_destreg(as, irret, r);
} }
} }
return 1;
} }
static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend) static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
@ -983,7 +991,10 @@ static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
} }
lua_assert(n == 0); lua_assert(n == 0);
asm_intrin_results(as, ir, intrins, &ininfo); /* If there is no users of our results skip emitting */
if (!asm_intrin_results(as, ir, intrins, &ininfo)) {
goto exit;
}
if (intrin_regmode(intrins)) { if (intrin_regmode(intrins)) {
asm_intrin_opcode(as, ir, &ininfo); asm_intrin_opcode(as, ir, &ininfo);
@ -1004,6 +1015,7 @@ static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
} }
asm_asmsetupargs(as, &ininfo); asm_asmsetupargs(as, &ininfo);
exit:
if (ininfo.asmend) { if (ininfo.asmend) {
/* Skip over our IR_INTRN since were emitting from the tail */ /* Skip over our IR_INTRN since were emitting from the tail */
as->curins = (IRRef)(ir - as->ir); as->curins = (IRRef)(ir - as->ir);

View File

@ -1398,8 +1398,9 @@ void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *func)
} }
} }
/* Intrinsics are assumed to always have side effects */ if (intrin_sideeff(intrins)) {
J->needsnap = 1; J->needsnap = 1;
}
rd->nres = intrins->outsz; rd->nres = intrins->outsz;
} }
#else #else

View File

@ -310,6 +310,11 @@ static int parse_opmode(const char *op, MSize len)
case 'U': case 'U':
flags |= INTRINSFLAG_IMMB; flags |= INTRINSFLAG_IMMB;
break; break;
case 'S':
flags |= INTRINSFLAG_MEMORYSIDE;
case 's':
flags |= INTRINSFLAG_HASSIDE;
break;
case 'C': case 'C':
flags |= INTRINSFLAG_CALLED; flags |= INTRINSFLAG_CALLED;
break; break;

View File

@ -61,6 +61,8 @@ typedef enum INTRINSFLAGS {
INTRINSFLAG_LARGEOP = 0x800, INTRINSFLAG_LARGEOP = 0x800,
/* Opcode is commutative allowing the input registers to be swapped to allow better fusing */ /* Opcode is commutative allowing the input registers to be swapped to allow better fusing */
INTRINSFLAG_ISCOMM = 0x1000, INTRINSFLAG_ISCOMM = 0x1000,
/* Instruction has non obvious side effects */
INTRINSFLAG_HASSIDE = 0x2000,
/* Opcode uses ymm registers */ /* Opcode uses ymm registers */
INTRINSFLAG_VEX256 = 0x4000, INTRINSFLAG_VEX256 = 0x4000,
@ -89,6 +91,8 @@ typedef struct AsmHeader {
#define intrin_setregmode(intrins, mode) \ #define intrin_setregmode(intrins, mode) \
(intrins)->flags = ((intrins)->flags & ~INTRINSFLAG_REGMODEMASK)|(mode) (intrins)->flags = ((intrins)->flags & ~INTRINSFLAG_REGMODEMASK)|(mode)
#define intrin_iscomm(intrins) ((intrins)->flags & INTRINSFLAG_ISCOMM) #define intrin_iscomm(intrins) ((intrins)->flags & INTRINSFLAG_ISCOMM)
/* Has side effects that may not be to memory */
#define intrin_sideeff(intrins) ((intrins)->flags & INTRINSFLAG_HASSIDE)
#define intrin_getopextb(intrins) ((intrins)->out[3]) #define intrin_getopextb(intrins) ((intrins)->out[3])
#define intrin_setopextb(intrins, opext) \ #define intrin_setopextb(intrins, opext) \

View File

@ -478,7 +478,7 @@ context("__mcode", function()
end) end)
it("prefix byte", function() it("prefix byte", function()
assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIP", 0xF0);]], "atomicadd") assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd")
local sum = 0 local sum = 0
local function checker(i, jsum) local function checker(i, jsum)
@ -498,7 +498,7 @@ context("__mcode", function()
if ffi.arch == "x64" then if ffi.arch == "x64" then
it("prefix64", function() it("prefix64", function()
assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIP", 0xF0);]], "atomicadd64") assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd64")
local sum = 0 local sum = 0
local function checker(i, jsum) local function checker(i, jsum)
@ -516,7 +516,7 @@ context("__mcode", function()
end end
it("prefix and imm byte", function() it("prefix and imm byte", function()
assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUP", 0xF0, 0x01);]], "atomicadd1") assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUPS", 0xF0, 0x01);]], "atomicadd1")
local function checker(i, jsum) local function checker(i, jsum)
if(jsum ~= i) then if(jsum ~= i) then
@ -565,11 +565,39 @@ context("__mcode", function()
assert_exit(10, test_idiv, 10, 5) assert_exit(10, test_idiv, 10, 5)
end) end)
it("side effects(mode)", function()
-- Verifies that the 's' mode flag keeps an intrinsic whose results are unused
-- from being dropped by the JIT, while the same opcode declared without the
-- flag is allowed to be eliminated as dead code.
-- Both declarations use the same opcode string and immediate; they differ
-- only in the trailing 's' flag on add1_side.
assert_cdef([[void add1_noside(int32_t* nptr) __mcode("830mIU", 0x01);]], "add1_noside")
assert_cdef([[void add1_side(int32_t* nptr) __mcode("830mIUs", 0x01);]], "add1_side")
-- numptr[0] is the target of the flagged intrinsic, numptr[1] of the unflagged one
local numptr = ffi.new("int32_t[2]", 0)
-- Called with the iteration value and test_sideff's return value (numptr[0]).
-- NOTE(review): the exact calling convention is defined by assert_jitchecker,
-- which is not visible here - confirm.
local function checker(i, n)
assert(n == i)
-- The flagged counter must never fall behind the unflagged one
assert(numptr[0] >= numptr[1])
end
local function test_sideff(i)
ffi.C.add1_side(numptr)
ffi.C.add1_noside(numptr+1)
return numptr[0]
end
assert_jitchecker(checker, test_sideff)
-- Expects the unflagged counter to have been updated strictly fewer times,
-- presumably because its call was eliminated once the function was compiled
assert_greater_than(numptr[0], numptr[1])
numptr[0] = 0
numptr[1] = 0
--test directly as JIT'ed
test_sideff()
-- A single call must update only the flagged intrinsic's target
assert_equal(numptr[0], 1)
assert_equal(numptr[1], 0)
end)
it("prefetch", function() it("prefetch", function()
assert_cdef([[void prefetch0(void* mem) __mcode("0F181mI")]], "prefetch0") assert_cdef([[void prefetch0(void* mem) __mcode("0F181mIs")]], "prefetch0")
assert_cdef([[void prefetch1(void* mem) __mcode("0F182mI")]], "prefetch1") assert_cdef([[void prefetch1(void* mem) __mcode("0F182mIs")]], "prefetch1")
assert_cdef([[void prefetch2(void* mem) __mcode("0F183mI")]], "prefetch2") assert_cdef([[void prefetch2(void* mem) __mcode("0F183mIs")]], "prefetch2")
assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mI")]], "prefetchnta") assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mIs")]], "prefetchnta")
local asm = ffi.C local asm = ffi.C
local kmem = ffi.new("int[4]") local kmem = ffi.new("int[4]")