From a374e90531dffca47872433d4e7c43f48a5694da Mon Sep 17 00:00:00 2001 From: fsfod Date: Tue, 29 Mar 2016 11:20:01 +0100 Subject: [PATCH] Added flags for opcodes with non-memory store side effects and enabled DCE of intrinsics Intrinsics are now assumed to have no side effects unless flagged with either memory side effects (S) or non-memory side effects (s) --- src/lj_asm_x86.h | 16 +++++++++++++-- src/lj_crecord.c | 5 +++-- src/lj_intrinsic.c | 5 +++++ src/lj_intrinsic.h | 4 ++++ tests/intrinsic_spec.lua | 42 +++++++++++++++++++++++++++++++++------- 5 files changed, 61 insertions(+), 11 deletions(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index fe3cac68..b7ce4b06 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -866,12 +866,13 @@ static void asm_intrin_opcode(ASMState *as, IRIns *ir, IntrinsInfo *ininfo) checkmclim(as); } -void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo) +int asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo) { IRRef results[LJ_INTRINS_MAXREG]; RegSet evict = 0, outset = 0, aout = 0; int32_t i = intrin_regmode(intrins) ? intrins->dyninsz : 0; int32_t dynout = intrin_dynrout(intrins) ?
1 : 0; + int used = 0; /* Gather the output register IR instructions */ if (intrins->outsz > 0) { @@ -883,6 +884,7 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf results[n] = (IRRef)(irret - as->ir); if (ra_used(irret)) { + used++; if (n >= dynout && irret->r == reg_rid(ininfo->inregs[n])) { rset_set(aout, irret->r); } @@ -895,6 +897,10 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf } } + if (!used && !intrin_sideeff(intrins)) { + /* IR is dead code */ + return 0; + } evict = ininfo->modset; /* Check what registers need evicting for fixed input registers */ @@ -938,6 +944,8 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf ra_destreg(as, irret, r); } } + + return 1; } static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend) @@ -983,7 +991,10 @@ static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend) } lua_assert(n == 0); - asm_intrin_results(as, ir, intrins, &ininfo); + /* If there is no users of our results skip emitting */ + if (!asm_intrin_results(as, ir, intrins, &ininfo)) { + goto exit; + } if (intrin_regmode(intrins)) { asm_intrin_opcode(as, ir, &ininfo); @@ -1004,6 +1015,7 @@ static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend) } asm_asmsetupargs(as, &ininfo); +exit: if (ininfo.asmend) { /* Skip over our IR_INTRN since were emitting from the tail */ as->curins = (IRRef)(ir - as->ir); diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 75fafa13..c60cf471 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1398,8 +1398,9 @@ void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *func) } } - /* Intrinsics are assumed to always have side effects */ - J->needsnap = 1; + if (intrin_sideeff(intrins)) { + J->needsnap = 1; + } rd->nres = intrins->outsz; } #else diff --git a/src/lj_intrinsic.c b/src/lj_intrinsic.c index 7a39abb7..c140a708 100644 --- a/src/lj_intrinsic.c +++ b/src/lj_intrinsic.c @@ -310,6 +310,11 @@ 
static int parse_opmode(const char *op, MSize len) case 'U': flags |= INTRINSFLAG_IMMB; break; + case 'S': + flags |= INTRINSFLAG_MEMORYSIDE; + case 's': + flags |= INTRINSFLAG_HASSIDE; + break; case 'C': flags |= INTRINSFLAG_CALLED; break; diff --git a/src/lj_intrinsic.h b/src/lj_intrinsic.h index bcf4618e..438174c2 100644 --- a/src/lj_intrinsic.h +++ b/src/lj_intrinsic.h @@ -61,6 +61,8 @@ typedef enum INTRINSFLAGS { INTRINSFLAG_LARGEOP = 0x800, /* Opcode is commutative allowing the input registers to be swapped to allow better fusing */ INTRINSFLAG_ISCOMM = 0x1000, + /* Instruction has non obvious side effects */ + INTRINSFLAG_HASSIDE = 0x2000, /* Opcode uses ymm registers */ INTRINSFLAG_VEX256 = 0x4000, @@ -89,6 +91,8 @@ typedef struct AsmHeader { #define intrin_setregmode(intrins, mode) \ (intrins)->flags = ((intrins)->flags & ~INTRINSFLAG_REGMODEMASK)|(mode) #define intrin_iscomm(intrins) ((intrins)->flags & INTRINSFLAG_ISCOMM) +/* Has side effects that may not be to memory */ +#define intrin_sideeff(intrins) ((intrins)->flags & INTRINSFLAG_HASSIDE) #define intrin_getopextb(intrins) ((intrins)->out[3]) #define intrin_setopextb(intrins, opext) \ diff --git a/tests/intrinsic_spec.lua b/tests/intrinsic_spec.lua index 58db64a1..80f39bf5 100644 --- a/tests/intrinsic_spec.lua +++ b/tests/intrinsic_spec.lua @@ -478,7 +478,7 @@ context("__mcode", function() end) it("prefix byte", function() - assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIP", 0xF0);]], "atomicadd") + assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd") local sum = 0 local function checker(i, jsum) @@ -498,7 +498,7 @@ context("__mcode", function() if ffi.arch == "x64" then it("prefix64", function() - assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIP", 0xF0);]], "atomicadd64") + assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd64") local sum = 0 local function 
checker(i, jsum) @@ -516,7 +516,7 @@ context("__mcode", function() end it("prefix and imm byte", function() - assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUP", 0xF0, 0x01);]], "atomicadd1") + assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUPS", 0xF0, 0x01);]], "atomicadd1") local function checker(i, jsum) if(jsum ~= i) then @@ -565,11 +565,39 @@ context("__mcode", function() assert_exit(10, test_idiv, 10, 5) end) + it("side effects(mode)", function() + assert_cdef([[void add1_noside(int32_t* nptr) __mcode("830mIU", 0x01);]], "add1_noside") + assert_cdef([[void add1_side(int32_t* nptr) __mcode("830mIUs", 0x01);]], "add1_side") + + local numptr = ffi.new("int32_t[2]", 0) + + local function checker(i, n) + assert(n == i) + assert(numptr[0] >= numptr[1]) + end + + local function test_sideff(i) + ffi.C.add1_side(numptr) + ffi.C.add1_noside(numptr+1) + return numptr[0] + end + + assert_jitchecker(checker, test_sideff) + assert_greater_than(numptr[0], numptr[1]) + + numptr[0] = 0 + numptr[1] = 0 + --test directly as JIT'ed + test_sideff() + assert_equal(numptr[0], 1) + assert_equal(numptr[1], 0) + end) + it("prefetch", function() - assert_cdef([[void prefetch0(void* mem) __mcode("0F181mI")]], "prefetch0") - assert_cdef([[void prefetch1(void* mem) __mcode("0F182mI")]], "prefetch1") - assert_cdef([[void prefetch2(void* mem) __mcode("0F183mI")]], "prefetch2") - assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mI")]], "prefetchnta") + assert_cdef([[void prefetch0(void* mem) __mcode("0F181mIs")]], "prefetch0") + assert_cdef([[void prefetch1(void* mem) __mcode("0F182mIs")]], "prefetch1") + assert_cdef([[void prefetch2(void* mem) __mcode("0F183mIs")]], "prefetch2") + assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mIs")]], "prefetchnta") local asm = ffi.C local kmem = ffi.new("int[4]")