diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index fe3cac68..b7ce4b06 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -866,12 +866,13 @@ static void asm_intrin_opcode(ASMState *as, IRIns *ir, IntrinsInfo *ininfo) checkmclim(as); } -void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo) +int asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo) { IRRef results[LJ_INTRINS_MAXREG]; RegSet evict = 0, outset = 0, aout = 0; int32_t i = intrin_regmode(intrins) ? intrins->dyninsz : 0; int32_t dynout = intrin_dynrout(intrins) ? 1 : 0; + int used = 0; /* Gather the output register IR instructions */ if (intrins->outsz > 0) { @@ -883,6 +884,7 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf results[n] = (IRRef)(irret - as->ir); if (ra_used(irret)) { + used++; if (n >= dynout && irret->r == reg_rid(ininfo->inregs[n])) { rset_set(aout, irret->r); } @@ -895,6 +897,10 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf } } + if (!used && !intrin_sideeff(intrins)) { + /* No used results and no side effects: the IR is dead code */ + return 0; + } evict = ininfo->modset; /* Check what registers need evicting for fixed input registers */ @@ -938,6 +944,8 @@ void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInf ra_destreg(as, irret, r); } } + + return 1; } static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend) @@ -983,7 +991,10 @@ static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend) } lua_assert(n == 0); - asm_intrin_results(as, ir, intrins, &ininfo); + /* If there are no users of our results, skip emitting */ + if (!asm_intrin_results(as, ir, intrins, &ininfo)) { + goto exit; + } if (intrin_regmode(intrins)) { asm_intrin_opcode(as, ir, &ininfo); @@ -1004,6 +1015,7 @@ static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend) } asm_asmsetupargs(as, &ininfo); +exit: if (ininfo.asmend) { /* Skip over our IR_INTRN since were 
emitting from the tail */ as->curins = (IRRef)(ir - as->ir); diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 75fafa13..c60cf471 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1398,8 +1398,9 @@ void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *func) } } - /* Intrinsics are assumed to always have side effects */ - J->needsnap = 1; + if (intrin_sideeff(intrins)) { + J->needsnap = 1; + } rd->nres = intrins->outsz; } #else diff --git a/src/lj_intrinsic.c b/src/lj_intrinsic.c index 7a39abb7..c140a708 100644 --- a/src/lj_intrinsic.c +++ b/src/lj_intrinsic.c @@ -310,6 +310,11 @@ static int parse_opmode(const char *op, MSize len) case 'U': flags |= INTRINSFLAG_IMMB; break; + case 'S': + flags |= INTRINSFLAG_MEMORYSIDE; /* fallthrough */ + case 's': + flags |= INTRINSFLAG_HASSIDE; + break; case 'C': flags |= INTRINSFLAG_CALLED; break; diff --git a/src/lj_intrinsic.h b/src/lj_intrinsic.h index bcf4618e..438174c2 100644 --- a/src/lj_intrinsic.h +++ b/src/lj_intrinsic.h @@ -61,6 +61,8 @@ typedef enum INTRINSFLAGS { INTRINSFLAG_LARGEOP = 0x800, /* Opcode is commutative allowing the input registers to be swapped to allow better fusing */ INTRINSFLAG_ISCOMM = 0x1000, + /* Instruction has non-obvious side effects */ + INTRINSFLAG_HASSIDE = 0x2000, /* Opcode uses ymm registers */ INTRINSFLAG_VEX256 = 0x4000, @@ -89,6 +91,8 @@ typedef struct AsmHeader { #define intrin_setregmode(intrins, mode) \ (intrins)->flags = ((intrins)->flags & ~INTRINSFLAG_REGMODEMASK)|(mode) #define intrin_iscomm(intrins) ((intrins)->flags & INTRINSFLAG_ISCOMM) +/* Has side effects that may not be to memory */ +#define intrin_sideeff(intrins) ((intrins)->flags & INTRINSFLAG_HASSIDE) #define intrin_getopextb(intrins) ((intrins)->out[3]) #define intrin_setopextb(intrins, opext) \ diff --git a/tests/intrinsic_spec.lua b/tests/intrinsic_spec.lua index 58db64a1..80f39bf5 100644 --- a/tests/intrinsic_spec.lua +++ b/tests/intrinsic_spec.lua @@ -478,7 +478,7 @@ context("__mcode", function() end) it("prefix 
byte", function() - assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIP", 0xF0);]], "atomicadd") + assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd") local sum = 0 local function checker(i, jsum) @@ -498,7 +498,7 @@ context("__mcode", function() if ffi.arch == "x64" then it("prefix64", function() - assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIP", 0xF0);]], "atomicadd64") + assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd64") local sum = 0 local function checker(i, jsum) @@ -516,7 +516,7 @@ context("__mcode", function() end it("prefix and imm byte", function() - assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUP", 0xF0, 0x01);]], "atomicadd1") + assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUPS", 0xF0, 0x01);]], "atomicadd1") local function checker(i, jsum) if(jsum ~= i) then @@ -565,11 +565,39 @@ context("__mcode", function() assert_exit(10, test_idiv, 10, 5) end) + it("side effects(mode)", function() + assert_cdef([[void add1_noside(int32_t* nptr) __mcode("830mIU", 0x01);]], "add1_noside") + assert_cdef([[void add1_side(int32_t* nptr) __mcode("830mIUs", 0x01);]], "add1_side") + + local numptr = ffi.new("int32_t[2]", 0) + + local function checker(i, n) + assert(n == i) + assert(numptr[0] >= numptr[1]) + end + + local function test_sideff(i) + ffi.C.add1_side(numptr) + ffi.C.add1_noside(numptr+1) + return numptr[0] + end + + assert_jitchecker(checker, test_sideff) + assert_greater_than(numptr[0], numptr[1]) + + numptr[0] = 0 + numptr[1] = 0 + --test directly as JIT'ed + test_sideff() + assert_equal(numptr[0], 1) + assert_equal(numptr[1], 0) + end) + it("prefetch", function() - assert_cdef([[void prefetch0(void* mem) __mcode("0F181mI")]], "prefetch0") - assert_cdef([[void prefetch1(void* mem) __mcode("0F182mI")]], "prefetch1") - assert_cdef([[void prefetch2(void* mem) __mcode("0F183mI")]], "prefetch2") - 
assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mI")]], "prefetchnta") + assert_cdef([[void prefetch0(void* mem) __mcode("0F181mIs")]], "prefetch0") + assert_cdef([[void prefetch1(void* mem) __mcode("0F182mIs")]], "prefetch1") + assert_cdef([[void prefetch2(void* mem) __mcode("0F183mIs")]], "prefetch2") + assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mIs")]], "prefetchnta") local asm = ffi.C local kmem = ffi.new("int[4]")