CSE support for intrinsics: only enabled for intrinsics that return a single value, have no side effects, and are not forced to use an indirect ModRM operand (which could be a load or a store)

This commit is contained in:
fsfod 2016-03-29 12:15:40 +01:00
parent befcdc6e55
commit 748091cfd9
2 changed files with 112 additions and 2 deletions

View File

@ -25,6 +25,7 @@
#if LJ_HASFFI #if LJ_HASFFI
#include "lj_ctype.h" #include "lj_ctype.h"
#include "lj_carith.h" #include "lj_carith.h"
#include "lj_intrinsic.h"
#endif #endif
#include "lj_vm.h" #include "lj_vm.h"
#include "lj_strscan.h" #include "lj_strscan.h"
@ -2331,6 +2332,25 @@ LJFOLDF(xload_kptr)
LJFOLD(XLOAD any any) LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload) LJFOLDX(lj_opt_fwd_xload)
/* -- Intrinsics ----------------------------------------------------------- */
LJFOLD(INTRN any any)
LJFOLDF(cse_intrin)
{
  /* Fetch the intrinsic descriptor referenced by the second operand. */
  CIntrinsic *intrin = lj_intrinsic_get(ctype_ctsG(J2G(J)), fins->op2);
  /* Without the CSE optimization flag the instruction is always emitted. */
  if (!LJ_LIKELY(J->flags & JIT_F_OPT_CSE))
    return EMITFOLD;
  /* NYI: CSE of multi return intrinsics. Intrinsics that take no inputs
  ** are never passed on to CSE either.
  */
  if (intrin->outsz != 1 || intrin->insz == 0)
    return EMITFOLD;
  /* Skip intrinsics forced to an indirect ModRM operand (which could be a
  ** load or store) and intrinsics flagged with side effects.
  */
  if ((intrin->flags & (INTRINSFLAG_INDIRECT|INTRINSFLAG_HASSIDE|
			INTRINSFLAG_MEMORYSIDE)))
    return EMITFOLD;
  /* This also works for template intrinsics since they append an extra CARG
  ** with a pointer to their per-instance code.
  */
  return lj_opt_cse(J);
}
/* -- Write barriers ------------------------------------------------------ */ /* -- Write barriers ------------------------------------------------------ */
/* Write barriers are amenable to CSE, but not across any incremental /* Write barriers are amenable to CSE, but not across any incremental
@ -2371,7 +2391,6 @@ LJFOLDF(prof)
return ref; return ref;
return EMITFOLD; return EMITFOLD;
} }
/* -- Stores and allocations ---------------------------------------------- */ /* -- Stores and allocations ---------------------------------------------- */
/* Stores and allocations cannot be folded or passed on to CSE in general. /* Stores and allocations cannot be folded or passed on to CSE in general.
@ -2405,7 +2424,6 @@ LJFOLD(TDUP any)
LJFOLD(CNEW any any) LJFOLD(CNEW any any)
LJFOLD(XSNEW any any) LJFOLD(XSNEW any any)
LJFOLD(BUFHDR any any) LJFOLD(BUFHDR any any)
LJFOLD(INTRN any any)
LJFOLD(ASMRET any any) LJFOLD(ASMRET any any)
LJFOLDX(lj_ir_emit) LJFOLDX(lj_ir_emit)

View File

@ -1116,6 +1116,98 @@ context("mixed register type opcodes", function()
end) end)
end) end)
context("Intrinsic CSE", function()
it("cse 1 input, same arg", function()
  assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add_imm3")

  -- Both intrinsic calls receive the same argument, so the second call is a
  -- candidate for CSE.
  local function add3mul2(a)
    local first = ffi.C.add3(a)
    local second = ffi.C.add3(a)
    return first + second
  end

  -- Expected result for an input of 2: (2 + 3) + (2 + 3) = 10.
  assert_jit(10, add3mul2, 2)
end)
it("cse 1 input, diff args", function()
  assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add_imm3")

  -- The two intrinsic calls take separate arguments; the results must stay
  -- distinct unless the arguments happen to be equal.
  local function add3mul2(a, b)
    local lhs = ffi.C.add3(a)
    local rhs = ffi.C.add3(b)
    return lhs + rhs
  end

  -- (2 + 3) + (3 + 3) = 11
  assert_jit(11, add3mul2, 2, 3)
  -- (3 + 3) + (3 + 3) = 12, with both arguments equal this time.
  assert_noexit(12, add3mul2, 3, 3)
end)
it("cse 1 input, same and diff args", function()
  assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add_imm3")

  -- The first and third calls share an argument while the second call uses a
  -- different one.
  local function add3mul2(a, b)
    local first = ffi.C.add3(a)
    local middle = ffi.C.add3(b)
    local last = ffi.C.add3(a)
    return first + middle + last
  end

  -- (2 + 3) + (3 + 3) + (2 + 3) = 16
  assert_jit(16, add3mul2, 2, 3)
  -- (-1 + 3) + (3 + 3) + (-1 + 3) = 10
  assert_noexit(10, add3mul2, -1, 3)
end)
it("no cse, indirect ModRM", function()
  assert_cdef([[int32_t xadd(int32_t* nptr, int32_t n) __mcode("0FC1mRI");]], "xadd")
  local num = ffi.new("int32_t[1]", 0)

  -- Pretend to use the output values so the calls don't get DCE'ed, since
  -- xadd was not flagged as having side effects.
  -- The function takes no parameters: the original declared (a, b) but never
  -- read them, and its local accumulator shadowed `a`.
  local function inc2()
    local prev = ffi.C.xadd(num, 1)
    prev = prev + ffi.C.xadd(num, 1)
    return num[0], prev
  end

  -- The counter must have grown by two per call, i.e. neither xadd call was
  -- merged with the other.
  -- NOTE(review): assumes assert_jitchecker passes a 1-based call count as i
  -- and inc2's first result as n -- confirm against the helper.
  local function checker(i, n)
    assert(i * 2 == n, n)
  end

  assert_jitchecker(checker, inc2)
end)
it("no cse, has side effects", function()
  -- Pretend we have an input register
  assert_cdef([[void rdtsc2(int32_t edx) __mcode("0f31_Es") __reglist(out, uint32_t eax) __reglist(mod, edx);]], "rdtsc2")

  -- Two back-to-back reads with an identical argument; the difference is
  -- returned so both results are used.
  local function getticks()
    local first = ffi.C.rdtsc2(0)
    local second = ffi.C.rdtsc2(0)
    return second - first
  end

  -- A positive difference means the second read was not replaced by the
  -- first one.
  local function checker(i, n)
    assert(n > 0, tostring(n))
  end

  assert_jitchecker(checker, getticks)
end)
it("cse, fuse conflict", function()
  -- Integer variant, currently disabled:
  -- assert_cdef([[int32_t add(int32_t i, int32_t n) __mcode("03rMc");]], "add")
  assert_cdef([[double addsd(double n1, double n2) __mcode("F20F58rMvc");]], "addsd")
  local num = ffi.new("double[2]", 0)

  -- The same two array slots feed addsd before and after num[1] is
  -- overwritten, so the second call must see the stored value.
  -- NOTE(review): presumably this targets the interaction between CSE and
  -- fusing the array loads into the instruction operand -- confirm.
  local function testfuse(i, a, b)
    local before = ffi.C.addsd(num[0], num[1])
    num[1] = i
    local after = ffi.C.addsd(num[0], num[1])
    return before + after
  end

  local function checker(i, n)
    assert(n > 0, tostring(n))
  end

  assert_jitchecker(checker, testfuse, 2.5)
end)
end)
end) end)