From 748091cfd93c64e9303dac5e3802e0b513f14c94 Mon Sep 17 00:00:00 2001 From: fsfod Date: Tue, 29 Mar 2016 12:15:40 +0100 Subject: [PATCH] CSE support for intrinsics only enabled for single value returning intrinsics that have no side effects and are not forced indirect ModRM which could be a load or store --- src/lj_opt_fold.c | 22 +++++++++- tests/intrinsic_spec.lua | 92 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 2 deletions(-) diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index ffe32788..999b223d 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -25,6 +25,7 @@ #if LJ_HASFFI #include "lj_ctype.h" #include "lj_carith.h" +#include "lj_intrinsic.h" #endif #include "lj_vm.h" #include "lj_strscan.h" @@ -2331,6 +2332,25 @@ LJFOLDF(xload_kptr) LJFOLD(XLOAD any any) LJFOLDX(lj_opt_fwd_xload) +/* -- Intrinsics ----------------------------------------------------------- */ + +LJFOLD(INTRN any any) +LJFOLDF(cse_intrin) +{ + CIntrinsic *intrin = lj_intrinsic_get(ctype_ctsG(J2G(J)), fins->op2); + + /* NYI: CSE of multi-return intrinsics. Emit without CSE (EMITFOLD) unless the CSE optimization flag is set, outsz == 1, insz != 0 and none of the INDIRECT/HASSIDE/MEMORYSIDE intrinsic flags is set. */ + if (!LJ_LIKELY(J->flags & JIT_F_OPT_CSE) || intrin->outsz != 1 || + intrin->insz == 0 || (intrin->flags & (INTRINSFLAG_INDIRECT| + INTRINSFLAG_HASSIDE|INTRINSFLAG_MEMORYSIDE))) { + return EMITFOLD; + } + /* This also works for template intrinsics since they append an extra CARG with + ** a pointer to their per-instance code. + */ + return lj_opt_cse(J); +} + /* -- Write barriers ------------------------------------------------------ */ /* Write barriers are amenable to CSE, but not across any incremental @@ -2371,7 +2391,6 @@ LJFOLDF(prof) return ref; return EMITFOLD; } - /* -- Stores and allocations ---------------------------------------------- */ /* Stores and allocations cannot be folded or passed on to CSE in general. 
@@ -2405,7 +2424,6 @@ LJFOLD(TDUP any) LJFOLD(CNEW any any) LJFOLD(XSNEW any any) LJFOLD(BUFHDR any any) -LJFOLD(INTRN any any) LJFOLD(ASMRET any any) LJFOLDX(lj_ir_emit) diff --git a/tests/intrinsic_spec.lua b/tests/intrinsic_spec.lua index bf621444..00416c73 100644 --- a/tests/intrinsic_spec.lua +++ b/tests/intrinsic_spec.lua @@ -1116,6 +1116,98 @@ context("mixed register type opcodes", function() end) end) +context("Intrinsic CSE", function() + + it("cse 1 input, same arg", function() + assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add_imm3") + + local function add3mul2(a) + return ffi.C.add3(a)+ffi.C.add3(a) + end + + assert_jit(10, add3mul2, 2) + end) + + it("cse 1 input, diff args", function() + assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add_imm3") + + local function add3mul2(a, b) + return ffi.C.add3(a)+ffi.C.add3(b) + end + + assert_jit(11, add3mul2, 2, 3) + assert_noexit(12, add3mul2, 3, 3) + end) + + + it("cse 1 input, same and diff args", function() + assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add_imm3") + + local function add3mul2(a, b) + return ffi.C.add3(a)+ffi.C.add3(b)+ffi.C.add3(a) + end + + assert_jit(16, add3mul2, 2, 3) + assert_noexit(10, add3mul2, -1, 3) + end) + + it("no cse, indirect ModRM", function() + assert_cdef([[int32_t xadd(int32_t* nptr, int32_t n) __mcode("0FC1mRI");]], "xadd") + + local num = ffi.new("int32_t[1]", 0) + -- Use the output values so the xadd calls are not DCE'ed, since the cdef does not flag xadd as having side effects + local function inc2(a, b) + local a = ffi.C.xadd(num, 1) + a = a + ffi.C.xadd(num, 1) + return num[0], a + end + + local function checker(i, n) + assert(i * 2 == n, n) + end + + assert_jitchecker(checker, inc2) + end) + + it("no cse, has side effects", function() + -- Pretend we have an input register: rdtsc2 is declared with an edx argument and the calls below pass 0 + assert_cdef([[void rdtsc2(int32_t edx) __mcode("0f31_Es") __reglist(out, uint32_t eax) __reglist(mod, edx);]], "rdtsc2") + + local function getticks() 
local t1 = ffi.C.rdtsc2(0) + local t2 = ffi.C.rdtsc2(0) + return t2-t1 + end + + local function checker(i, n) + assert(n > 0, tostring(n)) + end + + assert_jitchecker(checker, getticks) + end) + + it("cse, fuse conflict", function() + -- assert_cdef([[int32_t add(int32_t i, int32_t n) __mcode("03rMc");]], "add") + assert_cdef([[double addsd(double n1, double n2) __mcode("F20F58rMvc");]], "addsd") + + local num = ffi.new("double[2]", 0) + + local function testfuse(i, a, b) + local n = ffi.C.addsd(num[0], num[1]) + num[1] = i + n = n + ffi.C.addsd(num[0], num[1]) + return n + end + + local function checker(i, n) + assert(n > 0, tostring(n)) + end + + assert_jitchecker(checker, testfuse, 2.5) + end) + +end) + end)