-- Spec for the FFI "intrinsic" extension: C declarations annotated with
-- __mcode(...) / __reglist(...) that are either called through ffi.C or
-- instantiated from a "?E" template with ffi.intrinsic().
--
-- describe/context/it and every assert_* helper used below (assert_cdef,
-- assert_cdeferr, assert_jit, assert_noexit, assert_exit, assert_jitchecker,
-- assert_v4eq, assert_equal, assert_error, ...) are not defined in this file;
-- they are expected to be provided by the test harness.
-- NOTE(review): the checker callbacks handed to assert_jitchecker appear to
-- receive the loop counter followed by the tested function's results, and
-- several of them signal a mismatch by returning a pair of values - confirm
-- against the harness.
local ffi = require("ffi")
local jit = require("jit")

-- Vector typedefs shared by the vector register tests.
ffi.cdef[[
typedef float float4 __attribute__((__vector_size__(16)));
typedef float float8 __attribute__((__vector_size__(32)));
typedef int int4 __attribute__((__vector_size__(16)));
typedef uint8_t byte16 __attribute__((__vector_size__(16)));
typedef int64_t long2 __attribute__((__vector_size__(16)));
]]

local float4 = ffi.new("float[4]")
local float4_2 = ffi.new("float[4]", {2, 2, 2, 2})
local float8 = ffi.new("float[8]", 0)
local byte16 = ffi.new("uint8_t[16]", 1, 0xff, 0)
-- Five elements: the cpuid test writes indices 0..3 and reads the buffer back
-- as a C string, so index 4 stays zero and terminates it.
local int4 = ffi.new("int32_t[5]", 0)
local float4ptr = float4+0

-- Used by the rdtsc test to combine the two 32 bit halves into one int64.
local union64 = ffi.new([[
union __attribute__((packed, aligned(4))){
  int64_t i64;
  struct{
    int32_t low;
    int32_t high;
  };
}]])

describe("intrinsic tests", function()

  -- Intrinsics whose machine code is a single 0x90 byte with fixed in/out
  -- registers: every output is expected to come back equal to its input.
  context("nop inout", function()

    it("fpr", function()
      assert_cdef([[void fpr_nop1(double xmm0) __mcode("90_E") __reglist(out, double xmm0)]], "fpr_nop1")
      local fpr1 = ffi.C.fpr_nop1

      -- wrong argument counts/types must raise
      assert_error(function() fpr1() end)
      assert_error(function() fpr1(nil) end)
      assert_error(function() fpr1(1, 2) end)

      assert_jit(123.075, function(num) return (fpr1(num)) end, 123.075)
      assert_noexit(-123567.075, function(num) return (fpr1(num)) end, -123567.075)

      assert_cdef([[void fpr_all(double xmm0, double xmm1, double xmm2, double xmm3, double xmm4, double xmm5, double xmm6, double xmm7) __mcode("?E") __reglist(out,double xmm0, double xmm1, double xmm2, double xmm3, double xmm4, double xmm5, double xmm6, double xmm7)]])

      local fpr_all = ffi.intrinsic("fpr_all", "\x90", 1)

      local function testfpr_all(i, r1, r2, r3, r4, r5, r6, r7, r8)
        -- value computed before the call and used after it
        local spilled = r1*2*i
        local ro1, ro2, ro3, ro4, ro5, ro6, ro7, ro8 = fpr_all(r1, r2, r3, r4, r5, r6, r7, r8)
        return ro1+i, ro2+i, ro3+i, ro4+i, ro5+i, ro6+i, ro7+i, (ro8*ro3)+i, ro2+spilled
      end

      local function checker(i, ro1, ro2, ro3, ro4, ro5, ro6, ro7, ro8, spilled)
        assert(ro1 == 1.5+i)
        assert(ro2 == 2.5+i)
        assert(ro3 == 3.5+i)
        assert(ro4 == 4.5+i)
        assert(ro5 == 5.5+i)
        assert(ro6 == 60000.525+i)
        assert(ro7 == i+-7.5)
        assert(ro8 == (-100*3.5)+i)
        assert(spilled == 2.5+(1.5*2*i))
      end

      assert_jitchecker(checker, testfpr_all, 1.5, 2.5, 3.5, 4.5, 5.5, 60000.525, -7.5, -100)
    end)

    it("gpr", function()
      assert_cdef([[void gpr_nop1(int32_t eax) __mcode("90_E") __reglist(out, int32_t eax)]], "gpr_nop1")

      local function testgpr1(num)
        return (ffi.C.gpr_nop1(num))
      end

      assert_jit(1235678, testgpr1, 1235678)
      assert_noexit(-1, testgpr1, -1)

      assert_cdef([[void gpr_scatch(int32_t eax, int32_t ecx, int32_t edx) __mcode("90_E") __reglist(out, int32_t eax, int32_t ecx, int32_t edx)]], "gpr_scatch")

      local function testgpr_scratch(i, r1, r2, r3)
        local ro1, ro2, ro3 = ffi.C.gpr_scatch(r1, r2, r3)
        return ro1+i, ro2+i, ro3+i
      end

      local function checker(i, ro1, ro2, ro3)
        assert(ro1 == 0+i)
        assert(ro2 == 1+i)
        assert(ro3 == 30000+i)
      end

      assert_jitchecker(checker, testgpr_scratch, 0, 1, 30000)

      assert_cdef([[void gpr_all(int32_t ebp, int32_t esi, int32_t edi, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx) __mcode("?E") __reglist(out, int32_t ebp, int32_t esi, int32_t edi, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx)]])

      local gpr_all = ffi.intrinsic("gpr_all", "\x90", 1)

      local function testgpr_all(i, r1, r2, r3, r4, r5, r6, r7)
        local spilled = r1+(10000*i)
        -- the fifth argument is the constant 100 instead of r5
        local ro1, ro2, ro3, ro4, ro5, ro6, ro7 = gpr_all(r1, r2, r3, r4, 100, r6, r7)
        return spilled+(ro1+ro2+ro3+ro4+ro5+ro6+ro7), ro3+i, ro2+i, ro1+i, ro4+i, ro5, ro6+i, ro7+i
      end

      local function checker_all(i, spilled, ro3, ro2, ro1, ro4, ro5, ro6, ro7)
        assert(ro1 == 1+i)
        assert(ro2 == 2+i)
        assert(ro3 == 3+i)
        assert(ro4 == 4+i)
        assert(ro5 == 100)
        assert(ro6 == 6+i)
        assert(ro7 == 7+i)
        -- 1 + (1+2+3+4+100+6+7) == 124
        assert(spilled == 124+(10000*i))
      end

      assert_jitchecker(checker_all, testgpr_all, 1, 2, 3, 4, 5, 6, 7)
    end)

    if ffi.arch == "x64" then
      it("gpr_gc64disp", function()
        assert_cdef([[void dispin(int64_t rax, int64_t rbx) __mcode("90_E") __reglist(out, int64_t rax)]], "dispin")

        local function test_dispin(i, r1)
          return (ffi.C.dispin(i, i + r1))
        end

        -- Check that the dispatch register is restored if it was an input register
        assert_jit(-3, test_dispin, -3, 999)

        -- machine code below is: xor rbx, rbx
        assert_cdef([[void dispmod() __mcode("?E") __reglist(mod, rbx)]])
        local dispmod = ffi.intrinsic("dispmod", "\x48\x31\xDB", 3)

        local function test_dispmod(r1, r2, r3, r4, r5, r6, r7)
          dispmod()
          return r1 + r2 + r3 + r4 + r5 + r6 + r7
        end

        -- Check that the dispatch register is restored if it is in the modified register list
        assert_jit(28, test_dispmod, 1, 2, 3, 4, 5, 6, 7)
      end)

      it("gpr64", function()
        assert_cdef([[void gpr64_1(int64_t rdx) __mcode("90_E") __reglist(out, int64_t rdx)]], "gpr64_1")

        local function testgpr1(num)
          return (ffi.C.gpr64_1(num))
        end

        assert_jit(1235678ull, testgpr1, 1235678)
        assert_noexit(-1LL, testgpr1, -1)

        assert_cdef([[void gpr64(int64_t rbp, int64_t rsi, int64_t rdi, int64_t rax, int64_t rbx, int64_t rcx, int64_t rdx) __mcode("?E") __reglist(out, int64_t rbp, int64_t rsi, int64_t rdi, int64_t rax, int64_t rbx, int64_t rcx, int64_t rdx)]])

        local gpr7 = ffi.intrinsic("gpr64", "\x90", 1)

        local function testgpr_all(i, r1, r2, r3, r4, r5, r6, r7)
          local spilled = r1+(10000*i)
          -- the fourth argument is a constant wider than 32 bits instead of r4
          local ro1, ro2, ro3, ro4, ro5, ro6, ro7 = gpr7(r1, r2, r3, 68719476735ll, r5, r6, r7)
          return spilled, ro3+i, ro2+i, ro1+i, ro4+i, ro5+i, ro6+i, ro7+i
        end

        local function checker(i, spilled, ro3, ro2, ro1, ro4, ro5, ro6, ro7)
          local sp = (10000*i)+1
          assert(ro1 == 1+i)
          assert(ro2 == 2+i)
          assert(ro3 == 3+i)
          assert(type(ro4) == "cdata" and ro4 == 68719476735ll+i)
          assert(type(ro5) == "cdata" and ro5 == 5ll+i)
          assert(type(ro6) == "cdata" and ro6 == 68719476721ll+i)
          assert(type(ro7) == "cdata" and ro7 == (-7ll)+i)
          assert(spilled == sp)
        end

        assert_jitchecker(checker, testgpr_all, 1, 2, 3, 4, 5ll, 68719476721ll, -7ll)
      end)

      it("rex fpr", function()
        assert_cdef([[void fpr_reg(double xmm9, double xmm0) __mcode("90_E") __reglist(out, double xmm0, double xmm9)]], "fpr_reg")
        local fpr = ffi.C.fpr_reg

        local function testrex(n1, n2)
          local o1, o2 = fpr(n1, n2)
          return o1+o2
        end

        assert_jit(444.575, testrex, 123.075, 321.5)
      end)

      it("fpr_vexrex(ymm)", function()
        local array = ffi.new("float8", 0, 1, 2, 3, 4, 5, 6, 7)
        -- force a Vex.B base register
        assert_cdef([[void fpr_vexrex(float8 ymm14, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebp) __mcode("?E") __reglist(out, float8 ymm14, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebp) __reglist(mod, ymm1, ymm7)]])
        local ymmtest = ffi.intrinsic("fpr_vexrex", "\x90", 1)
        local ymmout = ymmtest(array, 1, 2, 3, 4, 5, 6, 7)

        for i = 0, 7 do
          assert_equal(ymmout[i], i)
        end
      end)
    end

    it("fpr_vec", function()
      assert_cdef([[void fpr_vec(float4 xmm7v) __mcode("90_E") __reglist(out, float4 xmm7v)]], "fpr_vec")

      local v1 = ffi.new("float[4]", 1, 2, 3, 4)
      local xmmout = ffi.C.fpr_vec(v1)
      assert_v4eq(xmmout, 1, 2, 3, 4)
    end)

    it("check extra register spill", function()
      local array = ffi.new("float4", 1, 2, 3, 4)
      -- Use up all gpr scratch registers before loading the vec causing wrapper builder to restart with an extra spill
      assert_cdef([[void spillrestart(float4 xmm0v, int32_t eax, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebx) __mcode("?E") __reglist(out, int32_t eax, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebx, float4 xmm0v) ]])
      local xmmtest = ffi.intrinsic("spillrestart", "\x90", 1)
      local eax, ecx, edx, esi, edi, ebx, xmmout = xmmtest(array, 1, 2, 3, 4, 5, 6)

      assert_equal(eax, 1)
      assert_equal(ecx, 2)
      assert_equal(edx, 3)
      assert_equal(esi, 4)
      assert_equal(edi, 5)
      assert_equal(ebx, 6)

      for i = 1, 4 do
        assert_equal(xmmout[i-1], i)
      end
    end)

    it("fpr_vec(ymm)", function()
      assert_cdef([[void fpr_ymmvec(float8 ymm7) __mcode("90_E") __reglist(out, float8 ymm7)]], "fpr_ymmvec")
      -- test using plain array in place of a vector
      local v1 = ffi.new("float[8]", 0, 1, 2, 3, 4, 5, 6, 7)
      local ymmout = ffi.C.fpr_ymmvec(v1)

      for i = 0, 7 do
        assert_equal(ymmout[i], i)
      end

      assert_cdef([[void fpr_ymmvec2(float8 ymm0, void* ymm7) __mcode("90_E") __reglist(out, float8 ymm7, float8 ymm0)]], "fpr_ymmvec2")

      local v2 = ffi.new("float[8]", 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5)
      local ymmtest2 = ffi.C.fpr_ymmvec2
      local ymm7, ymm0 = ymmtest2(v1, v2)

      for i = 0, 7 do
        assert_equal(ymm0[i], i)
      end
      for i = 0, 7 do
        assert_equal(ymm7[i], i+0.5)
      end

      -- test using a cdata vector
      v2 = ffi.new("float8", 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5)
      ymm7, ymm0 = ymmtest2(v1, v2)

      for i = 0, 7 do
        assert_equal(ymm0[i], i)
      end
      for i = 0, 7 do
        assert_equal(ymm7[i], i+0.5)
      end
    end)

    it("idiv", function()
      assert_cdef([[void idiv(int32_t eax, int32_t ecx) __mcode("99F7F9_E") __reglist(out, int32_t eax, int32_t edx)]], "idiv")

      -- Returns a pair of differing values when quotient or remainder of
      -- i / 3 is wrong, nothing otherwise.
      local function checker(i, result, remainder)
        local rem = i%3

        if rem ~= remainder then
          return rem, remainder
        end

        local expected = (i-rem)/3

        if expected ~= result then
          return expected, result
        end
      end

      local function test_idiv(value, divisor)
        local result, remainder = ffi.C.idiv(value, divisor)
        return result, remainder
      end

      assert_jitchecker(checker, test_idiv, 3)

      -- test with jited with a constant arg
      local function test_idivK(value)
        local result, remainder = ffi.C.idiv(value, 3)
        return result, remainder
      end

      assert_jitchecker(checker, test_idivK, 3)
    end)
  end)

  -- Parsing of the __mcode attribute plus behaviour of intrinsics that use
  -- dynamically allocated (ModRM encoded) registers.
  context("__mcode", function()

    it("incomplete mcode def", function()
      assert_cdeferr([[int test1() __mcode]])
      assert_cdeferr([[int test2() __mcode(]])
      assert_cdeferr([[int test3() __mcode()]])
      assert_cdeferr([[int test3() __mcode(,)]])
      assert_cdeferr([[int test4() __mcode("ff"]])
      assert_cdeferr([[int test5() __mcode("ff",,)]])
      assert_cdeferr([[int test6() __mcode("ff" 1)]])
      assert_cdeferr([[int test7() __mcode("ff", )]])
      assert_cdeferr([[int test8() __mcode("ff", 1]])
      assert_cdeferr([[int test9() __mcode("ff", 1, 1]])
      assert_cdeferr([[int test10() __mcode("ff", 1, 1, ]])
      assert_cdeferr([[__mcode("90")]])
      assert_cdeferr([[int __mcode("90")]])
    end)

    it("bad mcoddef", function()
      assert_cdeferr([[void test1(float a) __mcode(0);]])
      assert_cdeferr([[void test2(float a) __mcode("");]])
      assert_cdeferr([[void test3(float a) __mcode("0");]])
      assert_cdeferr([[void test4(float a) __mcode("rff");]])
      assert_cdeferr([[struct c{float a __mcode("90");};]])
      -- Max 2 literals after the opcode string
      assert_cdeferr([[int test11() __mcode("ff", 1, 1, 2)]])
      assert_cdeferr([[struct b{float a; __mcode("90");};]])
    end)

    it("invalid registers", function()
      assert_cdef([[void validreg_gpr(int eax) __mcode("90_E");]], "validreg_gpr")

      assert_cdeferr([[void badreg_1(int e) __mcode("90_E");]], "invalid")
      assert_cdeferr([[void badreg_1(int r20d) __mcode("90_E");]], "invalid")
      assert_cdeferr([[void badreg_gpr1() __mcode("90_E") __reglist(out, int e);]], "invalid")
      assert_cdeferr([[void badreg_gpr2() __mcode("90_E") __reglist(mod, e);]], "invalid")

      assert_cdef([[void validreg_fpr(float xmm0) __mcode("90_E");]], "validreg_fpr")

      assert_cdeferr([[void badreg_fpr1(float x) __mcode("90_E");]], "invalid")
      assert_cdeferr([[void badreg_fpr1(float xm) __mcode("90_E");]], "invalid")
      assert_cdeferr([[void badreg_fpr1(float xm0) __mcode("90_E");]], "invalid")
      assert_cdeferr([[void badreg_fpr1(float xmmm0) __mcode("90_E");]], "invalid")
      assert_cdeferr([[void badreg_fpr2(float xmm0vf) __mcode("90_E");]], "invalid")
      -- xmm register number too large
      assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90_E");]], "invalid")
    end)

    it("invalid commutative mode registers", function()
      assert_cdef([[int4 valid_comm(int4 v1, int4 v2) __mcode("90rMc");]], "valid_comm")
      -- a single input argument is not enough
      assert_cdeferr([[int4 invalid_comm1(int4 v1) __mcode("90rMc");]])
      -- input register types must match
      assert_cdeferr([[void invalid_comm2(int32_t i, int4 v1) __mcode("90rMc");]])
      assert_cdeferr([[void invalid_comm3(int4 v1, int32_t i) __mcode("90rMc");]])
    end)

    it("multidef rollback", function()
      -- check ctype rollback after parsing a valid intrinsic the line before
      assert_cdeferr([[
        void multi1() __mcode("90");
        void multi2() __mcode("0");
      ]])

      -- neither declaration may be visible after the failed cdef
      assert_error(function() ffi.C.multi1() end)
      assert_error(function() ffi.C.multi2() end)

      -- the rolled back name can be declared again with another signature
      assert_not_error(function()
        ffi.cdef[[
          void multi1(int32_t eax) __mcode("90_E") __reglist(out, int32_t eax);
        ]]
      end)

      assert_equal(ffi.C.multi1(1.1), 1)
    end)

    it("bad dynamic registers", function()
      -- No modrm specified for the implicit output register declared by having a non void return type
      assert_cdeferr([[int32_t dynerr1() __mcode("90");]])
      assert_cdeferr([[void dynerr2(int32_t a) __mcode("90");]])
      assert_cdeferr([[int32_t dynerr3(int32_t a) __mcode("90");]])
      -- no dynamic registers listed
      assert_cdeferr([[void dynerr4() __mcode("90m");]])
      assert_cdeferr([[void dynerr5() __mcode("90rM");]])
      assert_cdeferr([[void dynerr6() __mcode("90Mr");]])
      -- need 2 in or 1 in and a return type
      assert_cdeferr([[void dynerr7(int32_t a) __mcode("90rM");]])
      -- too many dynamic registers
      assert_cdeferr([[void dynerr8(int a, int b, int c) __mcode("90rR");]])
    end)

    it("bad ffi types mcode", function()
      assert_cdeferr([[void testffi1(float a2, ...) __mcode("90");]])
      assert_cdeferr([[void testffi2(complex a2) __mcode("90");]])
      -- NYI non 16/32 byte vectors
      assert_cdeferr([[
        typedef float float2 __attribute__((__vector_size__(8)));
        void testffi2(float2 a2) __mcode("90")
      ]])
    end)

    it("bad args", function()
      assert_cdef([[void idiv2(int32_t eax, int32_t ecx) __mcode("99F7F9_E") __reglist(out, int32_t eax, int32_t edx)]], "idiv2")

      local idiv = ffi.C.idiv2

      assert_equal(idiv(6, 2), 3)

      -- too few arguments
      assert_error(function() idiv() end)
      assert_error(function() idiv(nil) end)
      assert_error(function() idiv(1) end)
      assert_error(function() idiv(1, nil) end)

      -- too many arguments
      assert_error(function() idiv(1, 2, nil) end)
      assert_error(function() idiv(1, 2, 3) end)
      assert_error(function() idiv(1, 2, 3, 4) end)
    end)

    it("output pointers", function()
      assert_cdef([[const char* addptr(const char* nptr, int32_t n) __mcode("03rM");]], "addptr")
      local s = "0123456789abcdefghijklmnopqrstvwxyz"

      local ptr = ffi.C.addptr(s, 0)
      assert_equal(ptr, ffi.cast("const char*", s))
      assert_equal(ptr[0], string.byte(s))

      local function checker(i, sptr)
        -- The printed pointers are only the failure message. Asserting on
        -- tostring(sptr) by itself could never fail because a string is
        -- always truthy.
        assert(sptr == ptr+i, tostring(sptr).." ~= "..tostring(ptr+i))
      end

      assert_jitchecker(checker, function(i)
        return (ffi.C.addptr(s, i))
      end)
    end)

    it("signed/unsigned numbers", function()
      assert_cdef([[int32_t sub_signed(int32_t n, int32_t i) __mcode("2brM");]], "sub_signed")
      assert_cdef([[uint32_t sub_unsigned(uint32_t n, uint32_t i) __mcode("2brM");]], "sub_unsigned")
      assert_cdef([[uint32_t sub_signedun(int32_t n, int32_t i) __mcode("2brM");]], "sub_signedun")

      assert_equal(tonumber(ffi.C.sub_unsigned(3, 1)), 2)

      local function unsignedtest(n1, n2)
        return (tonumber(ffi.C.sub_unsigned(n1, n2)))
      end

      assert_jit(2, unsignedtest, 3, 1)
      assert_jit(2999999999, unsignedtest, 3000000000, 1)
      -- wrap around
      assert_jit(4294967295, unsignedtest, 300, 301)

      local function unsignedtest_boxed(n1, n2)
        return (ffi.C.sub_unsigned(n1, n2))
      end

      assert_jit(ffi.new("uint32_t", 2), unsignedtest_boxed, 3, 1)
      assert_jit(ffi.new("uint32_t", 2999999999), unsignedtest_boxed, 3000000000, 1)
      -- wrap around
      assert_jit(ffi.new("uint32_t", 4294967295), unsignedtest_boxed, 300, 301)

      local function signedtest(n1, n2)
        return (ffi.C.sub_signed(n1, n2))
      end

      assert_jit(-2, signedtest, -1, 1)
      assert_noexit(3, signedtest, -1, -4)
    end)

    it("op encode", function()
      assert_cdef([[int32_t not32(int32_t n) __mcode("F72m");]], "not32")

      local function test_not(i)
        return (ffi.C.not32(i))
      end

      assert_jit(-1, test_not, 0)
      assert_noexit(0, test_not, -1)

      assert_cdef([[int32_t add_imm3(int32_t n) __mcode("830mU", 3);]], "add_imm3")

      -- NOTE(review): unlike the other checkers this one returns its pair
      -- unconditionally; presumably assert_jitchecker compares the two
      -- values - confirm against the harness.
      local function checker(i, n)
        return i+3, n
      end

      assert_jitchecker(checker, function(i)
        return (ffi.C.add_imm3(i))
      end)
    end)

    it("prefix byte", function()
      assert_cdef([[void atomicadd(int32_t* nptr, int32_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd")

      -- running total the memory cell is expected to hold
      local sum = 0

      local function checker(i, jsum)
        sum = sum+i
        if jsum ~= sum then
          return jsum, sum
        end
      end

      local numptr = ffi.new("int32_t[1]", 0)

      assert_jitchecker(checker, function(i)
        ffi.C.atomicadd(numptr, i)
        return numptr[0]
      end)
    end)

    if ffi.arch == "x64" then
      it("prefix64", function()
        assert_cdef([[void atomicadd64(int64_t* nptr, int64_t n) __mcode("01mRIPS", 0xF0);]], "atomicadd64")

        local sum = 0

        local function checker(i, jsum)
          sum = sum+i
          assert(jsum == sum)
        end

        local numptr = ffi.new("int64_t[1]", 0)

        assert_jitchecker(checker, function(i)
          ffi.C.atomicadd64(numptr, i)
          return numptr[0]
        end)
      end)
    end

    it("prefix and imm byte", function()
      assert_cdef([[void atomicadd1(int32_t* nptr) __mcode("830mIUPS", 0xF0, 0x01);]], "atomicadd1")

      local function checker(i, jsum)
        if jsum ~= i then
          return i, jsum
        end
      end

      local numptr = ffi.new("int32_t[1]", 0)

      assert_jitchecker(checker, function(i)
        ffi.C.atomicadd1(numptr)
        return numptr[0]
      end)
    end)

    it("idiv(template)", function()
      assert_cdef([[void idivT(int32_t eax, int32_t ecx) __mcode("?E") __reglist(out, int32_t eax, int32_t edx)]])
      -- trying to create template intrinsic through C library should always fail
      assert_error(function() return ffi.C.idivT end)

      local idiv = ffi.intrinsic("idivT", "\x99\xF7\xF9", 3)

      local function checker(i, result, remainder)
        local rem = i%2

        if rem ~= remainder then
          return rem, remainder
        end

        local expected = (i-rem)/2

        if expected ~= result then
          return expected, result
        end
      end

      -- idiv is an upvalue so that it can be swapped for another instance below
      local function test_idiv(value, divisor)
        local result, remainder = idiv(value, divisor)
        return result, remainder
      end

      assert_jitchecker(checker, test_idiv, 2)

      -- create a second instance and check guard for wrapper pointer fails
      idiv = ffi.intrinsic("idivT", "\x90", 1)
      assert_exit(10, test_idiv, 10, 5)
    end)

    it("side effects(mode)", function()
      assert_cdef([[void add1_noside(int32_t* nptr) __mcode("830mIU", 0x01);]], "add1_noside")
      assert_cdef([[void add1_side(int32_t* nptr) __mcode("830mIUs", 0x01);]], "add1_side")

      local numptr = ffi.new("int32_t[2]", 0)

      local function checker(i, n)
        assert(n == i)
        assert(numptr[0] >= numptr[1])
      end

      -- numptr[0] is bumped by the variant declared with the "s" flag,
      -- numptr[1] by the variant without it
      local function test_sideff(i)
        ffi.C.add1_side(numptr)
        ffi.C.add1_noside(numptr+1)
        return numptr[0]
      end

      assert_jitchecker(checker, test_sideff)
      assert_greater_than(numptr[0], numptr[1])

      numptr[0] = 0
      numptr[1] = 0
      -- test directly as JIT'ed
      test_sideff()
      assert_equal(numptr[0], 1)
      assert_equal(numptr[1], 0)
    end)

    it("prefetch", function()
      assert_cdef([[void prefetch0(void* mem) __mcode("0F181mIs")]], "prefetch0")
      assert_cdef([[void prefetch1(void* mem) __mcode("0F182mIs")]], "prefetch1")
      assert_cdef([[void prefetch2(void* mem) __mcode("0F183mIs")]], "prefetch2")
      assert_cdef([[void prefetchnta(void* mem) __mcode("0F180mIs")]], "prefetchnta")

      local asm = ffi.C
      local kmem = ffi.new("int[4]")
      local mem = 1
      mem = mem and ffi.new("int[8]", 1, 2, 3, 4, 5, 6, 7, 8)

      local function testprefetch(a, b, c)
        local n = a+b
        local ptr = mem+c

        asm.prefetch2(ptr)
        asm.prefetch1(kmem)
        asm.prefetch0(mem+a)
        asm.prefetchnta(mem)

        asm.prefetch0(kmem+a)
        asm.prefetch1(kmem+b)

        return (ptr) ~= 0 and ptr[0] + ptr[3]
      end

      -- with c == 3: mem[3] + mem[6] == 4 + 7
      assert_jit(11, testprefetch, 1, 2, 3)
    end)

    it("cmpxchg", function()
      assert_cdef([[void cmpxchg(int32_t* gpr32, int32_t gpr32, int32_t eax) __mcode("0FB1mRPEI", 0xF0) __reglist(out, int32_t eax);]], "cmpxchg")

      local kptr32 = ffi.new("int32_t[1]", 0)
      int4[0] = 0

      local function checker(i, n, eax)
        assert(n == i)
        assert(kptr32[0] == i)
        assert(eax == i-1)
      end

      -- store i when the cell currently holds i-1
      local function test_cmpxchg(i)
        local eax = ffi.C.cmpxchg(kptr32, i, i-1)
        return kptr32[0], eax
      end

      assert_jitchecker(checker, test_cmpxchg)

      -- test not equal non swapping
      local num, eax = test_cmpxchg(0)
      assert_equal(eax, kptr32[0])

      num, eax = test_cmpxchg(kptr32[0]+1)
      assert_equal(eax, kptr32[0]-1)
    end)

    if ffi.arch == "x64" then
      it("cmpxchg64", function()
        assert_cdef([[void cmpxchg64(int64_t* gpr64, int64_t gpr64, int64_t rax) __mcode("0FB1mRPEIX", 0xF0) __reglist(out, int64_t rax);]], "cmpxchg64")

        local kptr64 = ffi.new("int64_t[1]", 0)

        local function test_cmpxchg64(i)
          local rax = ffi.C.cmpxchg64(kptr64, -i, -(i-1))
          return kptr64[0], rax
        end

        local function checker(i, newval, rax)
          assert(newval == -i)
          assert(kptr64[0] == -i)
          assert(rax == -(i-1))
        end

        assert_jitchecker(checker, test_cmpxchg64, 2)

        -- test not equal non swapping
        local num, rax = test_cmpxchg64(0, 1)
        assert_equal(rax, kptr64[0])
      end)
    end

    it("cmpxchg8b", function()
      ffi.cdef([[typedef struct int32pair { int32_t i1; int32_t i2; } __attribute__((aligned(8))) int32pair;]])

      assert_cdef([[void cmpxchg8b(void* gpr32, int32_t eax, int32_t edx, int32_t ebx, int32_t ecx) __mcode("0FC71mPEI", 0xf0) __reglist(out, int32_t eax, int32_t edx);]], "cmpxchg8b")

      local int32pair = ffi.new("int32pair")
      int32pair.i1 = 1
      int32pair.i2 = -1

      -- replace the pair (i, -i) with (i+1, -(i+1))
      local function test_cmpxchg8b(i)
        local eax, edx = ffi.C.cmpxchg8b(int32pair, i, -i, i+1, -(i+1))
        return int32pair.i1, int32pair.i2, eax, edx
      end

      local function checker(i, n1, n2, eax, edx)
        assert(n1 == i+1)
        assert(n2 == -(i+1))
        assert(int32pair.i1 == i+1)
        assert(int32pair.i2 == -(i+1))
        assert(eax == i)
        assert(edx == -i)
      end

      assert_jitchecker(checker, test_cmpxchg8b)
    end)

    it("cpuid_brand", function()
      assert_cdef([[void cpuid(int32_t eax, int32_t ecx) __mcode("0FA2_E") __reglist(out, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx);]], "cpuid")

      local cpuid = ffi.C.cpuid

      -- Runs cpuid for the given leaf and returns the four output registers
      -- reinterpreted as a C string.
      local function getcpuidstr(eax)
        int4[0] = 0; int4[1] = 0; int4[2] = 0; int4[3] = 0
        int4[0], int4[1], int4[2], int4[3] = cpuid(eax, 0)
        return (ffi.string(ffi.cast("char*", int4+0)))
      end

      -- -2147483646 .. -2147483644 are 0x80000002 .. 0x80000004 as signed int32
      local brand = getcpuidstr(-2147483646)..getcpuidstr(-2147483645)..getcpuidstr(-2147483644)
      print("Processor brand: "..brand)

      -- Same sequence written out inline so that it can be compiled as one function
      local function testcpuid_brand()
        local s = ""

        int4[0] = 0
        int4[1] = 0
        int4[2] = 0
        int4[3] = 0

        int4[0], int4[1], int4[2], int4[3] = cpuid(-2147483646, 0)
        s = s..ffi.string(ffi.cast("char*", int4+0))

        int4[0], int4[1], int4[2], int4[3] = cpuid(-2147483645, 0)
        s = s..ffi.string(ffi.cast("char*", int4+0))

        int4[0], int4[1], int4[2], int4[3] = cpuid(-2147483644, 0)
        s = s..ffi.string(ffi.cast("char*", int4+0))

        return s
      end

      assert_jit(brand, testcpuid_brand)
    end)

    it("no dse between intrinsic", function()
      assert_cdef([[int32_t volatileload(int32_t* iptr) __mcode("8brMI");]], "volatileload")

      local numptr = ffi.new("int32_t[1]", 0)
      local volatileload = ffi.C.volatileload

      -- Each load has to observe the store directly in front of it: 1 + 2 == 3
      local function testdse()
        assert(volatileload)
        local sum = 0
        numptr[0] = 0
        numptr[0] = 1
        sum = volatileload(numptr)
        numptr[0] = 2
        sum = sum + volatileload(numptr)
        return sum
      end

      assert_jit(3, testdse)
    end)
  end)

  -- Parsing and validation of the __reglist attribute.
  context("__reglist", function()

    it("incomplete reglist", function()
      assert_cdeferr([[int test1() __mcode("90") __reglist]])
      assert_cdeferr([[int test2() __mcode("90") __reglist(]])
      assert_cdeferr([[int test3() __mcode("90") __reglist();]])
      assert_cdeferr([[int test4() __mcode("90") __reglist(,);]])
      assert_cdeferr([[int test5() __mcode("90") __reglist(in, eax);]])
      assert_cdeferr([[int test6() __mcode("90") __reglist(out, ]])
      assert_cdeferr([[int test6() __mcode("90") __reglist(mod, ]])
      assert_cdeferr([[int test7() __mcode("90") __reglist(mod, eax, ]])
      assert_cdeferr([[int test8() __mcode("90") __reglist("out, ]])
      assert_cdeferr([[int test9() __mcode("90") __reglist(o]])
      assert_cdeferr([[int test10() __mcode("90") __reglist(ou]])
      assert_cdeferr([[int invalid_reglist4() __mcode("90") __reglist(out, int)]])
      assert_cdeferr([[int invalid_reglist4() __mcode("90") __reglist(out, int eax,)]])
    end)

    it("invalid reglist", function()
      assert_cdeferr([[int invalid_reglist1() __mcode("90") __reglist(inn, int eax)]])
      assert_cdeferr([[int invalid_reglist2() __mcode("90") __reglist(o, int eax)]])
      assert_cdeferr([[int invalid_reglist3() __mcode("90") __reglist(oout, int eax)]])
      assert_cdeferr([[int invalid_reglist4() __mcode("90") __reglist(out, int reax)]])
      -- exceeded max register list size
      assert_cdeferr([[int invalid_reglist5() __mcode("90") __reglist(out, int eax, int ebx, int ecx, int edx, int esi, int edi, float xmm0, float xmm1, float xmm2)]])
    end)

    it("stack pointer blacklist", function()
      assert_cdeferr([[void blacklist_in(int esp) __mcode("90_E")]], "blacklist")
      assert_cdeferr([[void blacklist_out(int eax) __mcode("90_E") __reglist(out, int esp)]], "blacklist")
      --FIXME
      --assert_cdeferr([[void blacklist_mod(int eax) __mcode("90_E") __reglist(mod, esp)]], "blacklist")

      if ffi.arch == "x64" then
        assert_cdeferr([[void blacklist_64(int rsp) __mcode("90_E")]], "blacklist")
      end
    end)

    it("duplicate regs", function()
      assert_cdeferr([[void duplicate_in(int eax, int eax) __mcode("90_E")]], "duplicate")
      assert_cdeferr([[void duplicate_inxmm(float4 xmm0, float4 xmm0) __mcode("90_E")]], "duplicate")
      assert_cdeferr([[void duplicate_out(int eax) __mcode("90_E") __reglist(out, int eax, int eax)]], "duplicate")
      --FIXME assert_cdeferr([[void duplicate_mod(int eax) __mcode("90_E") __reglist(mod, eax, eax)]], "duplicate")
    end)

    it("rdtsc", function()
      assert_cdef([[void rdtsc() __mcode("0f31") __reglist(out, int32_t eax, int32_t edx);]], "rdtsc")

      local rdtsc = ffi.C.rdtsc

      -- Joins the two 32 bit outputs into one 64 bit value through union64
      local function getticks()
        union64.low, union64.high = rdtsc()
        return union64.i64
      end

      local prev = 0ll

      -- every sample has to be larger than the previous one
      local function checker(i, result)
        --print(tonumber(result-prev))
        assert(result > prev)
        prev = result
      end

      assert_jitchecker(checker, getticks)
    end)
  end)

  it("popcnt", function()
    assert_cdef([[int32_t popcnt(int32_t n) __mcode("f30fb8rM");]], "popcnt")

    local popcnt = ffi.C.popcnt

    assert_equal(popcnt(7), 3)
    assert_equal(popcnt(1024), 1)
    assert_equal(popcnt(1023), 10)

    local function testpopcnt(num)
      return (popcnt(num))
    end

    assert_jit(10, testpopcnt, 1023)
    assert_noexit(32, testpopcnt, -1)
    assert_noexit(0, testpopcnt, 0)
    assert_noexit(1, testpopcnt, 1)

    ffi.cdef([[int32_t popcntuf(int32_t n) __mcode("f30fb8rR");]])
    -- check unfused
    popcnt = ffi.C.popcntuf
    assert_equal(popcnt(7), 3)
    assert_equal(popcnt(1024), 1)
  end)

  it("addsd", function()
    assert_cdef([[double addsd(double n1, double n2) __mcode("F20F58rMv");]], "addsd")

    local addsd = ffi.C.addsd

    -- Helpers are local: plain `function name()` would leak them as globals.
    local function test_addsd(n1, n2)
      return (addsd(n1, n2))
    end

    assert_equal(3, addsd(1, 2))
    assert_equal(0, addsd(0, 0))

    assert_jit(-3, test_addsd, -4.5, 1.5)
    assert_noexit(3, test_addsd, 4.5, -1.5)
    -- check dual num exit
    assert_equal(5, test_addsd(3, 2))

    -- test same ref input
    local function test_addsd2(n)
      return (addsd(n, n))
    end

    assert_jit(3, test_addsd2, 1.5)
    assert_noexit(-3, test_addsd2, -1.5)
    -- check dual num exit
    assert_equal(6, test_addsd2(3))

    -- check unfused
    ffi.cdef([[double addsduf(double n1, double n2) __mcode("F20F58rRv");]])
    addsd = ffi.C.addsduf

    assert_equal(3, addsd(1, 2))
    assert_equal(0, addsd(0, 0))
  end)

  it("addss", function()
    assert_cdef([[float addss(float n1, float n2) __mcode("F30F58rMv");]], "addss")

    local addss = ffi.C.addss

    local function test_addss(n1, n2)
      return (addss(n1, n2))
    end

    assert_equal(3, addss(1, 2))
    assert_equal(0, addss(0, 0))

    assert_jit(-3, test_addss, -4.5, 1.5)
    assert_noexit(3, test_addss, 4.5, -1.5)
    -- check dual num exit
    assert_equal(5, test_addss(3, 2))

    -- test same ref input
    local function test_addss2(n)
      return (addss(n, n))
    end

    assert_jit(-9, test_addss2, -4.5)
    assert_noexit(3, test_addss2, 1.5)

    -- check unfused
    ffi.cdef[[float addssuf(float n1, float n2) __mcode("F30F58rRv");]]
    addss = ffi.C.addssuf

    assert_equal(3, addss(1, 2))
    assert_equal(0, addss(0, 0))
  end)

  it("shufps", function()
    assert_cdef([[float4 shufps(float4 v1, float4 v2) __mcode("0FC6rMU", 0);]], "shufps")

    local shufps = ffi.C.shufps
    local v = ffi.new("float4", 1.5, 2.25, 3.125, 4.0625)

    -- immediate 0: element 0 is expected in every lane
    local vout = shufps(v, v)
    assert_equal(vout[0], 1.5)
    assert_equal(vout[1], 1.5)
    assert_equal(vout[2], 1.5)
    assert_equal(vout[3], 1.5)

    assert_cdef([[float4 shufpsrev(float4 v1, float4 v2) __mcode("0FC6rMU", 0x1b);]], "shufpsrev")

    -- immediate 0x1b: elements are expected in reverse order
    vout = ffi.C.shufpsrev(v, v)
    assert_equal(vout[0], 4.0625)
    assert_equal(vout[1], 3.125)
    assert_equal(vout[2], 2.25)
    assert_equal(vout[3], 1.5)
  end)

  it("vpermilps(avx)", function()
    assert_cdef([[float4 vpermilps(float4 v1, int4 control) __mcode("660F380CrMV");]], "vpermilps")

    local v1 = ffi.new("float4", 1, 2, 3, 4)
    local v2 = ffi.new("int4", 0, 0, 0, 0)
    assert_v4eq(ffi.C.vpermilps(v1, v2), 1, 1, 1, 1)

    -- Reverse the vector
    v2 = ffi.new("int4", 3, 2, 1, 0)
    assert_v4eq(ffi.C.vpermilps(v1, v2), 4, 3, 2, 1)
  end)

  it("vpslldq(avx)", function()
    assert_cdef([[int4 vpslldq_4(int4 v1) __mcode("660F737mUV", 4);]], "vpslldq_4")

    local v = ffi.new("int4", 1, 2, 3, 4)
    assert_v4eq(ffi.C.vpslldq_4(v), 0, 1, 2, 3)
  end)

  it("vaddps(avx, ymm)", function()
    assert_cdef([[float8 vaddps(float8 v1, float8 v2) __mcode("0F58rMV");]], "vaddps")

    local v1 = ffi.new("float8", 1, 2, 3, 4, 5, 6, 7, 8)
    local v2 = ffi.new("float8", 1, 1, 1, 1, 1, 1, 1, 1)

    local vout = ffi.C.vaddps(v1, v2)

    for i = 0, 7 do
      assert_equal(vout[i], i+2)
    end
  end)

  if ffi.arch == "x64" then
    -- Check VEX.L bit is correctly set when the X opcode flag is specified on vex opcodes
    it("vmovd VEX.L bit(avx)", function()
      assert_cdef([[long2 vmovd(int64_t n) __mcode("660F6ErMVX");]], "vmovd")

      local v = ffi.C.vmovd(-1LL)
      assert_equal(v[0], -1LL)
    end)
  end

  it("phaddd 4byte opcode", function()
    ffi.cdef([[int4 phaddd(int4 v1, int4 v2) __mcode("660F3802rM");]])

    local phaddd = ffi.C.phaddd

    -- Two horizontal adds leave the sum of all four elements in element 0
    local function hsum(v)
      local result = phaddd(v, v)
      result = phaddd(result, result)
      return result[0]
    end

    local v = ffi.new("int4", 1, 2, 3, 4)
    local vzero = ffi.new("int4", 0)

    assert_equal(hsum(v), 10)
    assert_equal(hsum(vzero), 0)
  end)

  -- Opcodes whose operands live in different register classes.
  context("mixed register type opcodes", function()

    it("pcmpstr", function()
      ffi.cdef([[void pcmpistri(byte16 string, byte16 mask) __mcode("660F3A63rMU", 0x2) __reglist(out, int32_t ecx)]])

      local charlist = ffi.new("byte16", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
      -- not named `string`, which would shadow the standard library table
      local subject = ffi.new("byte16", 2, 2, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 7, 7, 2, 2)

      local ecx = ffi.C.pcmpistri(charlist, subject)
      assert_equal(ecx, 4)

      ffi.cdef([[
        void pcmpistrm(byte16 string, byte16 mask) __mcode("660F3A62rMU", 0x40) __reglist(out, byte16 xmm0v);
        int32_t pmovmskb(byte16 mask) __mcode("660FD7rM");
      ]])

      local mask = ffi.C.pcmpistrm(charlist, subject)
      mask = ffi.C.pmovmskb(mask)
      assert_equal(mask, 48)
    end)

    it("cvttsd2s", function()
      assert_cdef([[int cvttsd2s(double n) __mcode("F20F2CrM");]], "cvttsd2s")

      local cvttsd2s = ffi.C.cvttsd2s

      local function test_cvttsd2s(n)
        return (cvttsd2s(n))
      end

      assert_equal(0, cvttsd2s(-0))
      assert_equal(1, cvttsd2s(1))
      assert_equal(1, cvttsd2s(1.2))

      assert_jit(3, test_cvttsd2s, 3.3)
      assert_noexit(-1, test_cvttsd2s, -1.5)
      -- check dual num exit
      assert_equal(5, test_cvttsd2s(5))

      -- check unfused
      ffi.cdef([[int cvttsd2suf(double n) __mcode("F20F2CrR");]])
      cvttsd2s = ffi.C.cvttsd2suf

      assert_equal(0, cvttsd2s(-0))
      assert_equal(1, cvttsd2s(1))
      assert_equal(1, cvttsd2s(1.2))
    end)

    it("cvtsi2sd", function()
      assert_cdef([[double cvtsi2sd(int32_t n) __mcode("F20F2ArM");]], "cvtsi2sd")

      local cvtsi2sd = ffi.C.cvtsi2sd

      -- cvtsi2sd is an upvalue, so rebinding it below also changes what this calls
      local function test_cvtsi2sd(n1, n2)
        return (cvtsi2sd(n1)+n2)
      end

      assert_equal(0.5, test_cvtsi2sd(0, 0.5))
      assert_equal(1.25, test_cvtsi2sd(1.0, 0.25))
      assert_equal(-1.5, test_cvtsi2sd(-2, 0.5))

      assert_jit(3.25, test_cvtsi2sd, 3, 0.25)
      assert_noexit(-1.5, test_cvtsi2sd, -2, 0.5)
      -- check dual num exit
      assert_equal(11, test_cvtsi2sd(5, 6))

      -- check unfused
      ffi.cdef([[double cvtsi2sduf(int32_t n) __mcode("F20F2ArR");]])
      cvtsi2sd = ffi.C.cvtsi2sduf

      assert_equal(0.5, test_cvtsi2sd(0, 0.5))
      assert_equal(1.25, test_cvtsi2sd(1.0, 0.25))
      assert_equal(-1.5, test_cvtsi2sd(-2, 0.5))
    end)

    it("pextrw", function()
      local v = ffi.new("byte16", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)

      assert_cdef([[int32_t pextrw_0(byte16 v) __mcode("660FC5mRU", 0);]], "pextrw_0")
      assert_equal(0x0201, ffi.C.pextrw_0(v))

      assert_cdef([[int32_t pextrw_7(byte16 v) __mcode("660FC5mRU", 7);]], "pextrw_7")
      assert_equal(0x100f, ffi.C.pextrw_7(v))
    end)

    it("pinsrw", function()
      assert_cdef([[int4 pinsrw_0(byte16 v, int32_t word) __mcode("660FC4rMU", 0);]], "pinsrw_0")

      local v = ffi.new("byte16", 0)
      local vout = ffi.C.pinsrw_0(v, 0xf0f1)
      assert_equal(0xf0f1, vout[0])

      assert_cdef([[int4 pinsrw_7(byte16 v, int32_t word) __mcode("660FC4rMU", 7);]], "pinsrw_7")

      -- Exercise the index 7 variant (this used to call pinsrw_0 a second
      -- time). Word 7 is the upper half of int4 element 3, so the element
      -- reads back as 0xf0f10000, which is -252641280 as a signed int32.
      vout = ffi.C.pinsrw_7(v, 0xf0f1)
      assert_equal(-252641280, vout[3])
    end)
  end)

  -- Whether repeated intrinsic calls inside one function are merged.
  context("Intrinsic CSE", function()

    it("cse 1 input, same arg", function()
      assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add3")

      local function add3mul2(a)
        return ffi.C.add3(a)+ffi.C.add3(a)
      end

      assert_jit(10, add3mul2, 2)
    end)

    it("cse 1 input, diff args", function()
      assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add3")

      local function add3mul2(a, b)
        return ffi.C.add3(a)+ffi.C.add3(b)
      end

      assert_jit(11, add3mul2, 2, 3)
      assert_noexit(12, add3mul2, 3, 3)
    end)

    it("cse 1 input, same and diff args", function()
      assert_cdef([[int32_t add3(int32_t n) __mcode("830mU", 3);]], "add3")

      local function add3mul2(a, b)
        return ffi.C.add3(a)+ffi.C.add3(b)+ffi.C.add3(a)
      end

      assert_jit(16, add3mul2, 2, 3)
      assert_noexit(10, add3mul2, -1, 3)
    end)

    it("no cse, indirect ModRM", function()
      assert_cdef([[int32_t xadd(int32_t* nptr, int32_t n) __mcode("0FC1mRI");]], "xadd")

      local num = ffi.new("int32_t[1]", 0)

      -- Pretend to use the output values so we don't get DCE'ed since we didn't flag xadd to have side effects
      local function inc2(a, b)
        local total = ffi.C.xadd(num, 1)
        total = total + ffi.C.xadd(num, 1)
        return num[0], total
      end

      -- two increments per call, so the cell holds 2*i after call number i
      local function checker(i, n)
        assert(i * 2 == n, n)
      end

      assert_jitchecker(checker, inc2)
    end)

    it("no cse, has side effects", function()
      -- Pretend we have an input register
      assert_cdef([[void rdtsc2(int32_t edx) __mcode("0f31_Es") __reglist(out, uint32_t eax) __reglist(mod, edx);]], "rdtsc2")

      local function getticks()
        local t1 = ffi.C.rdtsc2(0)
        local t2 = ffi.C.rdtsc2(0)
        return t2-t1
      end

      -- a difference of zero would mean both calls were merged into one
      local function checker(i, n)
        assert(n > 0, tostring(n))
      end

      assert_jitchecker(checker, getticks)
    end)

    it("cse, fuse conflict", function()
      -- assert_cdef([[int32_t add(int32_t i, int32_t n) __mcode("03rMc");]], "add")
      assert_cdef([[double addsd(double n1, double n2) __mcode("F20F58rMvc");]], "addsd")

      local num = ffi.new("double[2]", 0)

      -- Same operands on both calls, but num[1] is stored to in between
      local function testfuse(i, a, b)
        local n = ffi.C.addsd(num[0], num[1])
        num[1] = i
        n = n + ffi.C.addsd(num[0], num[1])
        return n
      end

      local function checker(i, n)
        assert(n > 0, tostring(n))
      end

      assert_jitchecker(checker, testfuse, 2.5)
    end)
  end)

end)