From e1a1721ae203ac23ff9b326902b535bd131e0155 Mon Sep 17 00:00:00 2001 From: fsfod Date: Tue, 9 Feb 2016 04:21:27 +0000 Subject: [PATCH] Template intrinsics user machine code --- src/lib_ffi.c | 10 +++ src/lj_asm.c | 34 +++++-- src/lj_emit_x86.h | 18 ++++ src/lj_intrinsic.c | 104 ++++++++++++++++++---- src/lj_intrinsic.h | 5 ++ tests/intrinsic_spec.lua | 186 +++++++++++++++++++++++++++++++++------ 6 files changed, 308 insertions(+), 49 deletions(-) diff --git a/src/lib_ffi.c b/src/lib_ffi.c index cff4e0fd..f11ac79d 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -492,6 +492,16 @@ LJLIB_CF(ffi_cdef) return 0; } +LJLIB_CF(ffi_intrinsic) +{ +#if LJ_HASINTRINSICS + lj_intrinsic_create(L); + return 1; +#else + lj_err_callermsg(L, "Intrinsics disabled"); +#endif +} + LJLIB_CF(ffi_new) LJLIB_REC(.) { CTState *cts = ctype_cts(L); diff --git a/src/lj_asm.c b/src/lj_asm.c index ec5e7c7f..1e83133b 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2569,6 +2569,7 @@ static void wrap_intrins(jit_State *J, CIntrinsic *intrins, IntrinWrapState *sta IntrinBuildState info; AsmHeader *hdr; MCode *asmofs = NULL, *origtop; + void* target = state->target; int spadj = 0; lj_asm_setup_intrins(J, as); @@ -2587,6 +2588,12 @@ static void wrap_intrins(jit_State *J, CIntrinsic *intrins, IntrinWrapState *sta ra_modified(as, info.outcontext); } + /* Embed the mcode after the wrapper */ + if ((intrins->flags & INTRINSFLAG_CALLED) && state->targetsz) { + *--as->mcp = XI_RET; + target = asmofs = asm_mcode(as, target, state->targetsz); + } + restart: if (info.contexspill || rset_test(info.outset, info.outcontext)) { /* add some extra space for context spill and temp spill */ @@ -2630,8 +2637,20 @@ restart: emit_storeofsirt(as, IRT_INTP, info.outcontext, RID_SP, TEMPSPILL); } - /* Append the user supplied machine code */ - asmofs = asm_mcode(as, state->target, state->targetsz); + if (intrins->flags & INTRINSFLAG_CALLED) { + Reg rin = 0; +#if LJ_64 + /* Pick a scratch register in case the relative 
distance for the call is + ** larger than a signed 32bit value + */ + rin = intrinsic_scratch(as, RSET_GPR); +#endif + /* emit a call to the target which may be collocated after us */ + emit_intrins(as, intrins, rin, (uintptr_t)target); + } else { + /* Append the user supplied machine code */ + asmofs = asm_mcode(as, state->target, state->targetsz); + } /* Move values out the context into there respective input registers */ intrins_loadregs(as, intrins, &info); @@ -2657,9 +2676,14 @@ restart: memset(hdr, 0, sizeof(AsmHeader)); hdr->totalzs = (uint32_t)(origtop-as->mcp); - lua_assert((asmofs-as->mcp) < 0xffff); - hdr->asmofs = (uint16_t)(asmofs-as->mcp); - hdr->asmsz = state->targetsz; + /* Embed info before the code to support multiple versions of a user intrinsic */ + if ((intrins->flags & INTRINSFLAG_CALLEDIND) == INTRINSFLAG_CALLEDIND) { + hdr->target = (uintptr_t)target; + } else if (asmofs){ + lua_assert((asmofs-as->mcp) < 0xffff); + hdr->asmofs = (uint16_t)(asmofs-as->mcp); + hdr->asmsz = state->targetsz; + } as->mcp = (MCode*)hdr; diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 85c2196e..f17b884a 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -614,6 +614,24 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) #define TEMPSPILL (1*sizeof(intptr_t)) #define CONTEXTSPILL (0) +/* Emit a call to r2 for called intrinsics (returns NULL); otherwise copy inline the asmsz bytes at r2+asmofs described by the AsmHeader stored just before r2 (returns as->mcp). */ +static MCode* emit_intrins(ASMState *as, CIntrinsic *intrins, Reg r1, + uintptr_t r2) +{ + if (intrins->flags & INTRINSFLAG_CALLED) { + lua_assert(r2); + emit_call_(as, (MCode*)r2, r1); + return NULL; + } else { + AsmHeader *hdr = ((AsmHeader*)r2)-1; + lua_assert(hdr->asmsz != 0 && (uint32_t)hdr->asmofs + hdr->asmsz <= hdr->totalzs); + + /* Directly copy the unmodified machine code of the intrinsic in */ + asm_mcode(as, ((char*)r2)+hdr->asmofs, hdr->asmsz); + } + return as->mcp; +} + static int lj_popcnt(uint32_t i) { i = i - ((i >> 1) & 0x55555555); diff --git a/src/lj_intrinsic.c b/src/lj_intrinsic.c index 17e5869c..1da30a34 100644 --- a/src/lj_intrinsic.c +++ 
b/src/lj_intrinsic.c @@ -83,6 +83,16 @@ static CTypeID register_intrinsic(lua_State *L, CIntrinsic* src, CType *func) return id; } +static void lj_intrinsic_new(lua_State *L, CTypeID id, void* wrapmc) +{ + CTState *cts = ctype_cts(L); + GCcdata *cd; + lua_assert(ctype_isintrinsic(ctype_get(cts, id)->info)); + cd = lj_cdata_new(cts, id, CTSIZE_PTR); + *(void **)cdataptr(cd) = wrapmc; + setcdataV(L, L->top++, cd); +} + static int parse_fprreg(const char *name, uint32_t len) { uint32_t rid = 0, kind = REGKIND_FPR64; @@ -282,27 +292,33 @@ static void setopcode(lua_State *L, CIntrinsic *intrins, uint32_t opcode) intrins->opcode = opcode; } -static int parse_opstr(lua_State *L, GCstr *opstr) +static int parse_opstr(lua_State *L, GCstr *opstr, CIntrinsic *intrins, int* buildflags) { const char *op = strdata(opstr); uint32_t opcode = 0; uint32_t i; - /* Find the end of the opcode number */ - for (i = 0; i < opstr->len && lj_char_isxdigit((uint8_t)op[i]); i++) { - } + /* Parse the opcode number if this is not a template */ + if (op[0] != '?') { + for (i = 0; i < opstr->len && lj_char_isxdigit((uint8_t)op[i]); i++) { + } - if (i == 0 || i > 8) { - /* invalid or no hex number */ - lj_err_callerv(L, LJ_ERR_FFI_BADOPSTR, op, "invalid opcode number"); - } + if (i == 0 || i > 8) { + /* invalid or no hex number */ + lj_err_callerv(L, LJ_ERR_FFI_BADOPSTR, op, "invalid opcode number"); + } - /* Scan hex digits. */ - for (; i; i--, op++) { - uint32_t d = *op; if (d > '9') d += 9; - opcode = (opcode << 4) + (d & 15); - } + /* Scan hex digits. 
*/ + for (; i; i--, op++) { + uint32_t d = *op; if (d > '9') d += 9; + opcode = (opcode << 4) + (d & 15); + } + } else { + *buildflags |= INTRINSFLAG_TEMPLATE; + op++; + } + return opcode; } @@ -337,6 +353,54 @@ static IntrinsicWrapper lj_intrinsic_buildwrap(lua_State *L, CIntrinsic *intrins return (IntrinsicWrapper)state.wrapper; } +CTypeID lj_intrinsic_template(lua_State *L, int narg) +{ + CTState *cts = ctype_cts(L); + CType *ct; + CTypeID id; + CIntrinsic* intrins; + GCstr *name = lj_lib_checkstr(L, narg); + /* Resolve argument narg to a declared intrinsic function type; raise an argument error otherwise */ + id = lj_ctype_getname(cts, &ct, name, 1u << CT_FUNC); + + if (!id) { + lj_err_argv(L, narg, LJ_ERR_FFI_NODECL, strdata(name)); + } else if (!ctype_isintrinsic(ct->info)) { + lj_err_arg(L, narg, LJ_ERR_FFI_INVTYPE); + } + + intrins = lj_intrinsic_get(cts, ct->size); + + /* Can't be a template if it has an opcode or has already been wrapped */ + if ((intrins->opcode && intrins->outsz <= 4) || intrins->wrapped) + lj_err_arg(L, narg, LJ_ERR_FFI_INVTYPE); + + return id; +} +/* Lua arguments: (1) name of a declared template intrinsic, (2) pointer to the machine code, (3) code size in bytes. Builds a wrapper around the code, pushes the new intrinsic cdata and returns 1. */ +int lj_intrinsic_create(lua_State *L) +{ + CTState *cts = ctype_cts(L); + CTypeID id = lj_intrinsic_template(L, 1); + void *intrinsmc; + MSize asmsz; + CIntrinsic* intrins = lj_intrinsic_get(cts, ctype_get(cts, id)->size); + + lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&intrinsmc, + L->base+1, CCF_ARG(2)); + + asmsz = lj_lib_checkint(L, 3); + if (asmsz <= 0 || asmsz > 0xffff || + asmsz > (MSize)(L2J(L)->param[JIT_P_sizemcode] << 10)) { + lj_err_callermsg(L, "bad code size"); + } + + intrinsmc = lj_intrinsic_buildwrap(L, intrins, intrinsmc, asmsz, + intrin_getmodrset(cts, intrins)); + lj_intrinsic_new(L, id, intrinsmc); + return 1; +} + GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func) { GCcdata *cd; @@ -345,6 +409,10 @@ GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func) RegSet mod = intrin_getmodrset(cts, intrins); uint32_t op = intrins->opcode; void* mcode = ((char*)&op) + (4-intrin_oplen(intrins)); + + if (intrins->opcode == 0) { + lj_err_callermsg(cts->L, "expected non template intrinsic"); 
+ } intrins->wrapped = lj_intrinsic_buildwrap(cts->L, intrins, mcode, intrin_oplen(intrins), mod); @@ -360,13 +428,14 @@ int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opstr, uint32_t imm) CType *func = ctype_get(cts, fid); CTypeID sib = func->sib, retid = ctype_cid(func->info); uint32_t opcode; + int buildflags = 0; CIntrinsic _intrins; CIntrinsic* intrins = &_intrins; memset(intrins, 0, sizeof(CIntrinsic)); - opcode = parse_opstr(L, opstr); + opcode = parse_opstr(L, opstr, intrins, &buildflags); - if (!opcode) { + if (!opcode && !(buildflags & INTRINSFLAG_TEMPLATE)) { return 0; } @@ -387,7 +456,10 @@ int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opstr, uint32_t imm) sib = retid; } - setopcode(L, intrins, opcode); + /* If we're a template there's no opcode to set */ + if (opcode) { + setopcode(L, intrins, opcode); + } register_intrinsic(L, intrins, ctype_get(cts, fid)); lua_assert(sib > 0 && sib < cts->top); diff --git a/src/lj_intrinsic.h b/src/lj_intrinsic.h index e53183b4..f1235cb2 100644 --- a/src/lj_intrinsic.h +++ b/src/lj_intrinsic.h @@ -26,6 +26,10 @@ typedef enum INTRINSFLAGS { INTRINSFLAG_CALLED = 0x20, /* MODRM should always be set as indirect mode */ INTRINSFLAG_INDIRECT = 0x40, + /* Intrinsic is a template with no machine code set until instantiated at runtime with + ** user supplied code. + */ + INTRINSFLAG_TEMPLATE = 0x40000, INTRINSFLAG_CALLEDIND = INTRINSFLAG_CALLED | INTRINSFLAG_INDIRECT } INTRINSFLAGS; @@ -97,6 +101,7 @@ CTypeID1 regkind_ct[16]; #define rk_ctype(rid, kind) ((rid) < RID_MAX_GPR ? 
rk_ctypegpr(kind) : rk_ctypefpr(kind)) LJ_FUNC void lj_intrinsic_init(lua_State *L); +LJ_FUNC int lj_intrinsic_create(lua_State *L); LJ_FUNC GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func); LJ_FUNC int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opcode, uint32_t imm); LJ_FUNC int lj_intrinsic_call(CTState *cts, CType *ct); diff --git a/tests/intrinsic_spec.lua b/tests/intrinsic_spec.lua index 29b943fb..c4701560 100644 --- a/tests/intrinsic_spec.lua +++ b/tests/intrinsic_spec.lua @@ -30,7 +30,7 @@ describe("intrinsic tests", function() context("nop inout", function() it("fpr", function() - assert_cdef([[void fpr_nop1(double xmm0) __mcode("90") __reglist(out, double xmm0)]], "fpr_nop1") + assert_cdef([[void fpr_nop1(double xmm0) __mcode("90_E") __reglist(out, double xmm0)]], "fpr_nop1") local fpr1 = ffi.C.fpr_nop1 assert_error(function() fpr1() end) @@ -39,10 +39,34 @@ context("nop inout", function() assert_jit(123.075, function(num) return (fpr1(num)) end, 123.075) assert_noexit(-123567.075, function(num) return (fpr1(num)) end, -123567.075) + + assert_cdef([[void fpr_all(double xmm0, double xmm1, double xmm2, double xmm3, double xmm4, double xmm5, double xmm6, double xmm7) __mcode("?E") + __reglist(out,double xmm0, double xmm1, double xmm2, double xmm3, double xmm4, double xmm5, double xmm6, double xmm7)]]) + local fpr_all = ffi.intrinsic("fpr_all", "\x90", 1) + + local function testfpr_all(i, r1, r2, r3, r4, r5, r6, r7, r8) + local spilled = r1*2*i + local ro1, ro2, ro3, ro4, ro5, ro6, ro7, ro8 = fpr_all(r1, r2, r3, r4, r5, r6, r7, r8) + return ro1+i, ro2+i, ro3+i, ro4+i, ro5+i, ro6+i, ro7+i, (ro8*ro3)+i, ro2+spilled + end + + local function checker(i, ro1, ro2, ro3, ro4, ro5, ro6, ro7, ro8, spilled) + assert(ro1 == 1.5+i) + assert(ro2 == 2.5+i) + assert(ro3 == 3.5+i) + assert(ro4 == 4.5+i) + assert(ro5 == 5.5+i) + assert(ro6 == 60000.525+i) + assert(ro7 == i+-7.5) + assert(ro8 == (-100*3.5)+i) + assert(spilled == 2.5+(1.5*2*i)) + end + + 
assert_jitchecker(checker, testfpr_all, 1.5, 2.5, 3.5, 4.5, 5.5, 60000.525, -7.5, -100) end) it("gpr", function() - assert_cdef([[void gpr_nop1(int32_t eax) __mcode("90") __reglist(out, int32_t eax)]], "gpr_nop1") + assert_cdef([[void gpr_nop1(int32_t eax) __mcode("90_E") __reglist(out, int32_t eax)]], "gpr_nop1") local function testgpr1(num) return (ffi.C.gpr_nop1(num)) @@ -67,11 +91,35 @@ context("nop inout", function() end assert_jitchecker(checker, testgpr_scratch, 0, 1, 30000) + + assert_cdef([[void gpr_all(int32_t ebp, int32_t esi, int32_t edi, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx) __mcode("?E") + __reglist(out, int32_t ebp, int32_t esi, int32_t edi, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx)]]) + + local gpr_all = ffi.intrinsic("gpr_all", "\x90", 1) + + local function testgpr_all(i, r1, r2, r3, r4, r5, r6, r7) + local spilled = r1+(10000*i) + local ro1, ro2, ro3, ro4, ro5, ro6, ro7 = gpr_all(r1, r2, r3, r4, 100, r6, r7) + return spilled+(ro1+ro2+ro3+ro4+ro5+ro6+ro7), ro3+i, ro2+i, ro1+i, ro4+i, ro5, ro6+i, ro7+i + end + + local function checker(i, spilled, ro3, ro2, ro1, ro4, ro5, ro6, ro7) + assert(ro1 == 1+i) + assert(ro2 == 2+i) + assert(ro3 == 3+i) + assert(ro4 == 4+i) + assert(ro5 == 100) + assert(ro6 == 6+i) + assert(ro7 == 7+i) + assert(spilled == 124+(10000*i)) + end + + assert_jitchecker(checker, testgpr_all, 1, 2, 3, 4, 5, 6, 7) end) if ffi.arch == "x64" then it("gpr64", function() - assert_cdef([[void gpr64_1(int64_t rdx) __mcode("90") __reglist(out, int64_t rdx)]], "gpr64_1") + assert_cdef([[void gpr64_1(int64_t rdx) __mcode("90_E") __reglist(out, int64_t rdx)]], "gpr64_1") local function testgpr1(num) return (ffi.C.gpr64_1(num)) @@ -79,10 +127,35 @@ if ffi.arch == "x64" then assert_jit(1235678ull, testgpr1, 1235678) assert_noexit(-1LL, testgpr1, -1) + + assert_cdef([[void gpr64(int64_t rbp, int64_t rsi, int64_t rdi, int64_t rax, int64_t rbx, int64_t rcx, int64_t rdx) __mcode("?E") + __reglist(out, int64_t rbp, int64_t rsi, 
int64_t rdi, int64_t rax, int64_t rbx, int64_t rcx, int64_t rdx)]]) + + local gpr7 = ffi.intrinsic("gpr64", "\x90", 1) + + local function testgpr_all(i, r1, r2, r3, r4, r5, r6, r7) + local spilled = r1+(10000*i) + local ro1, ro2, ro3, ro4, ro5, ro6, ro7 = gpr7(r1, r2, r3, 68719476735ll, r5, r6, r7) + return spilled, ro3+i, ro2+i, ro1+i, ro4+i, ro5+i, ro6+i, ro7+i + end + + local function checker(i, spilled, ro3, ro2, ro1, ro4, ro5, ro6, ro7) + local sp = (10000*i)+1 + assert(ro1 == 1+i) + assert(ro2 == 2+i) + assert(ro3 == 3+i) + assert(type(ro4) == "cdata" and ro4 == 68719476735ll+i) + assert(type(ro5) == "cdata" and ro5 == 5ll+i) + assert(type(ro6) == "cdata" and ro6 == 68719476721ll+i) + assert(type(ro7) == "cdata" and ro7 == (-7ll)+i) + assert(spilled == sp) + end + + assert_jitchecker(checker, testgpr_all, 1, 2, 3, 4, 5ll, 68719476721ll, -7ll) end) it("rex fpr", function() - assert_cdef([[void fpr_reg(double xmm9, double xmm0) __mcode("90") __reglist(out, double xmm0, double xmm9)]], "fpr_reg") + assert_cdef([[void fpr_reg(double xmm9, double xmm0) __mcode("90_E") __reglist(out, double xmm0, double xmm9)]], "fpr_reg") local fpr = ffi.C.fpr_reg local function testrex(n1, n2) @@ -95,15 +168,38 @@ if ffi.arch == "x64" then end it("fpr_vec", function() - assert_cdef([[void fpr_vec(void* xmm7v) __mcode("90") __reglist(out, float4 xmm7v)]], "fpr_vec") + assert_cdef([[void fpr_vec(void* xmm7v) __mcode("90_E") __reglist(out, float4 xmm7v)]], "fpr_vec") local v1 = ffi.new("float[4]", 1, 2, 3, 4) local xmmout = ffi.C.fpr_vec(v1) assert_v4eq(xmmout, 1, 2, 3, 4) end) + it("check extra register spill", function() + local array = ffi.new("float4", 1, 2, 3, 4) + + -- Use up all gpr scratch registers before loading the vec causing wrapper builder to restart with an extra spill + assert_cdef([[void spillrestart(float4 xmm0v, int32_t eax, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebx) __mcode("?E") + __reglist(out, int32_t eax, int32_t ecx, int32_t edx, int32_t 
esi, int32_t edi, int32_t ebx, float4 xmm0v) + ]]) + + local xmmtest = ffi.intrinsic("spillrestart", "\x90", 1) + local eax, ecx, edx, esi, edi, ebx, xmmout = xmmtest(array, 1, 2, 3, 4, 5, 6) + + assert_equal(eax, 1) + assert_equal(ecx, 2) + assert_equal(edx, 3) + assert_equal(esi, 4) + assert_equal(edi, 5) + assert_equal(ebx, 6) + + for i=1,4 do + assert_equal(xmmout[i-1], i) + end + end) + it("idiv", function() - assert_cdef([[void idiv(int32_t eax, int32_t ecx) __mcode("99F7F9") __reglist(out, int32_t eax, int32_t edx)]], "idiv") + assert_cdef([[void idiv(int32_t eax, int32_t ecx) __mcode("99F7F9_E") __reglist(out, int32_t eax, int32_t edx)]], "idiv") local function checker(i, result, remainder) local rem = i%3 @@ -169,22 +265,22 @@ context("__mcode", function() end) it("invalid registers", function() - assert_cdef([[void validreg_gpr(int eax) __mcode("90");]], "validreg_gpr") + assert_cdef([[void validreg_gpr(int eax) __mcode("90_E");]], "validreg_gpr") - assert_cdeferr([[void badreg_1(int e) __mcode("90");]], "invalid") - assert_cdeferr([[void badreg_1(int r20d) __mcode("90");]], "invalid") - assert_cdeferr([[void badreg_gpr1() __mcode("90") __reglist(out, int e);]], "invalid") - assert_cdeferr([[void badreg_gpr2() __mcode("90") __reglist(mod, e);]], "invalid") + assert_cdeferr([[void badreg_1(int e) __mcode("90_E");]], "invalid") + assert_cdeferr([[void badreg_1(int r20d) __mcode("90_E");]], "invalid") + assert_cdeferr([[void badreg_gpr1() __mcode("90_E") __reglist(out, int e);]], "invalid") + assert_cdeferr([[void badreg_gpr2() __mcode("90_E") __reglist(mod, e);]], "invalid") - assert_cdef([[void validreg_fpr(float xmm0) __mcode("90");]], "validreg_fpr") + assert_cdef([[void validreg_fpr(float xmm0) __mcode("90_E");]], "validreg_fpr") - assert_cdeferr([[void badreg_fpr1(float x) __mcode("90");]], "invalid") - assert_cdeferr([[void badreg_fpr1(float xm) __mcode("90");]], "invalid") - assert_cdeferr([[void badreg_fpr1(float xm0) __mcode("90");]], "invalid") - 
assert_cdeferr([[void badreg_fpr1(float xmmm0) __mcode("90");]], "invalid") - assert_cdeferr([[void badreg_fpr2(float xmm0vf) __mcode("90");]], "invalid") + assert_cdeferr([[void badreg_fpr1(float x) __mcode("90_E");]], "invalid") + assert_cdeferr([[void badreg_fpr1(float xm) __mcode("90_E");]], "invalid") + assert_cdeferr([[void badreg_fpr1(float xm0) __mcode("90_E");]], "invalid") + assert_cdeferr([[void badreg_fpr1(float xmmm0) __mcode("90_E");]], "invalid") + assert_cdeferr([[void badreg_fpr2(float xmm0vf) __mcode("90_E");]], "invalid") --xmm register number too large - assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90");]], "invalid") + assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90_E");]], "invalid") end) it("multidef rollback", function() @@ -199,7 +295,7 @@ context("__mcode", function() assert_error(function() ffi.C.multi2() end) assert_not_error(function() ffi.cdef[[ - void multi1(int32_t eax) __mcode("90") __reglist(out, int32_t eax); + void multi1(int32_t eax) __mcode("90_E") __reglist(out, int32_t eax); ]] end) assert_equal(ffi.C.multi1(1.1), 1) @@ -217,7 +313,7 @@ context("__mcode", function() end) it("bad args", function() - assert_cdef([[void idiv2(int32_t eax, int32_t ecx) __mcode("99F7F9") __reglist(out, int32_t eax, int32_t edx)]], "idiv2") + assert_cdef([[void idiv2(int32_t eax, int32_t ecx) __mcode("99F7F9_E") __reglist(out, int32_t eax, int32_t edx)]], "idiv2") local idiv = ffi.C.idiv2 @@ -232,10 +328,44 @@ context("__mcode", function() assert_error(function() idiv(1, 2, nil) end) assert_error(function() idiv(1, 2, 3) end) assert_error(function() idiv(1, 2, 3, 4) end) + end) + + it("idiv(template)", function() + assert_cdef([[void idivT(int32_t eax, int32_t ecx) __mcode("?E") __reglist(out, int32_t eax, int32_t edx)]]) + --trying to create template intrinsic through C library should always fail + assert_error(function() return ffi.C.idivT end) + + local idiv = ffi.intrinsic("idivT", "\x99\xF7\xF9", 3) + + local function 
checker(i, result, remainder) + local rem = i%2 + + if rem ~= remainder then + return rem, remainder + end + + local expected = (i-rem)/2 + + if expected ~= result then + return expected, result + end + end + + local function test_idiv(value, divisor) + local result, remainder = idiv(value, divisor) + return result, remainder + end + + assert_jitchecker(checker, test_idiv, 2) + + -- create a second instance and check guard for wrapper pointer fails + idiv = ffi.intrinsic("idivT", "\x90", 1) + + assert_exit(10, test_idiv, 10, 5) end) it("cpuid_brand", function() - assert_cdef([[void cpuid(int32_t eax, int32_t ecx) __mcode("0FA2") __reglist(out, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx);]], "cpuid") + assert_cdef([[void cpuid(int32_t eax, int32_t ecx) __mcode("0FA2_E") __reglist(out, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx);]], "cpuid") local cpuid = ffi.C.cpuid @@ -304,20 +434,20 @@ context("__reglist", function() it("stack pointer blacklist", function() - assert_cdeferr([[void blacklist_in(int esp) __mcode("90")]], "blacklist") - assert_cdeferr([[void blacklist_out(int eax) __mcode("90") __reglist(out, int esp)]], "blacklist") + assert_cdeferr([[void blacklist_in(int esp) __mcode("90_E")]], "blacklist") + assert_cdeferr([[void blacklist_out(int eax) __mcode("90_E") __reglist(out, int esp)]], "blacklist") --FIXME - --assert_cdeferr([[void blacklist_mod(int eax) __mcode("90") __reglist(mod, esp)]], "blacklist") + --assert_cdeferr([[void blacklist_mod(int eax) __mcode("90_E") __reglist(mod, esp)]], "blacklist") if ffi.arch == "x64" then - assert_cdeferr([[void blacklist_64(int rsp) __mcode("90")]], "blacklist") + assert_cdeferr([[void blacklist_64(int rsp) __mcode("90_E")]], "blacklist") end end) it("duplicate regs", function() - assert_cdeferr([[void duplicate_in(int eax, int eax) __mcode("90")]], "duplicate") - assert_cdeferr([[void duplicate_inxmm(float4 xmm0, float4 xmm0) __mcode("90")]], "duplicate") - assert_cdeferr([[void duplicate_out(int 
eax) __mcode("90") __reglist(out, int eax, int eax)]], "duplicate") + assert_cdeferr([[void duplicate_in(int eax, int eax) __mcode("90_E")]], "duplicate") + assert_cdeferr([[void duplicate_inxmm(float4 xmm0, float4 xmm0) __mcode("90_E")]], "duplicate") + assert_cdeferr([[void duplicate_out(int eax) __mcode("90_E") __reglist(out, int eax, int eax)]], "duplicate") --FIXME assert_cdeferr([[void duplicate_mod(int eax) __mcode("90_E") __reglist(mod, eax, eax)]], "duplicate") end)