Template intrinsics user machine code

This commit is contained in:
fsfod 2016-02-09 04:21:27 +00:00
parent c84b176062
commit e1a1721ae2
6 changed files with 308 additions and 49 deletions

View File

@ -492,6 +492,16 @@ LJLIB_CF(ffi_cdef)
return 0; return 0;
} }
/* ffi.intrinsic(): hands the call to lj_intrinsic_create() and returns its
** single result when intrinsics are compiled in, otherwise reports the
** "Intrinsics disabled" error through lj_err_callermsg().
*/
LJLIB_CF(ffi_intrinsic)
{
#if !LJ_HASINTRINSICS
  lj_err_callermsg(L, "Intrinsics disabled");
#else
  lj_intrinsic_create(L);
  return 1;  /* One result left on the stack by lj_intrinsic_create(). */
#endif
}
LJLIB_CF(ffi_new) LJLIB_REC(.) LJLIB_CF(ffi_new) LJLIB_REC(.)
{ {
CTState *cts = ctype_cts(L); CTState *cts = ctype_cts(L);

View File

@ -2569,6 +2569,7 @@ static void wrap_intrins(jit_State *J, CIntrinsic *intrins, IntrinWrapState *sta
IntrinBuildState info; IntrinBuildState info;
AsmHeader *hdr; AsmHeader *hdr;
MCode *asmofs = NULL, *origtop; MCode *asmofs = NULL, *origtop;
void* target = state->target;
int spadj = 0; int spadj = 0;
lj_asm_setup_intrins(J, as); lj_asm_setup_intrins(J, as);
@ -2587,6 +2588,12 @@ static void wrap_intrins(jit_State *J, CIntrinsic *intrins, IntrinWrapState *sta
ra_modified(as, info.outcontext); ra_modified(as, info.outcontext);
} }
/* Embed the mcode after the wrapper */
if ((intrins->flags & INTRINSFLAG_CALLED) && state->targetsz) {
*--as->mcp = XI_RET;
target = asmofs = asm_mcode(as, target, state->targetsz);
}
restart: restart:
if (info.contexspill || rset_test(info.outset, info.outcontext)) { if (info.contexspill || rset_test(info.outset, info.outcontext)) {
/* add some extra space for context spill and temp spill */ /* add some extra space for context spill and temp spill */
@ -2630,8 +2637,20 @@ restart:
emit_storeofsirt(as, IRT_INTP, info.outcontext, RID_SP, TEMPSPILL); emit_storeofsirt(as, IRT_INTP, info.outcontext, RID_SP, TEMPSPILL);
} }
/* Append the user supplied machine code */ if (intrins->flags & INTRINSFLAG_CALLED) {
asmofs = asm_mcode(as, state->target, state->targetsz); Reg rin = 0;
#if LJ_64
/* Pick a scratch register in case the relative distance for the call is
** larger than a signed 32bit value
*/
rin = intrinsic_scratch(as, RSET_GPR);
#endif
/* emit a call to the target which may be collocated after us */
emit_intrins(as, intrins, rin, (uintptr_t)target);
} else {
/* Append the user supplied machine code */
asmofs = asm_mcode(as, state->target, state->targetsz);
}
/* Move values out the context into there respective input registers */ /* Move values out the context into there respective input registers */
intrins_loadregs(as, intrins, &info); intrins_loadregs(as, intrins, &info);
@ -2657,9 +2676,14 @@ restart:
memset(hdr, 0, sizeof(AsmHeader)); memset(hdr, 0, sizeof(AsmHeader));
hdr->totalzs = (uint32_t)(origtop-as->mcp); hdr->totalzs = (uint32_t)(origtop-as->mcp);
lua_assert((asmofs-as->mcp) < 0xffff); /* Embed info before the code to support multiple versions of a user intrinsic */
hdr->asmofs = (uint16_t)(asmofs-as->mcp); if ((intrins->flags & INTRINSFLAG_CALLEDIND) == INTRINSFLAG_CALLEDIND) {
hdr->asmsz = state->targetsz; hdr->target = (uintptr_t)target;
} else if (asmofs){
lua_assert((asmofs-as->mcp) < 0xffff);
hdr->asmofs = (uint16_t)(asmofs-as->mcp);
hdr->asmsz = state->targetsz;
}
as->mcp = (MCode*)hdr; as->mcp = (MCode*)hdr;

View File

@ -614,6 +614,24 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
#define TEMPSPILL (1*sizeof(intptr_t)) #define TEMPSPILL (1*sizeof(intptr_t))
#define CONTEXTSPILL (0) #define CONTEXTSPILL (0)
/* Emit the machine code of an intrinsic.
**
** INTRINSFLAG_CALLED set: r2 is the (non-zero) address to call and r1 is
** forwarded to emit_call_(). Returns NULL.
**
** Otherwise: r2 is the address of a built wrapper that is directly preceded
** by its AsmHeader. The hdr->asmsz bytes found at r2 + hdr->asmofs are copied
** with asm_mcode(). Returns as->mcp.
*/
static MCode* emit_intrins(ASMState *as, CIntrinsic *intrins, Reg r1,
                           uintptr_t r2)
{
  if (intrins->flags & INTRINSFLAG_CALLED) {
    lua_assert(r2);
    emit_call_(as, (MCode*)r2, r1);
    return NULL;
  } else {
    AsmHeader *hdr = ((AsmHeader*)r2)-1;
    /* The embedded code must have a recorded offset AND that offset must lie
    ** inside the wrapper. (The previous '||' made this check almost vacuous.)
    */
    lua_assert(hdr->asmofs != 0 && hdr->asmofs < hdr->totalzs);
    /* Directly copy the unmodified machine code of the intrinsic in */
    asm_mcode(as, ((char*)r2)+hdr->asmofs, hdr->asmsz);
  }
  return as->mcp;
}
static int lj_popcnt(uint32_t i) static int lj_popcnt(uint32_t i)
{ {
i = i - ((i >> 1) & 0x55555555); i = i - ((i >> 1) & 0x55555555);

View File

@ -83,6 +83,16 @@ static CTypeID register_intrinsic(lua_State *L, CIntrinsic* src, CType *func)
return id; return id;
} }
/* Create a pointer-sized cdata of intrinsic ctype `id`, store the wrapper
** pointer `wrapmc` as its payload and push it onto the Lua stack.
*/
static void lj_intrinsic_new(lua_State *L, CTypeID id, void* wrapmc)
{
  CTState *cts = ctype_cts(L);
  GCcdata *obj;
  /* Only intrinsic function types may carry a wrapper pointer. */
  lua_assert(ctype_isintrinsic(ctype_get(cts, id)->info));
  obj = lj_cdata_new(cts, id, CTSIZE_PTR);
  *(void **)cdataptr(obj) = wrapmc;
  setcdataV(L, L->top, obj);
  L->top++;
}
static int parse_fprreg(const char *name, uint32_t len) static int parse_fprreg(const char *name, uint32_t len)
{ {
uint32_t rid = 0, kind = REGKIND_FPR64; uint32_t rid = 0, kind = REGKIND_FPR64;
@ -282,25 +292,31 @@ static void setopcode(lua_State *L, CIntrinsic *intrins, uint32_t opcode)
intrins->opcode = opcode; intrins->opcode = opcode;
} }
static int parse_opstr(lua_State *L, GCstr *opstr) static int parse_opstr(lua_State *L, GCstr *opstr, CIntrinsic *intrins, int* buildflags)
{ {
const char *op = strdata(opstr); const char *op = strdata(opstr);
uint32_t opcode = 0; uint32_t opcode = 0;
uint32_t i; uint32_t i;
/* Find the end of the opcode number */ /* Parse the opcode number if this is not a template */
for (i = 0; i < opstr->len && lj_char_isxdigit((uint8_t)op[i]); i++) { if (op[0] != '?') {
} for (i = 0; i < opstr->len && lj_char_isxdigit((uint8_t)op[i]); i++) {
}
if (i == 0 || i > 8) { if (i == 0 || i > 8) {
/* invalid or no hex number */ /* invalid or no hex number */
lj_err_callerv(L, LJ_ERR_FFI_BADOPSTR, op, "invalid opcode number"); lj_err_callerv(L, LJ_ERR_FFI_BADOPSTR, op, "invalid opcode number");
} }
/* Scan hex digits. */ /* Scan hex digits. */
for (; i; i--, op++) { for (; i; i--, op++) {
uint32_t d = *op; if (d > '9') d += 9; uint32_t d = *op; if (d > '9') d += 9;
opcode = (opcode << 4) + (d & 15); opcode = (opcode << 4) + (d & 15);
}
} else {
*buildflags |= INTRINSFLAG_TEMPLATE;
op++;
} }
return opcode; return opcode;
@ -337,6 +353,54 @@ static IntrinsicWrapper lj_intrinsic_buildwrap(lua_State *L, CIntrinsic *intrins
return (IntrinsicWrapper)state.wrapper; return (IntrinsicWrapper)state.wrapper;
} }
/* Look up the intrinsic template named by the string argument `narg`.
**
** Raises an argument error when no function declaration with that name
** exists, when the declaration is not an intrinsic, or when the intrinsic
** already has an opcode / a built wrapper (i.e. it is not a template).
** Returns the ctype id of the template otherwise.
*/
CTypeID lj_intrinsic_template(lua_State *L, int narg)
{
  CTState *cts = ctype_cts(L);
  CType *ct;
  CTypeID id;
  CIntrinsic *intrins;
  GCstr *name = lj_lib_checkstr(L, narg);

  id = lj_ctype_getname(cts, &ct, name, 1u << CT_FUNC);

  if (!id) {
    /* The message is formatted with the symbol name as a C string, so pass
    ** the character data and not the GCstr object itself.
    */
    lj_err_argv(L, narg, LJ_ERR_FFI_NODECL, strdata(name));
  } else if (!ctype_isintrinsic(ct->info)) {
    lj_err_arg(L, narg, LJ_ERR_FFI_INVTYPE);
  }

  intrins = lj_intrinsic_get(cts, ct->size);

  /* Can't be a template if it has an opcode or was already wrapped */
  if ((intrins->opcode && intrins->outsz <= 4) || intrins->wrapped)
    lj_err_arg(L, narg, LJ_ERR_FFI_INVTYPE);

  return id;
}
/* Instantiate an intrinsic template with user supplied machine code.
**
** Lua arguments:
**   1: name of a declared intrinsic template (see lj_intrinsic_template)
**   2: the machine code, anything convertible to a const void *
**   3: size of the machine code in bytes
**
** Raises "bad code size" when the size is zero, above 0xffff, above the
** configured sizemcode limit, or larger than a Lua string passed as the code.
** Raises "bad code pointer" when the code converts to a NULL pointer.
** On success pushes the new intrinsic cdata and returns 1.
*/
int lj_intrinsic_create(lua_State *L)
{
  CTState *cts = ctype_cts(L);
  CTypeID id = lj_intrinsic_template(L, 1);
  CIntrinsic *intrins = lj_intrinsic_get(cts, ctype_get(cts, id)->size);
  TValue *codearg = L->base+1;
  void *intrinsmc = NULL;
  MSize asmsz;

  /* Validate argument 3 before touching slot 2: a successful check of the
  ** third argument implies the second one was actually passed.
  */
  asmsz = lj_lib_checkint(L, 3);
  if (asmsz == 0 || asmsz > 0xffff ||
      asmsz > (MSize)(L2J(L)->param[JIT_P_sizemcode] << 10)) {
    lj_err_callermsg(L, "bad code size");
  }

  /* Never read past the end of a Lua string used as the code buffer */
  if (tvisstr(codearg) && asmsz > strV(codearg)->len) {
    lj_err_callermsg(L, "bad code size");
  }

  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&intrinsmc,
                 codearg, CCF_ARG(2));
  if (intrinsmc == NULL) {
    lj_err_callermsg(L, "bad code pointer");
  }

  intrinsmc = lj_intrinsic_buildwrap(L, intrins, intrinsmc, asmsz,
                                     intrin_getmodrset(cts, intrins));
  lj_intrinsic_new(L, id, intrinsmc);
  return 1;
}
GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func) GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func)
{ {
GCcdata *cd; GCcdata *cd;
@ -346,6 +410,10 @@ GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func)
uint32_t op = intrins->opcode; uint32_t op = intrins->opcode;
void* mcode = ((char*)&op) + (4-intrin_oplen(intrins)); void* mcode = ((char*)&op) + (4-intrin_oplen(intrins));
if (intrins->opcode == 0) {
lj_err_callermsg(cts->L, "expected non template intrinsic");
}
intrins->wrapped = lj_intrinsic_buildwrap(cts->L, intrins, mcode, intrins->wrapped = lj_intrinsic_buildwrap(cts->L, intrins, mcode,
intrin_oplen(intrins), mod); intrin_oplen(intrins), mod);
@ -360,13 +428,14 @@ int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opstr, uint32_t imm)
CType *func = ctype_get(cts, fid); CType *func = ctype_get(cts, fid);
CTypeID sib = func->sib, retid = ctype_cid(func->info); CTypeID sib = func->sib, retid = ctype_cid(func->info);
uint32_t opcode; uint32_t opcode;
int buildflags = 0;
CIntrinsic _intrins; CIntrinsic _intrins;
CIntrinsic* intrins = &_intrins; CIntrinsic* intrins = &_intrins;
memset(intrins, 0, sizeof(CIntrinsic)); memset(intrins, 0, sizeof(CIntrinsic));
opcode = parse_opstr(L, opstr); opcode = parse_opstr(L, opstr, intrins, &buildflags);
if (!opcode) { if (!opcode && !(buildflags & INTRINSFLAG_TEMPLATE)) {
return 0; return 0;
} }
@ -387,7 +456,10 @@ int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opstr, uint32_t imm)
sib = retid; sib = retid;
} }
setopcode(L, intrins, opcode); /* If this is a template there is no opcode to set */
if (opcode) {
setopcode(L, intrins, opcode);
}
register_intrinsic(L, intrins, ctype_get(cts, fid)); register_intrinsic(L, intrins, ctype_get(cts, fid));
lua_assert(sib > 0 && sib < cts->top); lua_assert(sib > 0 && sib < cts->top);

View File

@ -26,6 +26,10 @@ typedef enum INTRINSFLAGS {
INTRINSFLAG_CALLED = 0x20, INTRINSFLAG_CALLED = 0x20,
/* MODRM should always be set as indirect mode */ /* MODRM should always be set as indirect mode */
INTRINSFLAG_INDIRECT = 0x40, INTRINSFLAG_INDIRECT = 0x40,
/* Intrinsic is a template with no machine code set until it is instantiated at
** runtime with user supplied code.
*/
INTRINSFLAG_TEMPLATE = 0x40000,
INTRINSFLAG_CALLEDIND = INTRINSFLAG_CALLED | INTRINSFLAG_INDIRECT INTRINSFLAG_CALLEDIND = INTRINSFLAG_CALLED | INTRINSFLAG_INDIRECT
} INTRINSFLAGS; } INTRINSFLAGS;
@ -97,6 +101,7 @@ CTypeID1 regkind_ct[16];
#define rk_ctype(rid, kind) ((rid) < RID_MAX_GPR ? rk_ctypegpr(kind) : rk_ctypefpr(kind)) #define rk_ctype(rid, kind) ((rid) < RID_MAX_GPR ? rk_ctypegpr(kind) : rk_ctypefpr(kind))
LJ_FUNC void lj_intrinsic_init(lua_State *L); LJ_FUNC void lj_intrinsic_init(lua_State *L);
LJ_FUNC int lj_intrinsic_create(lua_State *L);
LJ_FUNC GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func); LJ_FUNC GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func);
LJ_FUNC int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opcode, uint32_t imm); LJ_FUNC int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opcode, uint32_t imm);
LJ_FUNC int lj_intrinsic_call(CTState *cts, CType *ct); LJ_FUNC int lj_intrinsic_call(CTState *cts, CType *ct);

View File

@ -30,7 +30,7 @@ describe("intrinsic tests", function()
context("nop inout", function() context("nop inout", function()
it("fpr", function() it("fpr", function()
assert_cdef([[void fpr_nop1(double xmm0) __mcode("90") __reglist(out, double xmm0)]], "fpr_nop1") assert_cdef([[void fpr_nop1(double xmm0) __mcode("90_E") __reglist(out, double xmm0)]], "fpr_nop1")
local fpr1 = ffi.C.fpr_nop1 local fpr1 = ffi.C.fpr_nop1
assert_error(function() fpr1() end) assert_error(function() fpr1() end)
@ -39,10 +39,34 @@ context("nop inout", function()
assert_jit(123.075, function(num) return (fpr1(num)) end, 123.075) assert_jit(123.075, function(num) return (fpr1(num)) end, 123.075)
assert_noexit(-123567.075, function(num) return (fpr1(num)) end, -123567.075) assert_noexit(-123567.075, function(num) return (fpr1(num)) end, -123567.075)
assert_cdef([[void fpr_all(double xmm0, double xmm1, double xmm2, double xmm3, double xmm4, double xmm5, double xmm6, double xmm7) __mcode("?E")
__reglist(out,double xmm0, double xmm1, double xmm2, double xmm3, double xmm4, double xmm5, double xmm6, double xmm7)]])
local fpr_all = ffi.intrinsic("fpr_all", "\x90", 1)
local function testfpr_all(i, r1, r2, r3, r4, r5, r6, r7, r8)
local spilled = r1*2*i
local ro1, ro2, ro3, ro4, ro5, ro6, ro7, ro8 = fpr_all(r1, r2, r3, r4, r5, r6, r7, r8)
return ro1+i, ro2+i, ro3+i, ro4+i, ro5+i, ro6+i, ro7+i, (ro8*ro3)+i, ro2+spilled
end
local function checker(i, ro1, ro2, ro3, ro4, ro5, ro6, ro7, ro8, spilled)
assert(ro1 == 1.5+i)
assert(ro2 == 2.5+i)
assert(ro3 == 3.5+i)
assert(ro4 == 4.5+i)
assert(ro5 == 5.5+i)
assert(ro6 == 60000.525+i)
assert(ro7 == i+-7.5)
assert(ro8 == (-100*3.5)+i)
assert(spilled == 2.5+(1.5*2*i))
end
assert_jitchecker(checker, testfpr_all, 1.5, 2.5, 3.5, 4.5, 5.5, 60000.525, -7.5, -100)
end) end)
it("gpr", function() it("gpr", function()
assert_cdef([[void gpr_nop1(int32_t eax) __mcode("90") __reglist(out, int32_t eax)]], "gpr_nop1") assert_cdef([[void gpr_nop1(int32_t eax) __mcode("90_E") __reglist(out, int32_t eax)]], "gpr_nop1")
local function testgpr1(num) local function testgpr1(num)
return (ffi.C.gpr_nop1(num)) return (ffi.C.gpr_nop1(num))
@ -67,11 +91,35 @@ context("nop inout", function()
end end
assert_jitchecker(checker, testgpr_scratch, 0, 1, 30000) assert_jitchecker(checker, testgpr_scratch, 0, 1, 30000)
assert_cdef([[void gpr_all(int32_t ebp, int32_t esi, int32_t edi, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx) __mcode("?E")
__reglist(out, int32_t ebp, int32_t esi, int32_t edi, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx)]])
local gpr_all = ffi.intrinsic("gpr_all", "\x90", 1)
local function testgpr_all(i, r1, r2, r3, r4, r5, r6, r7)
local spilled = r1+(10000*i)
local ro1, ro2, ro3, ro4, ro5, ro6, ro7 = gpr_all(r1, r2, r3, r4, 100, r6, r7)
return spilled+(ro1+ro2+ro3+ro4+ro5+ro6+ro7), ro3+i, ro2+i, ro1+i, ro4+i, ro5, ro6+i, ro7+i
end
local function checker(i, spilled, ro3, ro2, ro1, ro4, ro5, ro6, ro7)
assert(ro1 == 1+i)
assert(ro2 == 2+i)
assert(ro3 == 3+i)
assert(ro4 == 4+i)
assert(ro5 == 100)
assert(ro6 == 6+i)
assert(ro7 == 7+i)
assert(spilled == 124+(10000*i))
end
assert_jitchecker(checker, testgpr_all, 1, 2, 3, 4, 5, 6, 7)
end) end)
if ffi.arch == "x64" then if ffi.arch == "x64" then
it("gpr64", function() it("gpr64", function()
assert_cdef([[void gpr64_1(int64_t rdx) __mcode("90") __reglist(out, int64_t rdx)]], "gpr64_1") assert_cdef([[void gpr64_1(int64_t rdx) __mcode("90_E") __reglist(out, int64_t rdx)]], "gpr64_1")
local function testgpr1(num) local function testgpr1(num)
return (ffi.C.gpr64_1(num)) return (ffi.C.gpr64_1(num))
@ -79,10 +127,35 @@ if ffi.arch == "x64" then
assert_jit(1235678ull, testgpr1, 1235678) assert_jit(1235678ull, testgpr1, 1235678)
assert_noexit(-1LL, testgpr1, -1) assert_noexit(-1LL, testgpr1, -1)
assert_cdef([[void gpr64(int64_t rbp, int64_t rsi, int64_t rdi, int64_t rax, int64_t rbx, int64_t rcx, int64_t rdx) __mcode("?E")
__reglist(out, int64_t rbp, int64_t rsi, int64_t rdi, int64_t rax, int64_t rbx, int64_t rcx, int64_t rdx)]])
local gpr7 = ffi.intrinsic("gpr64", "\x90", 1)
local function testgpr_all(i, r1, r2, r3, r4, r5, r6, r7)
local spilled = r1+(10000*i)
local ro1, ro2, ro3, ro4, ro5, ro6, ro7 = gpr7(r1, r2, r3, 68719476735ll, r5, r6, r7)
return spilled, ro3+i, ro2+i, ro1+i, ro4+i, ro5+i, ro6+i, ro7+i
end
local function checker(i, spilled, ro3, ro2, ro1, ro4, ro5, ro6, ro7)
local sp = (10000*i)+1
assert(ro1 == 1+i)
assert(ro2 == 2+i)
assert(ro3 == 3+i)
assert(type(ro4) == "cdata" and ro4 == 68719476735ll+i)
assert(type(ro5) == "cdata" and ro5 == 5ll+i)
assert(type(ro6) == "cdata" and ro6 == 68719476721ll+i)
assert(type(ro7) == "cdata" and ro7 == (-7ll)+i)
assert(spilled == sp)
end
assert_jitchecker(checker, testgpr_all, 1, 2, 3, 4, 5ll, 68719476721ll, -7ll)
end) end)
it("rex fpr", function() it("rex fpr", function()
assert_cdef([[void fpr_reg(double xmm9, double xmm0) __mcode("90") __reglist(out, double xmm0, double xmm9)]], "fpr_reg") assert_cdef([[void fpr_reg(double xmm9, double xmm0) __mcode("90_E") __reglist(out, double xmm0, double xmm9)]], "fpr_reg")
local fpr = ffi.C.fpr_reg local fpr = ffi.C.fpr_reg
local function testrex(n1, n2) local function testrex(n1, n2)
@ -95,15 +168,38 @@ if ffi.arch == "x64" then
end end
it("fpr_vec", function() it("fpr_vec", function()
assert_cdef([[void fpr_vec(void* xmm7v) __mcode("90") __reglist(out, float4 xmm7v)]], "fpr_vec") assert_cdef([[void fpr_vec(void* xmm7v) __mcode("90_E") __reglist(out, float4 xmm7v)]], "fpr_vec")
local v1 = ffi.new("float[4]", 1, 2, 3, 4) local v1 = ffi.new("float[4]", 1, 2, 3, 4)
local xmmout = ffi.C.fpr_vec(v1) local xmmout = ffi.C.fpr_vec(v1)
assert_v4eq(xmmout, 1, 2, 3, 4) assert_v4eq(xmmout, 1, 2, 3, 4)
end) end)
it("check extra register spill", function()
local array = ffi.new("float4", 1, 2, 3, 4)
-- Use up all gpr scatch registers before loading the vec causing wrapper builder to restart with an extra spill
assert_cdef([[void spillrestart(float4 xmm0v, int32_t eax, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebx) __mcode("?E")
__reglist(out, int32_t eax, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebx, float4 xmm0v)
]])
local xmmtest = ffi.intrinsic("spillrestart", "\x90", 1)
local eax, ecx, edx, esi, edi, ebx, xmmout = xmmtest(array, 1, 2, 3, 4, 5, 6)
assert_equal(eax, 1)
assert_equal(ecx, 2)
assert_equal(edx, 3)
assert_equal(esi, 4)
assert_equal(edi, 5)
assert_equal(ebx, 6)
for i=1,4 do
assert_equal(xmmout[i-1], i)
end
end)
it("idiv", function() it("idiv", function()
assert_cdef([[void idiv(int32_t eax, int32_t ecx) __mcode("99F7F9") __reglist(out, int32_t eax, int32_t edx)]], "idiv") assert_cdef([[void idiv(int32_t eax, int32_t ecx) __mcode("99F7F9_E") __reglist(out, int32_t eax, int32_t edx)]], "idiv")
local function checker(i, result, remainder) local function checker(i, result, remainder)
local rem = i%3 local rem = i%3
@ -169,22 +265,22 @@ context("__mcode", function()
end) end)
it("invalid registers", function() it("invalid registers", function()
assert_cdef([[void validreg_gpr(int eax) __mcode("90");]], "validreg_gpr") assert_cdef([[void validreg_gpr(int eax) __mcode("90_E");]], "validreg_gpr")
assert_cdeferr([[void badreg_1(int e) __mcode("90");]], "invalid") assert_cdeferr([[void badreg_1(int e) __mcode("90_E");]], "invalid")
assert_cdeferr([[void badreg_1(int r20d) __mcode("90");]], "invalid") assert_cdeferr([[void badreg_1(int r20d) __mcode("90_E");]], "invalid")
assert_cdeferr([[void badreg_gpr1() __mcode("90") __reglist(out, int e);]], "invalid") assert_cdeferr([[void badreg_gpr1() __mcode("90_E") __reglist(out, int e);]], "invalid")
assert_cdeferr([[void badreg_gpr2() __mcode("90") __reglist(mod, e);]], "invalid") assert_cdeferr([[void badreg_gpr2() __mcode("90_E") __reglist(mod, e);]], "invalid")
assert_cdef([[void validreg_fpr(float xmm0) __mcode("90");]], "validreg_fpr") assert_cdef([[void validreg_fpr(float xmm0) __mcode("90_E");]], "validreg_fpr")
assert_cdeferr([[void badreg_fpr1(float x) __mcode("90");]], "invalid") assert_cdeferr([[void badreg_fpr1(float x) __mcode("90_E");]], "invalid")
assert_cdeferr([[void badreg_fpr1(float xm) __mcode("90");]], "invalid") assert_cdeferr([[void badreg_fpr1(float xm) __mcode("90_E");]], "invalid")
assert_cdeferr([[void badreg_fpr1(float xm0) __mcode("90");]], "invalid") assert_cdeferr([[void badreg_fpr1(float xm0) __mcode("90_E");]], "invalid")
assert_cdeferr([[void badreg_fpr1(float xmmm0) __mcode("90");]], "invalid") assert_cdeferr([[void badreg_fpr1(float xmmm0) __mcode("90_E");]], "invalid")
assert_cdeferr([[void badreg_fpr2(float xmm0vf) __mcode("90");]], "invalid") assert_cdeferr([[void badreg_fpr2(float xmm0vf) __mcode("90_E");]], "invalid")
--xmm register number too large --xmm register number too large
assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90");]], "invalid") assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90_E");]], "invalid")
end) end)
it("multidef rollback", function() it("multidef rollback", function()
@ -199,7 +295,7 @@ context("__mcode", function()
assert_error(function() ffi.C.multi2() end) assert_error(function() ffi.C.multi2() end)
assert_not_error(function() ffi.cdef[[ assert_not_error(function() ffi.cdef[[
void multi1(int32_t eax) __mcode("90") __reglist(out, int32_t eax); void multi1(int32_t eax) __mcode("90_E") __reglist(out, int32_t eax);
]] end) ]] end)
assert_equal(ffi.C.multi1(1.1), 1) assert_equal(ffi.C.multi1(1.1), 1)
@ -217,7 +313,7 @@ context("__mcode", function()
end) end)
it("bad args", function() it("bad args", function()
assert_cdef([[void idiv2(int32_t eax, int32_t ecx) __mcode("99F7F9") __reglist(out, int32_t eax, int32_t edx)]], "idiv2") assert_cdef([[void idiv2(int32_t eax, int32_t ecx) __mcode("99F7F9_E") __reglist(out, int32_t eax, int32_t edx)]], "idiv2")
local idiv = ffi.C.idiv2 local idiv = ffi.C.idiv2
@ -234,8 +330,42 @@ context("__mcode", function()
assert_error(function() idiv(1, 2, 3, 4) end) assert_error(function() idiv(1, 2, 3, 4) end)
end) end)
it("idiv(template)", function()
assert_cdef([[void idivT(int32_t eax, int32_t ecx) __mcode("?E") __reglist(out, int32_t eax, int32_t edx)]])
--trying to create template intrinsic through C library should always fail
assert_error(function() return ffi.C.idivT end)
local idiv = ffi.intrinsic("idivT", "\x99\xF7\xF9", 3)
local function checker(i, result, remainder)
local rem = i%2
if rem ~= remainder then
return rem, remainder
end
local expected = (i-rem)/2
if expected ~= result then
return expected, result
end
end
local function test_idiv(value, divisor)
local result, remainder = idiv(value, divisor)
return result, remainder
end
assert_jitchecker(checker, test_idiv, 2)
-- create a second instance and check guard for wrapper pointer fails
idiv = ffi.intrinsic("idivT", "\x90", 1)
assert_exit(10, test_idiv, 10, 5)
end)
it("cpuid_brand", function() it("cpuid_brand", function()
assert_cdef([[void cpuid(int32_t eax, int32_t ecx) __mcode("0FA2") __reglist(out, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx);]], "cpuid") assert_cdef([[void cpuid(int32_t eax, int32_t ecx) __mcode("0FA2_E") __reglist(out, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx);]], "cpuid")
local cpuid = ffi.C.cpuid local cpuid = ffi.C.cpuid
@ -304,20 +434,20 @@ context("__reglist", function()
it("stack pointer blacklist", function() it("stack pointer blacklist", function()
assert_cdeferr([[void blacklist_in(int esp) __mcode("90")]], "blacklist") assert_cdeferr([[void blacklist_in(int esp) __mcode("90_E")]], "blacklist")
assert_cdeferr([[void blacklist_out(int eax) __mcode("90") __reglist(out, int esp)]], "blacklist") assert_cdeferr([[void blacklist_out(int eax) __mcode("90_E") __reglist(out, int esp)]], "blacklist")
--FIXME --FIXME
--assert_cdeferr([[void blacklist_mod(int eax) __mcode("90") __reglist(mod, esp)]], "blacklist") --assert_cdeferr([[void blacklist_mod(int eax) __mcode("90_E") __reglist(mod, esp)]], "blacklist")
if ffi.arch == "x64" then if ffi.arch == "x64" then
assert_cdeferr([[void blacklist_64(int rsp) __mcode("90")]], "blacklist") assert_cdeferr([[void blacklist_64(int rsp) __mcode("90_E")]], "blacklist")
end end
end) end)
it("duplicate regs", function() it("duplicate regs", function()
assert_cdeferr([[void duplicate_in(int eax, int eax) __mcode("90")]], "duplicate") assert_cdeferr([[void duplicate_in(int eax, int eax) __mcode("90_E")]], "duplicate")
assert_cdeferr([[void duplicate_inxmm(float4 xmm0, float4 xmm0) __mcode("90")]], "duplicate") assert_cdeferr([[void duplicate_inxmm(float4 xmm0, float4 xmm0) __mcode("90_E")]], "duplicate")
assert_cdeferr([[void duplicate_out(int eax) __mcode("90") __reglist(out, int eax, int eax)]], "duplicate") assert_cdeferr([[void duplicate_out(int eax) __mcode("90_E") __reglist(out, int eax, int eax)]], "duplicate")
--FIXME assert_cdeferr([[void duplicate_mod(int eax) __mcode("90_E") __reglist(mod, eax, eax)]], "duplicate") --FIXME assert_cdeferr([[void duplicate_mod(int eax) __mcode("90_E") __reglist(mod, eax, eax)]], "duplicate")
end) end)