mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-08 15:34:09 +00:00
Template intrinsics user machine code
This commit is contained in:
parent
c84b176062
commit
e1a1721ae2
@ -492,6 +492,16 @@ LJLIB_CF(ffi_cdef)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LJLIB_CF(ffi_intrinsic)
|
||||||
|
{
|
||||||
|
#if LJ_HASINTRINSICS
|
||||||
|
lj_intrinsic_create(L);
|
||||||
|
return 1;
|
||||||
|
#else
|
||||||
|
lj_err_callermsg(L, "Intrinsics disabled");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
LJLIB_CF(ffi_new) LJLIB_REC(.)
|
LJLIB_CF(ffi_new) LJLIB_REC(.)
|
||||||
{
|
{
|
||||||
CTState *cts = ctype_cts(L);
|
CTState *cts = ctype_cts(L);
|
||||||
|
34
src/lj_asm.c
34
src/lj_asm.c
@ -2569,6 +2569,7 @@ static void wrap_intrins(jit_State *J, CIntrinsic *intrins, IntrinWrapState *sta
|
|||||||
IntrinBuildState info;
|
IntrinBuildState info;
|
||||||
AsmHeader *hdr;
|
AsmHeader *hdr;
|
||||||
MCode *asmofs = NULL, *origtop;
|
MCode *asmofs = NULL, *origtop;
|
||||||
|
void* target = state->target;
|
||||||
int spadj = 0;
|
int spadj = 0;
|
||||||
|
|
||||||
lj_asm_setup_intrins(J, as);
|
lj_asm_setup_intrins(J, as);
|
||||||
@ -2587,6 +2588,12 @@ static void wrap_intrins(jit_State *J, CIntrinsic *intrins, IntrinWrapState *sta
|
|||||||
ra_modified(as, info.outcontext);
|
ra_modified(as, info.outcontext);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Embed the mcode after the wrapper */
|
||||||
|
if ((intrins->flags & INTRINSFLAG_CALLED) && state->targetsz) {
|
||||||
|
*--as->mcp = XI_RET;
|
||||||
|
target = asmofs = asm_mcode(as, target, state->targetsz);
|
||||||
|
}
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
if (info.contexspill || rset_test(info.outset, info.outcontext)) {
|
if (info.contexspill || rset_test(info.outset, info.outcontext)) {
|
||||||
/* add some extra space for context spill and temp spill */
|
/* add some extra space for context spill and temp spill */
|
||||||
@ -2630,8 +2637,20 @@ restart:
|
|||||||
emit_storeofsirt(as, IRT_INTP, info.outcontext, RID_SP, TEMPSPILL);
|
emit_storeofsirt(as, IRT_INTP, info.outcontext, RID_SP, TEMPSPILL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Append the user supplied machine code */
|
if (intrins->flags & INTRINSFLAG_CALLED) {
|
||||||
asmofs = asm_mcode(as, state->target, state->targetsz);
|
Reg rin = 0;
|
||||||
|
#if LJ_64
|
||||||
|
/* Pick a scratch register in case the relative distance for the call is
|
||||||
|
** larger than a signed 32bit value
|
||||||
|
*/
|
||||||
|
rin = intrinsic_scratch(as, RSET_GPR);
|
||||||
|
#endif
|
||||||
|
/* emit a call to the target which may be collocated after us */
|
||||||
|
emit_intrins(as, intrins, rin, (uintptr_t)target);
|
||||||
|
} else {
|
||||||
|
/* Append the user supplied machine code */
|
||||||
|
asmofs = asm_mcode(as, state->target, state->targetsz);
|
||||||
|
}
|
||||||
|
|
||||||
/* Move values out of the context into their respective input registers */
|
/* Move values out of the context into their respective input registers */
|
||||||
intrins_loadregs(as, intrins, &info);
|
intrins_loadregs(as, intrins, &info);
|
||||||
@ -2657,9 +2676,14 @@ restart:
|
|||||||
memset(hdr, 0, sizeof(AsmHeader));
|
memset(hdr, 0, sizeof(AsmHeader));
|
||||||
hdr->totalzs = (uint32_t)(origtop-as->mcp);
|
hdr->totalzs = (uint32_t)(origtop-as->mcp);
|
||||||
|
|
||||||
lua_assert((asmofs-as->mcp) < 0xffff);
|
/* Embed info before the code to support multiple versions of a user intrinsic */
|
||||||
hdr->asmofs = (uint16_t)(asmofs-as->mcp);
|
if ((intrins->flags & INTRINSFLAG_CALLEDIND) == INTRINSFLAG_CALLEDIND) {
|
||||||
hdr->asmsz = state->targetsz;
|
hdr->target = (uintptr_t)target;
|
||||||
|
} else if (asmofs){
|
||||||
|
lua_assert((asmofs-as->mcp) < 0xffff);
|
||||||
|
hdr->asmofs = (uint16_t)(asmofs-as->mcp);
|
||||||
|
hdr->asmsz = state->targetsz;
|
||||||
|
}
|
||||||
|
|
||||||
as->mcp = (MCode*)hdr;
|
as->mcp = (MCode*)hdr;
|
||||||
|
|
||||||
|
@ -614,6 +614,24 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
|
|||||||
#define TEMPSPILL (1*sizeof(intptr_t))
|
#define TEMPSPILL (1*sizeof(intptr_t))
|
||||||
#define CONTEXTSPILL (0)
|
#define CONTEXTSPILL (0)
|
||||||
|
|
||||||
|
|
||||||
|
static MCode* emit_intrins(ASMState *as, CIntrinsic *intrins, Reg r1,
|
||||||
|
uintptr_t r2)
|
||||||
|
{
|
||||||
|
if (intrins->flags & INTRINSFLAG_CALLED) {
|
||||||
|
lua_assert(r2);
|
||||||
|
emit_call_(as, (MCode*)r2, r1);
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
AsmHeader *hdr = ((AsmHeader*)r2)-1;
|
||||||
|
lua_assert((hdr->asmofs != 0 || hdr->asmofs < hdr->totalzs));
|
||||||
|
|
||||||
|
/* Directly copy the unmodified machine code of the intrinsic in */
|
||||||
|
asm_mcode(as, ((char*)r2)+hdr->asmofs, hdr->asmsz);
|
||||||
|
}
|
||||||
|
return as->mcp;
|
||||||
|
}
|
||||||
|
|
||||||
static int lj_popcnt(uint32_t i)
|
static int lj_popcnt(uint32_t i)
|
||||||
{
|
{
|
||||||
i = i - ((i >> 1) & 0x55555555);
|
i = i - ((i >> 1) & 0x55555555);
|
||||||
|
@ -83,6 +83,16 @@ static CTypeID register_intrinsic(lua_State *L, CIntrinsic* src, CType *func)
|
|||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void lj_intrinsic_new(lua_State *L, CTypeID id, void* wrapmc)
|
||||||
|
{
|
||||||
|
CTState *cts = ctype_cts(L);
|
||||||
|
GCcdata *cd;
|
||||||
|
lua_assert(ctype_isintrinsic(ctype_get(cts, id)->info));
|
||||||
|
cd = lj_cdata_new(cts, id, CTSIZE_PTR);
|
||||||
|
*(void **)cdataptr(cd) = wrapmc;
|
||||||
|
setcdataV(L, L->top++, cd);
|
||||||
|
}
|
||||||
|
|
||||||
static int parse_fprreg(const char *name, uint32_t len)
|
static int parse_fprreg(const char *name, uint32_t len)
|
||||||
{
|
{
|
||||||
uint32_t rid = 0, kind = REGKIND_FPR64;
|
uint32_t rid = 0, kind = REGKIND_FPR64;
|
||||||
@ -282,27 +292,33 @@ static void setopcode(lua_State *L, CIntrinsic *intrins, uint32_t opcode)
|
|||||||
intrins->opcode = opcode;
|
intrins->opcode = opcode;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int parse_opstr(lua_State *L, GCstr *opstr)
|
static int parse_opstr(lua_State *L, GCstr *opstr, CIntrinsic *intrins, int* buildflags)
|
||||||
{
|
{
|
||||||
const char *op = strdata(opstr);
|
const char *op = strdata(opstr);
|
||||||
uint32_t opcode = 0;
|
uint32_t opcode = 0;
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
|
|
||||||
/* Find the end of the opcode number */
|
/* Parse the opcode number if this is not a template */
|
||||||
for (i = 0; i < opstr->len && lj_char_isxdigit((uint8_t)op[i]); i++) {
|
if (op[0] != '?') {
|
||||||
}
|
for (i = 0; i < opstr->len && lj_char_isxdigit((uint8_t)op[i]); i++) {
|
||||||
|
}
|
||||||
|
|
||||||
if (i == 0 || i > 8) {
|
if (i == 0 || i > 8) {
|
||||||
/* invalid or no hex number */
|
/* invalid or no hex number */
|
||||||
lj_err_callerv(L, LJ_ERR_FFI_BADOPSTR, op, "invalid opcode number");
|
lj_err_callerv(L, LJ_ERR_FFI_BADOPSTR, op, "invalid opcode number");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Scan hex digits. */
|
/* Scan hex digits. */
|
||||||
for (; i; i--, op++) {
|
for (; i; i--, op++) {
|
||||||
uint32_t d = *op; if (d > '9') d += 9;
|
uint32_t d = *op; if (d > '9') d += 9;
|
||||||
opcode = (opcode << 4) + (d & 15);
|
opcode = (opcode << 4) + (d & 15);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
*buildflags |= INTRINSFLAG_TEMPLATE;
|
||||||
|
op++;
|
||||||
|
}
|
||||||
|
|
||||||
return opcode;
|
return opcode;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -337,6 +353,54 @@ static IntrinsicWrapper lj_intrinsic_buildwrap(lua_State *L, CIntrinsic *intrins
|
|||||||
return (IntrinsicWrapper)state.wrapper;
|
return (IntrinsicWrapper)state.wrapper;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CTypeID lj_intrinsic_template(lua_State *L, int narg)
|
||||||
|
{
|
||||||
|
CTState *cts = ctype_cts(L);
|
||||||
|
CType *ct;
|
||||||
|
CTypeID id;
|
||||||
|
CIntrinsic* intrins;
|
||||||
|
GCstr *name = lj_lib_checkstr(L, narg);
|
||||||
|
|
||||||
|
id = lj_ctype_getname(cts, &ct, name, 1u << CT_FUNC);
|
||||||
|
|
||||||
|
if (!id) {
|
||||||
|
lj_err_argv(L, narg, LJ_ERR_FFI_NODECL, name);
|
||||||
|
} else if (!ctype_isintrinsic(ct->info)) {
|
||||||
|
lj_err_arg(L, narg, LJ_ERR_FFI_INVTYPE);
|
||||||
|
}
|
||||||
|
|
||||||
|
intrins = lj_intrinsic_get(cts, ct->size);
|
||||||
|
|
||||||
|
/* Can't be a template if it has an opcode */
|
||||||
|
if ((intrins->opcode && intrins->outsz <= 4) || intrins->wrapped)
|
||||||
|
lj_err_arg(L, narg, LJ_ERR_FFI_INVTYPE);
|
||||||
|
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
int lj_intrinsic_create(lua_State *L)
|
||||||
|
{
|
||||||
|
CTState *cts = ctype_cts(L);
|
||||||
|
CTypeID id = lj_intrinsic_template(L, 1);
|
||||||
|
void *intrinsmc;
|
||||||
|
MSize asmsz;
|
||||||
|
CIntrinsic* intrins = lj_intrinsic_get(cts, ctype_get(cts, id)->size);
|
||||||
|
|
||||||
|
lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&intrinsmc,
|
||||||
|
L->base+1, CCF_ARG(2));
|
||||||
|
|
||||||
|
asmsz = lj_lib_checkint(L, 3);
|
||||||
|
if (asmsz <= 0 || asmsz > 0xffff ||
|
||||||
|
asmsz > (MSize)(L2J(L)->param[JIT_P_sizemcode] << 10)) {
|
||||||
|
lj_err_callermsg(L, "bad code size");
|
||||||
|
}
|
||||||
|
|
||||||
|
intrinsmc = lj_intrinsic_buildwrap(L, intrins, intrinsmc, asmsz,
|
||||||
|
intrin_getmodrset(cts, intrins));
|
||||||
|
lj_intrinsic_new(L, id, intrinsmc);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func)
|
GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func)
|
||||||
{
|
{
|
||||||
GCcdata *cd;
|
GCcdata *cd;
|
||||||
@ -345,6 +409,10 @@ GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func)
|
|||||||
RegSet mod = intrin_getmodrset(cts, intrins);
|
RegSet mod = intrin_getmodrset(cts, intrins);
|
||||||
uint32_t op = intrins->opcode;
|
uint32_t op = intrins->opcode;
|
||||||
void* mcode = ((char*)&op) + (4-intrin_oplen(intrins));
|
void* mcode = ((char*)&op) + (4-intrin_oplen(intrins));
|
||||||
|
|
||||||
|
if (intrins->opcode == 0) {
|
||||||
|
lj_err_callermsg(cts->L, "expected non template intrinsic");
|
||||||
|
}
|
||||||
|
|
||||||
intrins->wrapped = lj_intrinsic_buildwrap(cts->L, intrins, mcode,
|
intrins->wrapped = lj_intrinsic_buildwrap(cts->L, intrins, mcode,
|
||||||
intrin_oplen(intrins), mod);
|
intrin_oplen(intrins), mod);
|
||||||
@ -360,13 +428,14 @@ int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opstr, uint32_t imm)
|
|||||||
CType *func = ctype_get(cts, fid);
|
CType *func = ctype_get(cts, fid);
|
||||||
CTypeID sib = func->sib, retid = ctype_cid(func->info);
|
CTypeID sib = func->sib, retid = ctype_cid(func->info);
|
||||||
uint32_t opcode;
|
uint32_t opcode;
|
||||||
|
int buildflags = 0;
|
||||||
CIntrinsic _intrins;
|
CIntrinsic _intrins;
|
||||||
CIntrinsic* intrins = &_intrins;
|
CIntrinsic* intrins = &_intrins;
|
||||||
memset(intrins, 0, sizeof(CIntrinsic));
|
memset(intrins, 0, sizeof(CIntrinsic));
|
||||||
|
|
||||||
opcode = parse_opstr(L, opstr);
|
opcode = parse_opstr(L, opstr, intrins, &buildflags);
|
||||||
|
|
||||||
if (!opcode) {
|
if (!opcode && !(buildflags & INTRINSFLAG_TEMPLATE)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -387,7 +456,10 @@ int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opstr, uint32_t imm)
|
|||||||
sib = retid;
|
sib = retid;
|
||||||
}
|
}
|
||||||
|
|
||||||
setopcode(L, intrins, opcode);
|
/* If we're a template there's no opcode to set */
|
||||||
|
if (opcode) {
|
||||||
|
setopcode(L, intrins, opcode);
|
||||||
|
}
|
||||||
register_intrinsic(L, intrins, ctype_get(cts, fid));
|
register_intrinsic(L, intrins, ctype_get(cts, fid));
|
||||||
|
|
||||||
lua_assert(sib > 0 && sib < cts->top);
|
lua_assert(sib > 0 && sib < cts->top);
|
||||||
|
@ -26,6 +26,10 @@ typedef enum INTRINSFLAGS {
|
|||||||
INTRINSFLAG_CALLED = 0x20,
|
INTRINSFLAG_CALLED = 0x20,
|
||||||
/* MODRM should always be set as indirect mode */
|
/* MODRM should always be set as indirect mode */
|
||||||
INTRINSFLAG_INDIRECT = 0x40,
|
INTRINSFLAG_INDIRECT = 0x40,
|
||||||
|
/* Intrinsic is a template with no machine code set until instantiated at runtime with
|
||||||
|
** user supplied code.
|
||||||
|
*/
|
||||||
|
INTRINSFLAG_TEMPLATE = 0x40000,
|
||||||
|
|
||||||
INTRINSFLAG_CALLEDIND = INTRINSFLAG_CALLED | INTRINSFLAG_INDIRECT
|
INTRINSFLAG_CALLEDIND = INTRINSFLAG_CALLED | INTRINSFLAG_INDIRECT
|
||||||
} INTRINSFLAGS;
|
} INTRINSFLAGS;
|
||||||
@ -97,6 +101,7 @@ CTypeID1 regkind_ct[16];
|
|||||||
#define rk_ctype(rid, kind) ((rid) < RID_MAX_GPR ? rk_ctypegpr(kind) : rk_ctypefpr(kind))
|
#define rk_ctype(rid, kind) ((rid) < RID_MAX_GPR ? rk_ctypegpr(kind) : rk_ctypefpr(kind))
|
||||||
|
|
||||||
LJ_FUNC void lj_intrinsic_init(lua_State *L);
|
LJ_FUNC void lj_intrinsic_init(lua_State *L);
|
||||||
|
LJ_FUNC int lj_intrinsic_create(lua_State *L);
|
||||||
LJ_FUNC GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func);
|
LJ_FUNC GCcdata *lj_intrinsic_createffi(CTState *cts, CType *func);
|
||||||
LJ_FUNC int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opcode, uint32_t imm);
|
LJ_FUNC int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opcode, uint32_t imm);
|
||||||
LJ_FUNC int lj_intrinsic_call(CTState *cts, CType *ct);
|
LJ_FUNC int lj_intrinsic_call(CTState *cts, CType *ct);
|
||||||
|
@ -30,7 +30,7 @@ describe("intrinsic tests", function()
|
|||||||
context("nop inout", function()
|
context("nop inout", function()
|
||||||
|
|
||||||
it("fpr", function()
|
it("fpr", function()
|
||||||
assert_cdef([[void fpr_nop1(double xmm0) __mcode("90") __reglist(out, double xmm0)]], "fpr_nop1")
|
assert_cdef([[void fpr_nop1(double xmm0) __mcode("90_E") __reglist(out, double xmm0)]], "fpr_nop1")
|
||||||
local fpr1 = ffi.C.fpr_nop1
|
local fpr1 = ffi.C.fpr_nop1
|
||||||
|
|
||||||
assert_error(function() fpr1() end)
|
assert_error(function() fpr1() end)
|
||||||
@ -39,10 +39,34 @@ context("nop inout", function()
|
|||||||
|
|
||||||
assert_jit(123.075, function(num) return (fpr1(num)) end, 123.075)
|
assert_jit(123.075, function(num) return (fpr1(num)) end, 123.075)
|
||||||
assert_noexit(-123567.075, function(num) return (fpr1(num)) end, -123567.075)
|
assert_noexit(-123567.075, function(num) return (fpr1(num)) end, -123567.075)
|
||||||
|
|
||||||
|
assert_cdef([[void fpr_all(double xmm0, double xmm1, double xmm2, double xmm3, double xmm4, double xmm5, double xmm6, double xmm7) __mcode("?E")
|
||||||
|
__reglist(out,double xmm0, double xmm1, double xmm2, double xmm3, double xmm4, double xmm5, double xmm6, double xmm7)]])
|
||||||
|
local fpr_all = ffi.intrinsic("fpr_all", "\x90", 1)
|
||||||
|
|
||||||
|
local function testfpr_all(i, r1, r2, r3, r4, r5, r6, r7, r8)
|
||||||
|
local spilled = r1*2*i
|
||||||
|
local ro1, ro2, ro3, ro4, ro5, ro6, ro7, ro8 = fpr_all(r1, r2, r3, r4, r5, r6, r7, r8)
|
||||||
|
return ro1+i, ro2+i, ro3+i, ro4+i, ro5+i, ro6+i, ro7+i, (ro8*ro3)+i, ro2+spilled
|
||||||
|
end
|
||||||
|
|
||||||
|
local function checker(i, ro1, ro2, ro3, ro4, ro5, ro6, ro7, ro8, spilled)
|
||||||
|
assert(ro1 == 1.5+i)
|
||||||
|
assert(ro2 == 2.5+i)
|
||||||
|
assert(ro3 == 3.5+i)
|
||||||
|
assert(ro4 == 4.5+i)
|
||||||
|
assert(ro5 == 5.5+i)
|
||||||
|
assert(ro6 == 60000.525+i)
|
||||||
|
assert(ro7 == i+-7.5)
|
||||||
|
assert(ro8 == (-100*3.5)+i)
|
||||||
|
assert(spilled == 2.5+(1.5*2*i))
|
||||||
|
end
|
||||||
|
|
||||||
|
assert_jitchecker(checker, testfpr_all, 1.5, 2.5, 3.5, 4.5, 5.5, 60000.525, -7.5, -100)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it("gpr", function()
|
it("gpr", function()
|
||||||
assert_cdef([[void gpr_nop1(int32_t eax) __mcode("90") __reglist(out, int32_t eax)]], "gpr_nop1")
|
assert_cdef([[void gpr_nop1(int32_t eax) __mcode("90_E") __reglist(out, int32_t eax)]], "gpr_nop1")
|
||||||
|
|
||||||
local function testgpr1(num)
|
local function testgpr1(num)
|
||||||
return (ffi.C.gpr_nop1(num))
|
return (ffi.C.gpr_nop1(num))
|
||||||
@ -67,11 +91,35 @@ context("nop inout", function()
|
|||||||
end
|
end
|
||||||
|
|
||||||
assert_jitchecker(checker, testgpr_scratch, 0, 1, 30000)
|
assert_jitchecker(checker, testgpr_scratch, 0, 1, 30000)
|
||||||
|
|
||||||
|
assert_cdef([[void gpr_all(int32_t ebp, int32_t esi, int32_t edi, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx) __mcode("?E")
|
||||||
|
__reglist(out, int32_t ebp, int32_t esi, int32_t edi, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx)]])
|
||||||
|
|
||||||
|
local gpr_all = ffi.intrinsic("gpr_all", "\x90", 1)
|
||||||
|
|
||||||
|
local function testgpr_all(i, r1, r2, r3, r4, r5, r6, r7)
|
||||||
|
local spilled = r1+(10000*i)
|
||||||
|
local ro1, ro2, ro3, ro4, ro5, ro6, ro7 = gpr_all(r1, r2, r3, r4, 100, r6, r7)
|
||||||
|
return spilled+(ro1+ro2+ro3+ro4+ro5+ro6+ro7), ro3+i, ro2+i, ro1+i, ro4+i, ro5, ro6+i, ro7+i
|
||||||
|
end
|
||||||
|
|
||||||
|
local function checker(i, spilled, ro3, ro2, ro1, ro4, ro5, ro6, ro7)
|
||||||
|
assert(ro1 == 1+i)
|
||||||
|
assert(ro2 == 2+i)
|
||||||
|
assert(ro3 == 3+i)
|
||||||
|
assert(ro4 == 4+i)
|
||||||
|
assert(ro5 == 100)
|
||||||
|
assert(ro6 == 6+i)
|
||||||
|
assert(ro7 == 7+i)
|
||||||
|
assert(spilled == 124+(10000*i))
|
||||||
|
end
|
||||||
|
|
||||||
|
assert_jitchecker(checker, testgpr_all, 1, 2, 3, 4, 5, 6, 7)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
if ffi.arch == "x64" then
|
if ffi.arch == "x64" then
|
||||||
it("gpr64", function()
|
it("gpr64", function()
|
||||||
assert_cdef([[void gpr64_1(int64_t rdx) __mcode("90") __reglist(out, int64_t rdx)]], "gpr64_1")
|
assert_cdef([[void gpr64_1(int64_t rdx) __mcode("90_E") __reglist(out, int64_t rdx)]], "gpr64_1")
|
||||||
|
|
||||||
local function testgpr1(num)
|
local function testgpr1(num)
|
||||||
return (ffi.C.gpr64_1(num))
|
return (ffi.C.gpr64_1(num))
|
||||||
@ -79,10 +127,35 @@ if ffi.arch == "x64" then
|
|||||||
|
|
||||||
assert_jit(1235678ull, testgpr1, 1235678)
|
assert_jit(1235678ull, testgpr1, 1235678)
|
||||||
assert_noexit(-1LL, testgpr1, -1)
|
assert_noexit(-1LL, testgpr1, -1)
|
||||||
|
|
||||||
|
assert_cdef([[void gpr64(int64_t rbp, int64_t rsi, int64_t rdi, int64_t rax, int64_t rbx, int64_t rcx, int64_t rdx) __mcode("?E")
|
||||||
|
__reglist(out, int64_t rbp, int64_t rsi, int64_t rdi, int64_t rax, int64_t rbx, int64_t rcx, int64_t rdx)]])
|
||||||
|
|
||||||
|
local gpr7 = ffi.intrinsic("gpr64", "\x90", 1)
|
||||||
|
|
||||||
|
local function testgpr_all(i, r1, r2, r3, r4, r5, r6, r7)
|
||||||
|
local spilled = r1+(10000*i)
|
||||||
|
local ro1, ro2, ro3, ro4, ro5, ro6, ro7 = gpr7(r1, r2, r3, 68719476735ll, r5, r6, r7)
|
||||||
|
return spilled, ro3+i, ro2+i, ro1+i, ro4+i, ro5+i, ro6+i, ro7+i
|
||||||
|
end
|
||||||
|
|
||||||
|
local function checker(i, spilled, ro3, ro2, ro1, ro4, ro5, ro6, ro7)
|
||||||
|
local sp = (10000*i)+1
|
||||||
|
assert(ro1 == 1+i)
|
||||||
|
assert(ro2 == 2+i)
|
||||||
|
assert(ro3 == 3+i)
|
||||||
|
assert(type(ro4) == "cdata" and ro4 == 68719476735ll+i)
|
||||||
|
assert(type(ro5) == "cdata" and ro5 == 5ll+i)
|
||||||
|
assert(type(ro6) == "cdata" and ro6 == 68719476721ll+i)
|
||||||
|
assert(type(ro7) == "cdata" and ro7 == (-7ll)+i)
|
||||||
|
assert(spilled == sp)
|
||||||
|
end
|
||||||
|
|
||||||
|
assert_jitchecker(checker, testgpr_all, 1, 2, 3, 4, 5ll, 68719476721ll, -7ll)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it("rex fpr", function()
|
it("rex fpr", function()
|
||||||
assert_cdef([[void fpr_reg(double xmm9, double xmm0) __mcode("90") __reglist(out, double xmm0, double xmm9)]], "fpr_reg")
|
assert_cdef([[void fpr_reg(double xmm9, double xmm0) __mcode("90_E") __reglist(out, double xmm0, double xmm9)]], "fpr_reg")
|
||||||
local fpr = ffi.C.fpr_reg
|
local fpr = ffi.C.fpr_reg
|
||||||
|
|
||||||
local function testrex(n1, n2)
|
local function testrex(n1, n2)
|
||||||
@ -95,15 +168,38 @@ if ffi.arch == "x64" then
|
|||||||
end
|
end
|
||||||
|
|
||||||
it("fpr_vec", function()
|
it("fpr_vec", function()
|
||||||
assert_cdef([[void fpr_vec(void* xmm7v) __mcode("90") __reglist(out, float4 xmm7v)]], "fpr_vec")
|
assert_cdef([[void fpr_vec(void* xmm7v) __mcode("90_E") __reglist(out, float4 xmm7v)]], "fpr_vec")
|
||||||
|
|
||||||
local v1 = ffi.new("float[4]", 1, 2, 3, 4)
|
local v1 = ffi.new("float[4]", 1, 2, 3, 4)
|
||||||
local xmmout = ffi.C.fpr_vec(v1)
|
local xmmout = ffi.C.fpr_vec(v1)
|
||||||
assert_v4eq(xmmout, 1, 2, 3, 4)
|
assert_v4eq(xmmout, 1, 2, 3, 4)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it("check extra register spill", function()
|
||||||
|
local array = ffi.new("float4", 1, 2, 3, 4)
|
||||||
|
|
||||||
|
-- Use up all gpr scratch registers before loading the vec, causing the wrapper builder to restart with an extra spill
|
||||||
|
assert_cdef([[void spillrestart(float4 xmm0v, int32_t eax, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebx) __mcode("?E")
|
||||||
|
__reglist(out, int32_t eax, int32_t ecx, int32_t edx, int32_t esi, int32_t edi, int32_t ebx, float4 xmm0v)
|
||||||
|
]])
|
||||||
|
|
||||||
|
local xmmtest = ffi.intrinsic("spillrestart", "\x90", 1)
|
||||||
|
local eax, ecx, edx, esi, edi, ebx, xmmout = xmmtest(array, 1, 2, 3, 4, 5, 6)
|
||||||
|
|
||||||
|
assert_equal(eax, 1)
|
||||||
|
assert_equal(ecx, 2)
|
||||||
|
assert_equal(edx, 3)
|
||||||
|
assert_equal(esi, 4)
|
||||||
|
assert_equal(edi, 5)
|
||||||
|
assert_equal(ebx, 6)
|
||||||
|
|
||||||
|
for i=1,4 do
|
||||||
|
assert_equal(xmmout[i-1], i)
|
||||||
|
end
|
||||||
|
end)
|
||||||
|
|
||||||
it("idiv", function()
|
it("idiv", function()
|
||||||
assert_cdef([[void idiv(int32_t eax, int32_t ecx) __mcode("99F7F9") __reglist(out, int32_t eax, int32_t edx)]], "idiv")
|
assert_cdef([[void idiv(int32_t eax, int32_t ecx) __mcode("99F7F9_E") __reglist(out, int32_t eax, int32_t edx)]], "idiv")
|
||||||
|
|
||||||
local function checker(i, result, remainder)
|
local function checker(i, result, remainder)
|
||||||
local rem = i%3
|
local rem = i%3
|
||||||
@ -169,22 +265,22 @@ context("__mcode", function()
|
|||||||
end)
|
end)
|
||||||
|
|
||||||
it("invalid registers", function()
|
it("invalid registers", function()
|
||||||
assert_cdef([[void validreg_gpr(int eax) __mcode("90");]], "validreg_gpr")
|
assert_cdef([[void validreg_gpr(int eax) __mcode("90_E");]], "validreg_gpr")
|
||||||
|
|
||||||
assert_cdeferr([[void badreg_1(int e) __mcode("90");]], "invalid")
|
assert_cdeferr([[void badreg_1(int e) __mcode("90_E");]], "invalid")
|
||||||
assert_cdeferr([[void badreg_1(int r20d) __mcode("90");]], "invalid")
|
assert_cdeferr([[void badreg_1(int r20d) __mcode("90_E");]], "invalid")
|
||||||
assert_cdeferr([[void badreg_gpr1() __mcode("90") __reglist(out, int e);]], "invalid")
|
assert_cdeferr([[void badreg_gpr1() __mcode("90_E") __reglist(out, int e);]], "invalid")
|
||||||
assert_cdeferr([[void badreg_gpr2() __mcode("90") __reglist(mod, e);]], "invalid")
|
assert_cdeferr([[void badreg_gpr2() __mcode("90_E") __reglist(mod, e);]], "invalid")
|
||||||
|
|
||||||
assert_cdef([[void validreg_fpr(float xmm0) __mcode("90");]], "validreg_fpr")
|
assert_cdef([[void validreg_fpr(float xmm0) __mcode("90_E");]], "validreg_fpr")
|
||||||
|
|
||||||
assert_cdeferr([[void badreg_fpr1(float x) __mcode("90");]], "invalid")
|
assert_cdeferr([[void badreg_fpr1(float x) __mcode("90_E");]], "invalid")
|
||||||
assert_cdeferr([[void badreg_fpr1(float xm) __mcode("90");]], "invalid")
|
assert_cdeferr([[void badreg_fpr1(float xm) __mcode("90_E");]], "invalid")
|
||||||
assert_cdeferr([[void badreg_fpr1(float xm0) __mcode("90");]], "invalid")
|
assert_cdeferr([[void badreg_fpr1(float xm0) __mcode("90_E");]], "invalid")
|
||||||
assert_cdeferr([[void badreg_fpr1(float xmmm0) __mcode("90");]], "invalid")
|
assert_cdeferr([[void badreg_fpr1(float xmmm0) __mcode("90_E");]], "invalid")
|
||||||
assert_cdeferr([[void badreg_fpr2(float xmm0vf) __mcode("90");]], "invalid")
|
assert_cdeferr([[void badreg_fpr2(float xmm0vf) __mcode("90_E");]], "invalid")
|
||||||
--xmm register number too large
|
--xmm register number too large
|
||||||
assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90");]], "invalid")
|
assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90_E");]], "invalid")
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it("multidef rollback", function()
|
it("multidef rollback", function()
|
||||||
@ -199,7 +295,7 @@ context("__mcode", function()
|
|||||||
assert_error(function() ffi.C.multi2() end)
|
assert_error(function() ffi.C.multi2() end)
|
||||||
|
|
||||||
assert_not_error(function() ffi.cdef[[
|
assert_not_error(function() ffi.cdef[[
|
||||||
void multi1(int32_t eax) __mcode("90") __reglist(out, int32_t eax);
|
void multi1(int32_t eax) __mcode("90_E") __reglist(out, int32_t eax);
|
||||||
]] end)
|
]] end)
|
||||||
|
|
||||||
assert_equal(ffi.C.multi1(1.1), 1)
|
assert_equal(ffi.C.multi1(1.1), 1)
|
||||||
@ -217,7 +313,7 @@ context("__mcode", function()
|
|||||||
end)
|
end)
|
||||||
|
|
||||||
it("bad args", function()
|
it("bad args", function()
|
||||||
assert_cdef([[void idiv2(int32_t eax, int32_t ecx) __mcode("99F7F9") __reglist(out, int32_t eax, int32_t edx)]], "idiv2")
|
assert_cdef([[void idiv2(int32_t eax, int32_t ecx) __mcode("99F7F9_E") __reglist(out, int32_t eax, int32_t edx)]], "idiv2")
|
||||||
|
|
||||||
local idiv = ffi.C.idiv2
|
local idiv = ffi.C.idiv2
|
||||||
|
|
||||||
@ -232,10 +328,44 @@ context("__mcode", function()
|
|||||||
assert_error(function() idiv(1, 2, nil) end)
|
assert_error(function() idiv(1, 2, nil) end)
|
||||||
assert_error(function() idiv(1, 2, 3) end)
|
assert_error(function() idiv(1, 2, 3) end)
|
||||||
assert_error(function() idiv(1, 2, 3, 4) end)
|
assert_error(function() idiv(1, 2, 3, 4) end)
|
||||||
|
end)
|
||||||
|
|
||||||
|
it("idiv(template)", function()
|
||||||
|
assert_cdef([[void idivT(int32_t eax, int32_t ecx) __mcode("?E") __reglist(out, int32_t eax, int32_t edx)]])
|
||||||
|
--trying to create template intrinsic through C library should always fail
|
||||||
|
assert_error(function() return ffi.C.idivT end)
|
||||||
|
|
||||||
|
local idiv = ffi.intrinsic("idivT", "\x99\xF7\xF9", 3)
|
||||||
|
|
||||||
|
local function checker(i, result, remainder)
|
||||||
|
local rem = i%2
|
||||||
|
|
||||||
|
if rem ~= remainder then
|
||||||
|
return rem, remainder
|
||||||
|
end
|
||||||
|
|
||||||
|
local expected = (i-rem)/2
|
||||||
|
|
||||||
|
if expected ~= result then
|
||||||
|
return expected, result
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
local function test_idiv(value, divisor)
|
||||||
|
local result, remainder = idiv(value, divisor)
|
||||||
|
return result, remainder
|
||||||
|
end
|
||||||
|
|
||||||
|
assert_jitchecker(checker, test_idiv, 2)
|
||||||
|
|
||||||
|
-- create a second instance and check guard for wrapper pointer fails
|
||||||
|
idiv = ffi.intrinsic("idivT", "\x90", 1)
|
||||||
|
|
||||||
|
assert_exit(10, test_idiv, 10, 5)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it("cpuid_brand", function()
|
it("cpuid_brand", function()
|
||||||
assert_cdef([[void cpuid(int32_t eax, int32_t ecx) __mcode("0FA2") __reglist(out, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx);]], "cpuid")
|
assert_cdef([[void cpuid(int32_t eax, int32_t ecx) __mcode("0FA2_E") __reglist(out, int32_t eax, int32_t ebx, int32_t ecx, int32_t edx);]], "cpuid")
|
||||||
|
|
||||||
local cpuid = ffi.C.cpuid
|
local cpuid = ffi.C.cpuid
|
||||||
|
|
||||||
@ -304,20 +434,20 @@ context("__reglist", function()
|
|||||||
|
|
||||||
it("stack pointer blacklist", function()
|
it("stack pointer blacklist", function()
|
||||||
|
|
||||||
assert_cdeferr([[void blacklist_in(int esp) __mcode("90")]], "blacklist")
|
assert_cdeferr([[void blacklist_in(int esp) __mcode("90_E")]], "blacklist")
|
||||||
assert_cdeferr([[void blacklist_out(int eax) __mcode("90") __reglist(out, int esp)]], "blacklist")
|
assert_cdeferr([[void blacklist_out(int eax) __mcode("90_E") __reglist(out, int esp)]], "blacklist")
|
||||||
--FIXME
|
--FIXME
|
||||||
--assert_cdeferr([[void blacklist_mod(int eax) __mcode("90") __reglist(mod, esp)]], "blacklist")
|
--assert_cdeferr([[void blacklist_mod(int eax) __mcode("90_E") __reglist(mod, esp)]], "blacklist")
|
||||||
|
|
||||||
if ffi.arch == "x64" then
|
if ffi.arch == "x64" then
|
||||||
assert_cdeferr([[void blacklist_64(int rsp) __mcode("90")]], "blacklist")
|
assert_cdeferr([[void blacklist_64(int rsp) __mcode("90_E")]], "blacklist")
|
||||||
end
|
end
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it("duplicate regs", function()
|
it("duplicate regs", function()
|
||||||
assert_cdeferr([[void duplicate_in(int eax, int eax) __mcode("90")]], "duplicate")
|
assert_cdeferr([[void duplicate_in(int eax, int eax) __mcode("90_E")]], "duplicate")
|
||||||
assert_cdeferr([[void duplicate_inxmm(float4 xmm0, float4 xmm0) __mcode("90")]], "duplicate")
|
assert_cdeferr([[void duplicate_inxmm(float4 xmm0, float4 xmm0) __mcode("90_E")]], "duplicate")
|
||||||
assert_cdeferr([[void duplicate_out(int eax) __mcode("90") __reglist(out, int eax, int eax)]], "duplicate")
|
assert_cdeferr([[void duplicate_out(int eax) __mcode("90_E") __reglist(out, int eax, int eax)]], "duplicate")
|
||||||
--FIXME assert_cdeferr([[void duplicate_mod(int eax) __mcode("90_E") __reglist(mod, eax, eax)]], "duplicate")
|
--FIXME assert_cdeferr([[void duplicate_mod(int eax) __mcode("90_E") __reglist(mod, eax, eax)]], "duplicate")
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user