mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-08 15:34:09 +00:00
Added JIT support for intrinsics. Support for vector registers is NYI.
This commit is contained in:
parent
57ff67552a
commit
f7331e9464
72
src/lj_asm.c
72
src/lj_asm.c
@ -1305,6 +1305,58 @@ static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
|
||||
return (nargs | (ir->t.irt << CCI_OTSHIFT));
|
||||
}
|
||||
|
||||
#if LJ_HASINTRINSICS
|
||||
/* Set up register hints for an IR_INTRN instruction and its arguments.
**
** Walks the IR_CARG chain hanging off ir->op1 from the last input to the
** first. Every input bound to a fixed register adds that register to the
** returned set, and a non-constant argument value that still has the initial
** reg/spill state is hinted towards it. Dynamic inputs of opcode intrinsics
** are skipped. The hint for the first output is stored in ir->prev.
**
** Returns the intrinsic's modified register set extended by its fixed input
** and fixed output registers.
*/
static RegSet asm_intrinsichints(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CIntrinsic *intrins = lj_intrinsic_get(cts, ir->op2);
  RegSet mod = intrin_getmodrset(cts, intrins);
  IRIns *ira = IR(ir->op1);
  int dynreg = intrin_regmode(intrins);
  int i;

  /* Intrinsics created from a template carry the wrapper pointer as the last
  ** CARG of the chain (see asm_intrinsic). Step over it so that in[i] is
  ** matched against the i'th argument and not its successor.
  */
  if (!intrins->wrapped)
    ira = IR(ira->op1);

  /* Propagate the fixed input registers to the refs passed in for them. */
  for (i = intrins->insz-1; i >= 0; i--, ira = IR(ira->op1)) {
    Reg r = reg_rid(intrins->in[i]);
    IRIns *irval;

    if (dynreg && i < intrins->dyninsz)
      continue;  /* Dynamic register, so no hint needed. */

    rset_set(mod, r);

    if (irref_isk(ira->op2))
      continue;  /* Constants are materialized directly into the register. */

    /* Back-propagate the register to the argument's value, but only if
    ** nothing else has claimed a register or hint for it yet.
    */
    irval = IR(ira->op2);
    if (irval->prev == REGSP_INIT)
      irval->prev = REGSP_HINT(r);
  }

  if (intrins->outsz > 0) {
    int dynrout = intrin_dynrout(intrins) ? 1 : 0;

    /* All fixed output registers are modified. Use rset_set() instead of a
    ** plain int shift so the bit is computed in RegSet width.
    */
    for (i = dynrout; i < intrins->outsz; i++)
      rset_set(mod, reg_rid(intrins->out[i]));

    /* A dynamic first output is left unhinted, a fixed one is hinted. */
    ir->prev = dynrout ? REGSP_INIT : REGSP_HINT(reg_rid(intrins->out[0]));
  }

  return mod;
}
|
||||
#endif
|
||||
|
||||
static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
|
||||
{
|
||||
const CCallInfo *ci = &lj_ir_callinfo[id];
|
||||
@ -1743,6 +1795,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
|
||||
case IR_CALLXS: asm_callx(as, ir); break;
|
||||
case IR_CARG: break;
|
||||
|
||||
case IR_INTRN: asm_intrinsic(as, ir, NULL); break;
|
||||
case IR_ASMEND: asm_intrinsic(as, IR(ir->op2), ir); break;
|
||||
case IR_ASMRET: break;
|
||||
|
||||
default:
|
||||
setintV(&as->J->errinfo, ir->o);
|
||||
lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
|
||||
@ -2103,6 +2159,22 @@ static void asm_setup_regsp(ASMState *as)
|
||||
as->modset |= RSET_SCRATCH;
|
||||
continue;
|
||||
}
|
||||
#if LJ_HASINTRINSICS
|
||||
case IR_INTRN: {
|
||||
Reg mod = asm_intrinsichints(as, ir);
|
||||
if (inloop)
|
||||
as->modset |= mod;
|
||||
continue;
|
||||
}
|
||||
|
||||
case IR_ASMRET: {
|
||||
Reg r = reg_rid(ir->op2);
|
||||
ir->prev = REGSP_HINT(r);
|
||||
if (inloop)
|
||||
rset_set(as->modset, r);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
|
||||
const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
|
||||
ir->prev = asm_setup_call_slots(as, ir, ci);
|
||||
|
417
src/lj_asm_x86.h
417
src/lj_asm_x86.h
@ -397,6 +397,31 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
|
||||
return RID_MRM;
|
||||
}
|
||||
|
||||
/* Try to fuse the IR_XLOAD at 'ref' into a memory operand.
**
** Returns 1 after setting up the operand through asm_fusexref(), or 0 if the
** load must not be fused: 8/16 bit loads, a load too far away from the
** current instruction, an intervening IR_XSTORE, or another instruction
** between the load and the current one that references the load. The
** instruction at 'skip' may reference the load without blocking fusion.
*/
static int asm_fusexload(ASMState *as, IRRef ref, RegSet xallow, IRRef skip)
{
  IRIns *irload = IR(ref);
  IRIns *irbase = as->ir;
  IRRef cur;

  lua_assert(irload->o == IR_XLOAD);

  /* Generic fusion is not ok for 8/16 bit operands (but see asm_comp).
  ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
  */
  if (irt_typerange(irload->t, IRT_I8, IRT_U16))
    return 0;

  if (as->curins > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */

  /* Scan everything strictly between the load and the current instruction. */
  for (cur = as->curins - 1; cur > ref; cur--) {
    IRIns *ins = &irbase[cur];
    if (ins->o == IR_XSTORE)
      return 0;  /* Conflict found. */
    if (cur != skip && (ins->op1 == ref || ins->op2 == ref))
      return 0;  /* The load is referenced by someone else in between. */
  }

  asm_fusexref(as, irload->op1, xallow);
  return 1;
}
|
||||
|
||||
/* Fuse load into memory operand.
|
||||
**
|
||||
** Important caveat: this may emit RIP-relative loads! So don't place any
|
||||
@ -467,12 +492,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
return RID_MRM;
|
||||
}
|
||||
} else if (ir->o == IR_XLOAD) {
|
||||
/* Generic fusion is not ok for 8/16 bit operands (but see asm_comp).
|
||||
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
|
||||
*/
|
||||
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
|
||||
noconflict(as, ref, IR_XSTORE, 0)) {
|
||||
asm_fusexref(as, ir->op1, xallow);
|
||||
if (asm_fusexload(as, ref, xallow, REF_NIL)) {
|
||||
return RID_MRM;
|
||||
}
|
||||
} else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
|
||||
@ -642,6 +662,366 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
||||
#endif
|
||||
}
|
||||
|
||||
#if LJ_HASINTRINSICS
|
||||
|
||||
typedef struct IntrinsInfo {
|
||||
CIntrinsic *intrins;
|
||||
IRIns *asmend;
|
||||
IRRef args[LJ_INTRINS_MAXREG];
|
||||
/* input register list that gets mutated for opcode intrinsics */
|
||||
uint8_t inregs[LJ_INTRINS_MAXREG];
|
||||
RegSet inset, outset, modset;
|
||||
/* First CARG ref used as limit for duplicate load checking when fusing */
|
||||
IRRef a1;
|
||||
} IntrinsInfo;
|
||||
|
||||
static int asm_swaprefs(ASMState *as, IRIns *ir, IRRef lref, IRRef rref);
|
||||
|
||||
/* Move or load the intrinsic's arguments into their input registers.
**
** Inputs whose entry in ininfo->inregs is 0xff were already set up by the
** opcode path and are skipped. Constants headed for a GPR are loaded as
** immediates; everything else is either moved from the register it already
** lives in or allocated directly to the required register.
*/
static void asm_asmsetupargs(ASMState *as, IntrinsInfo *ininfo)
{
  CIntrinsic *intrins = ininfo->intrins;
  MSize idx;

  for (idx = 0; idx < intrins->insz; idx++) {
    IRRef argref = ininfo->args[idx];
    IRIns *irarg = IR(argref);
    Reg target = reg_rid(ininfo->inregs[idx]);

    /* Skip any dynamic registers already set up by opcode intrinsics. */
    if (ininfo->inregs[idx] == 0xff) {
      lua_assert(idx < intrins->dyninsz);
      continue;
    }

    if (!ra_hasreg(irarg->r) && target < RID_MAX_GPR && argref < ASMREF_TMP1) {
      /* Constant argument for a GPR: emit an immediate load. */
#if LJ_64
      if (irarg->o == IR_KINT64)
        emit_loadu64(as, target, ir_kint64(irarg)->u64);
      else
#endif
        emit_loadi(as, target, irarg->i);
    } else {
      /* A fixed register must have been evicted earlier. */
      lua_assert(rset_test(as->freeset, target) ||
                 regcost_ref(as->cost[target]) == argref ||
                 idx < intrins->dyninsz);

      if (!ra_hasreg(irarg->r)) {
        /* Dynamic registers should never end up here. */
        lua_assert(!intrin_regmode(intrins) || idx >= intrins->dyninsz);
        ra_allocref(as, argref, RID2RSET(target));
      } else {
        ra_noweak(as, irarg->r);
        if (target != irarg->r) {
          lua_assert(idx >= intrins->dyninsz);
          emit_movrr(as, irarg, target, irarg->r);
        }
      }
    }
    checkmclim(as);
  }
}
|
||||
|
||||
/* Assemble an opcode intrinsic, i.e. one that uses dynamic registers.
**
** Picks the registers (or a fused memory operand) for the one or two dynamic
** inputs and the optional dynamic output, emits the instruction and records
** the outcome in ininfo->inregs: entries are rewritten to the chosen register
** or set to 0xff when asm_asmsetupargs() has nothing left to do for them.
** args[0] is treated as the "right" (ModRM r/m) operand and args[1] as the
** "left"/destination operand.
*/
static void asm_intrin_opcode(ASMState *as, IRIns *ir, IntrinsInfo *ininfo)
{
  CIntrinsic *intrins = ininfo->intrins;
  IRRef *args = ininfo->args;
  uint8_t *in = ininfo->inregs;
  uint32_t dynreg = intrin_regmode(intrins);
  RegSet allow;
  IRRef lref = 0, rref = 0;
  Reg right, dest = RID_NONE;
  int dynrout = intrins->outsz > 0 && intrin_dynrout(intrins);

  /* Swap the refs to native ordering. */
  if (dynreg >= DYNREG_SWAPREGS) {
    IRRef temp = args[0];
    args[0] = args[1]; args[1] = temp;
  }

  rref = args[0];
  right = IR(rref)->r;

  if (intrins->dyninsz > 1) {
    lref = args[1];
    dest = IR(lref)->r;

    if (ra_hasreg(dest))
      in[1] = reg_setrid(in[1], dest);
  }

  as->mrm.idx = as->mrm.base = RID_NONE;
  as->mrm.scale = as->mrm.ofs = 0;

  /* Allocate the dynamic output register if we have one. */
  if (dynrout) {
    allow = reg_torset(intrins->out[0]);
    if (ra_hasreg(right)) {
      rset_clear(allow, right);
      ra_noweak(as, right);
    }
    dest = ra_dest(as, ir, allow);
    if (dynreg == DYNREG_OPEXT) {
      /* Set input register the same as the output since the op is destructive. */
      right = dest;
    }
  }

  if (intrins->dyninsz > 1 && dynreg != DYNREG_TWOSTORE) {
    if (lref == rref) {
      if (dynreg == DYNREG_INOUT)
        right = dest;
      /* Only load/move the value to a register once.
      ** ra_left will do the move for INOUT.
      */
      in[0] = 0xff;
    } else if (ra_noreg(right)) {
      if (intrin_iscomm(intrins) && asm_swaprefs(as, ir, lref, rref)) {
        IRRef tmp = lref; lref = rref; rref = tmp;
        /* Must be same register kinds and RID register type ranges. */
        lua_assert(reg_isgpr(in[0]) == reg_isgpr(in[1]) &&
                   reg_kind(in[0]) == reg_kind(in[1]));

        args[0] = rref;
        args[1] = lref;
        /* lref (now swapped to rref) may already have a register set, so
        ** update the right register to it in case we don't fuse a load.
        */
        right = IR(rref)->r;
        /* NOTE(review): dest and in[1] were derived from the old lref and are
        ** not refreshed here. Looks harmless when a dynamic output supplied
        ** dest, but confirm for commutative ops without one.
        */
      }
      if (!(intrins->flags & INTRINSFLAG_NOFUSE) && !ra_hasreg(right)) {
        RegSet rallow = reg_torset(in[0]);
        /* dest may still be RID_NONE or a hint here; only a real register id
        ** is a valid bit index for the register set.
        */
        if (ra_hasreg(dest))
          rset_clear(rallow, dest);
        /* Handle XLOAD directly so we can tell the conflict check to skip our
        ** IR_CARG that holds the ref of the load we're fusing.
        */
        if (IR(rref)->o == IR_XLOAD) {
          if (mayfuse(as, rref) && asm_fusexload(as, rref, rallow, ininfo->a1)) {
            right = RID_MRM;
          }
        } else {
          right = asm_fuseload(as, rref, rallow);
        }
      }
    }
  } else if (intrins->flags & INTRINSFLAG_INDIRECT) {
    /* Force indirect ModRM mode. rref should always be a memory address. */
    if (ra_noreg(right)) {
      allow = RSET_GPR & ~ininfo->inset;
      /* If part of the opcode is encoded in ModRM, avoid picking a register
      ** that will corrupt it.
      */
      if (dynreg == DYNREG_OPEXT)
        rset_clear(allow, RID_EBP);
      asm_fusexref(as, rref, allow);
    } else {
      as->mrm.base = IR(rref)->r;
    }
    right = RID_MRM;
  }

  /* Handle the second input register for any two-input dynamic register mode
  ** which isn't DYNREG_INOUT.
  */
  if (intrins->dyninsz > 1 && ra_noreg(dest)) {
    Reg r;
    allow = reg_torset(in[1]) & ~ininfo->inset;
    if (ra_hasreg(right) && right != RID_MRM)
      rset_clear(allow, right);

    r = ra_allocref(as, args[1], allow);
    in[1] = reg_setrid(in[1], r);
    dest = r;
  }

  if (right == RID_MRM) {
    /* Skip trying to load what we fused into the instruction. */
    in[0] = 0xff;
  } else {
    if (ra_noreg(right)) {
      lua_assert(ra_noreg(IR(rref)->r));
      allow = reg_torset(in[0]);
      /* Single-input intrinsics without a dynamic output leave dest unset. */
      if (ra_hasreg(dest))
        rset_clear(allow, dest);
      if (dynreg == DYNREG_OPEXT)
        rset_clear(allow, RID_EBP);
      right = ra_allocref(as, rref, allow);
    }
    in[0] = reg_setrid(in[0], right);
  }

  lua_assert(ra_hasreg(right) && (ra_hasreg(dest) || intrins->dyninsz < 2));
  emit_intrins(as, intrins, right, dest);

  if (dynreg == DYNREG_INOUT) {
    lua_assert(lref);
    ra_left(as, dest, lref);
    /* No need to load the register since ra_left already did. */
    in[1] = 0xff;
  } else if (dynreg == DYNREG_OPEXT && dynrout) {
    /* Handle destructive ONEOPEXT opcodes. */
    lua_assert(rref);
    ra_left(as, dest, rref);
    in[0] = 0xff;
  }

  checkmclim(as);
}
|
||||
|
||||
/* Set up the results of an intrinsic and evict clobbered registers.
**
** Collects the IR instructions holding the outputs (the IR_INTRN itself plus
** the IR_ASMRET chain reached through ininfo->asmend, if any), fills in
** ininfo->inset and ininfo->outset, evicts everything living in modified or
** needed fixed registers and finally binds each used fixed output to its
** register with ra_destreg().
*/
void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo)
{
  IRRef results[LJ_INTRINS_MAXREG];
  RegSet evict = ininfo->modset, outset = 0, aout = 0;
  int32_t dynout = intrin_dynrout(intrins) ? 1 : 0;
  int32_t i;

  /* Gather the output register IR instructions, last output first. */
  if (intrins->outsz > 0) {
    IRIns *irret = ininfo->asmend ? IR(ininfo->asmend->op1) : ir;
    int32_t n;

    for (n = intrins->outsz-1;; n--) {
      lua_assert(n >= 0 && (irret->o == IR_ASMRET || irret->o == IR_INTRN));
      results[n] = (IRRef)(irret - as->ir);

      /* Remember fixed outputs that already live in their own output
      ** register. The comparison has to use the output register list; the
      ** input list only matches by accident when in[n] and out[n] coincide.
      */
      if (ra_used(irret) && n >= dynout &&
          irret->r == reg_rid(intrins->out[n])) {
        rset_set(aout, irret->r);
      }

      if (irret->o == IR_INTRN)
        break;
      irret = IR(irret->op1);
    }
  }

  /* Check what registers need evicting for fixed input registers. */
  for (i = intrin_regmode(intrins) ? intrins->dyninsz : 0;
       i < intrins->insz; i++) {
    Reg r = reg_rid(intrins->in[i]);
    IRIns *arg = IR(ininfo->args[i]);

    ininfo->inset |= RID2RSET(r);
    /* Don't evict if the arg was allocated the correct register. */
    if (!rset_test(as->freeset, r) && arg->r != r)
      evict |= RID2RSET(r);
  }

  for (i = dynout; i < intrins->outsz; i++)
    outset |= RID2RSET(reg_rid(intrins->out[i]));
  ininfo->outset = outset;

  /* Don't evict registers that currently have our output values live in them. */
  evict &= ~aout;

  /* Evict any values in input and modified registers. Fixed output registers
  ** that are unused or didn't get allocated the same register as their fixed
  ** one are handled below.
  */
  ra_evictset(as, evict);

  /* Handle any fixed output registers, last output first. */
  for (i = intrins->outsz-1; i >= dynout; i--) {
    IRIns *irret = IR(results[i]);
    /* Strip the register kind bits: only the id is a valid register. */
    Reg r = reg_rid(intrins->out[i]);

    if (!ra_used(irret) || (!rset_test(as->freeset, r) && irret->r != r)) {
      ra_evictset(as, RID2RSET(r));
      if (!ra_used(irret))
        continue;  /* Result is dead, nothing to bind. */
    }

    ra_destreg(as, irret, r);
  }
}
|
||||
|
||||
/* Assemble an intrinsic call.
**
** 'ir' is the IR_INTRN instruction. 'asmend' is the IR_ASMEND that closes a
** multi-result intrinsic, or NULL when assembling the IR_INTRN directly.
** Machine code is emitted backwards: results first, then the instruction or
** call, then the argument setup.
*/
static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CIntrinsic *intrins = lj_intrinsic_get(cts, ir->op2);
  IRIns *irarg = ir;
  uintptr_t addr = 0;
  uint32_t slot;
  IntrinsInfo info;

  memset(&info, 0, sizeof(IntrinsInfo));
  info.intrins = intrins;
  info.modset = intrin_getmodrset(cts, intrins);
  info.asmend = asmend;
  memcpy(info.inregs, intrins->in, sizeof(info.inregs));

  if (intrins->wrapped) {
    addr = (uintptr_t)intrins->wrapped;
  } else {
    /* Last CARG in the chain is the wrapper pointer. */
    IRIns *irk;
    irarg = IR(irarg->op1);
    irk = IR(irarg->op2);
#if LJ_64
    if (irk->o == IR_KINT64)
      addr = (uintptr_t)ir_k64(irk)->u64;
#endif
    if (!addr)
      addr = (uintptr_t)irk->i;
  }

  /* Collect the input register argument refs, last argument first. */
  slot = intrins->insz;
  while (irarg->op1 != REF_NIL) {
    irarg = IR(irarg->op1);
    lua_assert(irarg->o == IR_CARG);
    info.args[--slot] = irarg->op2;
    /* Save the ref of our first CARG so we can use it to skip the arg chain
    ** when looking for conflicts while fusing an XLOAD.
    */
    if (slot == 0)
      info.a1 = (IRRef)(irarg - as->ir);
  }
  lua_assert(slot == 0);

  asm_intrin_results(as, ir, intrins, &info);

  if (intrin_regmode(intrins)) {
    asm_intrin_opcode(as, ir, &info);
  } else {
    Reg scratch = 0;

    if (intrins->flags & INTRINSFLAG_CALLED) {
      AsmHeader *hdr = ((AsmHeader*)addr)-1;
      MCode *p;
      addr = intrins->flags & INTRINSFLAG_INDIRECT ?
             hdr->target : (addr+hdr->asmofs);
      p = (MCode*)addr;
      /* A scratch GPR is needed if the displacement doesn't fit in 32 bits. */
      if (LJ_64 && (p-as->mcp) != (int32_t)(p-as->mcp))
        scratch = ra_scratch(as, RSET_GPR & ~(info.inset | info.outset));
    }
    emit_intrins(as, intrins, scratch, addr);
  }

  asm_asmsetupargs(as, &info);

  /* Skip over our IR_INTRN since we're emitting from the tail. */
  if (info.asmend)
    as->curins = (IRRef)(ir - as->ir);
}
|
||||
#else
|
||||
/* Stub used when intrinsics are compiled out. Does nothing. */
static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
{
  /* Mark every parameter as unused, including asmend. */
  UNUSED(as); UNUSED(ir); UNUSED(asmend);
}
|
||||
|
||||
/* Stub used when intrinsics are compiled out. Does nothing. */
void asm_asmret(ASMState *as, IRIns *ir)
{
  UNUSED(as);
  UNUSED(ir);
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Setup result reg/sp for call. Evict scratch regs. */
|
||||
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
||||
{
|
||||
@ -1395,7 +1775,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
|
||||
asm_fusefref(as, ir, RSET_GPR);
|
||||
else
|
||||
asm_fusexref(as, ir->op1, RSET_GPR);
|
||||
/* ir->op2 is ignored -- unaligned loads are ok on x86. */
|
||||
/* ir->op2 is ignored for non vectors -- unaligned loads are ok on x86. */
|
||||
switch (irt_type(ir->t)) {
|
||||
case IRT_I8: xo = XO_MOVSXb; break;
|
||||
case IRT_U8: xo = XO_MOVZXb; break;
|
||||
@ -1462,7 +1842,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
|
||||
xo = XO_MOVto;
|
||||
break;
|
||||
}
|
||||
|
||||
emit_mrm(as, xo, src, RID_MRM);
|
||||
|
||||
if (!LJ_64 && src != osrc) {
|
||||
ra_noweak(as, osrc);
|
||||
emit_rr(as, XO_MOV, src, osrc);
|
||||
@ -2004,23 +2386,21 @@ static void asm_pow(ASMState *as, IRIns *ir)
|
||||
asm_fppowi(as, ir);
|
||||
}
|
||||
|
||||
static int asm_swapops(ASMState *as, IRIns *ir)
|
||||
static int asm_swaprefs(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
|
||||
{
|
||||
IRIns *irl = IR(ir->op1);
|
||||
IRIns *irr = IR(ir->op2);
|
||||
IRIns *irl = IR(lref);
|
||||
IRIns *irr = IR(rref);
|
||||
lua_assert(ra_noreg(irr->r));
|
||||
if (!irm_iscomm(lj_ir_mode[ir->o]))
|
||||
return 0; /* Can't swap non-commutative operations. */
|
||||
if (irref_isk(ir->op2))
|
||||
if (irref_isk(rref))
|
||||
return 0; /* Don't swap constants to the left. */
|
||||
if (ra_hasreg(irl->r))
|
||||
return 1; /* Swap if left already has a register. */
|
||||
if (ra_samehint(ir->r, irr->r))
|
||||
return 1; /* Swap if dest and right have matching hints. */
|
||||
if (as->curins > as->loopref) { /* In variant part? */
|
||||
if (ir->op2 < as->loopref && !irt_isphi(irr->t))
|
||||
if (rref < as->loopref && !irt_isphi(irr->t))
|
||||
return 0; /* Keep invariants on the right. */
|
||||
if (ir->op1 < as->loopref && !irt_isphi(irl->t))
|
||||
if (lref < as->loopref && !irt_isphi(irl->t))
|
||||
return 1; /* Swap invariants to the right. */
|
||||
}
|
||||
if (opisfusableload(irl->o))
|
||||
@ -2028,6 +2408,13 @@ static int asm_swapops(ASMState *as, IRIns *ir)
|
||||
return 0; /* Otherwise don't swap. */
|
||||
}
|
||||
|
||||
/* Decide whether the operands of 'ir' should be swapped.
** Returns 0 for non-commutative operations, otherwise defers to
** asm_swaprefs() on the instruction's own operands.
*/
static int asm_swapops(ASMState *as, IRIns *ir)
{
  /* Can't swap non-commutative operations. */
  return irm_iscomm(lj_ir_mode[ir->o]) ?
         asm_swaprefs(as, ir, ir->op1, ir->op2) : 0;
}
|
||||
|
||||
static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo)
|
||||
{
|
||||
IRRef lref = ir->op1;
|
||||
|
149
src/lj_crecord.c
149
src/lj_crecord.c
@ -33,6 +33,7 @@
|
||||
#include "lj_dispatch.h"
|
||||
#include "lj_strfmt.h"
|
||||
#include "lj_intrinsic.h"
|
||||
#include "lj_target.h"
|
||||
|
||||
/* Some local macros to save typing. Undef'd at the end. */
|
||||
#define IR(ref) (&J->cur.ir[(ref)])
|
||||
@ -1201,6 +1202,8 @@ static void crec_snap_caller(jit_State *J)
|
||||
J->base[-1-LJ_FR2] = ftr; J->pc = pc;
|
||||
}
|
||||
|
||||
void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *cts);
|
||||
|
||||
/* Record function call. */
|
||||
static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
|
||||
{
|
||||
@ -1212,7 +1215,8 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
|
||||
ct = ctype_rawchild(cts, ct);
|
||||
}
|
||||
if (ctype_isintrinsic(ct->info)) {
|
||||
lj_trace_err(J, LJ_TRERR_NYICALL);
|
||||
crec_call_intrins(J, rd, ct);
|
||||
return 1;
|
||||
}else if (ctype_isfunc(ct->info)) {
|
||||
TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR);
|
||||
CType *ctr = ctype_rawchild(cts, ct);
|
||||
@ -1273,6 +1277,138 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if LJ_HASINTRINSICS
|
||||
|
||||
/* Get the IR type for an intrinsic return value.
** 'arg' is the return value's ctype; its size field holds the register spec.
** GPR results take the IR type of the declared C type, FPR results the type
** implied by the register kind. Vector registers abort the trace (NYI).
*/
static IRType intrins_retit(jit_State *J, CTState *cts, CType *arg)
{
  uint32_t regspec = arg->size;
  IRType result;

  if (!reg_isgpr(regspec)) {
    if (reg_isvec(regspec)) {
      /* NYI: support for vectors */
      lj_trace_err(J, LJ_TRERR_NYIVEC);
    }
    return reg_irt(regspec);
  }

  result = crec_ct2irt(cts, ctype_rawchild(cts, arg));
  lua_assert(result != IRT_CDATA);
  return result;
}
|
||||
|
||||
/* Record a call to an intrinsic.
**
** Emits a guard on the wrapper pointer for template intrinsics, converts the
** arguments into an IR_CARG chain, emits the IR_INTRN (plus an IR_ASMRET per
** additional output and a closing IR_ASMEND) and finally boxes or converts
** the results into J->base[0..outsz-1]. Vector registers abort the trace.
**
** J     JIT state, J->base[0] is the intrinsic cdata, arguments follow.
** rd    Recorder data, rd->argv mirrors J->base; rd->nres is set to outsz.
** func  The intrinsic's function ctype.
*/
void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *func)
{
  CTState *cts = ctype_ctsG(J2G(J));
  TRef arg = TREF_NIL;
  CIntrinsic *intrins = lj_intrinsic_get(cts, func->size);
  void* target = *(void**)cdataptr(cdataV(&rd->argv[0]));
  MSize i;
  IRType it;
  int argofs = 1;
  CTypeID sib = func->sib;  /* Head of the parameter list chain. */
  CTypeID retid = ctype_cid(func->info);  /* Head of the return type chain. */

  /* Template intrinsics are specialized to the wrapper they were called with. */
  if (intrins->wrapped == NULL) {
    TRef tr = emitir(IRT(IR_FLOAD, IRT_INTP), J->base[0], IRFL_CDATA_PTR);
    emitir(IRTG(IR_EQ, IRT_INTP), tr, lj_ir_kintp(J, target));
  }

  /* Convert parameters and chain them up for the input registers. */
  for (i = 0; i < intrins->insz; i++) {
    CType *ct = ctype_get(cts, sib);
    TRef tra = J->base[i+argofs];
    CType *d = ctype_rawchild(cts, ct);
    sib = ct->sib;

    if (reg_isvec(ct->size)) {
      /* NYI: support for vectors */
      lj_trace_err(J, LJ_TRERR_NYIVEC);
    }

    tra = crec_ct_tv(J, d, 0, tra, &rd->argv[i+argofs]);
    arg = emitir(IRT(IR_CARG, IRT_NIL), arg, tra);
  }

  /* Append the wrapper pointer if we were created from a template. */
  if (intrins->wrapped == NULL) {
    arg = emitir(IRT(IR_CARG, IRT_NIL), arg, lj_ir_kintp(J, target));
  }

  it = IRT_NIL;
  if (intrins->outsz > 0) {
    it = intrins_retit(J, cts, ctype_get(cts, retid));
  }

  J->base[0] = emitir(IRT(IR_INTRN, it), arg, (func->size & LJ_INTRINS_MAXID));

  if (intrins->flags & INTRINSFLAG_MEMORYSIDE) {
    emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
  }

  if (intrins->outsz > 1) {
    arg = J->base[0];
    /* The first return type belongs to the IR_INTRN itself, so the ASMRET
    ** chain starts with the second entry of the return type list. This keeps
    ** the indexing in step with the boxing pass below.
    */
    sib = ctype_get(cts, retid)->sib;
    for (i = 1; i < intrins->outsz; i++) {
      CType *ct = ctype_get(cts, sib);
      uint32_t reg = ct->size;
      /* Same type rules as for the first result; vectors abort here. */
      IRType irt = intrins_retit(J, cts, ct);
      sib = ct->sib;

      J->base[i] = arg = emitir(IRT(IR_ASMRET, irt), arg, reg_rid(reg));
    }

    emitir(IRT(IR_ASMEND, IRT_NIL), arg, J->base[0]);
  }

  sib = retid;
  /* Second pass to box values after all ASMRET have run to shuffle/spill the
  ** output registers.
  */
  for (i = 0; i < intrins->outsz; i++) {
    CType *ct = ctype_get(cts, sib);
    CTypeID id = ctype_cid(ct->info);
    uint32_t reg = ct->size;
    uint32_t kind = reg_kind(reg);
    sib = ct->sib;

    if (reg_isgpr(reg)) {
      CTypeID cid = ctype_typeid(cts, ctype_raw(cts, id));
      if (cid != CTID_INT32) {
        /* Box the u32/64 bit value in the register. */
        J->base[i] = emitir(IRT(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), J->base[i]);
      }
    } else {
      if (kind == REGKIND_FPR32) {
        J->base[i] = emitconv(J->base[i], IRT_NUM, IRT_FLOAT, 0);
      } else if (rk_isvec(kind)) {
        /* NYI: support for vectors */
        lj_trace_err(J, LJ_TRERR_NYIVEC);
      } else {
        lua_assert(kind == REGKIND_FPR64);
      }
    }
  }

  /* Intrinsics are assumed to always have side effects. */
  J->needsnap = 1;
  rd->nres = intrins->outsz;
}
|
||||
#else
|
||||
/* Stub used when intrinsics are compiled out. Does nothing. */
void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *func)
{
  UNUSED(J);
  UNUSED(rd);
  UNUSED(func);
}
|
||||
#endif
|
||||
|
||||
void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
|
||||
{
|
||||
CTState *cts = ctype_ctsG(J2G(J));
|
||||
@ -1568,9 +1704,16 @@ void LJ_FASTCALL recff_clib_index(jit_State *J, RecordFFData *rd)
|
||||
CLibrary *cl = (CLibrary *)uddata(udataV(&rd->argv[0]));
|
||||
GCstr *name = strV(&rd->argv[1]);
|
||||
CType *ct;
|
||||
CTypeID id = lj_ctype_getname(cts, &ct, name, CLNS_INDEX);
|
||||
CTypeID id;
|
||||
cTValue *tv = lj_tab_getstr(cl->cache, name);
|
||||
rd->nres = rd->data;
|
||||
rd->nres = rd->data > 0 ? 1 : 0;
|
||||
if (rd->data < 2) {
|
||||
id = lj_ctype_getname(cts, &ct, name, CLNS_INDEX);
|
||||
} else {
|
||||
/* set some dummy values for the intrinsic namespace */
|
||||
id = CTID_VOID;
|
||||
ct = ctype_get(cts, id);
|
||||
}
|
||||
if (id && tv && !tvisnil(tv)) {
|
||||
/* Specialize to the symbol name and make the result a constant. */
|
||||
emitir(IRTG(IR_EQ, IRT_STR), J->base[1], lj_ir_kstr(J, name));
|
||||
|
@ -313,6 +313,9 @@ static int parse_opmode(const char *op, MSize len)
|
||||
case 'C':
|
||||
flags |= INTRINSFLAG_CALLED;
|
||||
break;
|
||||
case 'c':
|
||||
flags |= INTRINSFLAG_ISCOMM;
|
||||
break;
|
||||
case 'X':
|
||||
flags |= INTRINSFLAG_REXW;
|
||||
break;
|
||||
@ -681,6 +684,13 @@ int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opstr, uint32_t imm)
|
||||
if (opcode) {
|
||||
setopcode(L, intrins, opcode);
|
||||
}
|
||||
|
||||
if (intrin_iscomm(intrins) &&
|
||||
(intrins->insz < 2 || intrins->in[0] != intrins->in[1])) {
|
||||
lj_err_callerv(L, LJ_ERR_FFI_BADOPSTR, strdata(opstr),
|
||||
"bad registers for commutative mode");
|
||||
}
|
||||
|
||||
if (intrin_regmode(intrins) == DYNREG_FIXED) {
|
||||
/* dyninsz is overlapped by input registers 6/7/8 */
|
||||
if ((intrins->insz < 6 && intrins->dyninsz > 0) || dynout) {
|
||||
@ -837,7 +847,7 @@ int lj_intrinsic_call(CTState *cts, CType *ct)
|
||||
}
|
||||
|
||||
/* Swap input values around to match the platform ordering the wrapper expects */
|
||||
if (intrin_regmode(intrins) >= DYNREG_SWAPREGS &&
|
||||
if (intrin_regmode(intrins) >= DYNREG_SWAPREGS && !intrin_iscomm(intrins) &&
|
||||
reg_isgpr(intrins->in[0]) == reg_isgpr(intrins->in[1])) {
|
||||
if (reg_isgpr(intrins->in[0])) {
|
||||
intptr_t temp = context.gpr[0];
|
||||
|
@ -59,6 +59,8 @@ typedef enum INTRINSFLAGS {
|
||||
INTRINSFLAG_IMMB = 0x400,
|
||||
/* Opcode is larger than the emit system normally handles x86/x64(4 bytes) */
|
||||
INTRINSFLAG_LARGEOP = 0x800,
|
||||
/* Opcode is commutative allowing the input registers to be swapped to allow better fusing */
|
||||
INTRINSFLAG_ISCOMM = 0x1000,
|
||||
|
||||
/* Opcode uses ymm registers */
|
||||
INTRINSFLAG_VEX256 = 0x4000,
|
||||
@ -86,6 +88,7 @@ typedef struct AsmHeader {
|
||||
#define intrin_regmode(intrins) ((intrins)->flags & INTRINSFLAG_REGMODEMASK)
|
||||
#define intrin_setregmode(intrins, mode) \
|
||||
(intrins)->flags = ((intrins)->flags & ~INTRINSFLAG_REGMODEMASK)|(mode)
|
||||
#define intrin_iscomm(intrins) ((intrins)->flags & INTRINSFLAG_ISCOMM)
|
||||
|
||||
#define intrin_getopextb(intrins) ((intrins)->out[3])
|
||||
#define intrin_setopextb(intrins, opext) \
|
||||
@ -140,6 +143,7 @@ CTypeID1 regkind_ct[16];
|
||||
#define reg_isfp(reg) (reg_rid(reg) >= RID_MIN_FPR)
|
||||
#define reg_isvec(reg) (reg_rid(reg) >= RID_MIN_FPR && reg_kind(reg) >= REGKIND_VEC_START)
|
||||
#define reg_isdyn(reg) (reg_rid(reg) == RID_DYN_GPR || reg_rid(reg) == RID_DYN_FPR)
|
||||
#define reg_torset(reg) (reg_isgpr(reg) ? RSET_GPR : RSET_FPR)
|
||||
|
||||
#define reg_irt(reg) (reg_isgpr(reg) ? rk_irtgpr(reg_kind(reg)) : rk_irtfpr(reg_kind(reg)))
|
||||
#define rk_irtgpr(kind) ((IRType)regkind_it[(kind)])
|
||||
|
@ -145,6 +145,9 @@
|
||||
_(CALLS, S , ref, lit) \
|
||||
_(CALLXS, S , ref, ref) \
|
||||
_(CARG, N , ref, ref) \
|
||||
_(ASMRET, L, ref, lit) \
|
||||
_(INTRN, S, ref, lit) \
|
||||
_(ASMEND, S, ref, ref) \
|
||||
\
|
||||
/* End of list. */
|
||||
|
||||
|
@ -2405,6 +2405,8 @@ LJFOLD(TDUP any)
|
||||
LJFOLD(CNEW any any)
|
||||
LJFOLD(XSNEW any any)
|
||||
LJFOLD(BUFHDR any any)
|
||||
LJFOLD(INTRN any any)
|
||||
LJFOLD(ASMRET any any)
|
||||
LJFOLDX(lj_ir_emit)
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
|
@ -38,6 +38,7 @@ TREDEF(NYITMIX, "NYI: mixed sparse/dense table")
|
||||
TREDEF(NOCACHE, "symbol not in cache")
|
||||
TREDEF(NYICONV, "NYI: unsupported C type conversion")
|
||||
TREDEF(NYICALL, "NYI: unsupported C function type")
|
||||
TREDEF(NYIVEC, "NYI: unsupported vector operation or type")
|
||||
|
||||
/* Optimizations. */
|
||||
TREDEF(GFAIL, "guard would always fail")
|
||||
|
@ -335,6 +335,15 @@ context("__mcode", function()
|
||||
assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90_E");]], "invalid")
|
||||
end)
|
||||
|
||||
it("invalid commutative mode registers", function()
|
||||
assert_cdef([[int4 valid_comm(int4 v1, int4 v2) __mcode("90rMc");]], "valid_comm")
|
||||
-- must have at least 2 input arguments
|
||||
assert_cdeferr([[int4 invalid_comm1(int4 v1) __mcode("90rMc");]])
|
||||
-- input register types must match
|
||||
assert_cdeferr([[void invalid_comm2(int32_t i, int4 v1) __mcode("90rMc");]])
|
||||
assert_cdeferr([[void invalid_comm3(int4 v1, int32_t i) __mcode("90rMc");]])
|
||||
end)
|
||||
|
||||
it("multidef rollback", function()
|
||||
|
||||
--check ctype rollback after parsing a valid intrinsic the line before
|
||||
|
@ -4,22 +4,10 @@ local telescope = require("telescope")
|
||||
local ffi = require("ffi")
|
||||
local C = ffi.C
|
||||
|
||||
local function check(expect, func, ...)
|
||||
local result = func(...)
|
||||
assert(result == expect, tostring(result))
|
||||
return true
|
||||
end
|
||||
|
||||
telescope.make_assertion("jit", "", check)
|
||||
telescope.make_assertion("exit", "", check)
|
||||
telescope.make_assertion("noexit", "", check)
|
||||
|
||||
telescope.make_assertion("jitchecker", "", function(checker, func, ...)
|
||||
|
||||
local expected, value = checker(1, func(1, ...))
|
||||
assert(expected == value)
|
||||
return true
|
||||
end)
|
||||
telescope.make_assertion("jit", "", tester.testsingle)
|
||||
telescope.make_assertion("jitchecker", "", tester.testwithchecker)
|
||||
telescope.make_assertion("noexit", "", tester.testnoexit)
|
||||
telescope.make_assertion("exit", "", tester.testexit)
|
||||
|
||||
telescope.make_assertion("cdef", "", function(cdef, name)
|
||||
assert(not name or type(name) == "string")
|
||||
|
Loading…
Reference in New Issue
Block a user