Added JIT support for intrinsics. Support for vector registers is NYI (not yet implemented).

This commit is contained in:
fsfod 2016-03-29 11:14:43 +01:00
parent 57ff67552a
commit f7331e9464
10 changed files with 655 additions and 36 deletions

View File

@ -1305,6 +1305,58 @@ static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
return (nargs | (ir->t.irt << CCI_OTSHIFT));
}
#if LJ_HASINTRINSICS
/* Compute register hints and the clobber set for an IR_INTRN instruction.
** Walks the IR_CARG chain feeding the intrinsic and back-propagates each
** fixed input register as an allocation hint onto the argument value.
** Returns the RegSet of registers the intrinsic modifies (its declared
** mod set plus all fixed input and fixed output registers).
*/
static RegSet asm_intrinsichints(ASMState *as, IRIns *ir)
{
CTState *cts = ctype_ctsG(J2G(as->J));
CIntrinsic* intrins = lj_intrinsic_get(cts, ir->op2);
RegSet mod = intrin_getmodrset(cts, intrins);
IRIns *ira = IR(ir->op1), *irval;
int i;
int dynreg = intrin_regmode(intrins);
/* Propagate the fixed registers of the arguments to refs passed in for them.
** The CARG chain links backwards, so walk the inputs in reverse order.
*/
for (i = intrins->insz-1; i >= 0; i--) {
Reg r = reg_rid(intrins->in[i]);
if (dynreg && i < intrins->dyninsz) {
/* Dynamic register so no hint needed */
ira = IR(ira->op1);
continue;
}
/* A fixed input register is implicitly clobbered by the intrinsic. */
rset_set(mod, r);
if (!irref_isk(ira->op2)) {
irval = IR(ira->op2);
/* Back propagate the register to the arguments value if it has no register set */
if (irval->prev == REGSP_INIT) {
irval->prev = REGSP_HINT(r);
}
}
ira = IR(ira->op1);
}
if (intrins->outsz > 0) {
/* Skip out[0] when it is dynamically allocated; fixed outs are clobbers. */
i = intrin_dynrout(intrins) ? 1 : 0;
for (; i < intrins->outsz; i++) {
mod |= 1 << reg_rid(intrins->out[i]);
}
if (intrin_dynrout(intrins)) {
ir->prev = REGSP_INIT;
} else {
/* Hint the intrinsic's own result to its fixed first output register. */
ir->prev = REGSP_HINT(reg_rid(intrins->out[0]));
}
}
return mod;
}
#endif
static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
{
const CCallInfo *ci = &lj_ir_callinfo[id];
@ -1743,6 +1795,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
case IR_CALLXS: asm_callx(as, ir); break;
case IR_CARG: break;
case IR_INTRN: asm_intrinsic(as, ir, NULL); break;
case IR_ASMEND: asm_intrinsic(as, IR(ir->op2), ir); break;
case IR_ASMRET: break;
default:
setintV(&as->J->errinfo, ir->o);
lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
@ -2103,6 +2159,22 @@ static void asm_setup_regsp(ASMState *as)
as->modset |= RSET_SCRATCH;
continue;
}
#if LJ_HASINTRINSICS
case IR_INTRN: {
Reg mod = asm_intrinsichints(as, ir);
if (inloop)
as->modset |= mod;
continue;
}
case IR_ASMRET: {
Reg r = reg_rid(ir->op2);
ir->prev = REGSP_HINT(r);
if (inloop)
rset_set(as->modset, r);
continue;
}
#endif
case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
ir->prev = asm_setup_call_slots(as, ir, ci);

View File

@ -397,6 +397,31 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
return RID_MRM;
}
/* Try to fuse an XLOAD (ref) into the current instruction as a memory
** operand.  xallow restricts the registers usable for address components.
** skip is a ref to ignore during the conflict scan (e.g. the caller's own
** IR_CARG holding this load), or REF_NIL.
** Returns 1 and sets up as->mrm on success, 0 if fusion is unsafe.
*/
static int asm_fusexload(ASMState *as, IRRef ref, RegSet xallow, IRRef skip)
{
IRIns *ir = IR(ref);
IRRef i = as->curins;
lua_assert(ir->o == IR_XLOAD);
/* Generic fusion is not ok for 8/16 bit operands (but see asm_comp).
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
if (irt_typerange(ir->t, IRT_I8, IRT_U16)) {
return 0;
}
if (i > ref + CONFLICT_SEARCH_LIM)
return 0; /* Give up, ref is too far away. */
ir = as->ir;
/* Scan backwards from the current ins for an aliasing store or another
** use of the load, either of which makes re-materializing it unsafe.
*/
while (--i > ref) {
if (ir[i].o == IR_XSTORE)
return 0; /* Conflict found. */
else if ((ir[i].op1 == ref || ir[i].op2 == ref) && i != skip)
return 0;
}
asm_fusexref(as, IR(ref)->op1, xallow);
return 1;
}
/* Fuse load into memory operand.
**
** Important caveat: this may emit RIP-relative loads! So don't place any
@ -467,12 +492,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_XLOAD) {
/* Generic fusion is not ok for 8/16 bit operands (but see asm_comp).
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
noconflict(as, ref, IR_XSTORE, 0)) {
asm_fusexref(as, ir->op1, xallow);
if (asm_fusexload(as, ref, xallow, REF_NIL)) {
return RID_MRM;
}
} else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
@ -642,6 +662,366 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
#endif
}
#if LJ_HASINTRINSICS
/* Bundled state shared by the intrinsic assembly helpers. */
typedef struct IntrinsInfo {
CIntrinsic *intrins; /* Intrinsic being assembled */
IRIns *asmend; /* Tail IR_ASMEND ins, or NULL for <= 1 output */
IRRef args[LJ_INTRINS_MAXREG]; /* Refs of the input argument values */
/* input register list that gets mutated for opcode intrinsics */
uint8_t inregs[LJ_INTRINS_MAXREG];
RegSet inset, outset, modset; /* Fixed in/out and modified register sets */
/* First CARG ref used as limit for duplicate load checking when fusing */
IRRef a1;
} IntrinsInfo;
static int asm_swaprefs(ASMState *as, IRIns *ir, IRRef lref, IRRef rref);
/* Move or load each argument value into its assigned input register.
** Entries marked 0xff were already handled (dynamic registers set up by
** asm_intrin_opcode, or fused operands) and are skipped.
** NOTE(review): emitted backwards like the rest of the assembler, so these
** loads execute before the intrinsic itself.
*/
static void asm_asmsetupargs(ASMState *as, IntrinsInfo *ininfo)
{
MSize n;
CIntrinsic *intrins = ininfo->intrins;
/* move or load args into input registers */
for (n = 0; n < intrins->insz; n++) {
IRRef ref = ininfo->args[n];
IRIns *ir = IR(ref);
Reg r = reg_rid(ininfo->inregs[n]);
/* Skip any dynamic registers already setup by opcode intrinsics */
if (ininfo->inregs[n] == 0xff) {
lua_assert(n < intrins->dyninsz);
continue;
}
if (!ra_hasreg(ir->r) && r < RID_MAX_GPR && ref < ASMREF_TMP1) {
/* Constant with no register: materialize it directly into r. */
#if LJ_64
if (ir->o == IR_KINT64)
emit_loadu64(as, r, ir_kint64(ir)->u64);
else
#endif
emit_loadi(as, r, ir->i);
} else {
/* if we have a fixed register it must have been evicted earlier */
lua_assert(rset_test(as->freeset, r) || regcost_ref(as->cost[r]) == ref ||
n < intrins->dyninsz);
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
if (r != ir->r) {
lua_assert(n >= intrins->dyninsz);
emit_movrr(as, ir, r, ir->r);
}
} else {
/* Dynamic registers should never end up here */
lua_assert(!intrin_regmode(intrins) || n >= intrins->dyninsz);
ra_allocref(as, ref, RID2RSET(r));
}
}
checkmclim(as);
}
}
/* Assemble an intrinsic emitted as a raw opcode with dynamically allocated
** registers (as opposed to a wrapped call).  Picks/fuses the right-hand
** operand (possibly into a ModRM memory operand) and the destination, then
** emits the instruction via emit_intrins.
*/
static void asm_intrin_opcode(ASMState *as, IRIns *ir, IntrinsInfo *ininfo)
{
CIntrinsic *intrins = ininfo->intrins;
IRRef *args = ininfo->args;
uint8_t *in = ininfo->inregs;
uint32_t dynreg = intrin_regmode(intrins);
RegSet allow;
IRRef lref = 0, rref = 0;
Reg right, dest = RID_NONE;
int dynrout = intrins->outsz > 0 && intrin_dynrout(intrins);
/* Swap the two refs to native ordering */
if (dynreg >= DYNREG_SWAPREGS) {
IRRef temp = ininfo->args[0];
ininfo->args[0] = ininfo->args[1]; ininfo->args[1] = temp;
}
rref = args[0];
right = IR(rref)->r;
if (intrins->dyninsz > 1) {
lref = args[1];
dest = IR(lref)->r;
if (ra_hasreg(dest))
in[1] = reg_setrid(in[1], dest);
}
/* Reset the ModRM state before any fusion below fills it in. */
as->mrm.idx = as->mrm.base = RID_NONE;
as->mrm.scale = as->mrm.ofs = 0;
/* Allocate the dynamic output register if we have one */
if (dynrout) {
allow = reg_torset(intrins->out[0]);
if (ra_hasreg(right)) {
rset_clear(allow, right);
ra_noweak(as, right);
}
dest = ra_dest(as, ir, allow);
if (dynreg == DYNREG_OPEXT) {
/* Set input register the same as the output since the op is destructive */
right = dest;
}
}
if (intrins->dyninsz > 1 && dynreg != DYNREG_TWOSTORE) {
if (lref == rref) {
if (dynreg == DYNREG_INOUT)
right = dest;
/* Only load/move the value to register once.
** ra_left will do the move for INOUT.
*/
in[0] = 0xff;
} else if (ra_noreg(right)) {
/* Commutative op: try swapping operands for a better allocation. */
if (intrin_iscomm(intrins) && asm_swaprefs(as, ir, lref, rref)) {
IRRef tmp = lref; lref = rref; rref = tmp;
/* Must be same register kinds and RID register type ranges */
lua_assert(reg_isgpr(in[0]) == reg_isgpr(in[1]) &&
reg_kind(in[0]) == reg_kind(in[1]));
args[0] = rref;
args[1] = lref;
/* lref(now swapped to rref) may already have a register set so update
** the right register to it in case we don't fuse a load.
*/
right = IR(rref)->r;
}
if (!(intrins->flags & INTRINSFLAG_NOFUSE) && !ra_hasreg(right)) {
RegSet rallow = reg_torset(in[0]);
rset_clear(rallow, dest);
/* Handle XLOAD directly so we can tell noconflict to skip our IR_CARG
** that holds the ref of the load we're fusing.
*/
if (IR(rref)->o == IR_XLOAD) {
if (mayfuse(as, rref) && asm_fusexload(as, rref, rallow, ininfo->a1)) {
right = RID_MRM;
}
} else {
right = asm_fuseload(as, rref, rallow);
}
}
}
} else if (intrins->flags & INTRINSFLAG_INDIRECT) {
/* force indirect MODRM mode. rref should always be a memory address */
if (ra_noreg(right)) {
allow = RSET_GPR & ~ininfo->inset;
/* If part of the opcode is encoded in ModRM avoid picking a register that
** will corrupt it */
if (dynreg == DYNREG_OPEXT)
rset_clear(allow, RID_EBP);
asm_fusexref(as, rref, allow);
} else {
as->mrm.base = IR(rref)->r;
}
right = RID_MRM;
}
/* Handle second input reg for any two input dynamic in register modes
** which isn't DYNREG_INOUT
*/
if (intrins->dyninsz > 1 && ra_noreg(dest)) {
Reg r;
allow = reg_torset(in[1]) & ~ininfo->inset;
if (ra_hasreg(right) && right != RID_MRM)
rset_clear(allow, right);
r = ra_allocref(as, args[1], allow);
in[1] = reg_setrid(in[1], r);
dest = r;
}
if (right == RID_MRM) {
/* Skip trying to load what we fused into the instruction */
in[0] = 0xff;
} else {
if (ra_noreg(right)) {
lua_assert(ra_noreg(IR(rref)->r));
allow = reg_torset(in[0]);
rset_clear(allow, dest);
/* RID_EBP in the reg field would collide with an opcode extension. */
if(dynreg == DYNREG_OPEXT)
rset_clear(allow, RID_EBP);
right = ra_allocref(as, rref, allow);
}
in[0] = reg_setrid(in[0], right);
}
lua_assert(ra_hasreg(right) && (ra_hasreg(dest) || intrins->dyninsz < 2));
emit_intrins(as, intrins, right, dest);
/* NOTE(review): the assembler emits backwards, so the ra_left moves below
** execute before the instruction emitted above — confirm against emit_*.
*/
if (dynreg == DYNREG_INOUT) {
lua_assert(lref);
ra_left(as, dest, lref);
/* no need to load the register since ra_left already did */
in[1] = 0xff;
} else if (dynreg == DYNREG_OPEXT && dynrout) {
/* Handle destructive ONEOPEXT opcodes */
lua_assert(rref);
ra_left(as, dest, rref);
in[0] = 0xff;
}
checkmclim(as);
}
/* Evict and pin registers for the fixed inputs/outputs of an intrinsic.
** ir is the IR_INTRN instruction; the output instructions are gathered by
** walking back from the IR_ASMEND tail when one exists.  Fills in
** ininfo->inset/outset and evicts live values from every register the
** intrinsic will clobber, then assigns the fixed output registers.
*/
void asm_intrin_results(ASMState *as, IRIns *ir, CIntrinsic* intrins, IntrinsInfo* ininfo)
{
  IRRef results[LJ_INTRINS_MAXREG];
  RegSet evict = 0, outset = 0, aout = 0;
  int32_t i;
  /* Output slot 0 is skipped below when it is dynamically allocated. */
  int32_t dynout = intrin_dynrout(intrins) ? 1 : 0;

  /* Gather the output register IR instructions */
  if (intrins->outsz > 0) {
    IRIns *irret = ininfo->asmend ? IR(ininfo->asmend->op1) : ir;
    int32_t n;
    for (n = intrins->outsz-1;; n--) {
      lua_assert(n >= 0 && (irret->o == IR_ASMRET || irret->o == IR_INTRN));
      results[n] = (IRRef)(irret - as->ir);
      if (ra_used(irret)) {
        /* Track results already sitting in their fixed output register. */
        if (n >= dynout && irret->r == reg_rid(ininfo->inregs[n])) {
          rset_set(aout, irret->r);
        }
      }
      if (irret->o == IR_INTRN) {
        break;  /* Reached the head of the output chain. */
      }
      irret = IR(irret->op1);
    }
  }

  evict = ininfo->modset;
  /* Check what registers need evicting for fixed input registers */
  i = intrin_regmode(intrins) ? intrins->dyninsz : 0;
  for (; i < intrins->insz; i++) {
    Reg r = reg_rid(intrins->in[i]);
    IRIns *arg = IR(ininfo->args[i]);
    ininfo->inset |= RID2RSET(r);
    /* Don't evict if the arg was allocated the correct register */
    if (!rset_test(as->freeset, r) && arg->r != r) {
      evict |= RID2RSET(r);
    }
  }
  for (i = dynout; i < intrins->outsz; i++) {
    outset |= RID2RSET(reg_rid(intrins->out[i]));
  }
  ininfo->outset = outset;
  /* Don't evict registers that currently hold our output values live. */
  evict &= ~aout;
  /* Evict any values in input and modified registers and any fixed out
  ** registers that are unused or didn't get allocated their fixed one.
  */
  ra_evictset(as, evict);

  /* Handle any fixed output registers */
  if (intrins->outsz > dynout) {
    int32_t stop = dynout;
    for (i = intrins->outsz-1; i >= stop; i--) {
      IRIns *irret = IR(results[i]);
      /* Mask off the kind bits to get the plain RID, consistent with the
      ** reg_rid() uses above (out[] entries pack rid + kind).
      */
      Reg r = reg_rid(intrins->out[i]);
      if (!ra_used(irret) || (!rset_test(as->freeset, r) && irret->r != r)) {
        ra_evictset(as, RID2RSET(r));
        if (!ra_used(irret))
          continue;
      }
      ra_destreg(as, irret, r);
    }
  }
}
/* Assemble an IR_INTRN instruction.  Resolves the call/opcode target,
** collects the argument refs from the IR_CARG chain, allocates result and
** fixed registers, emits the intrinsic (opcode or call form) and finally
** sets up the input registers.  asmend is the trailing IR_ASMEND when the
** intrinsic has multiple outputs, otherwise NULL.
*/
static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
{
CTState *cts = ctype_ctsG(J2G(as->J));
CIntrinsic *intrins = lj_intrinsic_get(cts, ir->op2);
IRIns *ira = ir;
uintptr_t target = 0;
uint32_t n = 0;
IntrinsInfo ininfo;
memset(&ininfo, 0, sizeof(IntrinsInfo));
ininfo.intrins = intrins;
ininfo.modset = intrin_getmodrset(cts, intrins);
ininfo.asmend = asmend;
memcpy(ininfo.inregs, intrins->in, sizeof(ininfo.inregs));
if (!intrins->wrapped) {
/* Last CARG in the chain is the wrapper pointer */
ira = IR(ira->op1);
#if LJ_64
if (IR(ira->op2)->o == IR_KINT64) {
target = (uintptr_t)ir_k64(IR(ira->op2))->u64;
}
#endif
if (!target) {
target = (uintptr_t)IR(ira->op2)->i;
}
} else {
target = (uintptr_t)intrins->wrapped;
}
n = intrins->insz;
/* Collect the input register argument refs */
while (ira->op1 != REF_NIL) {
ira = IR(ira->op1);
lua_assert(ira->o == IR_CARG);
ininfo.args[--n] = ira->op2;
/* Save the ref of our first CARG so we can use it to skip the arg chain
** when looking for conflicts while fusing an XLOAD.
*/
if (n == 0)
ininfo.a1 = (IRRef)(ira-as->ir);
}
lua_assert(n == 0);
asm_intrin_results(as, ir, intrins, &ininfo);
if (intrin_regmode(intrins)) {
asm_intrin_opcode(as, ir, &ininfo);
} else {
Reg r1 = 0;
if (intrins->flags & INTRINSFLAG_CALLED) {
/* The wrapper is prefixed with an AsmHeader describing the real entry. */
AsmHeader *hdr = ((AsmHeader*)target)-1;
MCode *p;
target = intrins->flags & INTRINSFLAG_INDIRECT ?
hdr->target : (target+hdr->asmofs);
p = (MCode*)target;
/* Need a scratch register when the target is out of rel32 call range. */
if (LJ_64 && (p-as->mcp) != (int32_t)(p-as->mcp)) {
r1 = ra_scratch(as, RSET_GPR & ~(ininfo.inset | ininfo.outset));
}
}
emit_intrins(as, intrins, r1, target);
}
asm_asmsetupargs(as, &ininfo);
if (ininfo.asmend) {
/* Skip over our IR_INTRN since we're emitting from the tail */
as->curins = (IRRef)(ir - as->ir);
}
}
#else
/* Stub used when intrinsics support is compiled out (LJ_HASINTRINSICS off).
** Should be unreachable since IR_INTRN is never emitted in that case.
*/
static void asm_intrinsic(ASMState *as, IRIns *ir, IRIns *asmend)
{
  /* Mark all parameters used, including asmend which the original missed,
  ** to avoid -Wunused-parameter warnings.
  */
  UNUSED(as); UNUSED(ir); UNUSED(asmend);
}
/* No-op stub for IR_ASMRET handling when intrinsics are compiled out. */
void asm_asmret(ASMState *as, IRIns *ir)
{
UNUSED(as); UNUSED(ir);
}
#endif
/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
@ -1395,7 +1775,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
asm_fusefref(as, ir, RSET_GPR);
else
asm_fusexref(as, ir->op1, RSET_GPR);
/* ir->op2 is ignored -- unaligned loads are ok on x86. */
/* ir->op2 is ignored for non vectors -- unaligned loads are ok on x86. */
switch (irt_type(ir->t)) {
case IRT_I8: xo = XO_MOVSXb; break;
case IRT_U8: xo = XO_MOVZXb; break;
@ -1462,7 +1842,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
xo = XO_MOVto;
break;
}
emit_mrm(as, xo, src, RID_MRM);
if (!LJ_64 && src != osrc) {
ra_noweak(as, osrc);
emit_rr(as, XO_MOV, src, osrc);
@ -2004,23 +2386,21 @@ static void asm_pow(ASMState *as, IRIns *ir)
asm_fppowi(as, ir);
}
static int asm_swapops(ASMState *as, IRIns *ir)
static int asm_swaprefs(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
{
IRIns *irl = IR(ir->op1);
IRIns *irr = IR(ir->op2);
IRIns *irl = IR(lref);
IRIns *irr = IR(rref);
lua_assert(ra_noreg(irr->r));
if (!irm_iscomm(lj_ir_mode[ir->o]))
return 0; /* Can't swap non-commutative operations. */
if (irref_isk(ir->op2))
if (irref_isk(rref))
return 0; /* Don't swap constants to the left. */
if (ra_hasreg(irl->r))
return 1; /* Swap if left already has a register. */
if (ra_samehint(ir->r, irr->r))
return 1; /* Swap if dest and right have matching hints. */
if (as->curins > as->loopref) { /* In variant part? */
if (ir->op2 < as->loopref && !irt_isphi(irr->t))
if (rref < as->loopref && !irt_isphi(irr->t))
return 0; /* Keep invariants on the right. */
if (ir->op1 < as->loopref && !irt_isphi(irl->t))
if (lref < as->loopref && !irt_isphi(irl->t))
return 1; /* Swap invariants to the right. */
}
if (opisfusableload(irl->o))
@ -2028,6 +2408,13 @@ static int asm_swapops(ASMState *as, IRIns *ir)
return 0; /* Otherwise don't swap. */
}
/* Decide whether a two-operand op would benefit from swapping its
** operands.  Non-commutative ops can never be swapped; otherwise defer
** to the shared heuristic in asm_swaprefs.
*/
static int asm_swapops(ASMState *as, IRIns *ir)
{
  return irm_iscomm(lj_ir_mode[ir->o]) ?
         asm_swaprefs(as, ir, ir->op1, ir->op2) : 0;
}
static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo)
{
IRRef lref = ir->op1;

View File

@ -33,6 +33,7 @@
#include "lj_dispatch.h"
#include "lj_strfmt.h"
#include "lj_intrinsic.h"
#include "lj_target.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@ -1201,6 +1202,8 @@ static void crec_snap_caller(jit_State *J)
J->base[-1-LJ_FR2] = ftr; J->pc = pc;
}
void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *cts);
/* Record function call. */
static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
{
@ -1212,7 +1215,8 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
ct = ctype_rawchild(cts, ct);
}
if (ctype_isintrinsic(ct->info)) {
lj_trace_err(J, LJ_TRERR_NYICALL);
crec_call_intrins(J, rd, ct);
return 1;
}else if (ctype_isfunc(ct->info)) {
TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR);
CType *ctr = ctype_rawchild(cts, ct);
@ -1273,6 +1277,138 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
return 0;
}
#if LJ_HASINTRINSICS
/* Map an intrinsic output register declaration to the IRType used to
** record its result.  GPR outputs derive the type from the declared C
** type; FPR outputs take it straight from the register kind.
*/
static IRType intrins_retit(jit_State *J, CTState *cts, CType *arg)
{
  uint32_t reg = arg->size;
  if (!reg_isgpr(reg)) {
    /* NYI: support for vectors */
    if (reg_isvec(reg))
      lj_trace_err(J, LJ_TRERR_NYIVEC);
    return reg_irt(reg);
  } else {
    IRType ret = crec_ct2irt(cts, ctype_rawchild(cts, arg));
    lua_assert(ret != IRT_CDATA);
    return ret;
  }
}
/* Record a call to an intrinsic.  Converts the Lua arguments, emits the
** IR_CARG chain and the IR_INTRN, then emits an IR_ASMRET per extra
** output register and a second pass that boxes/converts the results.
*/
void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *func)
{
CTState *cts = ctype_ctsG(J2G(J));
TRef arg = TREF_NIL;
CIntrinsic *intrins = lj_intrinsic_get(cts, func->size);
void* target = *(void**)cdataptr(cdataV(&rd->argv[0]));
MSize i;
IRType it;
int argofs = 1;
CTypeID sib = func->sib, retid = 0;
/* Fetch the parameter list chain */
retid = ctype_cid(func->info);
if (intrins->wrapped == 0) {
/* Guard on the exact wrapper pointer for template-created intrinsics. */
TRef tr = emitir(IRT(IR_FLOAD, IRT_INTP), J->base[0], IRFL_CDATA_PTR);
emitir(IRTG(IR_EQ, IRT_INTP), tr, lj_ir_kintp(J, target));
}
/* Convert parameters and load them into the input registers */
for (i = 0; i < intrins->insz; i++) {
CType *ct = ctype_get(cts, sib);
TRef tra = J->base[i+argofs];
CType *d = ctype_rawchild(cts, ct);
sib = ct->sib;
if (reg_isvec(ct->size)) {
/* NYI: support for vectors */
lj_trace_err(J, LJ_TRERR_NYIVEC);
}
tra = crec_ct_tv(J, d, 0, tra, &rd->argv[i+argofs]);
arg = emitir(IRT(IR_CARG, IRT_NIL), arg, tra);
}
/* Append the wrapper pointer if we were created from a template */
if (intrins->wrapped == NULL) {
arg = emitir(IRT(IR_CARG, IRT_NIL), arg, lj_ir_kintp(J, target));
}
it = IRT_NIL;
if (intrins->outsz > 0) {
it = intrins_retit(J, cts, ctype_get(cts, retid));
}
J->base[0] = emitir(IRT(IR_INTRN, it), arg, (func->size & LJ_INTRINS_MAXID));
if (intrins->flags & INTRINSFLAG_MEMORYSIDE) {
/* Memory barrier: stops loads/stores being moved across the intrinsic. */
emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
}
arg = J->base[0];
sib = retid;
/* Emit one IR_ASMRET per output register beyond the first. */
for (i = 1; i < intrins->outsz; i++) {
CType *ct = ctype_get(cts, sib);
uint32_t reg = ct->size;
IRType irt = 0;
sib = ct->sib;
if (reg_isgpr(reg)) {
irt = intrins_retit(J, cts, ct);
lua_assert(irt != IRT_CDATA);
} else {
irt = reg_irt(reg);
}
J->base[i] = arg = emitir(IRT(IR_ASMRET, irt), arg, reg_rid(reg));
}
if (intrins->outsz > 1) {
/* Close the output chain so the assembler can find the tail. */
emitir(IRT(IR_ASMEND, IRT_NIL), arg, J->base[0]);
}
sib = retid;
/* Second pass to box values after all ASMRET have run to shuffle/spill the
* output registers.
*/
for (i = 0; i < intrins->outsz; i++) {
CType *ct = ctype_get(cts, sib);
CTypeID id = ctype_cid(ct->info);
uint32_t reg = ct->size;
uint32_t kind = reg_kind(reg);
sib = ct->sib;
if (reg_isgpr(reg)) {
CTypeID cid = ctype_typeid(cts, ctype_raw(cts, id));
if (cid != CTID_INT32) {
/* Box the u32/64 bit value in the register */
J->base[i] = emitir(IRT(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), J->base[i]);
}
} else {
if (kind == REGKIND_FPR32) {
/* Widen single-precision results to the Lua number type. */
J->base[i] = emitconv(J->base[i], IRT_NUM, IRT_FLOAT, 0);
} else if(rk_isvec(kind)) {
/* NYI: support for vectors */
lj_trace_err(J, LJ_TRERR_NYIVEC);
} else {
lua_assert(kind == REGKIND_FPR64);
}
}
}
/* Intrinsics are assumed to always have side effects */
J->needsnap = 1;
rd->nres = intrins->outsz;
}
#else
/* Stub when intrinsics are compiled out.
** NOTE(review): presumably unreachable since crec_call only dispatches here
** for intrinsic ctypes — confirm ctype_isintrinsic is always false then.
*/
void crec_call_intrins(jit_State *J, RecordFFData *rd, CType *func)
{
UNUSED(J);UNUSED(rd);UNUSED(func);
}
#endif
void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
{
CTState *cts = ctype_ctsG(J2G(J));
@ -1568,9 +1704,16 @@ void LJ_FASTCALL recff_clib_index(jit_State *J, RecordFFData *rd)
CLibrary *cl = (CLibrary *)uddata(udataV(&rd->argv[0]));
GCstr *name = strV(&rd->argv[1]);
CType *ct;
CTypeID id = lj_ctype_getname(cts, &ct, name, CLNS_INDEX);
CTypeID id;
cTValue *tv = lj_tab_getstr(cl->cache, name);
rd->nres = rd->data;
rd->nres = rd->data > 0 ? 1 : 0;
if (rd->data < 2) {
id = lj_ctype_getname(cts, &ct, name, CLNS_INDEX);
} else {
/* set some dummy values for the intrinsic namespace */
id = CTID_VOID;
ct = ctype_get(cts, id);
}
if (id && tv && !tvisnil(tv)) {
/* Specialize to the symbol name and make the result a constant. */
emitir(IRTG(IR_EQ, IRT_STR), J->base[1], lj_ir_kstr(J, name));

View File

@ -313,6 +313,9 @@ static int parse_opmode(const char *op, MSize len)
case 'C':
flags |= INTRINSFLAG_CALLED;
break;
case 'c':
flags |= INTRINSFLAG_ISCOMM;
break;
case 'X':
flags |= INTRINSFLAG_REXW;
break;
@ -681,6 +684,13 @@ int lj_intrinsic_fromcdef(lua_State *L, CTypeID fid, GCstr *opstr, uint32_t imm)
if (opcode) {
setopcode(L, intrins, opcode);
}
if (intrin_iscomm(intrins) &&
(intrins->insz < 2 || intrins->in[0] != intrins->in[1])) {
lj_err_callerv(L, LJ_ERR_FFI_BADOPSTR, strdata(opstr),
"bad registers for commutative mode");
}
if (intrin_regmode(intrins) == DYNREG_FIXED) {
/* dyninsz is overlapped by input registers 6/7/8 */
if ((intrins->insz < 6 && intrins->dyninsz > 0) || dynout) {
@ -837,7 +847,7 @@ int lj_intrinsic_call(CTState *cts, CType *ct)
}
/* Swap input values around to match the platform ordering the wrapper expects */
if (intrin_regmode(intrins) >= DYNREG_SWAPREGS &&
if (intrin_regmode(intrins) >= DYNREG_SWAPREGS && !intrin_iscomm(intrins) &&
reg_isgpr(intrins->in[0]) == reg_isgpr(intrins->in[1])) {
if (reg_isgpr(intrins->in[0])) {
intptr_t temp = context.gpr[0];

View File

@ -59,6 +59,8 @@ typedef enum INTRINSFLAGS {
INTRINSFLAG_IMMB = 0x400,
/* Opcode is larger than the emit system normally handles x86/x64(4 bytes) */
INTRINSFLAG_LARGEOP = 0x800,
/* Opcode is commutative, so its input registers may be swapped for better fusing */
INTRINSFLAG_ISCOMM = 0x1000,
/* Opcode uses ymm registers */
INTRINSFLAG_VEX256 = 0x4000,
@ -86,6 +88,7 @@ typedef struct AsmHeader {
#define intrin_regmode(intrins) ((intrins)->flags & INTRINSFLAG_REGMODEMASK)
#define intrin_setregmode(intrins, mode) \
(intrins)->flags = ((intrins)->flags & ~INTRINSFLAG_REGMODEMASK)|(mode)
#define intrin_iscomm(intrins) ((intrins)->flags & INTRINSFLAG_ISCOMM)
#define intrin_getopextb(intrins) ((intrins)->out[3])
#define intrin_setopextb(intrins, opext) \
@ -140,6 +143,7 @@ CTypeID1 regkind_ct[16];
#define reg_isfp(reg) (reg_rid(reg) >= RID_MIN_FPR)
#define reg_isvec(reg) (reg_rid(reg) >= RID_MIN_FPR && reg_kind(reg) >= REGKIND_VEC_START)
#define reg_isdyn(reg) (reg_rid(reg) == RID_DYN_GPR || reg_rid(reg) == RID_DYN_FPR)
#define reg_torset(reg) (reg_isgpr(reg) ? RSET_GPR : RSET_FPR)
#define reg_irt(reg) (reg_isgpr(reg) ? rk_irtgpr(reg_kind(reg)) : rk_irtfpr(reg_kind(reg)))
#define rk_irtgpr(kind) ((IRType)regkind_it[(kind)])

View File

@ -145,6 +145,9 @@
_(CALLS, S , ref, lit) \
_(CALLXS, S , ref, ref) \
_(CARG, N , ref, ref) \
_(ASMRET, L, ref, lit) \
_(INTRN, S, ref, lit) \
_(ASMEND, S, ref, ref) \
\
/* End of list. */

View File

@ -2405,6 +2405,8 @@ LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLD(BUFHDR any any)
LJFOLD(INTRN any any)
LJFOLD(ASMRET any any)
LJFOLDX(lj_ir_emit)
/* ------------------------------------------------------------------------ */

View File

@ -38,6 +38,7 @@ TREDEF(NYITMIX, "NYI: mixed sparse/dense table")
TREDEF(NOCACHE, "symbol not in cache")
TREDEF(NYICONV, "NYI: unsupported C type conversion")
TREDEF(NYICALL, "NYI: unsupported C function type")
TREDEF(NYIVEC, "NYI: unsupported vector operation or type")
/* Optimizations. */
TREDEF(GFAIL, "guard would always fail")

View File

@ -335,6 +335,15 @@ context("__mcode", function()
assert_cdeferr([[void badreg_fpr1(float xmm20) __mcode("90_E");]], "invalid")
end)
it("invalid commutative mode registers", function()
assert_cdef([[int4 valid_comm(int4 v1, int4 v2) __mcode("90rMc");]], "valid_comm")
--must have 1+ input argument
assert_cdeferr([[int4 invalid_comm1(int4 v1) __mcode("90rMc");]])
-- input register types must match
assert_cdeferr([[void invalid_comm2(int32_t i, int4 v1) __mcode("90rMc");]])
assert_cdeferr([[void invalid_comm3(int4 v1, int32_t i) __mcode("90rMc");]])
end)
it("multidef rollback", function()
--check ctype rollback after parsing a valid intrinsic the line before

View File

@ -4,22 +4,10 @@ local telescope = require("telescope")
local ffi = require("ffi")
local C = ffi.C
local function check(expect, func, ...)
local result = func(...)
assert(result == expect, tostring(result))
return true
end
telescope.make_assertion("jit", "", check)
telescope.make_assertion("exit", "", check)
telescope.make_assertion("noexit", "", check)
telescope.make_assertion("jitchecker", "", function(checker, func, ...)
local expected, value = checker(1, func(1, ...))
assert(expected == value)
return true
end)
telescope.make_assertion("jit", "", tester.testsingle)
telescope.make_assertion("jitchecker", "", tester.testwithchecker)
telescope.make_assertion("noexit", "", tester.testnoexit)
telescope.make_assertion("exit", "", tester.testexit)
telescope.make_assertion("cdef", "", function(cdef, name)
assert(not name or type(name) == "string")