diff --git a/lib/dump.lua b/lib/dump.lua index 555b4b45..652761ff 100644 --- a/lib/dump.lua +++ b/lib/dump.lua @@ -429,9 +429,15 @@ local function dump_ir(tr, dumpsnap, dumpreg) band(ot, 128) == 0 and " " or ">", band(ot, 64) == 0 and " " or "+", irtype[t], op)) - local m1 = band(m, 3) + local m1, m2 = band(m, 3), band(m, 3*4) if sub(op, 1, 4) == "CALL" then - out:write(format("%-10s (", vmdef.ircall[op2])) + if m2 == 1*4 then -- op2 == IRMlit + out:write(format("%-10s (", vmdef.ircall[op2])) + elseif op2 < 0 then + out:write(format("[0x%x](", tonumber((tracek(tr, op2))))) + else + out:write(format("%04d (", op2)) + end if op1 ~= -1 then dumpcallargs(tr, op1) end out:write(")") elseif op == "CNEW " and op2 == -1 then @@ -442,7 +448,6 @@ local function dump_ir(tr, dumpsnap, dumpreg) else out:write(format(m1 == 0 and "%04d" or "#%-3d", op1)) end - local m2 = band(m, 3*4) if m2 ~= 3*4 then -- op2 != IRMnone if m2 == 1*4 then -- op2 == IRMlit local litn = litname[op] diff --git a/src/lj_asm.c b/src/lj_asm.c index 77b55f0c..acad3b25 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1459,7 +1459,6 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) /* Generate a call to a C function. */ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { - RegSet allow = RSET_ALL; uint32_t n, nargs = CCI_NARGS(ci); int32_t ofs = STACKARG_OFS; uint32_t gprs = REGARG_GPRS; @@ -1467,24 +1466,25 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) Reg fpr = REGARG_FIRSTFPR; #endif lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */ - emit_call(as, ci->func); + if ((void *)ci->func) + emit_call(as, ci->func); for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; IRIns *ir = IR(ref); Reg r; #if LJ_64 && LJ_ABI_WIN /* Windows/x64 argument registers are strictly positional. */ - r = irt_isnum(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31); + r = irt_isfp(ir->t) ? (fpr <= REGARG_LASTFPR ? 
fpr : 0) : (gprs & 31); fpr++; gprs >>= 5; #elif LJ_64 /* POSIX/x64 argument registers are used in order of appearance. */ - if (irt_isnum(ir->t)) { + if (irt_isfp(ir->t)) { r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++; } else { r = gprs & 31; gprs >>= 5; } #else - if (irt_isnum(ir->t) || !(ci->flags & CCI_FASTCALL)) { + if (irt_isfp(ir->t) || !(ci->flags & CCI_FASTCALL)) { r = 0; } else { r = gprs & 31; gprs >>= 5; @@ -1514,22 +1514,16 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi); } else { - if ((allow & RSET_FPR) == RSET_EMPTY) - lj_trace_err(as->J, LJ_TRERR_NYICOAL); - r = ra_alloc1(as, ref, allow & RSET_FPR); - allow &= ~RID2RSET(r); + r = ra_alloc1(as, ref, RSET_FPR); emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); } - ofs += 8; + ofs += (LJ_32 && irt_isfloat(ir->t)) ? 4 : 8; } else { /* Non-FP argument is on stack. */ if (LJ_32 && ref < ASMREF_TMP1) { emit_movmroi(as, RID_ESP, ofs, ir->i); } else { - if ((allow & RSET_GPR) == RSET_EMPTY) - lj_trace_err(as->J, LJ_TRERR_NYICOAL); - r = ra_alloc1(as, ref, allow & RSET_GPR); - allow &= ~RID2RSET(r); + r = ra_alloc1(as, ref, RSET_GPR); emit_movtomro(as, REX_64IR(ir, r), RID_ESP, ofs); } ofs += sizeof(intptr_t); @@ -1575,7 +1569,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) emit_movtomro(as, RID_RET, RID_ESP, ofs); emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4); } else { - emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); + emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, + irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); } #endif } else { @@ -1585,7 +1580,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } } -/* Collect arguments from CALL* and ARG instructions. */ +/* Collect arguments from CALL* and CARG instructions. 
*/ static void asm_collectargs(ASMState *as, IRIns *ir, const CCallInfo *ci, IRRef *args) { @@ -1610,6 +1605,40 @@ static void asm_call(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); } +/* Reconstruct CCallInfo flags for CALLX*. */ +static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) +{ + uint32_t nargs = 0; + if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ + IRIns *ira = IR(ir->op1); + nargs++; + while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } /* Follow the CARG chain through op1. */ + } + /* NYI: fastcall etc. */ + return (nargs | (ir->t.irt << CCI_OTSHIFT)); /* Arg count in the low bits, IR type shifted up by CCI_OTSHIFT. */ +} + +static void asm_callx(ASMState *as, IRIns *ir) /* Assemble a CALLX* instruction: op1 = argument chain, op2 = call target. */ +{ + IRRef args[CCI_NARGS_MAX]; + CCallInfo ci; + IRIns *irf; + ci.flags = asm_callx_flags(as, ir); + asm_collectargs(as, ir, &ci, args); + asm_setupresult(as, ir, &ci); + irf = IR(ir->op2); + if (LJ_32 && irref_isk(ir->op2)) { /* Call to constant address on x86. */ + ci.func = (ASMFunction)(void *)(uintptr_t)(uint32_t)irf->i; + } else { + /* Prefer a non-argument register or RID_RET for indirect calls. */ + RegSet allow = (RSET_GPR & ~RSET_SCRATCH)|RID2RSET(RID_RET); + Reg r = ra_alloc1(as, ir->op2, allow); + emit_rr(as, XO_GROUP5, XOg_CALL, r); + ci.func = (ASMFunction)(void *)0; /* NULL target: asm_gencall emits no direct call, the indirect one is emitted above. */ + } + asm_gencall(as, &ci, args); +} + /* -- Returns ------------------------------------------------------------- */ /* Return to lower frame. Guard that it goes to the right spot. */ @@ -4086,6 +4115,7 @@ static void asm_ir(ASMState *as, IRIns *ir) /* Calls. */ case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; + case IR_CALLXS: asm_callx(as, ir); break; case IR_CARG: break; default: @@ -4113,6 +4143,43 @@ static void asm_trace(ASMState *as) /* -- Trace setup --------------------------------------------------------- */ +/* Ensure there are enough stack slots for call arguments. 
*/ +static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) /* Raises as->evenspill if needed; the return value is stored in ir->prev by the callers. */ +{ + IRRef args[CCI_NARGS_MAX]; + uint32_t nargs = (int)CCI_NARGS(ci); + int nslots = 0; + asm_collectargs(as, ir, ci, args); +#if LJ_64 + if (LJ_ABI_WIN) { + nslots = (int)(nargs*2); /* Only matters for more than four args. */ + } else { + uint32_t i; + int ngpr = 6, nfpr = 8; /* Remaining GPR/FPR argument registers. */ + for (i = 0; i < nargs; i++) + if (irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) nfpr--; else nslots += 2; /* Out of FPRs: reserve two slots for this arg. */ + } else { + if (ngpr > 0) ngpr--; else nslots += 2; /* Out of GPRs: reserve two slots for this arg. */ + } + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; + return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); +#else + if ((ci->flags & CCI_FASTCALL)) { + lua_assert(nargs <= 2); + } else { + uint32_t i; + for (i = 0; i < nargs; i++) + nslots += irt_isnum(IR(args[i])->t) ? 2 : 1; /* Two slots for a number arg, one for any other arg. */ + if (nslots > as->evenspill) /* Leave room for args. */ + as->evenspill = nslots; + } + return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET); +#endif +} + /* Clear reg/sp for all instructions and add register hints. */ static void asm_setup_regsp(ASMState *as, GCtrace *T) { @@ -4161,17 +4228,17 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) } } break; + case IR_CALLXS: { + CCallInfo ci; + ci.flags = asm_callx_flags(as, ir); /* CALLXS has no static CCallInfo entry; rebuild the flags from the IR. */ + ir->prev = asm_setup_call_slots(as, ir, &ci); + if (inloop) + as->modset |= RSET_SCRATCH; + continue; + } case IR_CALLN: case IR_CALLL: case IR_CALLS: { const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; -#if LJ_64 - lua_assert(CCI_NARGS(ci) <= (LJ_ABI_WIN ? 4 : 6)); - ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); -#else - lua_assert(!(ci->flags & CCI_FASTCALL) || CCI_NARGS(ci) <= 2); - if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */ - as->evenspill = (int32_t)CCI_NARGS(ci); - ir->prev = REGSP_HINT(RID_RET); -#endif + ir->prev = asm_setup_call_slots(as, ir, ci); if (inloop) as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ? 
(RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; diff --git a/src/lj_crecord.c b/src/lj_crecord.c index b4bfd0c2..2ecd2867 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -670,14 +670,83 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) } } +/* Record argument conversions and chain them with CARG. +** Aborts the trace (NYICALL) for unsupported types or too many arguments. +*/ +static TRef crec_call_args(jit_State *J, RecordFFData *rd, + CTState *cts, CType *ct) +{ + TRef args[CCI_NARGS_MAX]; + MSize i, n; + TRef tr; + args[0] = TREF_NIL; + for (n = 0; J->base[n+1]; n++) { + CType *d; + if (n >= CCI_NARGS_MAX) /* args[] has only CCI_NARGS_MAX entries. */ + lj_trace_err(J, LJ_TRERR_NYICALL); + do { + if (!ct->sib) + lj_trace_err(J, LJ_TRERR_NYICALL); + ct = ctype_get(cts, ct->sib); + } while (ctype_isattrib(ct->info)); + if (!ctype_isfield(ct->info)) + lj_trace_err(J, LJ_TRERR_NYICALL); + d = ctype_rawchild(cts, ct); + if (ctype_isenum(d->info)) d = ctype_child(cts, d); + if (!(ctype_isnum(d->info) || ctype_isptr(d->info))) + lj_trace_err(J, LJ_TRERR_NYICALL); + args[n] = crec_ct_tv(J, d, 0, J->base[n+1], &rd->argv[n+1]); + } + tr = args[0]; + for (i = 1; i < n; i++) + tr = emitir(IRT(IR_CARG, IRT_NIL), tr, args[i]); + return tr; +} + +/* Record function call. */ +static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + CType *ct = ctype_raw(cts, cd->typeid); + IRType tp = IRT_PTR; + if (ctype_isptr(ct->info)) { + tp = (LJ_64 && ct->size == 8) ? 
IRT_P64 : IRT_P32; + ct = ctype_rawchild(cts, ct); + } + if (ctype_isfunc(ct->info)) { + TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR); /* Load the pointer field of the cdata; used as the call target. */ + CType *ctr = ctype_rawchild(cts, ct); + IRType t = crec_ct2irt(ctr); + TRef tr; + if (ctype_isenum(ctr->info)) ctr = ctype_child(cts, ctr); + if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info)) || + ctype_isbool(ctr->info) || (ct->info & CTF_VARARG) || +#if LJ_TARGET_X86 + ctype_cconv(ct->info) != CTCC_CDECL || +#endif + t == IRT_CDATA || (LJ_32 && (t == IRT_I64 || t == IRT_U64))) + lj_trace_err(J, LJ_TRERR_NYICALL); /* Unsupported return type, varargs or calling convention. */ + tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func); + if (t == IRT_FLOAT || t == IRT_U32) { + tr = emitconv(tr, IRT_NUM, t, 0); /* Convert the result to IRT_NUM. */ + } else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) || + (LJ_64 && (t == IRT_I64 || t == IRT_U64))) { + TRef trid = lj_ir_kint(J, ctype_cid(ct->info)); + tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr); /* Wrap the result in a new cdata via CNEWI. */ + } + J->base[0] = tr; + return 1; /* Call was recorded. */ + } + return 0; /* Not a function or function pointer; recff_cdata_call raises BADTYPE. */ +} + void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) { GCcdata *cd = argv2cdata(J, J->base[0], &rd->argv[0]); - if (cd->typeid == CTID_CTYPEID) { + if (cd->typeid == CTID_CTYPEID) crec_alloc(J, rd, crec_constructor(J, cd, J->base[0])); - } else { + else if (!crec_call(J, rd, cd)) lj_trace_err(J, LJ_TRERR_BADTYPE); - } } static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) diff --git a/src/lj_ir.h b/src/lj_ir.h index bedb4c2b..532d7a9e 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -129,6 +129,7 @@ _(CALLN, N , ref, lit) \ _(CALLL, L , ref, lit) \ _(CALLS, S , ref, lit) \ + _(CALLXS, S , ref, ref) \ _(CARG, N , ref, ref) \ \ /* End of list. */ @@ -236,7 +237,7 @@ typedef struct CCallInfo { } CCallInfo; #define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ -#define CCI_NARGS_MAX 16 /* Max. # of args. */ +#define CCI_NARGS_MAX 32 /* Max. # of args. */ #define CCI_OTSHIFT 16 #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. 
*/ @@ -590,7 +591,6 @@ typedef union IRIns { check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) #define ir_kptr(ir) \ check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) - LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W); /* A store or any other op with a non-weak guard has a side-effect. */ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index ef397aea..1172f4fc 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1891,6 +1891,7 @@ LJFOLDX(lj_opt_dse_xstore) LJFOLD(NEWREF any any) /* Treated like a store. */ LJFOLD(CALLS any any) LJFOLD(CALLL any any) /* Safeguard fallback. */ +LJFOLD(CALLXS any any) LJFOLD(RETF any any) /* Modifies BASE. */ LJFOLD(TNEW any any) LJFOLD(TDUP any) diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index cad85bb7..211c329a 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -675,7 +675,8 @@ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) /* Search for conflicting stores. */ ref = J->chain[IR_XSTORE]; retry: - while (ref > xref) { + if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS]; + while (ref > lim) { IRIns *store = IR(ref); switch (aa_xref(J, xr, fins, store)) { case ALIAS_NO: break; /* Continue searching. */ @@ -732,10 +733,12 @@ TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) { IRRef xref = fins->op1; IRIns *xr = IR(xref); + IRRef lim = xref; /* Search limit. */ IRRef val = fins->op2; /* Stored value reference. */ IRRef1 *refp = &J->chain[IR_XSTORE]; IRRef ref = *refp; - while (ref > xref) { /* Search for redundant or conflicting stores. */ + if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS]; + while (ref > lim) { /* Search for redundant or conflicting stores. 
*/ IRIns *store = IR(ref); switch (aa_xref(J, xr, fins, store)) { case ALIAS_NO: diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h index f0c45963..756330ec 100644 --- a/src/lj_traceerr.h +++ b/src/lj_traceerr.h @@ -37,6 +37,7 @@ TREDEF(NYITMIX, "NYI: mixed sparse/dense table") /* Recording C data operations. */ TREDEF(NOCACHE, "symbol not in cache") TREDEF(NYICONV, "NYI: unsupported C type conversion") +TREDEF(NYICALL, "NYI: unsupported C function type") /* Optimizations. */ TREDEF(GFAIL, "guard would always fail")