diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index 15c9a6db..6f84533c 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -1219,9 +1219,8 @@ suboptimal performance, especially when used in inner loops:
  • Vector operations.
  • Table initializers.
  • Initialization of nested struct/union types.
  • -
  • Allocations of variable-length arrays or structs.
  • -
  • Allocations of C types with a size > 128 bytes or an -alignment > 8 bytes.
  • +
  • Non-default initialization of VLA/VLS or large C types +(> 128 bytes or > 16 array elements).
  • Conversions from lightuserdata to void *.
  • Pointer differences for element sizes that are not a power of two.
  • diff --git a/src/lj_asm.c b/src/lj_asm.c index 7542c77c..a80d6adf 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2050,7 +2050,16 @@ static void asm_setup_regsp(ASMState *as) case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: if (REGARG_NUMGPR < 3 && as->evenspill < 3) as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ - case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: +#if LJ_TARGET_X86 && LJ_HASFFI + if (0) { + case IR_CNEW: + if (ir->op2 != REF_NIL && as->evenspill < 4) + as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ + } +#else + case IR_CNEW: +#endif + case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: case IR_BUFSTR: ir->prev = REGSP_HINT(RID_RET); if (inloop) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index ddf1480f..9b661eb7 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1209,19 +1209,16 @@ dotypecheck: static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? - lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[2]; + IRRef args[4]; RegSet allow = (RSET_GPR & ~RSET_SCRATCH); RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID); + lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ as->gcsteps++; - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ ra_evictset(as, drop); @@ -1243,16 +1240,28 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ofs == sizeof(GCcdata)) break; ofs -= 4; ir--; } + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; } + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ { - uint32_t k = emit_isk12(ARMI_MOV, ctypeid); - Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); + uint32_t k = emit_isk12(ARMI_MOV, id); + Reg r = k ? RID_R1 : ra_allock(as, id, allow); emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); if (k) emit_d(as, ARMI_MOV^k, RID_R1); } + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ asm_gencall(as, ci, args); ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index fe7d55d3..3d061eb4 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -975,19 +975,15 @@ dotypecheck: static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? - lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[2]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + IRRef args[4]; RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID); + lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ as->gcsteps++; - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. 
*/ ra_evictset(as, drop); @@ -996,6 +992,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); int32_t ofs = sizeof(GCcdata); lua_assert(sz == 4 || sz == 8); if (sz == 8) { @@ -1010,12 +1007,24 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ofs == sizeof(GCcdata)) break; ofs -= 4; if (LJ_BE) ir++; else ir--; } + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; } + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ + emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ asm_gencall(as, ci, args); ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); @@ -1197,11 +1206,16 @@ static void asm_arithov(ASMState *as, IRIns *ir) static void asm_mulov(ASMState *as, IRIns *ir) { -#if LJ_DUALNUM -#error "NYI: MULOV" -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. 
*/ -#endif + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), + right), dest)); + asm_guard(as, MIPSI_BNE, RID_TMP, tmp); + emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); + emit_dst(as, MIPSI_MFHI, tmp, 0, 0); + emit_dst(as, MIPSI_MFLO, dest, 0, 0); + emit_dst(as, MIPSI_MULT, 0, left, right); } #if LJ_HASFFI diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 1cac6fa9..10cd79dd 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -1009,19 +1009,15 @@ dotypecheck: static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? - lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[2]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + IRRef args[4]; RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID); + lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ as->gcsteps++; - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ ra_evictset(as, drop); @@ -1030,6 +1026,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); int32_t ofs = sizeof(GCcdata); lua_assert(sz == 4 || sz == 8); if (sz == 8) { @@ -1043,12 +1040,24 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ofs == sizeof(GCcdata)) break; ofs -= 4; ir++; } + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; } + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ + emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ asm_gencall(as, ci, args); ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 25da5246..f2f8157d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1438,15 +1438,13 @@ static void asm_sload(ASMState *as, IRIns *ir) static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 
- lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[2]; - lua_assert(sz != CTSIZE_INVALID); + IRRef args[4]; + lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ as->gcsteps++; asm_setupresult(as, ir, ci); /* GCcdata * */ @@ -1489,15 +1487,26 @@ static void asm_cnew(ASMState *as, IRIns *ir) } while (1); #endif lua_assert(sz == 4 || sz == 8); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; } /* Combine initialization of marked, gct and ctypeid. 
*/ emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, - (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); + (int32_t)((~LJ_TCDATA<<8)+(id<<16))); emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ asm_gencall(as, ci, args); emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); } diff --git a/src/lj_crecord.c b/src/lj_crecord.c index e88e3579..ef6e5f82 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -888,10 +888,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) CTSize sz; CTInfo info = lj_ctype_info(cts, id, &sz); CType *d = ctype_raw(cts, id); - TRef trid; - if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) - lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */ - trid = lj_ir_kint(J, id); + TRef trcd, trid = lj_ir_kint(J, id); + cTValue *fin; /* Use special instruction to box pointer or 32/64 bit integer. */ if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : @@ -899,11 +897,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) sz == 4 ? lj_ir_kint(J, 0) : (lj_needsplit(J), lj_ir_kint64(J, 0)); J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); + return; } else { - TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); - cTValue *fin; - J->base[0] = trcd; - if (J->base[1] && !J->base[2] && + TRef trsz = TREF_NIL; + if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */ + CTSize sz0, sz1; + if (!J->base[1] || J->base[2]) + lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. 
*/ + trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, + J->base[1], &rd->argv[1]); + sz0 = lj_ctype_vlsize(cts, d, 0); + sz1 = lj_ctype_vlsize(cts, d, 1); + trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0))); + trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0)); + J->base[1] = 0; /* Simplify logic below. */ + } else if (ctype_align(info) > CT_MEMALIGN) { + trsz = lj_ir_kint(J, sz); + } + trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz); + if (sz > 128 || (info & CTF_VLA)) { + TRef dp; + CTSize align; + special: /* Only handle bulk zero-fill for large/VLA/VLS types. */ + if (J->base[1]) + lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */ + dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata))); + if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz); + align = ctype_align(info); + if (align < CT_MEMALIGN) align = CT_MEMALIGN; + crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align)); + } else if (J->base[1] && !J->base[2] && !lj_cconv_multi_init(cts, d, &rd->argv[1])) { goto single_init; } else if (ctype_isarray(d->info)) { @@ -914,8 +937,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) TValue *sval = &tv; MSize i; tv.u64 = 0; - if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) - lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ + if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) || + esize * CREC_FILL_MAXUNROLL < sz) + goto special; for (i = 1, ofs = 0; ofs < sz; ofs += esize) { TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, ofs + sizeof(GCcdata))); @@ -972,11 +996,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); } } - /* Handle __gc metamethod. */ - fin = lj_ctype_meta(cts, id, MM_gc); - if (fin) - crec_finalizer(J, trcd, fin); } + J->base[0] = trcd; + /* Handle __gc metamethod. 
*/ + fin = lj_ctype_meta(cts, id, MM_gc); + if (fin) + crec_finalizer(J, trcd, fin); } /* Record argument conversions. */ diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 7271ceca..bdcfea0b 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -202,6 +202,7 @@ typedef struct CCallInfo { _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ + _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \ _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ _(FFI, strlen, 1, L, INTP, 0) \ _(FFI, memcpy, 3, S, PTR, 0) \ diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h index 36f46c83..9bf1d2cb 100644 --- a/src/lj_target_mips.h +++ b/src/lj_target_mips.h @@ -169,6 +169,9 @@ typedef enum MIPSIns { MIPSI_SLTU = 0x0000002b, MIPSI_MOVZ = 0x0000000a, MIPSI_MOVN = 0x0000000b, + MIPSI_MFHI = 0x00000010, + MIPSI_MFLO = 0x00000012, + MIPSI_MULT = 0x00000018, MIPSI_SLL = 0x00000000, MIPSI_SRL = 0x00000002,