Add IR_KINT64.

This commit is contained in:
Mike Pall 2010-12-05 21:50:52 +01:00
parent b1fb71fb98
commit 5a13fa69d9
9 changed files with 135 additions and 95 deletions

View File

@ -384,16 +384,24 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
emit_loadi(as, (r), ptr2addr((addr)))
#if LJ_64
/* mov r, imm64 */
static void emit_loadu64(ASMState *as, Reg r, uint64_t i)
/* mov r, imm64 or shorter 32 bit extended load.
** Picks one of three encodings depending on the value of the constant.
** The instruction bytes are stored at negative offsets from as->mcp and
** as->mcp is then moved down to the start of the new instruction.
*/
static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
{
if (checku32(u64)) { /* 32 bit load clears upper 32 bits. */
emit_loadi(as, r, (int32_t)u64);
} else if (checki32((int64_t)u64)) { /* Sign-extended 32 bit load. */
MCode *p = as->mcp;
*(uint64_t *)(p-8) = i;
/* Store the 4 byte immediate directly below the current position. */
*(int32_t *)(p-4) = (int32_t)u64;
/* Opcode and operand bytes are emitted below the immediate (delta -4). */
as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
} else { /* Full-size 64 bit load. */
MCode *p = as->mcp;
/* Store the 8 byte immediate directly below the current position. */
*(uint64_t *)(p-8) = u64;
/* Opcode byte: low 3 bits of the register id are added to the opcode. */
p[-9] = (MCode)(XI_MOVri+(r&7));
/* Prefix byte: 0x48 (REX.W) plus bit 3 of the register id (REX.B). */
p[-10] = 0x48 + ((r>>3)&1);
/* 10 bytes in total: prefix + opcode + 8 byte immediate. */
p -= 10;
as->mcp = p;
}
}
#endif
/* movsd r, [&tv->n] / xorps r, r */
@ -618,6 +626,10 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
} else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */
lua_assert(irt_isnil(ir->t));
emit_getgl(as, r, jit_L);
#if LJ_64 /* NYI: 32 bit register pairs. */
} else if (ir->o == IR_KINT64) {
emit_loadu64(as, r, ir_kint64(ir)->u64);
#endif
} else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
ir->o == IR_KPTR || ir->o == IR_KNULL);
@ -909,6 +921,11 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
emit_loadn(as, dest, tv);
return;
}
#if LJ_64 /* NYI: 32 bit register pairs. */
} else if (ir->o == IR_KINT64) {
emit_loadu64(as, dest, ir_kint64(ir)->u64);
return;
#endif
} else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
ir->o == IR_KPTR || ir->o == IR_KNULL);
@ -1343,7 +1360,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
emit_call(as, ci->func);
for (n = 0; n < nargs; n++) { /* Setup args. */
IRIns *ir = IR(args[n]);
IRRef ref = args[n];
IRIns *ir = IR(ref);
Reg r;
#if LJ_64 && LJ_ABI_WIN
/* Windows/x64 argument registers are strictly positional. */
@ -1364,7 +1382,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
}
#endif
if (r) { /* Argument is in a register. */
if (r < RID_MAX_GPR && args[n] < ASMREF_TMP1) {
if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
#if LJ_64 /* NYI: 32 bit register pairs. */
if (ir->o == IR_KINT64)
emit_loadu64(as, r, ir_kint64(ir)->u64);
else
#endif
emit_loadi(as, r, ir->i);
} else {
lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
@ -1372,30 +1395,29 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
ra_noweak(as, ir->r);
ra_movrr(as, ir, r, ir->r);
} else {
ra_allocref(as, args[n], RID2RSET(r));
ra_allocref(as, ref, RID2RSET(r));
}
}
} else if (irt_isnum(ir->t)) { /* FP argument is on stack. */
if (!LJ_64 && (ofs & 4) && irref_isk(args[n])) {
if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
/* Split stores for unaligned FP consts. */
emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
} else {
if ((allow & RSET_FPR) == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_alloc1(as, args[n], allow & RSET_FPR);
r = ra_alloc1(as, ref, allow & RSET_FPR);
allow &= ~RID2RSET(r);
emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
}
ofs += 8;
} else { /* Non-FP argument is on stack. */
/* NYI: no widening for 64 bit parameters on x64. */
if (args[n] < ASMREF_TMP1) {
if (LJ_32 && ref < ASMREF_TMP1) {
emit_movmroi(as, RID_ESP, ofs, ir->i);
} else {
if ((allow & RSET_GPR) == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_alloc1(as, args[n], allow & RSET_GPR);
r = ra_alloc1(as, ref, allow & RSET_GPR);
allow &= ~RID2RSET(r);
emit_movtomro(as, REX_64IR(ir, r), RID_ESP, ofs);
}
@ -1936,8 +1958,9 @@ static void asm_fstore(ASMState *as, IRIns *ir)
/* The IRT_I16/IRT_U16 stores should never be simplified for constant
** values since mov word [mem], imm16 has a length-changing prefix.
*/
lua_assert(!(irref_isk(ir->op2) && irt_is64(ir->t))); /* NYI: KINT64. */
if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t)) {
if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t) ||
(LJ_64 && irt_is64(ir->t) &&
!checki32((int64_t)ir_k64(IR(ir->op2))->u64))) {
RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
src = ra_alloc1(as, ir->op2, allow8);
rset_clear(allow, src);
@ -2496,7 +2519,7 @@ static void asm_add(ASMState *as, IRIns *ir)
if (irt_isnum(ir->t))
asm_fparith(as, ir, XO_ADDSD);
else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp ||
!asm_lea(as, ir))
irt_is64(ir->t) || !asm_lea(as, ir))
asm_intarith(as, ir, XOg_ADD);
}
@ -2615,7 +2638,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
else if ((cc & 0xa) == 0x2) cc ^= 5; /* A <-> B, AE <-> BE */
lref = ir->op2; rref = ir->op1;
}
if (irref_isk(rref)) {
if (irref_isk(rref) && IR(rref)->o != IR_KINT64) {
IRIns *irl = IR(lref);
int32_t imm = IR(rref)->i;
/* Check whether we can use test ins. Not for unsigned, since CF=0. */

View File

@ -62,7 +62,7 @@ typedef unsigned __int32 uintptr_t;
#define LJ_MIN_SBUF 32 /* Min. string buffer length. */
#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
#define LJ_MIN_KNUMSZ 16 /* Min. size for chained KNUM array. */
#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */
/* JIT compiler limits. */
#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
@ -90,6 +90,7 @@ typedef unsigned __int32 uintptr_t;
#define checki16(x) ((x) == (int32_t)(int16_t)(x))
#define checku16(x) ((x) == (int32_t)(uint16_t)(x))
#define checki32(x) ((x) == (int32_t)(x))
#define checku32(x) ((x) == (uint32_t)(x))
#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
/* Every half-decent C compiler transforms this into a rotate instruction. */

View File

@ -167,88 +167,95 @@ found:
return TREF(ref, IRT_INT);
}
/* The MRef inside the KNUM IR instruction holds the address of the constant
** (an aligned double or a special 64 bit pattern). The KNUM constants
** themselves are stored in a chained array and shared across traces.
/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the
** 64 bit constant. The constants themselves are stored in a chained array
** and shared across traces.
**
** Rationale for choosing this data structure:
** - The address of the constants is embedded in the generated machine code
** and must never move. A resizable array or hash table wouldn't work.
** - Most apps need very few non-integer constants (less than a dozen).
** - Most apps need very few non-32 bit integer constants (less than a dozen).
** - Linear search is hard to beat in terms of speed and low complexity.
*/
/* One fixed-size array of constants, chained to further arrays via 'next'. */
typedef struct KNumArray {
typedef struct K64Array {
MRef next; /* Pointer to next array in the chain. */
MSize numk; /* Number of used elements in this array. */
TValue k[LJ_MIN_KNUMSZ]; /* Array of constants. */
} KNumArray;
TValue k[LJ_MIN_K64SZ]; /* Array of constants. */
} K64Array;
/* Free all chained arrays. Walks the list from its head and frees each
** array in turn.
*/
void lj_ir_knum_freeall(jit_State *J)
void lj_ir_k64_freeall(jit_State *J)
{
KNumArray *kn;
for (kn = mref(J->knum, KNumArray); kn; ) {
KNumArray *next = mref(kn->next, KNumArray);
lj_mem_free(J2G(J), kn, sizeof(KNumArray));
kn = next;
K64Array *k;
for (k = mref(J->k64, K64Array); k; ) {
/* Fetch the successor first, since the current array is freed next. */
K64Array *next = mref(k->next, K64Array);
lj_mem_free(J2G(J), k, sizeof(K64Array));
k = next;
}
}
/* Find KNUM constant in chained array or add it. */
static cTValue *ir_knum_find(jit_State *J, uint64_t nn)
/* Find 64 bit constant in chained array or add it.
** Returns the address of the stored constant. An existing entry with the
** same 64 bit pattern is reused, otherwise the value is appended.
*/
static cTValue *ir_k64_find(jit_State *J, uint64_t u64)
{
KNumArray *kn, *knp = NULL;
K64Array *k, *kp = NULL;
TValue *ntv;
MSize idx;
/* Search for the constant in the whole chain of arrays. */
for (kn = mref(J->knum, KNumArray); kn; kn = mref(kn->next, KNumArray)) {
knp = kn; /* Remember previous element in list. */
for (idx = 0; idx < kn->numk; idx++) { /* Search one array. */
TValue *tv = &kn->k[idx];
if (tv->u64 == nn) /* Needed for +-0/NaN/absmask. */
for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
kp = k; /* Remember previous element in list. */
for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
TValue *tv = &k->k[idx];
/* Compare the raw 64 bit patterns, not the FP values. */
if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
return tv;
}
}
/* Constant was not found, need to add it. */
if (!(knp && knp->numk < LJ_MIN_KNUMSZ)) { /* Allocate a new array. */
KNumArray *nkn = lj_mem_newt(J->L, sizeof(KNumArray), KNumArray);
setmref(nkn->next, NULL);
nkn->numk = 0;
if (knp)
setmref(knp->next, nkn); /* Chain to the end of the list. */
/* After the search kp is the last array in the chain, or NULL if the chain
** is empty. A new array is needed if there is none or the last one is full.
*/
if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
setmref(kn->next, NULL);
kn->numk = 0;
if (kp)
setmref(kp->next, kn); /* Chain to the end of the list. */
else
setmref(J->knum, nkn); /* Link first array. */
knp = nkn;
setmref(J->k64, kn); /* Link first array. */
kp = kn;
}
ntv = &knp->k[knp->numk++]; /* Add to current array. */
ntv->u64 = nn;
ntv = &kp->k[kp->numk++]; /* Add to current array. */
ntv->u64 = u64;
return ntv;
}
/* Intern FP constant, given by its address. */
TRef lj_ir_knum_addr(jit_State *J, cTValue *tv)
/* Intern 64 bit constant, given by its address.
** 'op' selects the constant instruction (IR_KNUM or IR_KINT64) and thereby
** the IR type of the result. Constants are matched by address, not by value.
*/
TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
{
IRIns *ir, *cir = J->cur.ir;
IRRef ref;
for (ref = J->chain[IR_KNUM]; ref; ref = cir[ref].prev)
if (ir_knum(&cir[ref]) == tv)
/* IR_KNUM yields IRT_NUM, any other opcode yields IRT_I64. */
IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
/* Search the per-opcode chain for an instruction holding the same address. */
for (ref = J->chain[op]; ref; ref = cir[ref].prev)
if (ir_k64(&cir[ref]) == tv)
goto found;
/* Not found: create a new constant instruction. */
ref = ir_nextk(J);
ir = IR(ref);
lua_assert(checkptr32(tv));
setmref(ir->ptr, tv);
ir->t.irt = IRT_NUM;
ir->o = IR_KNUM;
ir->prev = J->chain[IR_KNUM];
J->chain[IR_KNUM] = (IRRef1)ref;
ir->t.irt = t;
ir->o = op;
/* Link the new instruction at the head of the chain for this opcode. */
ir->prev = J->chain[op];
J->chain[op] = (IRRef1)ref;
found:
return TREF(ref, IRT_NUM);
return TREF(ref, t);
}
/* Intern FP constant, given by its 64 bit pattern. */
TRef lj_ir_knum_nn(jit_State *J, uint64_t nn)
TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
{
return lj_ir_knum_addr(J, ir_knum_find(J, nn));
/* Find or add the constant in the chained array, then intern its address
** as an IR_KNUM.
*/
return lj_ir_k64(J, IR_KNUM, ir_k64_find(J, u64));
}
/* Intern a 64 bit integer constant, given by its 64 bit pattern. */
TRef lj_ir_kint64(jit_State *J, uint64_t u64)
{
  /* Find or add the constant, then intern its address as an IR_KINT64. */
  TRef tr = lj_ir_k64(J, IR_KINT64, ir_k64_find(J, u64));
  return tr;
}
/* Check whether a number is int and return it. -0 is NOT considered an int. */
@ -373,6 +380,9 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
} else if (irt_isnum(ir->t)) {
lua_assert(ir->o == IR_KNUM);
setnumV(tv, ir_knum(ir)->n);
} else if (irt_is64(ir->t)) {
lua_assert(ir->o == IR_KINT64);
setnumV(tv, (int64_t)ir_kint64(ir)->u64); /* NYI: use FFI int64_t. */
} else if (irt_ispri(ir->t)) {
lua_assert(ir->o == IR_KPRI);
setitype(tv, irt_toitype(ir->t));

View File

@ -12,6 +12,24 @@
/* IR instruction definition. Order matters, see below. */
#define IRDEF(_) \
/* Guarded assertions. */ \
/* Must be properly aligned to flip opposites (^1) and (un)ordered (^4). */ \
_(LT, N , ref, ref) \
_(GE, N , ref, ref) \
_(LE, N , ref, ref) \
_(GT, N , ref, ref) \
\
_(ULT, N , ref, ref) \
_(UGE, N , ref, ref) \
_(ULE, N , ref, ref) \
_(UGT, N , ref, ref) \
\
_(EQ, C , ref, ref) \
_(NE, C , ref, ref) \
\
_(ABC, N , ref, ref) \
_(RETF, S , ref, ref) \
\
/* Miscellaneous ops. */ \
_(NOP, N , ___, ___) \
_(BASE, N , lit, lit) \
@ -26,26 +44,9 @@
_(KPTR, N , cst, ___) \
_(KNULL, N , cst, ___) \
_(KNUM, N , cst, ___) \
_(KINT64, N , cst, ___) \
_(KSLOT, N , ref, lit) \
\
/* Guarded assertions. */ \
/* Must be properly aligned to flip opposites (^1) and (un)ordered (^4). */ \
_(EQ, C , ref, ref) \
_(NE, C , ref, ref) \
\
_(ABC, N , ref, ref) \
_(RETF, S , ref, ref) \
\
_(LT, N , ref, ref) \
_(GE, N , ref, ref) \
_(LE, N , ref, ref) \
_(GT, N , ref, ref) \
\
_(ULT, N , ref, ref) \
_(UGE, N , ref, ref) \
_(ULE, N , ref, ref) \
_(UGT, N , ref, ref) \
\
/* Bit ops. */ \
_(BNOT, N , ref, ___) \
_(BSWAP, N , ref, ___) \
@ -536,6 +537,9 @@ typedef union IRIns {
#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue))
#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
#define ir_k64(ir) \
check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
#define ir_kptr(ir) check_exp((ir)->o == IR_KPTR, mref((ir)->ptr, void))
LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W);

View File

@ -39,10 +39,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
/* Interning of constants. */
LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
LJ_FUNC void lj_ir_knum_freeall(jit_State *J);
LJ_FUNC TRef lj_ir_knum_addr(jit_State *J, cTValue *tv);
LJ_FUNC TRef lj_ir_knum_nn(jit_State *J, uint64_t nn);
LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
LJ_FUNC TRef lj_ir_kptr(jit_State *J, void *ptr);
LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
@ -52,7 +53,7 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
{
TValue tv;
tv.n = n;
return lj_ir_knum_nn(J, tv.u64);
return lj_ir_knum_u64(J, tv.u64);
}
#define lj_ir_kstr(J, str) lj_ir_kgc(J, obj2gco((str)), IRT_STR)
@ -60,13 +61,13 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
#define lj_ir_kfunc(J, func) lj_ir_kgc(J, obj2gco((func)), IRT_FUNC)
/* Special FP constants. */
#define lj_ir_knum_zero(J) lj_ir_knum_nn(J, U64x(00000000,00000000))
#define lj_ir_knum_one(J) lj_ir_knum_nn(J, U64x(3ff00000,00000000))
#define lj_ir_knum_tobit(J) lj_ir_knum_nn(J, U64x(43380000,00000000))
#define lj_ir_knum_zero(J) lj_ir_knum_u64(J, U64x(00000000,00000000))
#define lj_ir_knum_one(J) lj_ir_knum_u64(J, U64x(3ff00000,00000000))
#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000))
/* Special 128 bit SIMD constants. */
#define lj_ir_knum_abs(J) lj_ir_knum_addr(J, LJ_KSIMD(J, LJ_KSIMD_ABS))
#define lj_ir_knum_neg(J) lj_ir_knum_addr(J, LJ_KSIMD(J, LJ_KSIMD_NEG))
#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS))
#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG))
/* Access to constants. */
LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);

View File

@ -271,7 +271,7 @@ typedef struct jit_State {
int32_t framedepth; /* Current frame depth. */
int32_t retdepth; /* Return frame depth (count of RETF). */
MRef knum; /* Pointer to chained array of KNUM constants. */
MRef k64; /* Pointer to chained array of 64 bit constants. */
TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */

View File

@ -188,7 +188,7 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
lua_assert(itype2irt(tv) == irt_type(fins->t));
if (irt_isnum(fins->t))
return lj_ir_knum_nn(J, tv->u64);
return lj_ir_knum_u64(J, tv->u64);
else
return lj_ir_kstr(J, strV(tv));
}

View File

@ -1863,7 +1863,8 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break;
case IR_KINT64: tr = lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); break;
case IR_KPTR: tr = lj_ir_kptr(J, ir_kptr(ir)); break; /* Continuation. */
/* Inherited SLOADs don't need a guard or type check. */
case IR_SLOAD:

View File

@ -306,7 +306,7 @@ void lj_trace_freestate(global_State *g)
}
#endif
lj_mcode_free(J);
lj_ir_knum_freeall(J);
lj_ir_k64_freeall(J);
lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);