diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 7bd72fda..bbde98fe 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -459,12 +459,10 @@ static void asm_conv(ASMState *as, IRIns *ir) dest, dest); if (irt_isfloat(ir->t)) emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_4F_4F], RSET_GPR); else emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_41E], RSET_GPR); emit_tg(as, MIPSI_MTC1, RID_TMP, dest); emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); @@ -494,12 +492,10 @@ static void asm_conv(ASMState *as, IRIns *ir) tmp, left, tmp); if (st == IRT_FLOAT) emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_4F_4F], RSET_GPR); else emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_41E], RSET_GPR); } else { emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 0ebed40f..a66668c9 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_TOINTG], RSET_GPR); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fb(as, PPCI_FCTIWZ, tmp, left); } @@ -437,7 +436,7 @@ static void asm_conv(ASMState *as, IRIns *ir) if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); + kbias = (const float *)&as->J->k64[LJ_K64_TOINTG]; if (st == IRT_U32) kbias++; emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, rset_clear(allow, hibias)); @@ -472,8 +471,7 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_fb(as, PPCI_FCTIWZ, tmp, tmp); emit_fab(as, PPCI_FSUB, tmp, left, tmp); emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_4F], RSET_GPR); } else { emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); @@ -975,7 +973,7 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), + (void *)&as->J->k64[LJ_K64_TOINTG], rset_clear(allow, hibias)); emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 66fb4403..74e27d18 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -696,7 +696,7 @@ static void asm_conv(ASMState *as, IRIns *ir) if (left == dest) return; /* Avoid the XO_XORPS. */ } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ - cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); + cTValue *k = &as->J->k64[LJ_K64_TOBIT]; Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); if (irt_isfloat(ir->t)) emit_rr(as, XO_CVTSD2SS, dest, dest); @@ -711,7 +711,7 @@ static void asm_conv(ASMState *as, IRIns *ir) asm_fuseloadm(as, lref, RSET_GPR, st64); if (LJ_64 && st == IRT_U64) { MCLabel l_end = emit_label(as); - const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); + cTValue *k = &as->J->k64[LJ_K64_2P64]; emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ @@ -738,11 +738,11 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); emit_rr(as, op, dest|REX_64, tmp); if (st == IRT_NUM) - emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, - LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000))); + emit_rma(as, XO_ADDSD, tmp, &as->J->k64[ + LJ_64 ? LJ_K64_M2P64 : LJ_K64_M2P31]); else - emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, - LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000))); + emit_rma(as, XO_ADDSS, tmp, &as->J->k32[ + LJ_64 ? LJ_K32_M2P64 : LJ_K32_M2P31]); emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ emit_rr(as, op, dest|REX_64, tmp); @@ -828,8 +828,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ MCLabel l_end = emit_label(as); - emit_rma(as, XO_FADDq, XOg_FADDq, - lj_ir_k64_find(as->J, U64x(43f00000,00000000))); + emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ } else { @@ -869,8 +868,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); else emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rma(as, XO_FADDq, XOg_FADDq, - lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); + emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); emit_sjcc(as, CC_NS, l_pop); emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ } diff --git a/src/lj_ir.c b/src/lj_ir.c index b4087aa7..6a1ecc13 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -204,12 +204,12 @@ typedef struct K64Array { void lj_ir_k64_freeall(jit_State *J) { K64Array *k; - for (k = mref(J->k64, K64Array); k; ) { + for (k = mref(J->k64p, K64Array); k; ) { K64Array *next = mref(k->next, K64Array); lj_mem_free(J2G(J), k, sizeof(K64Array)); k = next; } - setmref(J->k64, NULL); + setmref(J->k64p, NULL); } /* Get new 64 bit constant slot. */ @@ -223,7 +223,7 @@ static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64) if (kp) setmref(kp->next, kn); /* Chain to the end of the list. */ else - setmref(J->k64, kn); /* Link first array. */ + setmref(J->k64p, kn); /* Link first array. */ kp = kn; } ntv = &kp->k[kp->numk++]; /* Add to current array. */ @@ -237,7 +237,7 @@ cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64) K64Array *k, *kp = NULL; MSize idx; /* Search for the constant in the whole chain of arrays. */ - for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) { + for (k = mref(J->k64p, K64Array); k; k = mref(k->next, K64Array)) { kp = k; /* Remember previous element in list. */ for (idx = 0; idx < k->numk; idx++) { /* Search one array. */ TValue *tv = &k->k[idx]; @@ -254,7 +254,7 @@ TValue *lj_ir_k64_reserve(jit_State *J) K64Array *k, *kp = NULL; lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */ /* Find last K64Array, if any. */ - for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k; + for (k = mref(J->k64p, K64Array); k; k = mref(k->next, K64Array)) kp = k; return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */ } diff --git a/src/lj_jit.h b/src/lj_jit.h index 2d2e833a..5619e0dd 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -308,6 +308,24 @@ enum { LJ_KSIMD__MAX }; +enum { + LJ_K64_TOBIT, /* 2^52+2^51 */ + LJ_K64_2P64, /* 2^64 */ + LJ_K64_M2P64, /* -2^64 */ + LJ_K64_M2P31, /* -2^31 */ + LJ_K64_4F, + LJ_K64_4F_4F, + LJ_K64_41E, + LJ_K64_TOINTG, + LJ_K64__MAX +}; + +enum { + LJ_K32_M2P64, /* -2^64 */ + LJ_K32_M2P31, /* -2^31 */ + LJ_K32__MAX +}; + /* Get 16 byte aligned pointer to SIMD constant. */ #define LJ_KSIMD(J, n) \ ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) @@ -360,8 +378,10 @@ typedef struct jit_State { int32_t framedepth; /* Current frame depth. */ int32_t retdepth; /* Return frame depth (count of RETF). */ - MRef k64; /* Pointer to chained array of 64 bit constants. */ + MRef k64p; /* Pointer to chained array of 64 bit constants. */ TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ + TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by assemblers. */ + uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ diff --git a/src/lj_trace.c b/src/lj_trace.c index 7970aba6..f8f85696 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -304,6 +304,18 @@ void lj_trace_initstate(global_State *g) tv = LJ_KSIMD(J, LJ_KSIMD_NEG); tv[0].u64 = U64x(80000000,00000000); tv[1].u64 = U64x(80000000,00000000); + + J->k64[LJ_K64_TOBIT].u64 = U64x(43380000, 00000000); + J->k64[LJ_K64_2P64].u64 = U64x(43f00000, 00000000); + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000, 00000000); + J->k64[LJ_K64_M2P31].u64 = U64x(c1e00000, 00000000); + J->k64[LJ_K64_4F].u64 = U64x(4f000000, 00000000); + J->k64[LJ_K64_4F_4F].u64 = U64x(4f000000, 4f000000); + J->k64[LJ_K64_41E].u64 = U64x(41e00000, 00000000); + J->k64[LJ_K64_TOINTG].u64 = U64x(59800004, 59800000); + + J->k32[LJ_K32_M2P64] = 0xdf800000; + J->k32[LJ_K32_M2P31] = 0xcf000000; } /* Free everything associated with the JIT compiler state. */