Move common 32/64 bit in-memory FP constants to jit_State.

Prerequisite for immovable IR.
Contributed by Peter Cawley.
This commit is contained in:
Mike Pall 2016-05-21 00:02:45 +02:00
parent 1931b38da5
commit cfa188f134
6 changed files with 77 additions and 34 deletions

View File

@ -459,12 +459,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
dest, dest);
if (irt_isfloat(ir->t))
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
RSET_GPR);
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
else
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
RSET_GPR);
(void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
@ -494,12 +492,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
tmp, left, tmp);
if (st == IRT_FLOAT)
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
RSET_GPR);
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
else
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
RSET_GPR);
(void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
} else {
emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,

View File

@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_lsptr(as, PPCI_LFS, (fbias & 31),
(void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
RSET_GPR);
(void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_fb(as, PPCI_FCTIWZ, tmp, left);
}
@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg left = ra_alloc1(as, lref, allow);
Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
const float *kbias;
if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
emit_fab(as, PPCI_FSUB, dest, dest, fbias);
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
if (st == IRT_U32) kbias++;
emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
emit_lsptr(as, PPCI_LFS, (fbias & 31),
&as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
rset_clear(allow, hibias));
emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
RID_SP, SPOFS_TMPLO);
@ -472,8 +469,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
emit_fab(as, PPCI_FSUB, tmp, left, tmp);
emit_lsptr(as, PPCI_LFS, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
RSET_GPR);
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
} else {
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
@ -974,7 +970,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
emit_fab(as, PPCI_FSUB, dest, dest, fbias);
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
emit_lsptr(as, PPCI_LFS, (fbias & 31),
(void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
(void *)&as->J->k32[LJ_K32_2P52_2P31],
rset_clear(allow, hibias));
emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);

View File

@ -696,7 +696,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
if (left == dest) return; /* Avoid the XO_XORPS. */
} else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */
/* number = (2^52+2^51 .. u32) - (2^52+2^51) */
cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
cTValue *k = &as->J->k64[LJ_K64_TOBIT];
Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
if (irt_isfloat(ir->t))
emit_rr(as, XO_CVTSD2SS, dest, dest);
@ -711,7 +711,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
asm_fuseloadm(as, lref, RSET_GPR, st64);
if (LJ_64 && st == IRT_U64) {
MCLabel l_end = emit_label(as);
const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
cTValue *k = &as->J->k64[LJ_K64_2P64];
emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */
emit_sjcc(as, CC_NS, l_end);
emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */
@ -738,11 +738,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
emit_rr(as, op, dest|REX_64, tmp);
if (st == IRT_NUM)
emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J,
LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]);
else
emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J,
LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]);
emit_sjcc(as, CC_NS, l_end);
emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
emit_rr(as, op, dest|REX_64, tmp);
@ -828,8 +826,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
/* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
MCLabel l_end = emit_label(as);
emit_rma(as, XO_FADDq, XOg_FADDq,
lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]);
emit_sjcc(as, CC_NS, l_end);
emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */
} else {
@ -869,8 +866,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
else
emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
emit_rma(as, XO_FADDq, XOg_FADDq,
lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]);
emit_sjcc(as, CC_NS, l_pop);
emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */
}

View File

@ -204,12 +204,12 @@ typedef struct K64Array {
/* Free every K64Array in the chain hanging off the jit_State and clear the list head. */
void lj_ir_k64_freeall(jit_State *J)
{
K64Array *k;
for (k = mref(J->k64, K64Array); k; ) {
for (k = mref(J->k64p, K64Array); k; ) {
/* Read the successor first: k itself is released on the next line. */
K64Array *next = mref(k->next, K64Array);
lj_mem_free(J2G(J), k, sizeof(K64Array));
k = next;
}
/* Reset the list head so it no longer refers to the freed arrays. */
setmref(J->k64, NULL);
setmref(J->k64p, NULL);
}
/* Get new 64 bit constant slot. */
@ -223,7 +223,7 @@ static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64)
if (kp)
setmref(kp->next, kn); /* Chain to the end of the list. */
else
setmref(J->k64, kn); /* Link first array. */
setmref(J->k64p, kn); /* Link first array. */
kp = kn;
}
ntv = &kp->k[kp->numk++]; /* Add to current array. */
@ -237,7 +237,7 @@ cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
K64Array *k, *kp = NULL;
MSize idx;
/* Search for the constant in the whole chain of arrays. */
for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
for (k = mref(J->k64p, K64Array); k; k = mref(k->next, K64Array)) {
kp = k; /* Remember previous element in list. */
for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
TValue *tv = &k->k[idx];
@ -254,7 +254,7 @@ TValue *lj_ir_k64_reserve(jit_State *J)
K64Array *k, *kp = NULL;
lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */
/* Find last K64Array, if any. */
for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k;
for (k = mref(J->k64p, K64Array); k; k = mref(k->next, K64Array)) kp = k;
return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */
}

View File

@ -308,6 +308,37 @@ enum {
LJ_KSIMD__MAX
};
/*
** Indices into the k64[] array of 8 byte constants held in jit_State.
** Which entries exist depends on the target; LJ_K64__MAX is the entry count
** and is used to size that array.
**
** NOTE(review): when no target branch below is active, LJ_K64__MAX is 0 and
** an array declared with that size has zero elements, which ISO C does not
** permit -- confirm that every supported compiler accepts this.
*/
enum {
#if LJ_TARGET_X86ORX64
  LJ_K64_TOBIT,		/* 2^52 + 2^51 */
  LJ_K64_2P64,		/* 2^64 */
  LJ_K64_M2P64,		/* -2^64 */
#if LJ_32
  LJ_K64_M2P64_31,	/* -2^31: needs its own slot on 32 bit builds. */
#else
  LJ_K64_M2P64_31 = LJ_K64_M2P64,  /* -2^64: alias, takes no extra slot. */
#endif
#endif
#if LJ_TARGET_MIPS
  LJ_K64_2P31,		/* 2^31 */
#endif
  /* No trailing comma: matches the neighbouring enums and strict C89. */
  LJ_K64__MAX
};
/*
** Indices into the k32[] array of 4 byte constants held in jit_State.
** Which entries exist depends on the target; the enumerator order fixes
** the slot each constant occupies.
*/
enum {
#if LJ_TARGET_X86ORX64
LJ_K32_M2P64_31, /* -2^64 (64 bit builds) or -2^31 (32 bit builds) */
#endif
#if LJ_TARGET_PPC
LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
LJ_K32_2P52, /* 2^52 */
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
LJ_K32_2P31, /* 2^31 */
#endif
/* Entry count; used to size the k32[] array. */
LJ_K32__MAX
};
/* Get 16 byte aligned pointer to SIMD constant. */
#define LJ_KSIMD(J, n) \
((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
@ -360,8 +391,10 @@ typedef struct jit_State {
int32_t framedepth; /* Current frame depth. */
int32_t retdepth; /* Return frame depth (count of RETF). */
MRef k64; /* Pointer to chained array of 64 bit constants. */
MRef k64p; /* Pointer to chained array of 64 bit constants. */
TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */
uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */
IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
IRRef irtoplim; /* Upper limit of instruction buffer (biased). */

View File

@ -297,13 +297,35 @@ void lj_trace_initstate(global_State *g)
{
/* Fill in the constant tables stored inside the jit_State that belongs to g. */
jit_State *J = G2J(g);
TValue *tv;
/* Initialize SIMD constants. */
/* Initialize aligned SIMD constants. */
/* Each SIMD constant occupies two consecutive TValue slots holding the same 64 bit pattern. */
tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
/* Every bit set except bit 63. */
tv[0].u64 = U64x(7fffffff,ffffffff);
tv[1].u64 = U64x(7fffffff,ffffffff);
tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
/* Only bit 63 set. */
tv[0].u64 = U64x(80000000,00000000);
tv[1].u64 = U64x(80000000,00000000);
/* Initialize 32/64 bit constants. */
/* The values below are raw IEEE 754 bit patterns: doubles in k64[], singles in k32[]. */
#if LJ_TARGET_X86ORX64
J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); /* 2^52 + 2^51 */
J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); /* 2^64 */
J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); /* -2^64 */
#if LJ_32
/* Only 32 bit builds have a separate slot here; otherwise the index aliases LJ_K64_M2P64. */
J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); /* -2^31 */
#endif
/* -2^64 on 64 bit builds, -2^31 on 32 bit builds. */
J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
#endif
#if LJ_TARGET_PPC
J->k32[LJ_K32_2P52_2P31] = 0x59800004; /* 2^52 + 2^31 */
J->k32[LJ_K32_2P52] = 0x59800000; /* 2^52 */
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
J->k32[LJ_K32_2P31] = 0x4f000000; /* 2^31 */
#endif
#if LJ_TARGET_MIPS
J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); /* 2^31 */
#endif
}
/* Free everything associated with the JIT compiler state. */