Split up FP IR instructions with SPLIT pass for soft-float targets.

Mike Pall 2011-05-22 17:41:59 +02:00
parent d0115c65f5
commit 138f54352a
13 changed files with 479 additions and 72 deletions

src/jit/dump.lua

@ -147,6 +147,7 @@ local irtype_text = {
"u32", "u32",
"i64", "i64",
"u64", "u64",
"sfp",
} }
local colortype_ansi = { local colortype_ansi = {
@ -173,6 +174,7 @@ local colortype_ansi = {
"\027[35m%s\027[m", "\027[35m%s\027[m",
"\027[35m%s\027[m", "\027[35m%s\027[m",
"\027[35m%s\027[m", "\027[35m%s\027[m",
"\027[35m%s\027[m",
} }
local function colorize_text(s, t) local function colorize_text(s, t)
@ -317,12 +319,12 @@ local function printsnap(tr, snap)
local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
if ref < 0 then if ref < 0 then
out:write(formatk(tr, ref)) out:write(formatk(tr, ref))
else
if band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
else else
local m, ot, op1, op2 = traceir(tr, ref) local m, ot, op1, op2 = traceir(tr, ref)
out:write(colorize(format("%04d", ref), band(ot, 31))) out:write(colorize(format("%04d", ref), band(ot, 31)))
if band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
local m, ot, op1, op2 = traceir(tr, ref+1)
out:write(colorize(format("/%04d", ref+1), band(ot, 31)))
end end
end end
out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME

src/Makefile.dep

@ -108,7 +108,7 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
lj_lib.h lj_vm.h lj_lib.h
lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
lj_state.h lj_lex.h lj_parse.h lj_char.h lj_state.h lj_lex.h lj_parse.h lj_char.h

src/lj_asm.c

@ -85,6 +85,9 @@ typedef struct ASMState {
IRRef1 phireg[RID_MAX]; /* PHI register references. */ IRRef1 phireg[RID_MAX]; /* PHI register references. */
uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */
#if LJ_SOFTFP
uint16_t parentmaphi[LJ_MAX_JSLOTS]; /* Parent slot to hi RegSP map. */
#endif
} ASMState; } ASMState;
#define IR(ref) (&as->ir[(ref)]) #define IR(ref) (&as->ir[(ref)])
@ -273,9 +276,12 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
ra_modified(as, r); ra_modified(as, r);
ir->r = RID_INIT; /* Do not keep any hint. */ ir->r = RID_INIT; /* Do not keep any hint. */
RA_DBGX((as, "remat $i $r", ir, r)); RA_DBGX((as, "remat $i $r", ir, r));
#if !LJ_SOFTFP
if (ir->o == IR_KNUM) { if (ir->o == IR_KNUM) {
emit_loadn(as, r, ir_knum(ir)); emit_loadn(as, r, ir_knum(ir));
} else if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { } else
#endif
if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
emit_getgl(as, r, jit_base); emit_getgl(as, r, jit_base);
} else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
@ -596,18 +602,12 @@ static int asm_snap_canremat(ASMState *as)
return 0; return 0;
} }
/* Allocate registers or spill slots for refs escaping to a snapshot. */ /* Allocate register or spill slot for a ref that escapes to a snapshot. */
static void asm_snap_alloc(ASMState *as) static void asm_snap_alloc1(ASMState *as, IRRef ref)
{ {
SnapShot *snap = &as->T->snap[as->snapno];
SnapEntry *map = &as->T->snapmap[snap->mapofs];
MSize n, nent = snap->nent;
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
if (!irref_isk(ref)) {
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
if (!ra_used(ir)) { if (!ra_used(ir)) {
RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR;
/* Get a weak register if we have a free one or can rematerialize. */ /* Get a weak register if we have a free one or can rematerialize. */
if ((as->freeset & allow) || if ((as->freeset & allow) ||
(allow == RSET_FPR && asm_snap_canremat(as))) { (allow == RSET_FPR && asm_snap_canremat(as))) {
@ -622,6 +622,21 @@ static void asm_snap_alloc(ASMState *as)
} }
} }
} }
/* Allocate refs escaping to a snapshot. */
static void asm_snap_alloc(ASMState *as)
{
SnapShot *snap = &as->T->snap[as->snapno];
SnapEntry *map = &as->T->snapmap[snap->mapofs];
MSize n, nent = snap->nent;
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
IRRef ref = snap_ref(sn);
if (!irref_isk(ref)) {
asm_snap_alloc1(as, ref);
if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM))
asm_snap_alloc1(as, ref+1);
}
} }
} }
@ -997,6 +1012,15 @@ static void asm_head_root(ASMState *as)
as->T->topslot = gcref(as->T->startpt)->pt.framesize; as->T->topslot = gcref(as->T->startpt)->pt.framesize;
} }
/* Get RegSP for parent slot. */
static LJ_AINLINE RegSP asm_head_parentrs(ASMState *as, IRIns *ir)
{
#if LJ_SOFTFP
if (ir->o == IR_HIOP) return as->parentmaphi[(ir-1)->op1];
#endif
return as->parentmap[ir->op1];
}
/* Head of a side trace. /* Head of a side trace.
** **
** The current simplistic algorithm requires that all slots inherited ** The current simplistic algorithm requires that all slots inherited
@ -1022,8 +1046,9 @@ static void asm_head_side(ASMState *as)
for (i = as->stopins; i > REF_BASE; i--) { for (i = as->stopins; i > REF_BASE; i--) {
IRIns *ir = IR(i); IRIns *ir = IR(i);
RegSP rs; RegSP rs;
lua_assert(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)); lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
rs = as->parentmap[ir->op1]; (LJ_SOFTFP && ir->o == IR_HIOP));
rs = asm_head_parentrs(as, ir);
if (ra_hasreg(ir->r)) { if (ra_hasreg(ir->r)) {
rset_clear(allow, ir->r); rset_clear(allow, ir->r);
if (ra_hasspill(ir->s)) if (ra_hasspill(ir->s))
@ -1052,6 +1077,12 @@ static void asm_head_side(ASMState *as)
} }
as->T->spadjust = (uint16_t)spadj; as->T->spadjust = (uint16_t)spadj;
#if !LJ_TARGET_X86ORX64
/* Restore BASE register from parent spill slot. */
if (ra_hasspill(irp->s))
emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, spdelta + sps_scale(irp->s));
#endif
/* Reload spilled target registers. */ /* Reload spilled target registers. */
if (pass2) { if (pass2) {
for (i = as->stopins; i > REF_BASE; i--) { for (i = as->stopins; i > REF_BASE; i--) {
@ -1061,12 +1092,12 @@ static void asm_head_side(ASMState *as)
Reg r; Reg r;
RegSP rs; RegSP rs;
irt_clearmark(ir->t); irt_clearmark(ir->t);
rs = as->parentmap[ir->op1]; rs = asm_head_parentrs(as, ir);
if (!ra_hasspill(regsp_spill(rs))) if (!ra_hasspill(regsp_spill(rs)))
ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */ ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */
else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s)) else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
continue; /* Same spill slot, do nothing. */ continue; /* Same spill slot, do nothing. */
mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow; mask = ((!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
if (mask == RSET_EMPTY) if (mask == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL); lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_allocref(as, i, mask); r = ra_allocref(as, i, mask);
@ -1093,7 +1124,7 @@ static void asm_head_side(ASMState *as)
while (work) { while (work) {
Reg r = rset_pickbot(work); Reg r = rset_pickbot(work);
IRIns *ir = IR(regcost_ref(as->cost[r])); IRIns *ir = IR(regcost_ref(as->cost[r]));
RegSP rs = as->parentmap[ir->op1]; RegSP rs = asm_head_parentrs(as, ir);
rset_clear(work, r); rset_clear(work, r);
if (ra_hasspill(regsp_spill(rs))) { if (ra_hasspill(regsp_spill(rs))) {
int32_t ofs = sps_scale(regsp_spill(rs)); int32_t ofs = sps_scale(regsp_spill(rs));
@ -1262,13 +1293,37 @@ static void asm_setup_regsp(ASMState *as)
(RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
continue; continue;
} }
#if LJ_32 && LJ_HASFFI #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
case IR_HIOP: case IR_HIOP:
if ((ir-1)->o == IR_CALLN) { switch ((ir-1)->o) {
ir->prev = REGSP_HINT(RID_RETHI); #if LJ_SOFTFP
case IR_SLOAD:
if (((ir-1)->op2 & IRSLOAD_PARENT)) {
RegSP rs = as->parentmaphi[(ir-1)->op1];
lua_assert(regsp_used(rs));
as->stopins = i;
if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) {
ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
continue; continue;
} }
}
break; break;
#endif
case IR_CALLN: case IR_CALLXS:
#if LJ_SOFTFP
case IR_MIN: case IR_MAX:
#endif
ir->prev = REGSP_HINT(RID_RETHI);
continue;
default:
break;
}
break;
#endif
#if LJ_SOFTFP
case IR_MIN: case IR_MAX:
if ((ir+1)->o != IR_HIOP) break;
/* fallthrough */
#endif #endif
/* C calls evict all scratch regs and return results in RID_RET. */ /* C calls evict all scratch regs and return results in RID_RET. */
case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
@ -1387,7 +1442,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->loopinv = 0; as->loopinv = 0;
if (J->parent) { if (J->parent) {
as->parent = traceref(J, J->parent); as->parent = traceref(J, J->parent);
lj_snap_regspmap(as->parentmap, as->parent, J->exitno); lj_snap_regspmap(as->parentmap, as->parent, J->exitno, 0);
#if LJ_SOFTFP
lj_snap_regspmap(as->parentmaphi, as->parent, J->exitno, 1);
#endif
} else { } else {
as->parent = NULL; as->parent = NULL;
} }

src/lj_ffrecord.c

@ -438,7 +438,12 @@ static void LJ_FASTCALL recff_math_unary(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_math_binary(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_binary(jit_State *J, RecordFFData *rd)
{ {
TRef tr = lj_ir_tonum(J, J->base[0]); TRef tr = lj_ir_tonum(J, J->base[0]);
J->base[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, J->base[1])); #if LJ_TARGET_X86ORX64
TRef tr2 = lj_ir_tonum(J, J->base[1]);
#else
TRef tr2 = lj_opt_narrow_toint(J, J->base[1]);
#endif
J->base[0] = emitir(IRTN(rd->data), tr, tr2);
} }
/* Record math.asin, math.acos, math.atan. */ /* Record math.asin, math.acos, math.atan. */

src/lj_ir.c

@ -27,6 +27,7 @@
#include "lj_cdata.h" #include "lj_cdata.h"
#include "lj_carith.h" #include "lj_carith.h"
#endif #endif
#include "lj_vm.h"
#include "lj_lib.h" #include "lj_lib.h"
/* Some local macros to save typing. Undef'd at the end. */ /* Some local macros to save typing. Undef'd at the end. */

src/lj_ir.h

@ -283,7 +283,7 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
_(NIL) _(FALSE) _(TRUE) _(LIGHTUD) _(STR) _(P32) _(THREAD) \ _(NIL) _(FALSE) _(TRUE) _(LIGHTUD) _(STR) _(P32) _(THREAD) \
_(PROTO) _(FUNC) _(P64) _(CDATA) _(TAB) _(UDATA) \ _(PROTO) _(FUNC) _(P64) _(CDATA) _(TAB) _(UDATA) \
_(FLOAT) _(NUM) _(I8) _(U8) _(I16) _(U16) _(INT) _(U32) _(I64) _(U64) \ _(FLOAT) _(NUM) _(I8) _(U8) _(I16) _(U16) _(INT) _(U32) _(I64) _(U64) \
/* There is room for 10 more types. */ _(SOFTFP) /* There is room for 9 more types. */
/* IR result type and flags (8 bit). */ /* IR result type and flags (8 bit). */
typedef enum { typedef enum {

src/lj_ircall.h

@ -38,6 +38,72 @@ typedef struct CCallInfo {
#define CCI_FASTCALL 0x0800 /* Fastcall convention. */ #define CCI_FASTCALL 0x0800 /* Fastcall convention. */
/* Function definitions for CALL* instructions. */ /* Function definitions for CALL* instructions. */
#if LJ_SOFTFP
#if LJ_HASFFI
#define IRCALLDEF_SOFTFP_FFI(_) \
_(softfp_ui2d, 1, N, NUM, 0) \
_(softfp_l2d, 2, N, NUM, 0) \
_(softfp_ul2d, 2, N, NUM, 0) \
_(softfp_f2d, 1, N, NUM, 0) \
_(softfp_d2ui, 2, N, INT, 0) \
_(softfp_d2l, 2, N, I64, 0) \
_(softfp_d2ul, 2, N, U64, 0) \
_(softfp_d2f, 2, N, FLOAT, 0) \
_(softfp_i2f, 1, N, FLOAT, 0) \
_(softfp_ui2f, 1, N, FLOAT, 0) \
_(softfp_l2f, 2, N, FLOAT, 0) \
_(softfp_ul2f, 2, N, FLOAT, 0) \
_(softfp_f2i, 1, N, INT, 0) \
_(softfp_f2ui, 1, N, INT, 0) \
_(softfp_f2l, 1, N, I64, 0) \
_(softfp_f2ul, 1, N, U64, 0)
#else
#define IRCALLDEF_SOFTFP_FFI(_)
#endif
#define IRCALLDEF_SOFTFP(_) \
_(lj_vm_tobit, 2, N, INT, 0) \
_(softfp_add, 4, N, NUM, 0) \
_(softfp_sub, 4, N, NUM, 0) \
_(softfp_mul, 4, N, NUM, 0) \
_(softfp_div, 4, N, NUM, 0) \
_(softfp_cmp, 4, N, NIL, 0) \
_(softfp_i2d, 1, N, NUM, 0) \
_(softfp_d2i, 2, N, INT, 0) \
IRCALLDEF_SOFTFP_FFI(_)
#else
#define IRCALLDEF_SOFTFP(_)
#endif
#if LJ_TARGET_X86ORX64
/* Use lj_vm_* helpers and x87 ops. */
#define IRCALLDEF_FPMATH(_)
#else
/* Use standard math library calls. */
#if LJ_SOFTFP
#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */
#else
#define ARG1_FP 1
#endif
/* ORDER FPM */
#define IRCALLDEF_FPMATH(_) \
_(lj_vm_floor, ARG1_FP, N, NUM, 0) \
_(lj_vm_ceil, ARG1_FP, N, NUM, 0) \
_(lj_vm_trunc, ARG1_FP, N, NUM, 0) \
_(sqrt, ARG1_FP, N, NUM, 0) \
_(exp, ARG1_FP, N, NUM, 0) \
_(exp2, ARG1_FP, N, NUM, 0) \
_(log, ARG1_FP, N, NUM, 0) \
_(log2, ARG1_FP, N, NUM, 0) \
_(log10, ARG1_FP, N, NUM, 0) \
_(sin, ARG1_FP, N, NUM, 0) \
_(cos, ARG1_FP, N, NUM, 0) \
_(tan, ARG1_FP, N, NUM, 0) \
_(lj_vm_powi, ARG1_FP+1, N, NUM, 0) \
_(pow, ARG1_FP*2, N, NUM, 0) \
_(atan2, ARG1_FP*2, N, NUM, 0) \
_(ldexp, ARG1_FP+1, N, NUM, 0)
#endif
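The IRCALLDEF_* lists above follow the X-macro pattern used throughout lj_ircall.h: each _() entry carries the callee name and its argument-slot count, and ARG1_FP is 2 on soft-float targets because a double argument occupies two 32 bit slots. Below is a minimal, self-contained sketch of that pattern; the DEMO_* names and the reduced field set are invented for illustration, and the real table also records the call type, result type and flags.

#include <stdio.h>

/* Invented demo list in the style of IRCALLDEF: callee name + 32 bit arg slots. */
#define DEMO_CALLDEF(_) \
  _(softfp_add, 4) \
  _(softfp_d2i, 2) \
  _(lj_vm_floor, 2)

typedef struct DemoCallInfo { const char *name; int nargs; } DemoCallInfo;

enum {
#define DEMOENUM(name, nargs) DEMOCALL_##name,  /* Expand once into enum constants. */
DEMO_CALLDEF(DEMOENUM)
#undef DEMOENUM
DEMOCALL__MAX
};

static const DemoCallInfo demo_callinfo[DEMOCALL__MAX] = {
#define DEMOROW(name, nargs) { #name, nargs },  /* Expand again into a parallel table. */
DEMO_CALLDEF(DEMOROW)
#undef DEMOROW
};

int main(void)
{
  int i;
  for (i = 0; i < DEMOCALL__MAX; i++)
    printf("%-12s uses %d 32 bit argument slots\n",
           demo_callinfo[i].name, demo_callinfo[i].nargs);
  return 0;
}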
#if LJ_HASFFI #if LJ_HASFFI
#if LJ_32 #if LJ_32
#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ #define ARG2_64 4 /* Treat as 4 32 bit arguments. */
@ -62,6 +128,7 @@ typedef struct CCallInfo {
#else #else
#define IRCALLDEF_FFI(_) #define IRCALLDEF_FFI(_)
#endif #endif
#define IRCALLDEF(_) \ #define IRCALLDEF(_) \
_(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
_(lj_str_new, 3, S, STR, CCI_L) \ _(lj_str_new, 3, S, STR, CCI_L) \
@ -76,6 +143,8 @@ typedef struct CCallInfo {
_(lj_gc_barrieruv, 2, FS, NIL, 0) \ _(lj_gc_barrieruv, 2, FS, NIL, 0) \
_(lj_mem_newgco, 2, FS, P32, CCI_L) \ _(lj_mem_newgco, 2, FS, P32, CCI_L) \
_(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
IRCALLDEF_SOFTFP(_) \
IRCALLDEF_FPMATH(_) \
IRCALLDEF_FFI(_) \ IRCALLDEF_FFI(_) \
_(sinh, 1, N, NUM, 0) \ _(sinh, 1, N, NUM, 0) \
_(cosh, 1, N, NUM, 0) \ _(cosh, 1, N, NUM, 0) \
@ -97,4 +166,58 @@ LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...);
LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
/* Soft-float declarations. */
#if LJ_SOFTFP
#if LJ_TARGET_ARM
#define softfp_add __aeabi_dadd
#define softfp_sub __aeabi_dsub
#define softfp_mul __aeabi_dmul
#define softfp_div __aeabi_ddiv
#define softfp_cmp __aeabi_cdcmple
#define softfp_i2d __aeabi_i2d
#define softfp_ui2d __aeabi_ui2d
#define softfp_l2d __aeabi_l2d
#define softfp_ul2d __aeabi_ul2d
#define softfp_f2d __aeabi_f2d
#define softfp_d2i __aeabi_d2iz
#define softfp_d2ui __aeabi_d2uiz
#define softfp_d2l __aeabi_d2lz
#define softfp_d2ul __aeabi_d2ulz
#define softfp_d2f __aeabi_d2f
#define softfp_i2f __aeabi_i2f
#define softfp_ui2f __aeabi_ui2f
#define softfp_l2f __aeabi_l2f
#define softfp_ul2f __aeabi_ul2f
#define softfp_f2i __aeabi_f2iz
#define softfp_f2ui __aeabi_f2uiz
#define softfp_f2l __aeabi_f2lz
#define softfp_f2ul __aeabi_f2ulz
#else
#error "Missing soft-float definitions for target architecture"
#endif
extern double softfp_add(double a, double b);
extern double softfp_sub(double a, double b);
extern double softfp_mul(double a, double b);
extern double softfp_div(double a, double b);
extern void softfp_cmp(double a, double b);
extern double softfp_i2d(int32_t a);
extern double softfp_ui2d(uint32_t a);
extern double softfp_l2d(int64_t a);
extern double softfp_ul2d(uint64_t a);
extern double softfp_f2d(float a);
extern int32_t softfp_d2i(double a);
extern uint32_t softfp_d2ui(double a);
extern int64_t softfp_d2l(double a);
extern uint64_t softfp_d2ul(double a);
extern float softfp_d2f(double a);
extern float softfp_i2f(int32_t a);
extern float softfp_ui2f(uint32_t a);
extern float softfp_l2f(int64_t a);
extern float softfp_ul2f(uint64_t a);
extern int32_t softfp_f2i(float a);
extern uint32_t softfp_f2ui(float a);
extern int64_t softfp_f2l(float a);
extern uint64_t softfp_f2ul(float a);
#endif
#endif #endif
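As a rough usage sketch (not part of the commit): on an ARM EABI soft-float build the softfp_* aliases above resolve to libgcc's __aeabi_* runtime helpers, so calling softfp_add is the same operation the compiler emits for a plain double addition. The __SOFTFP__ guard and the portable fallback below are assumptions made only so the example builds and runs on any target.

#include <stdio.h>

#if defined(__arm__) && defined(__SOFTFP__)
extern double __aeabi_dadd(double a, double b);  /* EABI runtime helper from libgcc. */
#define softfp_add __aeabi_dadd
#else
static double softfp_add(double a, double b) { return a + b; }  /* Fallback for other targets. */
#endif

int main(void)
{
  printf("%g\n", softfp_add(1.25, 2.25));  /* Prints 3.5 either way. */
  return 0;
}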

src/lj_iropt.h

@ -148,7 +148,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
/* Optimization passes. */ /* Optimization passes. */
LJ_FUNC void lj_opt_dce(jit_State *J); LJ_FUNC void lj_opt_dce(jit_State *J);
LJ_FUNC int lj_opt_loop(jit_State *J); LJ_FUNC int lj_opt_loop(jit_State *J);
#if LJ_HASFFI && LJ_32 #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
LJ_FUNC void lj_opt_split(jit_State *J); LJ_FUNC void lj_opt_split(jit_State *J);
#else #else
#define lj_opt_split(J) UNUSED(J) #define lj_opt_split(J) UNUSED(J)

src/lj_jit.h

@ -250,7 +250,7 @@ enum {
((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
/* Set/reset flag to activate the SPLIT pass for the current trace. */ /* Set/reset flag to activate the SPLIT pass for the current trace. */
#if LJ_32 && LJ_HASFFI #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
#define lj_needsplit(J) (J->needsplit = 1) #define lj_needsplit(J) (J->needsplit = 1)
#define lj_resetsplit(J) (J->needsplit = 0) #define lj_resetsplit(J) (J->needsplit = 0)
#else #else
@ -311,7 +311,7 @@ typedef struct jit_State {
MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
PostProc postproc; /* Required post-processing after execution. */ PostProc postproc; /* Required post-processing after execution. */
#if LJ_32 && LJ_HASFFI #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
int needsplit; /* Need SPLIT pass. */ int needsplit; /* Need SPLIT pass. */
#endif #endif

src/lj_opt_split.c

@ -8,7 +8,7 @@
#include "lj_obj.h" #include "lj_obj.h"
#if LJ_HASJIT && LJ_HASFFI && LJ_32 #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
#include "lj_err.h" #include "lj_err.h"
#include "lj_str.h" #include "lj_str.h"
@ -21,9 +21,9 @@
/* SPLIT pass: /* SPLIT pass:
** **
** This pass splits up 64 bit IR instructions into multiple 32 bit IR ** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for 32 bit CPUs which lack native 64 bit ** instructions. It's only active for soft-float targets or for 32 bit CPUs
** operations. The FFI is currently the only emitter for 64 bit ** which lack native 64 bit integer operations (the FFI is currently the
** instructions, so this pass is disabled if the FFI is disabled. ** only emitter for 64 bit integer instructions).
** **
** Splitting the IR in a separate pass keeps each 32 bit IR assembler ** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be ** backend simple. Only a small amount of extra functionality needs to be
@ -41,14 +41,19 @@
** The operands of HIOP hold the hiword input references. The output of HIOP ** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword ** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this ** register or spill slot information. The register allocator treats this
** instruction independent of any other instruction, which improves code ** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs. ** quality compared to using fixed register pairs.
** **
** It's easier to split up some instructions into two regular 32 bit ** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different ** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g. ** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits. ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
** are split up into two 32 bit arguments each.
**
** On soft-float targets, floating-point instructions are directly converted
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
** **
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields: ** two int64_t fields:
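To make the comment above concrete: on a soft-float target every IRT_NUM value is handled as two 32 bit words, the loword result of the converted instruction plus a HIOP result at the very next IR reference (hi ref = lo ref + 1), and a KNUM constant is likewise split into two KINTs taken from tv.u32.lo and tv.u32.hi. The sketch below is not LuaJIT code; it only shows that word-level view of a double, with the same little/big-endian distinction the pass handles via LJ_LE/LJ_BE.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
  double knum = 3.5;            /* Stand-in for an IR_KNUM constant. */
  uint32_t w[2];
  memcpy(w, &knum, sizeof(knum));  /* Reinterpret the double as two 32 bit words. */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* Big-endian: the hiword is stored first (the LJ_BE case). */
  printf("lo KINT = 0x%08x, hi KINT = 0x%08x\n", w[1], w[0]);
#else
  /* Little-endian: the hiword is stored second (the LJ_LE case). */
  printf("lo KINT = 0x%08x, hi KINT = 0x%08x\n", w[0], w[1]);
#endif
  return 0;
}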
@ -101,8 +106,41 @@ static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
return nref; return nref;
} }
#if LJ_SOFTFP
/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
IRIns *ir, IRCallID id)
{
IRRef tmp, op1 = ir->op1;
J->cur.nins--;
#if LJ_LE
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
IRIns *ir, IRCallID id)
{
IRRef tmp, op1 = ir->op1, op2 = ir->op2;
J->cur.nins--;
#if LJ_LE
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif
/* Emit a CALLN with two split 64 bit arguments. */ /* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir, static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
IRIns *ir, IRCallID id) IRIns *ir, IRCallID id)
{ {
IRRef tmp, op1 = ir->op1, op2 = ir->op2; IRRef tmp, op1 = ir->op1, op2 = ir->op2;
@ -117,7 +155,9 @@ static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir,
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif #endif
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
return split_emit(J, IRTI(IR_HIOP), tmp, tmp); return split_emit(J,
IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
tmp, tmp);
} }
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
@ -155,7 +195,8 @@ static void split_ir(jit_State *J)
/* Process constants and fixed references. */ /* Process constants and fixed references. */
for (ref = nk; ref <= REF_BASE; ref++) { for (ref = nk; ref <= REF_BASE; ref++) {
IRIns *ir = &oir[ref]; IRIns *ir = &oir[ref];
if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */ if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
/* Split up 64 bit constant. */
TValue tv = *ir_k64(ir); TValue tv = *ir_k64(ir);
ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
@ -181,6 +222,106 @@ static void split_ir(jit_State *J)
hisubst[ref] = 0; hisubst[ref] = 0;
/* Split 64 bit instructions. */ /* Split 64 bit instructions. */
#if LJ_SOFTFP
if (irt_isnum(ir->t)) {
nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
/* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
switch (ir->o) {
case IR_ADD:
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
break;
case IR_SUB:
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
break;
case IR_MUL:
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
break;
case IR_DIV:
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
break;
case IR_POW:
hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
break;
case IR_FPMATH:
hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
break;
case IR_ATAN2:
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
break;
case IR_LDEXP:
hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
break;
case IR_NEG: case IR_ABS:
nir->o = IR_CONV; /* Pass through loword. */
nir->op2 = (IRT_INT << 5) | IRT_INT;
hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
hisubst[ir->op1], hisubst[ir->op2]);
break;
case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
case IR_MIN: case IR_MAX:
hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
break;
case IR_XLOAD:
hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP),
split_ptr(J, nir->op1), ir->op2);
#if LJ_BE
ir->prev = hi; hi = nref;
#endif
break;
case IR_ASTORE: case IR_HSTORE: case IR_USTORE:
split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
break;
case IR_XSTORE: {
#if LJ_LE
IRRef hiref = hisubst[ir->op2];
#else
IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2];
#endif
split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP),
split_ptr(J, nir->op1), hiref);
break;
}
case IR_CONV: { /* Conversion to number. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
if (st == IRT_I64 || st == IRT_U64) {
hi = split_call_l(J, hisubst, oir, ir,
st == IRT_I64 ? IRCALL_softfp_l2d : IRCALL_softfp_ul2d);
break;
}
#endif
lua_assert(st == IRT_INT ||
(LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
st == IRT_FLOAT ? IRCALL_softfp_f2d :
IRCALL_softfp_ui2d;
#else
nir->op2 = IRCALL_softfp_i2d;
#endif
hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
break;
}
case IR_CALLS:
case IR_CALLXS:
goto split_call;
case IR_PHI:
if (nir->op1 == nir->op2)
J->cur.nins--; /* Drop useless PHIs. */
if (hisubst[ir->op1] != hisubst[ir->op2])
split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
hisubst[ir->op1], hisubst[ir->op2]);
break;
default:
lua_assert(ir->o <= IR_NE);
split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
hisubst[ir->op1], hisubst[ir->op2]);
break;
}
} else
#endif
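A side note on the IR_NEG/IR_ABS case above: since an IEEE-754 double keeps its sign in bit 63, the pass can forward the loword unchanged and only BXOR (negate) or BAND (abs) the hiword against the hiword of the split constant operand. The standalone sketch below hard-codes the sign-bit mask instead of taking it from a split KNUM, and hiword_op is an invented helper name used purely for illustration.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static double hiword_op(double d, int negate)
{
  uint64_t bits;
  memcpy(&bits, &d, sizeof(d));
  if (negate)
    bits ^= (uint64_t)0x80000000u << 32;     /* Flip the sign bit in the hiword: -d. */
  else
    bits &= ~((uint64_t)0x80000000u << 32);  /* Clear the sign bit: fabs(d). */
  memcpy(&d, &bits, sizeof(d));
  return d;
}

int main(void)
{
  printf("%g %g\n", hiword_op(1.5, 1), hiword_op(-2.5, 0));  /* Prints -1.5 2.5 */
  return 0;
}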
#if LJ_32 && LJ_HASFFI
if (irt_isint64(ir->t)) { if (irt_isint64(ir->t)) {
IRRef hiref = hisubst[ir->op1]; IRRef hiref = hisubst[ir->op1];
nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
@ -199,20 +340,20 @@ static void split_ir(jit_State *J)
hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
break; break;
case IR_MUL: case IR_MUL:
hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
break; break;
case IR_DIV: case IR_DIV:
hi = split_call64(J, hisubst, oir, ir, hi = split_call_ll(J, hisubst, oir, ir,
irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
IRCALL_lj_carith_divu64); IRCALL_lj_carith_divu64);
break; break;
case IR_MOD: case IR_MOD:
hi = split_call64(J, hisubst, oir, ir, hi = split_call_ll(J, hisubst, oir, ir,
irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
IRCALL_lj_carith_modu64); IRCALL_lj_carith_modu64);
break; break;
case IR_POW: case IR_POW:
hi = split_call64(J, hisubst, oir, ir, hi = split_call_ll(J, hisubst, oir, ir,
irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
IRCALL_lj_carith_powu64); IRCALL_lj_carith_powu64);
break; break;
@ -239,9 +380,21 @@ static void split_ir(jit_State *J)
break; break;
case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
split_call_l(J, hisubst, oir, ir,
irt_isi64(ir->t) ? IRCALL_softfp_d2l : IRCALL_softfp_d2ul);
} else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
nir->o = IR_CALLN;
nir->op2 = irt_isi64(ir->t) ? IRCALL_softfp_f2l : IRCALL_softfp_f2ul;
hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
}
#else
if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
} else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ }
#endif
else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
/* Drop cast, since assembler doesn't care. */ /* Drop cast, since assembler doesn't care. */
goto fwdlo; goto fwdlo;
} else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */
@ -274,13 +427,37 @@ static void split_ir(jit_State *J)
split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
break; break;
} }
} else if (ir->o == IR_CONV) { /* See above, too. */ } else
#endif
#if LJ_SOFTFP
if (ir->o == IR_TOBIT) {
IRRef tmp, op1 = ir->op1;
J->cur.nins--;
#if LJ_LE
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
} else
#endif
if (ir->o == IR_CONV) { /* See above, too. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */
#if LJ_SOFTFP
if (irt_isfloat(ir->t)) {
split_call_l(J, hisubst, oir, ir,
st == IRT_I64 ? IRCALL_softfp_l2f : IRCALL_softfp_ul2f);
J->cur.nins--; /* Drop unused HIOP. */
}
#else
if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */
ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
hisubst[ir->op1], nref); hisubst[ir->op1], nref);
} else { /* Truncate to lower 32 bits. */ }
#endif
else { /* Truncate to lower 32 bits. */
fwdlo: fwdlo:
ir->prev = nir->op1; /* Forward loword. */ ir->prev = nir->op1; /* Forward loword. */
/* Replace with NOP to avoid messing up the snapshot logic. */ /* Replace with NOP to avoid messing up the snapshot logic. */
@ -288,6 +465,36 @@ static void split_ir(jit_State *J)
nir->op1 = nir->op2 = 0; nir->op1 = nir->op2 = 0;
} }
} }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
else if (irt_isfloat(ir->t)) {
if (st == IRT_NUM) {
split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
J->cur.nins--; /* Drop unused HIOP. */
} else {
nir->o = IR_CALLN;
nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
}
} else if (st == IRT_FLOAT) {
nir->o = IR_CALLN;
nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
} else
#endif
#if LJ_SOFTFP
if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
if (irt_isguard(ir->t)) {
lua_assert(0); /* NYI: missing check. */
}
split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
#else
IRCALL_softfp_d2i
#endif
);
J->cur.nins--; /* Drop unused HIOP. */
}
#endif
} else if (ir->o == IR_CALLXS) { } else if (ir->o == IR_CALLXS) {
IRRef hiref; IRRef hiref;
split_call: split_call:
@ -303,8 +510,10 @@ static void split_ir(jit_State *J)
#endif #endif
ir->prev = nref = split_emit(J, ot, nref, op2); ir->prev = nref = split_emit(J, ot, nref, op2);
} }
if (irt_isint64(ir->t)) if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
hi = split_emit(J, IRTI(IR_HIOP), nref, nref); hi = split_emit(J,
IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
nref, nref);
} else if (ir->o == IR_CARG) { } else if (ir->o == IR_CARG) {
IRRef hiref = hisubst[ir->op1]; IRRef hiref = hisubst[ir->op1];
if (hiref) { if (hiref) {
@ -367,17 +576,18 @@ static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
return NULL; return NULL;
} }
#ifdef LUA_USE_ASSERT #if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */ /* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J) static int split_needsplit(jit_State *J)
{ {
IRIns *ir, *irend; IRIns *ir, *irend;
IRRef ref; IRRef ref;
for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
if (irt_isint64(ir->t)) if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
return 1; return 1;
for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev)
if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 || if ((LJ_SOFTFP && (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_NUM) ||
(IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
(IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64) (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64)
return 1; return 1;
return 0; /* Nope. */ return 0; /* Nope. */
@ -387,7 +597,12 @@ static int split_needsplit(jit_State *J)
/* SPLIT pass. */ /* SPLIT pass. */
void lj_opt_split(jit_State *J) void lj_opt_split(jit_State *J)
{ {
#if LJ_SOFTFP
if (!J->needsplit)
J->needsplit = split_needsplit(J);
#else
lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */
#endif
if (J->needsplit) { if (J->needsplit) {
int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
if (errcode) { if (errcode) {

src/lj_record.c

@ -1992,6 +1992,7 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
IRRef ref = snap_ref(sn); IRRef ref = snap_ref(sn);
BCReg s = snap_slot(sn); BCReg s = snap_slot(sn);
IRIns *ir = &T->ir[ref]; IRIns *ir = &T->ir[ref];
IRType t = irt_type(ir->t);
TRef tr; TRef tr;
/* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
if (bloomtest(seen, ref)) { if (bloomtest(seen, ref)) {
@ -2005,7 +2006,7 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
bloomset(seen, ref); bloomset(seen, ref);
switch ((IROp)ir->o) { switch ((IROp)ir->o) {
/* Only have to deal with constants that can occur in stack slots. */ /* Only have to deal with constants that can occur in stack slots. */
case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; case IR_KPRI: tr = TREF_PRI(t); break;
case IR_KINT: tr = lj_ir_kint(J, ir->i); break; case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break; case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break;
@ -2013,13 +2014,14 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
case IR_KPTR: tr = lj_ir_kptr(J, ir_kptr(ir)); break; /* Continuation. */ case IR_KPTR: tr = lj_ir_kptr(J, ir_kptr(ir)); break; /* Continuation. */
/* Inherited SLOADs don't need a guard or type check. */ /* Inherited SLOADs don't need a guard or type check. */
case IR_SLOAD: case IR_SLOAD:
tr = emitir_raw(ir->ot & ~IRT_GUARD, s, if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
tr = emitir_raw(IRT(IR_SLOAD, t), s,
(ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
break; break;
/* Parent refs are already typed and don't need a guard. */ /* Parent refs are already typed and don't need a guard. */
default: default:
tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
IRSLOAD_INHERIT|IRSLOAD_PARENT); tr = emitir_raw(IRT(IR_SLOAD, t), s, IRSLOAD_INHERIT|IRSLOAD_PARENT);
break; break;
} }
setslot: setslot:

src/lj_snap.c

@ -307,7 +307,7 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
/* Convert a snapshot into a linear slot -> RegSP map. /* Convert a snapshot into a linear slot -> RegSP map.
** Note: unused slots are not initialized! ** Note: unused slots are not initialized!
*/ */
void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno) void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno, int hi)
{ {
SnapShot *snap = &T->snap[snapno]; SnapShot *snap = &T->snap[snapno];
MSize n, nent = snap->nent; MSize n, nent = snap->nent;
@ -316,7 +316,7 @@ void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno)
for (n = 0; n < nent; n++) { for (n = 0; n < nent; n++) {
SnapEntry sn = map[n]; SnapEntry sn = map[n];
IRRef ref = snap_ref(sn); IRRef ref = snap_ref(sn);
if (!irref_isk(ref)) { if ((LJ_SOFTFP && hi) ? (ref++, (sn & SNAP_SOFTFPNUM)) : !irref_isk(ref)) {
IRIns *ir = &T->ir[ref]; IRIns *ir = &T->ir[ref];
uint32_t rs = ir->prev; uint32_t rs = ir->prev;
if (bloomtest(rfilt, ref)) if (bloomtest(rfilt, ref))

src/lj_snap.h

@ -13,7 +13,8 @@
LJ_FUNC void lj_snap_add(jit_State *J); LJ_FUNC void lj_snap_add(jit_State *J);
LJ_FUNC void lj_snap_purge(jit_State *J); LJ_FUNC void lj_snap_purge(jit_State *J);
LJ_FUNC void lj_snap_shrink(jit_State *J); LJ_FUNC void lj_snap_shrink(jit_State *J);
LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno); LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno,
int hi);
LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need); LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);