From 138f54352ad604ef50f77cbcc15abec6dbd883c0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 22 May 2011 17:41:59 +0200 Subject: [PATCH] Split up FP IR instructions with SPLIT pass for soft-float targets. --- lib/dump.lua | 10 +- src/Makefile.dep | 2 +- src/lj_asm.c | 112 ++++++++++++++----- src/lj_ffrecord.c | 7 +- src/lj_ir.c | 1 + src/lj_ir.h | 2 +- src/lj_ircall.h | 123 ++++++++++++++++++++ src/lj_iropt.h | 2 +- src/lj_jit.h | 4 +- src/lj_opt_split.c | 271 ++++++++++++++++++++++++++++++++++++++++----- src/lj_record.c | 10 +- src/lj_snap.c | 4 +- src/lj_snap.h | 3 +- 13 files changed, 479 insertions(+), 72 deletions(-) diff --git a/lib/dump.lua b/lib/dump.lua index 0f9f7b2b..a6b61f53 100644 --- a/lib/dump.lua +++ b/lib/dump.lua @@ -147,6 +147,7 @@ local irtype_text = { "u32", "i64", "u64", + "sfp", } local colortype_ansi = { @@ -173,6 +174,7 @@ local colortype_ansi = { "\027[35m%s\027[m", "\027[35m%s\027[m", "\027[35m%s\027[m", + "\027[35m%s\027[m", } local function colorize_text(s, t) @@ -318,11 +320,11 @@ local function printsnap(tr, snap) if ref < 0 then out:write(formatk(tr, ref)) else - local m, ot, op1, op2 = traceir(tr, ref) - out:write(colorize(format("%04d", ref), band(ot, 31))) if band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM - local m, ot, op1, op2 = traceir(tr, ref+1) - out:write(colorize(format("/%04d", ref+1), band(ot, 31))) + out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) + else + local m, ot, op1, op2 = traceir(tr, ref) + out:write(colorize(format("%04d", ref), band(ot, 31))) end end out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME diff --git a/src/Makefile.dep b/src/Makefile.dep index 89ac79e8..8e0d7a1c 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -108,7 +108,7 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ - lj_lib.h + lj_vm.h lj_lib.h lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ lj_state.h lj_lex.h lj_parse.h lj_char.h diff --git a/src/lj_asm.c b/src/lj_asm.c index f33dc790..18383bcc 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -85,6 +85,9 @@ typedef struct ASMState { IRRef1 phireg[RID_MAX]; /* PHI register references. */ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */ +#if LJ_SOFTFP + uint16_t parentmaphi[LJ_MAX_JSLOTS]; /* Parent slot to hi RegSP map. */ +#endif } ASMState; #define IR(ref) (&as->ir[(ref)]) @@ -273,9 +276,12 @@ static Reg ra_rematk(ASMState *as, IRIns *ir) ra_modified(as, r); ir->r = RID_INIT; /* Do not keep any hint. */ RA_DBGX((as, "remat $i $r", ir, r)); +#if !LJ_SOFTFP if (ir->o == IR_KNUM) { emit_loadn(as, r, ir_knum(ir)); - } else if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { + } else +#endif + if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ emit_getgl(as, r, jit_base); } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { @@ -596,31 +602,40 @@ static int asm_snap_canremat(ASMState *as) return 0; } -/* Allocate registers or spill slots for refs escaping to a snapshot. */ +/* Allocate register or spill slot for a ref that escapes to a snapshot. */ +static void asm_snap_alloc1(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (!ra_used(ir)) { + RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR; + /* Get a weak register if we have a free one or can rematerialize. */ + if ((as->freeset & allow) || + (allow == RSET_FPR && asm_snap_canremat(as))) { + Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ + if (!irt_isphi(ir->t)) + ra_weak(as, r); /* But mark it as weakly referenced. */ + checkmclim(as); + RA_DBGX((as, "snapreg $f $r", ref, ir->r)); + } else { + ra_spill(as, ir); /* Otherwise force a spill slot. */ + RA_DBGX((as, "snapspill $f $s", ref, ir->s)); + } + } +} + +/* Allocate refs escaping to a snapshot. */ static void asm_snap_alloc(ASMState *as) { SnapShot *snap = &as->T->snap[as->snapno]; SnapEntry *map = &as->T->snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { - IRRef ref = snap_ref(map[n]); + SnapEntry sn = map[n]; + IRRef ref = snap_ref(sn); if (!irref_isk(ref)) { - IRIns *ir = IR(ref); - if (!ra_used(ir)) { - RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; - /* Get a weak register if we have a free one or can rematerialize. */ - if ((as->freeset & allow) || - (allow == RSET_FPR && asm_snap_canremat(as))) { - Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ - if (!irt_isphi(ir->t)) - ra_weak(as, r); /* But mark it as weakly referenced. */ - checkmclim(as); - RA_DBGX((as, "snapreg $f $r", ref, ir->r)); - } else { - ra_spill(as, ir); /* Otherwise force a spill slot. */ - RA_DBGX((as, "snapspill $f $s", ref, ir->s)); - } - } + asm_snap_alloc1(as, ref); + if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) + asm_snap_alloc1(as, ref+1); } } } @@ -997,6 +1012,15 @@ static void asm_head_root(ASMState *as) as->T->topslot = gcref(as->T->startpt)->pt.framesize; } +/* Get RegSP for parent slot. */ +static LJ_AINLINE RegSP asm_head_parentrs(ASMState *as, IRIns *ir) +{ +#if LJ_SOFTFP + if (ir->o == IR_HIOP) return as->parentmaphi[(ir-1)->op1]; +#endif + return as->parentmap[ir->op1]; +} + /* Head of a side trace. ** ** The current simplistic algorithm requires that all slots inherited @@ -1022,8 +1046,9 @@ static void asm_head_side(ASMState *as) for (i = as->stopins; i > REF_BASE; i--) { IRIns *ir = IR(i); RegSP rs; - lua_assert(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)); - rs = as->parentmap[ir->op1]; + lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || + (LJ_SOFTFP && ir->o == IR_HIOP)); + rs = asm_head_parentrs(as, ir); if (ra_hasreg(ir->r)) { rset_clear(allow, ir->r); if (ra_hasspill(ir->s)) @@ -1052,6 +1077,12 @@ static void asm_head_side(ASMState *as) } as->T->spadjust = (uint16_t)spadj; +#if !LJ_TARGET_X86ORX64 + /* Restore BASE register from parent spill slot. */ + if (ra_hasspill(irp->s)) + emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, spdelta + sps_scale(irp->s)); +#endif + /* Reload spilled target registers. */ if (pass2) { for (i = as->stopins; i > REF_BASE; i--) { @@ -1061,12 +1092,12 @@ static void asm_head_side(ASMState *as) Reg r; RegSP rs; irt_clearmark(ir->t); - rs = as->parentmap[ir->op1]; + rs = asm_head_parentrs(as, ir); if (!ra_hasspill(regsp_spill(rs))) ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */ else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s)) continue; /* Same spill slot, do nothing. */ - mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow; + mask = ((!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR) & allow; if (mask == RSET_EMPTY) lj_trace_err(as->J, LJ_TRERR_NYICOAL); r = ra_allocref(as, i, mask); @@ -1093,7 +1124,7 @@ static void asm_head_side(ASMState *as) while (work) { Reg r = rset_pickbot(work); IRIns *ir = IR(regcost_ref(as->cost[r])); - RegSP rs = as->parentmap[ir->op1]; + RegSP rs = asm_head_parentrs(as, ir); rset_clear(work, r); if (ra_hasspill(regsp_spill(rs))) { int32_t ofs = sps_scale(regsp_spill(rs)); @@ -1262,13 +1293,37 @@ static void asm_setup_regsp(ASMState *as) (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; continue; } -#if LJ_32 && LJ_HASFFI +#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) case IR_HIOP: - if ((ir-1)->o == IR_CALLN) { + switch ((ir-1)->o) { +#if LJ_SOFTFP + case IR_SLOAD: + if (((ir-1)->op2 & IRSLOAD_PARENT)) { + RegSP rs = as->parentmaphi[(ir-1)->op1]; + lua_assert(regsp_used(rs)); + as->stopins = i; + if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) { + ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs)); + continue; + } + } + break; +#endif + case IR_CALLN: case IR_CALLXS: +#if LJ_SOFTFP + case IR_MIN: case IR_MAX: +#endif ir->prev = REGSP_HINT(RID_RETHI); continue; + default: + break; } break; +#endif +#if LJ_SOFTFP + case IR_MIN: case IR_MAX: + if ((ir+1)->o != IR_HIOP) break; + /* fallthrough */ #endif /* C calls evict all scratch regs and return results in RID_RET. */ case IR_SNEW: case IR_XSNEW: case IR_NEWREF: @@ -1387,7 +1442,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T) as->loopinv = 0; if (J->parent) { as->parent = traceref(J, J->parent); - lj_snap_regspmap(as->parentmap, as->parent, J->exitno); + lj_snap_regspmap(as->parentmap, as->parent, J->exitno, 0); +#if LJ_SOFTFP + lj_snap_regspmap(as->parentmaphi, as->parent, J->exitno, 1); +#endif } else { as->parent = NULL; } diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 91d31b29..0de54f04 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -438,7 +438,12 @@ static void LJ_FASTCALL recff_math_unary(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_binary(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); - J->base[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, J->base[1])); +#if LJ_TARGET_X86ORX64 + TRef tr2 = lj_ir_tonum(J, J->base[1]); +#else + TRef tr2 = lj_opt_narrow_toint(J, J->base[1]); +#endif + J->base[0] = emitir(IRTN(rd->data), tr, tr2); } /* Record math.asin, math.acos, math.atan. */ diff --git a/src/lj_ir.c b/src/lj_ir.c index b7d1e7a7..59ffcfde 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -27,6 +27,7 @@ #include "lj_cdata.h" #include "lj_carith.h" #endif +#include "lj_vm.h" #include "lj_lib.h" /* Some local macros to save typing. Undef'd at the end. */ diff --git a/src/lj_ir.h b/src/lj_ir.h index 1bc6c332..aac34350 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -283,7 +283,7 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; _(NIL) _(FALSE) _(TRUE) _(LIGHTUD) _(STR) _(P32) _(THREAD) \ _(PROTO) _(FUNC) _(P64) _(CDATA) _(TAB) _(UDATA) \ _(FLOAT) _(NUM) _(I8) _(U8) _(I16) _(U16) _(INT) _(U32) _(I64) _(U64) \ - /* There is room for 10 more types. */ + _(SOFTFP) /* There is room for 9 more types. */ /* IR result type and flags (8 bit). */ typedef enum { diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 3131b15d..b83a0a81 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -38,6 +38,72 @@ typedef struct CCallInfo { #define CCI_FASTCALL 0x0800 /* Fastcall convention. */ /* Function definitions for CALL* instructions. */ +#if LJ_SOFTFP +#if LJ_HASFFI +#define IRCALLDEF_SOFTFP_FFI(_) \ + _(softfp_ui2d, 1, N, NUM, 0) \ + _(softfp_l2d, 2, N, NUM, 0) \ + _(softfp_ul2d, 2, N, NUM, 0) \ + _(softfp_f2d, 1, N, NUM, 0) \ + _(softfp_d2ui, 2, N, INT, 0) \ + _(softfp_d2l, 2, N, I64, 0) \ + _(softfp_d2ul, 2, N, U64, 0) \ + _(softfp_d2f, 2, N, FLOAT, 0) \ + _(softfp_i2f, 1, N, FLOAT, 0) \ + _(softfp_ui2f, 1, N, FLOAT, 0) \ + _(softfp_l2f, 2, N, FLOAT, 0) \ + _(softfp_ul2f, 2, N, FLOAT, 0) \ + _(softfp_f2i, 1, N, INT, 0) \ + _(softfp_f2ui, 1, N, INT, 0) \ + _(softfp_f2l, 1, N, I64, 0) \ + _(softfp_f2ul, 1, N, U64, 0) +#else +#define IRCALLDEF_SOFTFP_FFI(_) +#endif +#define IRCALLDEF_SOFTFP(_) \ + _(lj_vm_tobit, 2, N, INT, 0) \ + _(softfp_add, 4, N, NUM, 0) \ + _(softfp_sub, 4, N, NUM, 0) \ + _(softfp_mul, 4, N, NUM, 0) \ + _(softfp_div, 4, N, NUM, 0) \ + _(softfp_cmp, 4, N, NIL, 0) \ + _(softfp_i2d, 1, N, NUM, 0) \ + _(softfp_d2i, 2, N, INT, 0) \ + IRCALLDEF_SOFTFP_FFI(_) +#else +#define IRCALLDEF_SOFTFP(_) +#endif + +#if LJ_TARGET_X86ORX64 +/* Use lj_vm_* helpers and x87 ops. */ +#define IRCALLDEF_FPMATH(_) +#else +/* Use standard math library calls. */ +#if LJ_SOFTFP +#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ +#else +#define ARG1_FP 1 +#endif +/* ORDER FPM */ +#define IRCALLDEF_FPMATH(_) \ + _(lj_vm_floor, ARG1_FP, N, NUM, 0) \ + _(lj_vm_ceil, ARG1_FP, N, NUM, 0) \ + _(lj_vm_trunc, ARG1_FP, N, NUM, 0) \ + _(sqrt, ARG1_FP, N, NUM, 0) \ + _(exp, ARG1_FP, N, NUM, 0) \ + _(exp2, ARG1_FP, N, NUM, 0) \ + _(log, ARG1_FP, N, NUM, 0) \ + _(log2, ARG1_FP, N, NUM, 0) \ + _(log10, ARG1_FP, N, NUM, 0) \ + _(sin, ARG1_FP, N, NUM, 0) \ + _(cos, ARG1_FP, N, NUM, 0) \ + _(tan, ARG1_FP, N, NUM, 0) \ + _(lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ + _(pow, ARG1_FP*2, N, NUM, 0) \ + _(atan2, ARG1_FP*2, N, NUM, 0) \ + _(ldexp, ARG1_FP+1, N, NUM, 0) +#endif + #if LJ_HASFFI #if LJ_32 #define ARG2_64 4 /* Treat as 4 32 bit arguments. */ @@ -62,6 +128,7 @@ typedef struct CCallInfo { #else #define IRCALLDEF_FFI(_) #endif + #define IRCALLDEF(_) \ _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ _(lj_str_new, 3, S, STR, CCI_L) \ @@ -76,6 +143,8 @@ typedef struct CCallInfo { _(lj_gc_barrieruv, 2, FS, NIL, 0) \ _(lj_mem_newgco, 2, FS, P32, CCI_L) \ _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ + IRCALLDEF_SOFTFP(_) \ + IRCALLDEF_FPMATH(_) \ IRCALLDEF_FFI(_) \ _(sinh, 1, N, NUM, 0) \ _(cosh, 1, N, NUM, 0) \ @@ -97,4 +166,58 @@ LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...); LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; +/* Soft-float declarations. */ +#if LJ_SOFTFP +#if LJ_TARGET_ARM +#define softfp_add __aeabi_dadd +#define softfp_sub __aeabi_dsub +#define softfp_mul __aeabi_dmul +#define softfp_div __aeabi_ddiv +#define softfp_cmp __aeabi_cdcmple +#define softfp_i2d __aeabi_i2d +#define softfp_ui2d __aeabi_ui2d +#define softfp_l2d __aeabi_l2d +#define softfp_ul2d __aeabi_ul2d +#define softfp_f2d __aeabi_f2d +#define softfp_d2i __aeabi_d2iz +#define softfp_d2ui __aeabi_d2uiz +#define softfp_d2l __aeabi_d2lz +#define softfp_d2ul __aeabi_d2ulz +#define softfp_d2f __aeabi_d2f +#define softfp_i2f __aeabi_i2f +#define softfp_ui2f __aeabi_ui2f +#define softfp_l2f __aeabi_l2f +#define softfp_ul2f __aeabi_ul2f +#define softfp_f2i __aeabi_f2iz +#define softfp_f2ui __aeabi_f2uiz +#define softfp_f2l __aeabi_f2lz +#define softfp_f2ul __aeabi_f2ulz +#else +#error "Missing soft-float definitions for target architecture" +#endif +extern double softfp_add(double a, double b); +extern double softfp_sub(double a, double b); +extern double softfp_mul(double a, double b); +extern double softfp_div(double a, double b); +extern void softfp_cmp(double a, double b); +extern double softfp_i2d(int32_t a); +extern double softfp_ui2d(uint32_t a); +extern double softfp_l2d(int64_t a); +extern double softfp_ul2d(uint64_t a); +extern double softfp_f2d(float a); +extern int32_t softfp_d2i(double a); +extern uint32_t softfp_d2ui(double a); +extern int64_t softfp_d2l(double a); +extern uint64_t softfp_d2ul(double a); +extern float softfp_d2f(double a); +extern float softfp_i2f(int32_t a); +extern float softfp_ui2f(uint32_t a); +extern float softfp_l2f(int64_t a); +extern float softfp_ul2f(uint64_t a); +extern int32_t softfp_f2i(float a); +extern uint32_t softfp_f2ui(float a); +extern int64_t softfp_f2l(float a); +extern uint64_t softfp_f2ul(float a); +#endif + #endif diff --git a/src/lj_iropt.h b/src/lj_iropt.h index daba5296..7ab42b7a 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -148,7 +148,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); /* Optimization passes. */ LJ_FUNC void lj_opt_dce(jit_State *J); LJ_FUNC int lj_opt_loop(jit_State *J); -#if LJ_HASFFI && LJ_32 +#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) LJ_FUNC void lj_opt_split(jit_State *J); #else #define lj_opt_split(J) UNUSED(J) diff --git a/src/lj_jit.h b/src/lj_jit.h index dd74dedb..63584355 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -250,7 +250,7 @@ enum { ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) /* Set/reset flag to activate the SPLIT pass for the current trace. */ -#if LJ_32 && LJ_HASFFI +#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) #define lj_needsplit(J) (J->needsplit = 1) #define lj_resetsplit(J) (J->needsplit = 0) #else @@ -311,7 +311,7 @@ typedef struct jit_State { MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ PostProc postproc; /* Required post-processing after execution. */ -#if LJ_32 && LJ_HASFFI +#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) int needsplit; /* Need SPLIT pass. */ #endif diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 2f8b1e9c..67436a65 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -8,7 +8,7 @@ #include "lj_obj.h" -#if LJ_HASJIT && LJ_HASFFI && LJ_32 +#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) #include "lj_err.h" #include "lj_str.h" @@ -21,9 +21,9 @@ /* SPLIT pass: ** ** This pass splits up 64 bit IR instructions into multiple 32 bit IR -** instructions. It's only active for 32 bit CPUs which lack native 64 bit -** operations. The FFI is currently the only emitter for 64 bit -** instructions, so this pass is disabled if the FFI is disabled. +** instructions. It's only active for soft-float targets or for 32 bit CPUs +** which lack native 64 bit integer operations (the FFI is currently the +** only emitter for 64 bit integer instructions). ** ** Splitting the IR in a separate pass keeps each 32 bit IR assembler ** backend simple. Only a small amount of extra functionality needs to be @@ -41,14 +41,19 @@ ** The operands of HIOP hold the hiword input references. The output of HIOP ** is the hiword output reference, which is also used to hold the hiword ** register or spill slot information. The register allocator treats this -** instruction independent of any other instruction, which improves code +** instruction independently of any other instruction, which improves code ** quality compared to using fixed register pairs. ** ** It's easier to split up some instructions into two regular 32 bit ** instructions. E.g. XLOAD is split up into two XLOADs with two different ** addresses. Obviously 64 bit constants need to be split up into two 32 bit ** constants, too. Some hiword instructions can be entirely omitted, e.g. -** when zero-extending a 32 bit value to 64 bits. +** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls +** are split up into two 32 bit arguments each. +** +** On soft-float targets, floating-point instructions are directly converted +** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX). +** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump). ** ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with ** two int64_t fields: @@ -101,9 +106,42 @@ static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) return nref; } -/* Emit a CALLN with two split 64 bit arguments. */ -static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir, +#if LJ_SOFTFP +/* Emit a CALLN with one split 64 bit argument. */ +static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, IRIns *ir, IRCallID id) +{ + IRRef tmp, op1 = ir->op1; + J->cur.nins--; +#if LJ_LE + tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); +#else + tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); +#endif + ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); + return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); +} + +/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ +static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, + IRIns *ir, IRCallID id) +{ + IRRef tmp, op1 = ir->op1, op2 = ir->op2; + J->cur.nins--; +#if LJ_LE + tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); +#else + tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); +#endif + tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); + ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); + return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); +} +#endif + +/* Emit a CALLN with two split 64 bit arguments. */ +static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, + IRIns *ir, IRCallID id) { IRRef tmp, op1 = ir->op1, op2 = ir->op2; J->cur.nins--; @@ -117,7 +155,9 @@ static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir, tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); #endif ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); - return split_emit(J, IRTI(IR_HIOP), tmp, tmp); + return split_emit(J, + IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), + tmp, tmp); } /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ @@ -155,7 +195,8 @@ static void split_ir(jit_State *J) /* Process constants and fixed references. */ for (ref = nk; ref <= REF_BASE; ref++) { IRIns *ir = &oir[ref]; - if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */ + if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { + /* Split up 64 bit constant. */ TValue tv = *ir_k64(ir); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); @@ -181,6 +222,106 @@ static void split_ir(jit_State *J) hisubst[ref] = 0; /* Split 64 bit instructions. */ +#if LJ_SOFTFP + if (irt_isnum(ir->t)) { + nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ + /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ + switch (ir->o) { + case IR_ADD: + hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); + break; + case IR_SUB: + hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); + break; + case IR_MUL: + hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); + break; + case IR_DIV: + hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); + break; + case IR_POW: + hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); + break; + case IR_FPMATH: + hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); + break; + case IR_ATAN2: + hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); + break; + case IR_LDEXP: + hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); + break; + case IR_NEG: case IR_ABS: + nir->o = IR_CONV; /* Pass through loword. */ + nir->op2 = (IRT_INT << 5) | IRT_INT; + hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), + hisubst[ir->op1], hisubst[ir->op2]); + break; + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + case IR_MIN: case IR_MAX: + hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); + break; + case IR_XLOAD: + hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), + split_ptr(J, nir->op1), ir->op2); +#if LJ_BE + ir->prev = hi; hi = nref; +#endif + break; + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: + split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); + break; + case IR_XSTORE: { +#if LJ_LE + IRRef hiref = hisubst[ir->op2]; +#else + IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2]; +#endif + split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP), + split_ptr(J, nir->op1), hiref); + break; + } + case IR_CONV: { /* Conversion to number. Others handled below. */ + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if LJ_32 && LJ_HASFFI + if (st == IRT_I64 || st == IRT_U64) { + hi = split_call_l(J, hisubst, oir, ir, + st == IRT_I64 ? IRCALL_softfp_l2d : IRCALL_softfp_ul2d); + break; + } +#endif + lua_assert(st == IRT_INT || + (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); + nir->o = IR_CALLN; +#if LJ_32 && LJ_HASFFI + nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : + st == IRT_FLOAT ? IRCALL_softfp_f2d : + IRCALL_softfp_ui2d; +#else + nir->op2 = IRCALL_softfp_i2d; +#endif + hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); + break; + } + case IR_CALLS: + case IR_CALLXS: + goto split_call; + case IR_PHI: + if (nir->op1 == nir->op2) + J->cur.nins--; /* Drop useless PHIs. */ + if (hisubst[ir->op1] != hisubst[ir->op2]) + split_emit(J, IRT(IR_PHI, IRT_SOFTFP), + hisubst[ir->op1], hisubst[ir->op2]); + break; + default: + lua_assert(ir->o <= IR_NE); + split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), + hisubst[ir->op1], hisubst[ir->op2]); + break; + } + } else +#endif +#if LJ_32 && LJ_HASFFI if (irt_isint64(ir->t)) { IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ @@ -199,22 +340,22 @@ static void split_ir(jit_State *J) hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: - hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); + hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); break; case IR_DIV: - hi = split_call64(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); + hi = split_call_ll(J, hisubst, oir, ir, + irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); break; case IR_MOD: - hi = split_call64(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); + hi = split_call_ll(J, hisubst, oir, ir, + irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); break; case IR_POW: - hi = split_call64(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); + hi = split_call_ll(J, hisubst, oir, ir, + irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); @@ -239,9 +380,21 @@ static void split_ir(jit_State *J) break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if LJ_SOFTFP + if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ + split_call_l(J, hisubst, oir, ir, + irt_isi64(ir->t) ? IRCALL_softfp_d2l : IRCALL_softfp_d2ul); + } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ + nir->o = IR_CALLN; + nir->op2 = irt_isi64(ir->t) ? IRCALL_softfp_f2l : IRCALL_softfp_f2ul; + hi = split_emit(J, IRTI(IR_HIOP), nref, nref); + } +#else if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); - } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ + } +#endif + else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. */ goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ @@ -274,13 +427,37 @@ static void split_ir(jit_State *J) split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } - } else if (ir->o == IR_CONV) { /* See above, too. */ + } else +#endif +#if LJ_SOFTFP + if (ir->o == IR_TOBIT) { + IRRef tmp, op1 = ir->op1; + J->cur.nins--; +#if LJ_LE + tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); +#else + tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); +#endif + ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); + } else +#endif + if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ +#if LJ_SOFTFP + if (irt_isfloat(ir->t)) { + split_call_l(J, hisubst, oir, ir, + st == IRT_I64 ? IRCALL_softfp_l2f : IRCALL_softfp_ul2f); + J->cur.nins--; /* Drop unused HIOP. */ + } +#else if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), hisubst[ir->op1], nref); - } else { /* Truncate to lower 32 bits. */ + } +#endif + else { /* Truncate to lower 32 bits. */ fwdlo: ir->prev = nir->op1; /* Forward loword. */ /* Replace with NOP to avoid messing up the snapshot logic. */ @@ -288,6 +465,36 @@ static void split_ir(jit_State *J) nir->op1 = nir->op2 = 0; } } +#endif +#if LJ_SOFTFP && LJ_32 && LJ_HASFFI + else if (irt_isfloat(ir->t)) { + if (st == IRT_NUM) { + split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); + J->cur.nins--; /* Drop unused HIOP. */ + } else { + nir->o = IR_CALLN; + nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; + } + } else if (st == IRT_FLOAT) { + nir->o = IR_CALLN; + nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; + } else +#endif +#if LJ_SOFTFP + if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { + if (irt_isguard(ir->t)) { + lua_assert(0); /* NYI: missing check. */ + } + split_call_l(J, hisubst, oir, ir, +#if LJ_32 && LJ_HASFFI + st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i +#else + IRCALL_softfp_d2i +#endif + ); + J->cur.nins--; /* Drop unused HIOP. */ + } +#endif } else if (ir->o == IR_CALLXS) { IRRef hiref; split_call: @@ -303,8 +510,10 @@ static void split_ir(jit_State *J) #endif ir->prev = nref = split_emit(J, ot, nref, op2); } - if (irt_isint64(ir->t)) - hi = split_emit(J, IRTI(IR_HIOP), nref, nref); + if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) + hi = split_emit(J, + IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), + nref, nref); } else if (ir->o == IR_CARG) { IRRef hiref = hisubst[ir->op1]; if (hiref) { @@ -367,17 +576,18 @@ static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) return NULL; } -#ifdef LUA_USE_ASSERT +#if defined(LUA_USE_ASSERT) || LJ_SOFTFP /* Slow, but sure way to check whether a SPLIT pass is needed. */ static int split_needsplit(jit_State *J) { IRIns *ir, *irend; IRRef ref; for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) - if (irt_isint64(ir->t)) + if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) return 1; for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) - if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 || + if ((LJ_SOFTFP && (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_NUM) || + (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 || (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64) return 1; return 0; /* Nope. */ @@ -387,7 +597,12 @@ static int split_needsplit(jit_State *J) /* SPLIT pass. */ void lj_opt_split(jit_State *J) { +#if LJ_SOFTFP + if (!J->needsplit) + J->needsplit = split_needsplit(J); +#else lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ +#endif if (J->needsplit) { int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); if (errcode) { diff --git a/src/lj_record.c b/src/lj_record.c index 64eba291..b712ec56 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1992,6 +1992,7 @@ static void rec_setup_side(jit_State *J, GCtrace *T) IRRef ref = snap_ref(sn); BCReg s = snap_slot(sn); IRIns *ir = &T->ir[ref]; + IRType t = irt_type(ir->t); TRef tr; /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ if (bloomtest(seen, ref)) { @@ -2005,7 +2006,7 @@ static void rec_setup_side(jit_State *J, GCtrace *T) bloomset(seen, ref); switch ((IROp)ir->o) { /* Only have to deal with constants that can occur in stack slots. */ - case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; + case IR_KPRI: tr = TREF_PRI(t); break; case IR_KINT: tr = lj_ir_kint(J, ir->i); break; case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break; @@ -2013,13 +2014,14 @@ static void rec_setup_side(jit_State *J, GCtrace *T) case IR_KPTR: tr = lj_ir_kptr(J, ir_kptr(ir)); break; /* Continuation. */ /* Inherited SLOADs don't need a guard or type check. */ case IR_SLOAD: - tr = emitir_raw(ir->ot & ~IRT_GUARD, s, + if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; + tr = emitir_raw(IRT(IR_SLOAD, t), s, (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); break; /* Parent refs are already typed and don't need a guard. */ default: - tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, - IRSLOAD_INHERIT|IRSLOAD_PARENT); + if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; + tr = emitir_raw(IRT(IR_SLOAD, t), s, IRSLOAD_INHERIT|IRSLOAD_PARENT); break; } setslot: diff --git a/src/lj_snap.c b/src/lj_snap.c index dd70ece1..1af7ef85 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -307,7 +307,7 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) /* Convert a snapshot into a linear slot -> RegSP map. ** Note: unused slots are not initialized! */ -void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno) +void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno, int hi) { SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; @@ -316,7 +316,7 @@ void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno) for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef ref = snap_ref(sn); - if (!irref_isk(ref)) { + if ((LJ_SOFTFP && hi) ? (ref++, (sn & SNAP_SOFTFPNUM)) : !irref_isk(ref)) { IRIns *ir = &T->ir[ref]; uint32_t rs = ir->prev; if (bloomtest(rfilt, ref)) diff --git a/src/lj_snap.h b/src/lj_snap.h index 031b0ac3..da9813b9 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h @@ -13,7 +13,8 @@ LJ_FUNC void lj_snap_add(jit_State *J); LJ_FUNC void lj_snap_purge(jit_State *J); LJ_FUNC void lj_snap_shrink(jit_State *J); -LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno); +LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno, + int hi); LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);