diff --git a/src/Makefile b/src/Makefile index 278324a1..4ea8c85e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -42,13 +42,10 @@ CCOPT= -O2 -fomit-frame-pointer # # Target-specific compiler options: # -# x86 only: it's recommended to compile at least for i686. Better yet, -# compile for an architecture that has SSE2, too (-msse -msse2). -# # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute # the binaries to a different machine you could also use: -march=native # -CCOPT_x86= -march=i686 +CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse CCOPT_x64= CCOPT_arm= CCOPT_ppc= @@ -394,11 +391,6 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN endif -ifeq (x86,$(TARGET_LJARCH)) - ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D SSE - endif -else ifeq (x64,$(TARGET_LJARCH)) DASM_ARCH= x86 else @@ -423,7 +415,6 @@ ifeq (ppc,$(TARGET_LJARCH)) endif endif endif -endif DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) DASM_DASC= vm_$(DASM_ARCH).dasc diff --git a/src/lib_jit.c b/src/lib_jit.c index 82e68258..1b69caa5 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -538,18 +538,14 @@ static uint32_t jit_cpudetect(lua_State *L) uint32_t features[4]; if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { #if !LJ_HASJIT -#define JIT_F_CMOV 1 #define JIT_F_SSE2 2 #endif - flags |= ((features[3] >> 15)&1) * JIT_F_CMOV; flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; #if LJ_HASJIT flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; if (vendor[2] == 0x6c65746e) { /* Intel. */ - if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ - flags |= JIT_F_P4; /* Currently unused. */ - else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ + if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ flags |= JIT_F_LEA_AGU; } else if (vendor[2] == 0x444d4163) { /* AMD. */ uint32_t fam = (features[0] & 0x0ff00f00); @@ -562,14 +558,8 @@ static uint32_t jit_cpudetect(lua_State *L) } /* Check for required instruction set support on x86 (unnecessary on x64). */ #if LJ_TARGET_X86 -#if !defined(LUAJIT_CPU_NOCMOV) - if (!(flags & JIT_F_CMOV)) - luaL_error(L, "CPU not supported"); -#endif -#if defined(LUAJIT_CPU_SSE2) if (!(flags & JIT_F_SSE2)) - luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); -#endif + luaL_error(L, "CPU with SSE2 required"); #endif #elif LJ_TARGET_ARM #if LJ_HASJIT @@ -631,11 +621,7 @@ static void jit_init(lua_State *L) uint32_t flags = jit_cpudetect(L); #if LJ_HASJIT jit_State *J = L2J(L); -#if LJ_TARGET_X86 - /* Silently turn off the JIT compiler on CPUs without SSE2. */ - if ((flags & JIT_F_SSE2)) -#endif - J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; + J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); lj_dispatch_update(G(L)); #else @@ -645,6 +631,7 @@ static void jit_init(lua_State *L) LUALIB_API int luaopen_jit(lua_State *L) { + jit_init(L); lua_pushliteral(L, LJ_OS_NAME); lua_pushliteral(L, LJ_ARCH_NAME); lua_pushinteger(L, LUAJIT_VERSION_NUM); @@ -657,7 +644,6 @@ LUALIB_API int luaopen_jit(lua_State *L) LJ_LIB_REG(L, "jit.opt", jit_opt); #endif L->top -= 2; - jit_init(L); return 1; } diff --git a/src/lj_asm.c b/src/lj_asm.c index c7365404..a01b4e52 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1730,7 +1730,7 @@ static void asm_setup_regsp(ASMState *as) break; case IR_FPMATH: #if LJ_TARGET_X86ORX64 - if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ + if (ir->op2 == IRFPM_EXP2) { /* May be joined to pow. */ ir->prev = REGSP_HINT(RID_XMM0); #if !LJ_64 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ diff --git a/src/lj_jit.h b/src/lj_jit.h index c0b1c41e..8b42dd4e 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -14,18 +14,16 @@ /* CPU-specific JIT engine flags. */ #if LJ_TARGET_X86ORX64 -#define JIT_F_CMOV 0x00000010 -#define JIT_F_SSE2 0x00000020 -#define JIT_F_SSE3 0x00000040 -#define JIT_F_SSE4_1 0x00000080 -#define JIT_F_P4 0x00000100 -#define JIT_F_PREFER_IMUL 0x00000200 -#define JIT_F_SPLIT_XMM 0x00000400 -#define JIT_F_LEA_AGU 0x00000800 +#define JIT_F_SSE2 0x00000010 +#define JIT_F_SSE3 0x00000020 +#define JIT_F_SSE4_1 0x00000040 +#define JIT_F_PREFER_IMUL 0x00000080 +#define JIT_F_SPLIT_XMM 0x00000100 +#define JIT_F_LEA_AGU 0x00000200 /* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_CMOV -#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" +#define JIT_F_CPU_FIRST JIT_F_SSE2 +#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\2K8\4ATOM" #elif LJ_TARGET_ARM #define JIT_F_ARMV6_ 0x00000010 #define JIT_F_ARMV6T2_ 0x00000020 diff --git a/src/lj_vm.h b/src/lj_vm.h index c5d05de4..948d63c2 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -49,12 +49,14 @@ LJ_ASMF void lj_vm_exit_handler(void); LJ_ASMF void lj_vm_exit_interp(void); /* Internal math helper functions. */ -#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC +#if LJ_TARGET_PPC #define lj_vm_floor floor #define lj_vm_ceil ceil #else LJ_ASMF double lj_vm_floor(double); +#if !LJ_TARGET_X86ORX64 LJ_ASMF double lj_vm_ceil(double); +#endif #if LJ_TARGET_ARM LJ_ASMF double lj_vm_floor_sf(double); LJ_ASMF double lj_vm_ceil_sf(double); diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 745c93ff..1d5bd55a 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -35,6 +35,7 @@ if exist minilua.exe.manifest^ @if errorlevel 8 goto :X64 @set DASMFLAGS=-D WIN -D JIT -D FFI @set LJARCH=x86 +@set LJCOMPILE=%LJCOMPILE% /arch:SSE2 :X64 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc @if errorlevel 1 goto :BAD diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index b4674e2b..7020eb27 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -18,7 +18,6 @@ | |.if P64 |.define X64, 1 -|.define SSE, 1 |.if WIN |.define X64WIN, 1 |.endif @@ -856,13 +855,9 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | mov TMP2, LJ_TISNUM | mov TMP1, RC - |.elif SSE + |.else | cvtsi2sd xmm0, RC | movsd TMPQ, xmm0 - |.else - | mov ARG4, RC - | fild ARG4 - | fstp TMPQ |.endif | lea RCa, TMPQ // Store temp. TValue in TMPQ. | jmp >1 @@ -935,13 +930,9 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | mov TMP2, LJ_TISNUM | mov TMP1, RC - |.elif SSE + |.else | cvtsi2sd xmm0, RC | movsd TMPQ, xmm0 - |.else - | mov ARG4, RC - | fild ARG4 - | fstp TMPQ |.endif | lea RCa, TMPQ // Store temp. TValue in TMPQ. | jmp >1 @@ -1509,11 +1500,7 @@ static void build_subroutines(BuildCtx *ctx) |.else | jae ->fff_fallback |.endif - |.if SSE | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 - |.else - | fld qword [BASE]; jmp ->fff_resn - |.endif | |.ffunc_1 tostring | // Only handles the string or number case inline. @@ -1631,19 +1618,12 @@ static void build_subroutines(BuildCtx *ctx) | add RD, 1 | mov dword [BASE-4], LJ_TISNUM | mov dword [BASE-8], RD - |.elif SSE + |.else | movsd xmm0, qword [BASE+8] | sseconst_1 xmm1, RBa | addsd xmm0, xmm1 | cvtsd2si RD, xmm0 | movsd qword [BASE-8], xmm0 - |.else - | fld qword [BASE+8] - | fld1 - | faddp st1 - | fist ARG1 - | fstp qword [BASE-8] - | mov RD, ARG1 |.endif | mov TAB:RB, [BASE] | cmp RD, TAB:RB->asize; jae >2 // Not in array part? @@ -1690,12 +1670,9 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | mov dword [BASE+12], LJ_TISNUM | mov dword [BASE+8], 0 - |.elif SSE + |.else | xorps xmm0, xmm0 | movsd qword [BASE+8], xmm0 - |.else - | fldz - | fstp qword [BASE+8] |.endif | mov RD, 1+3 | jmp ->fff_res @@ -1925,12 +1902,10 @@ static void build_subroutines(BuildCtx *ctx) |->fff_resi: // Dummy. |.endif | - |.if SSE |->fff_resn: | mov PC, [BASE-4] | fstp qword [BASE-8] | jmp ->fff_res1 - |.endif | | .ffunc_1 math_abs |.if DUALNUM @@ -1954,8 +1929,6 @@ static void build_subroutines(BuildCtx *ctx) |.else | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |.endif - | - |.if SSE | movsd xmm0, qword [BASE] | sseconst_abs xmm1, RDa | andps xmm0, xmm1 @@ -1963,15 +1936,6 @@ static void build_subroutines(BuildCtx *ctx) | mov PC, [BASE-4] | movsd qword [BASE-8], xmm0 | // fallthrough - |.else - | fld qword [BASE] - | fabs - | // fallthrough - |->fff_resxmm0: // Dummy. - |->fff_resn: - | mov PC, [BASE-4] - | fstp qword [BASE-8] - |.endif | |->fff_res1: | mov RD, 1+1 @@ -2008,48 +1972,24 @@ static void build_subroutines(BuildCtx *ctx) |.else | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |.endif - |.if SSE | movsd xmm0, qword [BASE] - | call ->vm_ .. func - | .if DUALNUM - | cvtsd2si RB, xmm0 - | cmp RB, 0x80000000 - | jne ->fff_resi - | cvtsi2sd xmm1, RB - | ucomisd xmm0, xmm1 - | jp ->fff_resxmm0 - | je ->fff_resi - | .endif - | jmp ->fff_resxmm0 - |.else - | fld qword [BASE] - | call ->vm_ .. func - | .if DUALNUM - | fist ARG1 - | mov RB, ARG1 - | cmp RB, 0x80000000; jne >2 - | fdup - | fild ARG1 - | fcomparepp - | jp ->fff_resn - | jne ->fff_resn - |2: - | fpop - | jmp ->fff_resi - | .else - | jmp ->fff_resn - | .endif + | call ->vm_ .. func .. _sse + |.if DUALNUM + | cvtsd2si RB, xmm0 + | cmp RB, 0x80000000 + | jne ->fff_resi + | cvtsi2sd xmm1, RB + | ucomisd xmm0, xmm1 + | jp ->fff_resxmm0 + | je ->fff_resi |.endif + | jmp ->fff_resxmm0 |.endmacro | | math_round floor | math_round ceil | - |.if SSE |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 - |.else - |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn - |.endif | |.ffunc math_log | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. @@ -2072,23 +2012,18 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | |.macro math_extern, func - |.if SSE | .ffunc_nsse math_ .. func - | .if not X64 - | movsd FPARG1, xmm0 - | .endif - |.else - | .ffunc_n math_ .. func - | fstp FPARG1 + |.if not X64 + | movsd FPARG1, xmm0 |.endif | mov RB, BASE | call extern lj_vm_ .. func | mov BASE, RB - | .if X64 - | jmp ->fff_resxmm0 - | .else - | jmp ->fff_resn - | .endif + |.if X64 + | jmp ->fff_resxmm0 + |.else + | jmp ->fff_resn + |.endif |.endmacro | | math_extern sinh @@ -2096,17 +2031,10 @@ static void build_subroutines(BuildCtx *ctx) | math_extern tanh | |->ff_math_deg: - |.if SSE |.ffunc_nsse math_rad | mov CFUNC:RB, [BASE-8] | mulsd xmm0, qword CFUNC:RB->upvalue[0] | jmp ->fff_resxmm0 - |.else - |.ffunc_n math_rad - | mov CFUNC:RB, [BASE-8] - | fmul qword CFUNC:RB->upvalue[0] - | jmp ->fff_resn - |.endif | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn @@ -2123,65 +2051,34 @@ static void build_subroutines(BuildCtx *ctx) | cmp RB, 0x00200000; jb >4 |1: | shr RB, 21; sub RB, RC // Extract and unbias exponent. - |.if SSE | cvtsi2sd xmm0, RB - |.else - | mov TMP1, RB; fild TMP1 - |.endif | mov RB, [BASE-4] | and RB, 0x800fffff // Mask off exponent. | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | mov [BASE-4], RB |2: - |.if SSE | movsd qword [BASE], xmm0 - |.else - | fstp qword [BASE] - |.endif | mov RD, 1+2 | jmp ->fff_res |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. - |.if SSE | xorps xmm0, xmm0; jmp <2 - |.else - | fldz; jmp <2 - |.endif |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. - |.if SSE | movsd xmm0, qword [BASE] | sseconst_hi xmm1, RBa, 43500000 // 2^54. | mulsd xmm0, xmm1 | movsd qword [BASE-8], xmm0 - |.else - | fld qword [BASE] - | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 - | fstp qword [BASE-8] - |.endif | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | - |.if SSE |.ffunc_nsse math_modf - |.else - |.ffunc_n math_modf - |.endif | mov RB, [BASE+4] | mov PC, [BASE-4] | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? - |.if SSE | movaps xmm4, xmm0 - | call ->vm_trunc + | call ->vm_trunc_sse | subsd xmm4, xmm0 |1: | movsd qword [BASE-8], xmm0 | movsd qword [BASE], xmm4 - |.else - | fdup - | call ->vm_trunc - | fsub st1, st0 - |1: - | fstp qword [BASE-8] - | fstp qword [BASE] - |.endif | mov RC, [BASE-4]; mov RB, [BASE+4] | xor RC, RB; js >3 // Need to adjust sign? |2: @@ -2191,24 +2088,16 @@ static void build_subroutines(BuildCtx *ctx) | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | jmp <2 |4: - |.if SSE | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. - |.else - | fldz; fxch; jmp <1 // Return +-Inf and +-0. - |.endif | |.ffunc_nnr math_fmod |1: ; fprem; fnstsw ax; sahf; jp <1 | fpop1 | jmp ->fff_resn | - |.if SSE - |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 - |.else - |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn - |.endif + |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0 | - |.macro math_minmax, name, cmovop, fcmovop, sseop + |.macro math_minmax, name, cmovop, sseop | .ffunc name | mov RA, 2 | cmp dword [BASE+4], LJ_TISNUM @@ -2225,12 +2114,7 @@ static void build_subroutines(BuildCtx *ctx) |3: | ja ->fff_fallback | // Convert intermediate result to number and continue below. - |.if SSE | cvtsi2sd xmm0, RB - |.else - | mov TMP1, RB - | fild TMP1 - |.endif | jmp >6 |4: | ja ->fff_fallback @@ -2238,7 +2122,6 @@ static void build_subroutines(BuildCtx *ctx) | jae ->fff_fallback |.endif | - |.if SSE | movsd xmm0, qword [BASE] |5: // Handle numbers or integers. | cmp RA, RD; jae ->fff_resxmm0 @@ -2257,34 +2140,10 @@ static void build_subroutines(BuildCtx *ctx) | sseop xmm0, xmm1 | add RA, 1 | jmp <5 - |.else - | fld qword [BASE] - |5: // Handle numbers or integers. - | cmp RA, RD; jae ->fff_resn - | cmp dword [BASE+RA*8-4], LJ_TISNUM - |.if DUALNUM - | jb >6 - | ja >9 - | fild dword [BASE+RA*8-8] - | jmp >7 - |.else - | jae >9 - |.endif - |6: - | fld qword [BASE+RA*8-8] - |7: - | fucomi st1; fcmovop st1; fpop1 - | add RA, 1 - | jmp <5 - |.endif |.endmacro | - | math_minmax math_min, cmovg, fcmovnbe, minsd - | math_minmax math_max, cmovl, fcmovbe, maxsd - |.if not SSE - |9: - | fpop; jmp ->fff_fallback - |.endif + | math_minmax math_min, cmovg, minsd + | math_minmax math_max, cmovl, maxsd | |//-- String library ----------------------------------------------------- | @@ -2293,10 +2152,8 @@ static void build_subroutines(BuildCtx *ctx) | mov STR:RB, [BASE] |.if DUALNUM | mov RB, dword STR:RB->len; jmp ->fff_resi - |.elif SSE - | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 |.else - | fild dword STR:RB->len; jmp ->fff_resn + | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 |.endif | |.ffunc string_byte // Only handle the 1-arg case here. @@ -2309,10 +2166,8 @@ static void build_subroutines(BuildCtx *ctx) | movzx RB, byte STR:RB[1] |.if DUALNUM | jmp ->fff_resi - |.elif SSE - | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |.else - | mov TMP1, RB; fild TMP1; jmp ->fff_resn + | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |.endif | |.ffunc string_char // Only handle the 1-arg case here. @@ -2324,16 +2179,11 @@ static void build_subroutines(BuildCtx *ctx) | mov RB, dword [BASE] | cmp RB, 255; ja ->fff_fallback | mov TMP2, RB - |.elif SSE + |.else | jae ->fff_fallback | cvttsd2si RB, qword [BASE] | cmp RB, 255; ja ->fff_fallback | mov TMP2, RB - |.else - | jae ->fff_fallback - | fld qword [BASE] - | fistp TMP2 - | cmp TMP2, 255; ja ->fff_fallback |.endif |.if X64 | mov TMP3, 1 @@ -2371,14 +2221,10 @@ static void build_subroutines(BuildCtx *ctx) | jne ->fff_fallback | mov RB, dword [BASE+16] | mov TMP2, RB - |.elif SSE + |.else | jae ->fff_fallback | cvttsd2si RB, qword [BASE+16] | mov TMP2, RB - |.else - | jae ->fff_fallback - | fld qword [BASE+16] - | fistp TMP2 |.endif |1: | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback @@ -2393,12 +2239,8 @@ static void build_subroutines(BuildCtx *ctx) | mov RB, STR:RB->len |.if DUALNUM | mov RA, dword [BASE+8] - |.elif SSE - | cvttsd2si RA, qword [BASE+8] |.else - | fld qword [BASE+8] - | fistp ARG3 - | mov RA, ARG3 + | cvttsd2si RA, qword [BASE+8] |.endif | mov RC, TMP2 | cmp RB, RC // len < end? (unsigned compare) @@ -2451,14 +2293,9 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | jne ->fff_fallback | mov RC, dword [BASE+8] - |.elif SSE - | jae ->fff_fallback - | cvttsd2si RC, qword [BASE+8] |.else | jae ->fff_fallback - | fld qword [BASE+8] - | fistp TMP2 - | mov RC, TMP2 + | cvttsd2si RC, qword [BASE+8] |.endif | test RC, RC | jle ->fff_emptystr // Count <= 0? (or non-int) @@ -2554,10 +2391,8 @@ static void build_subroutines(BuildCtx *ctx) | mov BASE, RB // Restore BASE. |.if DUALNUM | mov RB, RD; jmp ->fff_resi - |.elif SSE - | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 |.else - | mov ARG1, RD; fild ARG1; jmp ->fff_resn + | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 |.endif | |//-- Bit library -------------------------------------------------------- @@ -2567,11 +2402,7 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_bit, name, kind | .ffunc_1 name |.if kind == 2 - |.if SSE | sseconst_tobit xmm1, RBa - |.else - | mov TMP1, TOBIT_BIAS - |.endif |.endif | cmp dword [BASE+4], LJ_TISNUM |.if DUALNUM @@ -2587,37 +2418,17 @@ static void build_subroutines(BuildCtx *ctx) |.else | jae ->fff_fallback |.endif - |.if SSE | movsd xmm0, qword [BASE] |.if kind < 2 | sseconst_tobit xmm1, RBa |.endif | addsd xmm0, xmm1 | movd RB, xmm0 - |.else - | fld qword [BASE] - |.if kind < 2 - | mov TMP1, TOBIT_BIAS - |.endif - | fadd TMP1 - | fstp FPARG1 - |.if kind > 0 - | mov RB, ARG1 - |.endif - |.endif |2: |.endmacro | |.ffunc_bit bit_tobit, 0 - |.if DUALNUM or SSE - |.if not SSE - | mov RB, ARG1 - |.endif | jmp ->fff_resbit - |.else - | fild ARG1 - | jmp ->fff_resn - |.endif | |.macro .ffunc_bit_op, name, ins | .ffunc_bit name, 2 @@ -2637,17 +2448,10 @@ static void build_subroutines(BuildCtx *ctx) |.else | jae ->fff_fallback_bit_op |.endif - |.if SSE | movsd xmm0, qword [RD] | addsd xmm0, xmm1 | movd RA, xmm0 | ins RB, RA - |.else - | fld qword [RD] - | fadd TMP1 - | fstp FPARG1 - | ins RB, ARG1 - |.endif | sub RD, 8 | jmp <1 |.endmacro @@ -2664,15 +2468,10 @@ static void build_subroutines(BuildCtx *ctx) | not RB |.if DUALNUM | jmp ->fff_resbit - |.elif SSE + |.else |->fff_resbit: | cvtsi2sd xmm0, RB | jmp ->fff_resxmm0 - |.else - |->fff_resbit: - | mov ARG1, RB - | fild ARG1 - | jmp ->fff_resn |.endif | |->fff_fallback_bit_op: @@ -2685,22 +2484,13 @@ static void build_subroutines(BuildCtx *ctx) | // Note: no inline conversion from number for 2nd argument! | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | mov RA, dword [BASE+8] - |.elif SSE + |.else | .ffunc_nnsse name | sseconst_tobit xmm2, RBa | addsd xmm0, xmm2 | addsd xmm1, xmm2 | movd RB, xmm0 | movd RA, xmm1 - |.else - | .ffunc_nn name - | mov TMP1, TOBIT_BIAS - | fadd TMP1 - | fstp FPARG3 - | fadd TMP1 - | fstp FPARG1 - | mov RA, ARG3 - | mov RB, ARG1 |.endif | ins RB, cl // Assumes RA is ecx. | jmp ->fff_resbit @@ -3051,27 +2841,9 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |// FP value rounding. Called by math.floor/math.ceil fast functions - |// and from JIT code. - | - |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. - |.macro vm_round_x87, mode1, mode2 - | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. - | mov [esp+8], eax - | mov ax, mode1 - | or ax, [esp+4] - |.if mode2 ~= 0xffff - | and ax, mode2 - |.endif - | mov [esp+6], ax - | fldcw word [esp+6] - | frndint - | fldcw word [esp+4] - | mov eax, [esp+8] - | ret - |.endmacro - | - |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. - |.macro vm_round_sse, mode + |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. + |.macro vm_round, name, mode + |->name .. _sse: | sseconst_abs xmm2, RDa | sseconst_2p52 xmm3, RDa | movaps xmm1, xmm0 @@ -3107,22 +2879,21 @@ static void build_subroutines(BuildCtx *ctx) | ret |.endmacro | - |.macro vm_round, name, ssemode, mode1, mode2 - |->name: - |.if not SSE - | vm_round_x87 mode1, mode2 + |->vm_floor: + |.if not X64 + | movsd xmm0, qword [esp+4] + | call ->vm_floor_sse + | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. + | fld qword [esp+4] + | ret |.endif - |->name .. _sse: - | vm_round_sse ssemode - |.endmacro | - | vm_round vm_floor, 0, 0x0400, 0xf7ff - | vm_round vm_ceil, 1, 0x0800, 0xfbff - | vm_round vm_trunc, 2, 0x0c00, 0xffff + | vm_round vm_floor, 0 + | vm_round vm_ceil, 1 + | vm_round vm_trunc, 2 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |->vm_mod: - |.if SSE |// Args in xmm0/xmm1, return value in xmm0. |// Caveat: xmm0-xmm5 and RC (eax) modified! | movaps xmm5, xmm0 @@ -3150,23 +2921,6 @@ static void build_subroutines(BuildCtx *ctx) | movaps xmm0, xmm5 | subsd xmm0, xmm1 | ret - |.else - |// Args/ret on x87 stack (y on top). No xmm registers modified. - |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! - | fld st1 - | fdiv st1 - | fnstcw word [esp+4] - | mov ax, 0x0400 - | or ax, [esp+4] - | and ax, 0xf7ff - | mov [esp+6], ax - | fldcw word [esp+6] - | frndint - | fldcw word [esp+4] - | fmulp st1 - | fsubp st1 - | ret - |.endif | |// FP log2(x). Called by math.log(x, base). |->vm_log2: @@ -3217,96 +2971,6 @@ static void build_subroutines(BuildCtx *ctx) | |// Generic power function x^y. Called by BC_POW, math.pow fast function, |// and vm_arith. - |// Args/ret on x87 stack (y on top). RC (eax) modified. - |// Caveat: needs 3 slots on x87 stack! - |->vm_pow: - |.if not SSE - | fist dword [esp+4] // Store/reload int before comparison. - | fild dword [esp+4] // Integral exponent used in vm_powi. - | fucomip st1 - | jnz >8 // Branch for FP exponents. - | jp >9 // Branch for NaN exponent. - | fpop // Pop y and fallthrough to vm_powi. - | - |// FP/int power function x^i. Arg1/ret on x87 stack. - |// Arg2 (int) on C stack. RC (eax) modified. - |// Caveat: needs 2 slots on x87 stack! - | mov eax, [esp+4] - | cmp eax, 1; jle >6 // i<=1? - | // Now 1 < (unsigned)i <= 0x80000000. - |1: // Handle leading zeros. - | test eax, 1; jnz >2 - | fmul st0 - | shr eax, 1 - | jmp <1 - |2: - | shr eax, 1; jz >5 - | fdup - |3: // Handle trailing bits. - | fmul st0 - | shr eax, 1; jz >4 - | jnc <3 - | fmul st1, st0 - | jmp <3 - |4: - | fmulp st1 - |5: - | ret - |6: - | je <5 // x^1 ==> x - | jb >7 - | fld1; fdivrp st1 - | neg eax - | cmp eax, 1; je <5 // x^-1 ==> 1/x - | jmp <1 // x^-i ==> (1/x)^i - |7: - | fpop; fld1 // x^0 ==> 1 - | ret - | - |8: // FP/FP power function x^y. - | fst dword [esp+4] - | fxch - | fst dword [esp+8] - | mov eax, [esp+4]; shl eax, 1 - | cmp eax, 0xff000000; je >2 // x^+-Inf? - | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? - | cmp eax, 0xff000000; je >4 // +-Inf^y? - | fyl2x - | jmp ->vm_exp2raw - | - |9: // Handle x^NaN. - | fld1 - | fucomip st2 - | je >1 // 1^NaN ==> 1 - | fxch // x^NaN ==> NaN - |1: - | fpop - | ret - | - |2: // Handle x^+-Inf. - | fabs - | fld1 - | fucomip st1 - | je >3 // +-1^+-Inf ==> 1 - | fpop; fabs; fldz; mov eax, 0; setc al - | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 - | fxch - |3: - | fpop1; fabs - | ret - | - |4: // Handle +-0^y or +-Inf^y. - | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x| - | fpop; fpop - | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf - | fldz // y < 0, +-Inf^y ==> 0 - | ret - |5: - | mov dword [esp+4], 0x7f800000 // Return +Inf. - | fld dword [esp+4] - | ret - |.endif - | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. |// Needs 16 byte scratch area for x86. Also called from JIT code. |->vm_pow_sse: @@ -3315,7 +2979,7 @@ static void build_subroutines(BuildCtx *ctx) | ucomisd xmm1, xmm2 | jnz >8 // Branch for FP exponents. | jp >9 // Branch for NaN exponent. - | // Fallthrough to vm_powi_sse. + | // Fallthrough. | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. |->vm_powi_sse: @@ -3437,8 +3101,8 @@ static void build_subroutines(BuildCtx *ctx) | .else | .define fpmop, CARG1d | .endif - | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil - | cmp fpmop, 3; jb ->vm_trunc; ja >2 + | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse + | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2 | sqrtsd xmm0, xmm0; ret |2: | .if X64WIN @@ -3478,14 +3142,13 @@ static void build_subroutines(BuildCtx *ctx) | ret |.else // x86 calling convention. | .define fpmop, eax - |.if SSE | mov fpmop, [esp+12] | movsd xmm0, qword [esp+4] | cmp fpmop, 1; je >1; ja >2 - | call ->vm_floor; jmp >7 - |1: ; call ->vm_ceil; jmp >7 + | call ->vm_floor_sse; jmp >7 + |1: ; call ->vm_ceil_sse; jmp >7 |2: ; cmp fpmop, 3; je >1; ja >2 - | call ->vm_trunc; jmp >7 + | call ->vm_trunc_sse; jmp >7 |1: | sqrtsd xmm0, xmm0 |7: @@ -3503,23 +3166,6 @@ static void build_subroutines(BuildCtx *ctx) |2: ; cmp fpmop, 11; je >1; ja >9 | fcos; ret |1: ; fptan; fpop; ret - |.else - | mov fpmop, [esp+12] - | fld qword [esp+4] - | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil - | cmp fpmop, 3; jb ->vm_trunc; ja >2 - | fsqrt; ret - |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 - | cmp fpmop, 7; je >1; ja >2 - | fldln2; fxch; fyl2x; ret - |1: ; fld1; fxch; fyl2x; ret - |2: ; cmp fpmop, 9; je >1; ja >2 - | fldlg2; fxch; fyl2x; ret - |1: ; fsin; ret - |2: ; cmp fpmop, 11; je >1; ja >9 - | fcos; ret - |1: ; fptan; fpop; ret - |.endif |.endif |9: ; int3 // Bad fpm. |.endif @@ -3541,7 +3187,7 @@ static void build_subroutines(BuildCtx *ctx) |2: ; cmp foldop, 3; je >1; ja >2 | mulsd xmm0, xmm1; ret |1: ; divsd xmm0, xmm1; ret - |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow + |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse | cmp foldop, 7; je >1; ja >2 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret @@ -3574,7 +3220,7 @@ static void build_subroutines(BuildCtx *ctx) |1: ; maxsd xmm0, xmm1; ret |9: ; int3 // Bad op. | - |.elif SSE // x86 calling convention with SSE ops. + |.else // x86 calling convention. | | .define foldop, eax | mov foldop, [esp+20] @@ -3593,7 +3239,7 @@ static void build_subroutines(BuildCtx *ctx) |2: ; cmp foldop, 5 | je >1; ja >2 | call ->vm_mod; jmp <7 - |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. + |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area. |2: ; cmp foldop, 7; je >1; ja >2 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 @@ -3608,29 +3254,6 @@ static void build_subroutines(BuildCtx *ctx) |1: ; maxsd xmm0, xmm1; jmp <7 |9: ; int3 // Bad op. | - |.else // x86 calling convention with x87 ops. - | - | mov eax, [esp+20] - | fld qword [esp+4] - | fld qword [esp+12] - | cmp eax, 1; je >1; ja >2 - | faddp st1; ret - |1: ; fsubp st1; ret - |2: ; cmp eax, 3; je >1; ja >2 - | fmulp st1; ret - |1: ; fdivp st1; ret - |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow - | cmp eax, 7; je >1; ja >2 - | fpop; fchs; ret - |1: ; fpop; fabs; ret - |2: ; cmp eax, 9; je >1; ja >2 - | fpatan; ret - |1: ; fxch; fscale; fpop1; ret - |2: ; cmp eax, 11; je >1; ja >9 - | fucomi st1; fcmovnbe st1; fpop1; ret - |1: ; fucomi st1; fcmovbe st1; fpop1; ret - |9: ; int3 // Bad op. - | |.endif | |//----------------------------------------------------------------------- @@ -3943,19 +3566,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA is a number. | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | // RA is a number, RD is an integer. - |.if SSE | cvtsi2sd xmm0, dword [BASE+RD*8] | jmp >2 - |.else - | fld qword [BASE+RA*8] - | fild dword [BASE+RD*8] - | jmp >3 - |.endif | |8: // RA is an integer, RD is not an integer. | ja ->vmeta_comp | // RA is an integer, RD is a number. - |.if SSE | cvtsi2sd xmm1, dword [BASE+RA*8] | movsd xmm0, qword [BASE+RD*8] | add PC, 4 @@ -3963,29 +3579,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | jmp_comp jbe, ja, jb, jae, <9 | jmp <6 |.else - | fild dword [BASE+RA*8] - | jmp >2 - |.endif - |.else | checknum RA, ->vmeta_comp | checknum RD, ->vmeta_comp |.endif - |.if SSE |1: | movsd xmm0, qword [BASE+RD*8] |2: | add PC, 4 | ucomisd xmm0, qword [BASE+RA*8] |3: - |.else - |1: - | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. - |2: - | fld qword [BASE+RD*8] - |3: - | add PC, 4 - | fcomparepp - |.endif | // Unordered: all of ZF CF PF set, ordered: PF clear. | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |.if DUALNUM @@ -4025,43 +3627,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RD is a number. | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | // RD is a number, RA is an integer. - |.if SSE | cvtsi2sd xmm0, dword [BASE+RA*8] - |.else - | fild dword [BASE+RA*8] - |.endif | jmp >2 | |8: // RD is an integer, RA is not an integer. | ja >5 | // RD is an integer, RA is a number. - |.if SSE | cvtsi2sd xmm0, dword [BASE+RD*8] | ucomisd xmm0, qword [BASE+RA*8] - |.else - | fild dword [BASE+RD*8] - | fld qword [BASE+RA*8] - |.endif | jmp >4 | |.else | cmp RB, LJ_TISNUM; jae >5 | checknum RA, >5 |.endif - |.if SSE |1: | movsd xmm0, qword [BASE+RA*8] |2: | ucomisd xmm0, qword [BASE+RD*8] |4: - |.else - |1: - | fld qword [BASE+RA*8] - |2: - | fld qword [BASE+RD*8] - |4: - | fcomparepp - |.endif iseqne_fp: if (vk) { | jp >2 // Unordered means not equal. @@ -4184,39 +3768,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA is a number. | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | // RA is a number, RD is an integer. - |.if SSE | cvtsi2sd xmm0, dword [KBASE+RD*8] - |.else - | fild dword [KBASE+RD*8] - |.endif | jmp >2 | |8: // RA is an integer, RD is a number. - |.if SSE | cvtsi2sd xmm0, dword [BASE+RA*8] | ucomisd xmm0, qword [KBASE+RD*8] - |.else - | fild dword [BASE+RA*8] - | fld qword [KBASE+RD*8] - |.endif | jmp >4 |.else | cmp RB, LJ_TISNUM; jae >3 |.endif - |.if SSE |1: | movsd xmm0, qword [KBASE+RD*8] |2: | ucomisd xmm0, qword [BASE+RA*8] |4: - |.else - |1: - | fld qword [KBASE+RD*8] - |2: - | fld qword [BASE+RA*8] - |4: - | fcomparepp - |.endif goto iseqne_fp; case BC_ISEQP: case BC_ISNEP: vk = op == BC_ISEQP; @@ -4310,16 +3876,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.else | checknum RD, ->vmeta_unm |.endif - |.if SSE | movsd xmm0, qword [BASE+RD*8] | sseconst_sign xmm1, RDa | xorps xmm0, xmm1 | movsd qword [BASE+RA*8], xmm0 - |.else - | fld qword [BASE+RD*8] - | fchs - | fstp qword [BASE+RA*8] - |.endif |.if DUALNUM | jmp <9 |.else @@ -4335,15 +3895,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |1: | mov dword [BASE+RA*8+4], LJ_TISNUM | mov dword [BASE+RA*8], RD - |.elif SSE + |.else | xorps xmm0, xmm0 | cvtsi2sd xmm0, dword STR:RD->len |1: | movsd qword [BASE+RA*8], xmm0 - |.else - | fild dword STR:RD->len - |1: - | fstp qword [BASE+RA*8] |.endif | ins_next |2: @@ -4361,11 +3917,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Length of table returned in eax (RD). |.if DUALNUM | // Nothing to do. - |.elif SSE - | cvtsi2sd xmm0, RD |.else - | mov ARG1, RD - | fild ARG1 + | cvtsi2sd xmm0, RD |.endif | mov BASE, RB // Restore BASE. | movzx RA, PC_RA @@ -4380,7 +3933,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) /* -- Binary ops -------------------------------------------------------- */ - |.macro ins_arithpre, x87ins, sseins, ssereg + |.macro ins_arithpre, sseins, ssereg | ins_ABC ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { @@ -4389,37 +3942,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | .if DUALNUM | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | .endif - | .if SSE - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [KBASE+RC*8] - | .else - | fld qword [BASE+RB*8] - | x87ins qword [KBASE+RC*8] - | .endif + | movsd xmm0, qword [BASE+RB*8] + | sseins ssereg, qword [KBASE+RC*8] || break; ||case 1: | checknum RB, ->vmeta_arith_nv | .if DUALNUM | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | .endif - | .if SSE - | movsd xmm0, qword [KBASE+RC*8] - | sseins ssereg, qword [BASE+RB*8] - | .else - | fld qword [KBASE+RC*8] - | x87ins qword [BASE+RB*8] - | .endif + | movsd xmm0, qword [KBASE+RC*8] + | sseins ssereg, qword [BASE+RB*8] || break; ||default: | checknum RB, ->vmeta_arith_vv | checknum RC, ->vmeta_arith_vv - | .if SSE - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [BASE+RC*8] - | .else - | fld qword [BASE+RB*8] - | x87ins qword [BASE+RC*8] - | .endif + | movsd xmm0, qword [BASE+RB*8] + | sseins ssereg, qword [BASE+RC*8] || break; ||} |.endmacro @@ -4457,54 +3995,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endmacro | |.macro ins_arithpost - |.if SSE | movsd qword [BASE+RA*8], xmm0 - |.else - | fstp qword [BASE+RA*8] - |.endif |.endmacro | - |.macro ins_arith, x87ins, sseins - | ins_arithpre x87ins, sseins, xmm0 + |.macro ins_arith, sseins + | ins_arithpre sseins, xmm0 | ins_arithpost | ins_next |.endmacro | - |.macro ins_arith, intins, x87ins, sseins + |.macro ins_arith, intins, sseins |.if DUALNUM | ins_arithdn intins |.else - | ins_arith, x87ins, sseins + | ins_arith, sseins |.endif |.endmacro | // RA = dst, RB = src1 or num const, RC = src2 or num const case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith add, fadd, addsd + | ins_arith add, addsd break; case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith sub, fsub, subsd + | ins_arith sub, subsd break; case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith imul, fmul, mulsd + | ins_arith imul, mulsd break; case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arith fdiv, divsd + | ins_arith divsd break; case BC_MODVN: - | ins_arithpre fld, movsd, xmm1 + | ins_arithpre movsd, xmm1 |->BC_MODVN_Z: | call ->vm_mod | ins_arithpost | ins_next break; case BC_MODNV: case BC_MODVV: - | ins_arithpre fld, movsd, xmm1 + | ins_arithpre movsd, xmm1 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. break; case BC_POW: - | ins_arithpre fld, movsd, xmm1 - | call ->vm_pow + | ins_arithpre movsd, xmm1 + | call ->vm_pow_sse | ins_arithpost | ins_next break; @@ -4573,25 +4107,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movsx RD, RDW | mov dword [BASE+RA*8+4], LJ_TISNUM | mov dword [BASE+RA*8], RD - |.elif SSE + |.else | movsx RD, RDW // Sign-extend literal. | cvtsi2sd xmm0, RD | movsd qword [BASE+RA*8], xmm0 - |.else - | fild PC_RD // Refetch signed RD from instruction. - | fstp qword [BASE+RA*8] |.endif | ins_next break; case BC_KNUM: | ins_AD // RA = dst, RD = num const - |.if SSE | movsd xmm0, qword [KBASE+RD*8] | movsd qword [BASE+RA*8], xmm0 - |.else - | fld qword [KBASE+RD*8] - | fstp qword [BASE+RA*8] - |.endif | ins_next break; case BC_KPRI: @@ -4698,18 +4224,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_USETN: | ins_AD // RA = upvalue #, RD = num const | mov LFUNC:RB, [BASE-8] - |.if SSE | movsd xmm0, qword [KBASE+RD*8] - |.else - | fld qword [KBASE+RD*8] - |.endif | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | mov RA, UPVAL:RB->v - |.if SSE | movsd qword [RA], xmm0 - |.else - | fstp qword [RA] - |.endif | ins_next break; case BC_USETP: @@ -4863,18 +4381,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.else | // Convert number to int and back and compare. | checknum RC, >5 - |.if SSE | movsd xmm0, qword [BASE+RC*8] | cvtsd2si RC, xmm0 | cvtsi2sd xmm1, RC | ucomisd xmm0, xmm1 - |.else - | fld qword [BASE+RC*8] - | fist ARG1 - | fild ARG1 - | fcomparepp - | mov RC, ARG1 - |.endif | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |.endif | cmp RC, TAB:RB->asize // Takes care of unordered, too. @@ -5011,18 +4521,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.else | // Convert number to int and back and compare. | checknum RC, >5 - |.if SSE | movsd xmm0, qword [BASE+RC*8] | cvtsd2si RC, xmm0 | cvtsi2sd xmm1, RC | ucomisd xmm0, xmm1 - |.else - | fld qword [BASE+RC*8] - | fist ARG1 - | fild ARG1 - | fcomparepp - | mov RC, ARG1 - |.endif | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |.endif | cmp RC, TAB:RB->asize // Takes care of unordered, too. @@ -5386,10 +4888,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if DUALNUM | mov dword [BASE+RA*8+4], LJ_TISNUM | mov dword [BASE+RA*8], RC - |.elif SSE - | cvtsi2sd xmm0, RC |.else - | fild dword [BASE+RA*8-8] + | cvtsi2sd xmm0, RC |.endif | // Copy array slot to returned value. |.if X64 @@ -5405,10 +4905,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Return array index as a numeric key. |.if DUALNUM | // See above. - |.elif SSE - | movsd qword [BASE+RA*8], xmm0 |.else - | fstp qword [BASE+RA*8] + | movsd qword [BASE+RA*8], xmm0 |.endif | mov [BASE+RA*8-8], RC // Update control var. |2: @@ -5421,9 +4919,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |4: // Skip holes in array part. | add RC, 1 - |.if not (DUALNUM or SSE) - | mov [BASE+RA*8-8], RC - |.endif | jmp <1 | |5: // Traverse hash part. @@ -5757,7 +5252,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (!vk) { | cmp RB, LJ_TISNUM; jae ->vmeta_for } - |.if SSE | movsd xmm0, qword FOR_IDX | movsd xmm1, qword FOR_STOP if (vk) { @@ -5770,22 +5264,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ucomisd xmm1, xmm0 |1: | movsd qword FOR_EXT, xmm0 - |.else - | fld qword FOR_STOP - | fld qword FOR_IDX - if (vk) { - | fadd qword FOR_STEP // nidx = idx + step - | fst qword FOR_IDX - | fst qword FOR_EXT - | test RB, RB; js >1 - } else { - | fst qword FOR_EXT - | jl >1 - } - | fxch // Swap lim/(n)idx if step non-negative. - |1: - | fcomparepp - |.endif if (op == BC_FORI) { |.if DUALNUM | jnb <7 @@ -5813,11 +5291,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: | ins_next |.endif - |.if SSE + | |3: // Invert comparison if step is negative. | ucomisd xmm0, xmm1 | jmp <1 - |.endif break; case BC_ITERL: