diff --git a/src/Makefile b/src/Makefile index 07a94251..82a57032 100644 --- a/src/Makefile +++ b/src/Makefile @@ -603,7 +603,6 @@ E= @echo default all: $(TARGET_T) amalg: - @grep "^[+|]" ljamalg.c $(MAKE) all "LJCORE_O=ljamalg.o" clean: diff --git a/src/lib_jit.c b/src/lib_jit.c index c97b0d53..acd6c293 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -104,8 +104,8 @@ LJLIB_CF(jit_status) jit_State *J = L2J(L); L->top = L->base; setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); - flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); - flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); + flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); + flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); return (int)(L->top - L->base); #else setboolV(L->top++, 0); @@ -471,7 +471,7 @@ static int jitopt_flag(jit_State *J, const char *str) str += str[2] == '-' ? 3 : 2; set = 0; } - for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { + for (opt = JIT_F_OPT; ; opt <<= 1) { size_t len = *(const uint8_t *)lst; if (len == 0) break; @@ -640,59 +640,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) #undef JIT_PARAMINIT 0 }; -#endif #if LJ_TARGET_ARM && LJ_TARGET_LINUX #include <sys/utsname.h> #endif -/* Arch-dependent CPU detection. */ -static uint32_t jit_cpudetect(lua_State *L) +/* Arch-dependent CPU feature detection. */ +static uint32_t jit_cpudetect(void) { uint32_t flags = 0; #if LJ_TARGET_X86ORX64 + uint32_t vendor[4]; uint32_t features[4]; if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { -#if !LJ_HASJIT -#define JIT_F_SSE2 2 -#endif - flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; -#if LJ_HASJIT flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; - if (vendor[2] == 0x6c65746e) { /* Intel. */ - if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ - flags |= JIT_F_LEA_AGU; - } else if (vendor[2] == 0x444d4163) { /* AMD. */ - uint32_t fam = (features[0] & 0x0ff00f00); - if (fam >= 0x00000f00) /* K8, K10. 
*/ - flags |= JIT_F_PREFER_IMUL; - } if (vendor[0] >= 7) { uint32_t xfeatures[4]; lj_vm_cpuid(7, xfeatures); flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; } -#endif } - /* Check for required instruction set support on x86 (unnecessary on x64). */ -#if LJ_TARGET_X86 - if (!(flags & JIT_F_SSE2)) - luaL_error(L, "CPU with SSE2 required"); -#endif + /* Don't bother checking for SSE2 -- the VM will crash before getting here. */ + #elif LJ_TARGET_ARM -#if LJ_HASJIT + int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ #if LJ_TARGET_LINUX if (ver < 70) { /* Runtime ARM CPU detection. */ struct utsname ut; uname(&ut); if (strncmp(ut.machine, "armv", 4) == 0) { - if (ut.machine[4] >= '7') - ver = 70; - else if (ut.machine[4] == '6') - ver = 60; + if (ut.machine[4] >= '8') ver = 80; + else if (ut.machine[4] == '7') ver = 70; + else if (ut.machine[4] == '6') ver = 60; } } #endif @@ -700,20 +682,22 @@ static uint32_t jit_cpudetect(lua_State *L) ver >= 61 ? JIT_F_ARMV6T2_ : ver >= 60 ? JIT_F_ARMV6_ : 0; flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; -#endif + #elif LJ_TARGET_ARM64 + /* No optional CPU features to detect (for now). */ + #elif LJ_TARGET_PPC -#if LJ_HASJIT + #if LJ_ARCH_SQRT flags |= JIT_F_SQRT; #endif #if LJ_ARCH_ROUND flags |= JIT_F_ROUND; #endif -#endif + #elif LJ_TARGET_MIPS -#if LJ_HASJIT + /* Compile-time MIPS CPU detection. */ #if LJ_ARCH_VERSION >= 20 flags |= JIT_F_MIPSXXR2; @@ -731,31 +715,28 @@ static uint32_t jit_cpudetect(lua_State *L) if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif -#endif + #else #error "Missing CPU detection for this architecture" #endif - UNUSED(L); return flags; } /* Initialize JIT compiler. 
*/ static void jit_init(lua_State *L) { - uint32_t flags = jit_cpudetect(L); -#if LJ_HASJIT jit_State *J = L2J(L); - J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; + J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); lj_dispatch_update(G(L)); -#else - UNUSED(flags); -#endif } +#endif LUALIB_API int luaopen_jit(lua_State *L) { +#if LJ_HASJIT jit_init(L); +#endif lua_pushliteral(L, LJ_OS_NAME); lua_pushliteral(L, LJ_ARCH_NAME); lua_pushinteger(L, LUAJIT_VERSION_NUM); diff --git a/src/lj_arch.h b/src/lj_arch.h index 027b39ce..70426838 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -208,13 +208,13 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if __ARM_ARCH_8__ || __ARM_ARCH_8A__ +#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #define LJ_ARCH_VERSION 80 -#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ +#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #define LJ_ARCH_VERSION 70 #elif __ARM_ARCH_6T2__ #define LJ_ARCH_VERSION 61 -#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ +#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ #define LJ_ARCH_VERSION 60 #else #define LJ_ARCH_VERSION 50 diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index db3409b9..bf818f5a 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1214,13 +1214,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); - if ((as->flags & JIT_F_PREFER_IMUL)) { - emit_i8(as, sizeof(Node)); - emit_rr(as, XO_IMULi8, dest, dest); - } else { - emit_shifti(as, XOg_SHL, dest, 3); - emit_rmrxo(as, 
XO_LEA, dest, dest, dest, XM_SCALE2, 0); - } + emit_shifti(as, XOg_SHL, dest, 3); + emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); if (isk) { emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); @@ -1279,7 +1274,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) lua_assert(ofs % sizeof(Node) == 0); if (ra_hasreg(dest)) { if (ofs != 0) { - if (dest == node && !(as->flags & JIT_F_LEA_AGU)) + if (dest == node) emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); else emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); @@ -2180,8 +2175,7 @@ static void asm_add(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) asm_fparith(as, ir, XO_ADDSD); - else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || - irt_is64(ir->t) || !asm_lea(as, ir)) + else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) asm_intarith(as, ir, XOg_ADD); } @@ -2903,7 +2897,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target, *q; int32_t spadj = as->T->spadjust; if (spadj == 0) { - p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); + p -= LJ_64 ? 7 : 6; } else { MCode *p1; /* Patch stack adjustment. */ @@ -2915,20 +2909,11 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) p1 = p-9; *(int32_t *)p1 = spadj; } - if ((as->flags & JIT_F_LEA_AGU)) { #if LJ_64 - p1[-4] = 0x48; + p1[-3] = 0x48; #endif - p1[-3] = (MCode)XI_LEA; - p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); - p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); - } else { -#if LJ_64 - p1[-3] = 0x48; -#endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); - } + p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); + p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } /* Patch exit branch. */ target = lnk ? 
traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; @@ -2959,7 +2944,7 @@ static void asm_tail_prep(ASMState *as) as->invmcp = as->mcp = p; } else { /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); + as->mcp = p - (LJ_64 ? 7 : 6); as->invmcp = NULL; } } diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 8553438c..39416d00 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -252,15 +252,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) } else { if (!(mode & LUAJIT_MODE_ON)) G2J(g)->flags &= ~(uint32_t)JIT_F_ON; -#if LJ_TARGET_X86ORX64 - else if ((G2J(g)->flags & JIT_F_SSE2)) - G2J(g)->flags |= (uint32_t)JIT_F_ON; - else - return 0; /* Don't turn on JIT compiler without SSE2 support. */ -#else else G2J(g)->flags |= (uint32_t)JIT_F_ON; -#endif lj_dispatch_update(g); } break; diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index bc4391a0..b17e28a5 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -559,10 +559,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { - if ((as->flags & JIT_F_LEA_AGU)) - emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); - else - emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); } } diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index efb7c3f3..9110dc7e 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -101,11 +101,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(BADMODN, "name conflict for module " LUA_QS) #if LJ_HASJIT ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") -#if LJ_TARGET_X86ORX64 -ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") -#else ERRDEF(NOJIT, "JIT compiler disabled") -#endif #elif defined(LJ_ARCH_NOJIT) ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") #else diff --git 
a/src/lj_jit.h b/src/lj_jit.h index f179f17f..a9c602f0 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -9,47 +9,49 @@ #include "lj_obj.h" #include "lj_ir.h" -/* JIT engine flags. */ +/* -- JIT engine flags ---------------------------------------------------- */ + +/* General JIT engine flags. 4 bits. */ #define JIT_F_ON 0x00000001 -/* CPU-specific JIT engine flags. */ +/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */ +#define JIT_F_CPU 0x00000010 + #if LJ_TARGET_X86ORX64 -#define JIT_F_SSE2 0x00000010 -#define JIT_F_SSE3 0x00000020 -#define JIT_F_SSE4_1 0x00000040 -#define JIT_F_PREFER_IMUL 0x00000080 -#define JIT_F_LEA_AGU 0x00000100 -#define JIT_F_BMI2 0x00000200 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SSE2 -#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" +#define JIT_F_SSE3 (JIT_F_CPU << 0) +#define JIT_F_SSE4_1 (JIT_F_CPU << 1) +#define JIT_F_BMI2 (JIT_F_CPU << 2) + + +#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" + #elif LJ_TARGET_ARM -#define JIT_F_ARMV6_ 0x00000010 -#define JIT_F_ARMV6T2_ 0x00000020 -#define JIT_F_ARMV7 0x00000040 -#define JIT_F_VFPV2 0x00000080 -#define JIT_F_VFPV3 0x00000100 -#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) +#define JIT_F_ARMV6_ (JIT_F_CPU << 0) +#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) +#define JIT_F_ARMV7 (JIT_F_CPU << 2) +#define JIT_F_ARMV8 (JIT_F_CPU << 3) +#define JIT_F_VFPV2 (JIT_F_CPU << 4) +#define JIT_F_VFPV3 (JIT_F_CPU << 5) + +#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) +#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) -/* Names for the CPU-specific flags. Must match the order above. 
*/ -#define JIT_F_CPU_FIRST JIT_F_ARMV6_ -#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" +#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" + #elif LJ_TARGET_PPC -#define JIT_F_SQRT 0x00000010 -#define JIT_F_ROUND 0x00000020 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SQRT +#define JIT_F_SQRT (JIT_F_CPU << 0) +#define JIT_F_ROUND (JIT_F_CPU << 1) + #define JIT_F_CPUSTRING "\4SQRT\5ROUND" -#elif LJ_TARGET_MIPS -#define JIT_F_MIPSXXR2 0x00000010 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 +#elif LJ_TARGET_MIPS + +#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) + #if LJ_TARGET_MIPS32 #if LJ_TARGET_MIPSR6 #define JIT_F_CPUSTRING "\010MIPS32R6" @@ -63,27 +65,29 @@ #define JIT_F_CPUSTRING "\010MIPS64R2" #endif #endif + #else -#define JIT_F_CPU_FIRST 0 + #define JIT_F_CPUSTRING "" + #endif -/* Optimization flags. */ +/* Optimization flags. 12 bits. */ +#define JIT_F_OPT 0x00010000 #define JIT_F_OPT_MASK 0x0fff0000 -#define JIT_F_OPT_FOLD 0x00010000 -#define JIT_F_OPT_CSE 0x00020000 -#define JIT_F_OPT_DCE 0x00040000 -#define JIT_F_OPT_FWD 0x00080000 -#define JIT_F_OPT_DSE 0x00100000 -#define JIT_F_OPT_NARROW 0x00200000 -#define JIT_F_OPT_LOOP 0x00400000 -#define JIT_F_OPT_ABC 0x00800000 -#define JIT_F_OPT_SINK 0x01000000 -#define JIT_F_OPT_FUSE 0x02000000 +#define JIT_F_OPT_FOLD (JIT_F_OPT << 0) +#define JIT_F_OPT_CSE (JIT_F_OPT << 1) +#define JIT_F_OPT_DCE (JIT_F_OPT << 2) +#define JIT_F_OPT_FWD (JIT_F_OPT << 3) +#define JIT_F_OPT_DSE (JIT_F_OPT << 4) +#define JIT_F_OPT_NARROW (JIT_F_OPT << 5) +#define JIT_F_OPT_LOOP (JIT_F_OPT << 6) +#define JIT_F_OPT_ABC (JIT_F_OPT << 7) +#define JIT_F_OPT_SINK (JIT_F_OPT << 8) +#define JIT_F_OPT_FUSE (JIT_F_OPT << 9) /* Optimizations names for -O. Must match the order above. 
*/ -#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD #define JIT_F_OPTSTRING \ "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" @@ -95,6 +99,8 @@ JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 +/* -- JIT engine parameters ----------------------------------------------- */ + #if LJ_TARGET_WINDOWS || LJ_64 /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ #define JIT_P_sizemcode_DEFAULT 64 @@ -137,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM) #define JIT_PARAMSTR(len, name, value) #len #name #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) +/* -- JIT engine data structures ------------------------------------------ */ + /* Trace compiler state. */ typedef enum { LJ_TRACE_IDLE, /* Trace compiler idle. */ diff --git a/src/ljamalg.c b/src/ljamalg.c index 39542981..6712d435 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -3,16 +3,6 @@ ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h */ -/* -+--------------------------------------------------------------------------+ -| WARNING: Compiling the amalgamation needs a lot of virtual memory | -| (around 300 MB with GCC 4.x)! If you don't have enough physical memory | -| your machine will start swapping to disk and the compile will not finish | -| within a reasonable amount of time. | -| So either compile on a bigger machine or use the non-amalgamated build. | -+--------------------------------------------------------------------------+ -*/ - #define ljamalg_c #define LUA_CORE