diff --git a/doc/ext_jit.html b/doc/ext_jit.html index 9489c6f5..86098019 100644 --- a/doc/ext_jit.html +++ b/doc/ext_jit.html @@ -153,7 +153,7 @@ Contains the target OS name:

jit.arch

Contains the target architecture name: -"x86", "x64", "arm", "ppc", or "mips". +"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".

jit.opt.* — JIT compiler optimization control

diff --git a/doc/extensions.html b/doc/extensions.html index 7f712a62..9d666293 100644 --- a/doc/extensions.html +++ b/doc/extensions.html @@ -349,6 +349,7 @@ break the Lua/C API and ABI (e.g. _ENV). LuaJIT supports some extensions from Lua 5.3:

C++ Exception Interoperability

@@ -365,25 +366,30 @@ the toolchain used to compile LuaJIT: POSIX/x64, DWARF2 unwinding -GCC 4.3+ +GCC 4.3+, Clang Full -Other platforms, DWARF2 unwinding -GCC -Limited +ARM -DLUAJIT_UNWIND_EXTERNAL +GCC, Clang +Full +Other platforms, DWARF2 unwinding +GCC, Clang +Limited + + Windows/x64 MSVC or WinSDK Full - + Windows/x86 Any -No +Full - + Other platforms Other compilers No @@ -432,14 +438,6 @@ C++ destructors.
  • Lua errors cannot be caught on the C++ side.
  • Throwing Lua errors across C++ frames will not call C++ destructors.
  • -
  • Additionally, on Windows/x86 with SEH-based C++ exceptions: -it's not safe to throw a Lua error across any frames containing -a C++ function with any try/catch construct or using variables with -(implicit) destructors. This also applies to any functions which may be -inlined in such a function. It doesn't matter whether lua_error() -is called inside or outside of a try/catch or whether any object actually -needs to be destroyed: the SEH chain is corrupted and this will eventually -lead to the termination of the process.

  • diff --git a/doc/install.html b/doc/install.html index 9921258e..efeda33c 100644 --- a/doc/install.html +++ b/doc/install.html @@ -122,7 +122,7 @@ operating systems, CPUs and compilers: x64 (64 bit) GCC 4.2+ -ORBIS (PS4) +GCC 4.2+
    ORBIS (PS4) XCode 5.0+
    Clang MSVC + SDK v7.0
    WinSDK v7.0
    Durango (Xbox One) @@ -148,7 +148,7 @@ operating systems, CPUs and compilers: XEDK (Xbox 360) -MIPS +MIPS32
    MIPS64
    GCC 4.3+ GCC 4.3+   @@ -202,7 +202,7 @@ which is probably the default on your system, anyway. Simply run: make

    -This always builds a native x86, x64 or PPC binary, depending on the host OS +This always builds a native binary, depending on the host OS you're running this command on. Check the section on cross-compilation for more options.

    @@ -333,25 +333,36 @@ directory where luajit.exe is installed

    Cross-compiling LuaJIT

    -The GNU Makefile-based build system allows cross-compiling on any host -for any supported target, as long as both architectures have the same -pointer size. If you want to cross-compile to any 32 bit target on an -x64 OS, you need to install the multilib development package (e.g. -libc6-dev-i386 on Debian/Ubuntu) and build a 32 bit host part -(HOST_CC="gcc -m32"). +First, let's clear up some terminology:

    + +

    +The GNU Makefile-based build system allows cross-compiling on any host +for any supported target: +

    +

    You need to specify TARGET_SYS whenever the host OS and the -target OS differ, or you'll get assembler or linker errors. E.g. if -you're compiling on a Windows or OSX host for embedded Linux or Android, -you need to add TARGET_SYS=Linux to the examples below. For a -minimal target OS, you may need to disable the built-in allocator in -src/Makefile and use TARGET_SYS=Other. Don't forget to -specify the same TARGET_SYS for the install step, too. +target OS differ, or you'll get assembler or linker errors:

    +

    -The examples below only show some popular targets — please check -the comments in src/Makefile for more details. +Here are some examples where host and target have the same CPU:

     # Cross-compile to a 32 bit binary on a multilib x64 OS
    @@ -369,38 +380,47 @@ use the canonical toolchain triplets for Linux.
     

    Since there's often no easy way to detect CPU features at runtime, it's -important to compile with the proper CPU or architecture settings. You -can specify these when building the toolchain yourself. Or add --mcpu=... or -march=... to TARGET_CFLAGS. For -ARM it's important to have the correct -mfloat-abi=... setting, -too. Otherwise LuaJIT may not run at the full performance of your target -CPU. +important to compile with the proper CPU or architecture settings: + +

    +

    +Here are some examples for targets with a different CPU than the host:

     # ARM soft-float
     make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
          TARGET_CFLAGS="-mfloat-abi=soft"
     
    -# ARM soft-float ABI with VFP (example for Cortex-A8)
    +# ARM soft-float ABI with VFP (example for Cortex-A9)
     make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
    -     TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp"
    +     TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
     
    -# ARM hard-float ABI with VFP (armhf, requires recent toolchain)
    +# ARM hard-float ABI with VFP (armhf, most modern toolchains)
     make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
     
    -# ARM64 (requires x64 host)
    +# ARM64
     make CROSS=aarch64-linux-
     
     # PPC
     make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
     
    -# MIPS big-endian
    +# MIPS32 big-endian
     make HOST_CC="gcc -m32" CROSS=mips-linux-
    -# MIPS little-endian
    +# MIPS32 little-endian
     make HOST_CC="gcc -m32" CROSS=mipsel-linux-
    +
    +# MIPS64 big-endian
    +make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
    +# MIPS64 little-endian
    +make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
     

    -You can cross-compile for Android using the » Android NDK. +You can cross-compile for Android using the Android NDK. The environment variables need to match the install locations and the desired target platform. E.g. Android 4.0 corresponds to ABI level 14. For details check the folder docs in the NDK directory. @@ -414,7 +434,7 @@ to build/deploy or which lowest common denominator you want to pick: # Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo) NDK=/opt/android/ndk NDKABI=8 -NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6 +NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" @@ -422,16 +442,16 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS) NDK=/opt/android/ndk NDKABI=14 -NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6 +NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH" -# Android/MIPS, mips (MIPS32R1 hard-float), Android 4.0+ (ICS) +# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS) NDK=/opt/android/ndk NDKABI=14 -NDKVER=$NDK/toolchains/mipsel-linux-android-4.6 +NDKVER=$NDK/toolchains/mipsel-linux-android-4.9 NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android- NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" @@ -439,7 +459,7 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" # Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS) NDK=/opt/android/ndk NDKABI=14 -NDKVER=$NDK/toolchains/x86-4.6 +NDKVER=$NDK/toolchains/x86-4.9 
NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android- NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" @@ -459,14 +479,15 @@ Or use Android. :-p ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) ICC=$(xcrun --sdk iphoneos --find clang) ISDKF="-arch armv7 -isysroot $ISDKP" -make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \ - TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS +make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \ + CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS # iOS/ARM64 ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) ICC=$(xcrun --sdk iphoneos --find clang) ISDKF="-arch arm64 -isysroot $ISDKP" -make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS +make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \ + TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS

    Cross-compiling for consoles

    diff --git a/doc/luajit.html b/doc/luajit.html index c7a92b86..44a7b8a1 100644 --- a/doc/luajit.html +++ b/doc/luajit.html @@ -169,10 +169,10 @@ LuaJIT is Copyright © 2005-2016 Mike Pall, released under the PS3PS4PS VitaXbox 360Xbox One - +
    GCCCLANG
    LLVM
    MSVC
    GCCClang
    LLVM
    MSVC
    - +
    x86x64ARMARM64PPCMIPS
    x86
    x64
    ARM
    ARM64
    PPCMIPS32
    MIPS64
    diff --git a/doc/status.html b/doc/status.html index 6f57db1b..d10033b0 100644 --- a/doc/status.html +++ b/doc/status.html @@ -91,12 +91,6 @@ hooks for non-Lua functions) and shows slightly different behavior in LuaJIT (no per-coroutine hooks, no tail call counting).
  • -Some checks are missing in the JIT-compiled code for obscure situations -with open upvalues aliasing one of the SSA slots later on (or -vice versa). Bonus points, if you can find a real world test case for -this. -
  • -
  • Currently some out-of-memory errors from on-trace code are not handled correctly. The error may fall through an on-trace pcall or it may be passed on to the function set with diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h index c10528fa..f3b43211 100644 --- a/dynasm/dasm_mips.h +++ b/dynasm/dasm_mips.h @@ -21,7 +21,7 @@ enum { /* The following actions need a buffer position. */ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, DASM__MAX }; @@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...) *pl = -pos; /* Label exists now. */ b[pos++] = ofs; /* Store pass1 offset estimate. */ break; - case DASM_IMM: + case DASM_IMM: case DASM_IMMS: #ifdef DASM_CHECKS CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); #endif @@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp) case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; case DASM_REL_LG: case DASM_REL_PC: pos++; break; case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; - case DASM_IMM: pos++; break; + case DASM_IMM: case DASM_IMMS: pos++; break; } } stop: (void)0; @@ -356,7 +356,7 @@ int dasm_encode(Dst_DECL, void *buffer) if (ins & 2048) n = n - (int)((char *)cp - base); else - n = (n + (int)base) & 0x0fffffff; + n = (n + (int)(size_t)base) & 0x0fffffff; patchrel: CK((n & 3) == 0 && ((n + ((ins & 2048) ? 
0x00020000 : 0)) >> @@ -367,6 +367,9 @@ int dasm_encode(Dst_DECL, void *buffer) ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); break; case DASM_LABEL_PC: break; + case DASM_IMMS: + cp[-1] |= ((n>>3) & 4); n &= 0x1f; + /* fallthrough */ case DASM_IMM: cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); break; diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua index c5a5595c..c8010561 100644 --- a/dynasm/dasm_mips.lua +++ b/dynasm/dasm_mips.lua @@ -1,17 +1,19 @@ ------------------------------------------------------------------------------ --- DynASM MIPS module. +-- DynASM MIPS32/MIPS64 module. -- -- Copyright (C) 2005-2016 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ +local mips64 = mips64 + -- Module information: local _info = { - arch = "mips", - description = "DynASM MIPS module", + arch = mips64 and "mips64" or "mips", + description = "DynASM MIPS32/MIPS64 module", version = "1.4.0", vernum = 10400, - release = "2015-10-18", + release = "2016-05-24", author = "Mike Pall", license = "MIT", } @@ -27,7 +29,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char local match, gmatch = _s.match, _s.gmatch local concat, sort = table.concat, table.sort local bit = bit or require("bit") -local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex -- Inherited tables and callbacks. local g_opt, g_arch @@ -38,7 +41,7 @@ local wline, werror, wfatal, wwarn local action_names = { "STOP", "SECTION", "ESC", "REL_EXT", "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", + "REL_PC", "LABEL_PC", "IMM", "IMMS", } -- Maximum number of section buffer positions for dasm_put(). 
@@ -251,6 +254,10 @@ local map_op = { bnel_3 = "54000000STB", blezl_2 = "58000000SB", bgtzl_2 = "5c000000SB", + daddi_3 = mips64 and "60000000TSI", + daddiu_3 = mips64 and "64000000TSI", + ldl_2 = mips64 and "68000000TO", + ldr_2 = mips64 and "6c000000TO", lb_2 = "80000000TO", lh_2 = "84000000TO", lwl_2 = "88000000TO", @@ -258,23 +265,30 @@ local map_op = { lbu_2 = "90000000TO", lhu_2 = "94000000TO", lwr_2 = "98000000TO", + lwu_2 = mips64 and "9c000000TO", sb_2 = "a0000000TO", sh_2 = "a4000000TO", swl_2 = "a8000000TO", sw_2 = "ac000000TO", + sdl_2 = mips64 and "b0000000TO", + sdr_2 = mips64 and "b4000000TO", swr_2 = "b8000000TO", cache_2 = "bc000000NO", ll_2 = "c0000000TO", lwc1_2 = "c4000000HO", pref_2 = "cc000000NO", ldc1_2 = "d4000000HO", + ld_2 = mips64 and "dc000000TO", sc_2 = "e0000000TO", swc1_2 = "e4000000HO", + scd_2 = mips64 and "f0000000TO", sdc1_2 = "f4000000HO", + sd_2 = mips64 and "fc000000TO", -- Opcode SPECIAL. nop_0 = "00000000", sll_3 = "00000000DTA", + sextw_2 = "00000000DT", movf_2 = "00000001DS", movf_3 = "00000001DSC", movt_2 = "00010001DS", @@ -285,6 +299,7 @@ local map_op = { sllv_3 = "00000004DTS", srlv_3 = "00000006DTS", rotrv_3 = "00000046DTS", + drotrv_3 = mips64 and "00000056DTS", srav_3 = "00000007DTS", jr_1 = "00000008S", jalr_1 = "0000f809S", @@ -300,15 +315,22 @@ local map_op = { mthi_1 = "00000011S", mflo_1 = "00000012D", mtlo_1 = "00000013S", + dsllv_3 = mips64 and "00000014DTS", + dsrlv_3 = mips64 and "00000016DTS", + dsrav_3 = mips64 and "00000017DTS", mult_2 = "00000018ST", multu_2 = "00000019ST", div_2 = "0000001aST", divu_2 = "0000001bST", + dmult_2 = mips64 and "0000001cST", + dmultu_2 = mips64 and "0000001dST", + ddiv_2 = mips64 and "0000001eST", + ddivu_2 = mips64 and "0000001fST", add_3 = "00000020DST", - move_2 = "00000021DS", + move_2 = mips64 and "00000025DS" or "00000021DS", addu_3 = "00000021DST", sub_3 = "00000022DST", - negu_2 = "00000023DT", + negu_2 = mips64 and "0000002fDT" or "00000023DT", subu_3 = 
"00000023DST", and_3 = "00000024DST", or_3 = "00000025DST", @@ -317,6 +339,10 @@ local map_op = { nor_3 = "00000027DST", slt_3 = "0000002aDST", sltu_3 = "0000002bDST", + dadd_3 = mips64 and "0000002cDST", + daddu_3 = mips64 and "0000002dDST", + dsub_3 = mips64 and "0000002eDST", + dsubu_3 = mips64 and "0000002fDST", tge_2 = "00000030ST", tge_3 = "00000030STZ", tgeu_2 = "00000031ST", @@ -329,6 +355,14 @@ local map_op = { teq_3 = "00000034STZ", tne_2 = "00000036ST", tne_3 = "00000036STZ", + dsll_3 = mips64 and "00000038DTa", + dsrl_3 = mips64 and "0000003aDTa", + drotr_3 = mips64 and "0020003aDTa", + dsra_3 = mips64 and "0000003bDTa", + dsll32_3 = mips64 and "0000003cDTA", + dsrl32_3 = mips64 and "0000003eDTA", + drotr32_3 = mips64 and "0020003eDTA", + dsra32_3 = mips64 and "0000003fDTA", -- Opcode REGIMM. bltz_2 = "04000000SB", @@ -356,13 +390,24 @@ local map_op = { msubu_2 = "70000005ST", clz_2 = "70000020DS=", clo_2 = "70000021DS=", + dclz_2 = mips64 and "70000024DS=", + dclo_2 = mips64 and "70000025DS=", sdbbp_0 = "7000003f", sdbbp_1 = "7000003fY", -- Opcode SPECIAL3. ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 + dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 + dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1 + dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1 + zextw_2 = mips64 and "7c00f803TS", ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 + dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33 + dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33 + dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1 wsbh_2 = "7c0000a0DT", + dsbh_2 = mips64 and "7c0000a4DT", + dshd_2 = mips64 and "7c000164DT", seb_2 = "7c000420DT", seh_2 = "7c000620DT", rdhwr_2 = "7c00003bTD", @@ -370,8 +415,12 @@ local map_op = { -- Opcode COP0. 
mfc0_2 = "40000000TD", mfc0_3 = "40000000TDW", + dmfc0_2 = mips64 and "40200000TD", + dmfc0_3 = mips64 and "40200000TDW", mtc0_2 = "40800000TD", mtc0_3 = "40800000TDW", + dmtc0_2 = mips64 and "40a00000TD", + dmtc0_3 = mips64 and "40a00000TDW", rdpgpr_2 = "41400000DT", di_0 = "41606000", di_1 = "41606000T", @@ -388,9 +437,11 @@ local map_op = { -- Opcode COP1. mfc1_2 = "44000000TG", + dmfc1_2 = mips64 and "44200000TG", cfc1_2 = "44400000TG", mfhc1_2 = "44600000TG", mtc1_2 = "44800000TG", + dmtc1_2 = mips64 and "44a00000TG", ctc1_2 = "44c00000TG", mthc1_2 = "44e00000TG", @@ -633,7 +684,7 @@ local function parse_fpr(expr) werror("bad register name `"..expr.."'") end -local function parse_imm(imm, bits, shift, scale, signed) +local function parse_imm(imm, bits, shift, scale, signed, action) local n = tonumber(imm) if n then local m = sar(n, scale) @@ -651,7 +702,8 @@ local function parse_imm(imm, bits, shift, scale, signed) match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then werror("expected immediate operand, got register") else - waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + waction(action or "IMM", + (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) return 0 end end @@ -763,6 +815,9 @@ map_op[".template__"] = function(params, template, nparams) n = n + 1 elseif p == "A" then op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 + elseif p == "a" then + local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1 + op = op + band(m, 0x7c0) + band(shr(m, 9), 4) elseif p == "M" then op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 elseif p == "N" then diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua new file mode 100644 index 00000000..94f21921 --- /dev/null +++ b/dynasm/dasm_mips64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM MIPS64 module. +-- +-- Copyright (C) 2005-2016 Mike Pall. All rights reserved. 
+-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +mips64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_mips") diff --git a/src/Makefile b/src/Makefile index 3f25192b..4e479ae5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -121,8 +121,8 @@ XCFLAGS= # # Use the system provided memory allocator (realloc) instead of the # bundled memory allocator. This is slower, but sometimes helpful for -# debugging. This option cannot be enabled on x64, since realloc usually -# doesn't return addresses in the right address range. +# debugging. This option cannot be enabled on x64 without GC64, since +# realloc usually doesn't return addresses in the right address range. # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and # the only way to get useful results from it for all other architectures. #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC @@ -166,10 +166,6 @@ else HOST_SYS= Windows HOST_MSYS= cygwin endif - # Use Clang for OSX host. 
- ifeq (Darwin,$(HOST_SYS)) - DEFAULT_CC= clang - endif endif ############################################################################## @@ -257,7 +253,11 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) TARGET_ARCH= -D__MIPSEL__=1 endif - TARGET_LJARCH= mips + ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= mips64 + else + TARGET_LJARCH= mips + endif else $(error Unsupported target architecture) endif diff --git a/src/Makefile.dep b/src/Makefile.dep index 2c329f55..4ef002e9 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -163,7 +163,7 @@ lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \ - lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h + lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ @@ -215,19 +215,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ - lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \ - lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \ - lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ - lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ - lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \ - lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ - lj_opt_dce.c lj_opt_loop.c lj_snap.h 
lj_opt_split.c lj_opt_sink.c \ - lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ - lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ - lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ - lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ - lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ - lib_init.c + lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \ + lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ + lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ + lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ + lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \ + lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \ + lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \ + lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ + lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ + lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ + lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ + lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ + lib_ffi.c lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/src/host/buildvm.c b/src/host/buildvm.c index 6d9e09e1..57b4dc97 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -110,7 +110,7 @@ static const char *sym_decorate(BuildCtx *ctx, if (p) { #if LJ_TARGET_X86ORX64 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) - name[0] = '@'; + name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. 
*/ else *p = '\0'; #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c index e8c927d8..42f6ac84 100644 --- a/src/host/buildvm_peobj.c +++ b/src/host/buildvm_peobj.c @@ -109,6 +109,8 @@ enum { #if LJ_TARGET_X64 PEOBJ_SECT_PDATA, PEOBJ_SECT_XDATA, +#elif LJ_TARGET_X86 + PEOBJ_SECT_SXDATA, #endif PEOBJ_SECT_RDATA_Z, PEOBJ_NSECTIONS @@ -208,6 +210,13 @@ void emit_peobj(BuildCtx *ctx) sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; +#elif LJ_TARGET_X86 + memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1); + pesect[PEOBJ_SECT_SXDATA].ofs = sofs; + sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4); + pesect[PEOBJ_SECT_SXDATA].relocofs = sofs; + /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */ + pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240; #endif memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); @@ -232,7 +241,7 @@ void emit_peobj(BuildCtx *ctx) nrsym = ctx->nrelocsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; #if LJ_TARGET_X64 - pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ + pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ #endif /* Write PE object header and all sections. */ @@ -312,6 +321,19 @@ void emit_peobj(BuildCtx *ctx) reloc.type = PEOBJ_RELOC_ADDR32NB; owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); } +#elif LJ_TARGET_X86 + /* Write .sxdata section. */ + for (i = 0; i < nrsym; i++) { + if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) { + uint32_t symidx = 1+2+i; + owrite(ctx, &symidx, 4); + break; + } + } + if (i == nrsym) { + fprintf(stderr, "Error: extern lj_err_unwind_win not used\n"); + exit(1); + } #endif /* Write .rdata$Z section. 
*/ @@ -333,8 +355,10 @@ void emit_peobj(BuildCtx *ctx) #if LJ_TARGET_X64 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); - emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, + emit_peobj_sym(ctx, "lj_err_unwind_win", 0, PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); +#elif LJ_TARGET_X86 + emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA); #endif emit_peobj_sym(ctx, ctx->beginsym, 0, diff --git a/src/host/genminilua.lua b/src/host/genminilua.lua index e9a68297..16a81a23 100644 --- a/src/host/genminilua.lua +++ b/src/host/genminilua.lua @@ -157,11 +157,11 @@ local function merge_includes(src) if includes[name] then return "" end includes[name] = true local fp = assert(io.open(LUA_SOURCE..name, "r")) - local src = fp:read("*a") + local inc = fp:read("*a") assert(fp:close()) - src = gsub(src, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "") - src = gsub(src, "#endif%s*$", "") - return merge_includes(src) + inc = gsub(inc, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "") + inc = gsub(inc, "#endif%s*$", "") + return merge_includes(inc) end) end diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index d0968b18..5117f714 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -125,12 +125,12 @@ extern "C" #ifdef _WIN32 __declspec(dllexport) #endif -const char %s%s[] = { +const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname)) else fp:write(string.format([[ #define %s%s_SIZE %d -static const char %s%s[] = { +static const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) end local t, n, m = {}, 0, 0 diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua index 1296d816..5d2cdbda 100644 --- a/src/jit/dis_arm.lua +++ b/src/jit/dis_arm.lua @@ -12,7 +12,7 @@ local type = type local sub, byte, format = string.sub, string.byte, string.format -local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local match, gmatch = string.match, string.gmatch local concat = table.concat 
local bit = require("bit") local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua index 2bf8b389..c8fb0ea8 100644 --- a/src/jit/dis_mips.lua +++ b/src/jit/dis_mips.lua @@ -11,8 +11,8 @@ ------------------------------------------------------------------------------ local type = type -local sub, byte, format = string.sub, string.byte, string.format -local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local byte, format = string.byte, string.format +local match, gmatch = string.match, string.gmatch local concat = table.concat local bit = require("bit") local band, bor, tohex = bit.band, bit.bor, bit.tohex @@ -38,7 +38,7 @@ local map_special = { "multST", "multuST", "divST", "divuST", false, false, false, false, "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", - "andDST", "orDST", "xorDST", "nor|notDST0", + "andDST", "or|moveDST0", "xorDST", "nor|notDST0", false, false, "sltDST", "sltuDST", false, false, false, false, "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", @@ -214,7 +214,7 @@ local map_pri = { map_cop0, map_cop1, false, map_cop1x, "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", false, false, false, false, - map_special2, false, false, map_special3, + map_special2, "jalxJ", false, map_special3, "lbTSO", "lhTSO", "lwlTSO", "lwTSO", "lbuTSO", "lhuTSO", "lwrTSO", false, "sbTSO", "shTSO", "swlTSO", "swTSO", diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua index 30f51ecd..30eb3978 100644 --- a/src/jit/dis_ppc.lua +++ b/src/jit/dis_ppc.lua @@ -13,7 +13,7 @@ ------------------------------------------------------------------------------ local type = type -local sub, byte, format = string.sub, string.byte, string.format +local byte, format = string.byte, string.format local match, gmatch, gsub = string.match, string.gmatch, string.gsub local concat = table.concat local bit = require("bit") diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index 0bbd198f..8cac9ae9 
100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua @@ -818,7 +818,7 @@ map_act = { m = b%32; b = (b-m)/32 local nb = b%2; b = (b-nb)/2 if nb == 0 then ctx.rexb = true end - local nx = b%2; b = (b-nx)/2 + local nx = b%2 if nx == 0 then ctx.rexx = true end b = byte(ctx.code, pos, pos) if not b then return incomplete(ctx) end diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 9a722f73..1eca12a8 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua @@ -63,9 +63,9 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr local bit = require("bit") -local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex +local band, shr, tohex = bit.band, bit.rshift, bit.tohex local sub, gsub, format = string.sub, string.gsub, string.format -local byte, char, rep = string.byte, string.char, string.rep +local byte, rep = string.byte, string.rep local type, tostring = type, tostring local stdout, stderr = io.stdout, io.stderr @@ -213,7 +213,7 @@ local colortype_ansi = { "\027[35m%s\027[m", } -local function colorize_text(s, t) +local function colorize_text(s) return s end @@ -310,15 +310,17 @@ local function fmtfunc(func, pc) end end -local function formatk(tr, idx) +local function formatk(tr, idx, sn) local k, t, slot = tracek(tr, idx) local tn = type(k) local s if tn == "number" then - if k == 2^52+2^51 then + if band(sn or 0, 0x30000) ~= 0 then + s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" + elseif k == 2^52+2^51 then s = "bias" else - s = format("%+.14g", k) + s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) end elseif tn == "string" then s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) @@ -354,7 +356,7 @@ local function printsnap(tr, snap) n = n + 1 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS if ref < 0 then - out:write(formatk(tr, ref)) + out:write(formatk(tr, 
ref, sn)) elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) else diff --git a/src/jit/p.lua b/src/jit/p.lua index 5323728b..09b3b9fe 100644 --- a/src/jit/p.lua +++ b/src/jit/p.lua @@ -120,7 +120,7 @@ end -- Show top N list. local function prof_top(count1, count2, samples, indent) local t, n = {}, 0 - for k, v in pairs(count1) do + for k in pairs(count1) do n = n + 1 t[n] = k end diff --git a/src/lib_aux.c b/src/lib_aux.c index 0e61d951..d6f56e30 100644 --- a/src/lib_aux.c +++ b/src/lib_aux.c @@ -302,7 +302,7 @@ static int panic(lua_State *L) #ifdef LUAJIT_USE_SYSMALLOC -#if LJ_64 && !defined(LUAJIT_USE_VALGRIND) +#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND) #error "Must use builtin allocator for 64 bit target" #endif @@ -334,7 +334,7 @@ LUALIB_API lua_State *luaL_newstate(void) lua_State *L; void *ud = lj_alloc_create(); if (ud == NULL) return NULL; -#if LJ_64 +#if LJ_64 && !LJ_GC64 L = lj_state_newstate(lj_alloc_f, ud); #else L = lua_newstate(lj_alloc_f, ud); @@ -343,7 +343,7 @@ LUALIB_API lua_State *luaL_newstate(void) return L; } -#if LJ_64 +#if LJ_64 && !LJ_GC64 LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) { UNUSED(f); UNUSED(ud); diff --git a/src/lib_jit.c b/src/lib_jit.c index c6330c49..592538bd 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -715,15 +715,19 @@ static uint32_t jit_cpudetect(lua_State *L) #if LJ_HASJIT /* Compile-time MIPS CPU detection. */ #if LJ_ARCH_VERSION >= 20 - flags |= JIT_F_MIPS32R2; + flags |= JIT_F_MIPSXXR2; #endif /* Runtime MIPS CPU detection. */ #if defined(__GNUC__) - if (!(flags & JIT_F_MIPS32R2)) { + if (!(flags & JIT_F_MIPSXXR2)) { int x; +#ifdef __mips16 + x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */ +#else /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. 
*/ __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); - if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */ +#endif + if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif #endif diff --git a/src/lj.supp b/src/lj.supp index acb9e789..217f7c89 100644 --- a/src/lj.supp +++ b/src/lj.supp @@ -27,15 +27,15 @@ { Optimized string compare Memcheck:Addr4 - fun:lj_str_fastcmp + fun:str_fastcmp } { Optimized string compare Memcheck:Addr1 - fun:lj_str_fastcmp + fun:str_fastcmp } { Optimized string compare Memcheck:Cond - fun:lj_str_fastcmp + fun:str_fastcmp } diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 32de45ec..95d15d04 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -72,13 +72,56 @@ #define IS_DIRECT_BIT (SIZE_T_ONE) + +/* Determine system-specific block allocation method. */ #if LJ_TARGET_WINDOWS #define WIN32_LEAN_AND_MEAN #include -#if LJ_64 && !LJ_GC64 +#define LJ_ALLOC_VIRTUALALLOC 1 +#if LJ_64 && !LJ_GC64 +#define LJ_ALLOC_NTAVM 1 +#endif + +#else + +#include +/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ +#include + +#define LJ_ALLOC_MMAP 1 + +#if LJ_64 + +#define LJ_ALLOC_MMAP_PROBE 1 + +#if LJ_GC64 +#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ +#elif LJ_TARGET_X64 && LJ_HASJIT +/* Due to limitations in the x64 compiler backend. */ +#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */ +#else +#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */ +#endif + +#endif + +#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT) +#define LJ_ALLOC_MMAP32 1 +#endif + +#if LJ_TARGET_LINUX +#define LJ_ALLOC_MREMAP 1 +#endif + +#endif + + +#if LJ_ALLOC_VIRTUALALLOC + +#if LJ_ALLOC_NTAVM /* Undocumented, but hey, that's what we all love so much about Windows. 
*/ typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, size_t *size, ULONG alloctype, ULONG prot); @@ -89,14 +132,15 @@ static PNTAVM ntavm; */ #define NTAVM_ZEROBITS 1 -static void INIT_MMAP(void) +static void init_mmap(void) { ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), "NtAllocateVirtualMemory"); } +#define INIT_MMAP() init_mmap() /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *CALL_MMAP(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static LJ_AINLINE void *DIRECT_MMAP(size_t size) +static void *DIRECT_MMAP(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -119,10 +163,8 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) #else -#define INIT_MMAP() ((void)0) - /* Win32 MMAP via VirtualAlloc */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *CALL_MMAP(size_t size) { DWORD olderr = GetLastError(); void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); @@ -131,7 +173,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static LJ_AINLINE void *DIRECT_MMAP(size_t size) +static void *DIRECT_MMAP(size_t size) { DWORD olderr = GetLastError(); void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, @@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) #endif /* This function supports releasing coalesed segments */ -static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) +static int CALL_MUNMAP(void *ptr, size_t size) { DWORD olderr = GetLastError(); MEMORY_BASIC_INFORMATION minfo; @@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) return 0; } -#else - -#include -#include +#elif LJ_ALLOC_MMAP #define MMAP_PROT (PROT_READ|PROT_WRITE) #if 
!defined(MAP_ANONYMOUS) && defined(MAP_ANON) @@ -174,107 +213,151 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) #endif #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#if LJ_64 && !LJ_GC64 -/* 64 bit mode with 32 bit pointers needs special support for allocating -** memory in the lower 2GB. -*/ +#if LJ_ALLOC_MMAP_PROBE -#if defined(MAP_32BIT) - -#if defined(__sun__) -#define MMAP_REGION_START ((uintptr_t)0x1000) +#ifdef MAP_TRYFIXED +#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED) #else -/* Actually this only gives us max. 1GB in current Linux kernels. */ -#define MMAP_REGION_START ((uintptr_t)0) +#define MMAP_FLAGS_PROBE MMAP_FLAGS #endif -static LJ_AINLINE void *CALL_MMAP(size_t size) +#define LJ_ALLOC_MMAP_PROBE_MAX 30 +#define LJ_ALLOC_MMAP_PROBE_LINEAR 5 + +#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) + +/* No point in a giant ifdef mess. Just try to open /dev/urandom. +** It doesn't really matter if this fails, since we get some ASLR bits from +** every unsuitable allocation, too. And we prefer linear allocation, anyway. +*/ +#include +#include + +static uintptr_t mmap_probe_seed(void) { - int olderr = errno; - void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); - errno = olderr; - return ptr; + uintptr_t val; + int fd = open("/dev/urandom", O_RDONLY); + if (fd != -1) { + int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val)); + (void)close(fd); + if (ok) return val; + } + return 1; /* Punt. */ } -#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN - -/* OSX and FreeBSD mmap() use a naive first-fit linear search. -** That's perfect for us. Except that -pagezero_size must be set for OSX, -** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs -** to be reduced to 250MB on FreeBSD. 
-*/ -#if LJ_TARGET_OSX || defined(__DragonFly__) -#define MMAP_REGION_START ((uintptr_t)0x10000) -#elif LJ_TARGET_PS4 -#define MMAP_REGION_START ((uintptr_t)0x4000) -#else -#define MMAP_REGION_START ((uintptr_t)0x10000000) -#endif -#define MMAP_REGION_END ((uintptr_t)0x80000000) - -#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 -#include -#endif - -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *mmap_probe(size_t size) { - int olderr = errno; /* Hint for next allocation. Doesn't need to be thread-safe. */ - static uintptr_t alloc_hint = MMAP_REGION_START; - int retry = 0; -#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 - static int rlimit_modified = 0; - if (LJ_UNLIKELY(rlimit_modified == 0)) { - struct rlimit rlim; - rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; - setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ - rlimit_modified = 1; - } -#endif - for (;;) { - void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0); - if ((uintptr_t)p >= MMAP_REGION_START && - (uintptr_t)p + size < MMAP_REGION_END) { - alloc_hint = (uintptr_t)p + size; + static uintptr_t hint_addr = 0; + static uintptr_t hint_prng = 0; + int olderr = errno; + int retry; + for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { + void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); + uintptr_t addr = (uintptr_t)p; + if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) { + /* We got a suitable address. Bump the hint address. */ + hint_addr = addr + size; errno = olderr; return p; } - if (p != CMFAIL) munmap(p, size); -#if defined(__sun__) || defined(__DragonFly__) - alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. 
*/ - if (alloc_hint + size < MMAP_REGION_END) continue; -#endif - if (retry) break; - retry = 1; - alloc_hint = MMAP_REGION_START; + if (p != MFAIL) { + munmap(p, size); + } else if (errno == ENOMEM) { + return MFAIL; + } + if (hint_addr) { + /* First, try linear probing. */ + if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) { + hint_addr += 0x1000000; + if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0) + hint_addr = 0; + continue; + } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) { + /* Next, try a no-hint probe to get back an ASLR address. */ + hint_addr = 0; + continue; + } + } + /* Finally, try pseudo-random probing. */ + if (LJ_UNLIKELY(hint_prng == 0)) { + hint_prng = mmap_probe_seed(); + } + /* The unsuitable address we got has some ASLR PRNG bits. */ + hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1)); + do { /* The PRNG itself is very weak, but see above. */ + hint_prng = hint_prng * 1103515245 + 12345; + hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE; + hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1); + } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); } errno = olderr; - return CMFAIL; + return MFAIL; } -#else - -#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS" - #endif -#else +#if LJ_ALLOC_MMAP32 -/* 32 bit mode and GC64 mode is easy. */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +#if defined(__sun__) +#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) +#else +#define LJ_ALLOC_MMAP32_START ((uintptr_t)0) +#endif + +static void *mmap_map32(size_t size) +{ +#if LJ_ALLOC_MMAP_PROBE + static int fallback = 0; + if (fallback) + return mmap_probe(size); +#endif + { + int olderr = errno; + void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); + errno = olderr; + /* This only allows 1GB on Linux. So fallback to probing to get 2GB. 
*/ +#if LJ_ALLOC_MMAP_PROBE + if (ptr == MFAIL) { + fallback = 1; + return mmap_probe(size); + } +#endif + return ptr; + } +} + +#endif + +#if LJ_ALLOC_MMAP32 +#define CALL_MMAP(size) mmap_map32(size) +#elif LJ_ALLOC_MMAP_PROBE +#define CALL_MMAP(size) mmap_probe(size) +#else +static void *CALL_MMAP(size_t size) { int olderr = errno; void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); errno = olderr; return ptr; } +#endif + +#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 + +#include + +static void init_mmap(void) +{ + struct rlimit rlim; + rlim.rlim_cur = rlim.rlim_max = 0x10000; + setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */ +} +#define INIT_MMAP() init_mmap() #endif -#define INIT_MMAP() ((void)0) -#define DIRECT_MMAP(s) CALL_MMAP(s) - -static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) +static int CALL_MUNMAP(void *ptr, size_t size) { int olderr = errno; int ret = munmap(ptr, size); @@ -282,10 +365,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) return ret; } -#if LJ_TARGET_LINUX +#if LJ_ALLOC_MREMAP /* Need to define _GNU_SOURCE to get the mremap prototype. 
*/ -static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, - int flags) +static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) { int olderr = errno; ptr = mremap(ptr, osz, nsz, flags); @@ -305,6 +387,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, #endif + +#ifndef INIT_MMAP +#define INIT_MMAP() ((void)0) +#endif + +#ifndef DIRECT_MMAP +#define DIRECT_MMAP(s) CALL_MMAP(s) +#endif + #ifndef CALL_MREMAP #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) #endif diff --git a/src/lj_arch.h b/src/lj_arch.h index 8743babe..f775743c 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -25,6 +25,10 @@ #define LUAJIT_ARCH_ppc 5 #define LUAJIT_ARCH_MIPS 6 #define LUAJIT_ARCH_mips 6 +#define LUAJIT_ARCH_MIPS32 6 +#define LUAJIT_ARCH_mips32 6 +#define LUAJIT_ARCH_MIPS64 7 +#define LUAJIT_ARCH_mips64 7 /* Target OS. */ #define LUAJIT_OS_OTHER 0 @@ -47,8 +51,10 @@ #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #define LUAJIT_TARGET LUAJIT_ARCH_PPC +#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) +#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) -#define LUAJIT_TARGET LUAJIT_ARCH_MIPS +#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 #else #error "No support for this architecture (yet)" #endif @@ -289,13 +295,21 @@ #define LJ_ARCH_XENON 1 #endif -#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS +#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 #define LJ_ARCH_NAME "mipsel" +#else +#define LJ_ARCH_NAME "mips64el" +#endif #define LJ_ARCH_ENDIAN LUAJIT_LE #else +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 
#define LJ_ARCH_NAME "mips" +#else +#define LJ_ARCH_NAME "mips64" +#endif #define LJ_ARCH_ENDIAN LUAJIT_BE #endif @@ -307,11 +321,6 @@ #endif #endif -/* Temporarily disable features until the code has been merged. */ -#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ -#define LUAJIT_NO_UNWIND 1 -#endif - #if !defined(LJ_ABI_SOFTFP) #ifdef __mips_soft_float #define LJ_ABI_SOFTFP 1 @@ -320,7 +329,15 @@ #endif #endif +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 #define LJ_ARCH_BITS 32 +#define LJ_TARGET_MIPS32 1 +#else +#define LJ_ARCH_BITS 64 +#define LJ_TARGET_MIPS64 1 +#define LJ_TARGET_GC64 1 +#define LJ_ARCH_NOJIT 1 /* NYI */ +#endif #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ @@ -329,7 +346,7 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if _MIPS_ARCH_MIPS32R2 +#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 #define LJ_ARCH_VERSION 20 #else #define LJ_ARCH_VERSION 10 @@ -410,9 +427,13 @@ #ifdef __NO_FPRS__ #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" #endif -#elif LJ_TARGET_MIPS -#if defined(_LP64) -#error "No support for MIPS64" +#elif LJ_TARGET_MIPS32 +#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) +#error "Only o32 ABI supported for MIPS32" +#endif +#elif LJ_TARGET_MIPS64 +#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) +#error "Only n64 ABI supported for MIPS64" #endif #endif #endif @@ -453,7 +474,7 @@ #endif /* Disable or enable the JIT compiler. 
*/ -#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64 +#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) #define LJ_HASJIT 0 #else #define LJ_HASJIT 1 @@ -524,6 +545,11 @@ #define LJ_NO_SYSTEM 1 #endif +#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ +/* NYI: no support for compact unwind specification, yet. */ +#define LUAJIT_NO_UNWIND 1 +#endif + #if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 #define LJ_NO_UNWIND 1 #endif diff --git a/src/lj_asm.c b/src/lj_asm.c index 94d7bfc4..7ce58924 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -334,7 +334,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) RA_DBGX((as, "remat $i $r", ir, r)); #if !LJ_SOFTFP if (ir->o == IR_KNUM) { - emit_loadn(as, r, ir_knum(ir)); + emit_loadk64(as, r, ir); } else #endif if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { @@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref) #if LJ_64 } else if (ir->o == IR_KINT64) { emit_loadu64(as, r, ir_kint64(ir)->u64); +#if LJ_GC64 + } else if (ir->o == IR_KGC) { + emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); +#endif #endif } else { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || @@ -619,10 +625,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) return r; } +/* Add a register rename to the IR. */ +static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno) +{ + IRRef ren; + lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno); + ren = tref_ref(lj_ir_emit(as->J)); + as->J->cur.ir[ren].r = (uint8_t)down; + as->J->cur.ir[ren].s = SPS_NONE; +} + /* Rename register allocation and emit move. 
*/ static void ra_rename(ASMState *as, Reg down, Reg up) { - IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); + IRRef ref = regcost_ref(as->cost[up] = as->cost[down]); IRIns *ir = IR(ref); ir->r = (uint8_t)up; as->cost[down] = 0; @@ -635,11 +651,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up) RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ - lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); - ren = tref_ref(lj_ir_emit(as->J)); - as->ir = as->T->ir; /* The IR may have been reallocated. */ - IR(ren)->r = (uint8_t)down; - IR(ren)->s = SPS_NONE; + ra_addrename(as, down, ref, as->snapno); } } @@ -689,16 +701,20 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) if (ra_noreg(left)) { if (irref_isk(lref)) { if (ir->o == IR_KNUM) { - cTValue *tv = ir_knum(ir); /* FP remat needs a load except for +0. Still better than eviction. */ - if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { - emit_loadn(as, dest, tv); + if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) { + emit_loadk64(as, dest, ir); return; } #if LJ_64 } else if (ir->o == IR_KINT64) { - emit_loadu64(as, dest, ir_kint64(ir)->u64); + emit_loadk64(as, dest, ir); return; +#if LJ_GC64 + } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { + emit_loadk64(as, dest, ir); + return; +#endif #endif } else if (ir->o != IR_KPRI) { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || @@ -941,7 +957,7 @@ static void asm_snap_prep(ASMState *as) } else { /* Process any renames above the highwater mark. */ for (; as->snaprename < as->T->nins; as->snaprename++) { - IRIns *ir = IR(as->snaprename); + IRIns *ir = &as->T->ir[as->snaprename]; if (asm_snap_checkrename(as, ir->op1)) ir->op2 = REF_BIAS-1; /* Kill rename. 
*/ } @@ -1055,7 +1071,7 @@ static void asm_bufhdr(ASMState *as, IRIns *ir) } } else { Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); - /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ + /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */ emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); } @@ -1472,12 +1488,7 @@ static void asm_phi_fixup(ASMState *as) irt_clearmark(ir->t); /* Left PHI gained a spill slot before the loop? */ if (ra_hasspill(ir->s)) { - IRRef ren; - lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno); - ren = tref_ref(lj_ir_emit(as->J)); - as->ir = as->T->ir; /* The IR may have been reallocated. */ - IR(ren)->r = (uint8_t)r; - IR(ren)->s = SPS_NONE; + ra_addrename(as, r, lref, as->loopsnapno); } } rset_clear(work, r); @@ -1888,7 +1899,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) SnapEntry sn = map[n-1]; if ((sn & SNAP_FRAME)) { *gotframe = 1; - return snap_slot(sn); + return snap_slot(sn) - LJ_FR2; } } return 0; @@ -1908,16 +1919,20 @@ static void asm_tail_link(ASMState *as) if (as->T->link == 0) { /* Setup fixed registers for exit to interpreter. */ - const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); + const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); int32_t mres; if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. 
*/ BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; if (bc_isret(bc_op(*retpc))) pc = retpc; } +#if LJ_GC64 + emit_loadu64(as, RID_LPC, u64ptr(pc)); +#else ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); ra_allockreg(as, i32ptr(pc), RID_LPC); - mres = (int32_t)(snap->nslots - baseslot); +#endif + mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; @@ -1932,6 +1947,11 @@ static void asm_tail_link(ASMState *as) } emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); + if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */ + setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal)); + IR(as->J->ktrace)->o = IR_KGC; + } + /* Sync the interpreter state with the on-trace state. */ asm_stack_restore(as, snap); @@ -1957,17 +1977,22 @@ static void asm_setup_regsp(ASMState *as) ra_setup(as); /* Clear reg/sp for constants. */ - for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) + for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) { +#if LJ_GC64 + ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ +#else + /* Make life easier for backends by putting address of constant in i. */ + ir->i = (int32_t)(intptr_t)(ir+1); +#endif + ir++; + } + } /* REF_BASE is used for implicit references to the BASE register. */ lastir->prev = REGSP_HINT(RID_BASE); - ir = IR(nins-1); - if (ir->o == IR_RENAME) { - do { ir--; nins--; } while (ir->o == IR_RENAME); - T->nins = nins; /* Remove any renames left over from ASM restart. */ - } as->snaprename = nins; as->snapref = nins; as->snapno = T->nsnap; @@ -2199,14 +2224,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T) ASMState *as = &as_; MCode *origtop; + /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. 
*/ + { + IRRef nins = T->nins; + IRIns *ir = &T->ir[nins-1]; + if (ir->o == IR_NOP || ir->o == IR_RENAME) { + do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME); + T->nins = nins; + } + } + /* Ensure an initialized instruction beyond the last one for HIOP checks. */ - J->cur.nins = lj_ir_nextins(J); - J->cur.ir[J->cur.nins].o = IR_NOP; + /* This also allows one RENAME to be added without reallocating curfinal. */ + as->orignins = lj_ir_nextins(J); + J->cur.ir[as->orignins].o = IR_NOP; /* Setup initial state. Copy some fields to reduce indirections. */ as->J = J; as->T = T; - as->ir = T->ir; + J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ as->flags = J->flags; as->loopref = J->loopref; as->realign = NULL; @@ -2219,12 +2255,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T) as->mclim = as->mcbot + MCLIM_REDZONE; asm_setup_target(as); - do { + /* + ** This is a loop, because the MCode may have to be (re-)assembled + ** multiple times: + ** + ** 1. as->realign is set (and the assembly aborted), if the arch-specific + ** backend wants the MCode to be aligned differently. + ** + ** This is currently only the case on x86/x64, where small loops get + ** an aligned loop body plus a short branch. Not much effort is wasted, + ** because the abort happens very quickly and only once. + ** + ** 2. The IR is immovable, since the MCode embeds pointers to various + ** constants inside the IR. But RENAMEs may need to be added to the IR + ** during assembly, which might grow and reallocate the IR. We check + ** at the end if the IR (in J->cur.ir) has actually grown, resize the + ** copy (in J->curfinal.ir) and try again. + ** + ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have + ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to + ** always have one spare slot in the IR (see above), which means we + ** have to redo the assembly for only ~2% of all traces. 
+ ** + ** Very, very rarely, this needs to be done repeatedly, since the + ** location of constants inside the IR (actually, reachability from + ** a global pointer) may affect register allocation and thus the + ** number of RENAMEs. + */ + for (;;) { as->mcp = as->mctop; #ifdef LUA_USE_ASSERT as->mcp_prev = as->mcp; #endif - as->curins = T->nins; + as->ir = J->curfinal->ir; /* Use the copied IR. */ + as->curins = J->cur.nins = as->orignins; + RA_DBG_START(); RA_DBGX((as, "===== STOP =====")); @@ -2252,22 +2317,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T) checkmclim(as); asm_ir(as, ir); } - } while (as->realign); /* Retry in case the MCode needs to be realigned. */ - /* Emit head of trace. */ - RA_DBG_REF(); - checkmclim(as); - if (as->gcsteps > 0) { - as->curins = as->T->snap[0].ref; - asm_snap_prep(as); /* The GC check is a guard. */ - asm_gc_check(as); + if (as->realign && J->curfinal->nins >= T->nins) + continue; /* Retry in case only the MCode needs to be realigned. */ + + /* Emit head of trace. */ + RA_DBG_REF(); + checkmclim(as); + if (as->gcsteps > 0) { + as->curins = as->T->snap[0].ref; + asm_snap_prep(as); /* The GC check is a guard. */ + asm_gc_check(as); + as->curins = as->stopins; + } + ra_evictk(as); + if (as->parent) + asm_head_side(as); + else + asm_head_root(as); + asm_phi_fixup(as); + + if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ + lua_assert(J->curfinal->nk == T->nk); + memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, + (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ + T->nins = J->curfinal->nins; + break; /* Done. */ + } + + /* Otherwise try again with a bigger IR. */ + lj_trace_free(J2G(J), J->curfinal); + J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. 
*/ + J->curfinal = lj_trace_alloc(J->L, T); + as->realign = NULL; } - ra_evictk(as); - if (as->parent) - asm_head_side(as); - else - asm_head_root(as); - asm_phi_fixup(as); RA_DBGX((as, "===== START ====")); RA_DBG_FLUSH(); diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index ff4068a3..23f42919 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -909,7 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { - /* NYI: Check that UREFO is still open and not aliasing a slot. */ Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); @@ -998,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(ir); - int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); - return; + if (ir->op1 == REF_NIL) { + lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); + ARMIns ai = asm_fxloadins(ir); + int32_t ofs; + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. 
*/ + emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); + return; + } } + ofs = field_ofs[ir->op2]; + if ((ai & 0x04000000)) + emit_lso(as, ai, dest, idx, ofs); + else + emit_lsox(as, ai, dest, idx, ofs); } - ofs = field_ofs[ir->op2]; - if ((ai & 0x04000000)) - emit_lso(as, ai, dest, idx, ofs); - else - emit_lsox(as, ai, dest, idx, ofs); } static void asm_fstore(ASMState *as, IRIns *ir) diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 7bd72fda..cf446346 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -459,12 +459,10 @@ static void asm_conv(ASMState *as, IRIns *ir) dest, dest); if (irt_isfloat(ir->t)) emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); else emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); emit_tg(as, MIPSI_MTC1, RID_TMP, dest); emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); @@ -494,12 +492,10 @@ static void asm_conv(ASMState *as, IRIns *ir) tmp, left, tmp); if (st == IRT_FLOAT) emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); else emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); } else { emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, @@ -514,7 +510,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg left = ra_alloc1(as, ir->op1, RSET_GPR); lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); if ((ir->op2 & IRCONV_SEXT)) { - if ((as->flags & JIT_F_MIPS32R2)) { + if ((as->flags & JIT_F_MIPSXXR2)) { emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); } else { uint32_t shift = st == IRT_I8 ? 
24 : 16; @@ -743,7 +739,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - if ((as->flags & JIT_F_MIPS32R2)) { + if ((as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); } else { emit_dst(as, MIPSI_OR, dest, dest, tmp1); @@ -810,7 +806,6 @@ nolo: static void asm_uref(ASMState *as, IRIns *ir) { - /* NYI: Check that UREFO is still open and not aliasing a slot. */ Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); @@ -901,17 +896,23 @@ static MIPSIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); MIPSIns mi = asm_fxloadins(ir); + Reg idx; int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); - return; + if (ir->op1 == REF_NIL) { + idx = RID_JGL; + ofs = ir->op2 - 32768; + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. 
*/ + emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); + return; + } } + ofs = field_ofs[ir->op2]; } - ofs = field_ofs[ir->op2]; lua_assert(!irt_isfp(ir->t)); emit_tsi(as, mi, dest, idx, ofs); } @@ -1456,7 +1457,7 @@ static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - if ((as->flags & JIT_F_MIPS32R2)) { + if ((as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); } else { @@ -1512,7 +1513,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) static void asm_bror(ASMState *as, IRIns *ir) { - if ((as->flags & JIT_F_MIPS32R2)) { + if ((as->flags & JIT_F_MIPSXXR2)) { asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); } else { Reg dest = ra_dest(as, ir, RSET_GPR); diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 0ebed40f..46821515 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fb(as, PPCI_FCTIWZ, tmp, left); } @@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg left = ra_alloc1(as, lref, allow); Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - const float *kbias; if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); - if (st == IRT_U32) kbias++; - emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, + emit_lsptr(as, PPCI_LFS, (fbias & 31), + 
&as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], rset_clear(allow, hibias)); emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, RID_SP, SPOFS_TMPLO); @@ -472,8 +469,7 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_fb(as, PPCI_FCTIWZ, tmp, tmp); emit_fab(as, PPCI_FSUB, tmp, left, tmp); emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); } else { emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); @@ -717,7 +713,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { - /* NYI: Check that UREFO is still open and not aliasing a slot. */ Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); @@ -809,17 +804,23 @@ static PPCIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); PPCIns pi = asm_fxloadins(ir); + Reg idx; int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tai(as, PPCI_ADDI, dest, idx, ofs); - return; + if (ir->op1 == REF_NIL) { + idx = RID_JGL; + ofs = ir->op2 - 32768; + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. 
*/ + emit_tai(as, PPCI_ADDI, dest, idx, ofs); + return; + } } + ofs = field_ofs[ir->op2]; } - ofs = field_ofs[ir->op2]; lua_assert(!irt_isi8(ir->t)); emit_tai(as, pi, dest, idx, ofs); } @@ -975,7 +976,7 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), + (void *)&as->J->k32[LJ_K32_2P52_2P31], rset_clear(allow, hibias)); emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 718cb12e..50784daa 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) } /* Push the high byte of the exitno for each exit stub group. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); +#if !LJ_GC64 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ *mxp++ = XI_MOVmi; *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); *mxp++ = 2*sizeof(void *); *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; +#endif /* Jump to exit handler which fills in the ExitState. */ *mxp++ = XI_JMP; mxp += 4; *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); @@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc) target = p; cc ^= 1; if (as->realign) { + if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */ emit_sjcc(as, cc, target); return; } } + if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. 
*/ emit_jcc(as, cc, target); } @@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) { if (irref_isk(ref)) { IRIns *ir = IR(ref); +#if LJ_GC64 + if (ir->o == IR_KNULL || !irt_is64(ir->t)) { + *k = ir->i; + return 1; + } else if (checki32((int64_t)ir_k64(ir)->u64)) { + *k = (int32_t)ir_k64(ir)->u64; + return 1; + } +#else if (ir->o != IR_KINT64) { *k = ir->i; return 1; @@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) *k = (int32_t)ir_kint64(ir)->u64; return 1; } +#endif } return 0; } @@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; +#if LJ_GC64 + int64_t ofs = dispofs(as, &uv->tv); + if (checki32(ofs) && checki32(ofs+4)) { + as->mrm.ofs = (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + as->mrm.idx = RID_NONE; + return; + } +#else as->mrm.ofs = ptr2addr(&uv->tv); as->mrm.base = as->mrm.idx = RID_NONE; return; +#endif } break; default: @@ -205,14 +231,40 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) { lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); - as->mrm.ofs = field_ofs[ir->op2]; as->mrm.idx = RID_NONE; - if (irref_isk(ir->op1)) { - as->mrm.ofs += IR(ir->op1)->i; + if (ir->op1 == REF_NIL) { +#if LJ_GC64 + as->mrm.ofs = (int32_t)ir->op2 - GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; +#else + as->mrm.ofs = (int32_t)ir->op2 + ptr2addr(J2GG(as->J)); as->mrm.base = RID_NONE; - } else { - as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); +#endif + return; } + as->mrm.ofs = field_ofs[ir->op2]; + if (irref_isk(ir->op1)) { + IRIns *op1 = IR(ir->op1); +#if LJ_GC64 + if (ir->op1 == REF_NIL) { + as->mrm.ofs -= GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; + return; + } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) { + intptr_t ofs = dispofs(as, ir_kptr(op1)); + if 
(checki32(as->mrm.ofs + ofs)) { + as->mrm.ofs += (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + return; + } + } +#else + as->mrm.ofs += op1->i; + as->mrm.base = RID_NONE; + return; +#endif + } + as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); } /* Fuse string reference into memory operand. */ @@ -223,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) as->mrm.base = as->mrm.idx = RID_NONE; as->mrm.scale = XM_SCALE1; as->mrm.ofs = sizeof(GCstr); - if (irref_isk(ir->op1)) { + if (!LJ_GC64 && irref_isk(ir->op1)) { as->mrm.ofs += IR(ir->op1)->i; } else { Reg r = ra_alloc1(as, ir->op1, allow); @@ -255,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); as->mrm.idx = RID_NONE; if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { +#if LJ_GC64 + intptr_t ofs = dispofs(as, ir_kptr(ir)); + if (checki32(ofs)) { + as->mrm.ofs = (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + return; + } + } if (0) { +#else as->mrm.ofs = ir->i; as->mrm.base = RID_NONE; } else if (ir->o == IR_STRREF) { asm_fusestrref(as, ir, allow); +#endif } else { as->mrm.ofs = 0; if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { @@ -301,7 +363,45 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) } } -/* Fuse load into memory operand. */ +/* Fuse load of 64 bit IR constant into memory operand. 
*/ +static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + if (!LJ_GC64 || checki32((intptr_t)k)) { + as->mrm.ofs = ptr2addr(k); + as->mrm.base = RID_NONE; +#if LJ_GC64 + } else if (checki32(dispofs(as, k))) { + as->mrm.ofs = (int32_t)dispofs(as, k); + as->mrm.base = RID_DISPATCH; + } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) && + checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { + as->mrm.ofs = (int32_t)mcpofs(as, k); + as->mrm.base = RID_RIP; + } else { + if (ir->i) { + lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + } else { + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t*)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; + } + as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); + as->mrm.base = RID_RIP; +#endif + } + as->mrm.idx = RID_NONE; + return RID_MRM; +} + +/* Fuse load into memory operand. +** +** Important caveat: this may emit RIP-relative loads! So don't place any +** code emitters between this function and the use of its result. +** The only permitted exception is asm_guardcc(). +*/ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) { IRIns *ir = IR(ref); @@ -320,26 +420,35 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) if (ir->o == IR_KNUM) { RegSet avail = as->freeset & ~as->modset & RSET_FPR; lua_assert(allow != RSET_EMPTY); - if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ - as->mrm.ofs = ptr2addr(ir_knum(ir)); - as->mrm.base = as->mrm.idx = RID_NONE; - return RID_MRM; - } - } else if (ir->o == IR_KINT64) { + if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ + return asm_fuseloadk64(as, ir); + } else if (ref == REF_BASE || ir->o == IR_KINT64) { RegSet avail = as->freeset & ~as->modset & RSET_GPR; lua_assert(allow != RSET_EMPTY); if (!(avail & (avail-1))) { /* Fuse if less than two regs available. 
*/ - as->mrm.ofs = ptr2addr(ir_kint64(ir)); - as->mrm.base = as->mrm.idx = RID_NONE; - return RID_MRM; + if (ref == REF_BASE) { +#if LJ_GC64 + as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base); + as->mrm.base = RID_DISPATCH; +#else + as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base); + as->mrm.base = RID_NONE; +#endif + as->mrm.idx = RID_NONE; + return RID_MRM; + } else { + return asm_fuseloadk64(as, ir); + } } } else if (mayfuse(as, ref)) { RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; if (ir->o == IR_SLOAD) { if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && - noconflict(as, ref, IR_RETF, 0)) { + noconflict(as, ref, IR_RETF, 0) && + !(LJ_GC64 && irt_isaddr(ir->t))) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); - as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); + as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); as->mrm.idx = RID_NONE; return RID_MRM; } @@ -351,7 +460,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return RID_MRM; } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { - if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { + if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && + !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } @@ -364,12 +474,16 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) asm_fusexref(as, ir->op1, xallow); return RID_MRM; } - } else if (ir->o == IR_VLOAD) { + } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } } - if (!(as->freeset & allow) && !irref_isk(ref) && + if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) { + asm_fusefref(as, ir, RSET_EMPTY); + return RID_MRM; + } + if (!(as->freeset & allow) && !emit_canremat(ref) && (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) goto fusespill; return ra_allocref(as, ref, allow); @@ -485,8 +599,8 @@ static void 
asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) if (r) { /* Argument is in a register. */ if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { #if LJ_64 - if (ir->o == IR_KINT64) - emit_loadu64(as, r, ir_kint64(ir)->u64); + if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64) + emit_loadu64(as, r, ir_k64(ir)->u64); else #endif emit_loadi(as, r, ir->i); @@ -642,6 +756,9 @@ static void asm_callx(ASMState *as, IRIns *ir) static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); +#if LJ_FR2 + Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); +#endif void *pc = ir_kptr(IR(ir->op2)); int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; @@ -650,7 +767,12 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); +#if LJ_FR2 + emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8); + emit_loadu64(as, rpc, u64ptr(pc)); +#else emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); +#endif } /* -- Type conversions ---------------------------------------------------- */ @@ -674,8 +796,9 @@ static void asm_tobit(ASMState *as, IRIns *ir) Reg tmp = ra_noreg(IR(ir->op1)->r) ? ra_alloc1(as, ir->op1, RSET_FPR) : ra_scratch(as, RSET_FPR); - Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); + Reg right; emit_rr(as, XO_MOVDto, tmp, dest); + right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); emit_mrm(as, XO_ADDSD, tmp, right); ra_left(as, tmp, ir->op1); } @@ -696,13 +819,13 @@ static void asm_conv(ASMState *as, IRIns *ir) if (left == dest) return; /* Avoid the XO_XORPS. */ } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ /* number = (2^52+2^51 .. 
u32) - (2^52+2^51) */ - cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); + cTValue *k = &as->J->k64[LJ_K64_TOBIT]; Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); if (irt_isfloat(ir->t)) emit_rr(as, XO_CVTSD2SS, dest, dest); emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ - emit_loadn(as, bias, k); + emit_rma(as, XO_MOVSD, bias, k); emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); return; } else { /* Integer to FP conversion. */ @@ -711,7 +834,7 @@ static void asm_conv(ASMState *as, IRIns *ir) asm_fuseloadm(as, lref, RSET_GPR, st64); if (LJ_64 && st == IRT_U64) { MCLabel l_end = emit_label(as); - const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); + cTValue *k = &as->J->k64[LJ_K64_2P64]; emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ @@ -738,23 +861,20 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); emit_rr(as, op, dest|REX_64, tmp); if (st == IRT_NUM) - emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, - LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000))); + emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); else - emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, - LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000))); + emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ emit_rr(as, op, dest|REX_64, tmp); ra_left(as, tmp, lref); } else { - Reg left = asm_fuseload(as, lref, RSET_FPR); if (LJ_64 && irt_isu32(ir->t)) emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ emit_mrm(as, op, dest|((LJ_64 && (irt_is64(ir->t) || irt_isu32(ir->t))) ? 
REX_64 : 0), - left); + asm_fuseload(as, lref, RSET_FPR)); } } } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ @@ -828,8 +948,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ MCLabel l_end = emit_label(as); - emit_rma(as, XO_FADDq, XOg_FADDq, - lj_ir_k64_find(as->J, U64x(43f00000,00000000))); + emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ } else { @@ -869,8 +988,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); else emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rma(as, XO_FADDq, XOg_FADDq, - lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); + emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); emit_sjcc(as, CC_NS, l_pop); emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ } @@ -934,6 +1052,24 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir)); } else { /* Otherwise use g->tmptv to hold the TValue. */ +#if LJ_GC64 + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_movmroi(as, dest, 4, k.u32.hi); + emit_movmroi(as, dest, 0, k.u32.lo); + } else { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. 
*/ + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); + if (irt_is64(ir->t)) { + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); + } else { + emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15) | 0x7fff); + } + emit_movtomro(as, REX_64IR(ir, src), dest, 0); + } +#else if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); emit_movtomro(as, REX_64IR(ir, src), dest, 0); @@ -942,6 +1078,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) } if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, dest, 4, irt_toitype(ir->t)); +#endif emit_loada(as, dest, &J2G(as->J)->tmptv); } } @@ -951,9 +1088,9 @@ static void asm_aref(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); asm_fusearef(as, ir, RSET_GPR); if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) - emit_mrm(as, XO_LEA, dest, RID_MRM); + emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); else if (as->mrm.base != dest) - emit_rr(as, XO_MOV, dest, as->mrm.base); + emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base); } /* Inlined hash lookup. Specialized for key type and for const keys. @@ -980,7 +1117,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (!isk) { rset_clear(allow, tab); key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - if (!irt_isstr(kt)) + if (LJ_GC64 || !irt_isstr(kt)) tmp = ra_scratch(as, rset_exclude(allow, key)); } @@ -993,8 +1130,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) /* Follow hash chain until the end. */ l_loop = emit_sjcc_label(as, CC_NZ); - emit_rr(as, XO_TEST, dest, dest); - emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); + emit_rr(as, XO_TEST, dest|REX_GC64, dest); + emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next)); l_next = emit_label(as); /* Type and value comparison. 
*/ @@ -1015,7 +1152,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); emit_sjcc(as, CC_AE, l_next); /* The type check avoids NaN penalties and complaints from Valgrind. */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 emit_u32(as, LJ_TISNUM); emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); #else @@ -1023,10 +1160,28 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); #endif } -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(kt)) { emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); #endif +#if LJ_GC64 + } else if (irt_isaddr(kt)) { + if (isk) { + TValue k; + k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo), + k.u32.lo); + emit_sjcc(as, CC_NE, l_next); + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi), + k.u32.hi); + } else { + emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); + } + } else { + lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); +#else } else { if (!irt_ispri(kt)) { lua_assert(irt_isaddr(kt)); @@ -1040,16 +1195,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) lua_assert(!irt_isnil(kt)); emit_i8(as, irt_toitype(kt)); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); +#endif } emit_sfixup(as, l_loop); checkmclim(as); +#if LJ_GC64 + if (!isk && irt_isaddr(kt)) { + emit_rr(as, XO_OR, tmp|REX_64, key); + emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47); + } +#endif /* Load main position relative to tab->node into dest. */ khash = isk ? 
ir_khash(irkey) : 1; if (khash == 0) { - emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); + emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { - emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); + emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); if ((as->flags & JIT_F_PREFER_IMUL)) { emit_i8(as, sizeof(Node)); emit_rr(as, XO_IMULi8, dest, dest); @@ -1104,11 +1266,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir) if (ra_hasreg(dest)) { if (ofs != 0) { if (dest == node && !(as->flags & JIT_F_LEA_AGU)) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); else - emit_rmro(as, XO_LEA, dest, node, ofs); + emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); } else if (dest != node) { - emit_rr(as, XO_MOV, dest, node); + emit_rr(as, XO_MOV, dest|REX_GC64, node); } } asm_guardcc(as, CC_NE); @@ -1120,13 +1282,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); /* Assumes -0.0 is already canonicalized to +0.0. */ emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : +#if LJ_GC64 + ((uint64_t)irt_toitype(irkey->t) << 47) | + (uint64_t)ir_kgc(irkey)); +#else ((uint64_t)irt_toitype(irkey->t) << 32) | (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); +#endif } else { lua_assert(!irt_isnil(irkey->t)); +#if LJ_GC64 + emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, node, + ofs + (int32_t)offsetof(Node, key.it)); +#else emit_i8(as, irt_toitype(irkey->t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); +#endif } #else l_exit = emit_label(as); @@ -1157,25 +1330,25 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { - /* NYI: Check that UREFO is still open and not aliasing a slot. 
*/ Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_rma(as, XO_MOV, dest, v); + emit_rma(as, XO_MOV, dest|REX_GC64, v); } else { Reg uv = ra_scratch(as, RSET_GPR); Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { - emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); + emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); asm_guardcc(as, CC_NE); emit_i8(as, 1); emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); } else { - emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); + emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); } - emit_rmro(as, XO_MOV, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + emit_rmro(as, XO_MOV, uv|REX_GC64, func, + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } @@ -1193,9 +1366,9 @@ static void asm_strref(ASMState *as, IRIns *ir) if (as->mrm.base == RID_NONE) emit_loadi(as, dest, as->mrm.ofs); else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs); else - emit_mrm(as, XO_LEA, dest, RID_MRM); + emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); } /* -- Loads and stores ---------------------------------------------------- */ @@ -1264,7 +1437,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir) case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; case IRT_NUM: xo = XO_MOVSDto; break; case IRT_FLOAT: xo = XO_MOVSSto; break; -#if LJ_64 +#if LJ_64 && !LJ_GC64 case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. 
*/ #endif default: @@ -1296,7 +1469,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir) #define asm_fstore(as, ir) asm_fxstore(as, ir) #define asm_xstore(as, ir) asm_fxstore(as, ir) -#if LJ_64 +#if LJ_64 && !LJ_GC64 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) { if (ra_used(ir) || typecheck) { @@ -1318,9 +1491,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) static void asm_ahuvload(ASMState *as, IRIns *ir) { +#if LJ_GC64 + Reg tmp = RID_NONE; +#endif lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isint(ir->t))); -#if LJ_64 +#if LJ_64 && !LJ_GC64 if (irt_islightud(ir->t)) { Reg dest = asm_load_lightud64(as, ir, 1); if (ra_hasreg(dest)) { @@ -1334,20 +1510,64 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); asm_fuseahuref(as, ir->op1, RSET_GPR); +#if LJ_GC64 + if (irt_isaddr(ir->t)) { + emit_shifti(as, XOg_SHR|REX_64, dest, 17); + asm_guardcc(as, CC_NE); + emit_i8(as, irt_toitype(ir->t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, dest); + emit_i8(as, XI_O16); + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 47); + emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM); + } else { + emit_shifti(as, XOg_ROR|REX_64, dest, 47); + emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); + } + return; + } else +#endif emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); } else { - asm_fuseahuref(as, ir->op1, RSET_GPR); + RegSet gpr = RSET_GPR; +#if LJ_GC64 + if (irt_isaddr(ir->t)) { + tmp = ra_scratch(as, RSET_GPR); + gpr = rset_exclude(gpr, tmp); + } +#endif + asm_fuseahuref(as, ir->op1, gpr); } /* Always do the type check, even if the load result is unused. */ as->mrm.ofs += 4; asm_guardcc(as, irt_isnum(ir->t) ? 
CC_AE : CC_NE); if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); +#if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); +#else emit_u32(as, LJ_TISNUM); +#endif emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); +#if LJ_GC64 + } else if (irt_isaddr(ir->t)) { + as->mrm.ofs -= 4; + emit_i8(as, irt_toitype(ir->t)); + emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM); + } else if (irt_isnil(ir->t)) { + as->mrm.ofs -= 4; + emit_i8(as, -1); + emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM); + } else { + emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff); + emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); +#else } else { emit_i8(as, irt_toitype(ir->t)); emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); +#endif } } @@ -1359,11 +1579,27 @@ static void asm_ahustore(ASMState *as, IRIns *ir) Reg src = ra_alloc1(as, ir->op2, RSET_FPR); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, XO_MOVSDto, src, RID_MRM); -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(ir->t)) { Reg src = ra_alloc1(as, ir->op2, RSET_GPR); asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); +#endif +#if LJ_GC64 + } else if (irref_isk(ir->op2)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, IR(ir->op2)); + asm_fuseahuref(as, ir->op1, RSET_GPR); + if (tvisnil(&k)) { + emit_i32(as, -1); + emit_mrm(as, XO_MOVmi, REX_64, RID_MRM); + } else { + emit_u32(as, k.u32.lo); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + as->mrm.ofs += 4; + emit_u32(as, k.u32.hi); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + } #endif } else { IRIns *irr = IR(ir->op2); @@ -1375,6 +1611,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir) } asm_fuseahuref(as, ir->op1, allow); if (ra_hasreg(src)) { +#if LJ_GC64 + if (!(LJ_DUALNUM && irt_isinteger(ir->t))) { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. 
*/ + as->mrm.ofs += 4; + emit_u32(as, irt_toitype(ir->t) << 15); + emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM); + as->mrm.ofs -= 4; + emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); + return; + } +#endif emit_mrm(as, XO_MOVto, src, RID_MRM); } else if (!irt_ispri(irr->t)) { lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); @@ -1382,14 +1629,20 @@ static void asm_ahustore(ASMState *as, IRIns *ir) emit_mrm(as, XO_MOVmi, 0, RID_MRM); } as->mrm.ofs += 4; +#if LJ_GC64 + lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); + emit_i32(as, LJ_TNUMX << 15); +#else emit_i32(as, (int32_t)irt_toitype(ir->t)); +#endif emit_mrm(as, XO_MOVmi, 0, RID_MRM); } } static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); IRType1 t = ir->t; Reg base; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ @@ -1402,7 +1655,7 @@ static void asm_sload(ASMState *as, IRIns *ir) base = ra_alloc1(as, REF_BASE, RSET_GPR); emit_rmro(as, XO_MOVSD, left, base, ofs); t.irt = IRT_NUM; /* Continue with a regular number type check. */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(t)) { Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); if (ra_hasreg(dest)) { @@ -1420,6 +1673,36 @@ static void asm_sload(ASMState *as, IRIns *ir) t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ emit_rmro(as, irt_isint(t) ? 
XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); } else { +#if LJ_GC64 + if (irt_isaddr(t)) { + /* LJ_GC64 type check + tag removal without BMI2 and with BMI2: + ** + ** mov r64, [addr] rorx r64, [addr], 47 + ** ror r64, 47 + ** cmp r16, itype cmp r16, itype + ** jne ->exit jne ->exit + ** shr r64, 16 shr r64, 16 + */ + emit_shifti(as, XOg_SHR|REX_64, dest, 17); + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + asm_guardcc(as, CC_NE); + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, dest); + emit_i8(as, XI_O16); + } + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 47); + emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs); + } else { + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_shifti(as, XOg_ROR|REX_64, dest, 47); + else + emit_shifti(as, XOg_SHL|REX_64, dest, 17); + emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); + } + return; + } else +#endif emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); } } else { @@ -1432,11 +1715,42 @@ static void asm_sload(ASMState *as, IRIns *ir) asm_guardcc(as, irt_isnum(t) ? 
CC_AE : CC_NE); if (LJ_64 && irt_type(t) >= IRT_NUM) { lua_assert(irt_isinteger(t) || irt_isnum(t)); +#if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); +#else emit_u32(as, LJ_TISNUM); +#endif emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); +#if LJ_GC64 + } else if (irt_isnil(t)) { + /* LJ_GC64 type check for nil: + ** + ** cmp qword [addr], -1 + ** jne ->exit + */ + emit_i8(as, -1); + emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs); + } else if (irt_ispri(t)) { + emit_u32(as, (irt_toitype(t) << 15) | 0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); + } else { + /* LJ_GC64 type check only: + ** + ** mov r64, [addr] + ** sar r64, 47 + ** cmp r32, itype + ** jne ->exit + */ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base)); + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); +#else } else { emit_i8(as, irt_toitype(t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); +#endif } } } @@ -1530,7 +1844,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); MCLabel l_end = emit_label(as); - emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); + emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist)); emit_setgl(as, tab, gc.grayagain); emit_getgl(as, tmp, gc.grayagain); emit_i8(as, ~LJ_GC_BLACK); @@ -2066,7 +2380,6 @@ static void asm_comp(ASMState *as, IRIns *ir) cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ } left = ra_alloc1(as, lref, RSET_FPR); - right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); l_around = emit_label(as); asm_guardcc(as, cc >> 4); if (cc & VCC_P) { /* Extra CC_P branch required? 
*/ @@ -2083,6 +2396,7 @@ static void asm_comp(ASMState *as, IRIns *ir) emit_jcc(as, CC_P, as->mcp); } } + right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); emit_mrm(as, XO_UCOMISD, left, right); } else { IRRef lref = ir->op1, rref = ir->op2; @@ -2359,13 +2673,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot, emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); else ra_modified(as, r); - emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); + emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot)); if (ra_hasreg(pbase) && pbase != r) - emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); + emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase); else +#if LJ_GC64 + emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH, + (int32_t)dispofs(as, &J2G(as->J)->jit_base)); +#else emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, ptr2addr(&J2G(as->J)->jit_base)); - emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); +#endif + emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack)); emit_getgl(as, r, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); @@ -2375,13 +2694,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_restore(ASMState *as, SnapShot *snap) { SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; +#if !LJ_FR2 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif MSize n, nent = snap->nent; /* Store the value of all modified slots to the Lua stack. 
*/ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) @@ -2394,16 +2715,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) (LJ_DUALNUM && irt_isinteger(ir->t))); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); +#if LJ_GC64 + if (irt_is64(ir->t)) { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4); + } else if (LJ_DUALNUM && irt_isinteger(ir->t)) { + emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15); + } else { + emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff); + } +#endif emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); +#if LJ_GC64 + } else { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + if (tvisnil(&k)) { + emit_i32(as, -1); + emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs); + } else { + emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi); + emit_movmroi(as, RID_BASE, ofs, k.u32.lo); + } +#else } else if (!irt_ispri(ir->t)) { emit_movmroi(as, RID_BASE, ofs, ir->i); +#endif } if ((sn & (SNAP_CONT|SNAP_FRAME))) { +#if !LJ_FR2 if (s != 0) /* Do not overwrite link to previous frame. */ emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); +#endif +#if !LJ_GC64 } else { if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); +#endif } } checkmclim(as); @@ -2429,11 +2778,15 @@ static void asm_gc_check(ASMState *as) args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); +#if LJ_GC64 + emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G); +#else emit_loada(as, tmp, J2G(as->J)); +#endif emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); /* Jump around GC step if GC total < GC threshold. 
*/ emit_sjcc(as, CC_B, l_end); - emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); + emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold); emit_getgl(as, tmp, gc.total); as->gcsteps = 0; checkmclim(as); @@ -2498,7 +2851,7 @@ static void asm_head_root_base(ASMState *as) if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ir->r = RID_INIT; /* No inheritance for modified BASE register. */ if (r != RID_BASE) - emit_rr(as, XO_MOV, r, RID_BASE); + emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE); } } @@ -2514,8 +2867,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) if (irp->r == r) { rset_clear(allow, r); /* Mark same BASE register as coalesced. */ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + /* Move from coalesced parent reg. */ rset_clear(allow, irp->r); - emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ + emit_rr(as, XO_MOV, r|REX_GC64, irp->r); } else { emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ } @@ -2616,10 +2970,111 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) static void asm_setup_target(ASMState *as) { asm_exitstub_setup(as, as->T->nsnap); + as->mrm.base = 0; } /* -- Trace patching ------------------------------------------------------ */ +static const uint8_t map_op1[256] = { +0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20, +0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51, +0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, +0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, +#if LJ_64 +0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14, +#else +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, +#endif +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, 
+0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, +0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51, +#if LJ_64 +0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#else +0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#endif +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, +0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51, +0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51, +0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92 +}; + +static const uint8_t map_op2[256] = { +0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93, 
+0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52 +}; + +static uint32_t asm_x86_inslen(const uint8_t* p) +{ + uint32_t result = 0; + uint32_t prefixes = 0; + uint32_t x = map_op1[*p]; + for (;;) { + switch (x >> 4) { + case 0: return result + x + (prefixes & 4); + case 1: prefixes |= x; x = map_op1[*++p]; result++; break; + case 2: x = map_op2[*++p]; break; + case 3: p++; goto mrm; + case 4: result -= (prefixes & 2); /* fallthrough */ + case 5: return result + (x & 15); + case 6: /* Group 3. */ + if (p[1] & 0x38) x = 2; + else if ((prefixes & 2) && (x == 0x66)) x = 4; + goto mrm; + case 7: /* VEX c4/c5. */ + if (LJ_32 && p[1] < 0xc0) { + x = 2; + goto mrm; + } + if (x == 0x70) { + x = *++p & 0x1f; + result++; + if (x >= 2) { + p += 2; + result += 2; + goto mrm; + } + } + p++; + result++; + x = map_op2[*++p]; + break; + case 8: result -= (prefixes & 2); /* fallthrough */ + case 9: mrm: /* ModR/M and possibly SIB. */ + result += (x & 15); + x = *++p; + switch (x >> 6) { + case 0: if ((x & 7) == 5) return result + 4; break; + case 1: result++; break; + case 2: result += 4; break; + case 3: return result; + } + if ((x & 7) == 4) { + result++; + if (x < 0x40 && (p[1] & 7) == 5) result += 4; + } + return result; + } + } +} + /* Patch exit jumps of existing machine code to a new target. 
*/ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) { @@ -2628,22 +3083,23 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MSize len = T->szmcode; MCode *px = exitstub_addr(J, exitno) - 6; MCode *pe = p+len-6; - uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); +#if LJ_GC64 + uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); +#else + uint32_t statei = u32ptr(&J2G(J)->vmstate); +#endif if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) *(int32_t *)(p+len-4) = jmprel(p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ - for (; p < pe; p++) - if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) { - p += LJ_64 ? 11 : 10; + for (; p < pe; p += asm_x86_inslen(p)) { + intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64; + if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) break; - } - lua_assert(p < pe); - for (; p < pe; p++) { - if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { - *(int32_t *)(p+2) = jmprel(p+6, target); - p += 5; - } } + lua_assert(p < pe); + for (; p < pe; p += asm_x86_inslen(p)) + if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) + *(int32_t *)(p+2) = jmprel(p+6, target); lj_mcode_sync(T->mcode, T->mcode + T->szmcode); lj_mcode_patch(J, mcarea, 1); } diff --git a/src/lj_ccall.c b/src/lj_ccall.c index da8578ea..7ff93826 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -439,8 +439,8 @@ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ -#elif LJ_TARGET_MIPS -/* -- MIPS calling conventions -------------------------------------------- */ +#elif LJ_TARGET_MIPS32 +/* -- MIPS o32 calling conventions ---------------------------------------- */ #define CCALL_HANDLE_STRUCTRET \ cc->retref = 1; /* Return all structs by reference. 
*/ \ @@ -515,6 +515,78 @@ sp = (uint8_t *)&cc->fpr[0].f; #endif +#elif LJ_TARGET_MIPS64 +/* -- MIPS n64 calling conventions ---------------------------------------- */ + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = !(sz <= 16); \ + if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_STRUCTRET2 \ + ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 1 or 2 FPRs. */ \ + cc->retref = 0; + +#if LJ_ABI_SOFTFP /* MIPS64 soft-float */ + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + } else { /* Copy complex double from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + ((intptr_t *)dp)[1] = cc->gpr[1]; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex by value in 2 or 4 GPRs. */ + +/* Position of soft-float 'float' return value depends on endianess. */ +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); + +#else /* MIPS64 hard-float */ + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0].f; \ + ((float *)dp)[1] = cc->fpr[1].f; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + if (sz == 2*sizeof(float)) { \ + isfp = 2; \ + if (ngpr < maxgpr) \ + sz *= 2; \ + } + +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; + +#endif + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass all structs by value in registers and/or on the stack. 
*/ + +#define CCALL_HANDLE_REGARG \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + if (ngpr + n > maxgpr) { \ + nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + ngpr = maxgpr; \ + } else { \ + ngpr += n; \ + } \ + goto done; \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -754,6 +826,78 @@ noth: /* Not a homogeneous float/double aggregate. */ #endif +/* -- MIPS64 ABI struct classification ---------------------------- */ + +#if LJ_TARGET_MIPS64 + +#define FTYPE_FLOAT 1 +#define FTYPE_DOUBLE 2 + +/* Classify FP fields (max. 2) and their types. */ +static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) +{ + int n = 0, ft = 0; + if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) + goto noth; + while (ct->sib) { + CType *sct; + ct = ctype_get(cts, ct->sib); + if (n == 2) { + goto noth; + } else if (ctype_isfield(ct->info)) { + sct = ctype_rawchild(cts, ct); + if (ctype_isfp(sct->info)) { + ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; + n++; + } else { + goto noth; + } + } else if (ctype_isbitfield(ct->info) || + ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + goto noth; + } + } + if (n <= 2) + return ft; +noth: /* Not a homogeneous float/double aggregate. */ + return 0; /* Struct is in GPRs. */ +} + +void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) +{ + if (LJ_ABI_SOFTFP ? ft : + ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { + int i, ofs = 0; + for (i = 0; ft != 0; i++, ft >>= 2) { + if ((ft & 3) == FTYPE_FLOAT) { +#if LJ_ABI_SOFTFP + /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ + memcpy((uint8_t *)dp + ofs, + (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4); +#else + *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; +#endif + ofs += 4; + } else { + ofs = (ofs + 7) & ~7; /* 64 bit alignment. 
*/ +#if LJ_ABI_SOFTFP + *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; +#else + *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; +#endif + ofs += 8; + } + } + } else { +#if !LJ_ABI_SOFTFP + if (ft) sp = (uint8_t *)&cc->fpr[0]; +#endif + memcpy(dp, sp, ctr->size); + } +} + +#endif + /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ @@ -921,6 +1065,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_MIPS64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || + (isfp && nsp == 0)) && d->size <= 4) { + *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ + } +#endif #if LJ_TARGET_X64 && LJ_ABI_WIN if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ if (nfpr == ngpr) @@ -936,7 +1086,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ cc->fpr[nfpr-2].d[1] = 0; } -#elif LJ_TARGET_ARM64 +#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { /* Split float HFA or complex float into separate registers. */ CTSize i = (sz >> 2) - 1; @@ -983,7 +1133,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_COMPLEXRET2 return 1; /* One GC step. 
*/ } - if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) + if (LJ_BE && ctr->size < CTSIZE_PTR && + (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) sp += (CTSIZE_PTR - ctr->size); #if CCALL_NUM_FPR if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) diff --git a/src/lj_ccall.h b/src/lj_ccall.h index e85f0c4f..68b85af9 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -104,11 +104,11 @@ typedef union FPRArg { typedef intptr_t GPRArg; typedef double FPRArg; -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 #define CCALL_NARG_GPR 4 #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2) -#define CCALL_NRET_GPR 2 +#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2) #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) #define CCALL_SPS_EXTRA 7 #define CCALL_SPS_FREE 1 @@ -119,6 +119,22 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; +#elif LJ_TARGET_MIPS64 + +/* FP args are positional and overlay the GPR array. */ +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR 0 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 
0 : 2) +#define CCALL_SPS_EXTRA 3 +#define CCALL_SPS_FREE 1 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index abe097e1..892827aa 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -67,9 +67,13 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #define CALLBACK_MCODE_HEAD 24 #endif -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 -#define CALLBACK_MCODE_HEAD 24 +#define CALLBACK_MCODE_HEAD 20 + +#elif LJ_TARGET_MIPS64 + +#define CALLBACK_MCODE_HEAD 52 #else @@ -221,14 +225,27 @@ static void callback_mcode_init(global_State *g, uint32_t *page) static void callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; + uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; + uintptr_t ug = (uintptr_t)(void *)g; MSize slot; - *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; - *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); - *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); +#if LJ_TARGET_MIPS32 + *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16); + *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16); +#else + *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48); + *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48); + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff); + *p++ = MIPSI_DSLL | 
MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); +#endif + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff); *p++ = MIPSI_JR | MIPSF_S(RID_R3); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff); for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); p++; @@ -440,7 +457,7 @@ void lj_ccallback_mcode_free(CTState *cts) if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 #define CALLBACK_HANDLE_GPR \ if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ @@ -466,6 +483,29 @@ void lj_ccallback_mcode_free(CTState *cts) UNUSED(isfp); #endif +#define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; + +#elif LJ_TARGET_MIPS64 + +#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */ +#define CALLBACK_HANDLE_REGARG \ + if (ngpr + n <= maxgpr) { \ + sp = isfp ? 
(void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } +#else /* MIPS64 soft-float */ +#define CALLBACK_HANDLE_REGARG \ + if (ngpr + n <= maxgpr) { \ + UNUSED(isfp); \ + sp = (void*) &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } +#endif + #define CALLBACK_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ((float *)dp)[1] = *(float *)dp; @@ -557,7 +597,11 @@ static void callback_conv_args(CTState *cts, lua_State *L) nsp += n; done: - if (LJ_BE && cta->size < CTSIZE_PTR) + if (LJ_BE && cta->size < CTSIZE_PTR +#if LJ_TARGET_MIPS64 + && !(isfp && nsp) +#endif + ) sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); } @@ -608,6 +652,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_MIPS64 + /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ + if (ctr->size <= 4 && + (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) + *(int64_t *)dp = (int64_t)*(int32_t *)dp; +#endif #if LJ_TARGET_X86 if (ctype_isfp(ctr->info)) cts->cb.gpr[2] = ctr->size == sizeof(float) ? 
1 : 2; diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 05e27dad..e8ffdbcb 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -93,11 +93,13 @@ void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it) setcdataV(L, &tmp, cd); lj_gc_anybarriert(L, t); tv = lj_tab_set(L, t, &tmp); - setgcV(L, tv, obj, it); - if (!tvisnil(tv)) - cd->marked |= LJ_GC_CDATA_FIN; - else + if (it == LJ_TNIL) { + setnilV(tv); cd->marked &= ~LJ_GC_CDATA_FIN; + } else { + setgcV(L, tv, obj, it); + cd->marked |= LJ_GC_CDATA_FIN; + } } } diff --git a/src/lj_cparse.c b/src/lj_cparse.c index 7ec4a5ee..16d2cb65 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c @@ -297,13 +297,17 @@ static CPToken cp_next_(CPState *cp) else return '/'; break; case '|': - if (cp_get(cp) != '|') return '|'; cp_get(cp); return CTOK_OROR; + if (cp_get(cp) != '|') return '|'; + cp_get(cp); return CTOK_OROR; case '&': - if (cp_get(cp) != '&') return '&'; cp_get(cp); return CTOK_ANDAND; + if (cp_get(cp) != '&') return '&'; + cp_get(cp); return CTOK_ANDAND; case '=': - if (cp_get(cp) != '=') return '='; cp_get(cp); return CTOK_EQ; + if (cp_get(cp) != '=') return '='; + cp_get(cp); return CTOK_EQ; case '!': - if (cp_get(cp) != '=') return '!'; cp_get(cp); return CTOK_NE; + if (cp_get(cp) != '=') return '!'; + cp_get(cp); return CTOK_NE; case '<': if (cp_get(cp) == '=') { cp_get(cp); return CTOK_LE; } else if (cp->c == '<') { cp_get(cp); return CTOK_SHL; } @@ -313,7 +317,8 @@ static CPToken cp_next_(CPState *cp) else if (cp->c == '>') { cp_get(cp); return CTOK_SHR; } return '>'; case '-': - if (cp_get(cp) != '>') return '-'; cp_get(cp); return CTOK_DEREF; + if (cp_get(cp) != '>') return '-'; + cp_get(cp); return CTOK_DEREF; case '$': return cp_param(cp); case '\0': return CTOK_EOF; diff --git a/src/lj_crecord.c b/src/lj_crecord.c index c0f7e3d7..4799031a 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -712,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) 
return tr; } +/* Tailcall to function. */ +static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) +{ + TRef kfunc = lj_ir_kfunc(J, funcV(tv)); +#if LJ_FR2 + J->base[-2] = kfunc; + J->base[-1] = TREF_FRAME; +#else + J->base[-1] = kfunc | TREF_FRAME; +#endif + rd->nres = -1; /* Pending tailcall. */ +} + /* Record ctype __index/__newindex metamethods. */ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, RecordFFData *rd) @@ -721,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, if (!tv) lj_trace_err(J, LJ_TRERR_BADTYPE); if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { /* Specialize to result of __index lookup. */ cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); @@ -1119,20 +1131,20 @@ static void crec_snap_caller(jit_State *J) lua_State *L = J->L; TValue *base = L->base, *top = L->top; const BCIns *pc = J->pc; - TRef ftr = J->base[-1]; + TRef ftr = J->base[-1-LJ_FR2]; ptrdiff_t delta; if (!frame_islua(base-1) || J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYICALL); J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); L->top = base; L->base = base - delta; - J->base[-1] = TREF_FALSE; + J->base[-1-LJ_FR2] = TREF_FALSE; J->base -= delta; J->baseslot -= (BCReg)delta; - J->maxslot = (BCReg)delta; J->framedepth--; + J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--; lj_snap_add(J); L->base = base; L->top = top; J->framedepth++; J->maxslot = 1; J->base += delta; J->baseslot += (BCReg)delta; - J->base[-1] = ftr; J->pc = pc; + J->base[-1-LJ_FR2] = ftr; J->pc = pc; } /* Record function call. */ @@ -1224,8 +1236,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? 
ctype_cid(ct->info) : id, mm); if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); return; } } else if (mm == MM_new) { @@ -1238,7 +1249,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) { - if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) { + if (sp[0] && sp[1] && ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) { IRType dt; CTypeID id; TRef tr; @@ -1296,6 +1307,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) { CTState *cts = ctype_ctsG(J2G(J)); CType *ctp = s[0]; + if (!(sp[0] && sp[1])) return 0; if (ctype_isptr(ctp->info) || ctype_isrefarray(ctp->info)) { if ((mm == MM_sub || mm == MM_eq || mm == MM_lt || mm == MM_le) && (ctype_isptr(s[1]->info) || ctype_isrefarray(s[1]->info))) { @@ -1373,8 +1385,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, } if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); return 0; } /* NYI: non-function metamethods. */ } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ diff --git a/src/lj_ctype.h b/src/lj_ctype.h index e9b426f0..4e49f57f 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -42,18 +42,18 @@ LJ_STATIC_ASSERT(((int)CT_STRUCT & (int)CT_ARRAY) == CT_STRUCT); ** ---------- info ------------ ** |type flags... A cid | size | sib | next | name | ** +----------------------------+--------+-------+-------+-------+-- -** |NUM BFvcUL.. A | size | | type | | -** |STRUCT ..vcU..V A | size | field | name? | name? | -** |PTR ..vcR... A cid | size | | type | | -** |ARRAY VCvc...V A cid | size | | type | | -** |VOID ..vc.... A | size | | type | | +** |NUM BFcvUL.. A | size | | type | | +** |STRUCT ..cvU..V A | size | field | name? | name? 
| +** |PTR ..cvR... A cid | size | | type | | +** |ARRAY VCcv...V A cid | size | | type | | +** |VOID ..cv.... A | size | | type | | ** |ENUM A cid | size | const | name? | name? | ** |FUNC ....VS.. cc cid | nargs | field | name? | name? | ** |TYPEDEF cid | | | name | name | ** |ATTRIB attrnum cid | attr | sib? | type? | | ** |FIELD cid | offset | field | | name? | -** |BITFIELD B.vcU csz bsz pos | offset | field | | name? | -** |CONSTVAL c cid | value | const | name | name | +** |BITFIELD B.cvU csz bsz pos | offset | field | | name? | +** |CONSTVAL c cid | value | const | name | name | ** |EXTERN cid | | sib? | name | name | ** |KW tok | size | | name | name | ** +----------------------------+--------+-------+-------+-------+-- diff --git a/src/lj_def.h b/src/lj_def.h index 29d3fdda..9413399d 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -95,6 +95,8 @@ typedef unsigned int uintptr_t; #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) +#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) +#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) #define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 36b920ad..e5aa495d 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -75,7 +75,7 @@ void lj_dispatch_init(GG_State *GG) for (i = 0; i < GG_NUM_ASMFF; i++) GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); #if LJ_TARGET_MIPS - memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); + memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); #endif } diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index 47fee5fc..dff9fac4 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) #if !LJ_SOFTFP /* Load a number constant into an FPR. 
*/ -static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) { + cTValue *tv = ir_knum(ir); int32_t i; if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { uint32_t hi = tv->u32.hi; diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index fdebe94b..d35f830b 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -35,7 +35,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) { - if ((as->flags & JIT_F_MIPS32R2)) { + if ((as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, src, shift); } else { emit_dst(as, MIPSI_OR, dest, dest, tmp); @@ -112,8 +112,8 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) emit_tsi(as, mi, r, base, i); } -#define emit_loadn(as, r, tv) \ - emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) +#define emit_loadk64(as, r, ir) \ + emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) @@ -157,7 +157,8 @@ static void emit_call(ASMState *as, void *target, int needcfa) MCode *p = as->mcp; *--p = MIPSI_NOP; if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { - *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); + *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | + (((uintptr_t)target >>2) & 0x03ffffffu); } else { /* Target out of range: need indirect call. 
*/ *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); needcfa = 1; diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index 4eb933ea..5163012a 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) emit_tai(as, pi, r, base, i); } -#define emit_loadn(as, r, tv) \ - emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) +#define emit_loadk64(as, r, ir) \ + emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index cbaf4e85..f0bca938 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -20,6 +20,11 @@ #define REX_64 0 #define VEX_64 0 #endif +#if LJ_GC64 +#define REX_GC64 REX_64 +#else +#define REX_GC64 0 +#endif #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) #define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4) @@ -94,26 +99,17 @@ static int32_t ptr2addr(const void *p) #define ptr2addr(p) (i32ptr((p))) #endif -/* op r, [addr] */ -static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) -{ - MCode *p = as->mcp; - *(int32_t *)(p-4) = ptr2addr(addr); -#if LJ_64 - p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); -#else - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); -#endif -} - /* op r, [base+ofs] */ static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) { MCode *p = as->mcp; x86Mode mode; if (ra_hasreg(rb)) { - if (ofs == 0 && (rb&7) != RID_EBP) { + if (LJ_GC64 && rb == RID_RIP) { + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = ofs; + } else if (ofs == 0 && (rb&7) != RID_EBP) { mode = XM_OFS0; } else if (checki8(ofs)) { *--p = (MCode)ofs; @@ -211,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); rb = RID_ESP; #endif + } 
else if (LJ_GC64 && rb == RID_RIP) { + lua_assert(as->mrm.idx == RID_NONE); + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = as->mrm.ofs; } else { if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { mode = XM_OFS0; @@ -264,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) /* Get/set global_State fields. */ #define emit_opgl(as, xo, r, field) \ emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) -#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) -#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) +#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field) +#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field) #define emit_setvmstate(as, i) \ (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) @@ -288,9 +289,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } +#if LJ_GC64 +#define dispofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) +#define mctopofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop)) +/* mov r, addr */ +#define emit_loada(as, r, addr) \ + emit_loadu64(as, (r), (uintptr_t)(addr)) +#else /* mov r, addr */ #define emit_loada(as, r, addr) \ emit_loadi(as, (r), ptr2addr((addr))) +#endif #if LJ_64 /* mov r, imm64 or shorter 32 bit extended load. */ @@ -302,6 +315,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) MCode *p = as->mcp; *(int32_t *)(p-4) = (int32_t)u64; as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); +#if LJ_GC64 + } else if (checki32(dispofs(as, u64))) { + emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); + } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { + /* Since as->realign assumes the code size doesn't change, check + ** RIP-relative addressing reachability for both as->mcp and as->mctop. 
+ */ + emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); +#endif } else { /* Full-size 64 bit load. */ MCode *p = as->mcp; *(uint64_t *)(p-8) = u64; @@ -313,13 +335,70 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) } #endif -/* movsd r, [&tv->n] / xorps r, r */ -static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +/* op r, [addr] */ +static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) { - if (tvispzero(tv)) /* Use xor only for +0. */ - emit_rr(as, XO_XORPS, r, r); - else - emit_rma(as, XO_MOVSD, r, &tv->n); +#if LJ_GC64 + if (checki32(dispofs(as, addr))) { + emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); + } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { + emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); + } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { + emit_rmro(as, xo, rr, rr, 0); + emit_loadu64(as, rr, (uintptr_t)addr); + } else +#endif + { + MCode *p = as->mcp; + *(int32_t *)(p-4) = ptr2addr(addr); +#if LJ_64 + p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); +#else + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); +#endif + } +} + +/* Load 64 bit IR constant into register. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + Reg r64; + x86Op xo; + const uint64_t *k = &ir_k64(ir)->u64; + if (rset_test(RSET_FPR, r)) { + r64 = r; + xo = XO_MOVSD; + } else { + r64 = r | REX_64; + xo = XO_MOV; + } + if (*k == 0) { + emit_rr(as, rset_test(RSET_FPR, r) ? 
XO_XORPS : XO_ARITH(XOg_XOR), r, r); +#if LJ_GC64 + } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || + (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { + emit_rma(as, xo, r64, k); + } else { + if (ir->i) { + lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { + emit_loadu64(as, r, *k); + return; + } else { + /* If all else fails, add the FP constant at the MCode area bottom. */ + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t *)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; + } + emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); +#else + } else { + emit_rma(as, xo, r64, k); +#endif + } } /* -- Emit control-flow instructions -------------------------------------- */ @@ -460,9 +539,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { if ((as->flags & JIT_F_LEA_AGU)) - emit_rmro(as, XO_LEA, r, r, ofs); + emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); else - emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); } } diff --git a/src/lj_err.c b/src/lj_err.c index a847ca07..600e6ee6 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -46,7 +46,8 @@ ** the wrapper function feature. Lua errors thrown through C++ frames ** cannot be caught by C++ code and C++ destructors are not run. ** -** EXT is the default on x64 systems, INT is the default on all other systems. +** EXT is the default on x64 systems and on Windows, INT is the default on all +** other systems. ** ** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack ** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled @@ -55,7 +56,6 @@ ** and all C libraries that have callbacks which may be used to call back ** into Lua. C++ code must *not* be compiled with -fno-exceptions. 
** -** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH. ** EXT is mandatory on WIN64 since the calling convention has an abundance ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). ** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). @@ -63,7 +63,7 @@ #if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND #define LJ_UNWIND_EXT 1 -#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS +#elif LJ_TARGET_WINDOWS #define LJ_UNWIND_EXT 1 #endif @@ -384,7 +384,7 @@ static void err_raise_ext(int errcode) #endif /* LJ_TARGET_ARM */ -#elif LJ_TARGET_X64 && LJ_ABI_WIN +#elif LJ_ABI_WIN /* ** Someone in Redmond owes me several days of my life. A lot of this is @@ -402,6 +402,7 @@ static void err_raise_ext(int errcode) #define WIN32_LEAN_AND_MEAN #include +#if LJ_TARGET_X64 /* Taken from: http://www.nynaeve.net/?p=99 */ typedef struct UndocumentedDispatcherContext { ULONG64 ControlPc; @@ -416,11 +417,14 @@ typedef struct UndocumentedDispatcherContext { ULONG ScopeIndex; ULONG Fill0; } UndocumentedDispatcherContext; +#else +typedef void *UndocumentedDispatcherContext; +#endif /* Another wild guess. */ extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); -#ifdef MINGW_SDK_INIT +#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) /* Workaround for broken MinGW64 declaration. */ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); #define RtlUnwindEx RtlUnwindEx_FIXED @@ -434,10 +438,15 @@ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); #define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) #define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) -/* Win64 exception handler for interpreter frame. */ -LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, - void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) +/* Windows exception handler for interpreter frame. 
*/ +LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, + void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) { +#if LJ_TARGET_X64 + void *cf = f; +#else + void *cf = (char *)f - CFRAME_OFS_SEH; +#endif lua_State *L = cframe_L(cf); int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; @@ -455,8 +464,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { /* Don't catch access violations etc. */ - return ExceptionContinueSearch; + return 1; /* ExceptionContinueSearch */ } +#if LJ_TARGET_X64 /* Unwind the stack and call all handlers for all lower C frames ** (including ourselves) again with EH_UNWINDING set. Then set ** rsp = cf, rax = errcode and jump to the specified target. @@ -466,9 +476,21 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, lj_vm_unwind_c_eh), rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); /* RtlUnwindEx should never return. */ +#else + UNUSED(ctx); + UNUSED(dispatch); + /* Call all handlers for all lower C frames (including ourselves) again + ** with EH_UNWINDING set. Then call the specified function, passing cf + ** and errcode. + */ + lj_vm_rtlunwind(cf, (void *)rec, + (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? + (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); + /* lj_vm_rtlunwind does not return. */ +#endif } } - return ExceptionContinueSearch; + return 1; /* ExceptionContinueSearch */ } /* Raise Windows exception. 
*/ diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index d05dc5d7..6d141a20 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -102,42 +102,41 @@ static void recff_stitch(jit_State *J) ASMFunction cont = lj_cont_stitch; lua_State *L = J->L; TValue *base = L->base; + BCReg nslot = J->maxslot + 1 + LJ_FR2; + TValue *nframe = base + 1 + LJ_FR2; const BCIns *pc = frame_pc(base-1); TValue *pframe = frame_prevl(base-1); - TRef trcont; - lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */ /* Move func + args up in Lua stack and insert continuation. */ - memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1)); - setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT); - setcont(base, cont); + memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); + setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); + setcont(base-LJ_FR2, cont); setframe_pc(base, pc); - setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */ - L->base += 2; - L->top += 2; + setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */ + L->base += 2 + LJ_FR2; + L->top += 2 + LJ_FR2; /* Ditto for the IR. 
*/ - memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1)); -#if LJ_64 - trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin)); + memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); +#if LJ_FR2 + J->base[2] = TREF_FRAME; + J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; #else - trcont = lj_ir_kptr(J, (void *)cont); + J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; #endif - J->base[0] = trcont | TREF_CONT; - J->ktracep = lj_ir_k64_reserve(J); - lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); - J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0); - J->base += 2; - J->baseslot += 2; + J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); + J->base += 2 + LJ_FR2; + J->baseslot += 2 + LJ_FR2; J->framedepth++; lj_record_stop(J, LJ_TRLINK_STITCH, 0); /* Undo Lua stack changes. */ - memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1)); + memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); setframe_pc(base-1, pc); - L->base -= 2; - L->top -= 2; + L->base -= 2 + LJ_FR2; + L->top -= 2 + LJ_FR2; } /* Fallback handler for fast functions that are not recorded (yet). */ @@ -179,7 +178,7 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) /* Emit BUFHDR for the global temporary buffer. */ static TRef recff_bufhdr(jit_State *J) { - return emitir(IRT(IR_BUFHDR, IRT_P32), + return emitir(IRT(IR_BUFHDR, IRT_PGC), lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); } @@ -229,7 +228,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd) ix.tab = tr; copyTV(J->L, &ix.tabv, &rd->argv[0]); lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ - fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); + fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META); mtref = tref_isnil(mt) ? 
lj_ir_knull(J, IRT_TAB) : mt; emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); if (!tref_isnil(mt)) @@ -295,7 +294,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) if (strV(tv)->len == 1) { emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); } else { - TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); + TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); } @@ -380,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) int errcode; TValue argv0; /* Temporarily insert metamethod below object. */ - J->base[1] = J->base[0]; + J->base[1+LJ_FR2] = J->base[0]; J->base[0] = ix.mobj; copyTV(J->L, &argv0, &rd->argv[0]); - copyTV(J->L, &rd->argv[1], &rd->argv[0]); + copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]); copyTV(J->L, &rd->argv[0], &ix.mobjv); /* Need to protect lj_record_tailcall because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); @@ -450,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) { if (J->maxslot >= 1) { +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); +#endif lj_record_call(J, 0, J->maxslot - 1); rd->nres = -1; /* Pending call. */ } /* else: Interpreter will throw. */ @@ -469,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) TValue argv0, argv1; TRef tmp; int errcode; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ /* Swap function and traceback. */ tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; copyTV(J->L, &argv0, &rd->argv[0]); copyTV(J->L, &argv1, &rd->argv[1]); copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[1], &argv0); +#if LJ_FR2 + /* Shift function arguments up. 
*/ + memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); +#endif /* Need to protect lj_record_call because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); /* Always undo Lua stack swap to avoid confusing the interpreter. */ @@ -504,7 +510,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); - J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); + J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS)); UNUSED(rd); } @@ -613,10 +619,8 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) { - TRef tr = lj_ir_tonum(J, J->base[0]); - if (!tref_isnumber_str(J->base[1])) - lj_trace_err(J, LJ_TRERR_BADTYPE); - J->base[0] = lj_opt_narrow_pow(J, tr, J->base[1], &rd->argv[1]); + J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], + &rd->argv[0], &rd->argv[1]); UNUSED(rd); } @@ -822,7 +826,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) /* Also handle empty range here, to avoid extra traces. */ TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); emitir(IRTGI(IR_GE), trslen, tr0); - trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); + trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); } else { /* Range underflow: return empty string. 
*/ emitir(IRTGI(IR_LT), trend, trstart); @@ -838,7 +842,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) rd->nres = len; for (i = 0; i < len; i++) { TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); - tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); + tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp); J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); } } else { /* Empty range or range underflow: return no results. */ @@ -860,7 +864,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) if (i > 1) { /* Concatenate the strings, if there's more than one. */ TRef hdr = recff_bufhdr(J), tr = hdr; for (i = 0; J->base[i] != 0; i++) - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]); J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); } UNUSED(rd); @@ -877,14 +881,14 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1)); if (vrep > 1) { TRef hdr2 = recff_bufhdr(J); - TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep); - tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str); + TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep); + tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str); str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2); } } tr = hdr = recff_bufhdr(J); if (str2) { - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str); + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str); str = str2; rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1)); } @@ -935,8 +939,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) if ((J->base[2] && tref_istruecond(J->base[3])) || (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)), !lj_str_haspattern(pat))) { /* Search for fixed string. 
*/ - TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); - TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0); + TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); + TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0); TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart); TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN); TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen); @@ -944,13 +948,13 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) if (lj_str_find(strdata(str)+(MSize)start, strdata(pat), str->len-(MSize)start, pat->len)) { TRef pos; - emitir(IRTG(IR_NE, IRT_P32), tr, trp0); - pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0)); + emitir(IRTG(IR_NE, IRT_PGC), tr, trp0); + pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0)); J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1)); J->base[1] = emitir(IRTI(IR_ADD), pos, trplen); rd->nres = 2; } else { - emitir(IRTG(IR_EQ, IRT_P32), tr, trp0); + emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0); J->base[0] = TREF_NIL; } } else { /* Search for pattern. */ @@ -977,7 +981,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) IRCallID id; switch (STRFMT_TYPE(sf)) { case STRFMT_LIT: - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len))); break; case STRFMT_INT: @@ -986,7 +990,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) if (!tref_isinteger(tra)) goto handle_num; if (sf == STRFMT_INT) { /* Shortcut for plain %d. */ - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); } else { #if LJ_HASFFI @@ -1016,7 +1020,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) return; } if (sf == STRFMT_STR) /* Shortcut for plain %s. 
*/ - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra); + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra); else if ((sf & STRFMT_T_QUOTED)) tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra); else @@ -1025,7 +1029,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) case STRFMT_CHAR: tra = lj_opt_narrow_toint(J, tra); if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */ - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR)); else tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra); @@ -1110,8 +1114,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ +#if LJ_GC64 + /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ + ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); +#else tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); +#endif } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) @@ -1133,7 +1142,7 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd) ptrdiff_t i = rd->data == 0 ? 1 : 0; for (; J->base[i]; i++) { TRef str = lj_ir_tostr(J, J->base[i]); - TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); + TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero); TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); if (tref_isk(len) && IR(tref_ref(len))->i == 1) { IRIns *irs = IR(tref_ref(str)); diff --git a/src/lj_frame.h b/src/lj_frame.h index 25c28bd4..7ef5f7d7 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -116,6 +116,17 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ /* These definitions must match with the arch-specific *.dasc files. 
*/ #if LJ_TARGET_X86 +#if LJ_ABI_WIN +#define CFRAME_OFS_ERRF (19*4) +#define CFRAME_OFS_NRES (18*4) +#define CFRAME_OFS_PREV (17*4) +#define CFRAME_OFS_L (16*4) +#define CFRAME_OFS_SEH (9*4) +#define CFRAME_OFS_PC (6*4) +#define CFRAME_OFS_MULTRES (5*4) +#define CFRAME_SIZE (16*4) +#define CFRAME_SHIFT_MULTRES 0 +#else #define CFRAME_OFS_ERRF (15*4) #define CFRAME_OFS_NRES (14*4) #define CFRAME_OFS_PREV (13*4) @@ -124,6 +135,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #define CFRAME_OFS_MULTRES (5*4) #define CFRAME_SIZE (12*4) #define CFRAME_SHIFT_MULTRES 0 +#endif #elif LJ_TARGET_X64 #if LJ_ABI_WIN #define CFRAME_OFS_PREV (13*8) @@ -226,26 +238,41 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #define CFRAME_SIZE 272 #define CFRAME_SHIFT_MULTRES 3 #endif -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 #if LJ_ARCH_HASFPU #define CFRAME_OFS_ERRF 124 #define CFRAME_OFS_NRES 120 #define CFRAME_OFS_PREV 116 #define CFRAME_OFS_L 112 -#define CFRAME_OFS_PC 20 -#define CFRAME_OFS_MULTRES 16 #define CFRAME_SIZE 112 -#define CFRAME_SHIFT_MULTRES 3 #else #define CFRAME_OFS_ERRF 76 #define CFRAME_OFS_NRES 72 #define CFRAME_OFS_PREV 68 #define CFRAME_OFS_L 64 +#define CFRAME_SIZE 64 +#endif #define CFRAME_OFS_PC 20 #define CFRAME_OFS_MULTRES 16 -#define CFRAME_SIZE 64 #define CFRAME_SHIFT_MULTRES 3 +#elif LJ_TARGET_MIPS64 +#if LJ_ARCH_HASFPU +#define CFRAME_OFS_ERRF 188 +#define CFRAME_OFS_NRES 184 +#define CFRAME_OFS_PREV 176 +#define CFRAME_OFS_L 168 +#define CFRAME_OFS_PC 160 +#define CFRAME_SIZE 192 +#else +#define CFRAME_OFS_ERRF 124 +#define CFRAME_OFS_NRES 120 +#define CFRAME_OFS_PREV 112 +#define CFRAME_OFS_L 104 +#define CFRAME_OFS_PC 96 +#define CFRAME_SIZE 128 #endif +#define CFRAME_OFS_MULTRES 0 +#define CFRAME_SHIFT_MULTRES 3 #else #error "Missing CFRAME_* definitions for this architecture" #endif diff --git a/src/lj_gc.c b/src/lj_gc.c index 53f1d974..7c707462 100644 --- a/src/lj_gc.c +++ 
b/src/lj_gc.c @@ -238,6 +238,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T) IRIns *ir = &T->ir[ref]; if (ir->o == IR_KGC) gc_markobj(g, ir_kgc(ir)); + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ref++; } if (T->link) gc_marktrace(g, T->link); if (T->nextroot) gc_marktrace(g, T->nextroot); diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index f2bd865e..8b72be7d 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -719,6 +719,20 @@ static void gdbjit_buildobj(GDBJITctx *ctx) /* -- Interface to GDB JIT API -------------------------------------------- */ +static int gdbjit_lock; + +static void gdbjit_lock_acquire() +{ + while (__sync_lock_test_and_set(&gdbjit_lock, 1)) { + /* Just spin; futexes or pthreads aren't worth the portability cost. */ + } +} + +static void gdbjit_lock_release() +{ + __sync_lock_release(&gdbjit_lock); +} + /* Add new entry to GDB JIT symbol chain. */ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) { @@ -730,6 +744,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) ctx->T->gdbjit_entry = (void *)eo; /* Link new entry to chain and register it. */ eo->entry.prev_entry = NULL; + gdbjit_lock_acquire(); eo->entry.next_entry = __jit_debug_descriptor.first_entry; if (eo->entry.next_entry) eo->entry.next_entry->prev_entry = &eo->entry; @@ -739,6 +754,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) __jit_debug_descriptor.relevant_entry = &eo->entry; __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; __jit_debug_register_code(); + gdbjit_lock_release(); } /* Add debug info for newly compiled trace and notify GDB. 
*/ @@ -770,6 +786,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) { GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; if (eo) { + gdbjit_lock_acquire(); if (eo->entry.prev_entry) eo->entry.prev_entry->next_entry = eo->entry.next_entry; else @@ -779,6 +796,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) __jit_debug_descriptor.relevant_entry = &eo->entry; __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; __jit_debug_register_code(); + gdbjit_lock_release(); lj_mem_free(J2G(J), eo, eo->sz); } } diff --git a/src/lj_ir.c b/src/lj_ir.c index 63c98254..87fd0f4d 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -91,7 +91,7 @@ static void lj_ir_growbot(jit_State *J) IRIns *baseir = J->irbuf + J->irbotlim; MSize szins = J->irtoplim - J->irbotlim; lua_assert(szins != 0); - lua_assert(J->cur.nk == J->irbotlim); + lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); if (J->cur.nins + (szins >> 1) < J->irtoplim) { /* More than half of the buffer is free on top: shift up by a quarter. */ MSize ofs = szins >> 2; @@ -145,6 +145,14 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) return emitir(CCI_OPTYPE(ci), tr, id); } +/* Load field of type t from GG_State + offset. */ +LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) +{ + lua_assert(ofs >= IRFL__MAX && ofs < REF_BIAS); + lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); + return lj_opt_fold(J); +} + /* -- Interning of constants ---------------------------------------------- */ /* @@ -165,6 +173,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) return ref; } +/* Get ref of next 64 bit IR constant and optionally grow IR. +** Note: this may invalidate all IRIns *! 
+*/ +static LJ_AINLINE IRRef ir_nextk64(jit_State *J) +{ + IRRef ref = J->cur.nk - 2; + lua_assert(J->state != LJ_TRACE_ASM); + if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); + J->cur.nk = ref; + return ref; +} + +#if LJ_GC64 +#define ir_nextkgc ir_nextk64 +#else +#define ir_nextkgc ir_nextk +#endif + /* Intern int32_t constant. */ TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) { @@ -184,95 +210,21 @@ found: return TREF(ref, IRT_INT); } -/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the -** 64 bit constant. The constants themselves are stored in a chained array -** and shared across traces. -** -** Rationale for choosing this data structure: -** - The address of the constants is embedded in the generated machine code -** and must never move. A resizable array or hash table wouldn't work. -** - Most apps need very few non-32 bit integer constants (less than a dozen). -** - Linear search is hard to beat in terms of speed and low complexity. -*/ -typedef struct K64Array { - MRef next; /* Pointer to next list. */ - MSize numk; /* Number of used elements in this array. */ - TValue k[LJ_MIN_K64SZ]; /* Array of constants. */ -} K64Array; - -/* Free all chained arrays. */ -void lj_ir_k64_freeall(jit_State *J) -{ - K64Array *k; - for (k = mref(J->k64, K64Array); k; ) { - K64Array *next = mref(k->next, K64Array); - lj_mem_free(J2G(J), k, sizeof(K64Array)); - k = next; - } - setmref(J->k64, NULL); -} - -/* Get new 64 bit constant slot. */ -static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64) -{ - TValue *ntv; - if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */ - K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array); - setmref(kn->next, NULL); - kn->numk = 0; - if (kp) - setmref(kp->next, kn); /* Chain to the end of the list. */ - else - setmref(J->k64, kn); /* Link first array. */ - kp = kn; - } - ntv = &kp->k[kp->numk++]; /* Add to current array. 
*/ - ntv->u64 = u64; - return ntv; -} - -/* Find 64 bit constant in chained array or add it. */ -cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64) -{ - K64Array *k, *kp = NULL; - MSize idx; - /* Search for the constant in the whole chain of arrays. */ - for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) { - kp = k; /* Remember previous element in list. */ - for (idx = 0; idx < k->numk; idx++) { /* Search one array. */ - TValue *tv = &k->k[idx]; - if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */ - return tv; - } - } - /* Otherwise add a new constant. */ - return ir_k64_add(J, kp, u64); -} - -TValue *lj_ir_k64_reserve(jit_State *J) -{ - K64Array *k, *kp = NULL; - lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */ - /* Find last K64Array, if any. */ - for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k; - return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */ -} - -/* Intern 64 bit constant, given by its address. */ -TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv) +/* Intern 64 bit constant, given by its 64 bit pattern. */ +TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64) { IRIns *ir, *cir = J->cur.ir; IRRef ref; IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; for (ref = J->chain[op]; ref; ref = cir[ref].prev) - if (ir_k64(&cir[ref]) == tv) + if (ir_k64(&cir[ref])->u64 == u64) goto found; - ref = ir_nextk(J); + ref = ir_nextk64(J); ir = IR(ref); - lua_assert(checkptrGC(tv)); - setmref(ir->ptr, tv); + ir[1].tv.u64 = u64; ir->t.irt = t; ir->o = op; + ir->op12 = 0; ir->prev = J->chain[op]; J->chain[op] = (IRRef1)ref; found: @@ -282,13 +234,13 @@ found: /* Intern FP constant, given by its 64 bit pattern. */ TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) { - return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); + return lj_ir_k64(J, IR_KNUM, u64); } /* Intern 64 bit integer constant. 
*/ TRef lj_ir_kint64(jit_State *J, uint64_t u64) { - return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); + return lj_ir_k64(J, IR_KINT64, u64); } /* Check whether a number is int and return it. -0 is NOT considered an int. */ @@ -323,15 +275,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) { IRIns *ir, *cir = J->cur.ir; IRRef ref; - lua_assert(!LJ_GC64); /* TODO_GC64: major changes required. */ lua_assert(!isdead(J2G(J), o)); for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) if (ir_kgc(&cir[ref]) == o) goto found; - ref = ir_nextk(J); + ref = ir_nextkgc(J); ir = IR(ref); /* NOBARRIER: Current trace is a GC root. */ - setgcref(ir->gcr, o); + ir->op12 = 0; + setgcref(ir[LJ_GC64].gcr, o); ir->t.irt = (uint8_t)t; ir->o = IR_KGC; ir->prev = J->chain[IR_KGC]; @@ -340,24 +292,44 @@ found: return TREF(ref, t); } -/* Intern 32 bit pointer constant. */ +/* Allocate GCtrace constant placeholder (no interning). */ +TRef lj_ir_ktrace(jit_State *J) +{ + IRRef ref = ir_nextkgc(J); + IRIns *ir = IR(ref); + lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); + ir->t.irt = IRT_P64; + ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */ + ir->op12 = 0; + ir->prev = 0; + return TREF(ref, IRT_P64); +} + +/* Intern pointer constant. */ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) { IRIns *ir, *cir = J->cur.ir; IRRef ref; - lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); +#if LJ_64 && !LJ_GC64 + lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); +#endif for (ref = J->chain[op]; ref; ref = cir[ref].prev) - if (mref(cir[ref].ptr, void) == ptr) + if (ir_kptr(&cir[ref]) == ptr) goto found; +#if LJ_GC64 + ref = ir_nextk64(J); +#else ref = ir_nextk(J); +#endif ir = IR(ref); - setmref(ir->ptr, ptr); - ir->t.irt = IRT_P32; + ir->op12 = 0; + setmref(ir[LJ_GC64].ptr, ptr); + ir->t.irt = IRT_PGC; ir->o = op; ir->prev = J->chain[op]; J->chain[op] = (IRRef1)ref; found: - return TREF(ref, IRT_P32); + return TREF(ref, IRT_PGC); } /* Intern typed NULL constant. 
*/ @@ -412,9 +384,8 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; case IR_KINT: setintV(tv, ir->i); break; case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; - case IR_KPTR: case IR_KKPTR: case IR_KNULL: - setlightudV(tv, mref(ir->ptr, void)); - break; + case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break; + case IR_KNULL: setlightudV(tv, NULL); break; case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; #if LJ_HASFFI case IR_KINT64: { diff --git a/src/lj_ir.h b/src/lj_ir.h index cd8df59d..e77f7b99 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -220,7 +220,7 @@ IRFLDEF(FLENUM) /* SLOAD mode bits, stored in op2. */ #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ -#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ +#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ @@ -294,7 +294,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; /* -- IR instruction types ------------------------------------------------ */ -/* Map of itypes to non-negative numbers. ORDER LJ_T. +#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4) + +/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T. ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for ** IRT_P32 and IRT_P64, which never escape the IR. ** The various integers are only used in the IR and can only escape to @@ -302,12 +304,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; ** contiguous and next to IRT_NUM (see the typerange macros below). */ #define IRTDEF(_) \ - _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ - _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ - _(TAB, 4) _(UDATA, 4) \ + _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 
8 : 4) \ + _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \ + _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \ + _(UDATA, IRTSIZE_PGC) \ _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ - _(SOFTFP, 4) /* There is room for 9 more types. */ + _(SOFTFP, 4) /* There is room for 8 more types. */ /* IR result type and flags (8 bit). */ typedef enum { @@ -318,9 +321,10 @@ IRTDEF(IRTENUM) /* Native pointer type and the corresponding integer type. */ IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, + IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32, + IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT, IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, - /* TODO_GC64: major changes required for all uses of IRT_P32. */ /* Additional flags. */ IRT_MARK = 0x20, /* Marker for misc. purposes. */ @@ -408,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) static LJ_AINLINE uint32_t irt_toitype_(IRType t) { - lua_assert(!LJ_64 || t != IRT_LIGHTUD); + lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); if (LJ_DUALNUM && t > IRT_NUM) { return LJ_TISNUM; } else { @@ -521,7 +525,9 @@ typedef uint32_t TRef; ** +-------+-------+---+---+---+---+ ** | op1 | op2 | t | o | r | s | ** +-------+-------+---+---+---+---+ -** | op12/i/gco | ot | prev | (alternative fields in union) +** | op12/i/gco32 | ot | prev | (alternative fields in union) +** +-------+-------+---+---+---+---+ +** | TValue/gco64 | (2nd IR slot for 64 bit constants) ** +---------------+-------+-------+ ** 32 16 16 ** @@ -549,22 +555,27 @@ typedef union IRIns { ) }; int32_t i; /* 32 bit signed integer literal (overlaps op12). */ - GCRef gcr; /* GCobj constant (overlaps op12). */ - MRef ptr; /* Pointer constant (overlaps op12). */ + GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */ + MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */ + TValue tv; /* TValue constant (overlaps entire slot). 
*/ } IRIns; -/* TODO_GC64: major changes required. */ -#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) +#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) -#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) -#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) +#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) +#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) #define ir_k64(ir) \ - check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) + check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ + (LJ_GC64 && \ + ((ir)->o == IR_KGC || \ + (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \ + &(ir)[1].tv) #define ir_kptr(ir) \ - check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) + check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ + mref((ir)[LJ_GC64].ptr, void)) /* A store or any other op with a non-weak guard has a side-effect. */ static LJ_AINLINE int ir_sideeff(IRIns *ir) diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 144135a4..c7cd4257 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -78,13 +78,13 @@ typedef struct CCallInfo { #define IRCALLCOND_SOFTFP_FFI(x) NULL #endif -#if LJ_SOFTFP && LJ_TARGET_MIPS +#if LJ_SOFTFP && LJ_TARGET_MIPS32 #define IRCALLCOND_SOFTFP_MIPS(x) x #else #define IRCALLCOND_SOFTFP_MIPS(x) NULL #endif -#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) +#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) #define IRCALLCOND_FP64_FFI(x) x @@ -104,12 +104,6 @@ typedef struct CCallInfo { #define IRCALLCOND_FFI32(x) NULL #endif -#if LJ_TARGET_X86 -#define CCI_RANDFPR 0 /* Clang on OSX/x86 is overzealous. 
*/ -#else -#define CCI_RANDFPR CCI_NOFPRCLOBBER -#endif - #if LJ_SOFTFP #define XA_FP CCI_XA #define XA2_FP (CCI_XA+CCI_XA) @@ -129,40 +123,40 @@ typedef struct CCallInfo { /* Function definitions for CALL* instructions. */ #define IRCALLDEF(_) \ _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ - _(ANY, lj_str_find, 4, N, P32, 0) \ + _(ANY, lj_str_find, 4, N, PGC, 0) \ _(ANY, lj_str_new, 3, S, STR, CCI_L) \ _(ANY, lj_strscan_num, 2, FN, INT, 0) \ _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \ _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \ _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \ - _(ANY, lj_strfmt_putint, 2, FL, P32, 0) \ - _(ANY, lj_strfmt_putnum, 2, FL, P32, 0) \ - _(ANY, lj_strfmt_putquoted, 2, FL, P32, 0) \ - _(ANY, lj_strfmt_putfxint, 3, L, P32, XA_64) \ - _(ANY, lj_strfmt_putfnum_int, 3, L, P32, XA_FP) \ - _(ANY, lj_strfmt_putfnum_uint, 3, L, P32, XA_FP) \ - _(ANY, lj_strfmt_putfnum, 3, L, P32, XA_FP) \ - _(ANY, lj_strfmt_putfstr, 3, L, P32, 0) \ - _(ANY, lj_strfmt_putfchar, 3, L, P32, 0) \ - _(ANY, lj_buf_putmem, 3, S, P32, 0) \ - _(ANY, lj_buf_putstr, 2, FL, P32, 0) \ - _(ANY, lj_buf_putchar, 2, FL, P32, 0) \ - _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \ - _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \ - _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \ - _(ANY, lj_buf_putstr_rep, 3, L, P32, 0) \ - _(ANY, lj_buf_puttab, 5, L, P32, 0) \ + _(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \ + _(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \ + _(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \ + _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \ + _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \ + _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \ + _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \ + _(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \ + _(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \ + _(ANY, lj_buf_putmem, 3, S, PGC, 0) \ + _(ANY, lj_buf_putstr, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putchar, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putstr_lower, 2, FL, PGC, 
0) \ + _(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \ + _(ANY, lj_buf_puttab, 5, L, PGC, 0) \ _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \ _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ - _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ + _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ - _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ - _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_RANDFPR)\ + _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ + _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ _(ANY, lj_vm_modi, 2, FN, INT, 0) \ _(ANY, sinh, 1, N, NUM, XA_FP) \ _(ANY, cosh, 1, N, NUM, XA_FP) \ diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 95374044..8b7a43de 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -36,12 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) return ref; } +LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs); + /* Interning of constants. 
*/ LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); -LJ_FUNC void lj_ir_k64_freeall(jit_State *J); -LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv); -LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J); -LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64); +LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64); LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); @@ -49,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t); LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); +LJ_FUNC TRef lj_ir_ktrace(jit_State *J); #if LJ_64 #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) @@ -75,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) #define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) /* Special 128 bit SIMD constants. */ -#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) -#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) +#define lj_ir_ksimd(J, idx) \ + lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J)) /* Access to constants. 
*/ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); @@ -143,8 +143,8 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key); LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op); LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); -LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc); -LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); +LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); +LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); /* Optimization passes. */ diff --git a/src/lj_jit.h b/src/lj_jit.h index 2d2e833a..3505c63f 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -46,12 +46,16 @@ #define JIT_F_CPU_FIRST JIT_F_SQRT #define JIT_F_CPUSTRING "\4SQRT\5ROUND" #elif LJ_TARGET_MIPS -#define JIT_F_MIPS32R2 0x00000010 +#define JIT_F_MIPSXXR2 0x00000010 /* Names for the CPU-specific flags. Must match the order above. 
*/ -#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 +#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 +#if LJ_TARGET_MIPS32 #define JIT_F_CPUSTRING "\010MIPS32R2" #else +#define JIT_F_CPUSTRING "\010MIPS64R2" +#endif +#else #define JIT_F_CPU_FIRST 0 #define JIT_F_CPUSTRING "" #endif @@ -179,14 +183,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) +#if !LJ_FR2 #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) +#endif #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) #define snap_slot(sn) ((BCReg)((sn) >> 24)) #define snap_isframe(sn) ((sn) & SNAP_FRAME) -#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) +static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) +{ +#if LJ_FR2 + uint64_t pcbase; + memcpy(&pcbase, sn, sizeof(uint64_t)); + return (const BCIns *)(pcbase >> 8); +#else + return (const BCIns *)(uintptr_t)*sn; +#endif +} + /* Snapshot and exit numbers. */ typedef uint32_t SnapNo; typedef uint32_t ExitNo; @@ -308,6 +324,37 @@ enum { LJ_KSIMD__MAX }; +enum { +#if LJ_TARGET_X86ORX64 + LJ_K64_TOBIT, /* 2^52 + 2^51 */ + LJ_K64_2P64, /* 2^64 */ + LJ_K64_M2P64, /* -2^64 */ +#if LJ_32 + LJ_K64_M2P64_31, /* -2^64 or -2^31 */ +#else + LJ_K64_M2P64_31 = LJ_K64_M2P64, +#endif +#endif +#if LJ_TARGET_MIPS + LJ_K64_2P31, /* 2^31 */ +#endif + LJ_K64__MAX, +}; + +enum { +#if LJ_TARGET_X86ORX64 + LJ_K32_M2P64_31, /* -2^64 or -2^31 */ +#endif +#if LJ_TARGET_PPC + LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ + LJ_K32_2P52, /* 2^52 */ +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS + LJ_K32_2P31, /* 2^31 */ +#endif + LJ_K32__MAX +}; + /* Get 16 byte aligned pointer to SIMD constant. 
*/ #define LJ_KSIMD(J, n) \ ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) @@ -324,13 +371,14 @@ enum { /* Fold state is used to fold instructions on-the-fly. */ typedef struct FoldState { IRIns ins; /* Currently emitted instruction. */ - IRIns left; /* Instruction referenced by left operand. */ - IRIns right; /* Instruction referenced by right operand. */ + IRIns left[2]; /* Instruction referenced by left operand. */ + IRIns right[2]; /* Instruction referenced by right operand. */ } FoldState; /* JIT compiler state. */ typedef struct jit_State { GCtrace cur; /* Current trace. */ + GCtrace *curfinal; /* Final address of current trace (set during asm). */ lua_State *L; /* Current Lua state. */ const BCIns *pc; /* Current PC. */ @@ -360,8 +408,9 @@ typedef struct jit_State { int32_t framedepth; /* Current frame depth. */ int32_t retdepth; /* Return frame depth (count of RETF). */ - MRef k64; /* Pointer to chained array of 64 bit constants. */ TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ + TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ + uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ @@ -382,7 +431,7 @@ typedef struct jit_State { GCRef *trace; /* Array of traces. */ TraceNo freetrace; /* Start of scan for next free trace. */ MSize sizetrace; /* Size of trace array. */ - TValue *ktracep; /* Pointer to K64Array slot with GCtrace pointer. */ + IRRef1 ktrace; /* Reference to KGC with GCtrace. */ IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. 
*/ diff --git a/src/lj_obj.h b/src/lj_obj.h index 059eb132..25da9455 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -843,12 +843,16 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p) #endif #if LJ_FR2 -#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)(void *)(f)) +#define contptr(f) ((void *)(f)) +#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) #elif LJ_64 +#define contptr(f) \ + ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) #define setcont(o, f) \ ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) #else -#define setcont(o, f) setlightudV((o), (void *)(f)) +#define contptr(f) ((void *)(f)) +#define setcont(o, f) setlightudV((o), contptr(f)) #endif #define tvchecklive(L, o) \ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index e1d13691..5f4b8810 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -136,8 +136,8 @@ /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) -#define fleft (&J->fold.left) -#define fright (&J->fold.right) +#define fleft (J->fold.left) +#define fright (J->fold.right) #define knumleft (ir_knum(fleft)->n) #define knumright (ir_knum(fright)->n) @@ -502,7 +502,7 @@ LJFOLDF(kfold_strref_snew) PHIBARRIER(ir); fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ fins->op1 = str; - fins->ot = IRT(IR_STRREF, IRT_P32); + fins->ot = IRT(IR_STRREF, IRT_PGC); return RETRYFOLD; } } @@ -998,8 +998,10 @@ LJFOLDF(simplify_nummuldiv_k) if (n == 1.0) { /* x o 1 ==> x */ return LEFTFOLD; } else if (n == -1.0) { /* x o -1 ==> -x */ + IRRef op1 = fins->op1; + fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. 
*/ + fins->op1 = op1; fins->o = IR_NEG; - fins->op2 = (IRRef1)lj_ir_knum_neg(J); return RETRYFOLD; } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ fins->o = IR_ADD; @@ -2393,10 +2395,14 @@ retry: if (fins->op1 >= J->cur.nk) { key += (uint32_t)IR(fins->op1)->o << 10; *fleft = *IR(fins->op1); + if (fins->op1 < REF_TRUE) + fleft[1] = IR(fins->op1)[1]; } if (fins->op2 >= J->cur.nk) { key += (uint32_t)IR(fins->op2)->o; *fright = *IR(fins->op2); + if (fins->op2 < REF_TRUE) + fright[1] = IR(fins->op2)[1]; } else { key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ } diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 5549b0d0..92ecbb48 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -22,8 +22,8 @@ /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) -#define fleft (&J->fold.left) -#define fright (&J->fold.right) +#define fleft (J->fold.left) +#define fright (J->fold.right) /* ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index b1ab5ba8..ca0a0f49 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -517,18 +517,24 @@ static int numisint(lua_Number n) return (n == (lua_Number)lj_num2int(n)); } +/* Convert string to number. Error out for non-numeric string values. */ +static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) +{ + if (tref_isstr(tr)) { + tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); + /* Would need an inverted STRTO for this rare and useless case. */ + if (!lj_strscan_num(strV(o), o)) /* Convert in-place. Value used below. */ + lj_trace_err(J, LJ_TRERR_BADTYPE); /* Punt if non-numeric. */ + } + return tr; +} + /* Narrowing of arithmetic operations. 
*/ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op) { - if (tref_isstr(rb)) { - rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0); - lj_strscan_num(strV(vb), vb); - } - if (tref_isstr(rc)) { - rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); - lj_strscan_num(strV(vc), vc); - } + rb = conv_str_tonum(J, rb, vb); + rc = conv_str_tonum(J, rc, vc); /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && tref_isinteger(rb) && tref_isinteger(rc) && @@ -543,24 +549,21 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, /* Narrowing of unary minus operator. */ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) { - if (tref_isstr(rc)) { - rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); - lj_strscan_num(strV(vc), vc); - } + rc = conv_str_tonum(J, rc, vc); if (tref_isinteger(rc)) { if ((uint32_t)numberVint(vc) != 0x80000000u) return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); } - return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); + return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); } /* Narrowing of modulo operator. */ -TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc) +TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) { TRef tmp; - if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc)) - lj_trace_err(J, LJ_TRERR_BADTYPE); + rb = conv_str_tonum(J, rb, vb); + rc = conv_str_tonum(J, rc, vc); if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) && tref_isinteger(rb) && tref_isinteger(rc) && (tvisint(vc) ? intV(vc) != 0 : !tviszero(vc))) { @@ -577,10 +580,11 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc) } /* Narrowing of power operator or math.pow. 
*/ -TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) +TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) { - if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc)) - lj_trace_err(J, LJ_TRERR_BADTYPE); + rb = conv_str_tonum(J, rb, vb); + rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ + rc = conv_str_tonum(J, rc, vc); /* Narrowing must be unconditional to preserve (-x)^i semantics. */ if (tvisint(vc) || numisint(numV(vc))) { int checkrange = 0; @@ -591,8 +595,6 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) checkrange = 1; } if (!tref_isinteger(rc)) { - if (tref_isstr(rc)) - rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); /* Guarded conversion to integer! */ rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); } diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index 975ee831..af05ef46 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c @@ -153,10 +153,9 @@ static void sink_remark_phi(jit_State *J) remark = 0; for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) { IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - if (((irl->t.irt ^ irr->t.irt) & IRT_MARK)) - remark = 1; - else if (irl->prev == irr->prev) + if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev) continue; + remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK); irt_setmark(IR(ir->op1)->t); irt_setmark(IR(ir->op2)->t); } @@ -166,8 +165,8 @@ static void sink_remark_phi(jit_State *J) /* Sweep instructions and tag sunken allocations and stores. 
*/ static void sink_sweep_ins(jit_State *J) { - IRIns *ir, *irfirst = IR(J->cur.nk); - for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { + IRIns *ir, *irbase = IR(REF_BASE); + for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) { switch (ir->o) { case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { IRIns *ira = sink_checkalloc(J, ir); @@ -217,6 +216,12 @@ static void sink_sweep_ins(jit_State *J) break; } } + for (ir = IR(J->cur.nk); ir < irbase; ir++) { + irt_clearmark(ir->t); + ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ir++; + } } /* Allocation sinking and store sinking. diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 6def4161..884285d2 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -16,6 +16,7 @@ #include "lj_jit.h" #include "lj_ircall.h" #include "lj_iropt.h" +#include "lj_dispatch.h" #include "lj_vm.h" /* SPLIT pass: @@ -353,6 +354,8 @@ static void split_ir(jit_State *J) ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ref++; } /* Process old IR instructions. */ @@ -448,6 +451,11 @@ static void split_ir(jit_State *J) case IR_STRTO: hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; + case IR_FLOAD: + lua_assert(ir->op1 == REF_NIL); + hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); + nir->op2 += LJ_BE*4; + break; case IR_XLOAD: { IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ J->cur.nins--; diff --git a/src/lj_parse.c b/src/lj_parse.c index 610c8614..5df4c6ec 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -2177,6 +2177,8 @@ static void assign_adjust(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e) bcemit_nil(fs, reg, (BCReg)extra); } } + if (nexps > nvars) + ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */ } /* Recursively parse assignment statement. 
*/ @@ -2210,8 +2212,6 @@ static void parse_assignment(LexState *ls, LHSVarList *lh, BCReg nvars) return; } assign_adjust(ls, nvars, nexps, &e); - if (nexps > nvars) - ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */ } /* Assign RHS to LHS and recurse downwards. */ expr_init(&e, VNONRELOC, ls->fs->freereg-1); diff --git a/src/lj_record.c b/src/lj_record.c index d6dd73bc..76699a9f 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -51,7 +51,7 @@ static void rec_check_ir(jit_State *J) { IRRef i, nins = J->cur.nins, nk = J->cur.nk; lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); - for (i = nins-1; i >= nk; i--) { + for (i = nk; i < nins; i++) { IRIns *ir = IR(i); uint32_t mode = lj_ir_mode[ir->o]; IRRef op1 = ir->op1; @@ -61,7 +61,10 @@ static void rec_check_ir(jit_State *J) case IRMref: lua_assert(op1 >= nk); lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; case IRMlit: break; - case IRMcst: lua_assert(i < REF_BIAS); continue; + case IRMcst: lua_assert(i < REF_BIAS); + if (irt_is64(ir->t) && ir->o != IR_KNULL) + i++; + continue; } switch (irm_op2(mode)) { case IRMnone: lua_assert(op2 == 0); break; @@ -84,30 +87,48 @@ static void rec_check_slots(jit_State *J) BCReg s, nslots = J->baseslot + J->maxslot; int32_t depth = 0; cTValue *base = J->L->base - J->baseslot; - lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); - lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); + lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); + lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); lua_assert(nslots < LJ_MAX_JSLOTS); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; if (tr) { cTValue *tv = &base[s]; IRRef ref = tref_ref(tr); - IRIns *ir; - lua_assert(ref >= J->cur.nk && ref < J->cur.nins); - ir = IR(ref); - lua_assert(irt_t(ir->t) == tref_t(tr)); + IRIns *ir = NULL; /* Silence compiler. 
*/ + if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { + lua_assert(ref >= J->cur.nk && ref < J->cur.nins); + ir = IR(ref); + lua_assert(irt_t(ir->t) == tref_t(tr)); + } if (s == 0) { lua_assert(tref_isfunc(tr)); +#if LJ_FR2 + } else if (s == 1) { + lua_assert(0); +#endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); +#if LJ_FR2 + if (ref) + lua_assert(ir_knum(ir)->u64 == tv->u64); + tr = J->slot[s-1]; + ir = IR(tref_ref(tr)); +#endif lua_assert(tref_isfunc(tr)); if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); - lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); + lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) + : (s == delta + LJ_FR2)); depth++; } else if ((tr & TREF_CONT)) { +#if LJ_FR2 + if (ref) + lua_assert(ir_knum(ir)->u64 == tv->u64); +#else lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); - lua_assert((J->slot[s+1] & TREF_FRAME)); +#endif + lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); depth++; } else { if (tvisnumber(tv)) @@ -159,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot) /* Get TRef for current function. */ static TRef getcurrf(jit_State *J) { - if (J->base[-1]) - return J->base[-1]; - lua_assert(J->baseslot == 1); - return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); + if (J->base[-1-LJ_FR2]) + return J->base[-1-LJ_FR2]; + lua_assert(J->baseslot == 1+LJ_FR2); + return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); } /* Compare for raw object equality. @@ -506,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) { BCReg ra = bc_a(iterins); - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. 
*/ J->maxslot = ra-1+bc_b(J->pc[-1]); @@ -643,8 +663,8 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) GCproto *pt = funcproto(fn); /* Too many closures created? Probably not a monomorphic function. */ if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ - TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); - emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); + TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC); + emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt))); (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ return tr; } @@ -675,22 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) { RecordIndex ix; TValue *functv = &J->L->base[func]; - TRef *fbase = &J->base[func]; + TRef kfunc, *fbase = &J->base[func]; ptrdiff_t i; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - for (i = 0; i <= nargs; i++) - (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ + (void)getslot(J, func); /* Ensure func has a reference. */ + for (i = 1; i <= nargs; i++) + (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ ix.tab = fbase[0]; copyTV(J->L, &ix.tabv, functv); if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) lj_trace_err(J, LJ_TRERR_NOMM); - for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ - fbase[i] = fbase[i-1]; + for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ + fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; +#if LJ_FR2 + fbase[2] = fbase[0]; +#endif fbase[0] = ix.mobj; /* Replace function. 
*/ functv = &ix.mobjv; } - fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); + kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); +#if LJ_FR2 + fbase[0] = kfunc; + fbase[1] = TREF_FRAME; +#else + fbase[0] = kfunc | TREF_FRAME; +#endif J->maxslot = (BCReg)nargs; } @@ -700,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) rec_call_setup(J, func, nargs); /* Bump frame. */ J->framedepth++; - J->base += func+1; - J->baseslot += func+1; + J->base += func+1+LJ_FR2; + J->baseslot += func+1+LJ_FR2; } /* Record tail call. */ @@ -717,7 +746,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) func += cbase; } /* Move func + args down. */ - memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); + if (LJ_FR2 && J->baseslot == 2) + J->base[func+1] = 0; + memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ /* Tailcalls can form a loop, so count towards the loop unroll limit. */ if (++J->tailcalled > J->loopunroll) @@ -758,9 +789,9 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) (void)getslot(J, rbase+i); /* Ensure all results have a reference. */ while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */ BCReg cbase = (BCReg)frame_delta(frame); - if (--J->framedepth < 0) + if (--J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lua_assert(J->baseslot > 1+LJ_FR2); gotresults++; rbase += cbase; J->baseslot -= (BCReg)cbase; @@ -784,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. 
*/ lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lua_assert(J->baseslot > 1+LJ_FR2); rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; @@ -794,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCIns callins = *(frame_pc(frame)-1); ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; BCReg cbase = bc_a(callins); - GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2))); - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */ + GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); if ((pt->flags & PROTO_NOJIT)) lj_trace_err(J, LJ_TRERR_CJITOFF); if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { @@ -808,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_snap_add(J); } for (i = 0; i < nresults; i++) /* Adjust results. */ - J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; + J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL; J->maxslot = cbase+(BCReg)nresults; if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ J->framedepth--; - lua_assert(J->baseslot > cbase+1); - J->baseslot -= cbase+1; - J->base -= cbase+1; + lua_assert(J->baseslot > cbase+1+LJ_FR2); + J->baseslot -= cbase+1+LJ_FR2; + J->base -= cbase+1+LJ_FR2; } else if (J->parent == 0 && J->exitno == 0 && !bc_isret(bc_op(J->cur.startins))) { /* Return to lower frame would leave the loop in a root trace. */ @@ -824,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { /* Return to lower frame. Guard for the target we return to. 
*/ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); - emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); + emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); J->retdepth++; J->needsnap = 1; - lua_assert(J->baseslot == 1); + lua_assert(J->baseslot == 1+LJ_FR2); /* Shift result slots up and clear the slots of the new frame below. */ - memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); - memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); + memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); + memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); } } else if (frame_iscont(frame)) { /* Return to continuation frame. */ ASMFunction cont = frame_contf(frame); @@ -839,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_NYIRETL); J->baseslot -= (BCReg)cbase; J->base -= cbase; - J->maxslot = cbase-2; + J->maxslot = cbase-(2<base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } else if (cont == lj_cont_nop) { /* Nothing to do here. */ } else if (cont == lj_cont_cat) { BCReg bslot = bc_b(*(frame_contpc(frame)-1)); TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; - if (bslot != cbase-2) { /* Concatenate the remainder. */ + if (bslot != J->maxslot) { /* Concatenate the remainder. */ TValue *b = J->L->base, save; /* Simulate lower frame and result. */ - J->base[cbase-2] = tr; - copyTV(J->L, &save, b-2); - if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2); + J->base[J->maxslot] = tr; + copyTV(J->L, &save, b-(2<L, b-(2<L->base = b - cbase; - tr = rec_cat(J, bslot, cbase-2); + tr = rec_cat(J, bslot, cbase-(2<L->base + cbase; /* Undo. 
*/ J->L->base = b; - copyTV(J->L, b-2, &save); + copyTV(J->L, b-(2<base[dst] = tr; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } /* Otherwise continue with another __concat call. */ } else { /* Result type already specialized. */ @@ -873,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ } - lua_assert(J->baseslot >= 1); + lua_assert(J->baseslot >= 1+LJ_FR2); } /* -- Metamethod handling ------------------------------------------------- */ @@ -882,16 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) { BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; -#if LJ_64 - TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin)); +#if LJ_FR2 + J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[top+1] = TREF_CONT; #else - TRef trcont = lj_ir_kptr(J, (void *)cont); + J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; #endif - J->base[top] = trcont | TREF_CONT; J->framedepth++; for (s = J->maxslot; s < top; s++) J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ - return top+1; + return top+1+LJ_FR2; } /* Record metamethod lookup. */ @@ -910,7 +947,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) cTValue *mo; if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { /* Specialize to the C library namespace object. */ - emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); + emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); } else { /* Specialize to the type of userdata. */ TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); @@ -939,7 +976,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } /* The cdata metatable is treated as immutable. 
*/ if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; +#if LJ_GC64 + /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ + ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, + GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); +#else ix->mt = mix.tab = lj_ir_ktab(J, mt); +#endif goto nocheck; } ix->mt = mt ? mix.tab : TREF_NIL; @@ -969,9 +1012,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - base[1] = ix->tab; base[2] = ix->key; - copyTV(J->L, basev+1, &ix->tabv); - copyTV(J->L, basev+2, &ix->keyv); + base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; + copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); + copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ if (mm != MM_unm) { ix->tab = ix->key; @@ -982,8 +1025,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) lj_trace_err(J, LJ_TRERR_NOMM); } ok: - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[0] = ix->mobj; +#if LJ_FR2 + base[1] = 0; +#endif copyTV(J->L, basev+0, &ix->mobjv); lj_record_call(J, func, 2); return 0; /* No result yet. */ @@ -999,8 +1044,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) BCReg func = rec_mm_prep(J, lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); + base += LJ_FR2; + basev += LJ_FR2; base[1] = tr; copyTV(J->L, basev+1, tv); #if LJ_52 base[2] = tr; copyTV(J->L, basev+2, tv); @@ -1020,11 +1066,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) { BCReg func = rec_mm_prep(J, (op&1) ? 
lj_cont_condf : lj_cont_condt); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; - copyTV(J->L, tv+0, &ix->mobjv); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; + copyTV(J->L, tv-LJ_FR2, &ix->mobjv); copyTV(J->L, tv+1, &ix->valv); copyTV(J->L, tv+2, &ix->keyv); lj_record_call(J, func, 2); @@ -1257,8 +1302,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, if ((MSize)k < t->asize) { /* Currently an array key? */ TRef arrayref; rec_idx_abc(J, asizeref, ikey, t->asize); - arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); - return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); + arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY); + return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey); } else { /* Currently not in array (may be an array extension)? */ emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ if (k == 0 && tref_isk(key)) @@ -1298,13 +1343,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, *rbguard = J->guardemit; hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); - node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); + node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE); kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); - return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); + return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot); } } /* Fall back to a regular hash lookup. */ - return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); + return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key); } /* Determine whether a key is NOT one of the fast metamethod names. 
*/ @@ -1341,11 +1386,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) handlemm: if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; - setfuncV(J->L, tv+0, funcV(&ix->mobjv)); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; + setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); copyTV(J->L, tv+1, &ix->tabv); copyTV(J->L, tv+2, &ix->keyv); if (ix->val) { @@ -1387,7 +1431,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) IRType t = itype2irt(oldv); TRef res; if (oldv == niltvg(J2G(J))) { - emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); + emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); res = TREF_NIL; } else { res = emitir(IRTG(loadop, t), xref, 0); @@ -1417,7 +1461,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) if (hasmm) emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ else if (xrefop == IR_HREF) - emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), + emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { lua_assert(hasmm); @@ -1428,7 +1472,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) TRef key = ix->key; if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); - xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); + xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); keybarrier = 0; /* NEWREF already takes care of the key barrier. */ #ifdef LUAJIT_ENABLE_TABLE_BUMP if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. 
*/ @@ -1438,7 +1482,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { /* Cannot derive that the previous value was non-nil, must do checks. */ if (xrefop == IR_HREF) /* Guard against store to niltv. */ - emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); + emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); if (ix->idxchain) { /* Metamethod lookup required? */ /* A check for NULL metatable is cheaper (hoistable) than a load. */ if (!mt) { @@ -1460,7 +1504,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); /* Invalidate neg. metamethod cache for stores with certain string keys. */ if (!nommstr(J, ix->key)) { - TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); + TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM); emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); } J->needsnap = 1; @@ -1535,7 +1579,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) goto noconstify; kfunc = lj_ir_kfunc(J, J->fn); emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); - J->base[-1] = TREF_FRAME | kfunc; +#if LJ_FR2 + J->base[-2] = kfunc; +#else + J->base[-1] = kfunc | TREF_FRAME; +#endif fn = kfunc; } tr = lj_record_constify(J, uvval(uvp)); @@ -1546,13 +1594,17 @@ noconstify: /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); if (!uvp->closed) { + uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); /* In current stack? */ if (uvval(uvp) >= tvref(J->L->stack) && uvval(uvp) < tvref(J->L->maxstack)) { int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); if (slot >= 0) { /* Aliases an SSA slot? */ + emitir(IRTG(IR_EQ, IRT_PGC), + REF_BASE, + emitir(IRT(IR_ADD, IRT_PGC), uref, + lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! 
*/ - /* NYI: add IR to guard that it's still aliasing the same slot. */ if (val == 0) { return getslot(J, slot); } else { @@ -1562,10 +1614,12 @@ noconstify: } } } - uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); + emitir(IRTG(IR_UGT, IRT_PGC), + emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), + lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); } else { needbarrier = 1; - uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); + uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); } if (val == 0) { /* Upvalue load */ IRType t = itype2irt(uvval(uvp)); @@ -1640,11 +1694,14 @@ static void rec_func_setup(jit_State *J) static void rec_func_vararg(jit_State *J) { GCproto *pt = J->pt; - BCReg s, fixargs, vframe = J->maxslot+1; + BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; lua_assert((pt->flags & PROTO_VARARG)); if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); - J->base[vframe-1] = J->base[-1]; /* Copy function up. */ + J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ +#if LJ_FR2 + J->base[vframe-1] = TREF_FRAME; +#endif /* Copy fixarg slots up and set their original slots to nil. */ fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; for (s = 0; s < fixargs; s++) { @@ -1706,7 +1763,7 @@ static int select_detect(jit_State *J) static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) { int32_t numparams = J->pt->numparams; - ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; + ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; lua_assert(frame_isvarg(J->L->base-1)); if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ ptrdiff_t i; @@ -1718,10 +1775,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) J->maxslot = dst + (BCReg)nresults; } for (i = 0; i < nresults; i++) - J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; + J->base[dst+i] = i < nvararg ? 
getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; } else { /* Unknown number of varargs passed to trace. */ - TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); - int32_t frofs = 8*(1+numparams)+FRAME_VARG; + TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); + int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; if (nresults >= 0) { /* Known fixed number of results. */ ptrdiff_t i; if (nvararg > 0) { @@ -1732,11 +1789,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) else emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); - vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); + vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); for (i = 0; i < nload; i++) { - IRType t = itype2irt(&J->L->base[i-1-nvararg]); - TRef aref = emitir(IRT(IR_AREF, IRT_P32), + IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); + TRef aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, lj_ir_kint(J, (int32_t)i)); TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ @@ -1782,15 +1839,16 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) } if (idx != 0 && idx <= nvararg) { IRType t; - TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); - t = itype2irt(&J->L->base[idx-2-nvararg]); - aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); + TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, + lj_ir_kint(J, frofs-(8<L->base[idx-2-LJ_FR2-nvararg]); + aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); tr = emitir(IRTG(IR_VLOAD, t), aref, 0); if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. 
*/ } - J->base[dst-2] = tr; - J->maxslot = dst-1; + J->base[dst-2-LJ_FR2] = tr; + J->maxslot = dst-1-LJ_FR2; J->bcskip = 2; /* Skip CALLM + select. */ } else { nyivarg: @@ -1839,10 +1897,10 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) break; } xbase = ++trp; - tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32), + tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC), lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); do { - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++); } while (trp <= top); tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); J->maxslot = (BCReg)(xbase - J->base); @@ -1883,7 +1941,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ +#if LJ_FR2 + SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; + uint64_t pcbase; + memcpy(&pcbase, flink, sizeof(uint64_t)); + pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); + memcpy(flink, &pcbase, sizeof(uint64_t)); +#else J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); +#endif J->needsnap = 1; if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); lj_snap_shrink(J); /* Shrink last snapshot if possible. */ @@ -2159,14 +2225,14 @@ void lj_record_ins(jit_State *J) case BC_MODVN: case BC_MODVV: recmod: if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) - rc = lj_opt_narrow_mod(J, rb, rc, rcv); + rc = lj_opt_narrow_mod(J, rb, rc, rbv, rcv); else rc = rec_mm_arith(J, &ix, MM_mod); break; case BC_POW: if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) - rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv); + rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv); else rc = rec_mm_arith(J, &ix, MM_pow); break; @@ -2181,7 +2247,13 @@ void lj_record_ins(jit_State *J) case BC_MOV: /* Clear gap of method call to avoid resurrecting previous refs. 
*/ - if (ra > J->maxslot) J->base[ra-1] = 0; + if (ra > J->maxslot) { +#if LJ_FR2 + memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); +#else + J->base[ra-1] = 0; +#endif + } break; case BC_KSTR: case BC_KNUM: case BC_KPRI: break; @@ -2250,14 +2322,14 @@ void lj_record_ins(jit_State *J) /* -- Calls and vararg handling ----------------------------------------- */ case BC_ITERC: - J->base[ra] = getslot(J, ra-3-LJ_FR2); - J->base[ra+1] = getslot(J, ra-2-LJ_FR2); - J->base[ra+2] = getslot(J, ra-1-LJ_FR2); + J->base[ra] = getslot(J, ra-3); + J->base[ra+1+LJ_FR2] = getslot(J, ra-2); + J->base[ra+2+LJ_FR2] = getslot(J, ra-1); { /* Do the actual copy now because lj_record_call needs the values. */ TValue *b = &J->L->base[ra]; - copyTV(J->L, b, b-3-LJ_FR2); - copyTV(J->L, b+1, b-2-LJ_FR2); - copyTV(J->L, b+2, b-1-LJ_FR2); + copyTV(J->L, b, b-3); + copyTV(J->L, b+1+LJ_FR2, b-2); + copyTV(J->L, b+2+LJ_FR2, b-1); } lj_record_call(J, ra, (ptrdiff_t)rc-1); break; @@ -2380,7 +2452,12 @@ void lj_record_ins(jit_State *J) /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ if (bcmode_a(op) == BCMdst && rc) { J->base[ra] = rc; - if (ra >= J->maxslot) J->maxslot = ra+1; + if (ra >= J->maxslot) { +#if LJ_FR2 + if (ra > J->maxslot) J->base[ra-1] = 0; +#endif + J->maxslot = ra+1; + } } #undef rav @@ -2465,7 +2542,7 @@ void lj_record_setup(jit_State *J) J->scev.idx = REF_NIL; setmref(J->scev.pc, NULL); - J->baseslot = 1; /* Invoking function is at base[-1]. */ + J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */ J->base = J->slot + J->baseslot; J->maxslot = 0; J->framedepth = 0; @@ -2480,7 +2557,7 @@ void lj_record_setup(jit_State *J) J->bc_extent = ~(MSize)0; /* Emit instructions for fixed references. Also triggers initial IR alloc. 
*/ - emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); + emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno); for (i = 0; i <= 2; i++) { IRIns *ir = IR(REF_NIL-i); ir->i = 0; diff --git a/src/lj_snap.c b/src/lj_snap.c index 8638d9ed..48259972 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); +#if LJ_FR2 + if (s == 1) continue; + if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { + TValue *base = J->L->base - J->baseslot; + tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); + ref = tref_ref(tr); + } +#endif if (ref) { SnapEntry sn = SNAP_TR(s, tr); IRIns *ir = &J->cur.ir[ref]; - if (!(sn & (SNAP_CONT|SNAP_FRAME)) && + if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { /* No need to snapshot unmodified non-inherited slots. */ if (!(ir->op2 & IRSLOAD_INHERIT)) @@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) } /* Add frame links at the end of the snapshot. */ -static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) +static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) { cTValue *frame = J->L->base - 1; - cTValue *lim = J->L->base - J->baseslot; + cTValue *lim = J->L->base - J->baseslot + LJ_FR2; GCfunc *fn = frame_func(frame); cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; +#if LJ_FR2 + uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); + lua_assert(2 <= J->baseslot && J->baseslot <= 257); + memcpy(map, &pcbase, sizeof(uint64_t)); +#else MSize f = 0; - lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ +#endif while (frame > lim) { /* Backwards traversal of all frames above base. 
*/ if (frame_islua(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKPC(frame_pc(frame)); +#endif frame = frame_prevl(frame); } else if (frame_iscont(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame)); +#endif frame = frame_prevd(frame); } else { lua_assert(!frame_isc(frame)); +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); +#endif frame = frame_prevd(frame); continue; } if (frame + funcproto(frame_func(frame))->framesize > ftop) ftop = frame + funcproto(frame_func(frame))->framesize; } + *topslot = (uint8_t)(ftop - lim); +#if LJ_FR2 + lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); + return 2; +#else lua_assert(f == (MSize)(1 + J->framedepth)); - return (BCReg)(ftop - lim); + return f; +#endif } /* Take a snapshot of the current stack. */ @@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) MSize nent; SnapEntry *p; /* Conservative estimate. */ - lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); + lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); - snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); + snap->nent = (uint8_t)nent; + nent += snapshot_framelinks(J, p + nent, &snap->topslot); snap->mapofs = (uint16_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; - snap->nent = (uint8_t)nent; snap->nslots = (uint8_t)nslots; snap->count = 0; - J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); + J->cur.nsnapmap = (uint16_t)(nsnapmap + nent); } /* Add or merge a snapshot. 
*/ @@ -284,8 +309,8 @@ void lj_snap_shrink(jit_State *J) MSize n, m, nlim, nent = snap->nent; uint8_t udf[SNAP_USEDEF_SLOTS]; BCReg maxslot = J->maxslot; - BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); BCReg baseslot = J->baseslot; + BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); maxslot += baseslot; minslot += baseslot; snap->nslots = (uint8_t)maxslot; @@ -371,8 +396,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) case IR_KPRI: return TREF_PRI(irt_type(ir->t)); case IR_KINT: return lj_ir_kint(J, ir->i); case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); - case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); - case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); + case IR_KNUM: case IR_KINT64: + return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ default: lua_assert(0); return TREF_NIL; break; } @@ -555,8 +580,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { uint64_t k = (uint32_t)T->ir[irs->op2].i + ((uint64_t)T->ir[(irs+1)->op2].i << 32); - val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, - lj_ir_k64_find(J, k)); + val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k); } else { val = emitir_raw(IRT(IR_HIOP, t), val, snap_pref(J, T, map, nent, seen, (irs+1)->op2)); @@ -599,7 +623,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); - lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */ if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. 
*/ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { @@ -608,9 +631,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif - } else if (LJ_64 && irt_islightud(t)) { +#if LJ_64 && !LJ_GC64 + } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = *(uint64_t *)sps; +#endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); @@ -628,9 +653,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); #endif - } else if (LJ_64 && irt_is64(t)) { +#if LJ_64 && !LJ_GC64 + } else if (irt_is64(t)) { /* 64 bit values that already have the tag bits. */ o->u64 = ex->gpr[r-RID_MIN_GPR]; +#endif } else if (irt_ispri(t)) { setpriV(o, irt_toitype(t)); } else { @@ -651,7 +678,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, uint64_t tmp; if (irref_isk(ref)) { if (ir->o == IR_KNUM || ir->o == IR_KINT64) { - src = mref(ir->ptr, int32_t); + src = (int32_t *)&ir[1]; } else if (sz == 8) { tmp = (uint64_t)(uint32_t)ir->i; src = (int32_t *)&tmp; @@ -795,11 +822,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; - SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; +#if !LJ_FR2 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; +#endif +#if !LJ_FR2 ptrdiff_t ftsz0; +#endif TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); - const BCIns *pc = snap_pc(map[nent]); + const BCIns *pc = snap_pc(&map[nent]); lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. 
*/ @@ -812,8 +843,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) } /* Fill stack slots with data from the registers and spill slots. */ - frame = L->base-1; + frame = L->base-1-LJ_FR2; +#if !LJ_FR2 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ +#endif for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; if (!(sn & SNAP_NORESTORE)) { @@ -836,14 +869,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) TValue tmp; snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); o->u32.hi = tmp.u32.lo; +#if !LJ_FR2 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { - lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ /* Overwrite tag with frame link. */ setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); L->base = o+1; +#endif } } } +#if LJ_FR2 + L->base += (map[nent+LJ_BE] & 0xff); +#endif lua_assert(map + nent == flinks); /* Compute current stack top. */ diff --git a/src/lj_state.c b/src/lj_state.c index 66bf439f..a3bfc45e 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -180,7 +180,7 @@ static void close_state(lua_State *L) g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); } -#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) +#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) lua_State *lj_state_newstate(lua_Alloc f, void *ud) #else LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index 7c7d81d3..04c71e88 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -98,11 +98,15 @@ char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) uint32_t u = (uint32_t)k; if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } if (u < 10000) { - if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3; + if (u < 10) goto dig1; + if (u < 100) goto dig2; + if (u < 1000) goto dig3; } else { uint32_t v = u / 10000; u -= v * 10000; if (v < 10000) { - if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 
1000) goto dig7; + if (v < 10) goto dig5; + if (v < 100) goto dig6; + if (v < 1000) goto dig7; } else { uint32_t w = v / 10000; v -= w * 10000; if (w >= 10) WINT_R(w, 10, 10) diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h index bafa817a..6a7d4b50 100644 --- a/src/lj_target_mips.h +++ b/src/lj_target_mips.h @@ -82,11 +82,15 @@ enum { #if LJ_SOFTFP #define RSET_FPR 0 #else +#if LJ_32 #define RSET_FPR \ (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) +#else +#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) +#endif #endif #define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_INIT RSET_ALL @@ -97,23 +101,37 @@ enum { #if LJ_SOFTFP #define RSET_SCRATCH_FPR 0 #else +#if LJ_32 #define RSET_SCRATCH_FPR \ (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ RID2RSET(RID_F16)|RID2RSET(RID_F18)) +#else +#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24) +#endif #endif #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) #define REGARG_FIRSTGPR RID_R4 +#if LJ_32 #define REGARG_LASTGPR RID_R7 #define REGARG_NUMGPR 4 +#else +#define REGARG_LASTGPR RID_R11 +#define REGARG_NUMGPR 8 +#endif #if LJ_ABI_SOFTFP #define REGARG_FIRSTFPR 0 #define REGARG_LASTFPR 0 #define REGARG_NUMFPR 0 #else #define REGARG_FIRSTFPR RID_F12 +#if LJ_32 #define REGARG_LASTFPR RID_F14 #define REGARG_NUMFPR 2 +#else +#define REGARG_LASTFPR RID_F19 +#define REGARG_NUMFPR 8 +#endif #endif /* -- Spill slots --------------------------------------------------------- */ @@ -125,7 +143,11 @@ enum { ** ** SPS_FIRST: First spill slot for general use. 
*/ +#if LJ_32 #define SPS_FIXED 5 +#else +#define SPS_FIXED 4 +#endif #define SPS_FIRST 4 #define SPOFS_TMP 0 @@ -140,7 +162,7 @@ typedef struct { #if !LJ_SOFTFP lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ #endif - int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ int32_t spill[256]; /* Spill slots. */ } ExitState; @@ -172,7 +194,7 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) typedef enum MIPSIns { /* Integer instructions. */ - MIPSI_MOVE = 0x00000021, + MIPSI_MOVE = 0x00000025, MIPSI_NOP = 0x00000000, MIPSI_LI = 0x24000000, @@ -204,19 +226,20 @@ typedef enum MIPSIns { MIPSI_SLL = 0x00000000, MIPSI_SRL = 0x00000002, MIPSI_SRA = 0x00000003, - MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ + MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ MIPSI_SLLV = 0x00000004, MIPSI_SRLV = 0x00000006, MIPSI_SRAV = 0x00000007, - MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ + MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ - MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ - MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ - MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ + MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ + MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ + MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ MIPSI_B = 0x10000000, MIPSI_J = 0x08000000, MIPSI_JAL = 0x0c000000, + MIPSI_JALX = 0x74000000, MIPSI_JR = 0x00000008, MIPSI_JALR = 0x0000f809, @@ -241,6 +264,15 @@ typedef enum MIPSIns { MIPSI_LDC1 = 0xd4000000, MIPSI_SDC1 = 0xf4000000, + /* MIPS64 instructions. */ + MIPSI_DSLL = 0x00000038, + MIPSI_LD = 0xdc000000, + MIPSI_DADDIU = 0x64000000, + MIPSI_SD = 0xfc000000, + MIPSI_DMFC1 = 0x44200000, + MIPSI_DSRA32 = 0x0000003f, + MIPSI_MFHC1 = 0x44600000, + /* FP instructions. 
*/ MIPSI_MOV_S = 0x46000006, MIPSI_MOV_D = 0x46200006, diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index e29f4748..d5429597 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -22,7 +22,7 @@ _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) #endif #define VRIDDEF(_) \ - _(MRM) + _(MRM) _(RIP) #define RIDENUM(name) RID_##name, @@ -31,6 +31,7 @@ enum { FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ RID_MAX, RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ + RID_RIP = RID_MAX+1, /* Pseudo-id for RIP (x64 only). */ /* Calling conventions. */ RID_SP = RID_ESP, @@ -63,8 +64,10 @@ enum { /* -- Register sets ------------------------------------------------------- */ -/* Make use of all registers, except the stack pointer. */ -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) +/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ + - RID2RSET(RID_ESP) \ + - LJ_GC64*RID2RSET(RID_DISPATCH)) #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) #define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_INIT RSET_ALL @@ -200,6 +203,7 @@ typedef struct { */ typedef enum { /* Fixed length opcodes. XI_* prefix. */ + XI_O16 = 0x66, XI_NOP = 0x90, XI_XCHGa = 0x90, XI_CALL = 0xe8, @@ -217,6 +221,7 @@ typedef enum { XI_PUSHi8 = 0x6a, XI_TESTb = 0x84, XI_TEST = 0x85, + XI_INT3 = 0xcc, XI_MOVmi = 0xc7, XI_GROUP5 = 0xff, @@ -243,6 +248,7 @@ typedef enum { XV_SHRX = XV_f20f38(f7), /* Variable-length opcodes. XO_* prefix. */ + XO_OR = XO_(0b), XO_MOV = XO_(8b), XO_MOVto = XO_(89), XO_MOVtow = XO_66(89), diff --git a/src/lj_trace.c b/src/lj_trace.c index 7970aba6..87146832 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -117,15 +117,26 @@ static void perftools_addtrace(GCtrace *T) } #endif -/* Allocate space for copy of trace. */ -static GCtrace *trace_save_alloc(jit_State *J) +/* Allocate space for copy of T. 
*/ +GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T) { size_t sztr = ((sizeof(GCtrace)+7)&~7); - size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); + size_t szins = (T->nins-T->nk)*sizeof(IRIns); size_t sz = sztr + szins + - J->cur.nsnap*sizeof(SnapShot) + - J->cur.nsnapmap*sizeof(SnapEntry); - return lj_mem_newt(J->L, (MSize)sz, GCtrace); + T->nsnap*sizeof(SnapShot) + + T->nsnapmap*sizeof(SnapEntry); + GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace); + char *p = (char *)T2 + sztr; + T2->gct = ~LJ_TTRACE; + T2->marked = 0; + T2->traceno = 0; + T2->ir = (IRIns *)p - T->nk; + T2->nins = T->nins; + T2->nk = T->nk; + T2->nsnap = T->nsnap; + T2->nsnapmap = T->nsnapmap; + memcpy(p, T->ir + T->nk, szins); + return T2; } /* Save current trace by copying and compacting it. */ @@ -139,12 +150,12 @@ static void trace_save(jit_State *J, GCtrace *T) setgcrefp(J2G(J)->gc.root, T); newwhite(J2G(J), T); T->gct = ~LJ_TTRACE; - T->ir = (IRIns *)p - J->cur.nk; - memcpy(p, J->cur.ir+J->cur.nk, szins); + T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */ p += szins; TRACE_APPENDVEC(snap, nsnap, SnapShot) TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) J->cur.traceno = 0; + J->curfinal = NULL; setgcrefp(J->trace[T->traceno], T); lj_gc_barriertrace(J2G(J), T->traceno); lj_gdbjit_addtrace(J, T); @@ -284,7 +295,6 @@ int lj_trace_flushall(lua_State *L) memset(J->penalty, 0, sizeof(J->penalty)); /* Free the whole machine code and invalidate all exit stub groups. */ lj_mcode_free(J); - lj_ir_k64_freeall(J); memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); lj_vmevent_send(L, TRACE, setstrV(L, L->top++, lj_str_newlit(L, "flush")); @@ -297,13 +307,35 @@ void lj_trace_initstate(global_State *g) { jit_State *J = G2J(g); TValue *tv; - /* Initialize SIMD constants. */ + + /* Initialize aligned SIMD constants. 
*/ tv = LJ_KSIMD(J, LJ_KSIMD_ABS); tv[0].u64 = U64x(7fffffff,ffffffff); tv[1].u64 = U64x(7fffffff,ffffffff); tv = LJ_KSIMD(J, LJ_KSIMD_NEG); tv[0].u64 = U64x(80000000,00000000); tv[1].u64 = U64x(80000000,00000000); + + /* Initialize 32/64 bit constants. */ +#if LJ_TARGET_X86ORX64 + J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); + J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); +#if LJ_32 + J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); +#endif + J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; +#endif +#if LJ_TARGET_PPC + J->k32[LJ_K32_2P52_2P31] = 0x59800004; + J->k32[LJ_K32_2P52] = 0x59800000; +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS + J->k32[LJ_K32_2P31] = 0x4f000000; +#endif +#if LJ_TARGET_MIPS + J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); +#endif } /* Free everything associated with the JIT compiler state. */ @@ -318,7 +350,6 @@ void lj_trace_freestate(global_State *g) } #endif lj_mcode_free(J); - lj_ir_k64_freeall(J); lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); @@ -403,7 +434,7 @@ static void trace_start(jit_State *J) J->postproc = LJ_POST_NONE; lj_resetsplit(J); J->retryrec = 0; - J->ktracep = NULL; + J->ktrace = 0; setgcref(J->cur.startpt, obj2gco(J->pt)); L = J->L; @@ -427,7 +458,7 @@ static void trace_stop(jit_State *J) BCOp op = bc_op(J->cur.startins); GCproto *pt = &gcref(J->cur.startpt)->pt; TraceNo traceno = J->cur.traceno; - GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ + GCtrace *T = J->curfinal; lua_State *L; switch (op) { @@ -479,9 +510,6 @@ static void trace_stop(jit_State *J) lj_mcode_commit(J, J->cur.mcode); J->postproc = LJ_POST_NONE; trace_save(J, T); - if (J->ktracep) { /* Patch K64Array slot with the final GCtrace pointer. 
*/ - setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE); - } L = J->L; lj_vmevent_send(L, TRACE, @@ -515,6 +543,10 @@ static int trace_abort(jit_State *J) J->postproc = LJ_POST_NONE; lj_mcode_abort(J); + if (J->curfinal) { + lj_trace_free(J2G(J), J->curfinal); + J->curfinal = NULL; + } if (tvisnumber(L->top-1)) e = (TraceError)numberVint(L->top-1); if (e == LJ_TRERR_MCODELM) { diff --git a/src/lj_trace.h b/src/lj_trace.h index 6faa1aa3..5658d8a5 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h @@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e); LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); /* Trace management. */ +LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T); LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); LJ_FUNC void lj_trace_reenableproto(GCproto *pt); LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); diff --git a/src/lj_vm.h b/src/lj_vm.h index be35295d..d605b143 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -17,6 +17,10 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud, LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); +#if LJ_ABI_WIN && LJ_TARGET_X86 +LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec, + void *unwinder, int errcode); +#endif LJ_ASMF void lj_vm_unwind_c_eh(void); LJ_ASMF void lj_vm_unwind_ff_eh(void); #if LJ_TARGET_X86ORX64 diff --git a/src/luajit.c b/src/luajit.c index 00e12bd3..e582f469 100644 --- a/src/luajit.c +++ b/src/luajit.c @@ -152,22 +152,15 @@ static void print_jit_status(lua_State *L) putc('\n', stdout); } -static int getargs(lua_State *L, char **argv, int n) +static void createargtable(lua_State *L, char **argv, int argc, int argf) { - int narg; int i; - int argc = 0; - while (argv[argc]) argc++; /* count total number of 
arguments */ - narg = argc - (n + 1); /* number of arguments to the script */ - luaL_checkstack(L, narg + 3, "too many arguments to script"); - for (i = n+1; i < argc; i++) - lua_pushstring(L, argv[i]); - lua_createtable(L, narg, n + 1); + lua_createtable(L, argc - argf, argf); for (i = 0; i < argc; i++) { lua_pushstring(L, argv[i]); - lua_rawseti(L, -2, i - n); + lua_rawseti(L, -2, i - argf); } - return narg; + lua_setglobal(L, "arg"); } static int dofile(lua_State *L, const char *name) @@ -273,21 +266,30 @@ static void dotty(lua_State *L) progname = oldprogname; } -static int handle_script(lua_State *L, char **argv, int n) +static int handle_script(lua_State *L, char **argx) { int status; - const char *fname; - int narg = getargs(L, argv, n); /* collect arguments */ - lua_setglobal(L, "arg"); - fname = argv[n]; - if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0) + const char *fname = argx[0]; + if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0) fname = NULL; /* stdin */ status = luaL_loadfile(L, fname); - lua_insert(L, -(narg+1)); - if (status == 0) + if (status == 0) { + /* Fetch args from arg table. LUA_INIT or -e might have changed them. */ + int narg = 0; + lua_getglobal(L, "arg"); + if (lua_istable(L, -1)) { + do { + narg++; + lua_rawgeti(L, -narg, narg); + } while (!lua_isnil(L, -1)); + lua_pop(L, 1); + lua_remove(L, -narg); + narg--; + } else { + lua_pop(L, 1); + } status = docall(L, narg, 0); - else - lua_pop(L, narg); + } return report(L, status); } @@ -384,7 +386,8 @@ static int dobytecode(lua_State *L, char **argv) } for (argv++; *argv != NULL; narg++, argv++) lua_pushstring(L, *argv); - return report(L, lua_pcall(L, narg, 0, 0)); + report(L, lua_pcall(L, narg, 0, 0)); + return -1; } /* check that argument has no extra characters at the end */ @@ -405,7 +408,7 @@ static int collectargs(char **argv, int *flags) switch (argv[i][1]) { /* Check option. */ case '-': notail(argv[i]); - return (argv[i+1] != NULL ? 
i+1 : 0); + return i+1; case '\0': return i; case 'i': @@ -430,23 +433,23 @@ static int collectargs(char **argv, int *flags) case 'b': /* LuaJIT extension */ if (*flags) return -1; *flags |= FLAGS_EXEC; - return 0; + return i+1; case 'E': *flags |= FLAGS_NOENV; break; default: return -1; /* invalid option */ } } - return 0; + return i; } -static int runargs(lua_State *L, char **argv, int n) +static int runargs(lua_State *L, char **argv, int argn) { int i; - for (i = 1; i < n; i++) { + for (i = 1; i < argn; i++) { if (argv[i] == NULL) continue; lua_assert(argv[i][0] == '-'); - switch (argv[i][1]) { /* option */ + switch (argv[i][1]) { case 'e': { const char *chunk = argv[i] + 2; if (*chunk == '\0') chunk = argv[++i]; @@ -460,10 +463,10 @@ static int runargs(lua_State *L, char **argv, int n) if (*filename == '\0') filename = argv[++i]; lua_assert(filename != NULL); if (dolibrary(L, filename)) - return 1; /* stop if file fails */ + return 1; break; } - case 'j': { /* LuaJIT extension */ + case 'j': { /* LuaJIT extension. */ const char *cmd = argv[i] + 2; if (*cmd == '\0') cmd = argv[++i]; lua_assert(cmd != NULL); @@ -471,11 +474,11 @@ static int runargs(lua_State *L, char **argv, int n) return 1; break; } - case 'O': /* LuaJIT extension */ + case 'O': /* LuaJIT extension. */ if (dojitopt(L, argv[i] + 2)) return 1; break; - case 'b': /* LuaJIT extension */ + case 'b': /* LuaJIT extension. */ return dobytecode(L, argv+i); default: break; } @@ -508,45 +511,57 @@ static int pmain(lua_State *L) { struct Smain *s = &smain; char **argv = s->argv; - int script; + int argn; int flags = 0; globalL = L; if (argv[0] && argv[0][0]) progname = argv[0]; - LUAJIT_VERSION_SYM(); /* linker-enforced version check */ - script = collectargs(argv, &flags); - if (script < 0) { /* invalid args? */ + + LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */ + + argn = collectargs(argv, &flags); + if (argn < 0) { /* Invalid args? 
*/ print_usage(); s->status = 1; return 0; } + if ((flags & FLAGS_NOENV)) { lua_pushboolean(L, 1); lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); } - lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ - luaL_openlibs(L); /* open libraries */ + + /* Stop collector during library initialization. */ + lua_gc(L, LUA_GCSTOP, 0); + luaL_openlibs(L); lua_gc(L, LUA_GCRESTART, -1); + + createargtable(L, argv, s->argc, argn); + if (!(flags & FLAGS_NOENV)) { s->status = handle_luainit(L); if (s->status != 0) return 0; } + if ((flags & FLAGS_VERSION)) print_version(); - s->status = runargs(L, argv, (script > 0) ? script : s->argc); + + s->status = runargs(L, argv, argn); if (s->status != 0) return 0; - if (script) { - s->status = handle_script(L, argv, script); + + if (s->argc > argn) { + s->status = handle_script(L, argv + argn); if (s->status != 0) return 0; } + if ((flags & FLAGS_INTERACTIVE)) { print_jit_status(L); dotty(L); - } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { + } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { if (lua_stdin_is_tty()) { print_version(); print_jit_status(L); dotty(L); } else { - dofile(L, NULL); /* executes stdin as a file */ + dofile(L, NULL); /* Executes stdin as a file. */ } } return 0; @@ -555,7 +570,7 @@ static int pmain(lua_State *L) int main(int argc, char **argv) { int status; - lua_State *L = lua_open(); /* create state */ + lua_State *L = lua_open(); if (L == NULL) { l_message(argv[0], "cannot create state: not enough memory"); return EXIT_FAILURE; @@ -565,6 +580,6 @@ int main(int argc, char **argv) status = lua_cpcall(L, pmain, NULL); report(L, status); lua_close(L); - return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; + return (status || smain.status > 0) ? 
EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 8b083ff1..6f5a83dd 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -57,7 +57,7 @@ |.define TMP2, r14 |.define TMP3, r15 | -|// Calling conventions. +|// MIPS o32 calling convention. |.define CFUNCADDR, r25 |.define CARG1, r4 |.define CARG2, r5 @@ -4546,24 +4546,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISNEXT: | // RA = base*8, RD = target (points to ITERN) | addu RA, BASE, RA - | lw TMP0, -24+HI(RA) - | lw CFUNC:TMP1, -24+LO(RA) - | lw TMP2, -16+HI(RA) - | lw TMP3, -8+HI(RA) + | srl TMP0, RD, 1 + | lw CARG1, -24+HI(RA) + | lw CFUNC:CARG2, -24+LO(RA) + | addu TMP0, PC, TMP0 + | lw CARG3, -16+HI(RA) + | lw CARG4, -8+HI(RA) | li AT, LJ_TFUNC - | bne TMP0, AT, >5 - |. addiu TMP2, TMP2, -LJ_TTAB - | lbu TMP1, CFUNC:TMP1->ffid - | addiu TMP3, TMP3, -LJ_TNIL - | srl TMP0, RD, 1 - | or TMP2, TMP2, TMP3 - | addiu TMP1, TMP1, -FF_next_N - | addu TMP0, PC, TMP0 - | or TMP1, TMP1, TMP2 - | bnez TMP1, >5 - |. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | bne CARG1, AT, >5 + |. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | lbu CARG2, CFUNC:CARG2->ffid + | addiu CARG3, CARG3, -LJ_TTAB + | addiu CARG4, CARG4, -LJ_TNIL + | or CARG3, CARG3, CARG4 + | addiu CARG2, CARG2, -FF_next_N + | or CARG2, CARG2, CARG3 + | bnez CARG2, >5 + |. lui TMP1, 0xfffe | addu PC, TMP0, TMP2 - | lui TMP1, 0xfffe | ori TMP1, TMP1, 0x7fff | sw r0, -8+LO(RA) // Initialize control var. | sw TMP1, -8+HI(RA) @@ -4573,7 +4573,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP3, BC_JMP | li TMP1, BC_ITERC | sb TMP3, -4+OFS_OP(PC) - | addu PC, TMP0, TMP2 + | addu PC, TMP0, TMP2 | b <1 |. sb TMP1, OFS_OP(PC) break; diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc new file mode 100644 index 00000000..52b56de4 --- /dev/null +++ b/src/vm_mips64.dasc @@ -0,0 +1,4849 @@ +|// Low-level VM code for MIPS64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. 
+|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +|// +|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +|// Sponsored by Cisco Systems, Inc. +| +|.arch mips64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|// Note: The ragged indentation of the instructions is intentional. +|// The starting columns indicate data dependencies. +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter. +|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra +| +|.macro .FPU, a, b +|.if FPU +| a, b +|.endif +|.endmacro +| +|// The following must be C callee-save (but BASE is often refetched). +|.define BASE, r16 // Base of current Lua stack frame. +|.define KBASE, r17 // Constants of current Lua function. +|.define PC, r18 // Next PC. +|.define DISPATCH, r19 // Opcode dispatch table. +|.define LREG, r20 // Register holding lua_State (also in SAVE_L). +|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. +| +|.define JGL, r30 // On-trace: global_State + 32768. +| +|// Constants for type-comparisons, stores and conversions. C callee-save. +|.define TISNIL, r30 +|.define TISNUM, r22 +|.if FPU +|.define TOBIT, f30 // 2^52 + 2^51. +|.endif +| +|// The following temporaries are not saved across C calls, except for RA. +|.define RA, r23 // Callee-save. +|.define RB, r8 +|.define RC, r9 +|.define RD, r10 +|.define INS, r11 +| +|.define AT, r1 // Assembler temporary. +|.define TMP0, r12 +|.define TMP1, r13 +|.define TMP2, r14 +|.define TMP3, r15 +| +|// MIPS n64 calling convention. 
+|.define CFUNCADDR, r25 +|.define CARG1, r4 +|.define CARG2, r5 +|.define CARG3, r6 +|.define CARG4, r7 +|.define CARG5, r8 +|.define CARG6, r9 +|.define CARG7, r10 +|.define CARG8, r11 +| +|.define CRET1, r2 +|.define CRET2, r3 +| +|.if FPU +|.define FARG1, f12 +|.define FARG2, f13 +|.define FARG3, f14 +|.define FARG4, f15 +|.define FARG5, f16 +|.define FARG6, f17 +|.define FARG7, f18 +|.define FARG8, f19 +| +|.define FRET1, f0 +|.define FRET2, f2 +|.endif +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|.if FPU // MIPS64 hard-float. +| +|.define CFRAME_SPACE, 192 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 188(sp) // 32 bit values. +|.define SAVE_NRES, 184(sp) +|.define SAVE_CFRAME, 176(sp) // 64 bit values. +|.define SAVE_L, 168(sp) +|.define SAVE_PC, 160(sp) +|//----- 16 byte aligned +|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. +|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. +| +|.else // MIPS64 soft-float +| +|.define CFRAME_SPACE, 128 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 124(sp) // 32 bit values. +|.define SAVE_NRES, 120(sp) +|.define SAVE_CFRAME, 112(sp) // 64 bit values. +|.define SAVE_L, 104(sp) +|.define SAVE_PC, 96(sp) +|//----- 16 byte aligned +|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. +| +|.endif +| +|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code. 
+|.define TMPD, 0(sp) +|//----- 16 byte aligned +| +|.define TMPD_OFS, 0 +| +|.define SAVE_MULTRES, TMPD +| +|//----------------------------------------------------------------------- +| +|.macro saveregs +| daddiu sp, sp, -CFRAME_SPACE +| sd ra, SAVE_GPR_+9*8(sp) +| sd r30, SAVE_GPR_+8*8(sp) +| .FPU sdc1 f31, SAVE_FPR_+7*8(sp) +| sd r23, SAVE_GPR_+7*8(sp) +| .FPU sdc1 f30, SAVE_FPR_+6*8(sp) +| sd r22, SAVE_GPR_+6*8(sp) +| .FPU sdc1 f29, SAVE_FPR_+5*8(sp) +| sd r21, SAVE_GPR_+5*8(sp) +| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) +| sd r20, SAVE_GPR_+4*8(sp) +| .FPU sdc1 f27, SAVE_FPR_+3*8(sp) +| sd r19, SAVE_GPR_+3*8(sp) +| .FPU sdc1 f26, SAVE_FPR_+2*8(sp) +| sd r18, SAVE_GPR_+2*8(sp) +| .FPU sdc1 f25, SAVE_FPR_+1*8(sp) +| sd r17, SAVE_GPR_+1*8(sp) +| .FPU sdc1 f24, SAVE_FPR_+0*8(sp) +| sd r16, SAVE_GPR_+0*8(sp) +|.endmacro +| +|.macro restoreregs_ret +| ld ra, SAVE_GPR_+9*8(sp) +| ld r30, SAVE_GPR_+8*8(sp) +| ld r23, SAVE_GPR_+7*8(sp) +| .FPU ldc1 f31, SAVE_FPR_+7*8(sp) +| ld r22, SAVE_GPR_+6*8(sp) +| .FPU ldc1 f30, SAVE_FPR_+6*8(sp) +| ld r21, SAVE_GPR_+5*8(sp) +| .FPU ldc1 f29, SAVE_FPR_+5*8(sp) +| ld r20, SAVE_GPR_+4*8(sp) +| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) +| ld r19, SAVE_GPR_+3*8(sp) +| .FPU ldc1 f27, SAVE_FPR_+3*8(sp) +| ld r18, SAVE_GPR_+2*8(sp) +| .FPU ldc1 f26, SAVE_FPR_+2*8(sp) +| ld r17, SAVE_GPR_+1*8(sp) +| .FPU ldc1 f25, SAVE_FPR_+1*8(sp) +| ld r16, SAVE_GPR_+0*8(sp) +| .FPU ldc1 f24, SAVE_FPR_+0*8(sp) +| jr ra +| daddiu sp, sp, CFRAME_SPACE +|.endmacro +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State, LREG +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. 
+|.macro NYI; .long 0xf0f0f0f0; .endmacro +| +|// Macros to mark delay slots. +|.macro ., a; a; .endmacro +|.macro ., a,b; a,b; .endmacro +|.macro ., a,b,c; a,b,c; .endmacro +|.macro ., a,b,c,d; a,b,c,d; .endmacro +| +|.define FRAME_PC, -8 +|.define FRAME_FUNC, -16 +| +|//----------------------------------------------------------------------- +| +|// Endian-specific defines. +|.if ENDIAN_LE +|.define HI, 4 +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define HI, 0 +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif +| +|// Instruction decode. +|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro +|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro +|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro +|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro +|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro +|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro +|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro +|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro +|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro +|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro +| +|// Instruction fetch. +|.macro ins_NEXT1 +| lw INS, 0(PC) +| daddiu PC, PC, 4 +|.endmacro +|// Instruction decode+dispatch. +|.macro ins_NEXT2 +| decode_OP8a TMP1, INS +| decode_OP8b TMP1 +| daddu TMP0, DISPATCH, TMP1 +| decode_RD8a RD, INS +| ld AT, 0(TMP0) +| decode_RA8a RA, INS +| decode_RD8b RD +| jr AT +| decode_RA8b RA +|.endmacro +|.macro ins_NEXT +| ins_NEXT1 +| ins_NEXT2 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. 
+| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +| .define ins_next1, ins_NEXT1 +| .define ins_next2, ins_NEXT2 +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| b ->ins_next +| .endmacro +| .macro ins_next1 +| .endmacro +| .macro ins_next2 +| b ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ld PC, LFUNC:RB->pc +| lw INS, 0(PC) +| daddiu PC, PC, 4 +| decode_OP8a TMP1, INS +| decode_RA8a RA, INS +| decode_OP8b TMP1 +| decode_RA8b RA +| daddu TMP0, DISPATCH, TMP1 +| ld TMP0, 0(TMP0) +| jr TMP0 +| daddu RA, RA, BASE +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| sd PC, FRAME_PC(BASE) +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|.macro branch_RD +| srl TMP0, RD, 1 +| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) +| addu TMP0, TMP0, AT +| daddu PC, PC, TMP0 +|.endmacro +| +|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) +#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|.macro load_got, func +| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) +|.endmacro +|// Much faster. Sadly, there's no easy way to force the required code layout. 
+|// .macro call_intern, func; bal extern func; .endmacro +|.macro call_intern, func; jalr CFUNCADDR; .endmacro +|.macro call_extern; jalr CFUNCADDR; .endmacro +|.macro jmp_extern; jr CFUNCADDR; .endmacro +| +|.macro hotcheck, delta, target +| NYI +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP, ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL, ->vm_hotcall +|.endmacro +| +|// Set current VM state. Uses TMP0. +|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro +|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|.macro barrierback, tab, mark, tmp, target +| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) +| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) +| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) +| sb mark, tab->marked +| b target +|. sd tmp, tab->gclist +|.endmacro +| +|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. +|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro +|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro +| +|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. +|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro +| +|// Extract (negative) type tag. +|.macro gettp, dst, src; dsra dst, src, 47; .endmacro +| +|// Macros to check the TValue type and extract the GCobj. Branch on failure. +|.macro checktp, reg, tp, target +| gettp AT, reg +| daddiu AT, AT, tp +| bnez AT, target +|. cleartp reg +|.endmacro +|.macro checktp, dst, reg, tp, target +| gettp AT, reg +| daddiu AT, AT, tp +| bnez AT, target +|. cleartp dst, reg +|.endmacro +|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro +|.macro checkint, reg, target // Caveat: has delay slot! 
+| gettp AT, reg +| bne AT, TISNUM, target +|.endmacro +|.macro checknum, reg, target // Caveat: has delay slot! +| gettp AT, reg +| sltiu AT, AT, LJ_TISNUM +| beqz AT, target +|.endmacro +| +|.macro mov_false, reg +| lu reg, 0x8000 +| dsll reg, reg, 32 +| not reg, reg +|.endmacro +|.macro mov_true, reg +| li reg, 0x0001 +| dsll reg, reg, 48 +| not reg, reg +|.endmacro +| +|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: TMP2 = previous base. + | andi AT, PC, FRAME_P + | beqz AT, ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + |. mov_true TMP1 + | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | move BASE, TMP2 // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. + | sd TMP1, -8(RA) // Prepend true to results. + | daddiu RA, RA, -8 + | + |->vm_returnc: + | addiu RD, RD, 8 // RD = (nresults+1)*8. + | andi TMP0, PC, FRAME_TYPE + | beqz RD, ->vm_unwind_c_eh + |. li CRET1, LUA_YIELD + | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. + |. move MULTRES, RD + | + |->vm_return: + | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return + | // TMP0 = PC & FRAME_TYPE + | li TMP2, -8 + | xori AT, TMP0, FRAME_C + | and TMP2, PC, TMP2 + | bnez AT, ->vm_returnp + | dsubu TMP2, BASE, TMP2 // TMP2 = previous base. + | + | addiu TMP1, RD, -8 + | sd TMP2, L->base + | li_vmstate C + | lw TMP2, SAVE_NRES + | daddiu BASE, BASE, -16 + | st_vmstate + | beqz TMP1, >2 + |. 
sll TMP2, TMP2, 3 + |1: + | addiu TMP1, TMP1, -8 + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | sd CRET1, 0(BASE) + | bnez TMP1, <1 + |. daddiu BASE, BASE, 8 + | + |2: + | bne TMP2, RD, >6 + |3: + |. sd BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ld TMP0, SAVE_CFRAME // Restore previous C frame. + | move CRET1, r0 // Ok return status for vm_pcall. + | sd TMP0, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | ld TMP1, L->maxstack + | slt AT, TMP2, RD + | bnez AT, >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + |. slt AT, BASE, TMP1 + | beqz AT, >8 + |. nop + | sd TISNIL, 0(BASE) + | addiu RD, RD, 8 + | b <2 + |. daddiu BASE, BASE, 8 + | + |7: // Less results wanted. + | subu TMP0, RD, TMP2 + | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. + | b <3 + |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | load_got lj_state_growstack + | move MULTRES, RD + | srl CARG2, TMP2, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | lw TMP2, SAVE_NRES + | ld BASE, L->top // Need the (realloced) L->top in BASE. + | move RD, MULTRES + | b <2 + |. sll TMP2, TMP2, 3 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | move sp, CARG1 + | move CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ld L, SAVE_L + | li TMP0, ~LJ_VMST_C + | ld GL:TMP1, L->glref + | b ->vm_leave_unw + |. sw TMP0, GL:TMP1->vmstate + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | li AT, -4 + | and sp, CARG1, AT + |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
+ | ld L, SAVE_L + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | ld BASE, L->base + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | .FPU mtc1 TMP3, TOBIT + | mov_false TMP1 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | .FPU cvt.d.s TOBIT, TOBIT + | daddiu RA, BASE, -8 // Results start at BASE-8. + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sd TMP1, 0(RA) // Prepend false to error message. + | st_vmstate + | b ->vm_returnc + |. li RD, 16 // 2 results: false + error message. + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | b >2 + |. li CARG2, LUA_MINSTACK + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | daddu RC, BASE, RC + | dsubu RA, RA, BASE + | sd BASE, L->base + | daddiu PC, PC, 4 // Must point after first instruction. + | sd RC, L->top + | srl CARG2, RA, 3 + |2: + | // L->base = new base, L->top = top + | load_got lj_state_growstack + | sd PC, SAVE_PC + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | ld BASE, L->base + | ld RC, L->top + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsubu RC, RC, BASE + | cleartp LFUNC:RB + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. 
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | move L, CARG1 + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | move BASE, CARG2 + | lbu TMP1, L->status + | sd L, SAVE_L + | li PC, FRAME_CP + | daddiu TMP0, sp, CFRAME_RESUME + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sw r0, SAVE_NRES + | sw r0, SAVE_ERRF + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | sd r0, SAVE_CFRAME + | beqz TMP1, >3 + |. sd TMP0, L->cframe + | + | // Resume after yield (like a return). + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | move RA, BASE + | ld BASE, L->base + | ld TMP1, L->top + | ld PC, FRAME_PC(BASE) + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | dsubu RD, TMP1, BASE + | .FPU mtc1 TMP3, TOBIT + | sb r0, L->status + | .FPU cvt.d.s TOBIT, TOBIT + | li_vmstate INTERP + | daddiu RD, RD, 8 + | st_vmstate + | move MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | li TISNIL, LJ_TNIL + | beqz TMP0, ->BC_RET_Z + |. li TISNUM, LJ_TISNUM + | b ->vm_return + |. nop + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | sw CARG4, SAVE_ERRF + | b >1 + |. li PC, FRAME_CP + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | li PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ld TMP1, L:CARG1->cframe + | move L, CARG1 + | sw CARG3, SAVE_NRES + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | sd CARG1, SAVE_L + | move BASE, CARG2 + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | sd TMP1, SAVE_CFRAME + | sd sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
+ | ld TMP1, L->top + | .FPU mtc1 TMP3, TOBIT + | daddu PC, PC, BASE + | dsubu NARGS8:RC, TMP1, BASE + | li TISNUM, LJ_TISNUM + | dsubu PC, PC, TMP2 // PC = frame delta + frame type + | .FPU cvt.d.s TOBIT, TOBIT + | li_vmstate INTERP + | li TISNIL, LJ_TNIL + | st_vmstate + | + |->vm_call_dispatch: + | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ld LFUNC:RB, FRAME_FUNC(BASE) + | checkfunc LFUNC:RB, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | move L, CARG1 + | ld TMP0, L:CARG1->stack + | sd CARG1, SAVE_L + | ld TMP1, L->top + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). + | ld TMP1, L->cframe + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. + | sw r0, SAVE_ERRF // No error function. + | sd TMP1, SAVE_CFRAME + | sd sp, L->cframe // Add our C frame to cframe chain. + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + |. move CFUNCADDR, CARG4 + | move BASE, CRET1 + | bnez CRET1, <3 // Else continue with the call. + |. li PC, FRAME_CP + | b ->vm_leave_cp // No base? Just remove C frame. + |. nop + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the + |// stack, so BASE doesn't need to be reloaded across these calls. 
+ | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 + | ld TMP0, -32(BASE) // Continuation. + | move RB, BASE + | move BASE, TMP2 // Restore caller BASE. + | ld LFUNC:TMP1, FRAME_FUNC(TMP2) + |.if FFI + | sltiu AT, TMP0, 2 + |.endif + | ld PC, -24(RB) // Restore PC from [cont|PC]. + | cleartp LFUNC:TMP1 + | daddu TMP2, RA, RD + | ld TMP1, LFUNC:TMP1->pc + |.if FFI + | bnez AT, >1 + |.endif + |. sd TISNIL, -8(TMP2) // Ensure one valid arg. + | // BASE = base, RA = resultptr, RB = meta base + | jr TMP0 // Jump to continuation. + |. ld KBASE, PC2PROTO(k)(TMP1) + | + |.if FFI + |1: + | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + |. daddiu TMP1, RB, -32 + | b ->vm_call_tail + |. dsubu RC, TMP1, BASE + |.endif + | + |->cont_cat: // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | daddiu CARG2, RB, -32 + | ld CRET1, 0(RA) + | decode_RB8a MULTRES, INS + | decode_RA8a RA, INS + | decode_RB8b MULTRES + | decode_RA8b RA + | daddu TMP1, BASE, MULTRES + | sd BASE, L->base + | dsubu CARG3, CARG2, TMP1 + | bne TMP1, CARG2, ->BC_CAT_Z + |. sd CRET1, 0(CARG2) + | daddu RA, BASE, RA + | b ->cont_nop + |. sd CRET1, 0(RA) + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tgets: + | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) + | sd TAB:RB, 0(CARG2) + | settp STR:RC, TMP1 + | b >1 + |. 
sd STR:RC, 0(CARG3) + | + |->vmeta_tgetb: // TMP0 = index + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tgetv: + |1: + | load_got lj_meta_tget + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + |. move CARG1, L + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + |. daddiu TMP1, BASE, -FRAME_CONT + | ld CARG1, 0(CRET1) + | ins_next1 + | sd CARG1, 0(RA) + | ins_next2 + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | dsubu PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | b ->vm_call_dispatch_f + |. li NARGS8:RC, 16 // 2 args for func(t, k). + | + |->vmeta_tgetr: + | load_got lj_tab_getinth + | call_intern lj_tab_getinth // (GCtab *t, int32_t key) + |. nop + | // Returns cTValue * or NULL. + | beqz CRET1, ->BC_TGETR_Z + |. move CARG2, TISNIL + | b ->BC_TGETR_Z + |. ld CARG2, 0(CRET1) + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tsets: + | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) + | sd TAB:RB, 0(CARG2) + | settp STR:RC, TMP1 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tsetb: // TMP0 = index + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tsetv: + |1: + | load_got lj_meta_tset + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + |. move CARG1, L + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + |. 
ld CARG1, 0(RA) + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | ins_next1 + | sd CARG1, 0(CRET1) + | ins_next2 + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | daddiu TMP1, BASE, -FRAME_CONT + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | dsubu PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | sd CARG1, 16(BASE) // Copy value to third argument. + | b ->vm_call_dispatch_f + |. li NARGS8:RC, 24 // 3 args for func(t, k, v) + | + |->vmeta_tsetr: + | load_got lj_tab_setinth + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + |. move CARG1, L + | // Returns TValue *. + | b ->BC_TSETR_Z + |. nop + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | // RA/RD point to o1/o2. + | move CARG2, RA + | move CARG3, RD + | load_got lj_meta_comp + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | decode_OP1 CARG4, INS + | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu AT, CRET1, 2 + | beqz AT, ->vmeta_binop + | negu TMP2, CRET1 + |4: + | lhu RD, OFS_RD(PC) + | daddiu PC, PC, 4 + | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) + | sll RD, RD, 2 + | addu RD, RD, TMP1 + | and RD, RD, TMP2 + | daddu PC, PC, RD + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | lbu TMP1, -4+OFS_RA(PC) + | ld CRET1, 0(RA) + | sll TMP1, TMP1, 3 + | daddu TMP1, BASE, TMP1 + | b ->cont_nop + |. sd CRET1, 0(TMP1) + | + |->cont_condt: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu AT, TMP0, LJ_TISTRUECOND + | b <4 + |. negu TMP2, AT // Branch if result is true. + | + |->cont_condf: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu AT, TMP0, LJ_TISTRUECOND + | b <4 + |. 
addiu TMP2, AT, -1 // Branch if result is false. + | + |->vmeta_equal: + | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. + | load_got lj_meta_equal + | cleartp LFUNC:CARG3, CARG2 + | cleartp LFUNC:CARG2, CARG1 + | move CARG4, TMP0 + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + | b <3 + |. nop + | + |->vmeta_equal_cd: + |.if FFI + | load_got lj_meta_equal_cd + | move CARG2, INS + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + | b <3 + |. nop + |.endif + | + |->vmeta_istype: + | load_got lj_meta_istype + | daddiu PC, PC, -4 + | sd BASE, L->base + | srl CARG2, RA, 3 + | srl CARG3, RD, 3 + | sd PC, SAVE_PC + | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + |. move CARG1, L + | b ->cont_nop + |. nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_unm: + | move RC, RB + | + |->vmeta_arith: + | load_got lj_meta_arith + | sd BASE, L->base + | move CARG2, RA + | sd PC, SAVE_PC + | move CARG3, RB + | move CARG4, RC + | decode_OP1 CARG5, INS // CARG5 == RB. + | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + |. move CARG1, L + | // Returns NULL (finished) or TValue * (metamethod). + | beqz CRET1, ->cont_nop + |. nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | dsubu TMP1, CRET1, BASE + | sd PC, -24(CRET1) // [cont|PC] + | move TMP2, BASE + | daddiu PC, TMP1, FRAME_CONT + | move BASE, CRET1 + | b ->vm_call_dispatch + |. li NARGS8:RC, 16 // 2 args for func(o1, o2). + | + |->vmeta_len: + | // CARG2 already set by BC_LEN. 
+#if LJ_52 + | move MULTRES, CARG1 +#endif + | load_got lj_meta_len + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_len // (lua_State *L, TValue *o) + |. move CARG1, L + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bnez CRET1, ->vmeta_binop // Binop call for compatibility. + |. nop + | b ->BC_LEN_Z + |. move CARG1, MULTRES +#else + | b ->vmeta_binop // Binop call for compatibility. + |. nop +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // TMP2 = old base, BASE = new base, RC = nargs*8 + | load_got lj_meta_call + | sd TMP2, L->base // This is the callers base! + | daddiu CARG2, BASE, -16 + | sd PC, SAVE_PC + | daddu CARG3, BASE, RC + | move MULTRES, NARGS8:RC + | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + |. move CARG1, L + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:RB + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | load_got lj_meta_call + | sd BASE, L->base + | daddiu CARG2, RA, -16 + | sd PC, SAVE_PC + | daddu CARG3, RA, RC + | move MULTRES, NARGS8:RC + | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + |. move CARG1, L + | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ld TMP1, FRAME_PC(BASE) + | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. + | b ->BC_CALLT_Z + |. cleartp LFUNC:CARG3, RB + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | load_got lj_meta_for + | sd BASE, L->base + | move CARG2, RA + | sd PC, SAVE_PC + | move MULTRES, INS + | call_intern lj_meta_for // (lua_State *L, TValue *base) + |. 
move CARG1, L + |.if JIT + | decode_OP1 TMP0, MULTRES + | li AT, BC_JFORI + |.endif + | decode_RA8a RA, MULTRES + | decode_RD8a RD, MULTRES + | decode_RA8b RA + |.if JIT + | beq TMP0, AT, =>BC_JFORI + |. decode_RD8b RD + | b =>BC_FORI + |. nop + |.else + | b =>BC_FORI + |. decode_RD8b RD + |.endif + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | beqz NARGS8:RC, ->fff_fallback + |. ld CARG1, 0(BASE) + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | sltiu AT, NARGS8:RC, 16 + | ld CARG1, 0(BASE) + | bnez AT, ->fff_fallback + |. ld CARG2, 8(BASE) + |.endmacro + | + |.macro .ffunc_n, name // Caveat: has delay slot! + |->ff_ .. name: + | ld CARG1, 0(BASE) + | beqz NARGS8:RC, ->fff_fallback + | // Either ldc1 or the 1st instruction of checknum is in the delay slot. + | .FPU ldc1 FARG1, 0(BASE) + | checknum CARG1, ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name // Caveat: has delay slot! + |->ff_ .. name: + | ld CARG1, 0(BASE) + | sltiu AT, NARGS8:RC, 16 + | ld CARG2, 8(BASE) + | bnez AT, ->fff_fallback + |. gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | .FPU ldc1 FARG1, 0(BASE) + | and TMP0, TMP0, TMP1 + | .FPU ldc1 FARG2, 8(BASE) + | beqz TMP0, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! 
+ |.macro ffgccheck + | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | dsubu AT, TMP0, TMP1 + | bgezal AT, ->fff_gcstep + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + |.ffunc_1 assert + | gettp AT, CARG1 + | sltiu AT, AT, LJ_TISTRUECOND + | beqz AT, ->fff_fallback + |. daddiu RA, BASE, -16 + | ld PC, FRAME_PC(BASE) + | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. + | daddu TMP2, RA, RD + | daddiu TMP1, BASE, 8 + | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. + |. sd CARG1, 0(RA) + |1: + | ld CRET1, 0(TMP1) + | sd CRET1, -16(TMP1) + | bne TMP1, TMP2, <1 + |. daddiu TMP1, TMP1, 8 + | b ->fff_res + |. nop + | + |.ffunc_1 type + | gettp TMP0, CARG1 + | sltu TMP1, TISNUM, TMP0 + | not TMP2, TMP0 + | li TMP3, ~LJ_TISNUM + | movz TMP2, TMP3, TMP1 + | dsll TMP2, TMP2, 3 + | daddu TMP2, CFUNC:RB, TMP2 + | b ->fff_restv + |. ld CARG1, CFUNC:TMP2->upvalue + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | gettp TMP2, CARG1 + | daddiu TMP0, TMP2, -LJ_TTAB + | daddiu TMP1, TMP2, -LJ_TUDATA + | movn TMP0, TMP1, TMP0 + | bnez TMP0, >6 + |. cleartp TAB:CARG1 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:RB, TAB:CARG1->metatable + |2: + | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) + | beqz TAB:RB, ->fff_restv + |. li CARG1, LJ_TNIL + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->hash + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | dsll TMP0, TMP1, 5 + | dsll TMP1, TMP1, 3 + | dsubu TMP1, TMP0, TMP1 + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | li CARG4, LJ_TSTR + | settp STR:RC, CARG4 // Tagged key to look for. + |3: // Rearranged logic, because we expect _not_ to find the key. 
+ | ld TMP0, NODE:TMP2->key + | ld CARG1, NODE:TMP2->val + | ld NODE:TMP2, NODE:TMP2->next + | beq RC, TMP0, >5 + |. li AT, LJ_TTAB + | bnez NODE:TMP2, <3 + |. nop + |4: + | move CARG1, RB + | b ->fff_restv // Not found, keep default result. + |. settp CARG1, AT + |5: + | bne CARG1, TISNIL, ->fff_restv + |. nop + | b <4 // Ditto for nil value. + |. nop + | + |6: + | sltiu AT, TMP2, LJ_TISNUM + | movn TMP2, TISNUM, AT + | dsll TMP2, TMP2, 3 + | dsubu TMP0, DISPATCH, TMP2 + | b <2 + |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. + | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | gettp TMP3, CARG2 + | ld TAB:TMP0, TAB:TMP1->metatable + | lbu TMP2, TAB:TMP1->marked + | daddiu AT, TMP3, -LJ_TTAB + | cleartp TAB:CARG2 + | or AT, AT, TAB:TMP0 + | bnez AT, ->fff_fallback + |. andi AT, TMP2, LJ_GC_BLACK // isblack(table) + | beqz AT, ->fff_restv + |. sd TAB:CARG2, TAB:TMP1->metatable + | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv + | + |.ffunc rawget + | ld CARG2, 0(BASE) + | sltiu AT, NARGS8:RC, 16 + | load_got lj_tab_get + | gettp TMP0, CARG2 + | cleartp CARG2 + | daddiu TMP0, TMP0, -LJ_TTAB + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback + |. daddiu CARG3, BASE, 8 + | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + |. move CARG1, L + | b ->fff_restv + |. ld CARG1, 0(CRET1) + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ld CARG1, 0(BASE) + | xori AT, NARGS8:RC, 8 // Exactly one number argument. + | gettp TMP1, CARG1 + | sltu TMP0, TISNUM, TMP1 + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback + |. nop + | b ->fff_restv + |. nop + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. 
+ | gettp TMP0, CARG1 + | daddiu AT, TMP0, -LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | beqz AT, ->fff_restv // String key? + | // Handle numbers inline, unless a number base metatable is present. + |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) + | sltu TMP0, TISNUM, TMP0 + | or TMP0, TMP0, TMP1 + | bnez TMP0, ->fff_fallback + |. sd BASE, L->base // Add frame since C call can throw. + | ffgccheck + |. sd PC, SAVE_PC // Redundant (but a defined value). + | load_got lj_strfmt_number + | move CARG1, L + | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) + |. move CARG2, BASE + | // Returns GCstr *. + | li AT, LJ_TSTR + | settp CRET1, AT + | b ->fff_restv + |. move CARG1, CRET1 + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback + | daddu TMP2, BASE, NARGS8:RC + | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. + | ld PC, FRAME_PC(BASE) + | load_got lj_tab_next + | sd BASE, L->base // Add frame since C call can throw. + | sd BASE, L->top // Dummy frame length is ok. + | daddiu CARG3, BASE, 8 + | sd PC, SAVE_PC + | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) + |. move CARG1, L + | // Returns 0 at end of traversal. + | beqz CRET1, ->fff_restv // End of traversal: return nil. + |. move CARG1, TISNIL + | ld TMP0, 8(BASE) + | daddiu RA, BASE, -16 + | ld TMP2, 16(BASE) + | sd TMP0, 0(RA) + | sd TMP2, 8(RA) + | b ->fff_res + |. li RD, (2+1)*8 + | + |.ffunc_1 pairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable + | ld TMP0, CFUNC:RB->upvalue[0] + | bnez TAB:TMP2, ->fff_fallback +#else + | ld TMP0, CFUNC:RB->upvalue[0] +#endif + |. daddiu RA, BASE, -16 + | sd TISNIL, 0(BASE) + | sd CARG1, -8(BASE) + | sd TMP0, 0(RA) + | b ->fff_res + |. 
li RD, (3+1)*8
+  |
+  |  // ipairs_aux(t, i): one iteration step of the ipairs iterator.
+  |  // .ffunc_2 has loaded CARG1 = arg 1 and CARG2 = arg 2 and already falls
+  |  // back when fewer than two arguments were passed. Anything that is not
+  |  // (table, integer) is handed to ->fff_fallback as well.
+  |.ffunc_2 ipairs_aux
+  |  checktab CARG1, ->fff_fallback
+  |  checkint CARG2, ->fff_fallback
+  |.  lw TMP0, TAB:CARG1->asize
+  |  ld TMP1, TAB:CARG1->array
+  |  ld PC, FRAME_PC(BASE)
+  |  sextw TMP2, CARG2
+  |  // TMP2 = i+1 is the key looked up in this step.
+  |  addiu TMP2, TMP2, 1
+  |  // Unsigned compare against asize: AT != 0 means the key is in the array part.
+  |  sltu AT, TMP2, TMP0
+  |  daddiu RA, BASE, -16
+  |  zextw TMP0, TMP2
+  |  settp TMP0, TISNUM
+  |  beqz AT, >2 // Not in array part?
+  |  // Delay slot: the new key (tagged with TISNUM) becomes the first result.
+  |.  sd TMP0, 0(RA)
+  |  dsll TMP3, TMP2, 3
+  |  daddu TMP3, TMP1, TMP3
+  |  ld TMP1, 0(TMP3)
+  |1:
+  |  // TMP1 = value found for the key (from the array part or from label 2).
+  |  beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
+  |.  li RD, (0+1)*8
+  |  // Second result: the value, stored right after the key.
+  |  sd TMP1, -8(BASE)
+  |  b ->fff_res
+  |.  li RD, (2+1)*8
+  |2: // Check for empty hash part first. Otherwise call C function.
+  |  lw TMP0, TAB:CARG1->hmask
+  |  load_got lj_tab_getinth
+  |  beqz TMP0, ->fff_res
+  |.  li RD, (0+1)*8
+  |  call_intern lj_tab_getinth // (GCtab *t, int32_t key)
+  |.  move CARG2, TMP2
+  |  // Returns cTValue * or NULL.
+  |  beqz CRET1, ->fff_res
+  |.  li RD, (0+1)*8
+  |  // Found a slot: load its value in the delay slot and share the nil check at 1.
+  |  b <1
+  |.  ld TMP1, 0(CRET1)
+  |
+  |  // ipairs(t): returns three results via ->fff_res: the value of
+  |  // CFUNC:RB->upvalue[0] (stored at RA), the table argument and an initial
+  |  // control value. Non-table arguments go to ->fff_fallback; with LJ_52 a
+  |  // table that has a metatable goes to ->fff_fallback too.
+  |.ffunc_1 ipairs
+  |  checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+  |  ld PC, FRAME_PC(BASE)
+#if LJ_52
+  |  ld TAB:TMP2, TAB:TMP1->metatable
+  |  ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
+  |  bnez TAB:TMP2, ->fff_fallback
+#else
+  |  ld TMP0, CFUNC:RB->upvalue[0]
+#endif
+  |  // NOTE(review): in the LJ_52 build this instruction sits in the delay slot
+  |  // of the bnez above although it carries no '.' marker.
+  |  daddiu RA, BASE, -16
+  |  // AT = TISNUM << 47 -- presumably the tagged integer 0 used as the initial
+  |  // control value; confirm against the TValue tag layout.
+  |  dsll AT, TISNUM, 47
+  |  sd CARG1, -8(BASE)
+  |  sd AT, 0(BASE)
+  |  sd CFUNC:TMP0, 0(RA)
+  |  b ->fff_res
+  |.  li RD, (3+1)*8
+  |
+  |//-- Base library: catch errors ----------------------------------------
+  |
+  |  // pcall(f, ...): calls f through ->vm_call_dispatch.
+  |  // RC is reduced by 8 for the function argument; with no argument at all
+  |  // the result is negative and control goes to ->fff_fallback.
+  |  // TMP2 keeps the old BASE and BASE is advanced by 16. The frame delta
+  |  // plus FRAME_PCALL is placed in PC, with the hook-active bit of
+  |  // g->hookmask added in.
+  |.ffunc pcall
+  |  daddiu NARGS8:RC, NARGS8:RC, -8
+  |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+  |  bltz NARGS8:RC, ->fff_fallback
+  |.  move TMP2, BASE
+  |  daddiu BASE, BASE, 16
+  |  // Remember active hook before pcall.
+  |  srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
+  |  andi TMP3, TMP3, 1
+  |  daddiu PC, TMP3, 16+FRAME_PCALL
+  |  beqz NARGS8:RC, ->vm_call_dispatch
+  |  // Label 1 is placed on the delay-slot instruction on purpose: xpcall enters
+  |  // the copy loop here with `b <1`.
+  |1:
+  |.  daddu TMP0, BASE, NARGS8:RC
+  |  // Copy loop: walk TMP0 from BASE+RC down to BASE, moving each 8-byte slot
+  |  // one slot higher (load from -16(TMP0), store to -8(TMP0)).
+  |2:
+  |  ld TMP1, -16(TMP0)
+  |  sd TMP1, -8(TMP0)
+  |  daddiu TMP0, TMP0, -8
+  |  bne TMP0, BASE, <2
+  |.
nop
+  |  b ->vm_call_dispatch
+  |.  nop
+  |
+  |  // xpcall(f, msgh, ...): like pcall, but with a message handler.
+  |  // Needs at least two arguments and a function as the second one; anything
+  |  // else is handed to ->fff_fallback.
+  |  //
+  |  // The adjusted argument count (nargs*8 - 16) is computed in TMP0 and only
+  |  // committed to RC once both checks have passed. ->fff_fallback derives
+  |  // L->top from BASE + RC, so RC must still hold the original count on the
+  |  // fallback paths, otherwise the handler sees two arguments fewer.
+  |  //
+  |  // The hook mask is loaded into TMP3, the register the srl/andi below
+  |  // read, matching pcall.
+  |.ffunc xpcall
+  |  daddiu NARGS8:TMP0, NARGS8:RC, -16
+  |  ld CARG1, 0(BASE)
+  |  ld CARG2, 8(BASE)
+  |  bltz NARGS8:TMP0, ->fff_fallback
+  |.  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+  |  gettp AT, CARG2
+  |  daddiu AT, AT, -LJ_TFUNC
+  |  bnez AT, ->fff_fallback // Traceback must be a function.
+  |.  move TMP2, BASE
+  |  // Both checks passed: commit the adjusted argument count.
+  |  move NARGS8:RC, NARGS8:TMP0
+  |  daddiu BASE, BASE, 24
+  |  // Remember active hook before pcall.
+  |  srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
+  |  sd CARG2, 0(TMP2) // Swap function and traceback.
+  |  andi TMP3, TMP3, 1
+  |  sd CARG1, 8(TMP2)
+  |  beqz NARGS8:RC, ->vm_call_dispatch
+  |.  daddiu PC, TMP3, 24+FRAME_PCALL
+  |  // Extra arguments present: reuse pcall's copy loop (nearest preceding
+  |  // label 1, placed on pcall's delay-slot instruction).
+  |  b <1
+  |.  nop
+  |
+  |//-- Coroutine library --------------------------------------------------
+  |
+  |  // Shared body, instantiated once with resume = 1 and once with resume = 0.
+  |  // resume = 1 emits ff_coroutine_resume (thread taken from argument 1),
+  |  // resume = 0 emits ff_coroutine_wrap_aux (thread taken from upvalue[0]).
+  |.macro coroutine_resume_wrap, resume
+  |.if resume
+  |.ffunc_1 coroutine_resume
+  |  checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
+  |.else
+  |.ffunc coroutine_wrap_aux
+  |  ld L:CARG1, CFUNC:RB->upvalue[0].gcr
+  |  cleartp L:CARG1
+  |.endif
+  |  lbu TMP0, L:CARG1->status
+  |  ld TMP1, L:CARG1->cframe
+  |  ld CARG2, L:CARG1->top
+  |  ld TMP2, L:CARG1->base
+  |  addiu AT, TMP0, -LUA_YIELD
+  |  daddu CARG3, CARG2, TMP0
+  |  daddiu TMP3, CARG2, 8
+  |  bgtz AT, ->fff_fallback // st > LUA_YIELD?
+  |.  movn CARG2, TMP3, AT
+  |  xor TMP2, TMP2, CARG3
+  |  bnez TMP1, ->fff_fallback // cframe != 0?
+  |.  or AT, TMP2, TMP0
+  |  ld TMP0, L:CARG1->maxstack
+  |  beqz AT, ->fff_fallback // base == top && st == 0?
+  |.  ld PC, FRAME_PC(BASE)
+  |  daddu TMP2, CARG2, NARGS8:RC
+  |  sltu AT, TMP0, TMP2
+  |  bnez AT, ->fff_fallback // Stack overflow?
+  |.  sd PC, SAVE_PC
+  |  sd BASE, L->base
+  |1:
+  |.if resume
+  |  daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC.
+  |  daddiu NARGS8:RC, NARGS8:RC, -8
+  |  daddiu TMP2, TMP2, -8
+  |.endif
+  |  sd TMP2, L:CARG1->top
+  |  daddu TMP1, BASE, NARGS8:RC
+  |  move CARG3, CARG2
+  |  sd BASE, L->top
+  |2: // Move args to coroutine.
+  |  ld CRET1, 0(BASE)
+  |  sltu AT, BASE, TMP1
+  |  beqz AT, >3
+  |.
daddiu BASE, BASE, 8 + | sd CRET1, 0(CARG3) + | b <2 + |. daddiu CARG3, CARG3, 8 + |3: + | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) + |. move L:RA, L:CARG1 + | // Returns thread status. + |4: + | ld TMP2, L:RA->base + | sltiu AT, CRET1, LUA_YIELD+1 + | ld TMP3, L:RA->top + | li_vmstate INTERP + | ld BASE, L->base + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | st_vmstate + | beqz AT, >8 + |. dsubu RD, TMP3, TMP2 + | ld TMP0, L->maxstack + | beqz RD, >6 // No results? + |. daddu TMP1, BASE, RD + | sltu AT, TMP0, TMP1 + | bnez AT, >9 // Need to grow stack? + |. daddu TMP3, TMP2, RD + | sd TMP2, L:RA->top // Clear coroutine stack. + | move TMP1, BASE + |5: // Move results from coroutine. + | ld CRET1, 0(TMP2) + | daddiu TMP2, TMP2, 8 + | sltu AT, TMP2, TMP3 + | sd CRET1, 0(TMP1) + | bnez AT, <5 + |. daddiu TMP1, TMP1, 8 + |6: + | andi TMP0, PC, FRAME_TYPE + |.if resume + | mov_true TMP1 + | daddiu RA, BASE, -8 + | sd TMP1, -8(BASE) // Prepend true to results. + | daddiu RD, RD, 16 + |.else + | move RA, BASE + | daddiu RD, RD, 8 + |.endif + |7: + | sd PC, SAVE_PC + | beqz TMP0, ->BC_RET_Z + |. move MULTRES, RD + | b ->vm_return + |. nop + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | daddiu TMP3, TMP3, -8 + | mov_false TMP1 + | ld CRET1, 0(TMP3) + | sd TMP3, L:RA->top // Remove error from coroutine stack. + | li RD, (2+1)*8 + | sd TMP1, -8(BASE) // Prepend false to results. + | daddiu RA, BASE, -8 + | sd CRET1, 0(BASE) // Copy error message. + | b <7 + |. andi TMP0, PC, FRAME_TYPE + |.else + | load_got lj_ffh_coroutine_wrap_err + | move CARG2, L:RA + | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + |. move CARG1, L + |.endif + | + |9: // Handle stack expansion on return from yield. + | load_got lj_state_growstack + | srl CARG2, RD, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | b <4 + |. 
li CRET1, 0
+  |.endmacro
+  |
+  |  coroutine_resume_wrap 1 // coroutine.resume
+  |  coroutine_resume_wrap 0 // coroutine.wrap
+  |
+  |  // coroutine.yield(...): stores BASE and BASE+RC into L->base / L->top.
+  |  // If L->cframe does not have the CFRAME_RESUME bit set, control goes to
+  |  // ->fff_fallback. Otherwise L->cframe is cleared, L->status is set to
+  |  // LUA_YIELD (also left in CRET1) and the VM is left via ->vm_leave_unw.
+  |.ffunc coroutine_yield
+  |  ld TMP0, L->cframe
+  |  daddu TMP1, BASE, NARGS8:RC
+  |  sd BASE, L->base
+  |  andi TMP0, TMP0, CFRAME_RESUME
+  |  sd TMP1, L->top
+  |  beqz TMP0, ->fff_fallback
+  |.  li CRET1, LUA_YIELD
+  |  sd r0, L->cframe
+  |  b ->vm_leave_unw
+  |.  sb CRET1, L->status
+  |
+  |//-- Math library -------------------------------------------------------
+  |
+  |  // math.abs(x). CARG2 = type tag of the argument.
+  |  // Integer path (tag == LJ_TISNUM): branch-free absolute value on the
+  |  // sign-extended 32-bit payload, with an overflow case handled below.
+  |  // Other tags continue at label 1.
+  |.ffunc_1 math_abs
+  |  gettp CARG2, CARG1
+  |  daddiu AT, CARG2, -LJ_TISNUM
+  |  bnez AT, >1
+  |.  sextw TMP1, CARG1
+  |  sra TMP0, TMP1, 31 // Extract sign.
+  |  // (x ^ sign) - sign yields |x|.
+  |  xor TMP1, TMP1, TMP0
+  |  dsubu CARG1, TMP1, TMP0
+  |  // Move bit 31 of the result into the sign position: if it is clear the
+  |  // result is returned as an integer (tag applied in the delay slot).
+  |  dsll TMP3, CARG1, 32
+  |  bgez TMP3, ->fff_restv
+  |.  settp CARG1, TISNUM
+  |  // Bit 31 set: return the constant 0x41e0 << 48 instead.
+  |  li CARG1, 0x41e0 // 2^31 as a double.
+  |  b ->fff_restv
+  |.  dsll CARG1, CARG1, 48
+  |1:
+  |  // Tags not below LJ_TISNUM (unsigned compare) go to ->fff_fallback.
+  |  sltiu AT, CARG2, LJ_TISNUM
+  |  beqz AT, ->fff_fallback
+  |  // Delay slot: presumably keeps the low 63 bits, i.e. clears the sign bit
+  |  // of the double -- TODO confirm the dextm operand encoding.
+  |.  dextm CARG1, CARG1, 0, 30
+  |// fallthrough
+  |
+  |  // Common return paths for fast functions. The three labels fall through
+  |  // into each other: fff_restv stores one TValue result, fff_res1 sets the
+  |  // result count to one, fff_res performs the actual return.
+  |->fff_restv:
+  |  // CARG1 = TValue result.
+  |  ld PC, FRAME_PC(BASE)
+  |  daddiu RA, BASE, -16
+  |  sd CARG1, -16(BASE)
+  |->fff_res1:
+  |  // RA = results, PC = return.
+  |  li RD, (1+1)*8
+  |->fff_res:
+  |  // RA = results, RD = (nresults+1)*8, PC = return.
+  |  // Non-zero frame type bits in PC: return through ->vm_return (MULTRES is
+  |  // set in the delay slot on both paths).
+  |  andi TMP0, PC, FRAME_TYPE
+  |  bnez TMP0, ->vm_return
+  |.  move MULTRES, RD
+  |  // Otherwise decode the instruction preceding PC; its B operand (RB) is
+  |  // compared against RD to decide whether results must be padded.
+  |  lw INS, -4(PC)
+  |  decode_RB8a RB, INS
+  |  decode_RB8b RB
+  |5:
+  |  sltu AT, RD, RB
+  |  bnez AT, >6 // More results expected?
+  |.  decode_RA8a TMP0, INS
+  |  decode_RA8b TMP0
+  |  ins_next1
+  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
+  |  dsubu BASE, RA, TMP0
+  |  ins_next2
+  |
+  |6: // Fill up results with nil.
+  |  // Appends one nil at RA+RD-8, bumps RD by 8 and re-checks at label 5.
+  |  daddu TMP1, RA, RD
+  |  daddiu RD, RD, 8
+  |  b <5
+  |.  sd TISNIL, -8(TMP1)
+  |
+  |  // Emits ff_math_<func>: one-number-argument wrapper (.ffunc_n) that calls
+  |  // the external function <func> and returns through ->fff_resn.
+  |  // load_got fills the delay slot left open by .ffunc_n.
+  |.macro math_extern, func
+  |  .ffunc_n math_ .. func
+  |  load_got func
+  |  call_extern
+  |.  nop
+  |  b ->fff_resn
+  |.  nop
+  |.endmacro
+  |
+  |.macro math_extern2, func
+  |  .ffunc_nn math_ .. func
+  |.  load_got func
+  |  call_extern
+  |.
nop + | b ->fff_resn + |. nop + |.endmacro + | + |// TODO: Return integer type if result is integer (own sf implementation). + |.macro math_round, func + |->ff_math_ .. func: + | ld CARG1, 0(BASE) + | beqz NARGS8:RC, ->fff_fallback + |. gettp TMP0, CARG1 + | beq TMP0, TISNUM, ->fff_restv + |. sltu AT, TMP0, TISNUM + | beqz AT, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + | bal ->vm_ .. func + |. nop + |.else + |. load_got func + | call_extern + |. nop + |.endif + | b ->fff_resn + |. nop + |.endmacro + | + | math_round floor + | math_round ceil + | + |.ffunc math_log + | li AT, 8 + | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. + |. ld CARG1, 0(BASE) + | checknum CARG1, ->fff_fallback + |. load_got log + |.if FPU + | call_extern + |. ldc1 FARG1, 0(BASE) + |.else + | call_extern + |. nop + |.endif + | b ->fff_resn + |. nop + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.if FPU + |.ffunc_n math_sqrt + |. sqrt.d FRET1, FARG1 + |// fallthrough to ->fff_resn + |.else + | math_extern sqrt + |.endif + | + |->fff_resn: + | ld PC, FRAME_PC(BASE) + | daddiu RA, BASE, -16 + | b ->fff_res1 + |.if FPU + |. sdc1 FRET1, 0(RA) + |.else + |. sd CRET1, 0(RA) + |.endif + | + | + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + |. load_got ldexp + | .FPU ldc1 FARG1, 0(BASE) + | call_extern + |. lw CARG2, 8+LO(BASE) + | b ->fff_resn + |. nop + | + |.ffunc_n math_frexp + | load_got frexp + | ld PC, FRAME_PC(BASE) + | call_extern + |. 
daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) + | daddiu RA, BASE, -16 + |.if FPU + | mtc1 TMP1, FARG2 + | sdc1 FRET1, 0(RA) + | cvt.d.w FARG2, FARG2 + | sdc1 FARG2, 8(RA) + |.else + | sd CRET1, 0(RA) + | zextw TMP1, TMP1 + | settp TMP1, TISNUM + | sd TMP1, 8(RA) + |.endif + | b ->fff_res + |. li RD, (2+1)*8 + | + |.ffunc_n math_modf + | load_got modf + | ld PC, FRAME_PC(BASE) + | call_extern + |. daddiu CARG2, BASE, -16 + | daddiu RA, BASE, -16 + |.if FPU + | sdc1 FRET1, -8(BASE) + |.else + | sd CRET1, -8(BASE) + |.endif + | b ->fff_res + |. li RD, (2+1)*8 + | + |.macro math_minmax, name, intins, fpins + | .ffunc_1 name + | daddu TMP3, BASE, NARGS8:RC + | checkint CARG1, >5 + |. daddiu TMP2, BASE, 8 + |1: // Handle integers. + | beq TMP2, TMP3, ->fff_restv + |. ld CARG2, 0(TMP2) + | checkint CARG2, >3 + |. sextw CARG1, CARG1 + | lw CARG2, LO(TMP2) + |. slt AT, CARG1, CARG2 + | intins CARG1, CARG2, AT + | daddiu TMP2, TMP2, 8 + | zextw CARG1, CARG1 + | b <1 + |. settp CARG1, TISNUM + | + |3: // Convert intermediate result to number and continue with number loop. + | checknum CARG2, ->fff_fallback + |.if FPU + |. mtc1 CARG1, FRET1 + | cvt.d.w FRET1, FRET1 + | b >7 + |. ldc1 FARG1, 0(TMP2) + |.else + |. nop + | bal ->vm_sfi2d_1 + |. nop + | b >7 + |. nop + |.endif + | + |5: + | .FPU ldc1 FRET1, 0(BASE) + | checknum CARG1, ->fff_fallback + |6: // Handle numbers. + |. ld CARG2, 0(TMP2) + | beq TMP2, TMP3, ->fff_resn + |.if FPU + | ldc1 FARG1, 0(TMP2) + |.else + | move CRET1, CARG1 + |.endif + | checknum CARG2, >8 + |. nop + |7: + |.if FPU + | c.olt.d FRET1, FARG1 + | fpins FRET1, FARG1 + |.else + | bal ->vm_sfcmpolt + |. nop + | intins CARG1, CARG2, CRET1 + |.endif + | b <6 + |. daddiu TMP2, TMP2, 8 + | + |8: // Convert integer to number and continue with number loop. + | checkint CARG2, ->fff_fallback + |.if FPU + |. lwc1 FARG1, LO(TMP2) + | b <7 + |. cvt.d.w FARG1, FARG1 + |.else + |. lw CARG2, LO(TMP2) + | bal ->vm_sfi2d_2 + |. 
nop + | b <7 + |. nop + |.endif + | + |.endmacro + | + | math_minmax math_min, movz, movf.d + | math_minmax math_max, movn, movt.d + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori AT, NARGS8:RC, 8 + | daddiu TMP0, TMP0, -LJ_TSTR + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback // Need exactly 1 string argument. + |. cleartp STR:CARG1 + | lw TMP0, STR:CARG1->len + | daddiu RA, BASE, -16 + | ld PC, FRAME_PC(BASE) + | sltu RD, r0, TMP0 + | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). + | addiu RD, RD, 1 + | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 + | settp TMP1, TISNUM + | b ->fff_res + |. sd TMP1, 0(RA) + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + |. nop + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori AT, NARGS8:RC, 8 // Exactly 1 argument. + | daddiu TMP0, TMP0, -LJ_TISNUM // Integer. + | li TMP1, 255 + | sextw CARG1, CARG1 + | or AT, AT, TMP0 + | sltu TMP1, TMP1, CARG1 // !(255 < n). + | or AT, AT, TMP1 + | bnez AT, ->fff_fallback + |. li CARG3, 1 + | daddiu CARG2, sp, TMPD_OFS + | sb CARG1, TMPD + |->fff_newstr: + | load_got lj_str_new + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_str_new // (lua_State *L, char *str, size_t l) + |. move CARG1, L + | // Returns GCstr *. + | ld BASE, L->base + |->fff_resstr: + | li AT, LJ_TSTR + | settp CRET1, AT + | b ->fff_restv + |. move CARG1, CRET1 + | + |.ffunc string_sub + | ffgccheck + |. nop + | addiu AT, NARGS8:RC, -16 + | ld TMP0, 0(BASE) + | bltz AT, ->fff_fallback + |. gettp TMP3, TMP0 + | cleartp STR:CARG1, TMP0 + | ld CARG2, 8(BASE) + | beqz AT, >1 + |. li CARG4, -1 + | ld CARG3, 16(BASE) + | checkint CARG3, ->fff_fallback + |. sextw CARG4, CARG3 + |1: + | checkint CARG2, ->fff_fallback + |. li AT, LJ_TSTR + | bne TMP3, AT, ->fff_fallback + |. 
sextw CARG3, CARG2 + | lw CARG2, STR:CARG1->len + | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end + | slt AT, CARG4, r0 + | addiu TMP0, CARG2, 1 + | addu TMP1, CARG4, TMP0 + | slt TMP3, CARG3, r0 + | movn CARG4, TMP1, AT // if (end < 0) end += len+1 + | addu TMP1, CARG3, TMP0 + | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 + | li TMP2, 1 + | slt AT, CARG4, r0 + | slt TMP3, r0, CARG3 + | movn CARG4, r0, AT // if (end < 0) end = 0 + | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 + | slt AT, CARG2, CARG4 + | movn CARG4, CARG2, AT // if (end > len) end = len + | daddu CARG2, STR:CARG1, CARG3 + | subu CARG3, CARG4, CARG3 // len = end - start + | daddiu CARG2, CARG2, sizeof(GCstr)-1 + | bgez CARG3, ->fff_newstr + |. addiu CARG3, CARG3, 1 // len++ + |->fff_emptystr: // Return empty string. + | li AT, LJ_TSTR + | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) + | b ->fff_restv + |. settp CARG1, AT + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + |. nop + | beqz NARGS8:RC, ->fff_fallback + |. ld CARG2, 0(BASE) + | checkstr STR:CARG2, ->fff_fallback + | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) + | load_got lj_buf_putstr_ .. name + | ld TMP0, SBUF:CARG1->b + | sd L, SBUF:CARG1->L + | sd BASE, L->base + | sd TMP0, SBUF:CARG1->p + | call_intern extern lj_buf_putstr_ .. name + |. sd PC, SAVE_PC + | load_got lj_buf_tostr + | call_intern lj_buf_tostr + |. move SBUF:CARG1, SBUF:CRET1 + | b ->fff_resstr + |. ld BASE, L->base + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |->vm_tobit_fb: + | beqz TMP1, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + | add.d FARG1, FARG1, TOBIT + | mfc1 CRET1, FARG1 + | jr ra + |. zextw CRET1, CRET1 + |.else + |// FP number to bit conversion for soft-float. 
+ |->vm_tobit: + | dsll TMP0, CARG1, 1 + | li CARG3, 1076 + | dsrl AT, TMP0, 53 + | dsubu CARG3, CARG3, AT + | sltiu AT, CARG3, 54 + | beqz AT, >1 + |. dextm TMP0, TMP0, 0, 20 + | dinsu TMP0, AT, 21, 21 + | slt AT, CARG1, r0 + | dsrlv CRET1, TMP0, CARG3 + | dsubu TMP0, r0, CRET1 + | movn CRET1, TMP0, AT + | jr ra + |. zextw CRET1, CRET1 + |1: + | jr ra + |. move CRET1, r0 + |.endif + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >6 + |. zextw CRET1, CARG1 + | bal ->vm_tobit_fb + |. sltiu TMP1, TMP0, LJ_TISNUM + |6: + |.endmacro + | + |.macro .ffunc_bit_op, name, bins + | .ffunc_bit name + | daddiu TMP2, BASE, 8 + | daddu TMP3, BASE, NARGS8:RC + |1: + | beq TMP2, TMP3, ->fff_resi + |. ld CARG1, 0(TMP2) + | gettp TMP0, CARG1 + |.if FPU + | bne TMP0, TISNUM, >2 + |. daddiu TMP2, TMP2, 8 + | zextw CARG1, CARG1 + | b <1 + |. bins CRET1, CRET1, CARG1 + |2: + | ldc1 FARG1, -8(TMP2) + | sltiu AT, TMP0, LJ_TISNUM + | beqz AT, ->fff_fallback + |. add.d FARG1, FARG1, TOBIT + | mfc1 CARG1, FARG1 + | zextw CARG1, CARG1 + | b <1 + |. bins CRET1, CRET1, CARG1 + |.else + | beq TMP0, TISNUM, >2 + |. move CRET2, CRET1 + | bal ->vm_tobit_fb + |. sltiu TMP1, TMP0, LJ_TISNUM + | move CARG1, CRET2 + |2: + | zextw CARG1, CARG1 + | bins CRET1, CRET1, CARG1 + | b <1 + |. daddiu TMP2, TMP2, 8 + |.endif + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, or + |.ffunc_bit_op bxor, xor + | + |.ffunc_bit bswap + | dsrl TMP0, CRET1, 8 + | dsrl TMP1, CRET1, 24 + | andi TMP2, TMP0, 0xff00 + | dins TMP1, CRET1, 24, 31 + | dins TMP2, TMP0, 16, 23 + | b ->fff_resi + |. or CRET1, TMP1, TMP2 + | + |.ffunc_bit bnot + | not CRET1, CRET1 + | b ->fff_resi + |. zextw CRET1, CRET1 + | + |.macro .ffunc_bit_sh, name, shins, shmod + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + |. nop + | bal ->vm_tobit_fb + |. 
sltiu TMP1, TMP0, LJ_TISNUM + | move CARG1, CRET1 + |1: + | gettp TMP0, CARG2 + | bne TMP0, TISNUM, ->fff_fallback + |. zextw CARG2, CARG2 + | sextw CARG1, CARG1 + |.if shmod == 1 + | negu CARG2, CARG2 + |.endif + | shins CRET1, CARG1, CARG2 + | b ->fff_resi + |. zextw CRET1, CRET1 + |.endmacro + | + |.ffunc_bit_sh lshift, sllv, 0 + |.ffunc_bit_sh rshift, srlv, 0 + |.ffunc_bit_sh arshift, srav, 0 + |.ffunc_bit_sh rol, rotrv, 1 + |.ffunc_bit_sh ror, rotrv, 0 + | + |.ffunc_bit tobit + |->fff_resi: + | ld PC, FRAME_PC(BASE) + | daddiu RA, BASE, -16 + | settp CRET1, TISNUM + | b ->fff_res1 + |. sd CRET1, -16(BASE) + | + |//----------------------------------------------------------------------- + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RB = CFUNC, RC = nargs*8 + | ld TMP3, CFUNC:RB->f + | daddu TMP1, BASE, NARGS8:RC + | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. + | daddiu TMP0, TMP1, 8*LUA_MINSTACK + | ld TMP2, L->maxstack + | sd PC, SAVE_PC // Redundant (but a defined value). + | sltu AT, TMP2, TMP0 + | sd BASE, L->base + | sd TMP1, L->top + | bnez AT, >5 // Need to grow stack. + |. move CFUNCADDR, TMP3 + | jalr TMP3 // (lua_State *L) + |. move CARG1, L + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ld BASE, L->base + | sll RD, CRET1, 3 + | bgtz CRET1, ->fff_res // Returned nresults+1? + |. daddiu RA, BASE, -16 + |1: // Returned 0 or -1: retry fast path. + | ld LFUNC:RB, FRAME_FUNC(BASE) + | ld TMP0, L->top + | cleartp LFUNC:RB + | bnez CRET1, ->vm_call_tail // Returned -1? + |. dsubu NARGS8:RC, TMP0, BASE + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | andi TMP0, PC, FRAME_TYPE + | li AT, -4 + | bnez TMP0, >3 + |. and TMP1, PC, AT + | lbu TMP1, OFS_RA(PC) + | sll TMP1, TMP1, 3 + | addiu TMP1, TMP1, 16 + |3: + | b ->vm_call_dispatch // Resolve again for tailcall. + |. 
dsubu TMP2, BASE, TMP1 + | + |5: // Grow stack for fallback handler. + | load_got lj_state_growstack + | li CARG2, LUA_MINSTACK + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | ld BASE, L->base + | b <1 + |. li CRET1, 0 // Force retry. + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | move MULTRES, ra + | load_got lj_gc_step + | sd BASE, L->base + | daddu TMP0, BASE, NARGS8:RC + | sd PC, SAVE_PC // Redundant (but a defined value). + | sd TMP0, L->top + | call_intern lj_gc_step // (lua_State *L) + |. move CARG1, L + | ld BASE, L->base + | move ra, MULTRES + | ld TMP0, L->top + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | jr ra + |. dsubu NARGS8:RC, TMP0, BASE + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + | NYI + | + |->vm_rethook: // Dispatch target for return hooks. + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE // Hook already active? + | beqz AT, >1 + |5: // Re-dispatch to static ins. + |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. + | jr AT + |. nop + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE // Hook already active? + | bnez AT, <5 + |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | beqz AT, <5 + |. addiu TMP2, TMP2, -1 + | beqz TMP2, >1 + |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, LUA_MASKLINE + | beqz AT, <5 + |1: + |. load_got lj_dispatch_ins + | sw MULTRES, SAVE_MULTRES + | move CARG2, PC + | sd BASE, L->base + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 
+ | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |. move CARG1, L + |3: + | ld BASE, L->base + |4: // Re-dispatch to static ins. + | lw INS, -4(PC) + | decode_OP8a TMP1, INS + | decode_OP8b TMP1 + | daddu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld AT, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | jr AT + | decode_RA8b RA + | + |->cont_hook: // Continue from hook yield. + | daddiu PC, PC, 4 + | b <4 + |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. + | + |->vm_hotloop: // Hot loop counter underflow. + | NYI + | + |->vm_callhook: // Dispatch target for call hooks. + |.if JIT + | b >1 + |.endif + |. move CARG2, PC + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | ori CARG2, PC, 1 + |1: + |.endif + | load_got lj_dispatch_call + | daddu TMP0, BASE, RC + | sd PC, SAVE_PC + | sd BASE, L->base + | dsubu RA, RA, BASE + | sd TMP0, L->top + | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) + |. move CARG1, L + | // Returns ASMFunction. + | ld BASE, L->base + | ld TMP0, L->top + | sd r0, SAVE_PC // Invalidate for subsequent line hook. + | dsubu NARGS8:RC, TMP0, BASE + | daddu RA, BASE, RA + | ld LFUNC:RB, FRAME_FUNC(BASE) + | cleartp LFUNC:RB + | jr CRET1 + |. lw INS, -4(PC) + | + |->cont_stitch: // Trace stitching. + |.if JIT + | NYI + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | load_got lj_dispatch_profile + | sw MULTRES, SAVE_MULTRES + | move CARG2, PC + | sd BASE, L->base // BASE is a 64 bit pointer: store all of it. + | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + |. move CARG1, L + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | daddiu PC, PC, -4 + | b ->cont_nop + |. 
ld BASE, L->base // Reload the full 64 bit BASE after the call. +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b + |.if FPU + | sdc1 f..a, a*8(sp) + | sd r..a, 32*8+a*8(sp) + | sd r..b, 32*8+b*8(sp) + |.else + | sd r..a, a*8(sp) + | sd r..b, b*8(sp) + |.endif + |.endmacro + | + |->vm_exit_handler: + |.if JIT + | NYI + |.endif + |->vm_exit_interp: + |.if JIT + | NYI + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Hard-float round to integer. + |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. + |.macro vm_round_hf, func + | lui TMP0, 0x4330 // Hiword of 2^52 (double). + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + | abs.d FRET2, FARG1 // |x| + | dmfc1 AT, FARG1 + | c.olt.d 0, FRET2, f4 + | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 + | bc1f 0, >1 // Truncate only if |x| < 2^52. + |. sub.d FRET1, FRET1, f4 + | slt AT, AT, r0 + |.if "func" == "ceil" + | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. + |.else + | lui TMP0, 0x3ff0 // Hiword of +1 (double). + |.endif + |.if "func" == "trunc" + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + | c.olt.d 0, FRET2, FRET1 // |x| < result? + | sub.d FRET2, FRET1, f4 + | movt.d FRET1, FRET2, 0 // If yes, subtract +1. + | neg.d FRET2, FRET1 + | jr ra + |. movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.else + | neg.d FRET2, FRET1 + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + | movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.if "func" == "ceil" + | c.olt.d 0, FRET1, FARG1 // x > result? + |.else + | c.olt.d 0, FARG1, FRET1 // x < result? + |.endif + | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. + | jr ra + |. 
movt.d FRET1, FRET2, 0 + |.endif + |1: + | jr ra + |. mov.d FRET1, FARG1 + |.endmacro + | + |.macro vm_round, func + |.if FPU + | vm_round_hf, func + |.endif + |.endmacro + | + |->vm_floor: + | vm_round floor + |->vm_ceil: + | vm_round ceil + |->vm_trunc: + |.if JIT + | vm_round trunc + |.endif + | + |// Soft-float integer to number conversion. + |.macro sfi2d, ARG + |.if not FPU + | beqz ARG, >9 // Handle zero first. + |. sra TMP0, ARG, 31 + | xor TMP1, ARG, TMP0 + | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1. + | dclz ARG, TMP1 + | addiu ARG, ARG, -11 + | li AT, 0x3ff+63-11-1 + | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1. + | subu ARG, AT, ARG // Exponent - 1. + | ins ARG, TMP0, 11, 11 // Sign | Exponent. + | dsll ARG, ARG, 52 // Align left. + | jr ra + |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent. + |9: + | jr ra + |. nop + |.endif + |.endmacro + | + |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_1: + | sfi2d CARG1 + | + |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_2: + | sfi2d CARG2 + | + |// Soft-float comparison. Equivalent to c.eq.d. + |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpeq: + |.if not FPU + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 1. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. xor AT, CARG1, CARG2 + | jr ra + |. sltiu CRET1, AT, 1 // Same values: return 1. + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. + |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. + |->vm_sfcmpult: + |.if not FPU + | b >1 + |. 
li CRET2, 1 + |.endif + | + |->vm_sfcmpolt: + |.if not FPU + | li CRET2, 0 + |1: + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 0. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, CARG1, CARG2 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG1, CARG2 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG2, CARG1 + |8: + | jr ra + |. li CRET1, 0 // Must write the result: CRET1 is not set on this path. + |9: + | jr ra + |. move CRET1, CRET2 + |.endif + | + |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. + |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpolex: + |.if not FPU + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 1. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. and AT, CARG1, CARG2 + | xor AT, AT, TMP3 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG2, CARG1 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG1, CARG2 + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in r1, g in r2. 
+ |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs + | ld CTSTATE, GL:r2->ctype_state + | daddiu DISPATCH, r2, GG_G2DISP + | load_got lj_ccallback_enter + | sw r1, CTSTATE->cb.slot + | sd CARG1, CTSTATE->cb.gpr[0] + | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] + | sd CARG2, CTSTATE->cb.gpr[1] + | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] + | sd CARG3, CTSTATE->cb.gpr[2] + | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2] + | sd CARG4, CTSTATE->cb.gpr[3] + | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3] + | sd CARG5, CTSTATE->cb.gpr[4] + | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4] + | sd CARG6, CTSTATE->cb.gpr[5] + | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5] + | sd CARG7, CTSTATE->cb.gpr[6] + | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6] + | sd CARG8, CTSTATE->cb.gpr[7] + | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7] + | daddiu TMP0, sp, CFRAME_SPACE + | sd TMP0, CTSTATE->cb.stack + | sd r0, SAVE_PC // Any value outside of bytecode is ok. + | move CARG2, sp + | call_intern lj_ccallback_enter // (CTState *cts, void *cf) + |. move CARG1, CTSTATE + | // Returns lua_State *. + | ld BASE, L:CRET1->base + | ld RC, L:CRET1->top + | move L, CRET1 + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | ld LFUNC:RB, FRAME_FUNC(BASE) + | .FPU mtc1 TMP3, TOBIT + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | li_vmstate INTERP + | subu RC, RC, BASE + | cleartp LFUNC:RB + | st_vmstate + | .FPU cvt.d.s TOBIT, TOBIT + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | load_got lj_ccallback_leave + | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) + | sd BASE, L->base + | sd RB, L->top + | sd L, CTSTATE->L + | move CARG2, RA + | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) + |. move CARG1, CTSTATE + | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] + | ld CRET1, CTSTATE->cb.gpr[0] + | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] + | b ->vm_leave_unw + |. ld CRET2, CTSTATE->cb.gpr[1] + |.endif + | + |->vm_ffi_call: // Call C function via FFI. 
+ | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, CARG1 + | lw TMP1, CCSTATE->spadj + | lbu CARG2, CCSTATE->nsp + | move TMP2, sp + | dsubu sp, sp, TMP1 + | sd ra, -8(TMP2) + | sll CARG2, CARG2, 3 + | sd r16, -16(TMP2) + | sd CCSTATE, -24(TMP2) + | move r16, TMP2 + | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) + | move TMP2, sp + | beqz CARG2, >2 + |. daddu TMP3, TMP1, CARG2 + |1: + | ld TMP0, 0(TMP1) + | daddiu TMP1, TMP1, 8 + | sltu AT, TMP1, TMP3 + | sd TMP0, 0(TMP2) + | bnez AT, <1 + |. daddiu TMP2, TMP2, 8 + |2: + | ld CFUNCADDR, CCSTATE->func + | .FPU ldc1 FARG1, CCSTATE->gpr[0] + | ld CARG2, CCSTATE->gpr[1] + | .FPU ldc1 FARG2, CCSTATE->gpr[1] + | ld CARG3, CCSTATE->gpr[2] + | .FPU ldc1 FARG3, CCSTATE->gpr[2] + | ld CARG4, CCSTATE->gpr[3] + | .FPU ldc1 FARG4, CCSTATE->gpr[3] + | ld CARG5, CCSTATE->gpr[4] + | .FPU ldc1 FARG5, CCSTATE->gpr[4] + | ld CARG6, CCSTATE->gpr[5] + | .FPU ldc1 FARG6, CCSTATE->gpr[5] + | ld CARG7, CCSTATE->gpr[6] + | .FPU ldc1 FARG7, CCSTATE->gpr[6] + | ld CARG8, CCSTATE->gpr[7] + | .FPU ldc1 FARG8, CCSTATE->gpr[7] + | jalr CFUNCADDR + |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | ld CCSTATE:TMP1, -24(r16) + | ld TMP2, -16(r16) + | ld ra, -8(r16) + | sd CRET1, CCSTATE:TMP1->gpr[0] + | sd CRET2, CCSTATE:TMP1->gpr[1] + |.if FPU + | sdc1 FRET1, CCSTATE:TMP1->fpr[0] + | sdc1 FRET2, CCSTATE:TMP1->fpr[1] + |.else + | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. + |.endif + | move sp, r16 + | jr ra + |. move r16, TMP2 + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. 
*/ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp + | daddu RA, BASE, RA + | daddu RD, BASE, RD + | ld ARGRA, 0(RA) + | ld ARGRD, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, ARGRA + | gettp CARG4, ARGRD + | bne CARG3, TISNUM, >2 + |. daddiu PC, PC, 4 + | bne CARG4, TISNUM, >5 + |. decode_RD4b TMP2 + | sextw ARGRA, ARGRA + | sextw ARGRD, ARGRD + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | slt AT, CARG1, CARG2 + | addu TMP2, TMP2, TMP3 + | movop TMP2, r0, AT + |1: + | daddu PC, PC, TMP2 + | ins_next + | + |2: // RA is not an integer. + | sltiu AT, CARG3, LJ_TISNUM + | beqz AT, ->vmeta_comp + |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | sltiu AT, CARG4, LJ_TISNUM + | beqz AT, >4 + |. decode_RD4b TMP2 + |.if FPU + | ldc1 FRA, 0(RA) + | ldc1 FRD, 0(RD) + |.endif + |3: // RA and RD are both numbers. + |.if FPU + | fcomp f20, f22 + | addu TMP2, TMP2, TMP3 + | b <1 + |. fmovop TMP2, r0 + |.else + | bal sfcomp + |. addu TMP2, TMP2, TMP3 + | b <1 + |. movop TMP2, r0, CRET1 + |.endif + | + |4: // RA is a number, RD is not a number. + | bne CARG4, TISNUM, ->vmeta_comp + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 FRD, LO(RD) + | ldc1 FRA, 0(RA) + | b <3 + |. cvt.d.w FRD, FRD + |.else + |.if "ARGRD" == "CARG1" + |. sextw CARG1, CARG1 + | bal ->vm_sfi2d_1 + |. nop + |.else + |. sextw CARG2, CARG2 + | bal ->vm_sfi2d_2 + |. nop + |.endif + | b <3 + |. nop + |.endif + | + |5: // RA is an integer, RD is not an integer + | sltiu AT, CARG4, LJ_TISNUM + | beqz AT, ->vmeta_comp + |. 
lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + | lwc1 FRA, LO(RA) + | ldc1 FRD, 0(RD) + | b <3 + | cvt.d.w FRA, FRA + |.else + |.if "ARGRA" == "CARG1" + | bal ->vm_sfi2d_1 + |. sextw CARG1, CARG1 + |.else + | bal ->vm_sfi2d_2 + |. sextw CARG2, CARG2 + |.endif + | b <3 + |. nop + |.endif + |.endmacro + | + if (op == BC_ISLT) { + | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { + | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { + | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult + } else { + | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult + } + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + | daddu RA, BASE, RA + | daddiu PC, PC, 4 + | daddu RD, BASE, RD + | ld CARG1, 0(RA) + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, 0(RD) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | sltu AT, TISNUM, CARG3 + | sltu TMP1, TISNUM, CARG4 + | or AT, AT, TMP1 + if (vk) { + | beqz AT, ->BC_ISEQN_Z + } else { + | beqz AT, ->BC_ISNEN_Z + } + | // Either or both types are not numbers. + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + |.if FFI + |. li AT, LJ_TCDATA + | beq CARG3, AT, ->vmeta_equal_cd + |.endif + | decode_RD4b TMP2 + |.if FFI + | beq CARG4, AT, ->vmeta_equal_cd + |. nop + |.endif + | bne CARG1, CARG2, >2 + |. addu TMP2, TMP2, TMP3 + | // Tag and value are equal. + if (vk) { + |->BC_ISEQV_Z: + | daddu PC, PC, TMP2 + } + |1: + | ins_next + | + |2: // Check if the tags are the same and it's a table or userdata. + | xor AT, CARG3, CARG4 // Same type? + | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? + | movn TMP0, r0, AT + if (vk) { + | beqz TMP0, <1 + } else { + | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. + } + | // Different tables or userdatas. 
Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + |. cleartp TAB:TMP1, CARG1 + | ld TAB:TMP3, TAB:TMP1->metatable + if (vk) { + | beqz TAB:TMP3, <1 // No metatable? + |. nop + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<1 // Or 'no __eq' flag set? + } else { + | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? + |. nop + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? + } + |. nop + | b ->vmeta_equal // Handle __eq metamethod. + |. li TMP0, 1-vk // ne = 0 or 1. + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target + | daddu RA, BASE, RA + | daddiu PC, PC, 4 + | ld CARG1, 0(RA) + | dsubu RD, KBASE, RD + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, -8(RD) // KBASE-8-str_const*8 + |.if FFI + | gettp TMP0, CARG1 + | li AT, LJ_TCDATA + |.endif + | li TMP1, LJ_TSTR + | decode_RD4b TMP2 + |.if FFI + | beq TMP0, AT, ->vmeta_equal_cd + |.endif + |. settp CARG2, TMP1 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | xor TMP1, CARG1, CARG2 + | addu TMP2, TMP2, TMP3 + if (vk) { + | movn TMP2, r0, TMP1 + } else { + | movz TMP2, r0, TMP1 + } + | daddu PC, PC, TMP2 + | ins_next + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + | daddu RA, BASE, RA + | daddu RD, KBASE, RD + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | daddiu PC, PC, 4 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | bne CARG3, TISNUM, >3 + |. decode_RD4b TMP2 + | bne CARG4, TISNUM, >6 + |. addu TMP2, TMP2, TMP3 + | xor AT, CARG1, CARG2 + if (vk) { + | movn TMP2, r0, AT + |1: + | daddu PC, PC, TMP2 + |2: + } else { + | movz TMP2, r0, AT + |1: + |2: + | daddu PC, PC, TMP2 + } + | ins_next + | + |3: // RA is not an integer. 
+ | sltu AT, CARG3, TISNUM + |.if FFI + | beqz AT, >8 + |.else + | beqz AT, <2 + |.endif + |. addu TMP2, TMP2, TMP3 + | sltu AT, CARG4, TISNUM + |.if FPU + | ldc1 f20, 0(RA) + | ldc1 f22, 0(RD) + |.endif + | beqz AT, >5 + |. nop + |4: // RA and RD are both numbers. + |.if FPU + | c.eq.d f20, f22 + | b <1 + if (vk) { + |. movf TMP2, r0 + } else { + |. movt TMP2, r0 + } + |.else + | bal ->vm_sfcmpeq + |. nop + | b <1 + if (vk) { + |. movz TMP2, r0, CRET1 + } else { + |. movn TMP2, r0, CRET1 + } + |.endif + | + |5: // RA is a number, RD is not a number. + |.if FFI + | bne CARG4, TISNUM, >9 + |.else + | bne CARG4, TISNUM, <2 + |.endif + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 f22, LO(RD) + | b <4 + |. cvt.d.w f22, f22 + |.else + |. sextw CARG2, CARG2 + | bal ->vm_sfi2d_2 + |. nop + | b <4 + |. nop + |.endif + | + |6: // RA is an integer, RD is not an integer + | sltu AT, CARG4, TISNUM + |.if FFI + | beqz AT, >9 + |.else + | beqz AT, <2 + |.endif + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + |. lwc1 f20, LO(RA) + | ldc1 f22, 0(RD) + | b <4 + | cvt.d.w f20, f20 + |.else + |. sextw CARG1, CARG1 + | bal ->vm_sfi2d_1 + |. nop + | b <4 + |. nop + |.endif + | + |.if FFI + |8: + | li AT, LJ_TCDATA + | bne CARG3, AT, <2 + |. nop + | b ->vmeta_equal_cd + |. nop + |9: + | li AT, LJ_TCDATA + | bne CARG4, AT, <2 + |. nop + | b ->vmeta_equal_cd + |. nop + |.endif + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target + | daddu RA, BASE, RA + | srl TMP1, RD, 3 + | ld TMP0, 0(RA) + | lhu TMP2, OFS_RD(PC) + | not TMP1, TMP1 + | gettp TMP0, TMP0 + | daddiu PC, PC, 4 + |.if FFI + | li AT, LJ_TCDATA + | beq TMP0, AT, ->vmeta_equal_cd + |.endif + |. 
xor TMP0, TMP0, TMP1 + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + if (vk) { + | movn TMP2, r0, TMP0 + } else { + | movz TMP2, r0, TMP0 + } + | daddu PC, PC, TMP2 + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, RD = src*8, JMP with RD = target + | daddu RD, BASE, RD + | lhu TMP2, OFS_RD(PC) + | ld TMP0, 0(RD) + | daddiu PC, PC, 4 + | gettp TMP0, TMP0 + | sltiu TMP0, TMP0, LJ_TISTRUECOND + if (op == BC_IST || op == BC_ISF) { + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + if (op == BC_IST) { + | movz TMP2, r0, TMP0 + } else { + | movn TMP2, r0, TMP0 + } + | daddu PC, PC, TMP2 + } else { + | ld CRET1, 0(RD) + if (op == BC_ISTC) { + | beqz TMP0, >1 + } else { + | bnez TMP0, >1 + } + |. daddu RA, BASE, RA + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + | sd CRET1, 0(RA) + | daddu PC, PC, TMP2 + |1: + } + | ins_next + break; + + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | daddu TMP2, BASE, RA + | srl TMP1, RD, 3 + | ld TMP0, 0(TMP2) + | ins_next1 + | gettp TMP0, TMP0 + | daddu AT, TMP0, TMP1 + | bnez AT, ->vmeta_istype + |. ins_next2 + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | daddu TMP2, BASE, RA + | ld TMP0, 0(TMP2) + | ins_next1 + | checknum TMP0, ->vmeta_istype + |. 
ins_next2 + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA = dst*8, RD = src*8 + | daddu RD, BASE, RD + | daddu RA, BASE, RA + | ld CRET1, 0(RD) + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_NOT: + | // RA = dst*8, RD = src*8 + | daddu RD, BASE, RD + | daddu RA, BASE, RA + | ld TMP0, 0(RD) + | li AT, LJ_TTRUE + | gettp TMP0, TMP0 + | sltu TMP0, AT, TMP0 + | addiu TMP0, TMP0, 1 + | dsll TMP0, TMP0, 47 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_UNM: + | // RA = dst*8, RD = src*8 + | daddu RB, BASE, RD + | ld CARG1, 0(RB) + | daddu RA, BASE, RA + | gettp CARG3, CARG1 + | bne CARG3, TISNUM, >2 + |. lui TMP1, 0x8000 + | sextw CARG1, CARG1 + | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + |. negu CARG1, CARG1 + | zextw CARG1, CARG1 + | settp CARG1, TISNUM + |1: + | ins_next1 + | sd CARG1, 0(RA) + | ins_next2 + |2: + | sltiu AT, CARG3, LJ_TISNUM + | beqz AT, ->vmeta_unm + |. dsll TMP1, TMP1, 32 + | b <1 + |. xor CARG1, CARG1, TMP1 + break; + case BC_LEN: + | // RA = dst*8, RD = src*8 + | daddu CARG2, BASE, RD + | daddu RA, BASE, RA + | ld TMP0, 0(CARG2) + | gettp TMP1, TMP0 + | daddiu AT, TMP1, -LJ_TSTR + | bnez AT, >2 + |. cleartp STR:CARG1, TMP0 + | lw CRET1, STR:CARG1->len + |1: + | settp CRET1, TISNUM + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + |2: + | daddiu AT, TMP1, -LJ_TTAB + | bnez AT, ->vmeta_len + |. nop +#if LJ_52 + | ld TAB:TMP2, TAB:CARG1->metatable + | bnez TAB:TMP2, >9 + |. nop + |3: +#endif + |->BC_LEN_Z: + | load_got lj_tab_len + | call_intern lj_tab_len // (GCtab *t) + |. nop + | // Returns uint32_t (but less than 2^31). + | b <1 + |. nop +#if LJ_52 + |9: + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_len + |. nop +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro fpmod, a, b, c + | bal ->vm_floor // floor(b/c) + |. 
div.d FARG1, b, c + | mul.d a, FRET1, c + | sub.d a, b, a // b - floor(b/c)*c + |.endmacro + + |.macro sfpmod + | daddiu sp, sp, -16 + | + | load_got __divdf3 + | sd CARG1, 0(sp) + | call_extern + |. sd CARG2, 8(sp) + | + | load_got floor + | call_extern + |. move CARG1, CRET1 + | + | load_got __muldf3 + | move CARG1, CRET1 + | call_extern + |. ld CARG2, 8(sp) + | + | load_got __subdf3 + | ld CARG1, 0(sp) + | call_extern + |. move CARG2, CRET1 + | + | daddiu sp, sp, 16 + |.endmacro + + |.macro ins_arithpre, label + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||switch (vk) { + ||case 0: + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | daddu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. daddu RC, KBASE, RC + || break; + ||case 1: + | decode_RB8a RC, INS + | decode_RB8b RC + | decode_RDtoRC8 RB, RD + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | daddu RC, BASE, RC + |.if "label" ~= "none" + | b label + |.endif + |. daddu RB, KBASE, RB + || break; + ||default: + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = src2*8 + | daddu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. daddu RC, BASE, RC + || break; + ||} + |.endmacro + | + |.macro ins_arith, intins, fpins, fpcall, label + | ins_arithpre none + | + |.if "label" ~= "none" + |label: + |.endif + | + |// Used in 5. + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | + |.if "intins" ~= "div" + | + | // Check for two integers. + | sextw CARG3, CARG1 + | bne TMP0, TISNUM, >5 + |. sextw CARG4, CARG2 + | bne TMP1, TISNUM, >5 + | + |.if "intins" == "addu" + |. intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. + | xor TMP2, CRET1, CARG4 + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. 
daddu RA, BASE, RA + |.elif "intins" == "subu" + |. intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. + | xor TMP2, CARG3, CARG4 + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. daddu RA, BASE, RA + |.elif "intins" == "mult" + |. intins CARG3, CARG4 + | mflo CRET1 + | mfhi TMP2 + | sra TMP1, CRET1, 31 + | bne TMP1, TMP2, ->vmeta_arith + |. daddu RA, BASE, RA + |.else + |. load_got lj_vm_modi + | beqz CARG4, ->vmeta_arith + |. daddu RA, BASE, RA + | move CARG1, CARG3 + | call_extern + |. move CARG2, CARG4 + |.endif + | + | zextw CRET1, CRET1 + | settp CRET1, TISNUM + | ins_next1 + | sd CRET1, 0(RA) + |3: + | ins_next2 + | + |.endif + | + |5: // Check for two numbers. + | .FPU ldc1 f20, 0(RB) + | sltu AT, TMP0, TISNUM + | sltu TMP0, TMP1, TISNUM + | .FPU ldc1 f22, 0(RC) + | and AT, AT, TMP0 + | beqz AT, ->vmeta_arith + |. daddu RA, BASE, RA + | + |.if FPU + | fpins FRET1, f20, f22 + |.elif "fpcall" == "sfpmod" + | sfpmod + |.else + | load_got fpcall + | call_extern + |. nop + |.endif + | + | ins_next1 + |.if "intins" ~= "div" + | b <3 + |.endif + |.if FPU + |. sdc1 FRET1, 0(RA) + |.else + |. 
sd CRET1, 0(RA) + |.endif + |.if "intins" == "div" + | ins_next2 + |.endif + | + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arith addu, add.d, __adddf3, none + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arith subu, sub.d, __subdf3, none + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arith mult, mul.d, __muldf3, none + break; + case BC_DIVVN: + | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z + break; + case BC_DIVNV: case BC_DIVVV: + | ins_arithpre ->BC_DIVVN_Z + break; + case BC_MODVN: + | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z + break; + case BC_MODNV: case BC_MODVV: + | ins_arithpre ->BC_MODVN_Z + break; + case BC_POW: + | ins_arithpre none + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | and AT, TMP0, TMP1 + | load_got pow + | beqz AT, ->vmeta_arith + |. daddu RA, BASE, RA + |.if FPU + | ldc1 FARG1, 0(RB) + | ldc1 FARG2, 0(RC) + |.endif + | call_extern + |. nop + | ins_next1 + |.if FPU + | sdc1 FRET1, 0(RA) + |.else + | sd CRET1, 0(RA) + |.endif + | ins_next2 + break; + + case BC_CAT: + | // RA = dst*8, RB = src_start*8, RC = src_end*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | dsubu CARG3, RC, RB + | sd BASE, L->base + | daddu CARG2, BASE, RC + | move MULTRES, RB + |->BC_CAT_Z: + | load_got lj_meta_cat + | srl CARG3, CARG3, 3 + | sd PC, SAVE_PC + | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) + |. move CARG1, L + | // Returns NULL (finished) or TValue * (metamethod). + | bnez CRET1, ->vmeta_binop + |. 
ld BASE, L->base + | daddu RB, BASE, MULTRES + | ld CRET1, 0(RB) + | daddu RA, BASE, RA + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RD = str_const*8 (~) + | dsubu TMP1, KBASE, RD + | ins_next1 + | li TMP2, LJ_TSTR + | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 + | daddu RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KCDATA: + |.if FFI + | // RA = dst*8, RD = cdata_const*8 (~) + | dsubu TMP1, KBASE, RD + | ins_next1 + | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 + | li TMP2, LJ_TCDATA + | daddu RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next2 + |.endif + break; + case BC_KSHORT: + | // RA = dst*8, RD = int16_literal*8 + | sra RD, INS, 16 + | daddu RA, BASE, RA + | zextw RD, RD + | ins_next1 + | settp RD, TISNUM + | sd RD, 0(RA) + | ins_next2 + break; + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | daddu RD, KBASE, RD + | daddu RA, BASE, RA + | ld CRET1, 0(RD) + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_KPRI: + | // RA = dst*8, RD = primitive_type*8 (~) + | daddu RA, BASE, RA + | dsll TMP0, RD, 44 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KNIL: + | // RA = base*8, RD = end*8 + | daddu RA, BASE, RA + | sd TISNIL, 0(RA) + | daddiu RA, RA, 8 + | daddu RD, BASE, RD + |1: + | sd TISNIL, 0(RA) + | slt AT, RA, RD + | bnez AT, <1 + |. 
daddiu RA, RA, 8 + | ins_next_ + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RD = uvnum*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RA, BASE, RA + | cleartp LFUNC:RB + | daddu RD, RD, LFUNC:RB + | ld UPVAL:RB, LFUNC:RD->uvptr + | ins_next1 + | ld TMP1, UPVAL:RB->v + | ld CRET1, 0(TMP1) + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_USETV: + | // RA = uvnum*8, RD = src*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RD, BASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | lbu TMP3, UPVAL:RB->marked + | ld CARG2, UPVAL:RB->v + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbu TMP0, UPVAL:RB->closed + | gettp TMP2, RD + | sd CRET1, 0(CARG2) + | li AT, LJ_GC_BLACK|1 + | or TMP3, TMP3, TMP0 + | beq TMP3, AT, >2 // Upvalue is closed and black? + |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) + |1: + | ins_next + | + |2: // Check if new value is collectable. + | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) + | beqz AT, <1 // tvisgcv(v) + |. cleartp GCOBJ:TMP1, RB + | lbu TMP3, GCOBJ:TMP1->gch.marked + | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) + | beqz TMP3, <1 + |. load_got lj_gc_barrieruv + | // Crossed a write barrier. Move the barrier forward. + | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) + |. daddiu CARG1, DISPATCH, GG_DISP2G + | b <1 + |. nop + break; + case BC_USETS: + | // RA = uvnum*8, RD = str_const*8 (~) + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsubu TMP1, KBASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 + | lbu TMP2, UPVAL:RB->marked + | ld CARG2, UPVAL:RB->v + | lbu TMP3, STR:TMP1->marked + | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) + | lbu TMP2, UPVAL:RB->closed + | li TMP0, LJ_TSTR + | settp TMP1, TMP0 + | bnez AT, >2 + |. 
sd TMP1, 0(CARG2) + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | beqz TMP2, <1 + |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) + | beqz AT, <1 + |. load_got lj_gc_barrieruv + | // Crossed a write barrier. Move the barrier forward. + | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) + |. daddiu CARG1, DISPATCH, GG_DISP2G + | b <1 + |. nop + break; + case BC_USETN: + | // RA = uvnum*8, RD = num_const*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RD, KBASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | ld TMP1, UPVAL:RB->v + | ins_next1 + | sd CRET1, 0(TMP1) + | ins_next2 + break; + case BC_USETP: + | // RA = uvnum*8, RD = primitive_type*8 (~) + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsll TMP0, RD, 44 + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | not TMP0, TMP0 + | ld UPVAL:RB, LFUNC:RA->uvptr + | ins_next1 + | ld TMP1, UPVAL:RB->v + | sd TMP0, 0(TMP1) + | ins_next2 + break; + + case BC_UCLO: + | // RA = level*8, RD = target + | ld TMP2, L->openupval + | branch_RD // Do this first since RD is not saved. + | load_got lj_func_closeuv + | sd BASE, L->base + | beqz TMP2, >1 + |. move CARG1, L + | call_intern lj_func_closeuv // (lua_State *L, TValue *level) + |. daddu CARG2, BASE, RA + | ld BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) + | load_got lj_func_newL_gc + | dsubu TMP1, KBASE, RD + | ld CARG3, FRAME_FUNC(BASE) + | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 + | sd BASE, L->base + | sd PC, SAVE_PC + | cleartp CARG3 + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call_intern lj_func_newL_gc + |. move CARG1, L + | // Returns GCfuncL *. 
+ | li TMP0, LJ_TFUNC + | ld BASE, L->base + | ins_next1 + | settp CRET1, TMP0 + | daddu RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next2 + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) + | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | sd BASE, L->base + | sd PC, SAVE_PC + | sltu AT, TMP0, TMP1 + | beqz AT, >5 + |1: + if (op == BC_TNEW) { + | load_got lj_tab_new + | srl CARG2, RD, 3 + | andi CARG2, CARG2, 0x7ff + | li TMP0, 0x801 + | addiu AT, CARG2, -0x7ff + | srl CARG3, RD, 14 + | movz CARG2, TMP0, AT + | // (lua_State *L, int32_t asize, uint32_t hbits) + | call_intern lj_tab_new + |. move CARG1, L + | // Returns Table *. + } else { + | load_got lj_tab_dup + | dsubu TMP1, KBASE, RD + | move CARG1, L + | call_intern lj_tab_dup // (lua_State *L, Table *kt) + |. ld CARG2, -8(TMP1) // KBASE-8-str_const*8 + | // Returns Table *. + } + | li TMP0, LJ_TTAB + | ld BASE, L->base + | ins_next1 + | daddu RA, BASE, RA + | settp CRET1, TMP0 + | sd CRET1, 0(RA) + | ins_next2 + |5: + | load_got lj_gc_step_fixtop + | move MULTRES, RD + | call_intern lj_gc_step_fixtop // (lua_State *L) + |. move CARG1, L + | b <1 + |. move RD, MULTRES + break; + + case BC_GGET: + | // RA = dst*8, RD = str_const*8 (~) + case BC_GSET: + | // RA = src*8, RD = str_const*8 (~) + | ld LFUNC:TMP2, FRAME_FUNC(BASE) + | dsubu TMP1, KBASE, RD + | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 + | cleartp LFUNC:TMP2 + | ld TAB:RB, LFUNC:TMP2->env + if (op == BC_GGET) { + | b ->BC_TGETS_Z + } else { + | b ->BC_TSETS_Z + } + |. 
daddu RA, BASE, RA + break; + + case BC_TGETV: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG2, BASE, RB + | daddu CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | daddu RA, BASE, RA + | checktab TAB:RB, ->vmeta_tgetv + | gettp TMP3, TMP2 + | bne TMP3, TISNUM, >5 // Integer key? + |. lw TMP0, TAB:RB->asize + | sextw TMP2, TMP2 + | ld TMP1, TAB:RB->array + | sltu AT, TMP2, TMP0 + | sll TMP2, TMP2, 3 + | beqz AT, ->vmeta_tgetv // Integer key and in array part? + |. daddu TMP2, TMP1, TMP2 + | ld AT, 0(TMP2) + | beq AT, TISNIL, >2 + |. ld CRET1, 0(TMP2) + |1: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |2: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_tgetv + |. nop + | + |5: + | li AT, LJ_TSTR + | bne TMP3, AT, ->vmeta_tgetv + |. cleartp RC, TMP2 + | b ->BC_TGETS_Z // String key? + |. nop + break; + case BC_TGETS: + | // RA = dst*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RC8a RC, INS + | daddu CARG2, BASE, RB + | decode_RC8b RC + | ld TAB:RB, 0(CARG2) + | dsubu CARG3, KBASE, RC + | daddu RA, BASE, RA + | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 + | checktab TAB:RB, ->vmeta_tgets1 + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->hash + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |1: + | ld CARG1, NODE:TMP2->key + | ld CRET1, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | bne CARG1, RC, >4 + |. 
ld TAB:TMP3, TAB:RB->metatable + | beq CRET1, TISNIL, >5 // Key found, but nil value? + |. nop + |3: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |4: // Follow hash chain. + | bnez NODE:TMP1, <1 + |. move NODE:TMP2, NODE:TMP1 + | // End of hash chain: key not found, nil result. + | + |5: // Check for __index if table value is nil. + | beqz TAB:TMP3, <3 // No metatable: done. + |. move CRET1, TISNIL + | lbu TMP0, TAB:TMP3->nomm + | andi TMP0, TMP0, 1<vmeta_tgets + |. nop + break; + case BC_TGETB: + | // RA = dst*8, RB = table*8, RC = index*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | daddu RA, BASE, RA + | srl TMP0, RC, 3 + | checktab TAB:RB, ->vmeta_tgetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | sltu AT, TMP0, TMP1 + | beqz AT, ->vmeta_tgetb + |. daddu RC, TMP2, RC + | ld AT, 0(RC) + | beq AT, TISNIL, >5 + |. ld CRET1, 0(RC) + |1: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |5: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! + |. nop + break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu RB, BASE, RB + | daddu RC, BASE, RC + | ld TAB:CARG1, 0(RB) + | lw CARG2, LO(RC) + | daddu RA, BASE, RA + | cleartp TAB:CARG1 + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | sltu AT, CARG2, TMP0 + | sll TMP2, CARG2, 3 + | beqz AT, ->vmeta_tgetr // In array part? + |. 
daddu CRET1, TMP1, TMP2 + | ld CARG2, 0(CRET1) + |->BC_TGETR_Z: + | ins_next1 + | sd CARG2, 0(RA) + | ins_next2 + break; + + case BC_TSETV: + | // RA = src*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG2, BASE, RB + | daddu CARG3, BASE, RC + | ld RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | daddu RA, BASE, RA + | checktab RB, ->vmeta_tsetv + | checkint TMP2, >5 + |. sextw RC, TMP2 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | sltu AT, RC, TMP0 + | sll TMP2, RC, 3 + | beqz AT, ->vmeta_tsetv // Integer key and in array part? + |. daddu TMP1, TMP1, TMP2 + | ld TMP0, 0(TMP1) + | lbu TMP3, TAB:RB->marked + | beq TMP0, TISNIL, >3 + |. ld CRET1, 0(RA) + |1: + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. sd CRET1, 0(TMP1) + |2: + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP2, TAB:TMP2->nomm + | andi TMP2, TMP2, 1<vmeta_tsetv + |. nop + | + |5: + | gettp AT, TMP2 + | daddiu AT, AT, -LJ_TSTR + | bnez AT, ->vmeta_tsetv + |. nop + | b ->BC_TSETS_Z // String key? + |. cleartp STR:RC, TMP2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETS: + | // RA = src*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RC8a RC, INS + | ld TAB:RB, 0(CARG2) + | decode_RC8b RC + | dsubu CARG3, KBASE, RC + | ld RC, -8(CARG3) // KBASE-8-str_const*8 + | daddu RA, BASE, RA + | cleartp STR:RC + | checktab TAB:RB, ->vmeta_tsets1 + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->hash + | ld NODE:TMP2, TAB:RB->node + | sb r0, TAB:RB->nomm // Clear metamethod cache. 
+ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |.if FPU + | ldc1 f20, 0(RA) + |.else + | ld CRET1, 0(RA) + |.endif + |1: + | ld TMP0, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | bne TMP0, RC, >5 + |. lbu TMP3, TAB:RB->marked + | beq CARG2, TISNIL, >4 // Key found, but nil value? + |. ld TAB:TMP0, TAB:RB->metatable + |2: + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |.if FPU + |. sdc1 f20, NODE:TMP2->val + |.else + |. sd CRET1, NODE:TMP2->val + |.endif + |3: + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | beqz TAB:TMP0, <2 // No metatable: done. + |. nop + | lbu TMP0, TAB:TMP0->nomm + | andi TMP0, TMP0, 1<vmeta_tsets + |. nop + | + |5: // Follow hash chain. + | bnez NODE:TMP1, <1 + |. move NODE:TMP2, NODE:TMP1 + | // End of hash chain: key not found, add a new one + | + | // But check for __newindex first. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, >6 // No metatable: continue. + |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | load_got lj_tab_newkey + | sd RC, 0(CARG3) + | sd BASE, L->base + | move CARG2, TAB:RB + | sd PC, SAVE_PC + | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k + |. move CARG1, L + | // Returns TValue *. + | ld BASE, L->base + |.if FPU + | b <3 // No 2nd write barrier needed. + |. sdc1 f20, 0(CRET1) + |.else + | ld CARG1, 0(RA) + | b <3 // No 2nd write barrier needed. + |. sd CARG1, 0(CRET1) + |.endif + | + |7: // Possible table write barrier for the value. Skip valiswhite check. 
+ | barrierback TAB:RB, TMP3, TMP0, <3 + break; + case BC_TSETB: + | // RA = src*8, RB = table*8, RC = index*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | daddu RA, BASE, RA + | srl TMP0, RC, 3 + | checktab RB, ->vmeta_tsetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | sltu AT, TMP0, TMP1 + | beqz AT, ->vmeta_tsetb + |. daddu RC, TMP2, RC + | ld TMP1, 0(RC) + | lbu TMP3, TAB:RB->marked + | beq TMP1, TISNIL, >5 + |1: + |. ld CRET1, 0(RA) + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. sd CRET1, 0(RC) + |2: + | ins_next + | + |5: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! + |. nop + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG1, BASE, RB + | daddu CARG3, BASE, RC + | ld TAB:CARG2, 0(CARG1) + | lw CARG3, LO(CARG3) + | cleartp TAB:CARG2 + | lbu TMP3, TAB:CARG2->marked + | lw TMP0, TAB:CARG2->asize + | ld TMP1, TAB:CARG2->array + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. daddu RA, BASE, RA + |2: + | sltu AT, CARG3, TMP0 + | sll TMP2, CARG3, 3 + | beqz AT, ->vmeta_tsetr // In array part? + |. daddu CRET1, TMP1, TMP2 + |->BC_TSETR_Z: + | ld CARG1, 0(RA) + | ins_next1 + | sd CARG1, 0(CRET1) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. 
+ | barrierback TAB:RB, TMP3, TMP0, <2 + break; + + case BC_TSETM: + | // RA = base*8 (table at base-1), RD = num_const*8 (start index) + | daddu RA, BASE, RA + |1: + | daddu TMP3, KBASE, RD + | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. + | addiu TMP0, MULTRES, -8 + | lw TMP3, LO(TMP3) // Integer constant is in lo-word. + | beqz TMP0, >4 // Nothing to copy? + |. srl CARG3, TMP0, 3 + | cleartp CARG2 + | addu CARG3, CARG3, TMP3 + | lw TMP2, TAB:CARG2->asize + | sll TMP1, TMP3, 3 + | lbu TMP3, TAB:CARG2->marked + | ld CARG1, TAB:CARG2->array + | sltu AT, TMP2, CARG3 + | bnez AT, >5 + |. daddu TMP2, RA, TMP0 + | daddu TMP1, TMP1, CARG1 + | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |3: // Copy result slots to table. + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | sltu AT, RA, TMP2 + | sd CRET1, 0(TMP1) + | bnez AT, <3 + |. daddiu TMP1, TMP1, 8 + | bnez TMP0, >7 + |. nop + |4: + | ins_next + | + |5: // Need to resize array part. + | load_got lj_tab_reasize + | sd BASE, L->base + | sd PC, SAVE_PC + | move BASE, RD + | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + |. move CARG1, L + | // Must not reallocate the stack. + | move RD, BASE + | b <1 + |. ld BASE, L->base // Reload BASE for lack of a saved register. + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, TMP0, <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 + | decode_RDtoRC8 NARGS8:RC, RD + | b ->BC_CALL_Z + |. 
addu NARGS8:RC, NARGS8:RC, MULTRES + break; + case BC_CALL: + | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 + | decode_RDtoRC8 NARGS8:RC, RD + |->BC_CALL_Z: + | move TMP2, BASE + | daddu BASE, BASE, RA + | ld LFUNC:RB, 0(BASE) + | daddiu BASE, BASE, 16 + | addiu NARGS8:RC, NARGS8:RC, -8 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs*8 + | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. + | // Fall through. Assumes BC_CALLT follows. + break; + case BC_CALLT: + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + | daddu RA, BASE, RA + | ld RB, 0(RA) + | move NARGS8:RC, RD + | ld TMP1, FRAME_PC(BASE) + | daddiu RA, RA, 16 + | addiu NARGS8:RC, NARGS8:RC, -8 + | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt + |->BC_CALLT_Z: + | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. + | lbu TMP3, LFUNC:CARG3->ffid + | bnez TMP0, >7 + |. xori TMP2, TMP1, FRAME_VARG + |1: + | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. + | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? + | move TMP2, BASE + | move RB, CARG3 + | beqz NARGS8:RC, >3 + |. move TMP3, NARGS8:RC + |2: + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | addiu TMP3, TMP3, -8 + | sd CRET1, 0(TMP2) + | bnez TMP3, <2 + |. daddiu TMP2, TMP2, 8 + |3: + | or TMP0, TMP0, AT + | beqz TMP0, >5 + |. nop + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | lw INS, -4(TMP1) + | decode_RA8a RA, INS + | decode_RA8b RA + | dsubu TMP1, BASE, RA + | ld TMP1, -32(TMP1) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | b <4 + |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. + | + |7: // Tailcall from a vararg function. + | andi AT, TMP2, FRAME_TYPEP + | bnez AT, <1 // Vararg frame below? + |. dsubu TMP2, BASE, TMP2 // Relocate BASE down. + | move BASE, TMP2 + | ld TMP1, FRAME_PC(TMP2) + | b <1 + |. 
andi TMP0, TMP1, FRAME_TYPE + break; + + case BC_ITERC: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) + | move TMP2, BASE // Save old BASE fir vmeta_call. + | daddu BASE, BASE, RA + | ld RB, -24(BASE) + | ld CARG1, -16(BASE) + | ld CARG2, -8(BASE) + | li NARGS8:RC, 16 // Iterators get 2 arguments. + | sd RB, 0(BASE) // Copy callable. + | sd CARG1, 16(BASE) // Copy state. + | sd CARG2, 24(BASE) // Copy control var. + | daddiu BASE, BASE, 16 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + |.if JIT + | // NYI: add hotloop, record BC_ITERN. + |.endif + | daddu RA, BASE, RA + | ld TAB:RB, -16(RA) + | lw RC, -8+LO(RA) // Get index from control var. + | cleartp TAB:RB + | daddiu PC, PC, 4 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | dsll CARG3, TISNUM, 47 + |1: // Traverse array part. + | sltu AT, RC, TMP0 + | beqz AT, >5 // Index points after array part? + |. sll TMP3, RC, 3 + | daddu TMP3, TMP1, TMP3 + | ld CARG1, 0(TMP3) + | lhu RD, -4+OFS_RD(PC) + | or TMP2, RC, CARG3 + | beq CARG1, TISNIL, <1 // Skip holes in array part. + |. addiu RC, RC, 1 + | sd TMP2, 0(RA) + | sd CARG1, 8(RA) + | or TMP0, RC, CARG3 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | decode_RD4b RD + | daddu RD, RD, TMP3 + | sw TMP0, -8+LO(RA) // Update control var. + | daddu PC, PC, RD + |3: + | ins_next + | + |5: // Traverse hash part. + | lw TMP1, TAB:RB->hmask + | subu RC, RC, TMP0 + | ld TMP2, TAB:RB->node + |6: + | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. + | bnez AT, <3 + |. sll TMP3, RC, 5 + | sll RB, RC, 3 + | subu TMP3, TMP3, RB + | daddu NODE:TMP3, TMP3, TMP2 + | ld CARG1, 0(NODE:TMP3) + | lhu RD, -4+OFS_RD(PC) + | beq CARG1, TISNIL, <6 // Skip holes in hash part. + |. 
addiu RC, RC, 1 + | ld CARG2, NODE:TMP3->key + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | sd CARG1, 8(RA) + | addu RC, RC, TMP0 + | decode_RD4b RD + | addu RD, RD, TMP3 + | sd CARG2, 0(RA) + | daddu PC, PC, RD + | b <3 + |. sw RC, -8+LO(RA) // Update control var. + break; + + case BC_ISNEXT: + | // RA = base*8, RD = target (points to ITERN) + | daddu RA, BASE, RA + | srl TMP0, RD, 1 + | ld CFUNC:CARG1, -24(RA) + | daddu TMP0, PC, TMP0 + | ld CARG2, -16(RA) + | ld CARG3, -8(RA) + | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | checkfunc CFUNC:CARG1, >5 + | gettp CARG2, CARG2 + | daddiu CARG2, CARG2, -LJ_TTAB + | lbu TMP1, CFUNC:CARG1->ffid + | daddiu CARG3, CARG3, -LJ_TNIL + | or AT, CARG2, CARG3 + | daddiu TMP1, TMP1, -FF_next_N + | or AT, AT, TMP1 + | bnez AT, >5 + |. lui TMP1, 0xfffe + | daddu PC, TMP0, TMP2 + | ori TMP1, TMP1, 0x7fff + | dsll TMP1, TMP1, 32 + | sd TMP1, -8(RA) + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | li TMP3, BC_JMP + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) + | daddu PC, TMP0, TMP2 + | b <1 + |. sb TMP1, OFS_OP(PC) + break; + + case BC_VARG: + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ld TMP0, FRAME_PC(BASE) + | decode_RDtoRC8 RC, RD + | decode_RB8a RB, INS + | daddu RC, BASE, RC + | decode_RB8b RB + | daddu RA, BASE, RA + | daddiu RC, RC, FRAME_VARG + | daddu TMP2, RA, RB + | daddiu TMP3, BASE, -16 // TMP3 = vtop + | dsubu RC, RC, TMP0 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | beqz RB, >5 // Copy all varargs? + |. dsubu TMP1, TMP3, RC + | daddiu TMP2, TMP2, -16 + |1: // Copy vararg slots to destination slots. + | ld CARG1, 0(RC) + | sltu AT, RC, TMP3 + | daddiu RC, RC, 8 + | movz CARG1, TISNIL, AT + | sd CARG1, 0(RA) + | sltu AT, RA, TMP2 + | bnez AT, <1 + |. daddiu RA, RA, 8 + |3: + | ins_next + | + |5: // Copy all varargs. + | ld TMP0, L->maxstack + | blez TMP1, <3 // No vararg slots? + |. 
li MULTRES, 8 // MULTRES = (0+1)*8 + | daddu TMP2, RA, TMP1 + | sltu AT, TMP0, TMP2 + | bnez AT, >7 + |. daddiu MULTRES, TMP1, 8 + |6: + | ld CRET1, 0(RC) + | daddiu RC, RC, 8 + | sd CRET1, 0(RA) + | sltu AT, RC, TMP3 + | bnez AT, <6 // More vararg slots? + |. daddiu RA, RA, 8 + | b <3 + |. nop + | + |7: // Grow stack for varargs. + | load_got lj_state_growstack + | sd RA, L->top + | dsubu RA, RA, BASE + | sd BASE, L->base + | dsubu BASE, RC, BASE // Need delta, because BASE may change. + | sd PC, SAVE_PC + | srl CARG2, TMP1, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | move RC, BASE + | ld BASE, L->base + | daddu RA, BASE, RA + | daddu RC, BASE, RC + | b <6 + |. daddiu TMP3, BASE, -16 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RD = extra_nresults*8 + | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. + | // Fall through. Assumes BC_RET follows. + break; + + case BC_RET: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | daddu RA, BASE, RA + | move MULTRES, RD + |1: + | andi TMP0, PC, FRAME_TYPE + | bnez TMP0, ->BC_RETV_Z + |. xori TMP1, PC, FRAME_VARG + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return + | lw INS, -4(PC) + | daddiu TMP2, BASE, -16 + | daddiu RC, RD, -8 + | decode_RA8a TMP0, INS + | decode_RB8a RB, INS + | decode_RA8b TMP0 + | decode_RB8b RB + | daddu TMP3, TMP2, RB + | beqz RC, >3 + |. dsubu BASE, TMP2, TMP0 + |2: + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | daddiu RC, RC, -8 + | sd CRET1, 0(TMP2) + | bnez RC, <2 + |. daddiu TMP2, TMP2, 8 + |3: + | daddiu TMP3, TMP3, -8 + |5: + | sltu AT, TMP2, TMP3 + | bnez AT, >6 + |. ld LFUNC:TMP1, FRAME_FUNC(BASE) + | ins_next1 + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | sd TISNIL, 0(TMP2) + | b <5 + |. 
daddiu TMP2, TMP2, 8 + | + |->BC_RETV_Z: // Non-standard return case. + | andi TMP2, TMP1, FRAME_TYPEP + | bnez TMP2, ->vm_return + |. nop + | // Return from vararg function: relocate BASE down. + | dsubu BASE, BASE, TMP1 + | b <1 + |. ld PC, FRAME_PC(BASE) + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | daddu RA, BASE, RA + | move MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | bnez TMP0, ->BC_RETV_Z + |. xori TMP1, PC, FRAME_VARG + | lw INS, -4(PC) + | daddiu TMP2, BASE, -16 + if (op == BC_RET1) { + | ld CRET1, 0(RA) + } + | decode_RB8a RB, INS + | decode_RA8a RA, INS + | decode_RB8b RB + | decode_RA8b RA + | dsubu BASE, TMP2, RA + if (op == BC_RET1) { + | sd CRET1, 0(TMP2) + } + |5: + | sltu AT, RD, RB + | bnez AT, >6 + |. ld TMP1, FRAME_FUNC(BASE) + | ins_next1 + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | daddiu TMP2, TMP2, 8 + | daddiu RD, RD, 8 + | b <5 + if (op == BC_RET1) { + |. sd TISNIL, 0(TMP2) + } else { + |. sd TISNIL, -8(TMP2) + } + break; + + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IFORL follows. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RD = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | daddu RA, BASE, RA + | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type + | gettp CARG3, CARG1 + if (op != BC_JFORL) { + | srl RD, RD, 1 + | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | daddu TMP2, RD, TMP2 + } + if (!vk) { + | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type + | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type + | gettp CARG4, CARG2 + | bne CARG3, TISNUM, >5 + |. gettp CRET2, CRET1 + | bne CARG4, TISNUM, ->vmeta_for + |. 
sextw CARG3, CARG1 + | bne CRET2, TISNUM, ->vmeta_for + |. sextw CARG2, CARG2 + | dext AT, CRET1, 31, 0 + | slt CRET1, CARG2, CARG3 + | slt TMP1, CARG3, CARG2 + | movn CRET1, TMP1, AT + } else { + | bne CARG3, TISNUM, >5 + |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type + | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type + | sextw TMP3, CARG1 + | sextw CARG2, CARG2 + | sextw CRET1, CRET1 + | addu CARG1, TMP3, CARG2 + | xor TMP0, CARG1, TMP3 + | xor TMP1, CARG1, CARG2 + | and TMP0, TMP0, TMP1 + | slt TMP1, CARG1, CRET1 + | slt CRET1, CRET1, CARG1 + | slt AT, CARG2, r0 + | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. + | movn CRET1, TMP1, AT + | or CRET1, CRET1, TMP0 + | zextw CARG1, CARG1 + | settp CARG1, TISNUM + } + |1: + if (op == BC_FORI) { + | movz TMP2, r0, CRET1 + | daddu PC, PC, TMP2 + } else if (op == BC_JFORI) { + | daddu PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | daddu PC, PC, TMP2 + } + if (vk) { + | sd CARG1, FORL_IDX*8(RA) + } + | ins_next1 + | sd CARG1, FORL_EXT*8(RA) + |2: + if (op == BC_JFORI) { + | beqz CRET1, =>BC_JLOOP + |. decode_RD8b RD + } else if (op == BC_JFORL) { + | beqz CRET1, =>BC_JLOOP + } + | ins_next2 + | + |5: // FP loop. + |.if FPU + if (!vk) { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | sltiu TMP0, CARG3, LJ_TISNUM + | sltiu TMP1, CARG4, LJ_TISNUM + | sltiu AT, CRET2, LJ_TISNUM + | ld TMP3, FORL_STEP*8(RA) + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. slt TMP3, TMP3, r0 + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | li CRET1, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | b <1 + |. 
movn CRET1, AT, TMP3 + } else { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f4, FORL_STEP*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | ld TMP3, FORL_STEP*8(RA) + | add.d f0, f0, f4 + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | slt TMP3, TMP3, r0 + | li CRET1, 1 + | li AT, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | movn CRET1, AT, TMP3 + if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | daddu PC, PC, TMP2 + } + | sdc1 f0, FORL_IDX*8(RA) + | ins_next1 + | b <2 + |. sdc1 f0, FORL_EXT*8(RA) + } + |.else + if (!vk) { + | sltiu TMP0, CARG3, LJ_TISNUM + | sltiu TMP1, CARG4, LJ_TISNUM + | sltiu AT, CRET2, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. nop + | bal ->vm_sfcmpolex + |. lw TMP3, FORL_STEP*8+HI(RA) + | b <1 + |. nop + } else { + | load_got __adddf3 + | call_extern + |. sw TMP2, TMPD + | ld CARG2, FORL_STOP*8(RA) + | move CARG1, CRET1 + if ( op == BC_JFORL ) { + | lhu RD, -4+OFS_RD(PC) + | decode_RD8b RD + } + | bal ->vm_sfcmpolex + |. lw TMP3, FORL_STEP*8+HI(RA) + | b <1 + |. lw TMP2, TMPD + } + |.endif + break; + + case BC_ITERL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base*8, RD = target + | daddu RA, BASE, RA + | ld TMP1, 0(RA) + | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. + |. nop + if (op == BC_JITERL) { + | b =>BC_JLOOP + |. sd TMP1, -8(RA) + } else { + | branch_RD // Otherwise save control var + branch. + | sd TMP1, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | // RA = base*8, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_ILOOP follows. 
+ break; + + case BC_ILOOP: + | // RA = base*8, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + |.if JIT + | NYI + |.endif + break; + + case BC_JMP: + | // RA = base*8 (only used by trace recorder), RD = target + | branch_RD + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: + |.if JIT + | hotcall + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | ld TMP2, L->maxstack + | lbu TMP1, -4+PC2PROTO(numparams)(PC) + | ld KBASE, -4+PC2PROTO(k)(PC) + | sltu AT, TMP2, RA + | bnez AT, ->vm_growstack_l + |. sll TMP1, TMP1, 3 + if (op != BC_JFUNCF) { + | ins_next1 + } + |2: + | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. + | bnez AT, >3 + |. daddu AT, BASE, NARGS8:RC + if (op == BC_JFUNCF) { + | decode_RD8a RD, INS + | b =>BC_JLOOP + |. decode_RD8b RD + } else { + | ins_next2 + } + | + |3: // Clear missing parameters. + | sd TISNIL, 0(AT) + | b <2 + |. addiu NARGS8:RC, NARGS8:RC, 8 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | NYI // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | daddu TMP1, BASE, RC + | ld TMP2, L->maxstack + | daddu TMP0, RA, RC + | sd LFUNC:RB, 0(TMP1) // Store (untagged) copy of LFUNC. + | daddiu TMP3, RC, 16+FRAME_VARG + | sltu AT, TMP0, TMP2 + | ld KBASE, -4+PC2PROTO(k)(PC) + | beqz AT, ->vm_growstack_l + |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. + | lbu TMP2, -4+PC2PROTO(numparams)(PC) + | move RA, BASE + | move RC, TMP1 + | ins_next1 + | beqz TMP2, >3 + |. daddiu BASE, TMP1, 16 + |1: + | ld TMP0, 0(RA) + | sltu AT, RA, RC // Less args than parameters? 
+ | move CARG1, TMP0 + | movz TMP0, TISNIL, AT // Clear missing parameters. + | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). + | addiu TMP2, TMP2, -1 + | sd TMP0, 16(TMP1) + | daddiu TMP1, TMP1, 8 + | sd CARG1, 0(RA) + | bnez TMP2, <1 + |. daddiu RA, RA, 8 + |3: + | ins_next2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { + | ld CFUNCADDR, CFUNC:RB->f + } else { + | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) + } + | daddu TMP1, RA, NARGS8:RC + | ld TMP2, L->maxstack + | daddu RC, BASE, NARGS8:RC + | sd BASE, L->base + | sltu AT, TMP2, TMP1 + | sd RC, L->top + | li_vmstate C + if (op == BC_FUNCCW) { + | ld CARG2, CFUNC:RB->f + } + | bnez AT, ->vm_growstack_c // Need to grow stack. + |. move CARG1, L + | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) + |. st_vmstate + | // Returns nresults. + | ld BASE, L->base + | sll RD, CRET1, 3 + | ld TMP1, L->top + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of caller. + | dsubu RA, TMP1, RD // RA = L->top - nresults*8 + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | b ->vm_returnc + |. st_vmstate + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + /* Backend entry point: calls dasm_growpc(Dst, BC__MAX) (presumably reserving one DynASM pc label per bytecode op -- confirm), emits the shared subroutines via build_subroutines, then after the .code_op directive calls build_ins once for every op in [0, BC__MAX). Always returns BC__MAX. */ +static int build_backend(BuildCtx *ctx) +{ + int op; + + dasm_growpc(Dst, BC__MAX); + + build_subroutines(ctx); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. 
*/ +static void emit_asm_debug(BuildCtx *ctx) +{ /* Writes a .debug_frame section (one CIE plus FDEs) to ctx->fp. Only BUILD_elfasm mode emits anything; every other mode reaches the empty default case. fcofs is the byte offset of the vm_ffi_call global from ctx->code: it is printed into FDE0 together with CFRAME_SIZE, and FDE1 (LJ_HASFFI builds only) is given the remainder, ctx->codesz - fcofs. */ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.4byte .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.4byte 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 31\n" + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" + "\t.align 2\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.4byte .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.4byte .Lframe0\n" + "\t.8byte .Lbegin\n" + "\t.8byte %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" + "\t.byte 0x9f\n\t.sleb128 2*5\n" + "\t.byte 0x9e\n\t.sleb128 2*6\n", + fcofs, CFRAME_SIZE); + for (i = 23; i >= 16; i--) + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); +#if !LJ_SOFTFP + for (i = 31; i >= 24; i--) + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); +#endif + fprintf(ctx->fp, + "\t.align 2\n" + ".LEFDE0:\n\n"); /* NOTE(review): FDE0 above emits its two address fields with .8byte, while FDE1 below uses .4byte for lj_vm_ffi_call and its length -- confirm the narrower width is intended. */ +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.4byte .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.4byte .Lframe0\n" + "\t.4byte lj_vm_ffi_call\n" + "\t.4byte %d\n" + "\t.byte 0x9f\n\t.uleb128 2*1\n" + "\t.byte 0x90\n\t.uleb128 2*2\n" + "\t.byte 0xd\n\t.uleb128 0x10\n" + "\t.align 2\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND + /* NYI */ +#endif + break; + default: + break; + } +} + diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index c7a7740b..56154495 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -1105,11 +1105,11 @@ static void build_subroutines(BuildCtx *ctx) | mov BASE, L:RB->base | mov NARGS:RDd, TMP1d | mov LFUNC:RB, [RA-16] - | cleartp LFUNC:RB | add NARGS:RDd, 1 | // This is fragile. L->base must not move, KBASE must always be defined. | cmp KBASE, BASE // Continue with CALLT if flag set. 
| je ->BC_CALLT_Z + | cleartp LFUNC:RB | mov BASE, RA | ins_call // Otherwise call resolved metamethod. | @@ -2401,8 +2401,7 @@ static void build_subroutines(BuildCtx *ctx) | movzx RCd, byte [rbp-8] // Reconstruct exit number. | mov RCH, byte [rbp-16] | mov [rbp-8], r15; mov [rbp-16], r14 - | // Caveat: DISPATCH is rbx. - | mov DISPATCH, [ebp] + | // DISPATCH is preserved on-trace in LJ_GC64 mode. | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. | set_vmstate EXIT | mov [DISPATCH+DISPATCH_J(exitno)], RCd @@ -3516,7 +3515,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_AD // RA = level, RD = target | branchPC RD // Do this first to free RD. | mov L:RB, SAVE_L - | cmp dword L:RB->openupval, 0 + | cmp aword L:RB->openupval, 0 | je >1 | mov L:RB->base, BASE | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index f108c0b5..39ccaa2e 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -121,19 +121,68 @@ |//----------------------------------------------------------------------- |.if not X64 // x86 stack layout. | +|.if WIN +| +|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). +|.macro saveregs_ +| push edi; push esi; push ebx +| push extern lj_err_unwind_win +| fs; push dword [0] +| fs; mov [0], esp +| sub esp, CFRAME_SPACE +|.endmacro +|.macro restoreregs +| add esp, CFRAME_SPACE +| fs; pop dword [0] +| pop edi // Short for esp += 4. +| pop ebx; pop esi; pop edi; pop ebp +|.endmacro +| +|.else +| |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). |.macro saveregs_ | push edi; push esi; push ebx | sub esp, CFRAME_SPACE |.endmacro -|.macro saveregs -| push ebp; saveregs_ -|.endmacro |.macro restoreregs | add esp, CFRAME_SPACE | pop ebx; pop esi; pop edi; pop ebp |.endmacro | +|.endif +| +|.macro saveregs +| push ebp; saveregs_ +|.endmacro +| +|.if WIN +|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. 
+|.define SAVE_NRES, aword [esp+aword*18] +|.define SAVE_CFRAME, aword [esp+aword*17] +|.define SAVE_L, aword [esp+aword*16] +|//----- 16 byte aligned, ^^^ arguments from C caller +|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. +|.define SAVE_R4, aword [esp+aword*14] +|.define SAVE_R3, aword [esp+aword*13] +|.define SAVE_R2, aword [esp+aword*12] +|//----- 16 byte aligned +|.define SAVE_R1, aword [esp+aword*11] +|.define SEH_FUNC, aword [esp+aword*10] +|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. +|.define UNUSED2, aword [esp+aword*8] +|//----- 16 byte aligned +|.define UNUSED1, aword [esp+aword*7] +|.define SAVE_PC, aword [esp+aword*6] +|.define TMP2, aword [esp+aword*5] +|.define TMP1, aword [esp+aword*4] +|//----- 16 byte aligned +|.define ARG4, aword [esp+aword*3] +|.define ARG3, aword [esp+aword*2] +|.define ARG2, aword [esp+aword*1] +|.define ARG1, aword [esp] //<-- esp while in interpreter. +|//----- 16 byte aligned, ^^^ arguments for C callee +|.else |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. |.define SAVE_NRES, aword [esp+aword*14] |.define SAVE_CFRAME, aword [esp+aword*13] @@ -154,6 +203,7 @@ |.define ARG2, aword [esp+aword*1] |.define ARG1, aword [esp] //<-- esp while in interpreter. |//----- 16 byte aligned, ^^^ arguments for C callee +|.endif | |// FPARGx overlaps ARGx and ARG(x+1) on x86. |.define FPARG3, qword [esp+qword*1] @@ -554,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx) |.else | mov eax, FCARG2 // Error return status for vm_pcall. | mov esp, FCARG1 + |.if WIN + | lea FCARG1, SEH_NEXT + | fs; mov [0], FCARG1 + |.endif |.endif |->vm_unwind_c_eh: // Landing pad for external unwinder. | mov L:RB, SAVE_L @@ -577,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx) |.else | and FCARG1, CFRAME_RAWMASK | mov esp, FCARG1 + |.if WIN + | lea FCARG1, SEH_NEXT + | fs; mov [0], FCARG1 + |.endif |.endif |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
| mov L:RB, SAVE_L @@ -590,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx) | set_vmstate INTERP | jmp ->vm_returnc // Increments RD/MULTRES and returns. | + |.if WIN and not X64 + |->vm_rtlunwind@16: // Thin layer around RtlUnwind. + | // (void *cframe, void *excptrec, void *unwinder, int errcode) + | mov [esp], FCARG1 // Return value for RtlUnwind. + | push FCARG2 // Exception record for RtlUnwind. + | push 0 // Ignored by RtlUnwind. + | push dword [FCARG1+CFRAME_OFS_SEH] + | call extern RtlUnwind@16 // Violates ABI (clobbers too much). + | mov FCARG1, eax + | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). + | ret // Jump to unwinder. + |.endif + | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- |//-----------------------------------------------------------------------
  • Lua 5.1
    API+ABI
    + JIT + BitOp + FFI Drop-in
    DLL/.so