Merge remote-tracking branch 'upstream/v2.1' into ppc64-port

Gustavo Serra Scalet 2016-09-05 10:17:50 -03:00
commit c00253828a
74 changed files with 7328 additions and 981 deletions

View File

@ -153,7 +153,7 @@ Contains the target OS name:
<h3 id="jit_arch"><tt>jit.arch</tt></h3> <h3 id="jit_arch"><tt>jit.arch</tt></h3>
<p> <p>
Contains the target architecture name: Contains the target architecture name:
"x86", "x64", "arm", "ppc", or "mips". "x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".
</p> </p>
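For quick reference, both fields described here can be read from any running LuaJIT; a minimal snippet (the printed values are just examples):

local jit = require("jit")
print(jit.os, jit.arch)   -- e.g. prints: Linux    mips64   (on a 64 bit MIPS Linux build)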
<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2> <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>

View File

@ -349,6 +349,7 @@ break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
LuaJIT supports some extensions from Lua&nbsp;5.3: LuaJIT supports some extensions from Lua&nbsp;5.3:
<ul> <ul>
<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
</ul> </ul>
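A small sketch of the two extensions listed above, assuming a UTF-8 terminal; the script name myscript.lua is hypothetical:

print("\u{20AC}")   -- the escape embeds the UTF-8 bytes for U+20AC, a Euro sign
-- The argument table is now visible to -e chunks as well, e.g.:
--   luajit -e "print(arg[0], arg[1])" myscript.lua foo
-- prints "myscript.lua" and "foo" before myscript.lua itself runs.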
<h2 id="exceptions">C++ Exception Interoperability</h2> <h2 id="exceptions">C++ Exception Interoperability</h2>
@ -365,25 +366,30 @@ the toolchain used to compile LuaJIT:
</tr> </tr>
<tr class="odd separate"> <tr class="odd separate">
<td class="excplatform">POSIX/x64, DWARF2 unwinding</td> <td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
<td class="exccompiler">GCC 4.3+</td> <td class="exccompiler">GCC 4.3+, Clang</td>
<td class="excinterop"><b style="color: #00a000;">Full</b></td> <td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr> </tr>
<tr class="even"> <tr class="even">
<td class="excplatform">Other platforms, DWARF2 unwinding</td> <td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
<td class="exccompiler">GCC</td> <td class="exccompiler">GCC, Clang</td>
<td class="excinterop"><b style="color: #c06000;">Limited</b></td> <td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr> </tr>
<tr class="odd"> <tr class="odd">
<td class="excplatform">Other platforms, DWARF2 unwinding</td>
<td class="exccompiler">GCC, Clang</td>
<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
</tr>
<tr class="even">
<td class="excplatform">Windows/x64</td> <td class="excplatform">Windows/x64</td>
<td class="exccompiler">MSVC or WinSDK</td> <td class="exccompiler">MSVC or WinSDK</td>
<td class="excinterop"><b style="color: #00a000;">Full</b></td> <td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr> </tr>
<tr class="even"> <tr class="odd">
<td class="excplatform">Windows/x86</td> <td class="excplatform">Windows/x86</td>
<td class="exccompiler">Any</td> <td class="exccompiler">Any</td>
<td class="excinterop"><b style="color: #a00000;">No</b></td> <td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr> </tr>
<tr class="odd"> <tr class="even">
<td class="excplatform">Other platforms</td> <td class="excplatform">Other platforms</td>
<td class="exccompiler">Other compilers</td> <td class="exccompiler">Other compilers</td>
<td class="excinterop"><b style="color: #a00000;">No</b></td> <td class="excinterop"><b style="color: #a00000;">No</b></td>
@ -432,14 +438,6 @@ C++ destructors.</li>
<li>Lua errors <b>cannot</b> be caught on the C++ side.</li> <li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
<li>Throwing Lua errors across C++ frames will <b>not</b> call <li>Throwing Lua errors across C++ frames will <b>not</b> call
C++ destructors.</li> C++ destructors.</li>
<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
it's <b>not</b> safe to throw a Lua error across any frames containing
a C++ function with any try/catch construct or using variables with
(implicit) destructors. This also applies to any functions which may be
inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
is called inside or outside of a try/catch or whether any object actually
needs to be destroyed: the SEH chain is corrupted and this will eventually
lead to the termination of the process.</li>
</ul> </ul>
<br class="flush"> <br class="flush">
</div> </div>

View File

@ -122,7 +122,7 @@ operating systems, CPUs and compilers:
<tr class="even"> <tr class="even">
<td class="compatcpu">x64 (64 bit)</td> <td class="compatcpu">x64 (64 bit)</td>
<td class="compatos">GCC 4.2+</td> <td class="compatos">GCC 4.2+</td>
<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td> <td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td>
<td class="compatos">XCode 5.0+<br>Clang</td> <td class="compatos">XCode 5.0+<br>Clang</td>
<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td> <td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
</tr> </tr>
@ -148,7 +148,7 @@ operating systems, CPUs and compilers:
<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td> <td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
</tr> </tr>
<tr class="even"> <tr class="even">
<td class="compatcpu"><a href="#cross2">MIPS</a></td> <td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td>
<td class="compatos">GCC 4.3+</td> <td class="compatos">GCC 4.3+</td>
<td class="compatos">GCC 4.3+</td> <td class="compatos">GCC 4.3+</td>
<td class="compatos compatno">&nbsp;</td> <td class="compatos compatno">&nbsp;</td>
@ -202,7 +202,7 @@ which is probably the default on your system, anyway. Simply run:
make make
</pre> </pre>
<p> <p>
This always builds a native x86, x64 or PPC binary, depending on the host OS This always builds a native binary, depending on the host OS
you're running this command on. Check the section on you're running this command on. Check the section on
<a href="#cross">cross-compilation</a> for more options. <a href="#cross">cross-compilation</a> for more options.
</p> </p>
@ -333,25 +333,36 @@ directory where <tt>luajit.exe</tt> is installed
<h2 id="cross">Cross-compiling LuaJIT</h2> <h2 id="cross">Cross-compiling LuaJIT</h2>
<p> <p>
The GNU Makefile-based build system allows cross-compiling on any host First, let's clear up some terminology:
for any supported target, as long as both architectures have the same
pointer size. If you want to cross-compile to any 32 bit target on an
x64 OS, you need to install the multilib development package (e.g.
<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
(<tt>HOST_CC="gcc -m32"</tt>).
</p> </p>
<ul>
<li>Host: This is your development system, usually based on an x64 or x86 CPU.</li>
<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
</ul>
<p>
The GNU Makefile-based build system allows cross-compiling on any host
for any supported target:
</p>
<ul>
<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
<li>Both host and target architectures must have the same pointer size.</li>
<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
<li>64 bit targets always require compilation on a 64 bit host.</li>
</ul>
<p> <p>
You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
target OS differ, or you'll get assembler or linker errors. E.g. if target OS differ, or you'll get assembler or linker errors:
you're compiling on a Windows or OSX host for embedded Linux or Android,
you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
minimal target OS, you may need to disable the built-in allocator in
<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to
specify the same <tt>TARGET_SYS</tt> for the install step, too.
</p> </p>
<ul>
<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
</ul>
<p> <p>
The examples below only show some popular targets &mdash; please check Here are some examples where host and target have the same CPU:
the comments in <tt>src/Makefile</tt> for more details.
</p> </p>
<pre class="code"> <pre class="code">
# Cross-compile to a 32 bit binary on a multilib x64 OS # Cross-compile to a 32 bit binary on a multilib x64 OS
@ -369,38 +380,47 @@ use the canonical toolchain triplets for Linux.
</p> </p>
<p> <p>
Since there's often no easy way to detect CPU features at runtime, it's Since there's often no easy way to detect CPU features at runtime, it's
important to compile with the proper CPU or architecture settings. You important to compile with the proper CPU or architecture settings:
can specify these when building the toolchain yourself. Or add </p>
<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For <ul>
ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, <li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
too. Otherwise LuaJIT may not run at the full performance of your target <li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
CPU. <li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
</ul>
<p>
Here are some examples for targets with a different CPU than the host:
</p> </p>
<pre class="code"> <pre class="code">
# ARM soft-float # ARM soft-float
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
TARGET_CFLAGS="-mfloat-abi=soft" TARGET_CFLAGS="-mfloat-abi=soft"
# ARM soft-float ABI with VFP (example for Cortex-A8) # ARM soft-float ABI with VFP (example for Cortex-A9)
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp" TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
# ARM hard-float ABI with VFP (armhf, requires recent toolchain) # ARM hard-float ABI with VFP (armhf, most modern toolchains)
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
# ARM64 (requires x64 host) # ARM64
make CROSS=aarch64-linux- make CROSS=aarch64-linux-
# PPC # PPC
make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
# MIPS big-endian # MIPS32 big-endian
make HOST_CC="gcc -m32" CROSS=mips-linux- make HOST_CC="gcc -m32" CROSS=mips-linux-
# MIPS little-endian # MIPS32 little-endian
make HOST_CC="gcc -m32" CROSS=mipsel-linux- make HOST_CC="gcc -m32" CROSS=mipsel-linux-
# MIPS64 big-endian
make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
# MIPS64 little-endian
make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
</pre> </pre>
<p> <p>
You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/sdk/ndk/index.html"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>. You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/index.html">Android NDK</a>.
The environment variables need to match the install locations and the The environment variables need to match the install locations and the
desired target platform. E.g. Android&nbsp;4.0 corresponds to ABI level&nbsp;14. desired target platform. E.g. Android&nbsp;4.0 corresponds to ABI level&nbsp;14.
For details check the folder <tt>docs</tt> in the NDK directory. For details check the folder <tt>docs</tt> in the NDK directory.
@ -414,7 +434,7 @@ to build/deploy or which lowest common denominator you want to pick:
# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo) # Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
NDK=/opt/android/ndk NDK=/opt/android/ndk
NDKABI=8 NDKABI=8
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6 NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@ -422,16 +442,16 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS) # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
NDK=/opt/android/ndk NDK=/opt/android/ndk
NDKABI=14 NDKABI=14
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6 NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8" NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
# Android/MIPS, mips (MIPS32R1 hard-float), Android 4.0+ (ICS) # Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS)
NDK=/opt/android/ndk NDK=/opt/android/ndk
NDKABI=14 NDKABI=14
NDKVER=$NDK/toolchains/mipsel-linux-android-4.6 NDKVER=$NDK/toolchains/mipsel-linux-android-4.9
NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android- NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips" NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@ -439,7 +459,7 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS) # Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
NDK=/opt/android/ndk NDK=/opt/android/ndk
NDKABI=14 NDKABI=14
NDKVER=$NDK/toolchains/x86-4.6 NDKVER=$NDK/toolchains/x86-4.9
NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android- NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86" NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@ -459,14 +479,15 @@ Or use Android. :-p
ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
ICC=$(xcrun --sdk iphoneos --find clang) ICC=$(xcrun --sdk iphoneos --find clang)
ISDKF="-arch armv7 -isysroot $ISDKP" ISDKF="-arch armv7 -isysroot $ISDKP"
make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \ make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \
TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
# iOS/ARM64 # iOS/ARM64
ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
ICC=$(xcrun --sdk iphoneos --find clang) ICC=$(xcrun --sdk iphoneos --find clang)
ISDKF="-arch arm64 -isysroot $ISDKP" ISDKF="-arch arm64 -isysroot $ISDKP"
make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
</pre> </pre>
<h3 id="consoles">Cross-compiling for consoles</h3> <h3 id="consoles">Cross-compiling for consoles</h3>

View File

@ -169,10 +169,10 @@ LuaJIT is Copyright &copy; 2005-2016 Mike Pall, released under the
<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr> <tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
</table> </table>
<table class="feature compiler"> <table class="feature compiler">
<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr> <tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
</table> </table>
<table class="feature cpu"> <table class="feature cpu">
<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr> <tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
</table> </table>
<table class="feature fcompat"> <table class="feature fcompat">
<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr> <tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>

View File

@ -91,12 +91,6 @@ hooks for non-Lua functions) and shows slightly different behavior
in LuaJIT (no per-coroutine hooks, no tail call counting). in LuaJIT (no per-coroutine hooks, no tail call counting).
</li> </li>
<li> <li>
Some checks are missing in the JIT-compiled code for obscure situations
with <b>open upvalues aliasing</b> one of the SSA slots later on (or
vice versa). Bonus points, if you can find a real world test case for
this.
</li>
<li>
Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
handled correctly. The error may fall through an on-trace handled correctly. The error may fall through an on-trace
<tt>pcall</tt> or it may be passed on to the function set with <tt>pcall</tt> or it may be passed on to the function set with

View File

@ -21,7 +21,7 @@ enum {
/* The following actions need a buffer position. */ /* The following actions need a buffer position. */
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
/* The following actions also have an argument. */ /* The following actions also have an argument. */
DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
DASM__MAX DASM__MAX
}; };
@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...)
*pl = -pos; /* Label exists now. */ *pl = -pos; /* Label exists now. */
b[pos++] = ofs; /* Store pass1 offset estimate. */ b[pos++] = ofs; /* Store pass1 offset estimate. */
break; break;
case DASM_IMM: case DASM_IMM: case DASM_IMMS:
#ifdef DASM_CHECKS #ifdef DASM_CHECKS
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
#endif #endif
@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp)
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
case DASM_REL_LG: case DASM_REL_PC: pos++; break; case DASM_REL_LG: case DASM_REL_PC: pos++; break;
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
case DASM_IMM: pos++; break; case DASM_IMM: case DASM_IMMS: pos++; break;
} }
} }
stop: (void)0; stop: (void)0;
@ -356,7 +356,7 @@ int dasm_encode(Dst_DECL, void *buffer)
if (ins & 2048) if (ins & 2048)
n = n - (int)((char *)cp - base); n = n - (int)((char *)cp - base);
else else
n = (n + (int)base) & 0x0fffffff; n = (n + (int)(size_t)base) & 0x0fffffff;
patchrel: patchrel:
CK((n & 3) == 0 && CK((n & 3) == 0 &&
((n + ((ins & 2048) ? 0x00020000 : 0)) >> ((n + ((ins & 2048) ? 0x00020000 : 0)) >>
@ -367,6 +367,9 @@ int dasm_encode(Dst_DECL, void *buffer)
ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
break; break;
case DASM_LABEL_PC: break; case DASM_LABEL_PC: break;
case DASM_IMMS:
cp[-1] |= ((n>>3) & 4); n &= 0x1f;
/* fallthrough */
case DASM_IMM: case DASM_IMM:
cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
break; break;

View File

@ -1,17 +1,19 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM MIPS module. -- DynASM MIPS32/MIPS64 module.
-- --
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved. -- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
local mips64 = mips64
-- Module information: -- Module information:
local _info = { local _info = {
arch = "mips", arch = mips64 and "mips64" or "mips",
description = "DynASM MIPS module", description = "DynASM MIPS32/MIPS64 module",
version = "1.4.0", version = "1.4.0",
vernum = 10400, vernum = 10400,
release = "2015-10-18", release = "2016-05-24",
author = "Mike Pall", author = "Mike Pall",
license = "MIT", license = "MIT",
} }
@ -27,7 +29,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local match, gmatch = _s.match, _s.gmatch local match, gmatch = _s.match, _s.gmatch
local concat, sort = table.concat, table.sort local concat, sort = table.concat, table.sort
local bit = bit or require("bit") local bit = bit or require("bit")
local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
local tohex = bit.tohex
-- Inherited tables and callbacks. -- Inherited tables and callbacks.
local g_opt, g_arch local g_opt, g_arch
@ -38,7 +41,7 @@ local wline, werror, wfatal, wwarn
local action_names = { local action_names = {
"STOP", "SECTION", "ESC", "REL_EXT", "STOP", "SECTION", "ESC", "REL_EXT",
"ALIGN", "REL_LG", "LABEL_LG", "ALIGN", "REL_LG", "LABEL_LG",
"REL_PC", "LABEL_PC", "IMM", "REL_PC", "LABEL_PC", "IMM", "IMMS",
} }
-- Maximum number of section buffer positions for dasm_put(). -- Maximum number of section buffer positions for dasm_put().
@ -251,6 +254,10 @@ local map_op = {
bnel_3 = "54000000STB", bnel_3 = "54000000STB",
blezl_2 = "58000000SB", blezl_2 = "58000000SB",
bgtzl_2 = "5c000000SB", bgtzl_2 = "5c000000SB",
daddi_3 = mips64 and "60000000TSI",
daddiu_3 = mips64 and "64000000TSI",
ldl_2 = mips64 and "68000000TO",
ldr_2 = mips64 and "6c000000TO",
lb_2 = "80000000TO", lb_2 = "80000000TO",
lh_2 = "84000000TO", lh_2 = "84000000TO",
lwl_2 = "88000000TO", lwl_2 = "88000000TO",
@ -258,23 +265,30 @@ local map_op = {
lbu_2 = "90000000TO", lbu_2 = "90000000TO",
lhu_2 = "94000000TO", lhu_2 = "94000000TO",
lwr_2 = "98000000TO", lwr_2 = "98000000TO",
lwu_2 = mips64 and "9c000000TO",
sb_2 = "a0000000TO", sb_2 = "a0000000TO",
sh_2 = "a4000000TO", sh_2 = "a4000000TO",
swl_2 = "a8000000TO", swl_2 = "a8000000TO",
sw_2 = "ac000000TO", sw_2 = "ac000000TO",
sdl_2 = mips64 and "b0000000TO",
sdr_2 = mips64 and "b1000000TO",
swr_2 = "b8000000TO", swr_2 = "b8000000TO",
cache_2 = "bc000000NO", cache_2 = "bc000000NO",
ll_2 = "c0000000TO", ll_2 = "c0000000TO",
lwc1_2 = "c4000000HO", lwc1_2 = "c4000000HO",
pref_2 = "cc000000NO", pref_2 = "cc000000NO",
ldc1_2 = "d4000000HO", ldc1_2 = "d4000000HO",
ld_2 = mips64 and "dc000000TO",
sc_2 = "e0000000TO", sc_2 = "e0000000TO",
swc1_2 = "e4000000HO", swc1_2 = "e4000000HO",
scd_2 = mips64 and "f0000000TO",
sdc1_2 = "f4000000HO", sdc1_2 = "f4000000HO",
sd_2 = mips64 and "fc000000TO",
-- Opcode SPECIAL. -- Opcode SPECIAL.
nop_0 = "00000000", nop_0 = "00000000",
sll_3 = "00000000DTA", sll_3 = "00000000DTA",
sextw_2 = "00000000DT",
movf_2 = "00000001DS", movf_2 = "00000001DS",
movf_3 = "00000001DSC", movf_3 = "00000001DSC",
movt_2 = "00010001DS", movt_2 = "00010001DS",
@ -285,6 +299,7 @@ local map_op = {
sllv_3 = "00000004DTS", sllv_3 = "00000004DTS",
srlv_3 = "00000006DTS", srlv_3 = "00000006DTS",
rotrv_3 = "00000046DTS", rotrv_3 = "00000046DTS",
drotrv_3 = mips64 and "00000056DTS",
srav_3 = "00000007DTS", srav_3 = "00000007DTS",
jr_1 = "00000008S", jr_1 = "00000008S",
jalr_1 = "0000f809S", jalr_1 = "0000f809S",
@ -300,15 +315,22 @@ local map_op = {
mthi_1 = "00000011S", mthi_1 = "00000011S",
mflo_1 = "00000012D", mflo_1 = "00000012D",
mtlo_1 = "00000013S", mtlo_1 = "00000013S",
dsllv_3 = mips64 and "00000014DTS",
dsrlv_3 = mips64 and "00000016DTS",
dsrav_3 = mips64 and "00000017DTS",
mult_2 = "00000018ST", mult_2 = "00000018ST",
multu_2 = "00000019ST", multu_2 = "00000019ST",
div_2 = "0000001aST", div_2 = "0000001aST",
divu_2 = "0000001bST", divu_2 = "0000001bST",
dmult_2 = mips64 and "0000001cST",
dmultu_2 = mips64 and "0000001dST",
ddiv_2 = mips64 and "0000001eST",
ddivu_2 = mips64 and "0000001fST",
add_3 = "00000020DST", add_3 = "00000020DST",
move_2 = "00000021DS", move_2 = mips64 and "00000025DS" or "00000021DS",
addu_3 = "00000021DST", addu_3 = "00000021DST",
sub_3 = "00000022DST", sub_3 = "00000022DST",
negu_2 = "00000023DT", negu_2 = mips64 and "0000002fDT" or "00000023DT",
subu_3 = "00000023DST", subu_3 = "00000023DST",
and_3 = "00000024DST", and_3 = "00000024DST",
or_3 = "00000025DST", or_3 = "00000025DST",
@ -317,6 +339,10 @@ local map_op = {
nor_3 = "00000027DST", nor_3 = "00000027DST",
slt_3 = "0000002aDST", slt_3 = "0000002aDST",
sltu_3 = "0000002bDST", sltu_3 = "0000002bDST",
dadd_3 = mips64 and "0000002cDST",
daddu_3 = mips64 and "0000002dDST",
dsub_3 = mips64 and "0000002eDST",
dsubu_3 = mips64 and "0000002fDST",
tge_2 = "00000030ST", tge_2 = "00000030ST",
tge_3 = "00000030STZ", tge_3 = "00000030STZ",
tgeu_2 = "00000031ST", tgeu_2 = "00000031ST",
@ -329,6 +355,14 @@ local map_op = {
teq_3 = "00000034STZ", teq_3 = "00000034STZ",
tne_2 = "00000036ST", tne_2 = "00000036ST",
tne_3 = "00000036STZ", tne_3 = "00000036STZ",
dsll_3 = mips64 and "00000038DTa",
dsrl_3 = mips64 and "0000003aDTa",
drotr_3 = mips64 and "0020003aDTa",
dsra_3 = mips64 and "0000003bDTa",
dsll32_3 = mips64 and "0000003cDTA",
dsrl32_3 = mips64 and "0000003eDTA",
drotr32_3 = mips64 and "0020003eDTA",
dsra32_3 = mips64 and "0000003fDTA",
-- Opcode REGIMM. -- Opcode REGIMM.
bltz_2 = "04000000SB", bltz_2 = "04000000SB",
@ -356,13 +390,24 @@ local map_op = {
msubu_2 = "70000005ST", msubu_2 = "70000005ST",
clz_2 = "70000020DS=", clz_2 = "70000020DS=",
clo_2 = "70000021DS=", clo_2 = "70000021DS=",
dclz_2 = mips64 and "70000024DS=",
dclo_2 = mips64 and "70000025DS=",
sdbbp_0 = "7000003f", sdbbp_0 = "7000003f",
sdbbp_1 = "7000003fY", sdbbp_1 = "7000003fY",
-- Opcode SPECIAL3. -- Opcode SPECIAL3.
ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1
zextw_2 = mips64 and "7c00f803TS",
ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1
dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33
dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1
wsbh_2 = "7c0000a0DT", wsbh_2 = "7c0000a0DT",
dsbh_2 = mips64 and "7c0000a4DT",
dshd_2 = mips64 and "7c000164DT",
seb_2 = "7c000420DT", seb_2 = "7c000420DT",
seh_2 = "7c000620DT", seh_2 = "7c000620DT",
rdhwr_2 = "7c00003bTD", rdhwr_2 = "7c00003bTD",
@ -370,8 +415,12 @@ local map_op = {
-- Opcode COP0. -- Opcode COP0.
mfc0_2 = "40000000TD", mfc0_2 = "40000000TD",
mfc0_3 = "40000000TDW", mfc0_3 = "40000000TDW",
dmfc0_2 = mips64 and "40200000TD",
dmfc0_3 = mips64 and "40200000TDW",
mtc0_2 = "40800000TD", mtc0_2 = "40800000TD",
mtc0_3 = "40800000TDW", mtc0_3 = "40800000TDW",
dmtc0_2 = mips64 and "40a00000TD",
dmtc0_3 = mips64 and "40a00000TDW",
rdpgpr_2 = "41400000DT", rdpgpr_2 = "41400000DT",
di_0 = "41606000", di_0 = "41606000",
di_1 = "41606000T", di_1 = "41606000T",
@ -388,9 +437,11 @@ local map_op = {
-- Opcode COP1. -- Opcode COP1.
mfc1_2 = "44000000TG", mfc1_2 = "44000000TG",
dmfc1_2 = mips64 and "44200000TG",
cfc1_2 = "44400000TG", cfc1_2 = "44400000TG",
mfhc1_2 = "44600000TG", mfhc1_2 = "44600000TG",
mtc1_2 = "44800000TG", mtc1_2 = "44800000TG",
dmtc1_2 = mips64 and "44a00000TG",
ctc1_2 = "44c00000TG", ctc1_2 = "44c00000TG",
mthc1_2 = "44e00000TG", mthc1_2 = "44e00000TG",
@ -633,7 +684,7 @@ local function parse_fpr(expr)
werror("bad register name `"..expr.."'") werror("bad register name `"..expr.."'")
end end
local function parse_imm(imm, bits, shift, scale, signed) local function parse_imm(imm, bits, shift, scale, signed, action)
local n = tonumber(imm) local n = tonumber(imm)
if n then if n then
local m = sar(n, scale) local m = sar(n, scale)
@ -651,7 +702,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
werror("expected immediate operand, got register") werror("expected immediate operand, got register")
else else
waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) waction(action or "IMM",
(signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
return 0 return 0
end end
end end
@ -763,6 +815,9 @@ map_op[".template__"] = function(params, template, nparams)
n = n + 1 n = n + 1
elseif p == "A" then elseif p == "A" then
op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
elseif p == "a" then
local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
elseif p == "M" then elseif p == "M" then
op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
elseif p == "N" then elseif p == "N" then
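The new "a" format above packs a 6-bit MIPS64 shift amount: the low five bits land in the regular sa field, and bit 5 is moved into opcode bit 2, which is exactly what separates dsll/dsrl/dsra from their *32 counterparts. A minimal sketch of that split for constant shifts, mirroring the handler a few lines up (split_shift is a hypothetical helper, not part of DynASM):

local bit = require("bit")
local band, shl, shr = bit.band, bit.lshift, bit.rshift

local function split_shift(n)      -- n = shift amount, 0..63
  local m = shl(n, 6)              -- what parse_imm returns for an in-range constant (shift=6)
  local sa = band(m, 0x7c0)        -- low 5 bits of n, already placed in the sa field (bits 6..10)
  local hi = band(shr(m, 9), 4)    -- bit 5 of n, moved down to opcode bit 2
  return sa + hi
end

print(bit.tohex(split_shift(1)))   -- 00000040  (plain dsll-style encoding)
print(bit.tohex(split_shift(33)))  -- 00000044  (bit 2 set, selecting the *32 variant)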

dynasm/dasm_mips64.lua (new file, 12 lines added)
View File

@ -0,0 +1,12 @@
------------------------------------------------------------------------------
-- DynASM MIPS64 module.
--
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
-- All the interesting stuff is there.
------------------------------------------------------------------------------
mips64 = true -- Using a global is an ugly, but effective solution.
return require("dasm_mips")

View File

@ -121,8 +121,8 @@ XCFLAGS=
# #
# Use the system provided memory allocator (realloc) instead of the # Use the system provided memory allocator (realloc) instead of the
# bundled memory allocator. This is slower, but sometimes helpful for # bundled memory allocator. This is slower, but sometimes helpful for
# debugging. This option cannot be enabled on x64, since realloc usually # debugging. This option cannot be enabled on x64 without GC64, since
# doesn't return addresses in the right address range. # realloc usually doesn't return addresses in the right address range.
# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
# the only way to get useful results from it for all other architectures. # the only way to get useful results from it for all other architectures.
#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
@ -166,10 +166,6 @@ else
HOST_SYS= Windows HOST_SYS= Windows
HOST_MSYS= cygwin HOST_MSYS= cygwin
endif endif
# Use Clang for OSX host.
ifeq (Darwin,$(HOST_SYS))
DEFAULT_CC= clang
endif
endif endif
############################################################################## ##############################################################################
@ -257,7 +253,11 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__MIPSEL__=1 TARGET_ARCH= -D__MIPSEL__=1
endif endif
TARGET_LJARCH= mips ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= mips64
else
TARGET_LJARCH= mips
endif
else else
$(error Unsupported target architecture) $(error Unsupported target architecture)
endif endif

View File

@ -163,7 +163,7 @@ lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
@ -215,19 +215,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \ lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \
lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \ lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \ lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
lib_init.c lib_ffi.c lib_init.c
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

View File

@ -110,7 +110,7 @@ static const char *sym_decorate(BuildCtx *ctx,
if (p) { if (p) {
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_X86ORX64
if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
name[0] = '@'; name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */
else else
*p = '\0'; *p = '\0';
#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE

View File

@ -109,6 +109,8 @@ enum {
#if LJ_TARGET_X64 #if LJ_TARGET_X64
PEOBJ_SECT_PDATA, PEOBJ_SECT_PDATA,
PEOBJ_SECT_XDATA, PEOBJ_SECT_XDATA,
#elif LJ_TARGET_X86
PEOBJ_SECT_SXDATA,
#endif #endif
PEOBJ_SECT_RDATA_Z, PEOBJ_SECT_RDATA_Z,
PEOBJ_NSECTIONS PEOBJ_NSECTIONS
@ -208,6 +210,13 @@ void emit_peobj(BuildCtx *ctx)
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */ /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
#elif LJ_TARGET_X86
memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
/* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
#endif #endif
memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
@ -232,7 +241,7 @@ void emit_peobj(BuildCtx *ctx)
nrsym = ctx->nrelocsym; nrsym = ctx->nrelocsym;
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
#if LJ_TARGET_X64 #if LJ_TARGET_X64
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
#endif #endif
/* Write PE object header and all sections. */ /* Write PE object header and all sections. */
@ -312,6 +321,19 @@ void emit_peobj(BuildCtx *ctx)
reloc.type = PEOBJ_RELOC_ADDR32NB; reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
} }
#elif LJ_TARGET_X86
/* Write .sxdata section. */
for (i = 0; i < nrsym; i++) {
if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
uint32_t symidx = 1+2+i;
owrite(ctx, &symidx, 4);
break;
}
}
if (i == nrsym) {
fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
exit(1);
}
#endif #endif
/* Write .rdata$Z section. */ /* Write .rdata$Z section. */
@ -333,8 +355,10 @@ void emit_peobj(BuildCtx *ctx)
#if LJ_TARGET_X64 #if LJ_TARGET_X64
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
#elif LJ_TARGET_X86
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
#endif #endif
emit_peobj_sym(ctx, ctx->beginsym, 0, emit_peobj_sym(ctx, ctx->beginsym, 0,

View File

@ -157,11 +157,11 @@ local function merge_includes(src)
if includes[name] then return "" end if includes[name] then return "" end
includes[name] = true includes[name] = true
local fp = assert(io.open(LUA_SOURCE..name, "r")) local fp = assert(io.open(LUA_SOURCE..name, "r"))
local src = fp:read("*a") local inc = fp:read("*a")
assert(fp:close()) assert(fp:close())
src = gsub(src, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "") inc = gsub(inc, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "")
src = gsub(src, "#endif%s*$", "") inc = gsub(inc, "#endif%s*$", "")
return merge_includes(src) return merge_includes(inc)
end) end)
end end

View File

@ -125,12 +125,12 @@ extern "C"
#ifdef _WIN32 #ifdef _WIN32
__declspec(dllexport) __declspec(dllexport)
#endif #endif
const char %s%s[] = { const unsigned char %s%s[] = {
]], LJBC_PREFIX, ctx.modname)) ]], LJBC_PREFIX, ctx.modname))
else else
fp:write(string.format([[ fp:write(string.format([[
#define %s%s_SIZE %d #define %s%s_SIZE %d
static const char %s%s[] = { static const unsigned char %s%s[] = {
]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
end end
local t, n, m = {}, 0, 0 local t, n, m = {}, 0, 0
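For context, this writer backs the -b command line option, so the arrays above are what end up in a generated C file or header. A hedged usage sketch (assumes a test.lua in the current directory and a luajit binary on PATH):

-- Writes test.h containing roughly:
--   #define luaJIT_BC_test_SIZE ...
--   static const unsigned char luaJIT_BC_test[] = { ... };
os.execute("luajit -b test.lua test.h")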

View File

@ -12,7 +12,7 @@
local type = type local type = type
local sub, byte, format = string.sub, string.byte, string.format local sub, byte, format = string.sub, string.byte, string.format
local match, gmatch, gsub = string.match, string.gmatch, string.gsub local match, gmatch = string.match, string.gmatch
local concat = table.concat local concat = table.concat
local bit = require("bit") local bit = require("bit")
local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex

View File

@ -11,8 +11,8 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
local type = type local type = type
local sub, byte, format = string.sub, string.byte, string.format local byte, format = string.byte, string.format
local match, gmatch, gsub = string.match, string.gmatch, string.gsub local match, gmatch = string.match, string.gmatch
local concat = table.concat local concat = table.concat
local bit = require("bit") local bit = require("bit")
local band, bor, tohex = bit.band, bit.bor, bit.tohex local band, bor, tohex = bit.band, bit.bor, bit.tohex
@ -38,7 +38,7 @@ local map_special = {
"multST", "multuST", "divST", "divuST", "multST", "multuST", "divST", "divuST",
false, false, false, false, false, false, false, false,
"addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
"andDST", "orDST", "xorDST", "nor|notDST0", "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
false, false, "sltDST", "sltuDST", false, false, "sltDST", "sltuDST",
false, false, false, false, false, false, false, false,
"tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
@ -214,7 +214,7 @@ local map_pri = {
map_cop0, map_cop1, false, map_cop1x, map_cop0, map_cop1, false, map_cop1x,
"beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
false, false, false, false, false, false, false, false,
map_special2, false, false, map_special3, map_special2, "jalxJ", false, map_special3,
"lbTSO", "lhTSO", "lwlTSO", "lwTSO", "lbTSO", "lhTSO", "lwlTSO", "lwTSO",
"lbuTSO", "lhuTSO", "lwrTSO", false, "lbuTSO", "lhuTSO", "lwrTSO", false,
"sbTSO", "shTSO", "swlTSO", "swTSO", "sbTSO", "shTSO", "swlTSO", "swTSO",

View File

@ -13,7 +13,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
local type = type local type = type
local sub, byte, format = string.sub, string.byte, string.format local byte, format = string.byte, string.format
local match, gmatch, gsub = string.match, string.gmatch, string.gsub local match, gmatch, gsub = string.match, string.gmatch, string.gsub
local concat = table.concat local concat = table.concat
local bit = require("bit") local bit = require("bit")

View File

@ -818,7 +818,7 @@ map_act = {
m = b%32; b = (b-m)/32 m = b%32; b = (b-m)/32
local nb = b%2; b = (b-nb)/2 local nb = b%2; b = (b-nb)/2
if nb == 0 then ctx.rexb = true end if nb == 0 then ctx.rexb = true end
local nx = b%2; b = (b-nx)/2 local nx = b%2
if nx == 0 then ctx.rexx = true end if nx == 0 then ctx.rexx = true end
b = byte(ctx.code, pos, pos) b = byte(ctx.code, pos, pos)
if not b then return incomplete(ctx) end if not b then return incomplete(ctx) end

View File

@ -63,9 +63,9 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
local bit = require("bit") local bit = require("bit")
local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex local band, shr, tohex = bit.band, bit.rshift, bit.tohex
local sub, gsub, format = string.sub, string.gsub, string.format local sub, gsub, format = string.sub, string.gsub, string.format
local byte, char, rep = string.byte, string.char, string.rep local byte, rep = string.byte, string.rep
local type, tostring = type, tostring local type, tostring = type, tostring
local stdout, stderr = io.stdout, io.stderr local stdout, stderr = io.stdout, io.stderr
@ -213,7 +213,7 @@ local colortype_ansi = {
"\027[35m%s\027[m", "\027[35m%s\027[m",
} }
local function colorize_text(s, t) local function colorize_text(s)
return s return s
end end
@ -310,15 +310,17 @@ local function fmtfunc(func, pc)
end end
end end
local function formatk(tr, idx) local function formatk(tr, idx, sn)
local k, t, slot = tracek(tr, idx) local k, t, slot = tracek(tr, idx)
local tn = type(k) local tn = type(k)
local s local s
if tn == "number" then if tn == "number" then
if k == 2^52+2^51 then if band(sn or 0, 0x30000) ~= 0 then
s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
elseif k == 2^52+2^51 then
s = "bias" s = "bias"
else else
s = format("%+.14g", k) s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
end end
elseif tn == "string" then elseif tn == "string" then
s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@ -354,7 +356,7 @@ local function printsnap(tr, snap)
n = n + 1 n = n + 1
local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
if ref < 0 then if ref < 0 then
out:write(formatk(tr, ref)) out:write(formatk(tr, ref, sn))
elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
else else

View File

@ -120,7 +120,7 @@ end
-- Show top N list. -- Show top N list.
local function prof_top(count1, count2, samples, indent) local function prof_top(count1, count2, samples, indent)
local t, n = {}, 0 local t, n = {}, 0
for k, v in pairs(count1) do for k in pairs(count1) do
n = n + 1 n = n + 1
t[n] = k t[n] = k
end end

View File

@ -302,7 +302,7 @@ static int panic(lua_State *L)
#ifdef LUAJIT_USE_SYSMALLOC #ifdef LUAJIT_USE_SYSMALLOC
#if LJ_64 && !defined(LUAJIT_USE_VALGRIND) #if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
#error "Must use builtin allocator for 64 bit target" #error "Must use builtin allocator for 64 bit target"
#endif #endif
@ -334,7 +334,7 @@ LUALIB_API lua_State *luaL_newstate(void)
lua_State *L; lua_State *L;
void *ud = lj_alloc_create(); void *ud = lj_alloc_create();
if (ud == NULL) return NULL; if (ud == NULL) return NULL;
#if LJ_64 #if LJ_64 && !LJ_GC64
L = lj_state_newstate(lj_alloc_f, ud); L = lj_state_newstate(lj_alloc_f, ud);
#else #else
L = lua_newstate(lj_alloc_f, ud); L = lua_newstate(lj_alloc_f, ud);
@ -343,7 +343,7 @@ LUALIB_API lua_State *luaL_newstate(void)
return L; return L;
} }
#if LJ_64 #if LJ_64 && !LJ_GC64
LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
{ {
UNUSED(f); UNUSED(ud); UNUSED(f); UNUSED(ud);

View File

@ -715,15 +715,19 @@ static uint32_t jit_cpudetect(lua_State *L)
#if LJ_HASJIT #if LJ_HASJIT
/* Compile-time MIPS CPU detection. */ /* Compile-time MIPS CPU detection. */
#if LJ_ARCH_VERSION >= 20 #if LJ_ARCH_VERSION >= 20
flags |= JIT_F_MIPS32R2; flags |= JIT_F_MIPSXXR2;
#endif #endif
/* Runtime MIPS CPU detection. */ /* Runtime MIPS CPU detection. */
#if defined(__GNUC__) #if defined(__GNUC__)
if (!(flags & JIT_F_MIPS32R2)) { if (!(flags & JIT_F_MIPSXXR2)) {
int x; int x;
#ifdef __mips16
x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
#else
/* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
__asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */ #endif
if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
} }
#endif #endif
#endif #endif

View File

@ -27,15 +27,15 @@
{ {
Optimized string compare Optimized string compare
Memcheck:Addr4 Memcheck:Addr4
fun:lj_str_fastcmp fun:str_fastcmp
} }
{ {
Optimized string compare Optimized string compare
Memcheck:Addr1 Memcheck:Addr1
fun:lj_str_fastcmp fun:str_fastcmp
} }
{ {
Optimized string compare Optimized string compare
Memcheck:Cond Memcheck:Cond
fun:lj_str_fastcmp fun:str_fastcmp
} }

View File

@ -72,13 +72,56 @@
#define IS_DIRECT_BIT (SIZE_T_ONE) #define IS_DIRECT_BIT (SIZE_T_ONE)
/* Determine system-specific block allocation method. */
#if LJ_TARGET_WINDOWS #if LJ_TARGET_WINDOWS
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
#if LJ_64 && !LJ_GC64 #define LJ_ALLOC_VIRTUALALLOC 1
#if LJ_64 && !LJ_GC64
#define LJ_ALLOC_NTAVM 1
#endif
#else
#include <errno.h>
/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
#include <sys/mman.h>
#define LJ_ALLOC_MMAP 1
#if LJ_64
#define LJ_ALLOC_MMAP_PROBE 1
#if LJ_GC64
#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */
#elif LJ_TARGET_X64 && LJ_HASJIT
/* Due to limitations in the x64 compiler backend. */
#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */
#else
#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */
#endif
#endif
#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
#define LJ_ALLOC_MMAP32 1
#endif
#if LJ_TARGET_LINUX
#define LJ_ALLOC_MREMAP 1
#endif
#endif
#if LJ_ALLOC_VIRTUALALLOC
#if LJ_ALLOC_NTAVM
/* Undocumented, but hey, that's what we all love so much about Windows. */ /* Undocumented, but hey, that's what we all love so much about Windows. */
typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
size_t *size, ULONG alloctype, ULONG prot); size_t *size, ULONG alloctype, ULONG prot);
@ -89,14 +132,15 @@ static PNTAVM ntavm;
*/ */
#define NTAVM_ZEROBITS 1 #define NTAVM_ZEROBITS 1
static void INIT_MMAP(void) static void init_mmap(void)
{ {
ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
"NtAllocateVirtualMemory"); "NtAllocateVirtualMemory");
} }
#define INIT_MMAP() init_mmap()
/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
static LJ_AINLINE void *CALL_MMAP(size_t size) static void *CALL_MMAP(size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
void *ptr = NULL; void *ptr = NULL;
@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
} }
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static LJ_AINLINE void *DIRECT_MMAP(size_t size) static void *DIRECT_MMAP(size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
void *ptr = NULL; void *ptr = NULL;
@ -119,10 +163,8 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
#else #else
#define INIT_MMAP() ((void)0)
/* Win32 MMAP via VirtualAlloc */ /* Win32 MMAP via VirtualAlloc */
static LJ_AINLINE void *CALL_MMAP(size_t size) static void *CALL_MMAP(size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
@ -131,7 +173,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
} }
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static LJ_AINLINE void *DIRECT_MMAP(size_t size) static void *DIRECT_MMAP(size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
#endif #endif
/* This function supports releasing coalesed segments */ /* This function supports releasing coalesed segments */
static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) static int CALL_MUNMAP(void *ptr, size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
MEMORY_BASIC_INFORMATION minfo; MEMORY_BASIC_INFORMATION minfo;
@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
return 0; return 0;
} }
#else #elif LJ_ALLOC_MMAP
#include <errno.h>
#include <sys/mman.h>
#define MMAP_PROT (PROT_READ|PROT_WRITE) #define MMAP_PROT (PROT_READ|PROT_WRITE)
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@ -174,107 +213,151 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
#endif #endif
#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
#if LJ_64 && !LJ_GC64 #if LJ_ALLOC_MMAP_PROBE
/* 64 bit mode with 32 bit pointers needs special support for allocating
** memory in the lower 2GB.
*/
#if defined(MAP_32BIT) #ifdef MAP_TRYFIXED
#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED)
#if defined(__sun__)
#define MMAP_REGION_START ((uintptr_t)0x1000)
#else #else
/* Actually this only gives us max. 1GB in current Linux kernels. */ #define MMAP_FLAGS_PROBE MMAP_FLAGS
#define MMAP_REGION_START ((uintptr_t)0)
#endif #endif
static LJ_AINLINE void *CALL_MMAP(size_t size) #define LJ_ALLOC_MMAP_PROBE_MAX 30
#define LJ_ALLOC_MMAP_PROBE_LINEAR 5
#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
/* No point in a giant ifdef mess. Just try to open /dev/urandom.
** It doesn't really matter if this fails, since we get some ASLR bits from
** every unsuitable allocation, too. And we prefer linear allocation, anyway.
*/
#include <fcntl.h>
#include <unistd.h>
static uintptr_t mmap_probe_seed(void)
{ {
int olderr = errno; uintptr_t val;
void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); int fd = open("/dev/urandom", O_RDONLY);
errno = olderr; if (fd != -1) {
return ptr; int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
(void)close(fd);
if (ok) return val;
}
return 1; /* Punt. */
} }
#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN static void *mmap_probe(size_t size)
/* OSX and FreeBSD mmap() use a naive first-fit linear search.
** That's perfect for us. Except that -pagezero_size must be set for OSX,
** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
** to be reduced to 250MB on FreeBSD.
*/
#if LJ_TARGET_OSX || defined(__DragonFly__)
#define MMAP_REGION_START ((uintptr_t)0x10000)
#elif LJ_TARGET_PS4
#define MMAP_REGION_START ((uintptr_t)0x4000)
#else
#define MMAP_REGION_START ((uintptr_t)0x10000000)
#endif
#define MMAP_REGION_END ((uintptr_t)0x80000000)
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
#include <sys/resource.h>
#endif
static LJ_AINLINE void *CALL_MMAP(size_t size)
{ {
int olderr = errno;
/* Hint for next allocation. Doesn't need to be thread-safe. */ /* Hint for next allocation. Doesn't need to be thread-safe. */
static uintptr_t alloc_hint = MMAP_REGION_START; static uintptr_t hint_addr = 0;
int retry = 0; static uintptr_t hint_prng = 0;
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 int olderr = errno;
static int rlimit_modified = 0; int retry;
if (LJ_UNLIKELY(rlimit_modified == 0)) { for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
struct rlimit rlim; void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; uintptr_t addr = (uintptr_t)p;
setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) {
rlimit_modified = 1; /* We got a suitable address. Bump the hint address. */
} hint_addr = addr + size;
#endif
for (;;) {
void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
if ((uintptr_t)p >= MMAP_REGION_START &&
(uintptr_t)p + size < MMAP_REGION_END) {
alloc_hint = (uintptr_t)p + size;
errno = olderr; errno = olderr;
return p; return p;
} }
if (p != CMFAIL) munmap(p, size); if (p != MFAIL) {
#if defined(__sun__) || defined(__DragonFly__) munmap(p, size);
alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */ } else if (errno == ENOMEM) {
if (alloc_hint + size < MMAP_REGION_END) continue; return MFAIL;
#endif }
if (retry) break; if (hint_addr) {
retry = 1; /* First, try linear probing. */
alloc_hint = MMAP_REGION_START; if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
hint_addr += 0x1000000;
if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
hint_addr = 0;
continue;
} else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
/* Next, try a no-hint probe to get back an ASLR address. */
hint_addr = 0;
continue;
}
}
/* Finally, try pseudo-random probing. */
if (LJ_UNLIKELY(hint_prng == 0)) {
hint_prng = mmap_probe_seed();
}
/* The unsuitable address we got has some ASLR PRNG bits. */
hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
do { /* The PRNG itself is very weak, but see above. */
hint_prng = hint_prng * 1103515245 + 12345;
hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
} while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
} }
errno = olderr; errno = olderr;
return CMFAIL; return MFAIL;
} }
#else
#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
#endif #endif
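
The probing allocator above deserves a standalone illustration. Below is a minimal sketch of the same idea, with assumed stand-ins for the real configuration: MBITS plays the role of LJ_ALLOC_MBITS, LOWER of LJ_ALLOC_MMAP_PROBE_LOWER, and the retry count of LJ_ALLOC_MMAP_PROBE_MAX; the LCG is intentionally weak because it only perturbs the hint with the ASLR bits already mixed in.

#define _GNU_SOURCE
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

#define MBITS  47                     /* assumed pointer-range limit */
#define LOWER  ((uintptr_t)0x4000)    /* keep clear of the zero page */

static void *probe_alloc(size_t size)
{
  static uintptr_t hint = 0, prng = 1;
  int i;
  for (i = 0; i < 30; i++) {
    void *p = mmap((void *)hint, size, PROT_READ|PROT_WRITE,
                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    uintptr_t a = (uintptr_t)p;
    if (p != MAP_FAILED && (a >> MBITS) == 0 && a >= LOWER) {
      hint = a + size;                      /* linear growth from the last hit */
      return p;
    }
    if (p != MAP_FAILED) munmap(p, size);   /* unsuitable address: give it back */
    prng = prng * 1103515245 + 12345;       /* weak LCG, mixed with the ASLR bits */
    hint = (a ^ (prng << 12)) & (((uintptr_t)1 << MBITS) - 1);
    if (hint < LOWER) hint += LOWER;
  }
  return MAP_FAILED;
}
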
#else #if LJ_ALLOC_MMAP32
/* 32 bit mode and GC64 mode is easy. */ #if defined(__sun__)
static LJ_AINLINE void *CALL_MMAP(size_t size) #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
#else
#define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
#endif
static void *mmap_map32(size_t size)
{
#if LJ_ALLOC_MMAP_PROBE
static int fallback = 0;
if (fallback)
return mmap_probe(size);
#endif
{
int olderr = errno;
void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
errno = olderr;
/* This only allows 1GB on Linux. So fall back to probing to get 2GB. */
#if LJ_ALLOC_MMAP_PROBE
if (ptr == MFAIL) {
fallback = 1;
return mmap_probe(size);
}
#endif
return ptr;
}
}
#endif
#if LJ_ALLOC_MMAP32
#define CALL_MMAP(size) mmap_map32(size)
#elif LJ_ALLOC_MMAP_PROBE
#define CALL_MMAP(size) mmap_probe(size)
#else
static void *CALL_MMAP(size_t size)
{ {
int olderr = errno; int olderr = errno;
void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
errno = olderr; errno = olderr;
return ptr; return ptr;
} }
#endif
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
#include <sys/resource.h>
static void init_mmap(void)
{
struct rlimit rlim;
rlim.rlim_cur = rlim.rlim_max = 0x10000;
setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */
}
#define INIT_MMAP() init_mmap()
#endif #endif
#define INIT_MMAP() ((void)0) static int CALL_MUNMAP(void *ptr, size_t size)
#define DIRECT_MMAP(s) CALL_MMAP(s)
static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
{ {
int olderr = errno; int olderr = errno;
int ret = munmap(ptr, size); int ret = munmap(ptr, size);
@ -282,10 +365,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
return ret; return ret;
} }
#if LJ_TARGET_LINUX #if LJ_ALLOC_MREMAP
/* Need to define _GNU_SOURCE to get the mremap prototype. */ /* Need to define _GNU_SOURCE to get the mremap prototype. */
static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
int flags)
{ {
int olderr = errno; int olderr = errno;
ptr = mremap(ptr, osz, nsz, flags); ptr = mremap(ptr, osz, nsz, flags);
@ -305,6 +387,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
#endif #endif
#ifndef INIT_MMAP
#define INIT_MMAP() ((void)0)
#endif
#ifndef DIRECT_MMAP
#define DIRECT_MMAP(s) CALL_MMAP(s)
#endif
#ifndef CALL_MREMAP #ifndef CALL_MREMAP
#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
#endif #endif


@ -25,6 +25,10 @@
#define LUAJIT_ARCH_ppc 5 #define LUAJIT_ARCH_ppc 5
#define LUAJIT_ARCH_MIPS 6 #define LUAJIT_ARCH_MIPS 6
#define LUAJIT_ARCH_mips 6 #define LUAJIT_ARCH_mips 6
#define LUAJIT_ARCH_MIPS32 6
#define LUAJIT_ARCH_mips32 6
#define LUAJIT_ARCH_MIPS64 7
#define LUAJIT_ARCH_mips64 7
/* Target OS. */ /* Target OS. */
#define LUAJIT_OS_OTHER 0 #define LUAJIT_OS_OTHER 0
@ -47,8 +51,10 @@
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC #define LUAJIT_TARGET LUAJIT_ARCH_PPC
#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#else #else
#error "No support for this architecture (yet)" #error "No support for this architecture (yet)"
#endif #endif
@ -289,13 +295,21 @@
#define LJ_ARCH_XENON 1 #define LJ_ARCH_XENON 1
#endif #endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
#define LJ_ARCH_NAME "mipsel" #define LJ_ARCH_NAME "mipsel"
#else
#define LJ_ARCH_NAME "mips64el"
#endif
#define LJ_ARCH_ENDIAN LUAJIT_LE #define LJ_ARCH_ENDIAN LUAJIT_LE
#else #else
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
#define LJ_ARCH_NAME "mips" #define LJ_ARCH_NAME "mips"
#else
#define LJ_ARCH_NAME "mips64"
#endif
#define LJ_ARCH_ENDIAN LUAJIT_BE #define LJ_ARCH_ENDIAN LUAJIT_BE
#endif #endif
@ -307,11 +321,6 @@
#endif #endif
#endif #endif
/* Temporarily disable features until the code has been merged. */
#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
#define LUAJIT_NO_UNWIND 1
#endif
#if !defined(LJ_ABI_SOFTFP) #if !defined(LJ_ABI_SOFTFP)
#ifdef __mips_soft_float #ifdef __mips_soft_float
#define LJ_ABI_SOFTFP 1 #define LJ_ABI_SOFTFP 1
@ -320,7 +329,15 @@
#endif #endif
#endif #endif
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
#define LJ_ARCH_BITS 32 #define LJ_ARCH_BITS 32
#define LJ_TARGET_MIPS32 1
#else
#define LJ_ARCH_BITS 64
#define LJ_TARGET_MIPS64 1
#define LJ_TARGET_GC64 1
#define LJ_ARCH_NOJIT 1 /* NYI */
#endif
#define LJ_TARGET_MIPS 1 #define LJ_TARGET_MIPS 1
#define LJ_TARGET_EHRETREG 4 #define LJ_TARGET_EHRETREG 4
#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
@ -329,7 +346,7 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#if _MIPS_ARCH_MIPS32R2 #if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
#define LJ_ARCH_VERSION 20 #define LJ_ARCH_VERSION 20
#else #else
#define LJ_ARCH_VERSION 10 #define LJ_ARCH_VERSION 10
@ -410,9 +427,13 @@
#ifdef __NO_FPRS__ #ifdef __NO_FPRS__
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif #endif
#elif LJ_TARGET_MIPS #elif LJ_TARGET_MIPS32
#if defined(_LP64) #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
#error "No support for MIPS64" #error "Only o32 ABI supported for MIPS32"
#endif
#elif LJ_TARGET_MIPS64
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
#error "Only n64 ABI supported for MIPS64"
#endif #endif
#endif #endif
#endif #endif
@ -453,7 +474,7 @@
#endif #endif
/* Disable or enable the JIT compiler. */ /* Disable or enable the JIT compiler. */
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64 #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
#define LJ_HASJIT 0 #define LJ_HASJIT 0
#else #else
#define LJ_HASJIT 1 #define LJ_HASJIT 1
@ -524,6 +545,11 @@
#define LJ_NO_SYSTEM 1 #define LJ_NO_SYSTEM 1
#endif #endif
#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
/* NYI: no support for compact unwind specification, yet. */
#define LUAJIT_NO_UNWIND 1
#endif
#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 #if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
#define LJ_NO_UNWIND 1 #define LJ_NO_UNWIND 1
#endif #endif
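
To check what the new MIPS64 detection above produces for a given toolchain, it is enough to print the derived macros. A minimal sketch, assuming lj_arch.h from this tree is on the include path and that LJ_ARCH_NAME, LJ_ARCH_BITS, LJ_GC64 and LJ_HASJIT come out as in the hunks above (for a mips64el n64 build one would expect arch=mips64el, bits=64, gc64=1 and jit=0, since the MIPS64 JIT backend is still marked NYI):

#include <stdio.h>
#include "lj_arch.h"

int main(void)
{
  /* e.g.: mips64el-linux-gnuabi64-gcc -I src arch_check.c (hypothetical) */
  printf("arch=%s bits=%d gc64=%d jit=%d\n",
         LJ_ARCH_NAME, LJ_ARCH_BITS, LJ_GC64, LJ_HASJIT);
  return 0;
}
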


@ -334,7 +334,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
RA_DBGX((as, "remat $i $r", ir, r)); RA_DBGX((as, "remat $i $r", ir, r));
#if !LJ_SOFTFP #if !LJ_SOFTFP
if (ir->o == IR_KNUM) { if (ir->o == IR_KNUM) {
emit_loadn(as, r, ir_knum(ir)); emit_loadk64(as, r, ir);
} else } else
#endif #endif
if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
#if LJ_64 #if LJ_64
} else if (ir->o == IR_KINT64) { } else if (ir->o == IR_KINT64) {
emit_loadu64(as, r, ir_kint64(ir)->u64); emit_loadu64(as, r, ir_kint64(ir)->u64);
#if LJ_GC64
} else if (ir->o == IR_KGC) {
emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
} else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
#endif
#endif #endif
} else { } else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@ -619,10 +625,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
return r; return r;
} }
/* Add a register rename to the IR. */
static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
{
IRRef ren;
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
ren = tref_ref(lj_ir_emit(as->J));
as->J->cur.ir[ren].r = (uint8_t)down;
as->J->cur.ir[ren].s = SPS_NONE;
}
/* Rename register allocation and emit move. */ /* Rename register allocation and emit move. */
static void ra_rename(ASMState *as, Reg down, Reg up) static void ra_rename(ASMState *as, Reg down, Reg up)
{ {
IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
ir->r = (uint8_t)up; ir->r = (uint8_t)up;
as->cost[down] = 0; as->cost[down] = 0;
@ -635,11 +651,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); ra_addrename(as, down, ref, as->snapno);
ren = tref_ref(lj_ir_emit(as->J));
as->ir = as->T->ir; /* The IR may have been reallocated. */
IR(ren)->r = (uint8_t)down;
IR(ren)->s = SPS_NONE;
} }
} }
@ -689,16 +701,20 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
if (ra_noreg(left)) { if (ra_noreg(left)) {
if (irref_isk(lref)) { if (irref_isk(lref)) {
if (ir->o == IR_KNUM) { if (ir->o == IR_KNUM) {
cTValue *tv = ir_knum(ir);
/* FP remat needs a load except for +0. Still better than eviction. */ /* FP remat needs a load except for +0. Still better than eviction. */
if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
emit_loadn(as, dest, tv); emit_loadk64(as, dest, ir);
return; return;
} }
#if LJ_64 #if LJ_64
} else if (ir->o == IR_KINT64) { } else if (ir->o == IR_KINT64) {
emit_loadu64(as, dest, ir_kint64(ir)->u64); emit_loadk64(as, dest, ir);
return; return;
#if LJ_GC64
} else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
emit_loadk64(as, dest, ir);
return;
#endif
#endif #endif
} else if (ir->o != IR_KPRI) { } else if (ir->o != IR_KPRI) {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@ -941,7 +957,7 @@ static void asm_snap_prep(ASMState *as)
} else { } else {
/* Process any renames above the highwater mark. */ /* Process any renames above the highwater mark. */
for (; as->snaprename < as->T->nins; as->snaprename++) { for (; as->snaprename < as->T->nins; as->snaprename++) {
IRIns *ir = IR(as->snaprename); IRIns *ir = &as->T->ir[as->snaprename];
if (asm_snap_checkrename(as, ir->op1)) if (asm_snap_checkrename(as, ir->op1))
ir->op2 = REF_BIAS-1; /* Kill rename. */ ir->op2 = REF_BIAS-1; /* Kill rename. */
} }
@ -1055,7 +1071,7 @@ static void asm_bufhdr(ASMState *as, IRIns *ir)
} }
} else { } else {
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
/* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
} }
@ -1472,12 +1488,7 @@ static void asm_phi_fixup(ASMState *as)
irt_clearmark(ir->t); irt_clearmark(ir->t);
/* Left PHI gained a spill slot before the loop? */ /* Left PHI gained a spill slot before the loop? */
if (ra_hasspill(ir->s)) { if (ra_hasspill(ir->s)) {
IRRef ren; ra_addrename(as, r, lref, as->loopsnapno);
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
ren = tref_ref(lj_ir_emit(as->J));
as->ir = as->T->ir; /* The IR may have been reallocated. */
IR(ren)->r = (uint8_t)r;
IR(ren)->s = SPS_NONE;
} }
} }
rset_clear(work, r); rset_clear(work, r);
@ -1888,7 +1899,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
SnapEntry sn = map[n-1]; SnapEntry sn = map[n-1];
if ((sn & SNAP_FRAME)) { if ((sn & SNAP_FRAME)) {
*gotframe = 1; *gotframe = 1;
return snap_slot(sn); return snap_slot(sn) - LJ_FR2;
} }
} }
return 0; return 0;
@ -1908,16 +1919,20 @@ static void asm_tail_link(ASMState *as)
if (as->T->link == 0) { if (as->T->link == 0) {
/* Setup fixed registers for exit to interpreter. */ /* Setup fixed registers for exit to interpreter. */
const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
int32_t mres; int32_t mres;
if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
if (bc_isret(bc_op(*retpc))) if (bc_isret(bc_op(*retpc)))
pc = retpc; pc = retpc;
} }
#if LJ_GC64
emit_loadu64(as, RID_LPC, u64ptr(pc));
#else
ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
ra_allockreg(as, i32ptr(pc), RID_LPC); ra_allockreg(as, i32ptr(pc), RID_LPC);
mres = (int32_t)(snap->nslots - baseslot); #endif
mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
switch (bc_op(*pc)) { switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT: case BC_CALLM: case BC_CALLMT:
mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
@ -1932,6 +1947,11 @@ static void asm_tail_link(ASMState *as)
} }
emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */
setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
IR(as->J->ktrace)->o = IR_KGC;
}
/* Sync the interpreter state with the on-trace state. */ /* Sync the interpreter state with the on-trace state. */
asm_stack_restore(as, snap); asm_stack_restore(as, snap);
@ -1957,17 +1977,22 @@ static void asm_setup_regsp(ASMState *as)
ra_setup(as); ra_setup(as);
/* Clear reg/sp for constants. */ /* Clear reg/sp for constants. */
for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
ir->prev = REGSP_INIT; ir->prev = REGSP_INIT;
if (irt_is64(ir->t) && ir->o != IR_KNULL) {
#if LJ_GC64
ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
#else
/* Make life easier for backends by putting address of constant in i. */
ir->i = (int32_t)(intptr_t)(ir+1);
#endif
ir++;
}
}
/* REF_BASE is used for implicit references to the BASE register. */ /* REF_BASE is used for implicit references to the BASE register. */
lastir->prev = REGSP_HINT(RID_BASE); lastir->prev = REGSP_HINT(RID_BASE);
ir = IR(nins-1);
if (ir->o == IR_RENAME) {
do { ir--; nins--; } while (ir->o == IR_RENAME);
T->nins = nins; /* Remove any renames left over from ASM restart. */
}
as->snaprename = nins; as->snaprename = nins;
as->snapref = nins; as->snapref = nins;
as->snapno = T->nsnap; as->snapno = T->nsnap;
@ -2199,14 +2224,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
ASMState *as = &as_; ASMState *as = &as_;
MCode *origtop; MCode *origtop;
/* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
{
IRRef nins = T->nins;
IRIns *ir = &T->ir[nins-1];
if (ir->o == IR_NOP || ir->o == IR_RENAME) {
do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
T->nins = nins;
}
}
/* Ensure an initialized instruction beyond the last one for HIOP checks. */ /* Ensure an initialized instruction beyond the last one for HIOP checks. */
J->cur.nins = lj_ir_nextins(J); /* This also allows one RENAME to be added without reallocating curfinal. */
J->cur.ir[J->cur.nins].o = IR_NOP; as->orignins = lj_ir_nextins(J);
J->cur.ir[as->orignins].o = IR_NOP;
/* Setup initial state. Copy some fields to reduce indirections. */ /* Setup initial state. Copy some fields to reduce indirections. */
as->J = J; as->J = J;
as->T = T; as->T = T;
as->ir = T->ir; J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
as->flags = J->flags; as->flags = J->flags;
as->loopref = J->loopref; as->loopref = J->loopref;
as->realign = NULL; as->realign = NULL;
@ -2219,12 +2255,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->mclim = as->mcbot + MCLIM_REDZONE; as->mclim = as->mcbot + MCLIM_REDZONE;
asm_setup_target(as); asm_setup_target(as);
do { /*
** This is a loop, because the MCode may have to be (re-)assembled
** multiple times:
**
** 1. as->realign is set (and the assembly aborted), if the arch-specific
** backend wants the MCode to be aligned differently.
**
** This is currently only the case on x86/x64, where small loops get
** an aligned loop body plus a short branch. Not much effort is wasted,
** because the abort happens very quickly and only once.
**
** 2. The IR is immovable, since the MCode embeds pointers to various
** constants inside the IR. But RENAMEs may need to be added to the IR
** during assembly, which might grow and reallocate the IR. We check
** at the end if the IR (in J->cur.ir) has actually grown, resize the
** copy (in J->curfinal.ir) and try again.
**
** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
** always have one spare slot in the IR (see above), which means we
** have to redo the assembly for only ~2% of all traces.
**
** Very, very rarely, this needs to be done repeatedly, since the
** location of constants inside the IR (actually, reachability from
** a global pointer) may affect register allocation and thus the
** number of RENAMEs.
*/
for (;;) {
as->mcp = as->mctop; as->mcp = as->mctop;
#ifdef LUA_USE_ASSERT #ifdef LUA_USE_ASSERT
as->mcp_prev = as->mcp; as->mcp_prev = as->mcp;
#endif #endif
as->curins = T->nins; as->ir = J->curfinal->ir; /* Use the copied IR. */
as->curins = J->cur.nins = as->orignins;
RA_DBG_START(); RA_DBG_START();
RA_DBGX((as, "===== STOP =====")); RA_DBGX((as, "===== STOP ====="));
@ -2252,22 +2317,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
checkmclim(as); checkmclim(as);
asm_ir(as, ir); asm_ir(as, ir);
} }
} while (as->realign); /* Retry in case the MCode needs to be realigned. */
/* Emit head of trace. */ if (as->realign && J->curfinal->nins >= T->nins)
RA_DBG_REF(); continue; /* Retry in case only the MCode needs to be realigned. */
checkmclim(as);
if (as->gcsteps > 0) { /* Emit head of trace. */
as->curins = as->T->snap[0].ref; RA_DBG_REF();
asm_snap_prep(as); /* The GC check is a guard. */ checkmclim(as);
asm_gc_check(as); if (as->gcsteps > 0) {
as->curins = as->T->snap[0].ref;
asm_snap_prep(as); /* The GC check is a guard. */
asm_gc_check(as);
as->curins = as->stopins;
}
ra_evictk(as);
if (as->parent)
asm_head_side(as);
else
asm_head_root(as);
asm_phi_fixup(as);
if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
lua_assert(J->curfinal->nk == T->nk);
memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
(T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
T->nins = J->curfinal->nins;
break; /* Done. */
}
/* Otherwise try again with a bigger IR. */
lj_trace_free(J2G(J), J->curfinal);
J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
J->curfinal = lj_trace_alloc(J->L, T);
as->realign = NULL;
} }
ra_evictk(as);
if (as->parent)
asm_head_side(as);
else
asm_head_root(as);
asm_phi_fixup(as);
RA_DBGX((as, "===== START ====")); RA_DBGX((as, "===== START ===="));
RA_DBG_FLUSH(); RA_DBG_FLUSH();


@ -909,7 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
/* NYI: Check that UREFO is still open and not aliasing a slot. */
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) { if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
@ -998,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); if (ir->op1 == REF_NIL) {
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */
ARMIns ai = asm_fxloadins(ir); } else {
int32_t ofs; Reg dest = ra_dest(as, ir, RSET_GPR);
if (ir->op2 == IRFL_TAB_ARRAY) { Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
ofs = asm_fuseabase(as, ir->op1); ARMIns ai = asm_fxloadins(ir);
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ int32_t ofs;
emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); if (ir->op2 == IRFL_TAB_ARRAY) {
return; ofs = asm_fuseabase(as, ir->op1);
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
return;
}
} }
ofs = field_ofs[ir->op2];
if ((ai & 0x04000000))
emit_lso(as, ai, dest, idx, ofs);
else
emit_lsox(as, ai, dest, idx, ofs);
} }
ofs = field_ofs[ir->op2];
if ((ai & 0x04000000))
emit_lso(as, ai, dest, idx, ofs);
else
emit_lsox(as, ai, dest, idx, ofs);
} }
static void asm_fstore(ASMState *as, IRIns *ir) static void asm_fstore(ASMState *as, IRIns *ir)


@ -459,12 +459,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
dest, dest); dest, dest);
if (irt_isfloat(ir->t)) if (irt_isfloat(ir->t))
emit_lsptr(as, MIPSI_LWC1, (tmp & 31), emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
RSET_GPR);
else else
emit_lsptr(as, MIPSI_LDC1, (tmp & 31), emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
RSET_GPR);
emit_tg(as, MIPSI_MTC1, RID_TMP, dest); emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
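
The LJ_K32_2P31/LJ_K64_2P31 entries referenced above replace the old lj_ir_k64_find() lookups with fixed slots in the constant tables hanging off the jit_State, but the conversion trick itself is unchanged: flip the sign bit with an XOR, convert as a signed integer, then add 2^31 back from the constant. A portable C rendering of the same idea (a sketch, not the emitted code):

#include <stdint.h>

/* Convert an unsigned 32-bit integer to double on an FPU that only
** converts signed integers: bias by 2^31, convert, then un-bias. */
static double u32_to_double(uint32_t x)
{
  return (double)(int32_t)(x ^ 0x80000000u) + 2147483648.0;
}
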
@ -494,12 +492,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
tmp, left, tmp); tmp, left, tmp);
if (st == IRT_FLOAT) if (st == IRT_FLOAT)
emit_lsptr(as, MIPSI_LWC1, (tmp & 31), emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
RSET_GPR);
else else
emit_lsptr(as, MIPSI_LDC1, (tmp & 31), emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
RSET_GPR);
} else { } else {
emit_tg(as, MIPSI_MFC1, dest, tmp); emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
@ -514,7 +510,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg left = ra_alloc1(as, ir->op1, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
if ((ir->op2 & IRCONV_SEXT)) { if ((ir->op2 & IRCONV_SEXT)) {
if ((as->flags & JIT_F_MIPS32R2)) { if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
} else { } else {
uint32_t shift = st == IRT_I8 ? 24 : 16; uint32_t shift = st == IRT_I8 ? 24 : 16;
@ -743,7 +739,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
if ((as->flags & JIT_F_MIPS32R2)) { if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
} else { } else {
emit_dst(as, MIPSI_OR, dest, dest, tmp1); emit_dst(as, MIPSI_OR, dest, dest, tmp1);
@ -810,7 +806,6 @@ nolo:
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
/* NYI: Check that UREFO is still open and not aliasing a slot. */
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) { if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
@ -901,17 +896,23 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
MIPSIns mi = asm_fxloadins(ir); MIPSIns mi = asm_fxloadins(ir);
Reg idx;
int32_t ofs; int32_t ofs;
if (ir->op2 == IRFL_TAB_ARRAY) { if (ir->op1 == REF_NIL) {
ofs = asm_fuseabase(as, ir->op1); idx = RID_JGL;
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ ofs = ir->op2 - 32768;
emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); } else {
return; idx = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->op2 == IRFL_TAB_ARRAY) {
ofs = asm_fuseabase(as, ir->op1);
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
return;
}
} }
ofs = field_ofs[ir->op2];
} }
ofs = field_ofs[ir->op2];
lua_assert(!irt_isfp(ir->t)); lua_assert(!irt_isfp(ir->t));
emit_tsi(as, mi, dest, idx, ofs); emit_tsi(as, mi, dest, idx, ofs);
} }
@ -1456,7 +1457,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
if ((as->flags & JIT_F_MIPS32R2)) { if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
} else { } else {
@ -1512,7 +1513,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
static void asm_bror(ASMState *as, IRIns *ir) static void asm_bror(ASMState *as, IRIns *ir)
{ {
if ((as->flags & JIT_F_MIPS32R2)) { if ((as->flags & JIT_F_MIPSXXR2)) {
asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
} else { } else {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);


@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_lsptr(as, PPCI_LFS, (fbias & 31), emit_lsptr(as, PPCI_LFS, (fbias & 31),
(void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
RSET_GPR);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_fb(as, PPCI_FCTIWZ, tmp, left); emit_fb(as, PPCI_FCTIWZ, tmp, left);
} }
@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg left = ra_alloc1(as, lref, allow); Reg left = ra_alloc1(as, lref, allow);
Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
const float *kbias;
if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fab(as, PPCI_FSUB, dest, dest, fbias);
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); emit_lsptr(as, PPCI_LFS, (fbias & 31),
if (st == IRT_U32) kbias++; &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
rset_clear(allow, hibias)); rset_clear(allow, hibias));
emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
RID_SP, SPOFS_TMPLO); RID_SP, SPOFS_TMPLO);
@ -472,8 +469,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_fb(as, PPCI_FCTIWZ, tmp, tmp); emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
emit_fab(as, PPCI_FSUB, tmp, left, tmp); emit_fab(as, PPCI_FSUB, tmp, left, tmp);
emit_lsptr(as, PPCI_LFS, (tmp & 31), emit_lsptr(as, PPCI_LFS, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
RSET_GPR);
} else { } else {
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
@ -717,7 +713,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
/* NYI: Check that UREFO is still open and not aliasing a slot. */
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) { if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
@ -809,17 +804,23 @@ static PPCIns asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
PPCIns pi = asm_fxloadins(ir); PPCIns pi = asm_fxloadins(ir);
Reg idx;
int32_t ofs; int32_t ofs;
if (ir->op2 == IRFL_TAB_ARRAY) { if (ir->op1 == REF_NIL) {
ofs = asm_fuseabase(as, ir->op1); idx = RID_JGL;
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ ofs = ir->op2 - 32768;
emit_tai(as, PPCI_ADDI, dest, idx, ofs); } else {
return; idx = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->op2 == IRFL_TAB_ARRAY) {
ofs = asm_fuseabase(as, ir->op1);
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
emit_tai(as, PPCI_ADDI, dest, idx, ofs);
return;
}
} }
ofs = field_ofs[ir->op2];
} }
ofs = field_ofs[ir->op2];
lua_assert(!irt_isi8(ir->t)); lua_assert(!irt_isi8(ir->t));
emit_tai(as, pi, dest, idx, ofs); emit_tai(as, pi, dest, idx, ofs);
} }
@ -975,7 +976,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fab(as, PPCI_FSUB, dest, dest, fbias);
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
emit_lsptr(as, PPCI_LFS, (fbias & 31), emit_lsptr(as, PPCI_LFS, (fbias & 31),
(void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), (void *)&as->J->k32[LJ_K32_2P52_2P31],
rset_clear(allow, hibias)); rset_clear(allow, hibias));
emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);

File diff suppressed because it is too large


@ -439,8 +439,8 @@
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
#elif LJ_TARGET_MIPS #elif LJ_TARGET_MIPS32
/* -- MIPS calling conventions -------------------------------------------- */ /* -- MIPS o32 calling conventions ---------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \ #define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \ cc->retref = 1; /* Return all structs by reference. */ \
@ -515,6 +515,78 @@
sp = (uint8_t *)&cc->fpr[0].f; sp = (uint8_t *)&cc->fpr[0].f;
#endif #endif
#elif LJ_TARGET_MIPS64
/* -- MIPS n64 calling conventions ---------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
cc->retref = !(sz <= 16); \
if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
#define CCALL_HANDLE_STRUCTRET2 \
ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct));
#define CCALL_HANDLE_COMPLEXRET \
/* Complex values are returned in 1 or 2 FPRs. */ \
cc->retref = 0;
#if LJ_ABI_SOFTFP /* MIPS64 soft-float */
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
((intptr_t *)dp)[0] = cc->gpr[0]; \
} else { /* Copy complex double from GPRs. */ \
((intptr_t *)dp)[0] = cc->gpr[0]; \
((intptr_t *)dp)[1] = cc->gpr[1]; \
}
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
/* Position of soft-float 'float' return value depends on endianness. */
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4);
#else /* MIPS64 hard-float */
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
((float *)dp)[0] = cc->fpr[0].f; \
((float *)dp)[1] = cc->fpr[1].f; \
} else { /* Copy complex double from FPRs. */ \
((double *)dp)[0] = cc->fpr[0].d; \
((double *)dp)[1] = cc->fpr[1].d; \
}
#define CCALL_HANDLE_COMPLEXARG \
if (sz == 2*sizeof(float)) { \
isfp = 2; \
if (ngpr < maxgpr) \
sz *= 2; \
}
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
#endif
#define CCALL_HANDLE_STRUCTARG \
/* Pass all structs by value in registers and/or on the stack. */
#define CCALL_HANDLE_REGARG \
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
} \
goto done; \
}
#else #else
#error "Missing calling convention definitions for this architecture" #error "Missing calling convention definitions for this architecture"
#endif #endif
@ -754,6 +826,78 @@ noth: /* Not a homogeneous float/double aggregate. */
#endif #endif
/* -- MIPS64 ABI struct classification ---------------------------- */
#if LJ_TARGET_MIPS64
#define FTYPE_FLOAT 1
#define FTYPE_DOUBLE 2
/* Classify FP fields (max. 2) and their types. */
static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
{
int n = 0, ft = 0;
if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION))
goto noth;
while (ct->sib) {
CType *sct;
ct = ctype_get(cts, ct->sib);
if (n == 2) {
goto noth;
} else if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct);
if (ctype_isfp(sct->info)) {
ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n;
n++;
} else {
goto noth;
}
} else if (ctype_isbitfield(ct->info) ||
ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
goto noth;
}
}
if (n <= 2)
return ft;
noth: /* Not a homogeneous float/double aggregate. */
return 0; /* Struct is in GPRs. */
}
void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft)
{
if (LJ_ABI_SOFTFP ? ft :
((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
int i, ofs = 0;
for (i = 0; ft != 0; i++, ft >>= 2) {
if ((ft & 3) == FTYPE_FLOAT) {
#if LJ_ABI_SOFTFP
/* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */
memcpy((uint8_t *)dp + ofs,
(uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4);
#else
*(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f;
#endif
ofs += 4;
} else {
ofs = (ofs + 7) & ~7; /* 64 bit alignment. */
#if LJ_ABI_SOFTFP
*(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i];
#else
*(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d;
#endif
ofs += 8;
}
}
} else {
#if !LJ_ABI_SOFTFP
if (ft) sp = (uint8_t *)&cc->fpr[0];
#endif
memcpy(dp, sp, ctr->size);
}
}
#endif
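
To make the ft encoding concrete, here is what the classifier above would be expected to return for a few simple aggregates (illustrative declarations only; two bits per field, low bits first):

struct dd  { double x, y; };      /* ft = FTYPE_DOUBLE | (FTYPE_DOUBLE << 2) -> copied via FPRs */
struct fd  { float x; double y; };/* ft = FTYPE_FLOAT  | (FTYPE_DOUBLE << 2) -> fpr[0].f and fpr[1].d (hard-float) */
struct fi  { float x; int y; };   /* ft = 0: not all fields are FP, stays in GPRs */
struct ddd { double x, y, z; };   /* ft = 0: more than two fields */
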
/* -- Common C call handling ---------------------------------------------- */ /* -- Common C call handling ---------------------------------------------- */
/* Infer the destination CTypeID for a vararg argument. */ /* Infer the destination CTypeID for a vararg argument. */
@ -921,6 +1065,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp; (int32_t)*(int16_t *)dp;
} }
#if LJ_TARGET_MIPS64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
(isfp && nsp == 0)) && d->size <= 4) {
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
}
#endif
#if LJ_TARGET_X64 && LJ_ABI_WIN #if LJ_TARGET_X64 && LJ_ABI_WIN
if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
if (nfpr == ngpr) if (nfpr == ngpr)
@ -936,7 +1086,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
cc->fpr[nfpr-2].d[1] = 0; cc->fpr[nfpr-2].d[1] = 0;
} }
#elif LJ_TARGET_ARM64 #elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP)
if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
/* Split float HFA or complex float into separate registers. */ /* Split float HFA or complex float into separate registers. */
CTSize i = (sz >> 2) - 1; CTSize i = (sz >> 2) - 1;
@ -983,7 +1133,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_COMPLEXRET2 CCALL_HANDLE_COMPLEXRET2
return 1; /* One GC step. */ return 1; /* One GC step. */
} }
if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) if (LJ_BE && ctr->size < CTSIZE_PTR &&
(ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info)))
sp += (CTSIZE_PTR - ctr->size); sp += (CTSIZE_PTR - ctr->size);
#if CCALL_NUM_FPR #if CCALL_NUM_FPR
if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))


@ -104,11 +104,11 @@ typedef union FPRArg {
typedef intptr_t GPRArg; typedef intptr_t GPRArg;
typedef double FPRArg; typedef double FPRArg;
#elif LJ_TARGET_MIPS #elif LJ_TARGET_MIPS32
#define CCALL_NARG_GPR 4 #define CCALL_NARG_GPR 4
#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2) #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2)
#define CCALL_NRET_GPR 2 #define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2)
#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
#define CCALL_SPS_EXTRA 7 #define CCALL_SPS_EXTRA 7
#define CCALL_SPS_FREE 1 #define CCALL_SPS_FREE 1
@ -119,6 +119,22 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) }; struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg; } FPRArg;
#elif LJ_TARGET_MIPS64
/* FP args are positional and overlay the GPR array. */
#define CCALL_NARG_GPR 8
#define CCALL_NARG_FPR 0
#define CCALL_NRET_GPR 2
#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
#define CCALL_SPS_EXTRA 3
#define CCALL_SPS_FREE 1
typedef intptr_t GPRArg;
typedef union FPRArg {
double d;
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#else #else
#error "Missing calling convention definitions for this architecture" #error "Missing calling convention definitions for this architecture"
#endif #endif
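
The "positional" comment in the MIPS64 block above means that FP and integer arguments share one sequence of eight argument slots instead of drawing from separate pools, which is why CCALL_NARG_FPR can be 0 and the FPRs simply overlay cc->gpr. An assumed illustration of the standard n64 assignment:

void f(double a, int b, double c, int d);
/* a -> $f12 (slot 0), b -> $a1 (slot 1), c -> $f14 (slot 2), d -> $a3 (slot 3);
** every argument consumes the next of the 8 slots, FPR or GPR alike,
** and arguments beyond the eighth go to the stack. */
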


@ -67,9 +67,13 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#define CALLBACK_MCODE_HEAD 24 #define CALLBACK_MCODE_HEAD 24
#endif #endif
#elif LJ_TARGET_MIPS #elif LJ_TARGET_MIPS32
#define CALLBACK_MCODE_HEAD 24 #define CALLBACK_MCODE_HEAD 20
#elif LJ_TARGET_MIPS64
#define CALLBACK_MCODE_HEAD 52
#else #else
@ -221,14 +225,27 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
static void callback_mcode_init(global_State *g, uint32_t *page) static void callback_mcode_init(global_State *g, uint32_t *page)
{ {
uint32_t *p = page; uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback; uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
uintptr_t ug = (uintptr_t)(void *)g;
MSize slot; MSize slot;
*p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; #if LJ_TARGET_MIPS32
*p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
*p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); #else
*p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48);
*p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48);
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
*p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
*p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
*p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
*p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
#endif
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
*p++ = MIPSI_JR | MIPSF_S(RID_R3); *p++ = MIPSI_JR | MIPSF_S(RID_R3);
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
*p = MIPSI_B | ((page-p-1) & 0x0000ffffu); *p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
p++; p++;
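
The MIPS64 head above has to materialize two 64-bit constants (the lj_vm_ffi_callback target in R3 and the global_State pointer in R2) 16 bits at a time. A small C sketch of what the LUI/ORI/DSLL sequence computes; note that the sign extension LUI performs is irrelevant here, because the two DSLLs shift those bits back out:

#include <stdint.h>

static uint64_t build64(uint64_t target)
{
  uint64_t v = ((target >> 48) & 0xffff) << 16;  /* lui     */
  v |= (target >> 32) & 0xffff;                  /* ori     */
  v <<= 16;                                      /* dsll 16 */
  v |= (target >> 16) & 0xffff;                  /* ori     */
  v <<= 16;                                      /* dsll 16 */
  v |= target & 0xffff;                          /* ori     */
  return v;                                      /* == target */
}
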
@ -440,7 +457,7 @@ void lj_ccallback_mcode_free(CTState *cts)
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
*(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
#elif LJ_TARGET_MIPS #elif LJ_TARGET_MIPS32
#define CALLBACK_HANDLE_GPR \ #define CALLBACK_HANDLE_GPR \
if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
@ -466,6 +483,29 @@ void lj_ccallback_mcode_free(CTState *cts)
UNUSED(isfp); UNUSED(isfp);
#endif #endif
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
#elif LJ_TARGET_MIPS64
#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */
#define CALLBACK_HANDLE_REGARG \
if (ngpr + n <= maxgpr) { \
sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
ngpr += n; \
goto done; \
}
#else /* MIPS64 soft-float */
#define CALLBACK_HANDLE_REGARG \
if (ngpr + n <= maxgpr) { \
UNUSED(isfp); \
sp = (void*) &cts->cb.gpr[ngpr]; \
ngpr += n; \
goto done; \
}
#endif
#define CALLBACK_HANDLE_RET \ #define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp; ((float *)dp)[1] = *(float *)dp;
@ -557,7 +597,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
nsp += n; nsp += n;
done: done:
if (LJ_BE && cta->size < CTSIZE_PTR) if (LJ_BE && cta->size < CTSIZE_PTR
#if LJ_TARGET_MIPS64
&& !(isfp && nsp)
#endif
)
sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
} }
@ -608,6 +652,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp; (int32_t)*(int16_t *)dp;
} }
#if LJ_TARGET_MIPS64
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
*(int64_t *)dp = (int64_t)*(int32_t *)dp;
#endif
#if LJ_TARGET_X86 #if LJ_TARGET_X86
if (ctype_isfp(ctr->info)) if (ctype_isfp(ctr->info))
cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;


@ -93,11 +93,13 @@ void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
setcdataV(L, &tmp, cd); setcdataV(L, &tmp, cd);
lj_gc_anybarriert(L, t); lj_gc_anybarriert(L, t);
tv = lj_tab_set(L, t, &tmp); tv = lj_tab_set(L, t, &tmp);
setgcV(L, tv, obj, it); if (it == LJ_TNIL) {
if (!tvisnil(tv)) setnilV(tv);
cd->marked |= LJ_GC_CDATA_FIN;
else
cd->marked &= ~LJ_GC_CDATA_FIN; cd->marked &= ~LJ_GC_CDATA_FIN;
} else {
setgcV(L, tv, obj, it);
cd->marked |= LJ_GC_CDATA_FIN;
}
} }
} }


@ -297,13 +297,17 @@ static CPToken cp_next_(CPState *cp)
else return '/'; else return '/';
break; break;
case '|': case '|':
if (cp_get(cp) != '|') return '|'; cp_get(cp); return CTOK_OROR; if (cp_get(cp) != '|') return '|';
cp_get(cp); return CTOK_OROR;
case '&': case '&':
if (cp_get(cp) != '&') return '&'; cp_get(cp); return CTOK_ANDAND; if (cp_get(cp) != '&') return '&';
cp_get(cp); return CTOK_ANDAND;
case '=': case '=':
if (cp_get(cp) != '=') return '='; cp_get(cp); return CTOK_EQ; if (cp_get(cp) != '=') return '=';
cp_get(cp); return CTOK_EQ;
case '!': case '!':
if (cp_get(cp) != '=') return '!'; cp_get(cp); return CTOK_NE; if (cp_get(cp) != '=') return '!';
cp_get(cp); return CTOK_NE;
case '<': case '<':
if (cp_get(cp) == '=') { cp_get(cp); return CTOK_LE; } if (cp_get(cp) == '=') { cp_get(cp); return CTOK_LE; }
else if (cp->c == '<') { cp_get(cp); return CTOK_SHL; } else if (cp->c == '<') { cp_get(cp); return CTOK_SHL; }
@ -313,7 +317,8 @@ static CPToken cp_next_(CPState *cp)
else if (cp->c == '>') { cp_get(cp); return CTOK_SHR; } else if (cp->c == '>') { cp_get(cp); return CTOK_SHR; }
return '>'; return '>';
case '-': case '-':
if (cp_get(cp) != '>') return '-'; cp_get(cp); return CTOK_DEREF; if (cp_get(cp) != '>') return '-';
cp_get(cp); return CTOK_DEREF;
case '$': case '$':
return cp_param(cp); return cp_param(cp);
case '\0': return CTOK_EOF; case '\0': return CTOK_EOF;


@ -712,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
return tr; return tr;
} }
/* Tailcall to function. */
static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv)
{
TRef kfunc = lj_ir_kfunc(J, funcV(tv));
#if LJ_FR2
J->base[-2] = kfunc;
J->base[-1] = TREF_FRAME;
#else
J->base[-1] = kfunc | TREF_FRAME;
#endif
rd->nres = -1; /* Pending tailcall. */
}
/* Record ctype __index/__newindex metamethods. */ /* Record ctype __index/__newindex metamethods. */
static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
RecordFFData *rd) RecordFFData *rd)
@ -721,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
if (!tv) if (!tv)
lj_trace_err(J, LJ_TRERR_BADTYPE); lj_trace_err(J, LJ_TRERR_BADTYPE);
if (tvisfunc(tv)) { if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; crec_tailcall(J, rd, tv);
rd->nres = -1; /* Pending tailcall. */
} else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
/* Specialize to result of __index lookup. */ /* Specialize to result of __index lookup. */
cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]);
@ -1119,20 +1131,20 @@ static void crec_snap_caller(jit_State *J)
lua_State *L = J->L; lua_State *L = J->L;
TValue *base = L->base, *top = L->top; TValue *base = L->base, *top = L->top;
const BCIns *pc = J->pc; const BCIns *pc = J->pc;
TRef ftr = J->base[-1]; TRef ftr = J->base[-1-LJ_FR2];
ptrdiff_t delta; ptrdiff_t delta;
if (!frame_islua(base-1) || J->framedepth <= 0) if (!frame_islua(base-1) || J->framedepth <= 0)
lj_trace_err(J, LJ_TRERR_NYICALL); lj_trace_err(J, LJ_TRERR_NYICALL);
J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
L->top = base; L->base = base - delta; L->top = base; L->base = base - delta;
J->base[-1] = TREF_FALSE; J->base[-1-LJ_FR2] = TREF_FALSE;
J->base -= delta; J->baseslot -= (BCReg)delta; J->base -= delta; J->baseslot -= (BCReg)delta;
J->maxslot = (BCReg)delta; J->framedepth--; J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
lj_snap_add(J); lj_snap_add(J);
L->base = base; L->top = top; L->base = base; L->top = top;
J->framedepth++; J->maxslot = 1; J->framedepth++; J->maxslot = 1;
J->base += delta; J->baseslot += (BCReg)delta; J->base += delta; J->baseslot += (BCReg)delta;
J->base[-1] = ftr; J->pc = pc; J->base[-1-LJ_FR2] = ftr; J->pc = pc;
} }
/* Record function call. */ /* Record function call. */
@ -1224,8 +1236,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
if (tv) { if (tv) {
if (tvisfunc(tv)) { if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; crec_tailcall(J, rd, tv);
rd->nres = -1; /* Pending tailcall. */
return; return;
} }
} else if (mm == MM_new) { } else if (mm == MM_new) {
@ -1238,7 +1249,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
{ {
if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) { if (sp[0] && sp[1] && ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) {
IRType dt; IRType dt;
CTypeID id; CTypeID id;
TRef tr; TRef tr;
@ -1296,6 +1307,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
{ {
CTState *cts = ctype_ctsG(J2G(J)); CTState *cts = ctype_ctsG(J2G(J));
CType *ctp = s[0]; CType *ctp = s[0];
if (!(sp[0] && sp[1])) return 0;
if (ctype_isptr(ctp->info) || ctype_isrefarray(ctp->info)) { if (ctype_isptr(ctp->info) || ctype_isrefarray(ctp->info)) {
if ((mm == MM_sub || mm == MM_eq || mm == MM_lt || mm == MM_le) && if ((mm == MM_sub || mm == MM_eq || mm == MM_lt || mm == MM_le) &&
(ctype_isptr(s[1]->info) || ctype_isrefarray(s[1]->info))) { (ctype_isptr(s[1]->info) || ctype_isrefarray(s[1]->info))) {
@ -1373,8 +1385,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
} }
if (tv) { if (tv) {
if (tvisfunc(tv)) { if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; crec_tailcall(J, rd, tv);
rd->nres = -1; /* Pending tailcall. */
return 0; return 0;
} /* NYI: non-function metamethods. */ } /* NYI: non-function metamethods. */
} else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */


@ -42,18 +42,18 @@ LJ_STATIC_ASSERT(((int)CT_STRUCT & (int)CT_ARRAY) == CT_STRUCT);
** ---------- info ------------ ** ---------- info ------------
** |type flags... A cid | size | sib | next | name | ** |type flags... A cid | size | sib | next | name |
** +----------------------------+--------+-------+-------+-------+-- ** +----------------------------+--------+-------+-------+-------+--
** |NUM BFvcUL.. A | size | | type | | ** |NUM BFcvUL.. A | size | | type | |
** |STRUCT ..vcU..V A | size | field | name? | name? | ** |STRUCT ..cvU..V A | size | field | name? | name? |
** |PTR ..vcR... A cid | size | | type | | ** |PTR ..cvR... A cid | size | | type | |
** |ARRAY VCvc...V A cid | size | | type | | ** |ARRAY VCcv...V A cid | size | | type | |
** |VOID ..vc.... A | size | | type | | ** |VOID ..cv.... A | size | | type | |
** |ENUM A cid | size | const | name? | name? | ** |ENUM A cid | size | const | name? | name? |
** |FUNC ....VS.. cc cid | nargs | field | name? | name? | ** |FUNC ....VS.. cc cid | nargs | field | name? | name? |
** |TYPEDEF cid | | | name | name | ** |TYPEDEF cid | | | name | name |
** |ATTRIB attrnum cid | attr | sib? | type? | | ** |ATTRIB attrnum cid | attr | sib? | type? | |
** |FIELD cid | offset | field | | name? | ** |FIELD cid | offset | field | | name? |
** |BITFIELD B.vcU csz bsz pos | offset | field | | name? | ** |BITFIELD B.cvU csz bsz pos | offset | field | | name? |
** |CONSTVAL c cid | value | const | name | name | ** |CONSTVAL c cid | value | const | name | name |
** |EXTERN cid | | sib? | name | name | ** |EXTERN cid | | sib? | name | name |
** |KW tok | size | | name | name | ** |KW tok | size | | name | name |
** +----------------------------+--------+-------+-------+-------+-- ** +----------------------------+--------+-------+-------+-------+--


@ -95,6 +95,8 @@ typedef unsigned int uintptr_t;
#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
#define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checki8(x) ((x) == (int32_t)(int8_t)(x))
#define checku8(x) ((x) == (int32_t)(uint8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x))


@ -75,7 +75,7 @@ void lj_dispatch_init(GG_State *GG)
for (i = 0; i < GG_NUM_ASMFF; i++) for (i = 0; i < GG_NUM_ASMFF; i++)
GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
#if LJ_TARGET_MIPS #if LJ_TARGET_MIPS
memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *));
#endif #endif
} }
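As a standalone illustration (not LuaJIT code; the ASMFunction typedef below is a stand-in), copying a table of code pointers with a hardcoded element size of 4 truncates each entry on a 64 bit target, while sizeof() stays correct on both widths -- which is what the change above fixes for the MIPS GOT:

  #include <string.h>
  #include <stdio.h>

  typedef void (*ASMFunction)(void);

  /* Copy a table of n code pointers; sizeof() keeps the element size
  ** right on 32 and 64 bit targets alike, unlike a hardcoded 4.
  */
  static void copy_got(ASMFunction *dst, const ASMFunction *src, int n)
  {
    memcpy(dst, src, n * sizeof(ASMFunction));
  }

  int main(void)
  {
    ASMFunction src[2] = { 0, 0 }, dst[2];
    copy_got(dst, src, 2);
    printf("%u bytes per GOT entry\n", (unsigned)sizeof(ASMFunction));
    return 0;
  }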


@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
#if !LJ_SOFTFP #if !LJ_SOFTFP
/* Load a number constant into an FPR. */ /* Load a number constant into an FPR. */
static void emit_loadn(ASMState *as, Reg r, cTValue *tv) static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{ {
cTValue *tv = ir_knum(ir);
int32_t i; int32_t i;
if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
uint32_t hi = tv->u32.hi; uint32_t hi = tv->u32.hi;


@ -35,7 +35,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
{ {
if ((as->flags & JIT_F_MIPS32R2)) { if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, src, shift); emit_dta(as, MIPSI_ROTR, dest, src, shift);
} else { } else {
emit_dst(as, MIPSI_OR, dest, dest, tmp); emit_dst(as, MIPSI_OR, dest, dest, tmp);
@ -112,8 +112,8 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
emit_tsi(as, mi, r, base, i); emit_tsi(as, mi, r, base, i);
} }
#define emit_loadn(as, r, tv) \ #define emit_loadk64(as, r, ir) \
emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
/* Get/set global_State fields. */ /* Get/set global_State fields. */
static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
@ -157,7 +157,8 @@ static void emit_call(ASMState *as, void *target, int needcfa)
MCode *p = as->mcp; MCode *p = as->mcp;
*--p = MIPSI_NOP; *--p = MIPSI_NOP;
if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
*--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
(((uintptr_t)target >>2) & 0x03ffffffu);
} else { /* Target out of range: need indirect call. */ } else { /* Target out of range: need indirect call. */
*--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
needcfa = 1; needcfa = 1;
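A minimal standalone sketch of the region check used above: MIPS J/JAL encode only bits 27..2 of the target and take the upper four address bits from the PC, so a direct call works only within the same 256 MB region and anything else falls back to an indirect JALR:

  #include <stdint.h>
  #include <stdio.h>

  /* True iff a J/JAL at address p can reach the target directly. */
  static int same_jal_region(uintptr_t p, uintptr_t target)
  {
    return ((p ^ target) >> 28) == 0;
  }

  int main(void)
  {
    printf("%d\n", same_jal_region(0x10000000u, 0x1ffffffcu));  /* 1: JAL reaches it */
    printf("%d\n", same_jal_region(0x10000000u, 0x20000000u));  /* 0: needs JALR */
    return 0;
  }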


@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
emit_tai(as, pi, r, base, i); emit_tai(as, pi, r, base, i);
} }
#define emit_loadn(as, r, tv) \ #define emit_loadk64(as, r, ir) \
emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
/* Get/set global_State fields. */ /* Get/set global_State fields. */
static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)


@ -20,6 +20,11 @@
#define REX_64 0 #define REX_64 0
#define VEX_64 0 #define VEX_64 0
#endif #endif
#if LJ_GC64
#define REX_GC64 REX_64
#else
#define REX_GC64 0
#endif
#define emit_i8(as, i) (*--as->mcp = (MCode)(i)) #define emit_i8(as, i) (*--as->mcp = (MCode)(i))
#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4) #define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
@ -94,26 +99,17 @@ static int32_t ptr2addr(const void *p)
#define ptr2addr(p) (i32ptr((p))) #define ptr2addr(p) (i32ptr((p)))
#endif #endif
/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
MCode *p = as->mcp;
*(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}
/* op r, [base+ofs] */ /* op r, [base+ofs] */
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{ {
MCode *p = as->mcp; MCode *p = as->mcp;
x86Mode mode; x86Mode mode;
if (ra_hasreg(rb)) { if (ra_hasreg(rb)) {
if (ofs == 0 && (rb&7) != RID_EBP) { if (LJ_GC64 && rb == RID_RIP) {
mode = XM_OFS0;
p -= 4;
*(int32_t *)p = ofs;
} else if (ofs == 0 && (rb&7) != RID_EBP) {
mode = XM_OFS0; mode = XM_OFS0;
} else if (checki8(ofs)) { } else if (checki8(ofs)) {
*--p = (MCode)ofs; *--p = (MCode)ofs;
@ -211,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
*--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
rb = RID_ESP; rb = RID_ESP;
#endif #endif
} else if (LJ_GC64 && rb == RID_RIP) {
lua_assert(as->mrm.idx == RID_NONE);
mode = XM_OFS0;
p -= 4;
*(int32_t *)p = as->mrm.ofs;
} else { } else {
if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
mode = XM_OFS0; mode = XM_OFS0;
@ -264,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
/* Get/set global_State fields. */ /* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \ #define emit_opgl(as, xo, r, field) \
emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) #define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) #define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
#define emit_setvmstate(as, i) \ #define emit_setvmstate(as, i) \
(emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@ -288,9 +289,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
} }
} }
#if LJ_GC64
#define dispofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
#define mcpofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
#define mctopofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
/* mov r, addr */
#define emit_loada(as, r, addr) \
emit_loadu64(as, (r), (uintptr_t)(addr))
#else
/* mov r, addr */ /* mov r, addr */
#define emit_loada(as, r, addr) \ #define emit_loada(as, r, addr) \
emit_loadi(as, (r), ptr2addr((addr))) emit_loadi(as, (r), ptr2addr((addr)))
#endif
#if LJ_64 #if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */ /* mov r, imm64 or shorter 32 bit extended load. */
@ -302,6 +315,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
MCode *p = as->mcp; MCode *p = as->mcp;
*(int32_t *)(p-4) = (int32_t)u64; *(int32_t *)(p-4) = (int32_t)u64;
as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
#if LJ_GC64
} else if (checki32(dispofs(as, u64))) {
emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
} else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
/* Since as->realign assumes the code size doesn't change, check
** RIP-relative addressing reachability for both as->mcp and as->mctop.
*/
emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
#endif
} else { /* Full-size 64 bit load. */ } else { /* Full-size 64 bit load. */
MCode *p = as->mcp; MCode *p = as->mcp;
*(uint64_t *)(p-8) = u64; *(uint64_t *)(p-8) = u64;
@ -313,13 +335,70 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
} }
#endif #endif
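A small standalone sketch of the reachability test behind these new branches: an address can be encoded as a 32 bit displacement from a base (RIP or the dispatch table) iff the signed difference fits in int32_t, which is what checki32() over dispofs()/mcpofs() verifies above. The constants here are arbitrary examples:

  #include <stdint.h>
  #include <stdio.h>

  /* True iff addr is reachable from base with a signed 32 bit offset. */
  static int fits_disp32(uint64_t base, uint64_t addr)
  {
    int64_t d = (int64_t)(addr - base);
    return d == (int64_t)(int32_t)d;
  }

  int main(void)
  {
    uint64_t base = 0x7f0000000000ull;
    printf("%d\n", fits_disp32(base, base + 0x1000));        /* 1: encodable */
    printf("%d\n", fits_disp32(base, base + (1ull << 40)));  /* 0: out of range */
    return 0;
  }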
/* movsd r, [&tv->n] / xorps r, r */ /* op r, [addr] */
static void emit_loadn(ASMState *as, Reg r, cTValue *tv) static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{ {
if (tvispzero(tv)) /* Use xor only for +0. */ #if LJ_GC64
emit_rr(as, XO_XORPS, r, r); if (checki32(dispofs(as, addr))) {
else emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
emit_rma(as, XO_MOVSD, r, &tv->n); } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
} else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
emit_rmro(as, xo, rr, rr, 0);
emit_loadu64(as, rr, (uintptr_t)addr);
} else
#endif
{
MCode *p = as->mcp;
*(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}
}
/* Load 64 bit IR constant into register. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
Reg r64;
x86Op xo;
const uint64_t *k = &ir_k64(ir)->u64;
if (rset_test(RSET_FPR, r)) {
r64 = r;
xo = XO_MOVSD;
} else {
r64 = r | REX_64;
xo = XO_MOV;
}
if (*k == 0) {
emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
#if LJ_GC64
} else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
(checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
emit_rma(as, xo, r64, k);
} else {
if (ir->i) {
lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
} else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
emit_loadu64(as, r, *k);
return;
} else {
/* If all else fails, add the FP constant at the MCode area bottom. */
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
*(uint64_t *)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
as->mclim = as->mcbot + MCLIM_REDZONE;
}
emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
#else
} else {
emit_rma(as, xo, r64, k);
#endif
}
} }
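A standalone sketch (not LuaJIT code) of the fallback used above: the 64 bit constant is parked in the code buffer itself, padded with 0xcc (INT3) bytes until the slot is 8-byte aligned, so a RIP-relative load can reach it later:

  #include <stdint.h>
  #include <string.h>
  #include <stdio.h>

  /* Reserve an aligned 8 byte slot for k at the bottom of a code buffer. */
  static uint8_t *embed_k64(uint8_t *bot, uint64_t k)
  {
    while ((uintptr_t)bot & 7) *bot++ = 0xcc;  /* pad to 8 byte alignment */
    memcpy(bot, &k, sizeof(k));                /* store the constant */
    return bot + sizeof(k);                    /* new bottom of free space */
  }

  int main(void)
  {
    static uint64_t storage[4];                /* 8 byte aligned backing */
    uint8_t *p = (uint8_t *)storage + 3;       /* deliberately misaligned */
    uint8_t *end = embed_k64(p, UINT64_C(0x4059000000000000));  /* 100.0 */
    printf("%d bytes used\n", (int)(end - p)); /* 13: 5 pad + 8 payload */
    return 0;
  }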
/* -- Emit control-flow instructions -------------------------------------- */ /* -- Emit control-flow instructions -------------------------------------- */
@ -460,9 +539,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{ {
if (ofs) { if (ofs) {
if ((as->flags & JIT_F_LEA_AGU)) if ((as->flags & JIT_F_LEA_AGU))
emit_rmro(as, XO_LEA, r, r, ofs); emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
else else
emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
} }
} }


@ -46,7 +46,8 @@
** the wrapper function feature. Lua errors thrown through C++ frames ** the wrapper function feature. Lua errors thrown through C++ frames
** cannot be caught by C++ code and C++ destructors are not run. ** cannot be caught by C++ code and C++ destructors are not run.
** **
** EXT is the default on x64 systems, INT is the default on all other systems. ** EXT is the default on x64 systems and on Windows, INT is the default on all
** other systems.
** **
** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack ** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled ** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
@ -55,7 +56,6 @@
** and all C libraries that have callbacks which may be used to call back ** and all C libraries that have callbacks which may be used to call back
** into Lua. C++ code must *not* be compiled with -fno-exceptions. ** into Lua. C++ code must *not* be compiled with -fno-exceptions.
** **
** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH.
** EXT is mandatory on WIN64 since the calling convention has an abundance ** EXT is mandatory on WIN64 since the calling convention has an abundance
** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). ** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
@ -63,7 +63,7 @@
#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND #if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
#define LJ_UNWIND_EXT 1 #define LJ_UNWIND_EXT 1
#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS #elif LJ_TARGET_WINDOWS
#define LJ_UNWIND_EXT 1 #define LJ_UNWIND_EXT 1
#endif #endif
@ -384,7 +384,7 @@ static void err_raise_ext(int errcode)
#endif /* LJ_TARGET_ARM */ #endif /* LJ_TARGET_ARM */
#elif LJ_TARGET_X64 && LJ_ABI_WIN #elif LJ_ABI_WIN
/* /*
** Someone in Redmond owes me several days of my life. A lot of this is ** Someone in Redmond owes me several days of my life. A lot of this is
@ -402,6 +402,7 @@ static void err_raise_ext(int errcode)
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
#if LJ_TARGET_X64
/* Taken from: http://www.nynaeve.net/?p=99 */ /* Taken from: http://www.nynaeve.net/?p=99 */
typedef struct UndocumentedDispatcherContext { typedef struct UndocumentedDispatcherContext {
ULONG64 ControlPc; ULONG64 ControlPc;
@ -416,11 +417,14 @@ typedef struct UndocumentedDispatcherContext {
ULONG ScopeIndex; ULONG ScopeIndex;
ULONG Fill0; ULONG Fill0;
} UndocumentedDispatcherContext; } UndocumentedDispatcherContext;
#else
typedef void *UndocumentedDispatcherContext;
#endif
/* Another wild guess. */ /* Another wild guess. */
extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
#ifdef MINGW_SDK_INIT #if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
/* Workaround for broken MinGW64 declaration. */ /* Workaround for broken MinGW64 declaration. */
VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
#define RtlUnwindEx RtlUnwindEx_FIXED #define RtlUnwindEx RtlUnwindEx_FIXED
@ -434,10 +438,15 @@ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) #define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) #define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
/* Win64 exception handler for interpreter frame. */ /* Windows exception handler for interpreter frame. */
LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
{ {
#if LJ_TARGET_X64
void *cf = f;
#else
void *cf = (char *)f - CFRAME_OFS_SEH;
#endif
lua_State *L = cframe_L(cf); lua_State *L = cframe_L(cf);
int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
@ -455,8 +464,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
} else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
/* Don't catch access violations etc. */ /* Don't catch access violations etc. */
return ExceptionContinueSearch; return 1; /* ExceptionContinueSearch */
} }
#if LJ_TARGET_X64
/* Unwind the stack and call all handlers for all lower C frames /* Unwind the stack and call all handlers for all lower C frames
** (including ourselves) again with EH_UNWINDING set. Then set ** (including ourselves) again with EH_UNWINDING set. Then set
** rsp = cf, rax = errcode and jump to the specified target. ** rsp = cf, rax = errcode and jump to the specified target.
@ -466,9 +476,21 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
lj_vm_unwind_c_eh), lj_vm_unwind_c_eh),
rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
/* RtlUnwindEx should never return. */ /* RtlUnwindEx should never return. */
#else
UNUSED(ctx);
UNUSED(dispatch);
/* Call all handlers for all lower C frames (including ourselves) again
** with EH_UNWINDING set. Then call the specified function, passing cf
** and errcode.
*/
lj_vm_rtlunwind(cf, (void *)rec,
(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
/* lj_vm_rtlunwind does not return. */
#endif
} }
} }
return ExceptionContinueSearch; return 1; /* ExceptionContinueSearch */
} }
/* Raise Windows exception. */ /* Raise Windows exception. */


@ -102,42 +102,41 @@ static void recff_stitch(jit_State *J)
ASMFunction cont = lj_cont_stitch; ASMFunction cont = lj_cont_stitch;
lua_State *L = J->L; lua_State *L = J->L;
TValue *base = L->base; TValue *base = L->base;
BCReg nslot = J->maxslot + 1 + LJ_FR2;
TValue *nframe = base + 1 + LJ_FR2;
const BCIns *pc = frame_pc(base-1); const BCIns *pc = frame_pc(base-1);
TValue *pframe = frame_prevl(base-1); TValue *pframe = frame_prevl(base-1);
TRef trcont;
lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */
/* Move func + args up in Lua stack and insert continuation. */ /* Move func + args up in Lua stack and insert continuation. */
memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1)); memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT); setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
setcont(base, cont); setcont(base-LJ_FR2, cont);
setframe_pc(base, pc); setframe_pc(base, pc);
setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */ setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */
L->base += 2; L->base += 2 + LJ_FR2;
L->top += 2; L->top += 2 + LJ_FR2;
/* Ditto for the IR. */ /* Ditto for the IR. */
memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1)); memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
#if LJ_64 #if LJ_FR2
trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin)); J->base[2] = TREF_FRAME;
J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
#else #else
trcont = lj_ir_kptr(J, (void *)cont); J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
#endif #endif
J->base[0] = trcont | TREF_CONT; J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
J->ktracep = lj_ir_k64_reserve(J); J->base += 2 + LJ_FR2;
lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); J->baseslot += 2 + LJ_FR2;
J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0);
J->base += 2;
J->baseslot += 2;
J->framedepth++; J->framedepth++;
lj_record_stop(J, LJ_TRLINK_STITCH, 0); lj_record_stop(J, LJ_TRLINK_STITCH, 0);
/* Undo Lua stack changes. */ /* Undo Lua stack changes. */
memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1)); memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
setframe_pc(base-1, pc); setframe_pc(base-1, pc);
L->base -= 2; L->base -= 2 + LJ_FR2;
L->top -= 2; L->top -= 2 + LJ_FR2;
} }
/* Fallback handler for fast functions that are not recorded (yet). */ /* Fallback handler for fast functions that are not recorded (yet). */
@ -179,7 +178,7 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
/* Emit BUFHDR for the global temporary buffer. */ /* Emit BUFHDR for the global temporary buffer. */
static TRef recff_bufhdr(jit_State *J) static TRef recff_bufhdr(jit_State *J)
{ {
return emitir(IRT(IR_BUFHDR, IRT_P32), return emitir(IRT(IR_BUFHDR, IRT_PGC),
lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
} }
@ -229,7 +228,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd)
ix.tab = tr; ix.tab = tr;
copyTV(J->L, &ix.tabv, &rd->argv[0]); copyTV(J->L, &ix.tabv, &rd->argv[0]);
lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */
fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META);
mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
if (!tref_isnil(mt)) if (!tref_isnil(mt))
@ -295,7 +294,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
if (strV(tv)->len == 1) { if (strV(tv)->len == 1) {
emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv)));
} else { } else {
TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
} }
@ -380,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
int errcode; int errcode;
TValue argv0; TValue argv0;
/* Temporarily insert metamethod below object. */ /* Temporarily insert metamethod below object. */
J->base[1] = J->base[0]; J->base[1+LJ_FR2] = J->base[0];
J->base[0] = ix.mobj; J->base[0] = ix.mobj;
copyTV(J->L, &argv0, &rd->argv[0]); copyTV(J->L, &argv0, &rd->argv[0]);
copyTV(J->L, &rd->argv[1], &rd->argv[0]); copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
copyTV(J->L, &rd->argv[0], &ix.mobjv); copyTV(J->L, &rd->argv[0], &ix.mobjv);
/* Need to protect lj_record_tailcall because it may throw. */ /* Need to protect lj_record_tailcall because it may throw. */
errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
@ -450,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
{ {
if (J->maxslot >= 1) { if (J->maxslot >= 1) {
#if LJ_FR2
/* Shift function arguments up. */
memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
#endif
lj_record_call(J, 0, J->maxslot - 1); lj_record_call(J, 0, J->maxslot - 1);
rd->nres = -1; /* Pending call. */ rd->nres = -1; /* Pending call. */
} /* else: Interpreter will throw. */ } /* else: Interpreter will throw. */
@ -469,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
TValue argv0, argv1; TValue argv0, argv1;
TRef tmp; TRef tmp;
int errcode; int errcode;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
/* Swap function and traceback. */ /* Swap function and traceback. */
tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
copyTV(J->L, &argv0, &rd->argv[0]); copyTV(J->L, &argv0, &rd->argv[0]);
copyTV(J->L, &argv1, &rd->argv[1]); copyTV(J->L, &argv1, &rd->argv[1]);
copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[0], &argv1);
copyTV(J->L, &rd->argv[1], &argv0); copyTV(J->L, &rd->argv[1], &argv0);
#if LJ_FR2
/* Shift function arguments up. */
memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
#endif
/* Need to protect lj_record_call because it may throw. */ /* Need to protect lj_record_call because it may throw. */
errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
/* Always undo Lua stack swap to avoid confusing the interpreter. */ /* Always undo Lua stack swap to avoid confusing the interpreter. */
@ -504,7 +510,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
{ {
TRef tr = lj_ir_tonum(J, J->base[0]); TRef tr = lj_ir_tonum(J, J->base[0]);
J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS));
UNUSED(rd); UNUSED(rd);
} }
@ -613,10 +619,8 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
{ {
TRef tr = lj_ir_tonum(J, J->base[0]); J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
if (!tref_isnumber_str(J->base[1])) &rd->argv[0], &rd->argv[1]);
lj_trace_err(J, LJ_TRERR_BADTYPE);
J->base[0] = lj_opt_narrow_pow(J, tr, J->base[1], &rd->argv[1]);
UNUSED(rd); UNUSED(rd);
} }
@ -822,7 +826,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
/* Also handle empty range here, to avoid extra traces. */ /* Also handle empty range here, to avoid extra traces. */
TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
emitir(IRTGI(IR_GE), trslen, tr0); emitir(IRTGI(IR_GE), trslen, tr0);
trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
} else { /* Range underflow: return empty string. */ } else { /* Range underflow: return empty string. */
emitir(IRTGI(IR_LT), trend, trstart); emitir(IRTGI(IR_LT), trend, trstart);
@ -838,7 +842,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
rd->nres = len; rd->nres = len;
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i));
tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp);
J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
} }
} else { /* Empty range or range underflow: return no results. */ } else { /* Empty range or range underflow: return no results. */
@ -860,7 +864,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
if (i > 1) { /* Concatenate the strings, if there's more than one. */ if (i > 1) { /* Concatenate the strings, if there's more than one. */
TRef hdr = recff_bufhdr(J), tr = hdr; TRef hdr = recff_bufhdr(J), tr = hdr;
for (i = 0; J->base[i] != 0; i++) for (i = 0; J->base[i] != 0; i++)
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
} }
UNUSED(rd); UNUSED(rd);
@ -877,14 +881,14 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1)); emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
if (vrep > 1) { if (vrep > 1) {
TRef hdr2 = recff_bufhdr(J); TRef hdr2 = recff_bufhdr(J);
TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep); TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep);
tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str); tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str);
str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2); str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
} }
} }
tr = hdr = recff_bufhdr(J); tr = hdr = recff_bufhdr(J);
if (str2) { if (str2) {
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str); tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str);
str = str2; str = str2;
rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1)); rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
} }
@ -935,8 +939,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
if ((J->base[2] && tref_istruecond(J->base[3])) || if ((J->base[2] && tref_istruecond(J->base[3])) ||
(emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)), (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
!lj_str_haspattern(pat))) { /* Search for fixed string. */ !lj_str_haspattern(pat))) { /* Search for fixed string. */
TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0); TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0);
TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart); TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN); TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen); TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
@ -944,13 +948,13 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
if (lj_str_find(strdata(str)+(MSize)start, strdata(pat), if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
str->len-(MSize)start, pat->len)) { str->len-(MSize)start, pat->len)) {
TRef pos; TRef pos;
emitir(IRTG(IR_NE, IRT_P32), tr, trp0); emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0)); pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0));
J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1)); J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
J->base[1] = emitir(IRTI(IR_ADD), pos, trplen); J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
rd->nres = 2; rd->nres = 2;
} else { } else {
emitir(IRTG(IR_EQ, IRT_P32), tr, trp0); emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0);
J->base[0] = TREF_NIL; J->base[0] = TREF_NIL;
} }
} else { /* Search for pattern. */ } else { /* Search for pattern. */
@ -977,7 +981,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
IRCallID id; IRCallID id;
switch (STRFMT_TYPE(sf)) { switch (STRFMT_TYPE(sf)) {
case STRFMT_LIT: case STRFMT_LIT:
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len))); lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
break; break;
case STRFMT_INT: case STRFMT_INT:
@ -986,7 +990,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
if (!tref_isinteger(tra)) if (!tref_isinteger(tra))
goto handle_num; goto handle_num;
if (sf == STRFMT_INT) { /* Shortcut for plain %d. */ if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
} else { } else {
#if LJ_HASFFI #if LJ_HASFFI
@ -1016,7 +1020,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
return; return;
} }
if (sf == STRFMT_STR) /* Shortcut for plain %s. */ if (sf == STRFMT_STR) /* Shortcut for plain %s. */
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra); tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra);
else if ((sf & STRFMT_T_QUOTED)) else if ((sf & STRFMT_T_QUOTED))
tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra); tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
else else
@ -1025,7 +1029,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
case STRFMT_CHAR: case STRFMT_CHAR:
tra = lj_opt_narrow_toint(J, tra); tra = lj_opt_narrow_toint(J, tra);
if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */ if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR)); emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
else else
tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra); tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
@ -1110,8 +1114,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
{ {
TRef tr, ud, fp; TRef tr, ud, fp;
if (id) { /* io.func() */ if (id) { /* io.func() */
#if LJ_GC64
/* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
#else
tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
#endif
} else { /* fp:method() */ } else { /* fp:method() */
ud = J->base[0]; ud = J->base[0];
if (!tref_isudata(ud)) if (!tref_isudata(ud))
@ -1133,7 +1142,7 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
ptrdiff_t i = rd->data == 0 ? 1 : 0; ptrdiff_t i = rd->data == 0 ? 1 : 0;
for (; J->base[i]; i++) { for (; J->base[i]; i++) {
TRef str = lj_ir_tostr(J, J->base[i]); TRef str = lj_ir_tostr(J, J->base[i]);
TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero);
TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
if (tref_isk(len) && IR(tref_ref(len))->i == 1) { if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
IRIns *irs = IR(tref_ref(str)); IRIns *irs = IR(tref_ref(str));


@ -116,6 +116,17 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
/* These definitions must match with the arch-specific *.dasc files. */ /* These definitions must match with the arch-specific *.dasc files. */
#if LJ_TARGET_X86 #if LJ_TARGET_X86
#if LJ_ABI_WIN
#define CFRAME_OFS_ERRF (19*4)
#define CFRAME_OFS_NRES (18*4)
#define CFRAME_OFS_PREV (17*4)
#define CFRAME_OFS_L (16*4)
#define CFRAME_OFS_SEH (9*4)
#define CFRAME_OFS_PC (6*4)
#define CFRAME_OFS_MULTRES (5*4)
#define CFRAME_SIZE (16*4)
#define CFRAME_SHIFT_MULTRES 0
#else
#define CFRAME_OFS_ERRF (15*4) #define CFRAME_OFS_ERRF (15*4)
#define CFRAME_OFS_NRES (14*4) #define CFRAME_OFS_NRES (14*4)
#define CFRAME_OFS_PREV (13*4) #define CFRAME_OFS_PREV (13*4)
@ -124,6 +135,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_OFS_MULTRES (5*4) #define CFRAME_OFS_MULTRES (5*4)
#define CFRAME_SIZE (12*4) #define CFRAME_SIZE (12*4)
#define CFRAME_SHIFT_MULTRES 0 #define CFRAME_SHIFT_MULTRES 0
#endif
#elif LJ_TARGET_X64 #elif LJ_TARGET_X64
#if LJ_ABI_WIN #if LJ_ABI_WIN
#define CFRAME_OFS_PREV (13*8) #define CFRAME_OFS_PREV (13*8)
@ -226,26 +238,41 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_SIZE 272 #define CFRAME_SIZE 272
#define CFRAME_SHIFT_MULTRES 3 #define CFRAME_SHIFT_MULTRES 3
#endif #endif
#elif LJ_TARGET_MIPS #elif LJ_TARGET_MIPS32
#if LJ_ARCH_HASFPU #if LJ_ARCH_HASFPU
#define CFRAME_OFS_ERRF 124 #define CFRAME_OFS_ERRF 124
#define CFRAME_OFS_NRES 120 #define CFRAME_OFS_NRES 120
#define CFRAME_OFS_PREV 116 #define CFRAME_OFS_PREV 116
#define CFRAME_OFS_L 112 #define CFRAME_OFS_L 112
#define CFRAME_OFS_PC 20
#define CFRAME_OFS_MULTRES 16
#define CFRAME_SIZE 112 #define CFRAME_SIZE 112
#define CFRAME_SHIFT_MULTRES 3
#else #else
#define CFRAME_OFS_ERRF 76 #define CFRAME_OFS_ERRF 76
#define CFRAME_OFS_NRES 72 #define CFRAME_OFS_NRES 72
#define CFRAME_OFS_PREV 68 #define CFRAME_OFS_PREV 68
#define CFRAME_OFS_L 64 #define CFRAME_OFS_L 64
#define CFRAME_SIZE 64
#endif
#define CFRAME_OFS_PC 20 #define CFRAME_OFS_PC 20
#define CFRAME_OFS_MULTRES 16 #define CFRAME_OFS_MULTRES 16
#define CFRAME_SIZE 64
#define CFRAME_SHIFT_MULTRES 3 #define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_MIPS64
#if LJ_ARCH_HASFPU
#define CFRAME_OFS_ERRF 188
#define CFRAME_OFS_NRES 184
#define CFRAME_OFS_PREV 176
#define CFRAME_OFS_L 168
#define CFRAME_OFS_PC 160
#define CFRAME_SIZE 192
#else
#define CFRAME_OFS_ERRF 124
#define CFRAME_OFS_NRES 120
#define CFRAME_OFS_PREV 112
#define CFRAME_OFS_L 104
#define CFRAME_OFS_PC 96
#define CFRAME_SIZE 128
#endif #endif
#define CFRAME_OFS_MULTRES 0
#define CFRAME_SHIFT_MULTRES 3
#else #else
#error "Missing CFRAME_* definitions for this architecture" #error "Missing CFRAME_* definitions for this architecture"
#endif #endif


@ -238,6 +238,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T)
IRIns *ir = &T->ir[ref]; IRIns *ir = &T->ir[ref];
if (ir->o == IR_KGC) if (ir->o == IR_KGC)
gc_markobj(g, ir_kgc(ir)); gc_markobj(g, ir_kgc(ir));
if (irt_is64(ir->t) && ir->o != IR_KNULL)
ref++;
} }
if (T->link) gc_marktrace(g, T->link); if (T->link) gc_marktrace(g, T->link);
if (T->nextroot) gc_marktrace(g, T->nextroot); if (T->nextroot) gc_marktrace(g, T->nextroot);
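A standalone sketch of the iteration change: with 64 bit constants now occupying two IR slots, any walk over the constant area has to step over the payload slot so it is never treated as an instruction in its own right. The struct below is a toy stand-in, not the real IRIns:

  #include <stdio.h>

  typedef struct { int is_wide_k; } Ins;

  static void mark_all(const Ins *ins, int n)
  {
    int i;
    for (i = 0; i < n; i++) {
      printf("visit slot %d\n", i);
      if (ins[i].is_wide_k) i++;  /* skip the 64 bit payload slot */
    }
  }

  int main(void)
  {
    Ins buf[4] = { {0}, {1}, {0}, {0} };  /* buf[2] is buf[1]'s payload */
    mark_all(buf, 4);                     /* visits slots 0, 1, 3 */
    return 0;
  }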


@ -719,6 +719,20 @@ static void gdbjit_buildobj(GDBJITctx *ctx)
/* -- Interface to GDB JIT API -------------------------------------------- */ /* -- Interface to GDB JIT API -------------------------------------------- */
static int gdbjit_lock;
static void gdbjit_lock_acquire()
{
while (__sync_lock_test_and_set(&gdbjit_lock, 1)) {
/* Just spin; futexes or pthreads aren't worth the portability cost. */
}
}
static void gdbjit_lock_release()
{
__sync_lock_release(&gdbjit_lock);
}
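The same spinlock idiom in standalone form, assuming a GCC/Clang toolchain as the code above does: __sync_lock_test_and_set() has acquire semantics and __sync_lock_release() has release semantics, which is enough for the very short critical sections guarding the GDB JIT entry list:

  #include <stdio.h>

  static int lock;

  static void lock_acquire(void)
  {
    while (__sync_lock_test_and_set(&lock, 1)) { /* spin */ }
  }

  static void lock_release(void)
  {
    __sync_lock_release(&lock);
  }

  int main(void)
  {
    lock_acquire();
    puts("in critical section");
    lock_release();
    return 0;
  }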
/* Add new entry to GDB JIT symbol chain. */ /* Add new entry to GDB JIT symbol chain. */
static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
{ {
@ -730,6 +744,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
ctx->T->gdbjit_entry = (void *)eo; ctx->T->gdbjit_entry = (void *)eo;
/* Link new entry to chain and register it. */ /* Link new entry to chain and register it. */
eo->entry.prev_entry = NULL; eo->entry.prev_entry = NULL;
gdbjit_lock_acquire();
eo->entry.next_entry = __jit_debug_descriptor.first_entry; eo->entry.next_entry = __jit_debug_descriptor.first_entry;
if (eo->entry.next_entry) if (eo->entry.next_entry)
eo->entry.next_entry->prev_entry = &eo->entry; eo->entry.next_entry->prev_entry = &eo->entry;
@ -739,6 +754,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
__jit_debug_descriptor.relevant_entry = &eo->entry; __jit_debug_descriptor.relevant_entry = &eo->entry;
__jit_debug_descriptor.action_flag = GDBJIT_REGISTER; __jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
__jit_debug_register_code(); __jit_debug_register_code();
gdbjit_lock_release();
} }
/* Add debug info for newly compiled trace and notify GDB. */ /* Add debug info for newly compiled trace and notify GDB. */
@ -770,6 +786,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
{ {
GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
if (eo) { if (eo) {
gdbjit_lock_acquire();
if (eo->entry.prev_entry) if (eo->entry.prev_entry)
eo->entry.prev_entry->next_entry = eo->entry.next_entry; eo->entry.prev_entry->next_entry = eo->entry.next_entry;
else else
@ -779,6 +796,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
__jit_debug_descriptor.relevant_entry = &eo->entry; __jit_debug_descriptor.relevant_entry = &eo->entry;
__jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
__jit_debug_register_code(); __jit_debug_register_code();
gdbjit_lock_release();
lj_mem_free(J2G(J), eo, eo->sz); lj_mem_free(J2G(J), eo, eo->sz);
} }
} }


@ -91,7 +91,7 @@ static void lj_ir_growbot(jit_State *J)
IRIns *baseir = J->irbuf + J->irbotlim; IRIns *baseir = J->irbuf + J->irbotlim;
MSize szins = J->irtoplim - J->irbotlim; MSize szins = J->irtoplim - J->irbotlim;
lua_assert(szins != 0); lua_assert(szins != 0);
lua_assert(J->cur.nk == J->irbotlim); lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim);
if (J->cur.nins + (szins >> 1) < J->irtoplim) { if (J->cur.nins + (szins >> 1) < J->irtoplim) {
/* More than half of the buffer is free on top: shift up by a quarter. */ /* More than half of the buffer is free on top: shift up by a quarter. */
MSize ofs = szins >> 2; MSize ofs = szins >> 2;
@ -145,6 +145,14 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
return emitir(CCI_OPTYPE(ci), tr, id); return emitir(CCI_OPTYPE(ci), tr, id);
} }
/* Load field of type t from GG_State + offset. */
LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
{
lua_assert(ofs >= IRFL__MAX && ofs < REF_BIAS);
lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs);
return lj_opt_fold(J);
}
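A standalone sketch of the access pattern this helper folds into a single FLOAD; the struct below is a made-up stand-in for GG_State, only the base-plus-compile-time-offset idea carries over:

  #include <stddef.h>
  #include <stdio.h>

  struct GG { void *dispatch[4]; void *gcroot[8]; int vmstate; };  /* stand-in */

  #define GG_FIELD_OFS(f)  offsetof(struct GG, f)

  int main(void)
  {
    struct GG gg = {{0}};
    int *p;
    gg.vmstate = 7;
    p = (int *)((char *)&gg + GG_FIELD_OFS(vmstate));  /* base + offset */
    printf("%d\n", *p);  /* 7 */
    return 0;
  }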
/* -- Interning of constants ---------------------------------------------- */ /* -- Interning of constants ---------------------------------------------- */
/* /*
@ -165,6 +173,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
return ref; return ref;
} }
/* Get ref of next 64 bit IR constant and optionally grow IR.
** Note: this may invalidate all IRIns *!
*/
static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
{
IRRef ref = J->cur.nk - 2;
lua_assert(J->state != LJ_TRACE_ASM);
if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
J->cur.nk = ref;
return ref;
}
#if LJ_GC64
#define ir_nextkgc ir_nextk64
#else
#define ir_nextkgc ir_nextk
#endif
/* Intern int32_t constant. */ /* Intern int32_t constant. */
TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
{ {
@ -184,95 +210,21 @@ found:
return TREF(ref, IRT_INT); return TREF(ref, IRT_INT);
} }
/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the /* Intern 64 bit constant, given by its 64 bit pattern. */
** 64 bit constant. The constants themselves are stored in a chained array TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64)
** and shared across traces.
**
** Rationale for choosing this data structure:
** - The address of the constants is embedded in the generated machine code
** and must never move. A resizable array or hash table wouldn't work.
** - Most apps need very few non-32 bit integer constants (less than a dozen).
** - Linear search is hard to beat in terms of speed and low complexity.
*/
typedef struct K64Array {
MRef next; /* Pointer to next list. */
MSize numk; /* Number of used elements in this array. */
TValue k[LJ_MIN_K64SZ]; /* Array of constants. */
} K64Array;
/* Free all chained arrays. */
void lj_ir_k64_freeall(jit_State *J)
{
K64Array *k;
for (k = mref(J->k64, K64Array); k; ) {
K64Array *next = mref(k->next, K64Array);
lj_mem_free(J2G(J), k, sizeof(K64Array));
k = next;
}
setmref(J->k64, NULL);
}
/* Get new 64 bit constant slot. */
static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64)
{
TValue *ntv;
if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
setmref(kn->next, NULL);
kn->numk = 0;
if (kp)
setmref(kp->next, kn); /* Chain to the end of the list. */
else
setmref(J->k64, kn); /* Link first array. */
kp = kn;
}
ntv = &kp->k[kp->numk++]; /* Add to current array. */
ntv->u64 = u64;
return ntv;
}
/* Find 64 bit constant in chained array or add it. */
cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
{
K64Array *k, *kp = NULL;
MSize idx;
/* Search for the constant in the whole chain of arrays. */
for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
kp = k; /* Remember previous element in list. */
for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
TValue *tv = &k->k[idx];
if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
return tv;
}
}
/* Otherwise add a new constant. */
return ir_k64_add(J, kp, u64);
}
TValue *lj_ir_k64_reserve(jit_State *J)
{
K64Array *k, *kp = NULL;
lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */
/* Find last K64Array, if any. */
for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k;
return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */
}
/* Intern 64 bit constant, given by its address. */
TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
{ {
IRIns *ir, *cir = J->cur.ir; IRIns *ir, *cir = J->cur.ir;
IRRef ref; IRRef ref;
IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
for (ref = J->chain[op]; ref; ref = cir[ref].prev) for (ref = J->chain[op]; ref; ref = cir[ref].prev)
if (ir_k64(&cir[ref]) == tv) if (ir_k64(&cir[ref])->u64 == u64)
goto found; goto found;
ref = ir_nextk(J); ref = ir_nextk64(J);
ir = IR(ref); ir = IR(ref);
lua_assert(checkptrGC(tv)); ir[1].tv.u64 = u64;
setmref(ir->ptr, tv);
ir->t.irt = t; ir->t.irt = t;
ir->o = op; ir->o = op;
ir->op12 = 0;
ir->prev = J->chain[op]; ir->prev = J->chain[op];
J->chain[op] = (IRRef1)ref; J->chain[op] = (IRRef1)ref;
found: found:
@ -282,13 +234,13 @@ found:
/* Intern FP constant, given by its 64 bit pattern. */ /* Intern FP constant, given by its 64 bit pattern. */
TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
{ {
return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); return lj_ir_k64(J, IR_KNUM, u64);
} }
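A standalone sketch of the interning scheme used by the rewritten lj_ir_k64() above: constants of one kind are linked through an intrusive prev field and found by linear search, so identical 64 bit patterns share one ref. Real refs are biased and the chains are per IR opcode; the 1-based refs here are a simplification:

  #include <stdint.h>
  #include <stdio.h>

  typedef struct { uint64_t k; int prev; } K64;

  static K64 pool[32];
  static int npool, chain;  /* chain = 1-based ref of the newest constant */

  static int intern_k64(uint64_t k)
  {
    int ref;
    for (ref = chain; ref; ref = pool[ref-1].prev)
      if (pool[ref-1].k == k) return ref;       /* already interned */
    pool[npool].k = k;
    pool[npool].prev = chain;                   /* link into the chain */
    chain = ++npool;
    return chain;
  }

  int main(void)
  {
    printf("%d %d %d\n", intern_k64(42), intern_k64(7), intern_k64(42));  /* 1 2 1 */
    return 0;
  }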
/* Intern 64 bit integer constant. */ /* Intern 64 bit integer constant. */
TRef lj_ir_kint64(jit_State *J, uint64_t u64) TRef lj_ir_kint64(jit_State *J, uint64_t u64)
{ {
return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); return lj_ir_k64(J, IR_KINT64, u64);
} }
/* Check whether a number is int and return it. -0 is NOT considered an int. */ /* Check whether a number is int and return it. -0 is NOT considered an int. */
@ -323,15 +275,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
{ {
IRIns *ir, *cir = J->cur.ir; IRIns *ir, *cir = J->cur.ir;
IRRef ref; IRRef ref;
lua_assert(!LJ_GC64); /* TODO_GC64: major changes required. */
lua_assert(!isdead(J2G(J), o)); lua_assert(!isdead(J2G(J), o));
for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
if (ir_kgc(&cir[ref]) == o) if (ir_kgc(&cir[ref]) == o)
goto found; goto found;
ref = ir_nextk(J); ref = ir_nextkgc(J);
ir = IR(ref); ir = IR(ref);
/* NOBARRIER: Current trace is a GC root. */ /* NOBARRIER: Current trace is a GC root. */
setgcref(ir->gcr, o); ir->op12 = 0;
setgcref(ir[LJ_GC64].gcr, o);
ir->t.irt = (uint8_t)t; ir->t.irt = (uint8_t)t;
ir->o = IR_KGC; ir->o = IR_KGC;
ir->prev = J->chain[IR_KGC]; ir->prev = J->chain[IR_KGC];
@ -340,24 +292,44 @@ found:
return TREF(ref, t); return TREF(ref, t);
} }
/* Intern 32 bit pointer constant. */ /* Allocate GCtrace constant placeholder (no interning). */
TRef lj_ir_ktrace(jit_State *J)
{
IRRef ref = ir_nextkgc(J);
IRIns *ir = IR(ref);
lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
ir->t.irt = IRT_P64;
ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */
ir->op12 = 0;
ir->prev = 0;
return TREF(ref, IRT_P64);
}
/* Intern pointer constant. */
TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
{ {
IRIns *ir, *cir = J->cur.ir; IRIns *ir, *cir = J->cur.ir;
IRRef ref; IRRef ref;
lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); #if LJ_64 && !LJ_GC64
lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr);
#endif
for (ref = J->chain[op]; ref; ref = cir[ref].prev) for (ref = J->chain[op]; ref; ref = cir[ref].prev)
if (mref(cir[ref].ptr, void) == ptr) if (ir_kptr(&cir[ref]) == ptr)
goto found; goto found;
#if LJ_GC64
ref = ir_nextk64(J);
#else
ref = ir_nextk(J); ref = ir_nextk(J);
#endif
ir = IR(ref); ir = IR(ref);
setmref(ir->ptr, ptr); ir->op12 = 0;
ir->t.irt = IRT_P32; setmref(ir[LJ_GC64].ptr, ptr);
ir->t.irt = IRT_PGC;
ir->o = op; ir->o = op;
ir->prev = J->chain[op]; ir->prev = J->chain[op];
J->chain[op] = (IRRef1)ref; J->chain[op] = (IRRef1)ref;
found: found:
return TREF(ref, IRT_P32); return TREF(ref, IRT_PGC);
} }
/* Intern typed NULL constant. */ /* Intern typed NULL constant. */
@ -412,9 +384,8 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
case IR_KINT: setintV(tv, ir->i); break; case IR_KINT: setintV(tv, ir->i); break;
case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
case IR_KPTR: case IR_KKPTR: case IR_KNULL: case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break;
setlightudV(tv, mref(ir->ptr, void)); case IR_KNULL: setlightudV(tv, NULL); break;
break;
case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
#if LJ_HASFFI #if LJ_HASFFI
case IR_KINT64: { case IR_KINT64: {


@ -220,7 +220,7 @@ IRFLDEF(FLENUM)
/* SLOAD mode bits, stored in op2. */ /* SLOAD mode bits, stored in op2. */
#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ #define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
@ -294,7 +294,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
/* -- IR instruction types ------------------------------------------------ */ /* -- IR instruction types ------------------------------------------------ */
/* Map of itypes to non-negative numbers. ORDER LJ_T. #define IRTSIZE_PGC (LJ_GC64 ? 8 : 4)
/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T.
** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for
** IRT_P32 and IRT_P64, which never escape the IR. ** IRT_P32 and IRT_P64, which never escape the IR.
** The various integers are only used in the IR and can only escape to ** The various integers are only used in the IR and can only escape to
@ -302,12 +304,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
** contiguous and next to IRT_NUM (see the typerange macros below). ** contiguous and next to IRT_NUM (see the typerange macros below).
*/ */
#define IRTDEF(_) \ #define IRTDEF(_) \
_(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \
_(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \
_(TAB, 4) _(UDATA, 4) \ _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \
_(UDATA, IRTSIZE_PGC) \
_(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \
_(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \
_(SOFTFP, 4) /* There is room for 9 more types. */ _(SOFTFP, 4) /* There is room for 8 more types. */
/* IR result type and flags (8 bit). */ /* IR result type and flags (8 bit). */
typedef enum { typedef enum {
@ -318,9 +321,10 @@ IRTDEF(IRTENUM)
/* Native pointer type and the corresponding integer type. */ /* Native pointer type and the corresponding integer type. */
IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32,
IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT,
IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
/* TODO_GC64: major changes required for all uses of IRT_P32. */
/* Additional flags. */ /* Additional flags. */
IRT_MARK = 0x20, /* Marker for misc. purposes. */ IRT_MARK = 0x20, /* Marker for misc. purposes. */
@ -408,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
static LJ_AINLINE uint32_t irt_toitype_(IRType t) static LJ_AINLINE uint32_t irt_toitype_(IRType t)
{ {
lua_assert(!LJ_64 || t != IRT_LIGHTUD); lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD);
if (LJ_DUALNUM && t > IRT_NUM) { if (LJ_DUALNUM && t > IRT_NUM) {
return LJ_TISNUM; return LJ_TISNUM;
} else { } else {
@ -521,7 +525,9 @@ typedef uint32_t TRef;
** +-------+-------+---+---+---+---+ ** +-------+-------+---+---+---+---+
** | op1 | op2 | t | o | r | s | ** | op1 | op2 | t | o | r | s |
** +-------+-------+---+---+---+---+ ** +-------+-------+---+---+---+---+
** | op12/i/gco | ot | prev | (alternative fields in union) ** | op12/i/gco32 | ot | prev | (alternative fields in union)
** +-------+-------+---+---+---+---+
** | TValue/gco64 | (2nd IR slot for 64 bit constants)
** +---------------+-------+-------+ ** +---------------+-------+-------+
** 32 16 16 ** 32 16 16
** **
@ -549,22 +555,27 @@ typedef union IRIns {
) )
}; };
int32_t i; /* 32 bit signed integer literal (overlaps op12). */ int32_t i; /* 32 bit signed integer literal (overlaps op12). */
GCRef gcr; /* GCobj constant (overlaps op12). */ GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */
MRef ptr; /* Pointer constant (overlaps op12). */ MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */
TValue tv; /* TValue constant (overlaps entire slot). */
} IRIns; } IRIns;
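A simplified standalone model of the two-slot constant layout sketched in the comment above; the header fields here are stand-ins, only the idea that the 64 bit payload lives in the following slot and is read via &ir[1] matches the real ir_knum()/ir_k64() below:

  #include <stdint.h>
  #include <stdio.h>

  typedef union Slot {
    struct { uint16_t ot; uint16_t prev; uint32_t op12; } hdr;
    uint64_t k64;   /* valid in the slot following a 64 bit constant header */
  } Slot;

  static uint64_t read_k64(const Slot *ir) { return ir[1].k64; }

  int main(void)
  {
    Slot k[2];
    k[0].hdr.ot = 1; k[0].hdr.prev = 0; k[0].hdr.op12 = 0;
    k[1].k64 = UINT64_C(0x400921fb54442d18);  /* bit pattern of pi */
    printf("0x%llx\n", (unsigned long long)read_k64(k));
    return 0;
  }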
/* TODO_GC64: major changes required. */ #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr))
#define ir_kstr(ir) (gco2str(ir_kgc((ir)))) #define ir_kstr(ir) (gco2str(ir_kgc((ir))))
#define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) #define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
#define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) #define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
#define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) #define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
#define ir_k64(ir) \ #define ir_k64(ir) \
check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
(LJ_GC64 && \
((ir)->o == IR_KGC || \
(ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \
&(ir)[1].tv)
#define ir_kptr(ir) \ #define ir_kptr(ir) \
check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
mref((ir)[LJ_GC64].ptr, void))
/* A store or any other op with a non-weak guard has a side-effect. */ /* A store or any other op with a non-weak guard has a side-effect. */
static LJ_AINLINE int ir_sideeff(IRIns *ir) static LJ_AINLINE int ir_sideeff(IRIns *ir)


@ -78,13 +78,13 @@ typedef struct CCallInfo {
#define IRCALLCOND_SOFTFP_FFI(x) NULL #define IRCALLCOND_SOFTFP_FFI(x) NULL
#endif #endif
#if LJ_SOFTFP && LJ_TARGET_MIPS #if LJ_SOFTFP && LJ_TARGET_MIPS32
#define IRCALLCOND_SOFTFP_MIPS(x) x #define IRCALLCOND_SOFTFP_MIPS(x) x
#else #else
#define IRCALLCOND_SOFTFP_MIPS(x) NULL #define IRCALLCOND_SOFTFP_MIPS(x) NULL
#endif #endif
#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32)
#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
#define IRCALLCOND_FP64_FFI(x) x #define IRCALLCOND_FP64_FFI(x) x
@ -104,12 +104,6 @@ typedef struct CCallInfo {
#define IRCALLCOND_FFI32(x) NULL #define IRCALLCOND_FFI32(x) NULL
#endif #endif
#if LJ_TARGET_X86
#define CCI_RANDFPR 0 /* Clang on OSX/x86 is overzealous. */
#else
#define CCI_RANDFPR CCI_NOFPRCLOBBER
#endif
#if LJ_SOFTFP #if LJ_SOFTFP
#define XA_FP CCI_XA #define XA_FP CCI_XA
#define XA2_FP (CCI_XA+CCI_XA) #define XA2_FP (CCI_XA+CCI_XA)
@ -129,40 +123,40 @@ typedef struct CCallInfo {
/* Function definitions for CALL* instructions. */ /* Function definitions for CALL* instructions. */
#define IRCALLDEF(_) \ #define IRCALLDEF(_) \
_(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
_(ANY, lj_str_find, 4, N, P32, 0) \ _(ANY, lj_str_find, 4, N, PGC, 0) \
_(ANY, lj_str_new, 3, S, STR, CCI_L) \ _(ANY, lj_str_new, 3, S, STR, CCI_L) \
_(ANY, lj_strscan_num, 2, FN, INT, 0) \ _(ANY, lj_strscan_num, 2, FN, INT, 0) \
_(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \ _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \
_(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \ _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \
_(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \ _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \
_(ANY, lj_strfmt_putint, 2, FL, P32, 0) \ _(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \
_(ANY, lj_strfmt_putnum, 2, FL, P32, 0) \ _(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \
_(ANY, lj_strfmt_putquoted, 2, FL, P32, 0) \ _(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \
_(ANY, lj_strfmt_putfxint, 3, L, P32, XA_64) \ _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \
_(ANY, lj_strfmt_putfnum_int, 3, L, P32, XA_FP) \ _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \
_(ANY, lj_strfmt_putfnum_uint, 3, L, P32, XA_FP) \ _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \
_(ANY, lj_strfmt_putfnum, 3, L, P32, XA_FP) \ _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \
_(ANY, lj_strfmt_putfstr, 3, L, P32, 0) \ _(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \
_(ANY, lj_strfmt_putfchar, 3, L, P32, 0) \ _(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \
_(ANY, lj_buf_putmem, 3, S, P32, 0) \ _(ANY, lj_buf_putmem, 3, S, PGC, 0) \
_(ANY, lj_buf_putstr, 2, FL, P32, 0) \ _(ANY, lj_buf_putstr, 2, FL, PGC, 0) \
_(ANY, lj_buf_putchar, 2, FL, P32, 0) \ _(ANY, lj_buf_putchar, 2, FL, PGC, 0) \
_(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \ _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \
_(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \ _(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \
_(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \ _(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \
_(ANY, lj_buf_putstr_rep, 3, L, P32, 0) \ _(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \
_(ANY, lj_buf_puttab, 5, L, P32, 0) \ _(ANY, lj_buf_puttab, 5, L, PGC, 0) \
_(ANY, lj_buf_tostr, 1, FL, STR, 0) \ _(ANY, lj_buf_tostr, 1, FL, STR, 0) \
_(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \ _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \
_(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
_(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
_(ANY, lj_tab_clear, 1, FS, NIL, 0) \ _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
_(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \
_(ANY, lj_tab_len, 1, FL, INT, 0) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \
_(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
_(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
_(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \
_(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_RANDFPR)\ _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \
_(ANY, lj_vm_modi, 2, FN, INT, 0) \ _(ANY, lj_vm_modi, 2, FN, INT, 0) \
_(ANY, sinh, 1, N, NUM, XA_FP) \ _(ANY, sinh, 1, N, NUM, XA_FP) \
_(ANY, cosh, 1, N, NUM, XA_FP) \ _(ANY, cosh, 1, N, NUM, XA_FP) \


@ -36,12 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
return ref; return ref;
} }
LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs);
/* Interning of constants. */ /* Interning of constants. */
LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
LJ_FUNC void lj_ir_k64_freeall(jit_State *J); LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64);
LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J);
LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
@ -49,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr);
LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
#if LJ_64 #if LJ_64
#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k))
@ -75,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) #define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000))
/* Special 128 bit SIMD constants. */ /* Special 128 bit SIMD constants. */
#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) #define lj_ir_ksimd(J, idx) \
#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J))
/* Access to constants. */ /* Access to constants. */
LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
@ -143,8 +143,8 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key);
LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op); TValue *vb, TValue *vc, IROp op);
LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc); LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
/* Optimization passes. */ /* Optimization passes. */


@ -46,12 +46,16 @@
#define JIT_F_CPU_FIRST JIT_F_SQRT #define JIT_F_CPU_FIRST JIT_F_SQRT
#define JIT_F_CPUSTRING "\4SQRT\5ROUND" #define JIT_F_CPUSTRING "\4SQRT\5ROUND"
#elif LJ_TARGET_MIPS #elif LJ_TARGET_MIPS
#define JIT_F_MIPS32R2 0x00000010 #define JIT_F_MIPSXXR2 0x00000010
/* Names for the CPU-specific flags. Must match the order above. */ /* Names for the CPU-specific flags. Must match the order above. */
#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 #define JIT_F_CPU_FIRST JIT_F_MIPSXXR2
#if LJ_TARGET_MIPS32
#define JIT_F_CPUSTRING "\010MIPS32R2" #define JIT_F_CPUSTRING "\010MIPS32R2"
#else #else
#define JIT_F_CPUSTRING "\010MIPS64R2"
#endif
#else
#define JIT_F_CPU_FIRST 0 #define JIT_F_CPU_FIRST 0
#define JIT_F_CPUSTRING "" #define JIT_F_CPUSTRING ""
#endif #endif
@ -179,14 +183,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
#define SNAP_TR(slot, tr) \ #define SNAP_TR(slot, tr) \
(((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
#if !LJ_FR2
#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
#endif
#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
#define snap_ref(sn) ((sn) & 0xffff) #define snap_ref(sn) ((sn) & 0xffff)
#define snap_slot(sn) ((BCReg)((sn) >> 24)) #define snap_slot(sn) ((BCReg)((sn) >> 24))
#define snap_isframe(sn) ((sn) & SNAP_FRAME) #define snap_isframe(sn) ((sn) & SNAP_FRAME)
#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
{
#if LJ_FR2
uint64_t pcbase;
memcpy(&pcbase, sn, sizeof(uint64_t));
return (const BCIns *)(pcbase >> 8);
#else
return (const BCIns *)(uintptr_t)*sn;
#endif
}
/* Snapshot and exit numbers. */ /* Snapshot and exit numbers. */
typedef uint32_t SnapNo; typedef uint32_t SnapNo;
typedef uint32_t ExitNo; typedef uint32_t ExitNo;
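
Under LJ_FR2 the snap_pc() accessor above no longer reads a 32-bit SnapEntry directly: the frame link is a 64-bit map entry whose upper bits hold the bytecode PC and whose low byte holds the frame size, which is also what rec_comp_fixup() patches later in this diff. A small self-contained sketch of that packing, assuming the 56/8-bit split implied by the << 8 and >> 8 shifts:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the FR2 frame-link entry; the field split is an assumption
** drawn from the shifts in snap_pc() and rec_comp_fixup() in this diff. */
static uint64_t pack_frame_link(const void *pc, uint8_t ftsz)
{
  return ((uint64_t)(uintptr_t)pc << 8) | ftsz;
}

static const void *unpack_pc(uint64_t entry)
{
  return (const void *)(uintptr_t)(entry >> 8);
}

int main(void)
{
  static const uint32_t bc[8];            /* stand-in for a bytecode array */
  uint64_t entry = pack_frame_link(&bc[3], 24);
  printf("pc ok: %d, ftsz: %u\n",
         unpack_pc(entry) == (const void *)&bc[3],
         (unsigned)(entry & 0xff));
  return 0;
}
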
@ -308,6 +324,37 @@ enum {
LJ_KSIMD__MAX LJ_KSIMD__MAX
}; };
enum {
#if LJ_TARGET_X86ORX64
LJ_K64_TOBIT, /* 2^52 + 2^51 */
LJ_K64_2P64, /* 2^64 */
LJ_K64_M2P64, /* -2^64 */
#if LJ_32
LJ_K64_M2P64_31, /* -2^64 or -2^31 */
#else
LJ_K64_M2P64_31 = LJ_K64_M2P64,
#endif
#endif
#if LJ_TARGET_MIPS
LJ_K64_2P31, /* 2^31 */
#endif
LJ_K64__MAX,
};
enum {
#if LJ_TARGET_X86ORX64
LJ_K32_M2P64_31, /* -2^64 or -2^31 */
#endif
#if LJ_TARGET_PPC
LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
LJ_K32_2P52, /* 2^52 */
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
LJ_K32_2P31, /* 2^31 */
#endif
LJ_K32__MAX
};
/* Get 16 byte aligned pointer to SIMD constant. */ /* Get 16 byte aligned pointer to SIMD constant. */
#define LJ_KSIMD(J, n) \ #define LJ_KSIMD(J, n) \
((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
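
The new J->k64[]/J->k32[] tables above keep a handful of backend constants directly in jit_State instead of interning them per trace. As one concrete illustration of what such a constant is for, LJ_K64_TOBIT (2^52 + 2^51, per the comment above) is the classic add-and-reinterpret trick for extracting the low 32 integer bits of a double; the following is a standalone example, not LuaJIT code:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
  double tobit = 6755399441055744.0;  /* 2^52 + 2^51, cf. LJ_K64_TOBIT */
  double x = -42.0;
  double y = x + tobit;               /* rounds x to an integer, biases it */
  uint64_t bits;
  int32_t lo;
  memcpy(&bits, &y, sizeof(bits));    /* reinterpret the double */
  lo = (int32_t)(uint32_t)bits;       /* low 32 bits of the mantissa */
  printf("%d\n", lo);                 /* prints -42 */
  return 0;
}
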
@ -324,13 +371,14 @@ enum {
/* Fold state is used to fold instructions on-the-fly. */ /* Fold state is used to fold instructions on-the-fly. */
typedef struct FoldState { typedef struct FoldState {
IRIns ins; /* Currently emitted instruction. */ IRIns ins; /* Currently emitted instruction. */
IRIns left; /* Instruction referenced by left operand. */ IRIns left[2]; /* Instruction referenced by left operand. */
IRIns right; /* Instruction referenced by right operand. */ IRIns right[2]; /* Instruction referenced by right operand. */
} FoldState; } FoldState;
/* JIT compiler state. */ /* JIT compiler state. */
typedef struct jit_State { typedef struct jit_State {
GCtrace cur; /* Current trace. */ GCtrace cur; /* Current trace. */
GCtrace *curfinal; /* Final address of current trace (set during asm). */
lua_State *L; /* Current Lua state. */ lua_State *L; /* Current Lua state. */
const BCIns *pc; /* Current PC. */ const BCIns *pc; /* Current PC. */
@ -360,8 +408,9 @@ typedef struct jit_State {
int32_t framedepth; /* Current frame depth. */ int32_t framedepth; /* Current frame depth. */
int32_t retdepth; /* Return frame depth (count of RETF). */ int32_t retdepth; /* Return frame depth (count of RETF). */
MRef k64; /* Pointer to chained array of 64 bit constants. */
TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */
uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */
IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
@ -382,7 +431,7 @@ typedef struct jit_State {
GCRef *trace; /* Array of traces. */ GCRef *trace; /* Array of traces. */
TraceNo freetrace; /* Start of scan for next free trace. */ TraceNo freetrace; /* Start of scan for next free trace. */
MSize sizetrace; /* Size of trace array. */ MSize sizetrace; /* Size of trace array. */
TValue *ktracep; /* Pointer to K64Array slot with GCtrace pointer. */ IRRef1 ktrace; /* Reference to KGC with GCtrace. */
IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */


@ -843,12 +843,16 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
#endif #endif
#if LJ_FR2 #if LJ_FR2
#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)(void *)(f)) #define contptr(f) ((void *)(f))
#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f))
#elif LJ_64 #elif LJ_64
#define contptr(f) \
((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin))
#define setcont(o, f) \ #define setcont(o, f) \
((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
#else #else
#define setcont(o, f) setlightudV((o), (void *)(f)) #define contptr(f) ((void *)(f))
#define setcont(o, f) setlightudV((o), contptr(f))
#endif #endif
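
On LJ_64 without FR2, contptr() above squeezes a continuation code pointer into 32 bits by storing its offset from lj_vm_asm_begin; with FR2 the full pointer simply fits in the 64-bit slot. A standalone sketch of the offset round-trip, using a stand-in base rather than the real lj_vm_asm_begin:

#include <stdint.h>
#include <stdio.h>

static char code_base[256];            /* stands in for lj_vm_asm_begin */

/* Encode a code pointer as a 32-bit offset from a known base... */
static uint32_t cont_encode(const void *f)
{
  return (uint32_t)((intptr_t)f - (intptr_t)code_base);
}

/* ...and turn the offset back into a pointer. */
static void *cont_decode(uint32_t ofs)
{
  return (void *)((intptr_t)code_base + (intptr_t)(int32_t)ofs);
}

int main(void)
{
  const void *f = code_base + 100;
  printf("roundtrip ok: %d\n", cont_decode(cont_encode(f)) == f);
  return 0;
}
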
#define tvchecklive(L, o) \ #define tvchecklive(L, o) \


@ -136,8 +136,8 @@
/* Some local macros to save typing. Undef'd at the end. */ /* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)]) #define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins) #define fins (&J->fold.ins)
#define fleft (&J->fold.left) #define fleft (J->fold.left)
#define fright (&J->fold.right) #define fright (J->fold.right)
#define knumleft (ir_knum(fleft)->n) #define knumleft (ir_knum(fleft)->n)
#define knumright (ir_knum(fright)->n) #define knumright (ir_knum(fright)->n)
@ -502,7 +502,7 @@ LJFOLDF(kfold_strref_snew)
PHIBARRIER(ir); PHIBARRIER(ir);
fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
fins->op1 = str; fins->op1 = str;
fins->ot = IRT(IR_STRREF, IRT_P32); fins->ot = IRT(IR_STRREF, IRT_PGC);
return RETRYFOLD; return RETRYFOLD;
} }
} }
@ -998,8 +998,10 @@ LJFOLDF(simplify_nummuldiv_k)
if (n == 1.0) { /* x o 1 ==> x */ if (n == 1.0) { /* x o 1 ==> x */
return LEFTFOLD; return LEFTFOLD;
} else if (n == -1.0) { /* x o -1 ==> -x */ } else if (n == -1.0) { /* x o -1 ==> -x */
IRRef op1 = fins->op1;
fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
fins->op1 = op1;
fins->o = IR_NEG; fins->o = IR_NEG;
fins->op2 = (IRRef1)lj_ir_knum_neg(J);
return RETRYFOLD; return RETRYFOLD;
} else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
fins->o = IR_ADD; fins->o = IR_ADD;
@ -2393,10 +2395,14 @@ retry:
if (fins->op1 >= J->cur.nk) { if (fins->op1 >= J->cur.nk) {
key += (uint32_t)IR(fins->op1)->o << 10; key += (uint32_t)IR(fins->op1)->o << 10;
*fleft = *IR(fins->op1); *fleft = *IR(fins->op1);
if (fins->op1 < REF_TRUE)
fleft[1] = IR(fins->op1)[1];
} }
if (fins->op2 >= J->cur.nk) { if (fins->op2 >= J->cur.nk) {
key += (uint32_t)IR(fins->op2)->o; key += (uint32_t)IR(fins->op2)->o;
*fright = *IR(fins->op2); *fright = *IR(fins->op2);
if (fins->op2 < REF_TRUE)
fright[1] = IR(fins->op2)[1];
} else { } else {
key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
} }


@ -22,8 +22,8 @@
/* Some local macros to save typing. Undef'd at the end. */ /* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)]) #define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins) #define fins (&J->fold.ins)
#define fleft (&J->fold.left) #define fleft (J->fold.left)
#define fright (&J->fold.right) #define fright (J->fold.right)
/* /*
** Caveat #1: return value is not always a TRef -- only use with tref_ref(). ** Caveat #1: return value is not always a TRef -- only use with tref_ref().


@ -517,18 +517,24 @@ static int numisint(lua_Number n)
return (n == (lua_Number)lj_num2int(n)); return (n == (lua_Number)lj_num2int(n));
} }
/* Convert string to number. Error out for non-numeric string values. */
static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o)
{
if (tref_isstr(tr)) {
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
/* Would need an inverted STRTO for this rare and useless case. */
if (!lj_strscan_num(strV(o), o)) /* Convert in-place. Value used below. */
lj_trace_err(J, LJ_TRERR_BADTYPE); /* Punt if non-numeric. */
}
return tr;
}
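
conv_str_tonum() above factors out the string-operand handling that lj_opt_narrow_arith, _unm, _mod and _pow previously duplicated: emit a guarded STRTO, convert the recorded value in place, and punt on the trace if the string is not numeric. The same convert-or-punt shape, reduced to plain C with stand-in names (not the LuaJIT API, and strtod accepts fewer formats than lj_strscan_num):

#include <stdio.h>
#include <stdlib.h>

/* Convert a string operand to a number "in place"; report failure so the
** caller can punt (the recorder raises LJ_TRERR_BADTYPE in that case). */
static int str_tonum_inplace(const char *s, double *out)
{
  char *end;
  double n = strtod(s, &end);
  if (end == s || *end != '\0')
    return 0;                          /* non-numeric: caller aborts */
  *out = n;
  return 1;
}

int main(void)
{
  double v = 0;
  int ok = str_tonum_inplace("42.5", &v);
  printf("\"42.5\" -> ok=%d v=%.1f\n", ok, v);
  ok = str_tonum_inplace("abc", &v);
  printf("\"abc\"  -> ok=%d\n", ok);
  return 0;
}
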
/* Narrowing of arithmetic operations. */ /* Narrowing of arithmetic operations. */
TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op) TValue *vb, TValue *vc, IROp op)
{ {
if (tref_isstr(rb)) { rb = conv_str_tonum(J, rb, vb);
rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0); rc = conv_str_tonum(J, rc, vc);
lj_strscan_num(strV(vb), vb);
}
if (tref_isstr(rc)) {
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
lj_strscan_num(strV(vc), vc);
}
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
tref_isinteger(rb) && tref_isinteger(rc) && tref_isinteger(rb) && tref_isinteger(rc) &&
@ -543,24 +549,21 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
/* Narrowing of unary minus operator. */ /* Narrowing of unary minus operator. */
TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
{ {
if (tref_isstr(rc)) { rc = conv_str_tonum(J, rc, vc);
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
lj_strscan_num(strV(vc), vc);
}
if (tref_isinteger(rc)) { if (tref_isinteger(rc)) {
if ((uint32_t)numberVint(vc) != 0x80000000u) if ((uint32_t)numberVint(vc) != 0x80000000u)
return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
} }
return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
} }
/* Narrowing of modulo operator. */ /* Narrowing of modulo operator. */
TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc) TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
{ {
TRef tmp; TRef tmp;
if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc)) rb = conv_str_tonum(J, rb, vb);
lj_trace_err(J, LJ_TRERR_BADTYPE); rc = conv_str_tonum(J, rc, vc);
if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) && if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) &&
tref_isinteger(rb) && tref_isinteger(rc) && tref_isinteger(rb) && tref_isinteger(rc) &&
(tvisint(vc) ? intV(vc) != 0 : !tviszero(vc))) { (tvisint(vc) ? intV(vc) != 0 : !tviszero(vc))) {
@ -577,10 +580,11 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc)
} }
/* Narrowing of power operator or math.pow. */ /* Narrowing of power operator or math.pow. */
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
{ {
if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc)) rb = conv_str_tonum(J, rb, vb);
lj_trace_err(J, LJ_TRERR_BADTYPE); rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
rc = conv_str_tonum(J, rc, vc);
/* Narrowing must be unconditional to preserve (-x)^i semantics. */ /* Narrowing must be unconditional to preserve (-x)^i semantics. */
if (tvisint(vc) || numisint(numV(vc))) { if (tvisint(vc) || numisint(numV(vc))) {
int checkrange = 0; int checkrange = 0;
@ -591,8 +595,6 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
checkrange = 1; checkrange = 1;
} }
if (!tref_isinteger(rc)) { if (!tref_isinteger(rc)) {
if (tref_isstr(rc))
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
/* Guarded conversion to integer! */ /* Guarded conversion to integer! */
rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
} }


@ -153,10 +153,9 @@ static void sink_remark_phi(jit_State *J)
remark = 0; remark = 0;
for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) { for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
if (((irl->t.irt ^ irr->t.irt) & IRT_MARK)) if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev)
remark = 1;
else if (irl->prev == irr->prev)
continue; continue;
remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK);
irt_setmark(IR(ir->op1)->t); irt_setmark(IR(ir->op1)->t);
irt_setmark(IR(ir->op2)->t); irt_setmark(IR(ir->op2)->t);
} }
@ -166,8 +165,8 @@ static void sink_remark_phi(jit_State *J)
/* Sweep instructions and tag sunken allocations and stores. */ /* Sweep instructions and tag sunken allocations and stores. */
static void sink_sweep_ins(jit_State *J) static void sink_sweep_ins(jit_State *J)
{ {
IRIns *ir, *irfirst = IR(J->cur.nk); IRIns *ir, *irbase = IR(REF_BASE);
for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
switch (ir->o) { switch (ir->o) {
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
IRIns *ira = sink_checkalloc(J, ir); IRIns *ira = sink_checkalloc(J, ir);
@ -217,6 +216,12 @@ static void sink_sweep_ins(jit_State *J)
break; break;
} }
} }
for (ir = IR(J->cur.nk); ir < irbase; ir++) {
irt_clearmark(ir->t);
ir->prev = REGSP_INIT;
if (irt_is64(ir->t) && ir->o != IR_KNULL)
ir++;
}
} }
/* Allocation sinking and store sinking. /* Allocation sinking and store sinking.


@ -16,6 +16,7 @@
#include "lj_jit.h" #include "lj_jit.h"
#include "lj_ircall.h" #include "lj_ircall.h"
#include "lj_iropt.h" #include "lj_iropt.h"
#include "lj_dispatch.h"
#include "lj_vm.h" #include "lj_vm.h"
/* SPLIT pass: /* SPLIT pass:
@ -353,6 +354,8 @@ static void split_ir(jit_State *J)
ir->prev = ref; /* Identity substitution for loword. */ ir->prev = ref; /* Identity substitution for loword. */
hisubst[ref] = 0; hisubst[ref] = 0;
} }
if (irt_is64(ir->t) && ir->o != IR_KNULL)
ref++;
} }
/* Process old IR instructions. */ /* Process old IR instructions. */
@ -448,6 +451,11 @@ static void split_ir(jit_State *J)
case IR_STRTO: case IR_STRTO:
hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
break; break;
case IR_FLOAD:
lua_assert(ir->op1 == REF_NIL);
hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
nir->op2 += LJ_BE*4;
break;
case IR_XLOAD: { case IR_XLOAD: {
IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */
J->cur.nins--; J->cur.nins--;


@ -2177,6 +2177,8 @@ static void assign_adjust(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e)
bcemit_nil(fs, reg, (BCReg)extra); bcemit_nil(fs, reg, (BCReg)extra);
} }
} }
if (nexps > nvars)
ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */
} }
/* Recursively parse assignment statement. */ /* Recursively parse assignment statement. */
@ -2210,8 +2212,6 @@ static void parse_assignment(LexState *ls, LHSVarList *lh, BCReg nvars)
return; return;
} }
assign_adjust(ls, nvars, nexps, &e); assign_adjust(ls, nvars, nexps, &e);
if (nexps > nvars)
ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */
} }
/* Assign RHS to LHS and recurse downwards. */ /* Assign RHS to LHS and recurse downwards. */
expr_init(&e, VNONRELOC, ls->fs->freereg-1); expr_init(&e, VNONRELOC, ls->fs->freereg-1);


@ -51,7 +51,7 @@ static void rec_check_ir(jit_State *J)
{ {
IRRef i, nins = J->cur.nins, nk = J->cur.nk; IRRef i, nins = J->cur.nins, nk = J->cur.nk;
lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
for (i = nins-1; i >= nk; i--) { for (i = nk; i < nins; i++) {
IRIns *ir = IR(i); IRIns *ir = IR(i);
uint32_t mode = lj_ir_mode[ir->o]; uint32_t mode = lj_ir_mode[ir->o];
IRRef op1 = ir->op1; IRRef op1 = ir->op1;
@ -61,7 +61,10 @@ static void rec_check_ir(jit_State *J)
case IRMref: lua_assert(op1 >= nk); case IRMref: lua_assert(op1 >= nk);
lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
case IRMlit: break; case IRMlit: break;
case IRMcst: lua_assert(i < REF_BIAS); continue; case IRMcst: lua_assert(i < REF_BIAS);
if (irt_is64(ir->t) && ir->o != IR_KNULL)
i++;
continue;
} }
switch (irm_op2(mode)) { switch (irm_op2(mode)) {
case IRMnone: lua_assert(op2 == 0); break; case IRMnone: lua_assert(op2 == 0); break;
@ -84,30 +87,48 @@ static void rec_check_slots(jit_State *J)
BCReg s, nslots = J->baseslot + J->maxslot; BCReg s, nslots = J->baseslot + J->maxslot;
int32_t depth = 0; int32_t depth = 0;
cTValue *base = J->L->base - J->baseslot; cTValue *base = J->L->base - J->baseslot;
lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS);
lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME));
lua_assert(nslots < LJ_MAX_JSLOTS); lua_assert(nslots < LJ_MAX_JSLOTS);
for (s = 0; s < nslots; s++) { for (s = 0; s < nslots; s++) {
TRef tr = J->slot[s]; TRef tr = J->slot[s];
if (tr) { if (tr) {
cTValue *tv = &base[s]; cTValue *tv = &base[s];
IRRef ref = tref_ref(tr); IRRef ref = tref_ref(tr);
IRIns *ir; IRIns *ir = NULL; /* Silence compiler. */
lua_assert(ref >= J->cur.nk && ref < J->cur.nins); if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
ir = IR(ref); lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
lua_assert(irt_t(ir->t) == tref_t(tr)); ir = IR(ref);
lua_assert(irt_t(ir->t) == tref_t(tr));
}
if (s == 0) { if (s == 0) {
lua_assert(tref_isfunc(tr)); lua_assert(tref_isfunc(tr));
#if LJ_FR2
} else if (s == 1) {
lua_assert(0);
#endif
} else if ((tr & TREF_FRAME)) { } else if ((tr & TREF_FRAME)) {
GCfunc *fn = gco2func(frame_gc(tv)); GCfunc *fn = gco2func(frame_gc(tv));
BCReg delta = (BCReg)(tv - frame_prev(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv));
#if LJ_FR2
if (ref)
lua_assert(ir_knum(ir)->u64 == tv->u64);
tr = J->slot[s-1];
ir = IR(tref_ref(tr));
#endif
lua_assert(tref_isfunc(tr)); lua_assert(tref_isfunc(tr));
if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
: (s == delta + LJ_FR2));
depth++; depth++;
} else if ((tr & TREF_CONT)) { } else if ((tr & TREF_CONT)) {
#if LJ_FR2
if (ref)
lua_assert(ir_knum(ir)->u64 == tv->u64);
#else
lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
lua_assert((J->slot[s+1] & TREF_FRAME)); #endif
lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME));
depth++; depth++;
} else { } else {
if (tvisnumber(tv)) if (tvisnumber(tv))
@ -159,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot)
/* Get TRef for current function. */ /* Get TRef for current function. */
static TRef getcurrf(jit_State *J) static TRef getcurrf(jit_State *J)
{ {
if (J->base[-1]) if (J->base[-1-LJ_FR2])
return J->base[-1]; return J->base[-1-LJ_FR2];
lua_assert(J->baseslot == 1); lua_assert(J->baseslot == 1+LJ_FR2);
return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
} }
/* Compare for raw object equality. /* Compare for raw object equality.
@ -506,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
{ {
BCReg ra = bc_a(iterins); BCReg ra = bc_a(iterins);
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
J->maxslot = ra-1+bc_b(J->pc[-1]); J->maxslot = ra-1+bc_b(J->pc[-1]);
@ -643,8 +663,8 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
GCproto *pt = funcproto(fn); GCproto *pt = funcproto(fn);
/* Too many closures created? Probably not a monomorphic function. */ /* Too many closures created? Probably not a monomorphic function. */
if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */
TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC);
emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt)));
(void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
return tr; return tr;
} }
@ -675,22 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
{ {
RecordIndex ix; RecordIndex ix;
TValue *functv = &J->L->base[func]; TValue *functv = &J->L->base[func];
TRef *fbase = &J->base[func]; TRef kfunc, *fbase = &J->base[func];
ptrdiff_t i; ptrdiff_t i;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ (void)getslot(J, func); /* Ensure func has a reference. */
for (i = 0; i <= nargs; i++) for (i = 1; i <= nargs; i++)
(void)getslot(J, func+i); /* Ensure func and all args have a reference. */ (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
ix.tab = fbase[0]; ix.tab = fbase[0];
copyTV(J->L, &ix.tabv, functv); copyTV(J->L, &ix.tabv, functv);
if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
lj_trace_err(J, LJ_TRERR_NOMM); lj_trace_err(J, LJ_TRERR_NOMM);
for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
fbase[i] = fbase[i-1]; fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
#if LJ_FR2
fbase[2] = fbase[0];
#endif
fbase[0] = ix.mobj; /* Replace function. */ fbase[0] = ix.mobj; /* Replace function. */
functv = &ix.mobjv; functv = &ix.mobjv;
} }
fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); kfunc = rec_call_specialize(J, funcV(functv), fbase[0]);
#if LJ_FR2
fbase[0] = kfunc;
fbase[1] = TREF_FRAME;
#else
fbase[0] = kfunc | TREF_FRAME;
#endif
J->maxslot = (BCReg)nargs; J->maxslot = (BCReg)nargs;
} }
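
Most of this file's changes thread LJ_FR2 through the recorder's slot arithmetic: in the two-slot frame format the function constant and the frame link occupy separate slots, so arguments start one slot later and every func+1 adjustment becomes func+1+LJ_FR2. A toy illustration of the two layouts built by rec_call_setup() above; the 0x80000000 frame tag and the slot values are invented for the example and are not LuaJIT's encoding:

#include <stdint.h>
#include <stdio.h>

#define SKETCH_FRAME 0x80000000u       /* invented stand-in for TREF_FRAME */

static void show_frame(int fr2, uint32_t kfunc, int nargs)
{
  uint32_t slots[8] = {0};
  int i, nslots = 1 + fr2 + nargs;
  if (fr2) {
    slots[0] = kfunc;                  /* function constant */
    slots[1] = SKETCH_FRAME;           /* frame link gets its own slot */
  } else {
    slots[0] = kfunc | SKETCH_FRAME;   /* tag rides on the function slot */
  }
  for (i = 0; i < nargs; i++)
    slots[1 + fr2 + i] = 100 + i;      /* argument refs */
  printf("LJ_FR2=%d:", fr2);
  for (i = 0; i < nslots; i++)
    printf(" %08x", slots[i]);
  printf("\n");
}

int main(void)
{
  show_frame(0, 7, 2);
  show_frame(1, 7, 2);
  return 0;
}
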
@ -700,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
rec_call_setup(J, func, nargs); rec_call_setup(J, func, nargs);
/* Bump frame. */ /* Bump frame. */
J->framedepth++; J->framedepth++;
J->base += func+1; J->base += func+1+LJ_FR2;
J->baseslot += func+1; J->baseslot += func+1+LJ_FR2;
} }
/* Record tail call. */ /* Record tail call. */
@ -717,7 +746,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
func += cbase; func += cbase;
} }
/* Move func + args down. */ /* Move func + args down. */
memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); if (LJ_FR2 && J->baseslot == 2)
J->base[func+1] = 0;
memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
/* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
/* Tailcalls can form a loop, so count towards the loop unroll limit. */ /* Tailcalls can form a loop, so count towards the loop unroll limit. */
if (++J->tailcalled > J->loopunroll) if (++J->tailcalled > J->loopunroll)
@ -758,9 +789,9 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
(void)getslot(J, rbase+i); /* Ensure all results have a reference. */ (void)getslot(J, rbase+i); /* Ensure all results have a reference. */
while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */ while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */
BCReg cbase = (BCReg)frame_delta(frame); BCReg cbase = (BCReg)frame_delta(frame);
if (--J->framedepth < 0) if (--J->framedepth <= 0)
lj_trace_err(J, LJ_TRERR_NYIRETL); lj_trace_err(J, LJ_TRERR_NYIRETL);
lua_assert(J->baseslot > 1); lua_assert(J->baseslot > 1+LJ_FR2);
gotresults++; gotresults++;
rbase += cbase; rbase += cbase;
J->baseslot -= (BCReg)cbase; J->baseslot -= (BCReg)cbase;
@ -784,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCReg cbase = (BCReg)frame_delta(frame); BCReg cbase = (BCReg)frame_delta(frame);
if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
lj_trace_err(J, LJ_TRERR_NYIRETL); lj_trace_err(J, LJ_TRERR_NYIRETL);
lua_assert(J->baseslot > 1); lua_assert(J->baseslot > 1+LJ_FR2);
rbase += cbase; rbase += cbase;
J->baseslot -= (BCReg)cbase; J->baseslot -= (BCReg)cbase;
J->base -= cbase; J->base -= cbase;
@ -794,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCIns callins = *(frame_pc(frame)-1); BCIns callins = *(frame_pc(frame)-1);
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
BCReg cbase = bc_a(callins); BCReg cbase = bc_a(callins);
GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2))); GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */
if ((pt->flags & PROTO_NOJIT)) if ((pt->flags & PROTO_NOJIT))
lj_trace_err(J, LJ_TRERR_CJITOFF); lj_trace_err(J, LJ_TRERR_CJITOFF);
if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
@ -808,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lj_snap_add(J); lj_snap_add(J);
} }
for (i = 0; i < nresults; i++) /* Adjust results. */ for (i = 0; i < nresults; i++) /* Adjust results. */
J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
J->maxslot = cbase+(BCReg)nresults; J->maxslot = cbase+(BCReg)nresults;
if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
J->framedepth--; J->framedepth--;
lua_assert(J->baseslot > cbase+1); lua_assert(J->baseslot > cbase+1+LJ_FR2);
J->baseslot -= cbase+1; J->baseslot -= cbase+1+LJ_FR2;
J->base -= cbase+1; J->base -= cbase+1+LJ_FR2;
} else if (J->parent == 0 && J->exitno == 0 && } else if (J->parent == 0 && J->exitno == 0 &&
!bc_isret(bc_op(J->cur.startins))) { !bc_isret(bc_op(J->cur.startins))) {
/* Return to lower frame would leave the loop in a root trace. */ /* Return to lower frame would leave the loop in a root trace. */
@ -824,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
} else { /* Return to lower frame. Guard for the target we return to. */ } else { /* Return to lower frame. Guard for the target we return to. */
TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
J->retdepth++; J->retdepth++;
J->needsnap = 1; J->needsnap = 1;
lua_assert(J->baseslot == 1); lua_assert(J->baseslot == 1+LJ_FR2);
/* Shift result slots up and clear the slots of the new frame below. */ /* Shift result slots up and clear the slots of the new frame below. */
memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
} }
} else if (frame_iscont(frame)) { /* Return to continuation frame. */ } else if (frame_iscont(frame)) { /* Return to continuation frame. */
ASMFunction cont = frame_contf(frame); ASMFunction cont = frame_contf(frame);
@ -839,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lj_trace_err(J, LJ_TRERR_NYIRETL); lj_trace_err(J, LJ_TRERR_NYIRETL);
J->baseslot -= (BCReg)cbase; J->baseslot -= (BCReg)cbase;
J->base -= cbase; J->base -= cbase;
J->maxslot = cbase-2; J->maxslot = cbase-(2<<LJ_FR2);
if (cont == lj_cont_ra) { if (cont == lj_cont_ra) {
/* Copy result to destination slot. */ /* Copy result to destination slot. */
BCReg dst = bc_a(*(frame_contpc(frame)-1)); BCReg dst = bc_a(*(frame_contpc(frame)-1));
J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
if (dst >= J->maxslot) J->maxslot = dst+1; if (dst >= J->maxslot) {
J->maxslot = dst+1;
}
} else if (cont == lj_cont_nop) { } else if (cont == lj_cont_nop) {
/* Nothing to do here. */ /* Nothing to do here. */
} else if (cont == lj_cont_cat) { } else if (cont == lj_cont_cat) {
BCReg bslot = bc_b(*(frame_contpc(frame)-1)); BCReg bslot = bc_b(*(frame_contpc(frame)-1));
TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
if (bslot != cbase-2) { /* Concatenate the remainder. */ if (bslot != J->maxslot) { /* Concatenate the remainder. */
TValue *b = J->L->base, save; /* Simulate lower frame and result. */ TValue *b = J->L->base, save; /* Simulate lower frame and result. */
J->base[cbase-2] = tr; J->base[J->maxslot] = tr;
copyTV(J->L, &save, b-2); copyTV(J->L, &save, b-(2<<LJ_FR2));
if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2); if (gotresults)
copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
else
setnilV(b-(2<<LJ_FR2));
J->L->base = b - cbase; J->L->base = b - cbase;
tr = rec_cat(J, bslot, cbase-2); tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
b = J->L->base + cbase; /* Undo. */ b = J->L->base + cbase; /* Undo. */
J->L->base = b; J->L->base = b;
copyTV(J->L, b-2, &save); copyTV(J->L, b-(2<<LJ_FR2), &save);
} }
if (tr) { /* Store final result. */ if (tr) { /* Store final result. */
BCReg dst = bc_a(*(frame_contpc(frame)-1)); BCReg dst = bc_a(*(frame_contpc(frame)-1));
J->base[dst] = tr; J->base[dst] = tr;
if (dst >= J->maxslot) J->maxslot = dst+1; if (dst >= J->maxslot) {
J->maxslot = dst+1;
}
} /* Otherwise continue with another __concat call. */ } /* Otherwise continue with another __concat call. */
} else { } else {
/* Result type already specialized. */ /* Result type already specialized. */
@ -873,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
} else { } else {
lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
} }
lua_assert(J->baseslot >= 1); lua_assert(J->baseslot >= 1+LJ_FR2);
} }
/* -- Metamethod handling ------------------------------------------------- */ /* -- Metamethod handling ------------------------------------------------- */
@ -882,16 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
{ {
BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
#if LJ_64 #if LJ_FR2
TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin)); J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
J->base[top+1] = TREF_CONT;
#else #else
TRef trcont = lj_ir_kptr(J, (void *)cont); J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
#endif #endif
J->base[top] = trcont | TREF_CONT;
J->framedepth++; J->framedepth++;
for (s = J->maxslot; s < top; s++) for (s = J->maxslot; s < top; s++)
J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
return top+1; return top+1+LJ_FR2;
} }
/* Record metamethod lookup. */ /* Record metamethod lookup. */
@ -910,7 +947,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
cTValue *mo; cTValue *mo;
if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
/* Specialize to the C library namespace object. */ /* Specialize to the C library namespace object. */
emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
} else { } else {
/* Specialize to the type of userdata. */ /* Specialize to the type of userdata. */
TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
@ -939,7 +976,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
} }
/* The cdata metatable is treated as immutable. */ /* The cdata metatable is treated as immutable. */
if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
#if LJ_GC64
/* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
#else
ix->mt = mix.tab = lj_ir_ktab(J, mt); ix->mt = mix.tab = lj_ir_ktab(J, mt);
#endif
goto nocheck; goto nocheck;
} }
ix->mt = mt ? mix.tab : TREF_NIL; ix->mt = mt ? mix.tab : TREF_NIL;
@ -969,9 +1012,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
TRef *base = J->base + func; TRef *base = J->base + func;
TValue *basev = J->L->base + func; TValue *basev = J->L->base + func;
base[1] = ix->tab; base[2] = ix->key; base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
copyTV(J->L, basev+1, &ix->tabv); copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
copyTV(J->L, basev+2, &ix->keyv); copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
if (mm != MM_unm) { if (mm != MM_unm) {
ix->tab = ix->key; ix->tab = ix->key;
@ -982,8 +1025,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
lj_trace_err(J, LJ_TRERR_NOMM); lj_trace_err(J, LJ_TRERR_NOMM);
} }
ok: ok:
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj; base[0] = ix->mobj;
#if LJ_FR2
base[1] = 0;
#endif
copyTV(J->L, basev+0, &ix->mobjv); copyTV(J->L, basev+0, &ix->mobjv);
lj_record_call(J, func, 2); lj_record_call(J, func, 2);
return 0; /* No result yet. */ return 0; /* No result yet. */
@ -999,8 +1044,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
BCReg func = rec_mm_prep(J, lj_cont_ra); BCReg func = rec_mm_prep(J, lj_cont_ra);
TRef *base = J->base + func; TRef *base = J->base + func;
TValue *basev = J->L->base + func; TValue *basev = J->L->base + func;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
base += LJ_FR2;
basev += LJ_FR2;
base[1] = tr; copyTV(J->L, basev+1, tv); base[1] = tr; copyTV(J->L, basev+1, tv);
#if LJ_52 #if LJ_52
base[2] = tr; copyTV(J->L, basev+2, tv); base[2] = tr; copyTV(J->L, basev+2, tv);
@ -1020,11 +1066,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
{ {
BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
TRef *base = J->base + func; TRef *base = J->base + func + LJ_FR2;
TValue *tv = J->L->base + func; TValue *tv = J->L->base + func + LJ_FR2;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
copyTV(J->L, tv+0, &ix->mobjv);
copyTV(J->L, tv+1, &ix->valv); copyTV(J->L, tv+1, &ix->valv);
copyTV(J->L, tv+2, &ix->keyv); copyTV(J->L, tv+2, &ix->keyv);
lj_record_call(J, func, 2); lj_record_call(J, func, 2);
@ -1257,8 +1302,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
if ((MSize)k < t->asize) { /* Currently an array key? */ if ((MSize)k < t->asize) { /* Currently an array key? */
TRef arrayref; TRef arrayref;
rec_idx_abc(J, asizeref, ikey, t->asize); rec_idx_abc(J, asizeref, ikey, t->asize);
arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY);
return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey);
} else { /* Currently not in array (may be an array extension)? */ } else { /* Currently not in array (may be an array extension)? */
emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
if (k == 0 && tref_isk(key)) if (k == 0 && tref_isk(key))
@ -1298,13 +1343,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
*rbguard = J->guardemit; *rbguard = J->guardemit;
hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE);
kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot);
} }
} }
/* Fall back to a regular hash lookup. */ /* Fall back to a regular hash lookup. */
return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key);
} }
/* Determine whether a key is NOT one of the fast metamethod names. */ /* Determine whether a key is NOT one of the fast metamethod names. */
@ -1341,11 +1386,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
handlemm: handlemm:
if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
TRef *base = J->base + func; TRef *base = J->base + func + LJ_FR2;
TValue *tv = J->L->base + func; TValue *tv = J->L->base + func + LJ_FR2;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
setfuncV(J->L, tv+0, funcV(&ix->mobjv));
copyTV(J->L, tv+1, &ix->tabv); copyTV(J->L, tv+1, &ix->tabv);
copyTV(J->L, tv+2, &ix->keyv); copyTV(J->L, tv+2, &ix->keyv);
if (ix->val) { if (ix->val) {
@ -1387,7 +1431,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
IRType t = itype2irt(oldv); IRType t = itype2irt(oldv);
TRef res; TRef res;
if (oldv == niltvg(J2G(J))) { if (oldv == niltvg(J2G(J))) {
emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
res = TREF_NIL; res = TREF_NIL;
} else { } else {
res = emitir(IRTG(loadop, t), xref, 0); res = emitir(IRTG(loadop, t), xref, 0);
@ -1417,7 +1461,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
if (hasmm) if (hasmm)
emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
else if (xrefop == IR_HREF) else if (xrefop == IR_HREF)
emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC),
xref, lj_ir_kkptr(J, niltvg(J2G(J)))); xref, lj_ir_kkptr(J, niltvg(J2G(J))));
if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
lua_assert(hasmm); lua_assert(hasmm);
@ -1428,7 +1472,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
TRef key = ix->key; TRef key = ix->key;
if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
keybarrier = 0; /* NEWREF already takes care of the key barrier. */ keybarrier = 0; /* NEWREF already takes care of the key barrier. */
#ifdef LUAJIT_ENABLE_TABLE_BUMP #ifdef LUAJIT_ENABLE_TABLE_BUMP
if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */ if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */
@ -1438,7 +1482,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
} else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
/* Cannot derive that the previous value was non-nil, must do checks. */ /* Cannot derive that the previous value was non-nil, must do checks. */
if (xrefop == IR_HREF) /* Guard against store to niltv. */ if (xrefop == IR_HREF) /* Guard against store to niltv. */
emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
if (ix->idxchain) { /* Metamethod lookup required? */ if (ix->idxchain) { /* Metamethod lookup required? */
/* A check for NULL metatable is cheaper (hoistable) than a load. */ /* A check for NULL metatable is cheaper (hoistable) than a load. */
if (!mt) { if (!mt) {
@ -1460,7 +1504,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
/* Invalidate neg. metamethod cache for stores with certain string keys. */ /* Invalidate neg. metamethod cache for stores with certain string keys. */
if (!nommstr(J, ix->key)) { if (!nommstr(J, ix->key)) {
TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM);
emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
} }
J->needsnap = 1; J->needsnap = 1;
@ -1535,7 +1579,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
goto noconstify; goto noconstify;
kfunc = lj_ir_kfunc(J, J->fn); kfunc = lj_ir_kfunc(J, J->fn);
emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
J->base[-1] = TREF_FRAME | kfunc; #if LJ_FR2
J->base[-2] = kfunc;
#else
J->base[-1] = kfunc | TREF_FRAME;
#endif
fn = kfunc; fn = kfunc;
} }
tr = lj_record_constify(J, uvval(uvp)); tr = lj_record_constify(J, uvval(uvp));
@ -1546,13 +1594,17 @@ noconstify:
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */ /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
if (!uvp->closed) { if (!uvp->closed) {
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
/* In current stack? */ /* In current stack? */
if (uvval(uvp) >= tvref(J->L->stack) && if (uvval(uvp) >= tvref(J->L->stack) &&
uvval(uvp) < tvref(J->L->maxstack)) { uvval(uvp) < tvref(J->L->maxstack)) {
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
if (slot >= 0) { /* Aliases an SSA slot? */ if (slot >= 0) { /* Aliases an SSA slot? */
emitir(IRTG(IR_EQ, IRT_PGC),
REF_BASE,
emitir(IRT(IR_ADD, IRT_PGC), uref,
lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
/* NYI: add IR to guard that it's still aliasing the same slot. */
if (val == 0) { if (val == 0) {
return getslot(J, slot); return getslot(J, slot);
} else { } else {
@ -1562,10 +1614,12 @@ noconstify:
} }
} }
} }
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); emitir(IRTG(IR_UGT, IRT_PGC),
emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
} else { } else {
needbarrier = 1; needbarrier = 1;
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
} }
if (val == 0) { /* Upvalue load */ if (val == 0) { /* Upvalue load */
IRType t = itype2irt(uvval(uvp)); IRType t = itype2irt(uvval(uvp));
@ -1640,11 +1694,14 @@ static void rec_func_setup(jit_State *J)
static void rec_func_vararg(jit_State *J) static void rec_func_vararg(jit_State *J)
{ {
GCproto *pt = J->pt; GCproto *pt = J->pt;
BCReg s, fixargs, vframe = J->maxslot+1; BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
lua_assert((pt->flags & PROTO_VARARG)); lua_assert((pt->flags & PROTO_VARARG));
if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV); lj_trace_err(J, LJ_TRERR_STACKOV);
J->base[vframe-1] = J->base[-1]; /* Copy function up. */ J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
#if LJ_FR2
J->base[vframe-1] = TREF_FRAME;
#endif
/* Copy fixarg slots up and set their original slots to nil. */ /* Copy fixarg slots up and set their original slots to nil. */
fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
for (s = 0; s < fixargs; s++) { for (s = 0; s < fixargs; s++) {
@ -1706,7 +1763,7 @@ static int select_detect(jit_State *J)
static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
{ {
int32_t numparams = J->pt->numparams; int32_t numparams = J->pt->numparams;
ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
lua_assert(frame_isvarg(J->L->base-1)); lua_assert(frame_isvarg(J->L->base-1));
if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
ptrdiff_t i; ptrdiff_t i;
@ -1718,10 +1775,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
J->maxslot = dst + (BCReg)nresults; J->maxslot = dst + (BCReg)nresults;
} }
for (i = 0; i < nresults; i++) for (i = 0; i < nresults; i++)
J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
} else { /* Unknown number of varargs passed to trace. */ } else { /* Unknown number of varargs passed to trace. */
TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
int32_t frofs = 8*(1+numparams)+FRAME_VARG; int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
if (nresults >= 0) { /* Known fixed number of results. */ if (nresults >= 0) { /* Known fixed number of results. */
ptrdiff_t i; ptrdiff_t i;
if (nvararg > 0) { if (nvararg > 0) {
@ -1732,11 +1789,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
else else
emitir(IRTGI(IR_EQ), fr, emitir(IRTGI(IR_EQ), fr,
lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
for (i = 0; i < nload; i++) { for (i = 0; i < nload; i++) {
IRType t = itype2irt(&J->L->base[i-1-nvararg]); IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
TRef aref = emitir(IRT(IR_AREF, IRT_P32), TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
vbase, lj_ir_kint(J, (int32_t)i)); vbase, lj_ir_kint(J, (int32_t)i));
TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
@ -1782,15 +1839,16 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
} }
if (idx != 0 && idx <= nvararg) { if (idx != 0 && idx <= nvararg) {
IRType t; IRType t;
TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
t = itype2irt(&J->L->base[idx-2-nvararg]); lj_ir_kint(J, frofs-(8<<LJ_FR2)));
aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
tr = emitir(IRTG(IR_VLOAD, t), aref, 0); tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
} }
J->base[dst-2] = tr; J->base[dst-2-LJ_FR2] = tr;
J->maxslot = dst-1; J->maxslot = dst-1-LJ_FR2;
J->bcskip = 2; /* Skip CALLM + select. */ J->bcskip = 2; /* Skip CALLM + select. */
} else { } else {
nyivarg: nyivarg:
@ -1839,10 +1897,10 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
break; break;
} }
xbase = ++trp; xbase = ++trp;
tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32), tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
do { do {
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++);
} while (trp <= top); } while (trp <= top);
tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
J->maxslot = (BCReg)(xbase - J->base); J->maxslot = (BCReg)(xbase - J->base);
@ -1883,7 +1941,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
/* Set PC to opposite target to avoid re-recording the comp. in side trace. */ /* Set PC to opposite target to avoid re-recording the comp. in side trace. */
#if LJ_FR2
SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
uint64_t pcbase;
memcpy(&pcbase, flink, sizeof(uint64_t));
pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
memcpy(flink, &pcbase, sizeof(uint64_t));
#else
J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
#endif
J->needsnap = 1; J->needsnap = 1;
if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
lj_snap_shrink(J); /* Shrink last snapshot if possible. */ lj_snap_shrink(J); /* Shrink last snapshot if possible. */
@ -2159,14 +2225,14 @@ void lj_record_ins(jit_State *J)
case BC_MODVN: case BC_MODVV: case BC_MODVN: case BC_MODVV:
recmod: recmod:
if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
rc = lj_opt_narrow_mod(J, rb, rc, rcv); rc = lj_opt_narrow_mod(J, rb, rc, rbv, rcv);
else else
rc = rec_mm_arith(J, &ix, MM_mod); rc = rec_mm_arith(J, &ix, MM_mod);
break; break;
case BC_POW: case BC_POW:
if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv); rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
else else
rc = rec_mm_arith(J, &ix, MM_pow); rc = rec_mm_arith(J, &ix, MM_pow);
break; break;
@ -2181,7 +2247,13 @@ void lj_record_ins(jit_State *J)
case BC_MOV: case BC_MOV:
/* Clear gap of method call to avoid resurrecting previous refs. */ /* Clear gap of method call to avoid resurrecting previous refs. */
if (ra > J->maxslot) J->base[ra-1] = 0; if (ra > J->maxslot) {
#if LJ_FR2
memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
#else
J->base[ra-1] = 0;
#endif
}
break; break;
case BC_KSTR: case BC_KNUM: case BC_KPRI: case BC_KSTR: case BC_KNUM: case BC_KPRI:
break; break;
@ -2250,14 +2322,14 @@ void lj_record_ins(jit_State *J)
/* -- Calls and vararg handling ----------------------------------------- */ /* -- Calls and vararg handling ----------------------------------------- */
case BC_ITERC: case BC_ITERC:
J->base[ra] = getslot(J, ra-3-LJ_FR2); J->base[ra] = getslot(J, ra-3);
J->base[ra+1] = getslot(J, ra-2-LJ_FR2); J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
J->base[ra+2] = getslot(J, ra-1-LJ_FR2); J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
{ /* Do the actual copy now because lj_record_call needs the values. */ { /* Do the actual copy now because lj_record_call needs the values. */
TValue *b = &J->L->base[ra]; TValue *b = &J->L->base[ra];
copyTV(J->L, b, b-3-LJ_FR2); copyTV(J->L, b, b-3);
copyTV(J->L, b+1, b-2-LJ_FR2); copyTV(J->L, b+1+LJ_FR2, b-2);
copyTV(J->L, b+2, b-1-LJ_FR2); copyTV(J->L, b+2+LJ_FR2, b-1);
} }
lj_record_call(J, ra, (ptrdiff_t)rc-1); lj_record_call(J, ra, (ptrdiff_t)rc-1);
break; break;
@ -2380,7 +2452,12 @@ void lj_record_ins(jit_State *J)
/* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
if (bcmode_a(op) == BCMdst && rc) { if (bcmode_a(op) == BCMdst && rc) {
J->base[ra] = rc; J->base[ra] = rc;
if (ra >= J->maxslot) J->maxslot = ra+1; if (ra >= J->maxslot) {
#if LJ_FR2
if (ra > J->maxslot) J->base[ra-1] = 0;
#endif
J->maxslot = ra+1;
}
} }
#undef rav #undef rav
@ -2465,7 +2542,7 @@ void lj_record_setup(jit_State *J)
J->scev.idx = REF_NIL; J->scev.idx = REF_NIL;
setmref(J->scev.pc, NULL); setmref(J->scev.pc, NULL);
J->baseslot = 1; /* Invoking function is at base[-1]. */ J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
J->base = J->slot + J->baseslot; J->base = J->slot + J->baseslot;
J->maxslot = 0; J->maxslot = 0;
J->framedepth = 0; J->framedepth = 0;
@ -2480,7 +2557,7 @@ void lj_record_setup(jit_State *J)
J->bc_extent = ~(MSize)0; J->bc_extent = ~(MSize)0;
/* Emit instructions for fixed references. Also triggers initial IR alloc. */ /* Emit instructions for fixed references. Also triggers initial IR alloc. */
emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno);
for (i = 0; i <= 2; i++) { for (i = 0; i <= 2; i++) {
IRIns *ir = IR(REF_NIL-i); IRIns *ir = IR(REF_NIL-i);
ir->i = 0; ir->i = 0;
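
Most of the recorder changes above account for LJ_FR2 mode, where the frame link occupies two stack slots instead of one, so every frame-relative offset (the vararg slots, the CALLM + select result slot, the initial baseslot) shifts by LJ_FR2. A small illustrative sketch, not part of the patch, of the frofs computation used for the unknown-vararg case; the 8-byte slot size and FRAME_VARG == 3 are assumptions taken from lj_frame.h/lj_obj.h:

/* Sketch: vararg frame offset as computed in rec_varg() above. */
#include <stdio.h>

#define FRAME_VARG 3  /* Frame-type tag; assumed value from lj_frame.h. */

static int frofs(int lj_fr2, int numparams)
{
  /* Distance from the vararg frame link back past the fixed parameters
  ** and the (one- or two-slot) frame link, with the frame tag added.
  */
  return 8*(1+lj_fr2+numparams) + FRAME_VARG;
}

int main(void)
{
  printf("one-slot frames, 2 params: frofs = %d\n", frofs(0, 2));  /* 27 */
  printf("two-slot frames, 2 params: frofs = %d\n", frofs(1, 2));  /* 35 */
  return 0;
}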


@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
for (s = 0; s < nslots; s++) { for (s = 0; s < nslots; s++) {
TRef tr = J->slot[s]; TRef tr = J->slot[s];
IRRef ref = tref_ref(tr); IRRef ref = tref_ref(tr);
#if LJ_FR2
if (s == 1) continue;
if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
TValue *base = J->L->base - J->baseslot;
tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
ref = tref_ref(tr);
}
#endif
if (ref) { if (ref) {
SnapEntry sn = SNAP_TR(s, tr); SnapEntry sn = SNAP_TR(s, tr);
IRIns *ir = &J->cur.ir[ref]; IRIns *ir = &J->cur.ir[ref];
if (!(sn & (SNAP_CONT|SNAP_FRAME)) && if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
/* No need to snapshot unmodified non-inherited slots. */ /* No need to snapshot unmodified non-inherited slots. */
if (!(ir->op2 & IRSLOAD_INHERIT)) if (!(ir->op2 & IRSLOAD_INHERIT))
@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
} }
/* Add frame links at the end of the snapshot. */ /* Add frame links at the end of the snapshot. */
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{ {
cTValue *frame = J->L->base - 1; cTValue *frame = J->L->base - 1;
cTValue *lim = J->L->base - J->baseslot; cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
GCfunc *fn = frame_func(frame); GCfunc *fn = frame_func(frame);
cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
lua_assert(2 <= J->baseslot && J->baseslot <= 257);
memcpy(map, &pcbase, sizeof(uint64_t));
#else
MSize f = 0; MSize f = 0;
lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
#endif
while (frame > lim) { /* Backwards traversal of all frames above base. */ while (frame > lim) { /* Backwards traversal of all frames above base. */
if (frame_islua(frame)) { if (frame_islua(frame)) {
#if !LJ_FR2
map[f++] = SNAP_MKPC(frame_pc(frame)); map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
frame = frame_prevl(frame); frame = frame_prevl(frame);
} else if (frame_iscont(frame)) { } else if (frame_iscont(frame)) {
#if !LJ_FR2
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
map[f++] = SNAP_MKPC(frame_contpc(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
frame = frame_prevd(frame); frame = frame_prevd(frame);
} else { } else {
lua_assert(!frame_isc(frame)); lua_assert(!frame_isc(frame));
#if !LJ_FR2
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
frame = frame_prevd(frame); frame = frame_prevd(frame);
continue; continue;
} }
if (frame + funcproto(frame_func(frame))->framesize > ftop) if (frame + funcproto(frame_func(frame))->framesize > ftop)
ftop = frame + funcproto(frame_func(frame))->framesize; ftop = frame + funcproto(frame_func(frame))->framesize;
} }
*topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
return 2;
#else
lua_assert(f == (MSize)(1 + J->framedepth)); lua_assert(f == (MSize)(1 + J->framedepth));
return (BCReg)(ftop - lim); return f;
#endif
} }
/* Take a snapshot of the current stack. */ /* Take a snapshot of the current stack. */
@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
MSize nent; MSize nent;
SnapEntry *p; SnapEntry *p;
/* Conservative estimate. */ /* Conservative estimate. */
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
p = &J->cur.snapmap[nsnapmap]; p = &J->cur.snapmap[nsnapmap];
nent = snapshot_slots(J, p, nslots); nent = snapshot_slots(J, p, nslots);
snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); snap->nent = (uint8_t)nent;
nent += snapshot_framelinks(J, p + nent, &snap->topslot);
snap->mapofs = (uint16_t)nsnapmap; snap->mapofs = (uint16_t)nsnapmap;
snap->ref = (IRRef1)J->cur.nins; snap->ref = (IRRef1)J->cur.nins;
snap->nent = (uint8_t)nent;
snap->nslots = (uint8_t)nslots; snap->nslots = (uint8_t)nslots;
snap->count = 0; snap->count = 0;
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
} }
/* Add or merge a snapshot. */ /* Add or merge a snapshot. */
@ -284,8 +309,8 @@ void lj_snap_shrink(jit_State *J)
MSize n, m, nlim, nent = snap->nent; MSize n, m, nlim, nent = snap->nent;
uint8_t udf[SNAP_USEDEF_SLOTS]; uint8_t udf[SNAP_USEDEF_SLOTS];
BCReg maxslot = J->maxslot; BCReg maxslot = J->maxslot;
BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
BCReg baseslot = J->baseslot; BCReg baseslot = J->baseslot;
BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
maxslot += baseslot; maxslot += baseslot;
minslot += baseslot; minslot += baseslot;
snap->nslots = (uint8_t)maxslot; snap->nslots = (uint8_t)maxslot;
@ -371,8 +396,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
case IR_KPRI: return TREF_PRI(irt_type(ir->t)); case IR_KPRI: return TREF_PRI(irt_type(ir->t));
case IR_KINT: return lj_ir_kint(J, ir->i); case IR_KINT: return lj_ir_kint(J, ir->i);
case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); case IR_KNUM: case IR_KINT64:
case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
default: lua_assert(0); return TREF_NIL; break; default: lua_assert(0); return TREF_NIL; break;
} }
@ -555,8 +580,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
uint64_t k = (uint32_t)T->ir[irs->op2].i + uint64_t k = (uint32_t)T->ir[irs->op2].i +
((uint64_t)T->ir[(irs+1)->op2].i << 32); ((uint64_t)T->ir[(irs+1)->op2].i << 32);
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
lj_ir_k64_find(J, k));
} else { } else {
val = emitir_raw(IRT(IR_HIOP, t), val, val = emitir_raw(IRT(IR_HIOP, t), val,
snap_pref(J, T, map, nent, seen, (irs+1)->op2)); snap_pref(J, T, map, nent, seen, (irs+1)->op2));
@ -599,7 +623,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} }
if (LJ_UNLIKELY(bloomtest(rfilt, ref))) if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs); rs = snap_renameref(T, snapno, ref, rs);
lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
int32_t *sps = &ex->spill[regsp_spill(rs)]; int32_t *sps = &ex->spill[regsp_spill(rs)];
if (irt_isinteger(t)) { if (irt_isinteger(t)) {
@ -608,9 +631,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) { } else if (irt_isnum(t)) {
o->u64 = *(uint64_t *)sps; o->u64 = *(uint64_t *)sps;
#endif #endif
} else if (LJ_64 && irt_islightud(t)) { #if LJ_64 && !LJ_GC64
} else if (irt_islightud(t)) {
/* 64 bit lightuserdata which may escape already has the tag bits. */ /* 64 bit lightuserdata which may escape already has the tag bits. */
o->u64 = *(uint64_t *)sps; o->u64 = *(uint64_t *)sps;
#endif
} else { } else {
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
@ -628,9 +653,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) { } else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]); setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif #endif
} else if (LJ_64 && irt_is64(t)) { #if LJ_64 && !LJ_GC64
} else if (irt_is64(t)) {
/* 64 bit values that already have the tag bits. */ /* 64 bit values that already have the tag bits. */
o->u64 = ex->gpr[r-RID_MIN_GPR]; o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
} else if (irt_ispri(t)) { } else if (irt_ispri(t)) {
setpriV(o, irt_toitype(t)); setpriV(o, irt_toitype(t));
} else { } else {
@ -651,7 +678,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
uint64_t tmp; uint64_t tmp;
if (irref_isk(ref)) { if (irref_isk(ref)) {
if (ir->o == IR_KNUM || ir->o == IR_KINT64) { if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
src = mref(ir->ptr, int32_t); src = (int32_t *)&ir[1];
} else if (sz == 8) { } else if (sz == 8) {
tmp = (uint64_t)(uint32_t)ir->i; tmp = (uint64_t)(uint32_t)ir->i;
src = (int32_t *)&tmp; src = (int32_t *)&tmp;
@ -795,11 +822,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
SnapShot *snap = &T->snap[snapno]; SnapShot *snap = &T->snap[snapno];
MSize n, nent = snap->nent; MSize n, nent = snap->nent;
SnapEntry *map = &T->snapmap[snap->mapofs]; SnapEntry *map = &T->snapmap[snap->mapofs];
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; #if !LJ_FR2 || defined(LUA_USE_ASSERT)
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
#endif
#if !LJ_FR2
ptrdiff_t ftsz0; ptrdiff_t ftsz0;
#endif
TValue *frame; TValue *frame;
BloomFilter rfilt = snap_renamefilter(T, snapno); BloomFilter rfilt = snap_renamefilter(T, snapno);
const BCIns *pc = snap_pc(map[nent]); const BCIns *pc = snap_pc(&map[nent]);
lua_State *L = J->L; lua_State *L = J->L;
/* Set interpreter PC to the next PC to get correct error messages. */ /* Set interpreter PC to the next PC to get correct error messages. */
@ -812,8 +843,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
} }
/* Fill stack slots with data from the registers and spill slots. */ /* Fill stack slots with data from the registers and spill slots. */
frame = L->base-1; frame = L->base-1-LJ_FR2;
#if !LJ_FR2
ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
#endif
for (n = 0; n < nent; n++) { for (n = 0; n < nent; n++) {
SnapEntry sn = map[n]; SnapEntry sn = map[n];
if (!(sn & SNAP_NORESTORE)) { if (!(sn & SNAP_NORESTORE)) {
@ -836,14 +869,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
TValue tmp; TValue tmp;
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
o->u32.hi = tmp.u32.lo; o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) { } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
/* Overwrite tag with frame link. */ /* Overwrite tag with frame link. */
setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
L->base = o+1; L->base = o+1;
#endif
} }
} }
} }
#if LJ_FR2
L->base += (map[nent+LJ_BE] & 0xff);
#endif
lua_assert(map + nent == flinks); lua_assert(map + nent == flinks);
/* Compute current stack top. */ /* Compute current stack top. */
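
The snapshot changes above replace the per-frame link entries with a single packed 64-bit entry in LJ_FR2 mode: the current bytecode PC goes into the upper bits and the base-slot delta (baseslot - 2, which the assertion keeps within one byte) into the low byte. The exit path only needs that low byte to re-derive L->base, and rec_comp_fixup() can retarget the PC by masking it off. A rough, self-contained sketch of the pack/unpack arithmetic (u64ptr() is assumed to be a plain pointer-to-uint64_t cast):

#include <stdint.h>
#include <stdio.h>

static uint64_t pack_pcbase(const void *pc, unsigned baseslot)
{
  return ((uint64_t)(uintptr_t)pc << 8) | (baseslot - 2);
}

int main(void)
{
  static const uint32_t bc[4];                 /* Stand-in bytecode array. */
  uint64_t e = pack_pcbase(&bc[2], 5);
  const void *pc = (const void *)(uintptr_t)(e >> 8);   /* Unpack the PC. */
  unsigned delta = (unsigned)(e & 0xff);                /* baseslot - 2.  */
  printf("pc ok: %d, delta: %u\n", pc == (const void *)&bc[2], delta);
  /* Retargeting, as in rec_comp_fixup(): keep the delta, swap the PC. */
  e = (e & 0xff) | ((uint64_t)(uintptr_t)&bc[3] << 8);
  printf("retargeted pc ok: %d\n",
         (const void *)(uintptr_t)(e >> 8) == (const void *)&bc[3]);
  return 0;
}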


@ -180,7 +180,7 @@ static void close_state(lua_State *L)
g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
} }
#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
lua_State *lj_state_newstate(lua_Alloc f, void *ud) lua_State *lj_state_newstate(lua_Alloc f, void *ud)
#else #else
LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)


@ -98,11 +98,15 @@ char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
uint32_t u = (uint32_t)k; uint32_t u = (uint32_t)k;
if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
if (u < 10000) { if (u < 10000) {
if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3; if (u < 10) goto dig1;
if (u < 100) goto dig2;
if (u < 1000) goto dig3;
} else { } else {
uint32_t v = u / 10000; u -= v * 10000; uint32_t v = u / 10000; u -= v * 10000;
if (v < 10000) { if (v < 10000) {
if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 1000) goto dig7; if (v < 10) goto dig5;
if (v < 100) goto dig6;
if (v < 1000) goto dig7;
} else { } else {
uint32_t w = v / 10000; v -= w * 10000; uint32_t w = v / 10000; v -= w * 10000;
if (w >= 10) WINT_R(w, 10, 10) if (w >= 10) WINT_R(w, 10, 10)


@ -82,11 +82,15 @@ enum {
#if LJ_SOFTFP #if LJ_SOFTFP
#define RSET_FPR 0 #define RSET_FPR 0
#else #else
#if LJ_32
#define RSET_FPR \ #define RSET_FPR \
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
#else
#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
#endif
#endif #endif
#define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL #define RSET_INIT RSET_ALL
@ -97,23 +101,37 @@ enum {
#if LJ_SOFTFP #if LJ_SOFTFP
#define RSET_SCRATCH_FPR 0 #define RSET_SCRATCH_FPR 0
#else #else
#if LJ_32
#define RSET_SCRATCH_FPR \ #define RSET_SCRATCH_FPR \
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
RID2RSET(RID_F16)|RID2RSET(RID_F18)) RID2RSET(RID_F16)|RID2RSET(RID_F18))
#else
#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24)
#endif
#endif #endif
#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
#define REGARG_FIRSTGPR RID_R4 #define REGARG_FIRSTGPR RID_R4
#if LJ_32
#define REGARG_LASTGPR RID_R7 #define REGARG_LASTGPR RID_R7
#define REGARG_NUMGPR 4 #define REGARG_NUMGPR 4
#else
#define REGARG_LASTGPR RID_R11
#define REGARG_NUMGPR 8
#endif
#if LJ_ABI_SOFTFP #if LJ_ABI_SOFTFP
#define REGARG_FIRSTFPR 0 #define REGARG_FIRSTFPR 0
#define REGARG_LASTFPR 0 #define REGARG_LASTFPR 0
#define REGARG_NUMFPR 0 #define REGARG_NUMFPR 0
#else #else
#define REGARG_FIRSTFPR RID_F12 #define REGARG_FIRSTFPR RID_F12
#if LJ_32
#define REGARG_LASTFPR RID_F14 #define REGARG_LASTFPR RID_F14
#define REGARG_NUMFPR 2 #define REGARG_NUMFPR 2
#else
#define REGARG_LASTFPR RID_F19
#define REGARG_NUMFPR 8
#endif
#endif #endif
/* -- Spill slots --------------------------------------------------------- */ /* -- Spill slots --------------------------------------------------------- */
@ -125,7 +143,11 @@ enum {
** **
** SPS_FIRST: First spill slot for general use. ** SPS_FIRST: First spill slot for general use.
*/ */
#if LJ_32
#define SPS_FIXED 5 #define SPS_FIXED 5
#else
#define SPS_FIXED 4
#endif
#define SPS_FIRST 4 #define SPS_FIRST 4
#define SPOFS_TMP 0 #define SPOFS_TMP 0
@ -140,7 +162,7 @@ typedef struct {
#if !LJ_SOFTFP #if !LJ_SOFTFP
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
#endif #endif
int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */ int32_t spill[256]; /* Spill slots. */
} ExitState; } ExitState;
@ -172,7 +194,7 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
typedef enum MIPSIns { typedef enum MIPSIns {
/* Integer instructions. */ /* Integer instructions. */
MIPSI_MOVE = 0x00000021, MIPSI_MOVE = 0x00000025,
MIPSI_NOP = 0x00000000, MIPSI_NOP = 0x00000000,
MIPSI_LI = 0x24000000, MIPSI_LI = 0x24000000,
@ -204,19 +226,20 @@ typedef enum MIPSIns {
MIPSI_SLL = 0x00000000, MIPSI_SLL = 0x00000000,
MIPSI_SRL = 0x00000002, MIPSI_SRL = 0x00000002,
MIPSI_SRA = 0x00000003, MIPSI_SRA = 0x00000003,
MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */
MIPSI_SLLV = 0x00000004, MIPSI_SLLV = 0x00000004,
MIPSI_SRLV = 0x00000006, MIPSI_SRLV = 0x00000006,
MIPSI_SRAV = 0x00000007, MIPSI_SRAV = 0x00000007,
MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
MIPSI_B = 0x10000000, MIPSI_B = 0x10000000,
MIPSI_J = 0x08000000, MIPSI_J = 0x08000000,
MIPSI_JAL = 0x0c000000, MIPSI_JAL = 0x0c000000,
MIPSI_JALX = 0x74000000,
MIPSI_JR = 0x00000008, MIPSI_JR = 0x00000008,
MIPSI_JALR = 0x0000f809, MIPSI_JALR = 0x0000f809,
@ -241,6 +264,15 @@ typedef enum MIPSIns {
MIPSI_LDC1 = 0xd4000000, MIPSI_LDC1 = 0xd4000000,
MIPSI_SDC1 = 0xf4000000, MIPSI_SDC1 = 0xf4000000,
/* MIPS64 instructions. */
MIPSI_DSLL = 0x00000038,
MIPSI_LD = 0xdc000000,
MIPSI_DADDIU = 0x64000000,
MIPSI_SD = 0xfc000000,
MIPSI_DMFC1 = 0x44200000,
MIPSI_DSRA32 = 0x0000003f,
MIPSI_MFHC1 = 0x44600000,
/* FP instructions. */ /* FP instructions. */
MIPSI_MOV_S = 0x46000006, MIPSI_MOV_S = 0x46000006,
MIPSI_MOV_D = 0x46200006, MIPSI_MOV_D = 0x46200006,
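
The o32 build keeps only the even-numbered FPRs in the allocatable set because 32-bit MIPS hard-float pairs f(2n)/f(2n+1) into one double register; the MIPS64 n64 ABI can use all 32 FPRs as doubles, so a plain range suffices, and it also passes up to eight arguments in GPRs and FPRs instead of four and two. A rough stand-in sketch of what the two RSET_FPR masks above expand to (register ids are renumbered from 0 here, and the half-open RSET_RANGE convention is an assumption for illustration; the real macros live in lj_target.h):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

typedef uint64_t RegSet;
#define RID2RSET(r)        ((RegSet)1 << (r))
#define RSET_RANGE(lo, hi) ((RID2RSET(hi) - 1) - (RID2RSET(lo) - 1))

int main(void)
{
  RegSet o32 = 0, n64 = RSET_RANGE(0, 32);   /* All 32 FPRs, ids 0..31. */
  int i;
  for (i = 0; i < 32; i += 2)
    o32 |= RID2RSET(i);                      /* Even FPRs only: f0,f2,... */
  printf("o32 FPR set: 0x%08" PRIx64 "\n", o32);   /* 0x55555555 */
  printf("n64 FPR set: 0x%08" PRIx64 "\n", n64);   /* 0xffffffff */
  return 0;
}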


@ -22,7 +22,7 @@
_(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#endif #endif
#define VRIDDEF(_) \ #define VRIDDEF(_) \
_(MRM) _(MRM) _(RIP)
#define RIDENUM(name) RID_##name, #define RIDENUM(name) RID_##name,
@ -31,6 +31,7 @@ enum {
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX, RID_MAX,
RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
RID_RIP = RID_MAX+1, /* Pseudo-id for RIP (x64 only). */
/* Calling conventions. */ /* Calling conventions. */
RID_SP = RID_ESP, RID_SP = RID_ESP,
@ -63,8 +64,10 @@ enum {
/* -- Register sets ------------------------------------------------------- */ /* -- Register sets ------------------------------------------------------- */
/* Make use of all registers, except the stack pointer. */ /* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
- RID2RSET(RID_ESP) \
- LJ_GC64*RID2RSET(RID_DISPATCH))
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL #define RSET_INIT RSET_ALL
@ -200,6 +203,7 @@ typedef struct {
*/ */
typedef enum { typedef enum {
/* Fixed length opcodes. XI_* prefix. */ /* Fixed length opcodes. XI_* prefix. */
XI_O16 = 0x66,
XI_NOP = 0x90, XI_NOP = 0x90,
XI_XCHGa = 0x90, XI_XCHGa = 0x90,
XI_CALL = 0xe8, XI_CALL = 0xe8,
@ -217,6 +221,7 @@ typedef enum {
XI_PUSHi8 = 0x6a, XI_PUSHi8 = 0x6a,
XI_TESTb = 0x84, XI_TESTb = 0x84,
XI_TEST = 0x85, XI_TEST = 0x85,
XI_INT3 = 0xcc,
XI_MOVmi = 0xc7, XI_MOVmi = 0xc7,
XI_GROUP5 = 0xff, XI_GROUP5 = 0xff,
@ -243,6 +248,7 @@ typedef enum {
XV_SHRX = XV_f20f38(f7), XV_SHRX = XV_f20f38(f7),
/* Variable-length opcodes. XO_* prefix. */ /* Variable-length opcodes. XO_* prefix. */
XO_OR = XO_(0b),
XO_MOV = XO_(8b), XO_MOV = XO_(8b),
XO_MOVto = XO_(89), XO_MOVto = XO_(89),
XO_MOVtow = XO_66(89), XO_MOVtow = XO_66(89),
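
On the x86/x64 side, the GC64 port keeps the DISPATCH pointer in a fixed register while traces run (see the vm_x64.dasc hunk further down), so the allocatable GPR set has to exclude it. Since LJ_GC64 expands to 0 or 1, multiplying the one-register set by it removes the bit only in GC64 builds, with no extra #if. A minimal sketch of that arithmetic; the register ids and the 16-register universe below are made up for illustration:

#include <stdio.h>

#define RID2RSET(r)  (1u << (r))

static unsigned rset_gpr(int lj_gc64, int rid_esp, int rid_dispatch)
{
  unsigned all = 0xffffu;                    /* Pretend GPR ids 0..15. */
  return all - RID2RSET(rid_esp) - lj_gc64*RID2RSET(rid_dispatch);
}

int main(void)
{
  printf("plain x64: 0x%04x\n", rset_gpr(0, 4, 7));  /* Only ESP reserved.  */
  printf("GC64 x64:  0x%04x\n", rset_gpr(1, 4, 7));  /* ESP and DISPATCH.   */
  return 0;
}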


@ -117,15 +117,26 @@ static void perftools_addtrace(GCtrace *T)
} }
#endif #endif
/* Allocate space for copy of trace. */ /* Allocate space for copy of T. */
static GCtrace *trace_save_alloc(jit_State *J) GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T)
{ {
size_t sztr = ((sizeof(GCtrace)+7)&~7); size_t sztr = ((sizeof(GCtrace)+7)&~7);
size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); size_t szins = (T->nins-T->nk)*sizeof(IRIns);
size_t sz = sztr + szins + size_t sz = sztr + szins +
J->cur.nsnap*sizeof(SnapShot) + T->nsnap*sizeof(SnapShot) +
J->cur.nsnapmap*sizeof(SnapEntry); T->nsnapmap*sizeof(SnapEntry);
return lj_mem_newt(J->L, (MSize)sz, GCtrace); GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace);
char *p = (char *)T2 + sztr;
T2->gct = ~LJ_TTRACE;
T2->marked = 0;
T2->traceno = 0;
T2->ir = (IRIns *)p - T->nk;
T2->nins = T->nins;
T2->nk = T->nk;
T2->nsnap = T->nsnap;
T2->nsnapmap = T->nsnapmap;
memcpy(p, T->ir + T->nk, szins);
return T2;
} }
/* Save current trace by copying and compacting it. */ /* Save current trace by copying and compacting it. */
@ -139,12 +150,12 @@ static void trace_save(jit_State *J, GCtrace *T)
setgcrefp(J2G(J)->gc.root, T); setgcrefp(J2G(J)->gc.root, T);
newwhite(J2G(J), T); newwhite(J2G(J), T);
T->gct = ~LJ_TTRACE; T->gct = ~LJ_TTRACE;
T->ir = (IRIns *)p - J->cur.nk; T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */
memcpy(p, J->cur.ir+J->cur.nk, szins);
p += szins; p += szins;
TRACE_APPENDVEC(snap, nsnap, SnapShot) TRACE_APPENDVEC(snap, nsnap, SnapShot)
TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
J->cur.traceno = 0; J->cur.traceno = 0;
J->curfinal = NULL;
setgcrefp(J->trace[T->traceno], T); setgcrefp(J->trace[T->traceno], T);
lj_gc_barriertrace(J2G(J), T->traceno); lj_gc_barriertrace(J2G(J), T->traceno);
lj_gdbjit_addtrace(J, T); lj_gdbjit_addtrace(J, T);
@ -284,7 +295,6 @@ int lj_trace_flushall(lua_State *L)
memset(J->penalty, 0, sizeof(J->penalty)); memset(J->penalty, 0, sizeof(J->penalty));
/* Free the whole machine code and invalidate all exit stub groups. */ /* Free the whole machine code and invalidate all exit stub groups. */
lj_mcode_free(J); lj_mcode_free(J);
lj_ir_k64_freeall(J);
memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
lj_vmevent_send(L, TRACE, lj_vmevent_send(L, TRACE,
setstrV(L, L->top++, lj_str_newlit(L, "flush")); setstrV(L, L->top++, lj_str_newlit(L, "flush"));
@ -297,13 +307,35 @@ void lj_trace_initstate(global_State *g)
{ {
jit_State *J = G2J(g); jit_State *J = G2J(g);
TValue *tv; TValue *tv;
/* Initialize SIMD constants. */
/* Initialize aligned SIMD constants. */
tv = LJ_KSIMD(J, LJ_KSIMD_ABS); tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
tv[0].u64 = U64x(7fffffff,ffffffff); tv[0].u64 = U64x(7fffffff,ffffffff);
tv[1].u64 = U64x(7fffffff,ffffffff); tv[1].u64 = U64x(7fffffff,ffffffff);
tv = LJ_KSIMD(J, LJ_KSIMD_NEG); tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
tv[0].u64 = U64x(80000000,00000000); tv[0].u64 = U64x(80000000,00000000);
tv[1].u64 = U64x(80000000,00000000); tv[1].u64 = U64x(80000000,00000000);
/* Initialize 32/64 bit constants. */
#if LJ_TARGET_X86ORX64
J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
#if LJ_32
J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
#endif
J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
#endif
#if LJ_TARGET_PPC
J->k32[LJ_K32_2P52_2P31] = 0x59800004;
J->k32[LJ_K32_2P52] = 0x59800000;
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
J->k32[LJ_K32_2P31] = 0x4f000000;
#endif
#if LJ_TARGET_MIPS
J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
#endif
} }
/* Free everything associated with the JIT compiler state. */ /* Free everything associated with the JIT compiler state. */
@ -318,7 +350,6 @@ void lj_trace_freestate(global_State *g)
} }
#endif #endif
lj_mcode_free(J); lj_mcode_free(J);
lj_ir_k64_freeall(J);
lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
@ -403,7 +434,7 @@ static void trace_start(jit_State *J)
J->postproc = LJ_POST_NONE; J->postproc = LJ_POST_NONE;
lj_resetsplit(J); lj_resetsplit(J);
J->retryrec = 0; J->retryrec = 0;
J->ktracep = NULL; J->ktrace = 0;
setgcref(J->cur.startpt, obj2gco(J->pt)); setgcref(J->cur.startpt, obj2gco(J->pt));
L = J->L; L = J->L;
@ -427,7 +458,7 @@ static void trace_stop(jit_State *J)
BCOp op = bc_op(J->cur.startins); BCOp op = bc_op(J->cur.startins);
GCproto *pt = &gcref(J->cur.startpt)->pt; GCproto *pt = &gcref(J->cur.startpt)->pt;
TraceNo traceno = J->cur.traceno; TraceNo traceno = J->cur.traceno;
GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ GCtrace *T = J->curfinal;
lua_State *L; lua_State *L;
switch (op) { switch (op) {
@ -479,9 +510,6 @@ static void trace_stop(jit_State *J)
lj_mcode_commit(J, J->cur.mcode); lj_mcode_commit(J, J->cur.mcode);
J->postproc = LJ_POST_NONE; J->postproc = LJ_POST_NONE;
trace_save(J, T); trace_save(J, T);
if (J->ktracep) { /* Patch K64Array slot with the final GCtrace pointer. */
setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE);
}
L = J->L; L = J->L;
lj_vmevent_send(L, TRACE, lj_vmevent_send(L, TRACE,
@ -515,6 +543,10 @@ static int trace_abort(jit_State *J)
J->postproc = LJ_POST_NONE; J->postproc = LJ_POST_NONE;
lj_mcode_abort(J); lj_mcode_abort(J);
if (J->curfinal) {
lj_trace_free(J2G(J), J->curfinal);
J->curfinal = NULL;
}
if (tvisnumber(L->top-1)) if (tvisnumber(L->top-1))
e = (TraceError)numberVint(L->top-1); e = (TraceError)numberVint(L->top-1);
if (e == LJ_TRERR_MCODELM) { if (e == LJ_TRERR_MCODELM) {
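
Two threads run through the lj_trace.c changes above: the final GCtrace copy is now allocated up front by lj_trace_alloc() and kept in J->curfinal (freed again on abort), so the old "may throw OOM" allocation no longer happens while a finished trace is being committed, and the retired K64Array/ktracep machinery is replaced by fixed J->k64/J->k32 constant tables. One of those constants, LJ_K64_TOBIT, is the classic magic bias 2^52 + 2^51 used for fast number-to-integer conversion. A self-contained sketch of why that bit pattern works (this is an illustration of the constant, not LuaJIT code):

#include <stdio.h>
#include <stdint.h>

static int32_t tobit(double x)
{
  union { double d; uint64_t u64; } u;
  u.u64 = UINT64_C(0x4338000000000000);   /* The double 2^52 + 2^51. */
  u.d += x;                               /* Rounds the sum to an integer. */
  return (int32_t)(uint32_t)u.u64;        /* Low 32 bits of the mantissa. */
}

int main(void)
{
  printf("%d %d %d\n", tobit(42.7), tobit(-1.0), tobit(2147483648.0));
  /* Prints: 43 -1 -2147483648 */
  return 0;
}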


@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
/* Trace management. */ /* Trace management. */
LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T);
LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T);
LJ_FUNC void lj_trace_reenableproto(GCproto *pt); LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);


@ -17,6 +17,10 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud,
LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode);
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe);
#if LJ_ABI_WIN && LJ_TARGET_X86
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec,
void *unwinder, int errcode);
#endif
LJ_ASMF void lj_vm_unwind_c_eh(void); LJ_ASMF void lj_vm_unwind_c_eh(void);
LJ_ASMF void lj_vm_unwind_ff_eh(void); LJ_ASMF void lj_vm_unwind_ff_eh(void);
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_X86ORX64


@ -152,22 +152,15 @@ static void print_jit_status(lua_State *L)
putc('\n', stdout); putc('\n', stdout);
} }
static int getargs(lua_State *L, char **argv, int n) static void createargtable(lua_State *L, char **argv, int argc, int argf)
{ {
int narg;
int i; int i;
int argc = 0; lua_createtable(L, argc - argf, argf);
while (argv[argc]) argc++; /* count total number of arguments */
narg = argc - (n + 1); /* number of arguments to the script */
luaL_checkstack(L, narg + 3, "too many arguments to script");
for (i = n+1; i < argc; i++)
lua_pushstring(L, argv[i]);
lua_createtable(L, narg, n + 1);
for (i = 0; i < argc; i++) { for (i = 0; i < argc; i++) {
lua_pushstring(L, argv[i]); lua_pushstring(L, argv[i]);
lua_rawseti(L, -2, i - n); lua_rawseti(L, -2, i - argf);
} }
return narg; lua_setglobal(L, "arg");
} }
static int dofile(lua_State *L, const char *name) static int dofile(lua_State *L, const char *name)
@ -273,21 +266,30 @@ static void dotty(lua_State *L)
progname = oldprogname; progname = oldprogname;
} }
static int handle_script(lua_State *L, char **argv, int n) static int handle_script(lua_State *L, char **argx)
{ {
int status; int status;
const char *fname; const char *fname = argx[0];
int narg = getargs(L, argv, n); /* collect arguments */ if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
lua_setglobal(L, "arg");
fname = argv[n];
if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
fname = NULL; /* stdin */ fname = NULL; /* stdin */
status = luaL_loadfile(L, fname); status = luaL_loadfile(L, fname);
lua_insert(L, -(narg+1)); if (status == 0) {
if (status == 0) /* Fetch args from arg table. LUA_INIT or -e might have changed them. */
int narg = 0;
lua_getglobal(L, "arg");
if (lua_istable(L, -1)) {
do {
narg++;
lua_rawgeti(L, -narg, narg);
} while (!lua_isnil(L, -1));
lua_pop(L, 1);
lua_remove(L, -narg);
narg--;
} else {
lua_pop(L, 1);
}
status = docall(L, narg, 0); status = docall(L, narg, 0);
else }
lua_pop(L, narg);
return report(L, status); return report(L, status);
} }
@ -384,7 +386,8 @@ static int dobytecode(lua_State *L, char **argv)
} }
for (argv++; *argv != NULL; narg++, argv++) for (argv++; *argv != NULL; narg++, argv++)
lua_pushstring(L, *argv); lua_pushstring(L, *argv);
return report(L, lua_pcall(L, narg, 0, 0)); report(L, lua_pcall(L, narg, 0, 0));
return -1;
} }
/* check that argument has no extra characters at the end */ /* check that argument has no extra characters at the end */
@ -405,7 +408,7 @@ static int collectargs(char **argv, int *flags)
switch (argv[i][1]) { /* Check option. */ switch (argv[i][1]) { /* Check option. */
case '-': case '-':
notail(argv[i]); notail(argv[i]);
return (argv[i+1] != NULL ? i+1 : 0); return i+1;
case '\0': case '\0':
return i; return i;
case 'i': case 'i':
@ -430,23 +433,23 @@ static int collectargs(char **argv, int *flags)
case 'b': /* LuaJIT extension */ case 'b': /* LuaJIT extension */
if (*flags) return -1; if (*flags) return -1;
*flags |= FLAGS_EXEC; *flags |= FLAGS_EXEC;
return 0; return i+1;
case 'E': case 'E':
*flags |= FLAGS_NOENV; *flags |= FLAGS_NOENV;
break; break;
default: return -1; /* invalid option */ default: return -1; /* invalid option */
} }
} }
return 0; return i;
} }
static int runargs(lua_State *L, char **argv, int n) static int runargs(lua_State *L, char **argv, int argn)
{ {
int i; int i;
for (i = 1; i < n; i++) { for (i = 1; i < argn; i++) {
if (argv[i] == NULL) continue; if (argv[i] == NULL) continue;
lua_assert(argv[i][0] == '-'); lua_assert(argv[i][0] == '-');
switch (argv[i][1]) { /* option */ switch (argv[i][1]) {
case 'e': { case 'e': {
const char *chunk = argv[i] + 2; const char *chunk = argv[i] + 2;
if (*chunk == '\0') chunk = argv[++i]; if (*chunk == '\0') chunk = argv[++i];
@ -460,10 +463,10 @@ static int runargs(lua_State *L, char **argv, int n)
if (*filename == '\0') filename = argv[++i]; if (*filename == '\0') filename = argv[++i];
lua_assert(filename != NULL); lua_assert(filename != NULL);
if (dolibrary(L, filename)) if (dolibrary(L, filename))
return 1; /* stop if file fails */ return 1;
break; break;
} }
case 'j': { /* LuaJIT extension */ case 'j': { /* LuaJIT extension. */
const char *cmd = argv[i] + 2; const char *cmd = argv[i] + 2;
if (*cmd == '\0') cmd = argv[++i]; if (*cmd == '\0') cmd = argv[++i];
lua_assert(cmd != NULL); lua_assert(cmd != NULL);
@ -471,11 +474,11 @@ static int runargs(lua_State *L, char **argv, int n)
return 1; return 1;
break; break;
} }
case 'O': /* LuaJIT extension */ case 'O': /* LuaJIT extension. */
if (dojitopt(L, argv[i] + 2)) if (dojitopt(L, argv[i] + 2))
return 1; return 1;
break; break;
case 'b': /* LuaJIT extension */ case 'b': /* LuaJIT extension. */
return dobytecode(L, argv+i); return dobytecode(L, argv+i);
default: break; default: break;
} }
@ -508,45 +511,57 @@ static int pmain(lua_State *L)
{ {
struct Smain *s = &smain; struct Smain *s = &smain;
char **argv = s->argv; char **argv = s->argv;
int script; int argn;
int flags = 0; int flags = 0;
globalL = L; globalL = L;
if (argv[0] && argv[0][0]) progname = argv[0]; if (argv[0] && argv[0][0]) progname = argv[0];
LUAJIT_VERSION_SYM(); /* linker-enforced version check */
script = collectargs(argv, &flags); LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
if (script < 0) { /* invalid args? */
argn = collectargs(argv, &flags);
if (argn < 0) { /* Invalid args? */
print_usage(); print_usage();
s->status = 1; s->status = 1;
return 0; return 0;
} }
if ((flags & FLAGS_NOENV)) { if ((flags & FLAGS_NOENV)) {
lua_pushboolean(L, 1); lua_pushboolean(L, 1);
lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
} }
lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */
luaL_openlibs(L); /* open libraries */ /* Stop collector during library initialization. */
lua_gc(L, LUA_GCSTOP, 0);
luaL_openlibs(L);
lua_gc(L, LUA_GCRESTART, -1); lua_gc(L, LUA_GCRESTART, -1);
createargtable(L, argv, s->argc, argn);
if (!(flags & FLAGS_NOENV)) { if (!(flags & FLAGS_NOENV)) {
s->status = handle_luainit(L); s->status = handle_luainit(L);
if (s->status != 0) return 0; if (s->status != 0) return 0;
} }
if ((flags & FLAGS_VERSION)) print_version(); if ((flags & FLAGS_VERSION)) print_version();
s->status = runargs(L, argv, (script > 0) ? script : s->argc);
s->status = runargs(L, argv, argn);
if (s->status != 0) return 0; if (s->status != 0) return 0;
if (script) {
s->status = handle_script(L, argv, script); if (s->argc > argn) {
s->status = handle_script(L, argv + argn);
if (s->status != 0) return 0; if (s->status != 0) return 0;
} }
if ((flags & FLAGS_INTERACTIVE)) { if ((flags & FLAGS_INTERACTIVE)) {
print_jit_status(L); print_jit_status(L);
dotty(L); dotty(L);
} else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
if (lua_stdin_is_tty()) { if (lua_stdin_is_tty()) {
print_version(); print_version();
print_jit_status(L); print_jit_status(L);
dotty(L); dotty(L);
} else { } else {
dofile(L, NULL); /* executes stdin as a file */ dofile(L, NULL); /* Executes stdin as a file. */
} }
} }
return 0; return 0;
@ -555,7 +570,7 @@ static int pmain(lua_State *L)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
int status; int status;
lua_State *L = lua_open(); /* create state */ lua_State *L = lua_open();
if (L == NULL) { if (L == NULL) {
l_message(argv[0], "cannot create state: not enough memory"); l_message(argv[0], "cannot create state: not enough memory");
return EXIT_FAILURE; return EXIT_FAILURE;
@ -565,6 +580,6 @@ int main(int argc, char **argv)
status = lua_cpcall(L, pmain, NULL); status = lua_cpcall(L, pmain, NULL);
report(L, status); report(L, status);
lua_close(L); lua_close(L);
return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS;
} }
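
The luajit.c changes build the global arg table once in createargtable(), with the script at arg[0], the interpreter name and options at negative indices, and the script arguments from arg[1] upward; handle_script() then fetches the arguments back out of that table rather than from the C argv, so anything that ran earlier (per the patch comment, LUA_INIT or an -e chunk) can have inspected or changed them. A compilable sketch of that round trip for a command line like `luajit script.lua a b`, assuming the standard Lua C API headers and omitting error checks:

#include <stdio.h>
#include <lua.h>
#include <lauxlib.h>
#include <lualib.h>

int main(void)
{
  char *argv[] = { "luajit", "script.lua", "a", "b" };
  int argc = 4, argf = 1, i, narg = 0;
  lua_State *L = luaL_newstate();
  /* createargtable(): argv[argf] becomes arg[0]. */
  lua_createtable(L, argc - argf, argf);
  for (i = 0; i < argc; i++) {
    lua_pushstring(L, argv[i]);
    lua_rawseti(L, -2, i - argf);    /* arg[-1]="luajit", arg[0]=script. */
  }
  lua_setglobal(L, "arg");
  /* handle_script(): push arg[1..n] on the stack and count them. */
  lua_getglobal(L, "arg");
  do {
    narg++;
    lua_rawgeti(L, -narg, narg);
  } while (!lua_isnil(L, -1));
  lua_pop(L, 1);                     /* Drop the terminating nil. */
  lua_remove(L, -narg);              /* Drop the arg table below the args. */
  narg--;
  printf("narg = %d\n", narg);       /* 2: "a" and "b" */
  lua_close(L);
  return 0;
}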


@ -57,7 +57,7 @@
|.define TMP2, r14 |.define TMP2, r14
|.define TMP3, r15 |.define TMP3, r15
| |
|// Calling conventions. |// MIPS o32 calling convention.
|.define CFUNCADDR, r25 |.define CFUNCADDR, r25
|.define CARG1, r4 |.define CARG1, r4
|.define CARG2, r5 |.define CARG2, r5
@ -4546,24 +4546,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISNEXT: case BC_ISNEXT:
| // RA = base*8, RD = target (points to ITERN) | // RA = base*8, RD = target (points to ITERN)
| addu RA, BASE, RA | addu RA, BASE, RA
| lw TMP0, -24+HI(RA) | srl TMP0, RD, 1
| lw CFUNC:TMP1, -24+LO(RA) | lw CARG1, -24+HI(RA)
| lw TMP2, -16+HI(RA) | lw CFUNC:CARG2, -24+LO(RA)
| lw TMP3, -8+HI(RA) | addu TMP0, PC, TMP0
| lw CARG3, -16+HI(RA)
| lw CARG4, -8+HI(RA)
| li AT, LJ_TFUNC | li AT, LJ_TFUNC
| bne TMP0, AT, >5 | bne CARG1, AT, >5
|. addiu TMP2, TMP2, -LJ_TTAB |. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
| lbu TMP1, CFUNC:TMP1->ffid | lbu CARG2, CFUNC:CARG2->ffid
| addiu TMP3, TMP3, -LJ_TNIL | addiu CARG3, CARG3, -LJ_TTAB
| srl TMP0, RD, 1 | addiu CARG4, CARG4, -LJ_TNIL
| or TMP2, TMP2, TMP3 | or CARG3, CARG3, CARG4
| addiu TMP1, TMP1, -FF_next_N | addiu CARG2, CARG2, -FF_next_N
| addu TMP0, PC, TMP0 | or CARG2, CARG2, CARG3
| or TMP1, TMP1, TMP2 | bnez CARG2, >5
| bnez TMP1, >5 |. lui TMP1, 0xfffe
|. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
| addu PC, TMP0, TMP2 | addu PC, TMP0, TMP2
| lui TMP1, 0xfffe
| ori TMP1, TMP1, 0x7fff | ori TMP1, TMP1, 0x7fff
| sw r0, -8+LO(RA) // Initialize control var. | sw r0, -8+LO(RA) // Initialize control var.
| sw TMP1, -8+HI(RA) | sw TMP1, -8+HI(RA)
@ -4573,7 +4573,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li TMP3, BC_JMP | li TMP3, BC_JMP
| li TMP1, BC_ITERC | li TMP1, BC_ITERC
| sb TMP3, -4+OFS_OP(PC) | sb TMP3, -4+OFS_OP(PC)
| addu PC, TMP0, TMP2 | addu PC, TMP0, TMP2
| b <1 | b <1
|. sb TMP1, OFS_OP(PC) |. sb TMP1, OFS_OP(PC)
break; break;

src/vm_mips64.dasc: new file, 4849 lines (diff suppressed because it is too large)


@ -1105,11 +1105,11 @@ static void build_subroutines(BuildCtx *ctx)
| mov BASE, L:RB->base | mov BASE, L:RB->base
| mov NARGS:RDd, TMP1d | mov NARGS:RDd, TMP1d
| mov LFUNC:RB, [RA-16] | mov LFUNC:RB, [RA-16]
| cleartp LFUNC:RB
| add NARGS:RDd, 1 | add NARGS:RDd, 1
| // This is fragile. L->base must not move, KBASE must always be defined. | // This is fragile. L->base must not move, KBASE must always be defined.
| cmp KBASE, BASE // Continue with CALLT if flag set. | cmp KBASE, BASE // Continue with CALLT if flag set.
| je ->BC_CALLT_Z | je ->BC_CALLT_Z
| cleartp LFUNC:RB
| mov BASE, RA | mov BASE, RA
| ins_call // Otherwise call resolved metamethod. | ins_call // Otherwise call resolved metamethod.
| |
@ -2401,8 +2401,7 @@ static void build_subroutines(BuildCtx *ctx)
| movzx RCd, byte [rbp-8] // Reconstruct exit number. | movzx RCd, byte [rbp-8] // Reconstruct exit number.
| mov RCH, byte [rbp-16] | mov RCH, byte [rbp-16]
| mov [rbp-8], r15; mov [rbp-16], r14 | mov [rbp-8], r15; mov [rbp-16], r14
| // Caveat: DISPATCH is rbx. | // DISPATCH is preserved on-trace in LJ_GC64 mode.
| mov DISPATCH, [ebp]
| mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
| set_vmstate EXIT | set_vmstate EXIT
| mov [DISPATCH+DISPATCH_J(exitno)], RCd | mov [DISPATCH+DISPATCH_J(exitno)], RCd
@ -3516,7 +3515,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_AD // RA = level, RD = target | ins_AD // RA = level, RD = target
| branchPC RD // Do this first to free RD. | branchPC RD // Do this first to free RD.
| mov L:RB, SAVE_L | mov L:RB, SAVE_L
| cmp dword L:RB->openupval, 0 | cmp aword L:RB->openupval, 0
| je >1 | je >1
| mov L:RB->base, BASE | mov L:RB->base, BASE
| lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE


@ -121,19 +121,68 @@
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|.if not X64 // x86 stack layout. |.if not X64 // x86 stack layout.
| |
|.if WIN
|
|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
|.macro saveregs_
| push edi; push esi; push ebx
| push extern lj_err_unwind_win
| fs; push dword [0]
| fs; mov [0], esp
| sub esp, CFRAME_SPACE
|.endmacro
|.macro restoreregs
| add esp, CFRAME_SPACE
| fs; pop dword [0]
| pop edi // Short for esp += 4.
| pop ebx; pop esi; pop edi; pop ebp
|.endmacro
|
|.else
|
|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
|.macro saveregs_ |.macro saveregs_
| push edi; push esi; push ebx | push edi; push esi; push ebx
| sub esp, CFRAME_SPACE | sub esp, CFRAME_SPACE
|.endmacro |.endmacro
|.macro saveregs
| push ebp; saveregs_
|.endmacro
|.macro restoreregs |.macro restoreregs
| add esp, CFRAME_SPACE | add esp, CFRAME_SPACE
| pop ebx; pop esi; pop edi; pop ebp | pop ebx; pop esi; pop edi; pop ebp
|.endmacro |.endmacro
| |
|.endif
|
|.macro saveregs
| push ebp; saveregs_
|.endmacro
|
|.if WIN
|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
|.define SAVE_NRES, aword [esp+aword*18]
|.define SAVE_CFRAME, aword [esp+aword*17]
|.define SAVE_L, aword [esp+aword*16]
|//----- 16 byte aligned, ^^^ arguments from C caller
|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
|.define SAVE_R4, aword [esp+aword*14]
|.define SAVE_R3, aword [esp+aword*13]
|.define SAVE_R2, aword [esp+aword*12]
|//----- 16 byte aligned
|.define SAVE_R1, aword [esp+aword*11]
|.define SEH_FUNC, aword [esp+aword*10]
|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
|.define UNUSED2, aword [esp+aword*8]
|//----- 16 byte aligned
|.define UNUSED1, aword [esp+aword*7]
|.define SAVE_PC, aword [esp+aword*6]
|.define TMP2, aword [esp+aword*5]
|.define TMP1, aword [esp+aword*4]
|//----- 16 byte aligned
|.define ARG4, aword [esp+aword*3]
|.define ARG3, aword [esp+aword*2]
|.define ARG2, aword [esp+aword*1]
|.define ARG1, aword [esp] //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee
|.else
|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
|.define SAVE_NRES, aword [esp+aword*14] |.define SAVE_NRES, aword [esp+aword*14]
|.define SAVE_CFRAME, aword [esp+aword*13] |.define SAVE_CFRAME, aword [esp+aword*13]
@ -154,6 +203,7 @@
|.define ARG2, aword [esp+aword*1] |.define ARG2, aword [esp+aword*1]
|.define ARG1, aword [esp] //<-- esp while in interpreter. |.define ARG1, aword [esp] //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee |//----- 16 byte aligned, ^^^ arguments for C callee
|.endif
| |
|// FPARGx overlaps ARGx and ARG(x+1) on x86. |// FPARGx overlaps ARGx and ARG(x+1) on x86.
|.define FPARG3, qword [esp+qword*1] |.define FPARG3, qword [esp+qword*1]
@ -554,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx)
|.else |.else
| mov eax, FCARG2 // Error return status for vm_pcall. | mov eax, FCARG2 // Error return status for vm_pcall.
| mov esp, FCARG1 | mov esp, FCARG1
|.if WIN
| lea FCARG1, SEH_NEXT
| fs; mov [0], FCARG1
|.endif
|.endif |.endif
|->vm_unwind_c_eh: // Landing pad for external unwinder. |->vm_unwind_c_eh: // Landing pad for external unwinder.
| mov L:RB, SAVE_L | mov L:RB, SAVE_L
@ -577,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx)
|.else |.else
| and FCARG1, CFRAME_RAWMASK | and FCARG1, CFRAME_RAWMASK
| mov esp, FCARG1 | mov esp, FCARG1
|.if WIN
| lea FCARG1, SEH_NEXT
| fs; mov [0], FCARG1
|.endif
|.endif |.endif
|->vm_unwind_ff_eh: // Landing pad for external unwinder. |->vm_unwind_ff_eh: // Landing pad for external unwinder.
| mov L:RB, SAVE_L | mov L:RB, SAVE_L
@ -590,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx)
| set_vmstate INTERP | set_vmstate INTERP
| jmp ->vm_returnc // Increments RD/MULTRES and returns. | jmp ->vm_returnc // Increments RD/MULTRES and returns.
| |
|.if WIN and not X64
|->vm_rtlunwind@16: // Thin layer around RtlUnwind.
| // (void *cframe, void *excptrec, void *unwinder, int errcode)
| mov [esp], FCARG1 // Return value for RtlUnwind.
| push FCARG2 // Exception record for RtlUnwind.
| push 0 // Ignored by RtlUnwind.
| push dword [FCARG1+CFRAME_OFS_SEH]
| call extern RtlUnwind@16 // Violates ABI (clobbers too much).
| mov FCARG1, eax
| mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
| ret // Jump to unwinder.
|.endif
|
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- Grow stack for calls ----------------------------------------------- |//-- Grow stack for calls -----------------------------------------------
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
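
The WIN x86 frame layout above grows by two slots (SEH_FUNC/SEH_NEXT) because the prologue now registers a frame-based SEH handler: it pushes lj_err_unwind_win and the previous fs:[0], points fs:[0] at those two slots, the unwind entry points re-point fs:[0] at the frame's own record after resetting esp, and the epilogue pops the saved link back into fs:[0]; vm_rtlunwind is the thin wrapper that hands a cframe and exception record to RtlUnwind. A layout-only sketch of the registration record fs:[0] points to; the field names follow the usual Win32 convention, and nothing here actually installs a handler:

#include <stdio.h>
#include <stddef.h>

typedef struct SEHRecord {
  struct SEHRecord *next;  /* Saved previous fs:[0] (the SEH_NEXT slot). */
  void *handler;           /* lj_err_unwind_win (the SEH_FUNC slot).     */
} SEHRecord;

int main(void)
{
  printf("next at offset %u, handler at offset %u\n",
         (unsigned)offsetof(SEHRecord, next),
         (unsigned)offsetof(SEHRecord, handler));
  return 0;
}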