mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-08 15:34:09 +00:00
Merge remote-tracking branch 'upstream/v2.1' into ppc64-port
This commit is contained in:
commit
c00253828a
@ -153,7 +153,7 @@ Contains the target OS name:
|
||||
<h3 id="jit_arch"><tt>jit.arch</tt></h3>
|
||||
<p>
|
||||
Contains the target architecture name:
|
||||
"x86", "x64", "arm", "ppc", or "mips".
|
||||
"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".
|
||||
</p>
|
||||
|
||||
<h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler optimization control</h2>
|
||||
|
@ -349,6 +349,7 @@ break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
|
||||
LuaJIT supports some extensions from Lua 5.3:
|
||||
<ul>
|
||||
<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
|
||||
<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
|
||||
</ul>
|
||||
|
||||
<h2 id="exceptions">C++ Exception Interoperability</h2>
|
||||
@ -365,25 +366,30 @@ the toolchain used to compile LuaJIT:
|
||||
</tr>
|
||||
<tr class="odd separate">
|
||||
<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
|
||||
<td class="exccompiler">GCC 4.3+</td>
|
||||
<td class="exccompiler">GCC 4.3+, Clang</td>
|
||||
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td class="excplatform">Other platforms, DWARF2 unwinding</td>
|
||||
<td class="exccompiler">GCC</td>
|
||||
<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
|
||||
<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
|
||||
<td class="exccompiler">GCC, Clang</td>
|
||||
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td class="excplatform">Other platforms, DWARF2 unwinding</td>
|
||||
<td class="exccompiler">GCC, Clang</td>
|
||||
<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td class="excplatform">Windows/x64</td>
|
||||
<td class="exccompiler">MSVC or WinSDK</td>
|
||||
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<tr class="odd">
|
||||
<td class="excplatform">Windows/x86</td>
|
||||
<td class="exccompiler">Any</td>
|
||||
<td class="excinterop"><b style="color: #a00000;">No</b></td>
|
||||
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<tr class="even">
|
||||
<td class="excplatform">Other platforms</td>
|
||||
<td class="exccompiler">Other compilers</td>
|
||||
<td class="excinterop"><b style="color: #a00000;">No</b></td>
|
||||
@ -432,14 +438,6 @@ C++ destructors.</li>
|
||||
<li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
|
||||
<li>Throwing Lua errors across C++ frames will <b>not</b> call
|
||||
C++ destructors.</li>
|
||||
<li>Additionally, on Windows/x86 with SEH-based C++ exceptions:
|
||||
it's <b>not</b> safe to throw a Lua error across any frames containing
|
||||
a C++ function with any try/catch construct or using variables with
|
||||
(implicit) destructors. This also applies to any functions which may be
|
||||
inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
|
||||
is called inside or outside of a try/catch or whether any object actually
|
||||
needs to be destroyed: the SEH chain is corrupted and this will eventually
|
||||
lead to the termination of the process.</li>
|
||||
</ul>
|
||||
<br class="flush">
|
||||
</div>
|
||||
|
@ -122,7 +122,7 @@ operating systems, CPUs and compilers:
|
||||
<tr class="even">
|
||||
<td class="compatcpu">x64 (64 bit)</td>
|
||||
<td class="compatos">GCC 4.2+</td>
|
||||
<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td>
|
||||
<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td>
|
||||
<td class="compatos">XCode 5.0+<br>Clang</td>
|
||||
<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
|
||||
</tr>
|
||||
@ -148,7 +148,7 @@ operating systems, CPUs and compilers:
|
||||
<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td class="compatcpu"><a href="#cross2">MIPS</a></td>
|
||||
<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td>
|
||||
<td class="compatos">GCC 4.3+</td>
|
||||
<td class="compatos">GCC 4.3+</td>
|
||||
<td class="compatos compatno"> </td>
|
||||
@ -202,7 +202,7 @@ which is probably the default on your system, anyway. Simply run:
|
||||
make
|
||||
</pre>
|
||||
<p>
|
||||
This always builds a native x86, x64 or PPC binary, depending on the host OS
|
||||
This always builds a native binary, depending on the host OS
|
||||
you're running this command on. Check the section on
|
||||
<a href="#cross">cross-compilation</a> for more options.
|
||||
</p>
|
||||
@ -333,25 +333,36 @@ directory where <tt>luajit.exe</tt> is installed
|
||||
|
||||
<h2 id="cross">Cross-compiling LuaJIT</h2>
|
||||
<p>
|
||||
The GNU Makefile-based build system allows cross-compiling on any host
|
||||
for any supported target, as long as both architectures have the same
|
||||
pointer size. If you want to cross-compile to any 32 bit target on an
|
||||
x64 OS, you need to install the multilib development package (e.g.
|
||||
<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
|
||||
(<tt>HOST_CC="gcc -m32"</tt>).
|
||||
First, let's clear up some terminology:
|
||||
</p>
|
||||
<ul>
|
||||
<li>Host: This is your development system, usually based on an x64 or x86 CPU.</li>
|
||||
<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
|
||||
<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
|
||||
<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
|
||||
<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
|
||||
</ul>
|
||||
<p>
|
||||
The GNU Makefile-based build system allows cross-compiling on any host
|
||||
for any supported target:
|
||||
</p>
|
||||
<ul>
|
||||
<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
|
||||
<li>Both host and target architectures must have the same pointer size.</li>
|
||||
<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
|
||||
<li>64 bit targets always require compilation on a 64 bit host.</li>
|
||||
</ul>
|
||||
<p>
|
||||
You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
|
||||
target OS differ, or you'll get assembler or linker errors. E.g. if
|
||||
you're compiling on a Windows or OSX host for embedded Linux or Android,
|
||||
you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
|
||||
minimal target OS, you may need to disable the built-in allocator in
|
||||
<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to
|
||||
specify the same <tt>TARGET_SYS</tt> for the install step, too.
|
||||
target OS differ, or you'll get assembler or linker errors:
|
||||
</p>
|
||||
<ul>
|
||||
<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
|
||||
<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
|
||||
<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
|
||||
</ul>
|
||||
<p>
|
||||
The examples below only show some popular targets — please check
|
||||
the comments in <tt>src/Makefile</tt> for more details.
|
||||
Here are some examples where host and target have the same CPU:
|
||||
</p>
|
||||
<pre class="code">
|
||||
# Cross-compile to a 32 bit binary on a multilib x64 OS
|
||||
@ -369,38 +380,47 @@ use the canonical toolchain triplets for Linux.
|
||||
</p>
|
||||
<p>
|
||||
Since there's often no easy way to detect CPU features at runtime, it's
|
||||
important to compile with the proper CPU or architecture settings. You
|
||||
can specify these when building the toolchain yourself. Or add
|
||||
<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For
|
||||
ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting,
|
||||
too. Otherwise LuaJIT may not run at the full performance of your target
|
||||
CPU.
|
||||
important to compile with the proper CPU or architecture settings:
|
||||
</p>
|
||||
<ul>
|
||||
<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
|
||||
<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
|
||||
<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
|
||||
<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
|
||||
</ul>
|
||||
<p>
|
||||
Here are some examples for targets with a different CPU than the host:
|
||||
</p>
|
||||
<pre class="code">
|
||||
# ARM soft-float
|
||||
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
|
||||
TARGET_CFLAGS="-mfloat-abi=soft"
|
||||
|
||||
# ARM soft-float ABI with VFP (example for Cortex-A8)
|
||||
# ARM soft-float ABI with VFP (example for Cortex-A9)
|
||||
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
|
||||
TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp"
|
||||
TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
|
||||
|
||||
# ARM hard-float ABI with VFP (armhf, requires recent toolchain)
|
||||
# ARM hard-float ABI with VFP (armhf, most modern toolchains)
|
||||
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
|
||||
|
||||
# ARM64 (requires x64 host)
|
||||
# ARM64
|
||||
make CROSS=aarch64-linux-
|
||||
|
||||
# PPC
|
||||
make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
|
||||
|
||||
# MIPS big-endian
|
||||
# MIPS32 big-endian
|
||||
make HOST_CC="gcc -m32" CROSS=mips-linux-
|
||||
# MIPS little-endian
|
||||
# MIPS32 little-endian
|
||||
make HOST_CC="gcc -m32" CROSS=mipsel-linux-
|
||||
|
||||
# MIPS64 big-endian
|
||||
make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
|
||||
# MIPS64 little-endian
|
||||
make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
|
||||
</pre>
|
||||
<p>
|
||||
You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/sdk/ndk/index.html"><span class="ext">»</span> Android NDK</a>.
|
||||
You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/index.html">Android NDK</a>.
|
||||
The environment variables need to match the install locations and the
|
||||
desired target platform. E.g. Android 4.0 corresponds to ABI level 14.
|
||||
For details check the folder <tt>docs</tt> in the NDK directory.
|
||||
@ -414,7 +434,7 @@ to build/deploy or which lowest common denominator you want to pick:
|
||||
# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
|
||||
NDK=/opt/android/ndk
|
||||
NDKABI=8
|
||||
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
|
||||
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
|
||||
NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
|
||||
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
|
||||
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
|
||||
@ -422,16 +442,16 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
|
||||
# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
|
||||
NDK=/opt/android/ndk
|
||||
NDKABI=14
|
||||
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
|
||||
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
|
||||
NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
|
||||
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
|
||||
NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
|
||||
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
|
||||
|
||||
# Android/MIPS, mips (MIPS32R1 hard-float), Android 4.0+ (ICS)
|
||||
# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS)
|
||||
NDK=/opt/android/ndk
|
||||
NDKABI=14
|
||||
NDKVER=$NDK/toolchains/mipsel-linux-android-4.6
|
||||
NDKVER=$NDK/toolchains/mipsel-linux-android-4.9
|
||||
NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
|
||||
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
|
||||
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
|
||||
@ -439,7 +459,7 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
|
||||
# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
|
||||
NDK=/opt/android/ndk
|
||||
NDKABI=14
|
||||
NDKVER=$NDK/toolchains/x86-4.6
|
||||
NDKVER=$NDK/toolchains/x86-4.9
|
||||
NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
|
||||
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
|
||||
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
|
||||
@ -459,14 +479,15 @@ Or use Android. :-p
|
||||
ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
|
||||
ICC=$(xcrun --sdk iphoneos --find clang)
|
||||
ISDKF="-arch armv7 -isysroot $ISDKP"
|
||||
make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \
|
||||
TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
|
||||
make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \
|
||||
CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
|
||||
|
||||
# iOS/ARM64
|
||||
ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
|
||||
ICC=$(xcrun --sdk iphoneos --find clang)
|
||||
ISDKF="-arch arm64 -isysroot $ISDKP"
|
||||
make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
|
||||
make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
|
||||
TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
|
||||
</pre>
|
||||
|
||||
<h3 id="consoles">Cross-compiling for consoles</h3>
|
||||
|
@ -169,10 +169,10 @@ LuaJIT is Copyright © 2005-2016 Mike Pall, released under the
|
||||
<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
|
||||
</table>
|
||||
<table class="feature compiler">
|
||||
<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
|
||||
<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
|
||||
</table>
|
||||
<table class="feature cpu">
|
||||
<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr>
|
||||
<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
|
||||
</table>
|
||||
<table class="feature fcompat">
|
||||
<tr><td>Lua 5.1<br>API+ABI</td><td>+ JIT</td><td>+ BitOp</td><td>+ FFI</td><td>Drop-in<br>DLL/.so</td></tr>
|
||||
|
@ -91,12 +91,6 @@ hooks for non-Lua functions) and shows slightly different behavior
|
||||
in LuaJIT (no per-coroutine hooks, no tail call counting).
|
||||
</li>
|
||||
<li>
|
||||
Some checks are missing in the JIT-compiled code for obscure situations
|
||||
with <b>open upvalues aliasing</b> one of the SSA slots later on (or
|
||||
vice versa). Bonus points, if you can find a real world test case for
|
||||
this.
|
||||
</li>
|
||||
<li>
|
||||
Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
|
||||
handled correctly. The error may fall through an on-trace
|
||||
<tt>pcall</tt> or it may be passed on to the function set with
|
||||
|
@ -21,7 +21,7 @@ enum {
|
||||
/* The following actions need a buffer position. */
|
||||
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
|
||||
/* The following actions also have an argument. */
|
||||
DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
|
||||
DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
|
||||
DASM__MAX
|
||||
};
|
||||
|
||||
@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...)
|
||||
*pl = -pos; /* Label exists now. */
|
||||
b[pos++] = ofs; /* Store pass1 offset estimate. */
|
||||
break;
|
||||
case DASM_IMM:
|
||||
case DASM_IMM: case DASM_IMMS:
|
||||
#ifdef DASM_CHECKS
|
||||
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
|
||||
#endif
|
||||
@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp)
|
||||
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
|
||||
case DASM_REL_LG: case DASM_REL_PC: pos++; break;
|
||||
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
|
||||
case DASM_IMM: pos++; break;
|
||||
case DASM_IMM: case DASM_IMMS: pos++; break;
|
||||
}
|
||||
}
|
||||
stop: (void)0;
|
||||
@ -356,7 +356,7 @@ int dasm_encode(Dst_DECL, void *buffer)
|
||||
if (ins & 2048)
|
||||
n = n - (int)((char *)cp - base);
|
||||
else
|
||||
n = (n + (int)base) & 0x0fffffff;
|
||||
n = (n + (int)(size_t)base) & 0x0fffffff;
|
||||
patchrel:
|
||||
CK((n & 3) == 0 &&
|
||||
((n + ((ins & 2048) ? 0x00020000 : 0)) >>
|
||||
@ -367,6 +367,9 @@ int dasm_encode(Dst_DECL, void *buffer)
|
||||
ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
|
||||
break;
|
||||
case DASM_LABEL_PC: break;
|
||||
case DASM_IMMS:
|
||||
cp[-1] |= ((n>>3) & 4); n &= 0x1f;
|
||||
/* fallthrough */
|
||||
case DASM_IMM:
|
||||
cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
|
||||
break;
|
||||
|
@ -1,17 +1,19 @@
|
||||
------------------------------------------------------------------------------
|
||||
-- DynASM MIPS module.
|
||||
-- DynASM MIPS32/MIPS64 module.
|
||||
--
|
||||
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
|
||||
-- See dynasm.lua for full copyright notice.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
local mips64 = mips64
|
||||
|
||||
-- Module information:
|
||||
local _info = {
|
||||
arch = "mips",
|
||||
description = "DynASM MIPS module",
|
||||
arch = mips64 and "mips64" or "mips",
|
||||
description = "DynASM MIPS32/MIPS64 module",
|
||||
version = "1.4.0",
|
||||
vernum = 10400,
|
||||
release = "2015-10-18",
|
||||
release = "2016-05-24",
|
||||
author = "Mike Pall",
|
||||
license = "MIT",
|
||||
}
|
||||
@ -27,7 +29,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
|
||||
local match, gmatch = _s.match, _s.gmatch
|
||||
local concat, sort = table.concat, table.sort
|
||||
local bit = bit or require("bit")
|
||||
local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex
|
||||
local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
|
||||
local tohex = bit.tohex
|
||||
|
||||
-- Inherited tables and callbacks.
|
||||
local g_opt, g_arch
|
||||
@ -38,7 +41,7 @@ local wline, werror, wfatal, wwarn
|
||||
local action_names = {
|
||||
"STOP", "SECTION", "ESC", "REL_EXT",
|
||||
"ALIGN", "REL_LG", "LABEL_LG",
|
||||
"REL_PC", "LABEL_PC", "IMM",
|
||||
"REL_PC", "LABEL_PC", "IMM", "IMMS",
|
||||
}
|
||||
|
||||
-- Maximum number of section buffer positions for dasm_put().
|
||||
@ -251,6 +254,10 @@ local map_op = {
|
||||
bnel_3 = "54000000STB",
|
||||
blezl_2 = "58000000SB",
|
||||
bgtzl_2 = "5c000000SB",
|
||||
daddi_3 = mips64 and "60000000TSI",
|
||||
daddiu_3 = mips64 and "64000000TSI",
|
||||
ldl_2 = mips64 and "68000000TO",
|
||||
ldr_2 = mips64 and "6c000000TO",
|
||||
lb_2 = "80000000TO",
|
||||
lh_2 = "84000000TO",
|
||||
lwl_2 = "88000000TO",
|
||||
@ -258,23 +265,30 @@ local map_op = {
|
||||
lbu_2 = "90000000TO",
|
||||
lhu_2 = "94000000TO",
|
||||
lwr_2 = "98000000TO",
|
||||
lwu_2 = mips64 and "9c000000TO",
|
||||
sb_2 = "a0000000TO",
|
||||
sh_2 = "a4000000TO",
|
||||
swl_2 = "a8000000TO",
|
||||
sw_2 = "ac000000TO",
|
||||
sdl_2 = mips64 and "b0000000TO",
|
||||
sdr_2 = mips64 and "b4000000TO", -- SDR: major opcode 0x2d, i.e. 0x2d << 26 = 0xb4000000 (SDL is 0x2c = 0xb0000000).
|
||||
swr_2 = "b8000000TO",
|
||||
cache_2 = "bc000000NO",
|
||||
ll_2 = "c0000000TO",
|
||||
lwc1_2 = "c4000000HO",
|
||||
pref_2 = "cc000000NO",
|
||||
ldc1_2 = "d4000000HO",
|
||||
ld_2 = mips64 and "dc000000TO",
|
||||
sc_2 = "e0000000TO",
|
||||
swc1_2 = "e4000000HO",
|
||||
scd_2 = mips64 and "f0000000TO",
|
||||
sdc1_2 = "f4000000HO",
|
||||
sd_2 = mips64 and "fc000000TO",
|
||||
|
||||
-- Opcode SPECIAL.
|
||||
nop_0 = "00000000",
|
||||
sll_3 = "00000000DTA",
|
||||
sextw_2 = "00000000DT",
|
||||
movf_2 = "00000001DS",
|
||||
movf_3 = "00000001DSC",
|
||||
movt_2 = "00010001DS",
|
||||
@ -285,6 +299,7 @@ local map_op = {
|
||||
sllv_3 = "00000004DTS",
|
||||
srlv_3 = "00000006DTS",
|
||||
rotrv_3 = "00000046DTS",
|
||||
drotrv_3 = mips64 and "00000056DTS",
|
||||
srav_3 = "00000007DTS",
|
||||
jr_1 = "00000008S",
|
||||
jalr_1 = "0000f809S",
|
||||
@ -300,15 +315,22 @@ local map_op = {
|
||||
mthi_1 = "00000011S",
|
||||
mflo_1 = "00000012D",
|
||||
mtlo_1 = "00000013S",
|
||||
dsllv_3 = mips64 and "00000014DTS",
|
||||
dsrlv_3 = mips64 and "00000016DTS",
|
||||
dsrav_3 = mips64 and "00000017DTS",
|
||||
mult_2 = "00000018ST",
|
||||
multu_2 = "00000019ST",
|
||||
div_2 = "0000001aST",
|
||||
divu_2 = "0000001bST",
|
||||
dmult_2 = mips64 and "0000001cST",
|
||||
dmultu_2 = mips64 and "0000001dST",
|
||||
ddiv_2 = mips64 and "0000001eST",
|
||||
ddivu_2 = mips64 and "0000001fST",
|
||||
add_3 = "00000020DST",
|
||||
move_2 = "00000021DS",
|
||||
move_2 = mips64 and "00000025DS" or "00000021DS",
|
||||
addu_3 = "00000021DST",
|
||||
sub_3 = "00000022DST",
|
||||
negu_2 = "00000023DT",
|
||||
negu_2 = mips64 and "0000002fDT" or "00000023DT",
|
||||
subu_3 = "00000023DST",
|
||||
and_3 = "00000024DST",
|
||||
or_3 = "00000025DST",
|
||||
@ -317,6 +339,10 @@ local map_op = {
|
||||
nor_3 = "00000027DST",
|
||||
slt_3 = "0000002aDST",
|
||||
sltu_3 = "0000002bDST",
|
||||
dadd_3 = mips64 and "0000002cDST",
|
||||
daddu_3 = mips64 and "0000002dDST",
|
||||
dsub_3 = mips64 and "0000002eDST",
|
||||
dsubu_3 = mips64 and "0000002fDST",
|
||||
tge_2 = "00000030ST",
|
||||
tge_3 = "00000030STZ",
|
||||
tgeu_2 = "00000031ST",
|
||||
@ -329,6 +355,14 @@ local map_op = {
|
||||
teq_3 = "00000034STZ",
|
||||
tne_2 = "00000036ST",
|
||||
tne_3 = "00000036STZ",
|
||||
dsll_3 = mips64 and "00000038DTa",
|
||||
dsrl_3 = mips64 and "0000003aDTa",
|
||||
drotr_3 = mips64 and "0020003aDTa",
|
||||
dsra_3 = mips64 and "0000003bDTa",
|
||||
dsll32_3 = mips64 and "0000003cDTA",
|
||||
dsrl32_3 = mips64 and "0000003eDTA",
|
||||
drotr32_3 = mips64 and "0020003eDTA",
|
||||
dsra32_3 = mips64 and "0000003fDTA",
|
||||
|
||||
-- Opcode REGIMM.
|
||||
bltz_2 = "04000000SB",
|
||||
@ -356,13 +390,24 @@ local map_op = {
|
||||
msubu_2 = "70000005ST",
|
||||
clz_2 = "70000020DS=",
|
||||
clo_2 = "70000021DS=",
|
||||
dclz_2 = mips64 and "70000024DS=",
|
||||
dclo_2 = mips64 and "70000025DS=",
|
||||
sdbbp_0 = "7000003f",
|
||||
sdbbp_1 = "7000003fY",
|
||||
|
||||
-- Opcode SPECIAL3.
|
||||
ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
|
||||
dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
|
||||
dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
|
||||
dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1
|
||||
zextw_2 = mips64 and "7c00f803TS",
|
||||
ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1
|
||||
dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33
|
||||
dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
|
||||
dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1
|
||||
wsbh_2 = "7c0000a0DT",
|
||||
dsbh_2 = mips64 and "7c0000a4DT",
|
||||
dshd_2 = mips64 and "7c000164DT",
|
||||
seb_2 = "7c000420DT",
|
||||
seh_2 = "7c000620DT",
|
||||
rdhwr_2 = "7c00003bTD",
|
||||
@ -370,8 +415,12 @@ local map_op = {
|
||||
-- Opcode COP0.
|
||||
mfc0_2 = "40000000TD",
|
||||
mfc0_3 = "40000000TDW",
|
||||
dmfc0_2 = mips64 and "40200000TD",
|
||||
dmfc0_3 = mips64 and "40200000TDW",
|
||||
mtc0_2 = "40800000TD",
|
||||
mtc0_3 = "40800000TDW",
|
||||
dmtc0_2 = mips64 and "40a00000TD",
|
||||
dmtc0_3 = mips64 and "40a00000TDW",
|
||||
rdpgpr_2 = "41400000DT",
|
||||
di_0 = "41606000",
|
||||
di_1 = "41606000T",
|
||||
@ -388,9 +437,11 @@ local map_op = {
|
||||
|
||||
-- Opcode COP1.
|
||||
mfc1_2 = "44000000TG",
|
||||
dmfc1_2 = mips64 and "44200000TG",
|
||||
cfc1_2 = "44400000TG",
|
||||
mfhc1_2 = "44600000TG",
|
||||
mtc1_2 = "44800000TG",
|
||||
dmtc1_2 = mips64 and "44a00000TG",
|
||||
ctc1_2 = "44c00000TG",
|
||||
mthc1_2 = "44e00000TG",
|
||||
|
||||
@ -633,7 +684,7 @@ local function parse_fpr(expr)
|
||||
werror("bad register name `"..expr.."'")
|
||||
end
|
||||
|
||||
local function parse_imm(imm, bits, shift, scale, signed)
|
||||
local function parse_imm(imm, bits, shift, scale, signed, action)
|
||||
local n = tonumber(imm)
|
||||
if n then
|
||||
local m = sar(n, scale)
|
||||
@ -651,7 +702,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
|
||||
match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
|
||||
werror("expected immediate operand, got register")
|
||||
else
|
||||
waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
|
||||
waction(action or "IMM",
|
||||
(signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
|
||||
return 0
|
||||
end
|
||||
end
|
||||
@ -763,6 +815,9 @@ map_op[".template__"] = function(params, template, nparams)
|
||||
n = n + 1
|
||||
elseif p == "A" then
|
||||
op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
|
||||
elseif p == "a" then
|
||||
local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
|
||||
op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
|
||||
elseif p == "M" then
|
||||
op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
|
||||
elseif p == "N" then
|
||||
|
12
dynasm/dasm_mips64.lua
Normal file
12
dynasm/dasm_mips64.lua
Normal file
@ -0,0 +1,12 @@
|
||||
------------------------------------------------------------------------------
|
||||
-- DynASM MIPS64 module.
|
||||
--
|
||||
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
|
||||
-- See dynasm.lua for full copyright notice.
|
||||
------------------------------------------------------------------------------
|
||||
-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
|
||||
-- All the interesting stuff is there.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
mips64 = true -- Using a global is an ugly, but effective solution.
|
||||
return require("dasm_mips")
|
14
src/Makefile
14
src/Makefile
@ -121,8 +121,8 @@ XCFLAGS=
|
||||
#
|
||||
# Use the system provided memory allocator (realloc) instead of the
|
||||
# bundled memory allocator. This is slower, but sometimes helpful for
|
||||
# debugging. This option cannot be enabled on x64, since realloc usually
|
||||
# doesn't return addresses in the right address range.
|
||||
# debugging. This option cannot be enabled on x64 without GC64, since
|
||||
# realloc usually doesn't return addresses in the right address range.
|
||||
# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
|
||||
# the only way to get useful results from it for all other architectures.
|
||||
#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
|
||||
@ -166,10 +166,6 @@ else
|
||||
HOST_SYS= Windows
|
||||
HOST_MSYS= cygwin
|
||||
endif
|
||||
# Use Clang for OSX host.
|
||||
ifeq (Darwin,$(HOST_SYS))
|
||||
DEFAULT_CC= clang
|
||||
endif
|
||||
endif
|
||||
|
||||
##############################################################################
|
||||
@ -257,7 +253,11 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
|
||||
ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
|
||||
TARGET_ARCH= -D__MIPSEL__=1
|
||||
endif
|
||||
TARGET_LJARCH= mips
|
||||
ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
|
||||
TARGET_LJARCH= mips64
|
||||
else
|
||||
TARGET_LJARCH= mips
|
||||
endif
|
||||
else
|
||||
$(error Unsupported target architecture)
|
||||
endif
|
||||
|
@ -163,7 +163,7 @@ lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
|
||||
lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
|
||||
lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
|
||||
lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
|
||||
lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
|
||||
lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
|
||||
lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
|
||||
@ -215,19 +215,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
|
||||
lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
|
||||
lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
|
||||
lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
|
||||
lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
|
||||
lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
|
||||
lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
|
||||
lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
|
||||
lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
|
||||
lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
|
||||
lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
|
||||
lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
|
||||
lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
|
||||
lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
|
||||
lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
|
||||
lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
|
||||
lib_init.c
|
||||
lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \
|
||||
lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
|
||||
lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
|
||||
lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
|
||||
lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
|
||||
lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
|
||||
lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
|
||||
lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
|
||||
lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
|
||||
lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
|
||||
lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
|
||||
lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
|
||||
lib_ffi.c lib_init.c
|
||||
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
|
||||
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
|
||||
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
|
||||
|
@ -110,7 +110,7 @@ static const char *sym_decorate(BuildCtx *ctx,
|
||||
if (p) {
|
||||
#if LJ_TARGET_X86ORX64
|
||||
if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
|
||||
name[0] = '@';
|
||||
name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */
|
||||
else
|
||||
*p = '\0';
|
||||
#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
|
||||
|
@ -109,6 +109,8 @@ enum {
|
||||
#if LJ_TARGET_X64
|
||||
PEOBJ_SECT_PDATA,
|
||||
PEOBJ_SECT_XDATA,
|
||||
#elif LJ_TARGET_X86
|
||||
PEOBJ_SECT_SXDATA,
|
||||
#endif
|
||||
PEOBJ_SECT_RDATA_Z,
|
||||
PEOBJ_NSECTIONS
|
||||
@ -208,6 +210,13 @@ void emit_peobj(BuildCtx *ctx)
|
||||
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
|
||||
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
|
||||
pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
|
||||
#elif LJ_TARGET_X86
|
||||
memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
|
||||
pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
|
||||
sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
|
||||
pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
|
||||
/* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
|
||||
pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
|
||||
#endif
|
||||
|
||||
memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
|
||||
@ -232,7 +241,7 @@ void emit_peobj(BuildCtx *ctx)
|
||||
nrsym = ctx->nrelocsym;
|
||||
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
|
||||
#if LJ_TARGET_X64
|
||||
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */
|
||||
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
|
||||
#endif
|
||||
|
||||
/* Write PE object header and all sections. */
|
||||
@ -312,6 +321,19 @@ void emit_peobj(BuildCtx *ctx)
|
||||
reloc.type = PEOBJ_RELOC_ADDR32NB;
|
||||
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
|
||||
}
|
||||
#elif LJ_TARGET_X86
|
||||
/* Write .sxdata section. */
|
||||
for (i = 0; i < nrsym; i++) {
|
||||
if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
|
||||
uint32_t symidx = 1+2+i;
|
||||
owrite(ctx, &symidx, 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == nrsym) {
|
||||
fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Write .rdata$Z section. */
|
||||
@ -333,8 +355,10 @@ void emit_peobj(BuildCtx *ctx)
|
||||
#if LJ_TARGET_X64
|
||||
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
|
||||
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
|
||||
emit_peobj_sym(ctx, "lj_err_unwind_win64", 0,
|
||||
emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
|
||||
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
|
||||
#elif LJ_TARGET_X86
|
||||
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
|
||||
#endif
|
||||
|
||||
emit_peobj_sym(ctx, ctx->beginsym, 0,
|
||||
|
@ -157,11 +157,11 @@ local function merge_includes(src)
|
||||
if includes[name] then return "" end
|
||||
includes[name] = true
|
||||
local fp = assert(io.open(LUA_SOURCE..name, "r"))
|
||||
local src = fp:read("*a")
|
||||
local inc = fp:read("*a")
|
||||
assert(fp:close())
|
||||
src = gsub(src, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "")
|
||||
src = gsub(src, "#endif%s*$", "")
|
||||
return merge_includes(src)
|
||||
inc = gsub(inc, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "")
|
||||
inc = gsub(inc, "#endif%s*$", "")
|
||||
return merge_includes(inc)
|
||||
end)
|
||||
end
|
||||
|
||||
|
@ -125,12 +125,12 @@ extern "C"
|
||||
#ifdef _WIN32
|
||||
__declspec(dllexport)
|
||||
#endif
|
||||
const char %s%s[] = {
|
||||
const unsigned char %s%s[] = {
|
||||
]], LJBC_PREFIX, ctx.modname))
|
||||
else
|
||||
fp:write(string.format([[
|
||||
#define %s%s_SIZE %d
|
||||
static const char %s%s[] = {
|
||||
static const unsigned char %s%s[] = {
|
||||
]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
|
||||
end
|
||||
local t, n, m = {}, 0, 0
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
local type = type
|
||||
local sub, byte, format = string.sub, string.byte, string.format
|
||||
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
|
||||
local match, gmatch = string.match, string.gmatch
|
||||
local concat = table.concat
|
||||
local bit = require("bit")
|
||||
local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex
|
||||
|
@ -11,8 +11,8 @@
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
local type = type
|
||||
local sub, byte, format = string.sub, string.byte, string.format
|
||||
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
|
||||
local byte, format = string.byte, string.format
|
||||
local match, gmatch = string.match, string.gmatch
|
||||
local concat = table.concat
|
||||
local bit = require("bit")
|
||||
local band, bor, tohex = bit.band, bit.bor, bit.tohex
|
||||
@ -38,7 +38,7 @@ local map_special = {
|
||||
"multST", "multuST", "divST", "divuST",
|
||||
false, false, false, false,
|
||||
"addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
|
||||
"andDST", "orDST", "xorDST", "nor|notDST0",
|
||||
"andDST", "or|moveDST0", "xorDST", "nor|notDST0",
|
||||
false, false, "sltDST", "sltuDST",
|
||||
false, false, false, false,
|
||||
"tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
|
||||
@ -214,7 +214,7 @@ local map_pri = {
|
||||
map_cop0, map_cop1, false, map_cop1x,
|
||||
"beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
|
||||
false, false, false, false,
|
||||
map_special2, false, false, map_special3,
|
||||
map_special2, "jalxJ", false, map_special3,
|
||||
"lbTSO", "lhTSO", "lwlTSO", "lwTSO",
|
||||
"lbuTSO", "lhuTSO", "lwrTSO", false,
|
||||
"sbTSO", "shTSO", "swlTSO", "swTSO",
|
||||
|
@ -13,7 +13,7 @@
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
local type = type
|
||||
local sub, byte, format = string.sub, string.byte, string.format
|
||||
local byte, format = string.byte, string.format
|
||||
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
|
||||
local concat = table.concat
|
||||
local bit = require("bit")
|
||||
|
@ -818,7 +818,7 @@ map_act = {
|
||||
m = b%32; b = (b-m)/32
|
||||
local nb = b%2; b = (b-nb)/2
|
||||
if nb == 0 then ctx.rexb = true end
|
||||
local nx = b%2; b = (b-nx)/2
|
||||
local nx = b%2
|
||||
if nx == 0 then ctx.rexx = true end
|
||||
b = byte(ctx.code, pos, pos)
|
||||
if not b then return incomplete(ctx) end
|
||||
|
@ -63,9 +63,9 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
|
||||
local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
|
||||
local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
|
||||
local bit = require("bit")
|
||||
local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
|
||||
local band, shr, tohex = bit.band, bit.rshift, bit.tohex
|
||||
local sub, gsub, format = string.sub, string.gsub, string.format
|
||||
local byte, char, rep = string.byte, string.char, string.rep
|
||||
local byte, rep = string.byte, string.rep
|
||||
local type, tostring = type, tostring
|
||||
local stdout, stderr = io.stdout, io.stderr
|
||||
|
||||
@ -213,7 +213,7 @@ local colortype_ansi = {
|
||||
"\027[35m%s\027[m",
|
||||
}
|
||||
|
||||
local function colorize_text(s, t)
|
||||
local function colorize_text(s)
|
||||
return s
|
||||
end
|
||||
|
||||
@ -310,15 +310,17 @@ local function fmtfunc(func, pc)
|
||||
end
|
||||
end
|
||||
|
||||
local function formatk(tr, idx)
|
||||
local function formatk(tr, idx, sn)
|
||||
local k, t, slot = tracek(tr, idx)
|
||||
local tn = type(k)
|
||||
local s
|
||||
if tn == "number" then
|
||||
if k == 2^52+2^51 then
|
||||
if band(sn or 0, 0x30000) ~= 0 then
|
||||
s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
|
||||
elseif k == 2^52+2^51 then
|
||||
s = "bias"
|
||||
else
|
||||
s = format("%+.14g", k)
|
||||
s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
|
||||
end
|
||||
elseif tn == "string" then
|
||||
s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
|
||||
@ -354,7 +356,7 @@ local function printsnap(tr, snap)
|
||||
n = n + 1
|
||||
local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
|
||||
if ref < 0 then
|
||||
out:write(formatk(tr, ref))
|
||||
out:write(formatk(tr, ref, sn))
|
||||
elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
|
||||
out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
|
||||
else
|
||||
|
@ -120,7 +120,7 @@ end
|
||||
-- Show top N list.
|
||||
local function prof_top(count1, count2, samples, indent)
|
||||
local t, n = {}, 0
|
||||
for k, v in pairs(count1) do
|
||||
for k in pairs(count1) do
|
||||
n = n + 1
|
||||
t[n] = k
|
||||
end
|
||||
|
@ -302,7 +302,7 @@ static int panic(lua_State *L)
|
||||
|
||||
#ifdef LUAJIT_USE_SYSMALLOC
|
||||
|
||||
#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
|
||||
#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
|
||||
#error "Must use builtin allocator for 64 bit target"
|
||||
#endif
|
||||
|
||||
@ -334,7 +334,7 @@ LUALIB_API lua_State *luaL_newstate(void)
|
||||
lua_State *L;
|
||||
void *ud = lj_alloc_create();
|
||||
if (ud == NULL) return NULL;
|
||||
#if LJ_64
|
||||
#if LJ_64 && !LJ_GC64
|
||||
L = lj_state_newstate(lj_alloc_f, ud);
|
||||
#else
|
||||
L = lua_newstate(lj_alloc_f, ud);
|
||||
@ -343,7 +343,7 @@ LUALIB_API lua_State *luaL_newstate(void)
|
||||
return L;
|
||||
}
|
||||
|
||||
#if LJ_64
|
||||
#if LJ_64 && !LJ_GC64
|
||||
LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
|
||||
{
|
||||
UNUSED(f); UNUSED(ud);
|
||||
|
@ -715,15 +715,19 @@ static uint32_t jit_cpudetect(lua_State *L)
|
||||
#if LJ_HASJIT
|
||||
/* Compile-time MIPS CPU detection. */
|
||||
#if LJ_ARCH_VERSION >= 20
|
||||
flags |= JIT_F_MIPS32R2;
|
||||
flags |= JIT_F_MIPSXXR2;
|
||||
#endif
|
||||
/* Runtime MIPS CPU detection. */
|
||||
#if defined(__GNUC__)
|
||||
if (!(flags & JIT_F_MIPS32R2)) {
|
||||
if (!(flags & JIT_F_MIPSXXR2)) {
|
||||
int x;
|
||||
#ifdef __mips16
|
||||
x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
|
||||
#else
|
||||
/* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
|
||||
__asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
|
||||
if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */
|
||||
#endif
|
||||
if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
@ -27,15 +27,15 @@
|
||||
{
|
||||
Optimized string compare
|
||||
Memcheck:Addr4
|
||||
fun:lj_str_fastcmp
|
||||
fun:str_fastcmp
|
||||
}
|
||||
{
|
||||
Optimized string compare
|
||||
Memcheck:Addr1
|
||||
fun:lj_str_fastcmp
|
||||
fun:str_fastcmp
|
||||
}
|
||||
{
|
||||
Optimized string compare
|
||||
Memcheck:Cond
|
||||
fun:lj_str_fastcmp
|
||||
fun:str_fastcmp
|
||||
}
|
||||
|
269
src/lj_alloc.c
269
src/lj_alloc.c
@ -72,13 +72,56 @@
|
||||
|
||||
#define IS_DIRECT_BIT (SIZE_T_ONE)
|
||||
|
||||
|
||||
/* Determine system-specific block allocation method. */
|
||||
#if LJ_TARGET_WINDOWS
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
#if LJ_64 && !LJ_GC64
|
||||
#define LJ_ALLOC_VIRTUALALLOC 1
|
||||
|
||||
#if LJ_64 && !LJ_GC64
|
||||
#define LJ_ALLOC_NTAVM 1
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#include <errno.h>
|
||||
/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
|
||||
#include <sys/mman.h>
|
||||
|
||||
#define LJ_ALLOC_MMAP 1
|
||||
|
||||
#if LJ_64
|
||||
|
||||
#define LJ_ALLOC_MMAP_PROBE 1
|
||||
|
||||
#if LJ_GC64
|
||||
#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */
|
||||
#elif LJ_TARGET_X64 && LJ_HASJIT
|
||||
/* Due to limitations in the x64 compiler backend. */
|
||||
#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */
|
||||
#else
|
||||
#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
|
||||
#define LJ_ALLOC_MMAP32 1
|
||||
#endif
|
||||
|
||||
#if LJ_TARGET_LINUX
|
||||
#define LJ_ALLOC_MREMAP 1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if LJ_ALLOC_VIRTUALALLOC
|
||||
|
||||
#if LJ_ALLOC_NTAVM
|
||||
/* Undocumented, but hey, that's what we all love so much about Windows. */
|
||||
typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
|
||||
size_t *size, ULONG alloctype, ULONG prot);
|
||||
@ -89,14 +132,15 @@ static PNTAVM ntavm;
|
||||
*/
|
||||
#define NTAVM_ZEROBITS 1
|
||||
|
||||
static void INIT_MMAP(void)
|
||||
static void init_mmap(void)
|
||||
{
|
||||
ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
|
||||
"NtAllocateVirtualMemory");
|
||||
}
|
||||
#define INIT_MMAP() init_mmap()
|
||||
|
||||
/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
|
||||
static LJ_AINLINE void *CALL_MMAP(size_t size)
|
||||
static void *CALL_MMAP(size_t size)
|
||||
{
|
||||
DWORD olderr = GetLastError();
|
||||
void *ptr = NULL;
|
||||
@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
|
||||
}
|
||||
|
||||
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
|
||||
static LJ_AINLINE void *DIRECT_MMAP(size_t size)
|
||||
static void *DIRECT_MMAP(size_t size)
|
||||
{
|
||||
DWORD olderr = GetLastError();
|
||||
void *ptr = NULL;
|
||||
@ -119,10 +163,8 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
|
||||
|
||||
#else
|
||||
|
||||
#define INIT_MMAP() ((void)0)
|
||||
|
||||
/* Win32 MMAP via VirtualAlloc */
|
||||
static LJ_AINLINE void *CALL_MMAP(size_t size)
|
||||
static void *CALL_MMAP(size_t size)
|
||||
{
|
||||
DWORD olderr = GetLastError();
|
||||
void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
|
||||
@ -131,7 +173,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
|
||||
}
|
||||
|
||||
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
|
||||
static LJ_AINLINE void *DIRECT_MMAP(size_t size)
|
||||
static void *DIRECT_MMAP(size_t size)
|
||||
{
|
||||
DWORD olderr = GetLastError();
|
||||
void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
|
||||
@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
|
||||
#endif
|
||||
|
||||
/* This function supports releasing coalesced segments */
|
||||
static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
|
||||
static int CALL_MUNMAP(void *ptr, size_t size)
|
||||
{
|
||||
DWORD olderr = GetLastError();
|
||||
MEMORY_BASIC_INFORMATION minfo;
|
||||
@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <errno.h>
|
||||
#include <sys/mman.h>
|
||||
#elif LJ_ALLOC_MMAP
|
||||
|
||||
#define MMAP_PROT (PROT_READ|PROT_WRITE)
|
||||
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
|
||||
@ -174,107 +213,151 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
|
||||
#endif
|
||||
#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
|
||||
|
||||
#if LJ_64 && !LJ_GC64
|
||||
/* 64 bit mode with 32 bit pointers needs special support for allocating
|
||||
** memory in the lower 2GB.
|
||||
*/
|
||||
#if LJ_ALLOC_MMAP_PROBE
|
||||
|
||||
#if defined(MAP_32BIT)
|
||||
|
||||
#if defined(__sun__)
|
||||
#define MMAP_REGION_START ((uintptr_t)0x1000)
|
||||
#ifdef MAP_TRYFIXED
|
||||
#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED)
|
||||
#else
|
||||
/* Actually this only gives us max. 1GB in current Linux kernels. */
|
||||
#define MMAP_REGION_START ((uintptr_t)0)
|
||||
#define MMAP_FLAGS_PROBE MMAP_FLAGS
|
||||
#endif
|
||||
|
||||
static LJ_AINLINE void *CALL_MMAP(size_t size)
|
||||
#define LJ_ALLOC_MMAP_PROBE_MAX 30
|
||||
#define LJ_ALLOC_MMAP_PROBE_LINEAR 5
|
||||
|
||||
#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
|
||||
|
||||
/* No point in a giant ifdef mess. Just try to open /dev/urandom.
|
||||
** It doesn't really matter if this fails, since we get some ASLR bits from
|
||||
** every unsuitable allocation, too. And we prefer linear allocation, anyway.
|
||||
*/
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static uintptr_t mmap_probe_seed(void)
|
||||
{
|
||||
int olderr = errno;
|
||||
void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
|
||||
errno = olderr;
|
||||
return ptr;
|
||||
uintptr_t val;
|
||||
int fd = open("/dev/urandom", O_RDONLY);
|
||||
if (fd != -1) {
|
||||
int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
|
||||
(void)close(fd);
|
||||
if (ok) return val;
|
||||
}
|
||||
return 1; /* Punt. */
|
||||
}
|
||||
|
||||
#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN
|
||||
|
||||
/* OSX and FreeBSD mmap() use a naive first-fit linear search.
|
||||
** That's perfect for us. Except that -pagezero_size must be set for OSX,
|
||||
** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
|
||||
** to be reduced to 250MB on FreeBSD.
|
||||
*/
|
||||
#if LJ_TARGET_OSX || defined(__DragonFly__)
|
||||
#define MMAP_REGION_START ((uintptr_t)0x10000)
|
||||
#elif LJ_TARGET_PS4
|
||||
#define MMAP_REGION_START ((uintptr_t)0x4000)
|
||||
#else
|
||||
#define MMAP_REGION_START ((uintptr_t)0x10000000)
|
||||
#endif
|
||||
#define MMAP_REGION_END ((uintptr_t)0x80000000)
|
||||
|
||||
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
static LJ_AINLINE void *CALL_MMAP(size_t size)
|
||||
static void *mmap_probe(size_t size)
|
||||
{
|
||||
int olderr = errno;
|
||||
/* Hint for next allocation. Doesn't need to be thread-safe. */
|
||||
static uintptr_t alloc_hint = MMAP_REGION_START;
|
||||
int retry = 0;
|
||||
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
|
||||
static int rlimit_modified = 0;
|
||||
if (LJ_UNLIKELY(rlimit_modified == 0)) {
|
||||
struct rlimit rlim;
|
||||
rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
|
||||
setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */
|
||||
rlimit_modified = 1;
|
||||
}
|
||||
#endif
|
||||
for (;;) {
|
||||
void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
|
||||
if ((uintptr_t)p >= MMAP_REGION_START &&
|
||||
(uintptr_t)p + size < MMAP_REGION_END) {
|
||||
alloc_hint = (uintptr_t)p + size;
|
||||
static uintptr_t hint_addr = 0;
|
||||
static uintptr_t hint_prng = 0;
|
||||
int olderr = errno;
|
||||
int retry;
|
||||
for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
|
||||
void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
|
||||
uintptr_t addr = (uintptr_t)p;
|
||||
if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) {
|
||||
/* We got a suitable address. Bump the hint address. */
|
||||
hint_addr = addr + size;
|
||||
errno = olderr;
|
||||
return p;
|
||||
}
|
||||
if (p != CMFAIL) munmap(p, size);
|
||||
#if defined(__sun__) || defined(__DragonFly__)
|
||||
alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */
|
||||
if (alloc_hint + size < MMAP_REGION_END) continue;
|
||||
#endif
|
||||
if (retry) break;
|
||||
retry = 1;
|
||||
alloc_hint = MMAP_REGION_START;
|
||||
if (p != MFAIL) {
|
||||
munmap(p, size);
|
||||
} else if (errno == ENOMEM) {
|
||||
return MFAIL;
|
||||
}
|
||||
if (hint_addr) {
|
||||
/* First, try linear probing. */
|
||||
if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
|
||||
hint_addr += 0x1000000;
|
||||
if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
|
||||
hint_addr = 0;
|
||||
continue;
|
||||
} else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
|
||||
/* Next, try a no-hint probe to get back an ASLR address. */
|
||||
hint_addr = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* Finally, try pseudo-random probing. */
|
||||
if (LJ_UNLIKELY(hint_prng == 0)) {
|
||||
hint_prng = mmap_probe_seed();
|
||||
}
|
||||
/* The unsuitable address we got has some ASLR PRNG bits. */
|
||||
hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
|
||||
do { /* The PRNG itself is very weak, but see above. */
|
||||
hint_prng = hint_prng * 1103515245 + 12345;
|
||||
hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
|
||||
hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
|
||||
} while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
|
||||
}
|
||||
errno = olderr;
|
||||
return CMFAIL;
|
||||
return MFAIL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
#if LJ_ALLOC_MMAP32
|
||||
|
||||
/* 32 bit mode and GC64 mode is easy. */
|
||||
static LJ_AINLINE void *CALL_MMAP(size_t size)
|
||||
#if defined(__sun__)
|
||||
#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
|
||||
#else
|
||||
#define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
|
||||
#endif
|
||||
|
||||
static void *mmap_map32(size_t size)
|
||||
{
|
||||
#if LJ_ALLOC_MMAP_PROBE
|
||||
static int fallback = 0;
|
||||
if (fallback)
|
||||
return mmap_probe(size);
|
||||
#endif
|
||||
{
|
||||
int olderr = errno;
|
||||
void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
|
||||
errno = olderr;
|
||||
/* This only allows 1GB on Linux. So fall back to probing to get 2GB. */
|
||||
#if LJ_ALLOC_MMAP_PROBE
|
||||
if (ptr == MFAIL) {
|
||||
fallback = 1;
|
||||
return mmap_probe(size);
|
||||
}
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if LJ_ALLOC_MMAP32
|
||||
#define CALL_MMAP(size) mmap_map32(size)
|
||||
#elif LJ_ALLOC_MMAP_PROBE
|
||||
#define CALL_MMAP(size) mmap_probe(size)
|
||||
#else
|
||||
static void *CALL_MMAP(size_t size)
|
||||
{
|
||||
int olderr = errno;
|
||||
void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
|
||||
errno = olderr;
|
||||
return ptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
|
||||
|
||||
#include <sys/resource.h>
|
||||
|
||||
static void init_mmap(void)
|
||||
{
|
||||
struct rlimit rlim;
|
||||
rlim.rlim_cur = rlim.rlim_max = 0x10000;
|
||||
setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */
|
||||
}
|
||||
#define INIT_MMAP() init_mmap()
|
||||
|
||||
#endif
|
||||
|
||||
#define INIT_MMAP() ((void)0)
|
||||
#define DIRECT_MMAP(s) CALL_MMAP(s)
|
||||
|
||||
static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
|
||||
static int CALL_MUNMAP(void *ptr, size_t size)
|
||||
{
|
||||
int olderr = errno;
|
||||
int ret = munmap(ptr, size);
|
||||
@ -282,10 +365,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if LJ_TARGET_LINUX
|
||||
#if LJ_ALLOC_MREMAP
|
||||
/* Need to define _GNU_SOURCE to get the mremap prototype. */
|
||||
static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
|
||||
int flags)
|
||||
static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
|
||||
{
|
||||
int olderr = errno;
|
||||
ptr = mremap(ptr, osz, nsz, flags);
|
||||
@ -305,6 +387,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef INIT_MMAP
|
||||
#define INIT_MMAP() ((void)0)
|
||||
#endif
|
||||
|
||||
#ifndef DIRECT_MMAP
|
||||
#define DIRECT_MMAP(s) CALL_MMAP(s)
|
||||
#endif
|
||||
|
||||
#ifndef CALL_MREMAP
|
||||
#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
|
||||
#endif
|
||||
|
@ -25,6 +25,10 @@
|
||||
#define LUAJIT_ARCH_ppc 5
|
||||
#define LUAJIT_ARCH_MIPS 6
|
||||
#define LUAJIT_ARCH_mips 6
|
||||
#define LUAJIT_ARCH_MIPS32 6
|
||||
#define LUAJIT_ARCH_mips32 6
|
||||
#define LUAJIT_ARCH_MIPS64 7
|
||||
#define LUAJIT_ARCH_mips64 7
|
||||
|
||||
/* Target OS. */
|
||||
#define LUAJIT_OS_OTHER 0
|
||||
@ -47,8 +51,10 @@
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
|
||||
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
|
||||
#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
|
||||
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
|
||||
#else
|
||||
#error "No support for this architecture (yet)"
|
||||
#endif
|
||||
@ -289,13 +295,21 @@
|
||||
#define LJ_ARCH_XENON 1
|
||||
#endif
|
||||
|
||||
#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
|
||||
#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
|
||||
|
||||
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
|
||||
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
|
||||
#define LJ_ARCH_NAME "mipsel"
|
||||
#else
|
||||
#define LJ_ARCH_NAME "mips64el"
|
||||
#endif
|
||||
#define LJ_ARCH_ENDIAN LUAJIT_LE
|
||||
#else
|
||||
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
|
||||
#define LJ_ARCH_NAME "mips"
|
||||
#else
|
||||
#define LJ_ARCH_NAME "mips64"
|
||||
#endif
|
||||
#define LJ_ARCH_ENDIAN LUAJIT_BE
|
||||
#endif
|
||||
|
||||
@ -307,11 +321,6 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Temporarily disable features until the code has been merged. */
|
||||
#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
|
||||
#define LUAJIT_NO_UNWIND 1
|
||||
#endif
|
||||
|
||||
#if !defined(LJ_ABI_SOFTFP)
|
||||
#ifdef __mips_soft_float
|
||||
#define LJ_ABI_SOFTFP 1
|
||||
@ -320,7 +329,15 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
|
||||
#define LJ_ARCH_BITS 32
|
||||
#define LJ_TARGET_MIPS32 1
|
||||
#else
|
||||
#define LJ_ARCH_BITS 64
|
||||
#define LJ_TARGET_MIPS64 1
|
||||
#define LJ_TARGET_GC64 1
|
||||
#define LJ_ARCH_NOJIT 1 /* NYI */
|
||||
#endif
|
||||
#define LJ_TARGET_MIPS 1
|
||||
#define LJ_TARGET_EHRETREG 4
|
||||
#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
|
||||
@ -329,7 +346,7 @@
|
||||
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
|
||||
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
|
||||
|
||||
#if _MIPS_ARCH_MIPS32R2
|
||||
#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
|
||||
#define LJ_ARCH_VERSION 20
|
||||
#else
|
||||
#define LJ_ARCH_VERSION 10
|
||||
@ -410,9 +427,13 @@
|
||||
#ifdef __NO_FPRS__
|
||||
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
|
||||
#endif
|
||||
#elif LJ_TARGET_MIPS
|
||||
#if defined(_LP64)
|
||||
#error "No support for MIPS64"
|
||||
#elif LJ_TARGET_MIPS32
|
||||
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
|
||||
#error "Only o32 ABI supported for MIPS32"
|
||||
#endif
|
||||
#elif LJ_TARGET_MIPS64
|
||||
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
|
||||
#error "Only n64 ABI supported for MIPS64"
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
@ -453,7 +474,7 @@
|
||||
#endif
|
||||
|
||||
/* Disable or enable the JIT compiler. */
|
||||
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
|
||||
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
|
||||
#define LJ_HASJIT 0
|
||||
#else
|
||||
#define LJ_HASJIT 1
|
||||
@ -524,6 +545,11 @@
|
||||
#define LJ_NO_SYSTEM 1
|
||||
#endif
|
||||
|
||||
#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
|
||||
/* NYI: no support for compact unwind specification, yet. */
|
||||
#define LUAJIT_NO_UNWIND 1
|
||||
#endif
|
||||
|
||||
#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
|
||||
#define LJ_NO_UNWIND 1
|
||||
#endif
|
||||
|
177
src/lj_asm.c
177
src/lj_asm.c
@ -334,7 +334,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
|
||||
RA_DBGX((as, "remat $i $r", ir, r));
|
||||
#if !LJ_SOFTFP
|
||||
if (ir->o == IR_KNUM) {
|
||||
emit_loadn(as, r, ir_knum(ir));
|
||||
emit_loadk64(as, r, ir);
|
||||
} else
|
||||
#endif
|
||||
if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
|
||||
@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
|
||||
#if LJ_64
|
||||
} else if (ir->o == IR_KINT64) {
|
||||
emit_loadu64(as, r, ir_kint64(ir)->u64);
|
||||
#if LJ_GC64
|
||||
} else if (ir->o == IR_KGC) {
|
||||
emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
|
||||
} else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
|
||||
emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
|
||||
#endif
|
||||
#endif
|
||||
} else {
|
||||
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
|
||||
@ -619,10 +625,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Add a register rename to the IR. */
|
||||
static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
|
||||
{
|
||||
IRRef ren;
|
||||
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
|
||||
ren = tref_ref(lj_ir_emit(as->J));
|
||||
as->J->cur.ir[ren].r = (uint8_t)down;
|
||||
as->J->cur.ir[ren].s = SPS_NONE;
|
||||
}
|
||||
|
||||
/* Rename register allocation and emit move. */
|
||||
static void ra_rename(ASMState *as, Reg down, Reg up)
|
||||
{
|
||||
IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
|
||||
IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
|
||||
IRIns *ir = IR(ref);
|
||||
ir->r = (uint8_t)up;
|
||||
as->cost[down] = 0;
|
||||
@ -635,11 +651,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
|
||||
RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
|
||||
emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
|
||||
if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
|
||||
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
|
||||
ren = tref_ref(lj_ir_emit(as->J));
|
||||
as->ir = as->T->ir; /* The IR may have been reallocated. */
|
||||
IR(ren)->r = (uint8_t)down;
|
||||
IR(ren)->s = SPS_NONE;
|
||||
ra_addrename(as, down, ref, as->snapno);
|
||||
}
|
||||
}
|
||||
|
||||
@ -689,16 +701,20 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
|
||||
if (ra_noreg(left)) {
|
||||
if (irref_isk(lref)) {
|
||||
if (ir->o == IR_KNUM) {
|
||||
cTValue *tv = ir_knum(ir);
|
||||
/* FP remat needs a load except for +0. Still better than eviction. */
|
||||
if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
|
||||
emit_loadn(as, dest, tv);
|
||||
if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
|
||||
emit_loadk64(as, dest, ir);
|
||||
return;
|
||||
}
|
||||
#if LJ_64
|
||||
} else if (ir->o == IR_KINT64) {
|
||||
emit_loadu64(as, dest, ir_kint64(ir)->u64);
|
||||
emit_loadk64(as, dest, ir);
|
||||
return;
|
||||
#if LJ_GC64
|
||||
} else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
|
||||
emit_loadk64(as, dest, ir);
|
||||
return;
|
||||
#endif
|
||||
#endif
|
||||
} else if (ir->o != IR_KPRI) {
|
||||
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
|
||||
@ -941,7 +957,7 @@ static void asm_snap_prep(ASMState *as)
|
||||
} else {
|
||||
/* Process any renames above the highwater mark. */
|
||||
for (; as->snaprename < as->T->nins; as->snaprename++) {
|
||||
IRIns *ir = IR(as->snaprename);
|
||||
IRIns *ir = &as->T->ir[as->snaprename];
|
||||
if (asm_snap_checkrename(as, ir->op1))
|
||||
ir->op2 = REF_BIAS-1; /* Kill rename. */
|
||||
}
|
||||
@ -1055,7 +1071,7 @@ static void asm_bufhdr(ASMState *as, IRIns *ir)
|
||||
}
|
||||
} else {
|
||||
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
|
||||
/* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
|
||||
/* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
|
||||
emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
|
||||
emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
|
||||
}
|
||||
@ -1472,12 +1488,7 @@ static void asm_phi_fixup(ASMState *as)
|
||||
irt_clearmark(ir->t);
|
||||
/* Left PHI gained a spill slot before the loop? */
|
||||
if (ra_hasspill(ir->s)) {
|
||||
IRRef ren;
|
||||
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
|
||||
ren = tref_ref(lj_ir_emit(as->J));
|
||||
as->ir = as->T->ir; /* The IR may have been reallocated. */
|
||||
IR(ren)->r = (uint8_t)r;
|
||||
IR(ren)->s = SPS_NONE;
|
||||
ra_addrename(as, r, lref, as->loopsnapno);
|
||||
}
|
||||
}
|
||||
rset_clear(work, r);
|
||||
@ -1888,7 +1899,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
|
||||
SnapEntry sn = map[n-1];
|
||||
if ((sn & SNAP_FRAME)) {
|
||||
*gotframe = 1;
|
||||
return snap_slot(sn);
|
||||
return snap_slot(sn) - LJ_FR2;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -1908,16 +1919,20 @@ static void asm_tail_link(ASMState *as)
|
||||
|
||||
if (as->T->link == 0) {
|
||||
/* Setup fixed registers for exit to interpreter. */
|
||||
const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
|
||||
const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
|
||||
int32_t mres;
|
||||
if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
|
||||
BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
|
||||
if (bc_isret(bc_op(*retpc)))
|
||||
pc = retpc;
|
||||
}
|
||||
#if LJ_GC64
|
||||
emit_loadu64(as, RID_LPC, u64ptr(pc));
|
||||
#else
|
||||
ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
|
||||
ra_allockreg(as, i32ptr(pc), RID_LPC);
|
||||
mres = (int32_t)(snap->nslots - baseslot);
|
||||
#endif
|
||||
mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
|
||||
switch (bc_op(*pc)) {
|
||||
case BC_CALLM: case BC_CALLMT:
|
||||
mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
|
||||
@ -1932,6 +1947,11 @@ static void asm_tail_link(ASMState *as)
|
||||
}
|
||||
emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
|
||||
|
||||
if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */
|
||||
setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
|
||||
IR(as->J->ktrace)->o = IR_KGC;
|
||||
}
|
||||
|
||||
/* Sync the interpreter state with the on-trace state. */
|
||||
asm_stack_restore(as, snap);
|
||||
|
||||
@ -1957,17 +1977,22 @@ static void asm_setup_regsp(ASMState *as)
|
||||
ra_setup(as);
|
||||
|
||||
/* Clear reg/sp for constants. */
|
||||
for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
|
||||
for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
|
||||
ir->prev = REGSP_INIT;
|
||||
if (irt_is64(ir->t) && ir->o != IR_KNULL) {
|
||||
#if LJ_GC64
|
||||
ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
|
||||
#else
|
||||
/* Make life easier for backends by putting address of constant in i. */
|
||||
ir->i = (int32_t)(intptr_t)(ir+1);
|
||||
#endif
|
||||
ir++;
|
||||
}
|
||||
}
|
||||
|
||||
/* REF_BASE is used for implicit references to the BASE register. */
|
||||
lastir->prev = REGSP_HINT(RID_BASE);
|
||||
|
||||
ir = IR(nins-1);
|
||||
if (ir->o == IR_RENAME) {
|
||||
do { ir--; nins--; } while (ir->o == IR_RENAME);
|
||||
T->nins = nins; /* Remove any renames left over from ASM restart. */
|
||||
}
|
||||
as->snaprename = nins;
|
||||
as->snapref = nins;
|
||||
as->snapno = T->nsnap;
|
||||
@ -2199,14 +2224,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
|
||||
ASMState *as = &as_;
|
||||
MCode *origtop;
|
||||
|
||||
/* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
|
||||
{
|
||||
IRRef nins = T->nins;
|
||||
IRIns *ir = &T->ir[nins-1];
|
||||
if (ir->o == IR_NOP || ir->o == IR_RENAME) {
|
||||
do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
|
||||
T->nins = nins;
|
||||
}
|
||||
}
|
||||
|
||||
/* Ensure an initialized instruction beyond the last one for HIOP checks. */
|
||||
J->cur.nins = lj_ir_nextins(J);
|
||||
J->cur.ir[J->cur.nins].o = IR_NOP;
|
||||
/* This also allows one RENAME to be added without reallocating curfinal. */
|
||||
as->orignins = lj_ir_nextins(J);
|
||||
J->cur.ir[as->orignins].o = IR_NOP;
|
||||
|
||||
/* Setup initial state. Copy some fields to reduce indirections. */
|
||||
as->J = J;
|
||||
as->T = T;
|
||||
as->ir = T->ir;
|
||||
J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
|
||||
as->flags = J->flags;
|
||||
as->loopref = J->loopref;
|
||||
as->realign = NULL;
|
||||
@ -2219,12 +2255,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
|
||||
as->mclim = as->mcbot + MCLIM_REDZONE;
|
||||
asm_setup_target(as);
|
||||
|
||||
do {
|
||||
/*
|
||||
** This is a loop, because the MCode may have to be (re-)assembled
|
||||
** multiple times:
|
||||
**
|
||||
** 1. as->realign is set (and the assembly aborted), if the arch-specific
|
||||
** backend wants the MCode to be aligned differently.
|
||||
**
|
||||
** This is currently only the case on x86/x64, where small loops get
|
||||
** an aligned loop body plus a short branch. Not much effort is wasted,
|
||||
** because the abort happens very quickly and only once.
|
||||
**
|
||||
** 2. The IR is immovable, since the MCode embeds pointers to various
|
||||
** constants inside the IR. But RENAMEs may need to be added to the IR
|
||||
** during assembly, which might grow and reallocate the IR. We check
|
||||
** at the end if the IR (in J->cur.ir) has actually grown, resize the
|
||||
** copy (in J->curfinal.ir) and try again.
|
||||
**
|
||||
** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
|
||||
** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
|
||||
** always have one spare slot in the IR (see above), which means we
|
||||
** have to redo the assembly for only ~2% of all traces.
|
||||
**
|
||||
** Very, very rarely, this needs to be done repeatedly, since the
|
||||
** location of constants inside the IR (actually, reachability from
|
||||
** a global pointer) may affect register allocation and thus the
|
||||
** number of RENAMEs.
|
||||
*/
|
||||
for (;;) {
|
||||
as->mcp = as->mctop;
|
||||
#ifdef LUA_USE_ASSERT
|
||||
as->mcp_prev = as->mcp;
|
||||
#endif
|
||||
as->curins = T->nins;
|
||||
as->ir = J->curfinal->ir; /* Use the copied IR. */
|
||||
as->curins = J->cur.nins = as->orignins;
|
||||
|
||||
RA_DBG_START();
|
||||
RA_DBGX((as, "===== STOP ====="));
|
||||
|
||||
@ -2252,22 +2317,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
|
||||
checkmclim(as);
|
||||
asm_ir(as, ir);
|
||||
}
|
||||
} while (as->realign); /* Retry in case the MCode needs to be realigned. */
|
||||
|
||||
/* Emit head of trace. */
|
||||
RA_DBG_REF();
|
||||
checkmclim(as);
|
||||
if (as->gcsteps > 0) {
|
||||
as->curins = as->T->snap[0].ref;
|
||||
asm_snap_prep(as); /* The GC check is a guard. */
|
||||
asm_gc_check(as);
|
||||
if (as->realign && J->curfinal->nins >= T->nins)
|
||||
continue; /* Retry in case only the MCode needs to be realigned. */
|
||||
|
||||
/* Emit head of trace. */
|
||||
RA_DBG_REF();
|
||||
checkmclim(as);
|
||||
if (as->gcsteps > 0) {
|
||||
as->curins = as->T->snap[0].ref;
|
||||
asm_snap_prep(as); /* The GC check is a guard. */
|
||||
asm_gc_check(as);
|
||||
as->curins = as->stopins;
|
||||
}
|
||||
ra_evictk(as);
|
||||
if (as->parent)
|
||||
asm_head_side(as);
|
||||
else
|
||||
asm_head_root(as);
|
||||
asm_phi_fixup(as);
|
||||
|
||||
if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
|
||||
lua_assert(J->curfinal->nk == T->nk);
|
||||
memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
|
||||
(T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
|
||||
T->nins = J->curfinal->nins;
|
||||
break; /* Done. */
|
||||
}
|
||||
|
||||
/* Otherwise try again with a bigger IR. */
|
||||
lj_trace_free(J2G(J), J->curfinal);
|
||||
J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
|
||||
J->curfinal = lj_trace_alloc(J->L, T);
|
||||
as->realign = NULL;
|
||||
}
|
||||
ra_evictk(as);
|
||||
if (as->parent)
|
||||
asm_head_side(as);
|
||||
else
|
||||
asm_head_root(as);
|
||||
asm_phi_fixup(as);
|
||||
|
||||
RA_DBGX((as, "===== START ===="));
|
||||
RA_DBG_FLUSH();
|
||||
|
@ -909,7 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
/* NYI: Check that UREFO is still open and not aliasing a slot. */
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
@ -998,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir)
|
||||
|
||||
static void asm_fload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
ARMIns ai = asm_fxloadins(ir);
|
||||
int32_t ofs;
|
||||
if (ir->op2 == IRFL_TAB_ARRAY) {
|
||||
ofs = asm_fuseabase(as, ir->op1);
|
||||
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
|
||||
emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
|
||||
return;
|
||||
if (ir->op1 == REF_NIL) {
|
||||
lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */
|
||||
} else {
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
ARMIns ai = asm_fxloadins(ir);
|
||||
int32_t ofs;
|
||||
if (ir->op2 == IRFL_TAB_ARRAY) {
|
||||
ofs = asm_fuseabase(as, ir->op1);
|
||||
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
|
||||
emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ofs = field_ofs[ir->op2];
|
||||
if ((ai & 0x04000000))
|
||||
emit_lso(as, ai, dest, idx, ofs);
|
||||
else
|
||||
emit_lsox(as, ai, dest, idx, ofs);
|
||||
}
|
||||
ofs = field_ofs[ir->op2];
|
||||
if ((ai & 0x04000000))
|
||||
emit_lso(as, ai, dest, idx, ofs);
|
||||
else
|
||||
emit_lsox(as, ai, dest, idx, ofs);
|
||||
}
|
||||
|
||||
static void asm_fstore(ASMState *as, IRIns *ir)
|
||||
|
@ -459,12 +459,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
dest, dest);
|
||||
if (irt_isfloat(ir->t))
|
||||
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
|
||||
(void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
|
||||
RSET_GPR);
|
||||
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
|
||||
else
|
||||
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
|
||||
(void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
|
||||
RSET_GPR);
|
||||
(void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
|
||||
emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
|
||||
emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
|
||||
emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
|
||||
@ -494,12 +492,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
tmp, left, tmp);
|
||||
if (st == IRT_FLOAT)
|
||||
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
|
||||
(void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
|
||||
RSET_GPR);
|
||||
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
|
||||
else
|
||||
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
|
||||
(void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
|
||||
RSET_GPR);
|
||||
(void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
|
||||
} else {
|
||||
emit_tg(as, MIPSI_MFC1, dest, tmp);
|
||||
emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
|
||||
@ -514,7 +510,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
|
||||
if ((ir->op2 & IRCONV_SEXT)) {
|
||||
if ((as->flags & JIT_F_MIPS32R2)) {
|
||||
if ((as->flags & JIT_F_MIPSXXR2)) {
|
||||
emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
|
||||
} else {
|
||||
uint32_t shift = st == IRT_I8 ? 24 : 16;
|
||||
@ -743,7 +739,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
||||
emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
|
||||
if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
|
||||
emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
|
||||
if ((as->flags & JIT_F_MIPS32R2)) {
|
||||
if ((as->flags & JIT_F_MIPSXXR2)) {
|
||||
emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
|
||||
} else {
|
||||
emit_dst(as, MIPSI_OR, dest, dest, tmp1);
|
||||
@ -810,7 +806,6 @@ nolo:
|
||||
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
/* NYI: Check that UREFO is still open and not aliasing a slot. */
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
@ -901,17 +896,23 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
|
||||
static void asm_fload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
MIPSIns mi = asm_fxloadins(ir);
|
||||
Reg idx;
|
||||
int32_t ofs;
|
||||
if (ir->op2 == IRFL_TAB_ARRAY) {
|
||||
ofs = asm_fuseabase(as, ir->op1);
|
||||
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
|
||||
emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
|
||||
return;
|
||||
if (ir->op1 == REF_NIL) {
|
||||
idx = RID_JGL;
|
||||
ofs = ir->op2 - 32768;
|
||||
} else {
|
||||
idx = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
if (ir->op2 == IRFL_TAB_ARRAY) {
|
||||
ofs = asm_fuseabase(as, ir->op1);
|
||||
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
|
||||
emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ofs = field_ofs[ir->op2];
|
||||
}
|
||||
ofs = field_ofs[ir->op2];
|
||||
lua_assert(!irt_isfp(ir->t));
|
||||
emit_tsi(as, mi, dest, idx, ofs);
|
||||
}
|
||||
@ -1456,7 +1457,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
if ((as->flags & JIT_F_MIPS32R2)) {
|
||||
if ((as->flags & JIT_F_MIPSXXR2)) {
|
||||
emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
|
||||
emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
|
||||
} else {
|
||||
@ -1512,7 +1513,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
|
||||
|
||||
static void asm_bror(ASMState *as, IRIns *ir)
|
||||
{
|
||||
if ((as->flags & JIT_F_MIPS32R2)) {
|
||||
if ((as->flags & JIT_F_MIPSXXR2)) {
|
||||
asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
|
||||
} else {
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
|
@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
|
||||
emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
|
||||
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
|
||||
emit_lsptr(as, PPCI_LFS, (fbias & 31),
|
||||
(void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
|
||||
RSET_GPR);
|
||||
(void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
|
||||
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
|
||||
emit_fb(as, PPCI_FCTIWZ, tmp, left);
|
||||
}
|
||||
@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
Reg left = ra_alloc1(as, lref, allow);
|
||||
Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
|
||||
Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
|
||||
const float *kbias;
|
||||
if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
|
||||
emit_fab(as, PPCI_FSUB, dest, dest, fbias);
|
||||
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
|
||||
kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
|
||||
if (st == IRT_U32) kbias++;
|
||||
emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
|
||||
emit_lsptr(as, PPCI_LFS, (fbias & 31),
|
||||
&as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
|
||||
rset_clear(allow, hibias));
|
||||
emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
|
||||
RID_SP, SPOFS_TMPLO);
|
||||
@ -472,8 +469,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
|
||||
emit_fab(as, PPCI_FSUB, tmp, left, tmp);
|
||||
emit_lsptr(as, PPCI_LFS, (tmp & 31),
|
||||
(void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
|
||||
RSET_GPR);
|
||||
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
|
||||
} else {
|
||||
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
|
||||
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
|
||||
@ -717,7 +713,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
/* NYI: Check that UREFO is still open and not aliasing a slot. */
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
@ -809,17 +804,23 @@ static PPCIns asm_fxstoreins(IRIns *ir)
|
||||
static void asm_fload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
PPCIns pi = asm_fxloadins(ir);
|
||||
Reg idx;
|
||||
int32_t ofs;
|
||||
if (ir->op2 == IRFL_TAB_ARRAY) {
|
||||
ofs = asm_fuseabase(as, ir->op1);
|
||||
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
|
||||
emit_tai(as, PPCI_ADDI, dest, idx, ofs);
|
||||
return;
|
||||
if (ir->op1 == REF_NIL) {
|
||||
idx = RID_JGL;
|
||||
ofs = ir->op2 - 32768;
|
||||
} else {
|
||||
idx = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
if (ir->op2 == IRFL_TAB_ARRAY) {
|
||||
ofs = asm_fuseabase(as, ir->op1);
|
||||
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
|
||||
emit_tai(as, PPCI_ADDI, dest, idx, ofs);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ofs = field_ofs[ir->op2];
|
||||
}
|
||||
ofs = field_ofs[ir->op2];
|
||||
lua_assert(!irt_isi8(ir->t));
|
||||
emit_tai(as, pi, dest, idx, ofs);
|
||||
}
|
||||
@ -975,7 +976,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
|
||||
emit_fab(as, PPCI_FSUB, dest, dest, fbias);
|
||||
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
|
||||
emit_lsptr(as, PPCI_LFS, (fbias & 31),
|
||||
(void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
|
||||
(void *)&as->J->k32[LJ_K32_2P52_2P31],
|
||||
rset_clear(allow, hibias));
|
||||
emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
|
||||
emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
|
||||
|
626
src/lj_asm_x86.h
626
src/lj_asm_x86.h
File diff suppressed because it is too large
Load Diff
159
src/lj_ccall.c
159
src/lj_ccall.c
@ -439,8 +439,8 @@
|
||||
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
||||
ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
|
||||
|
||||
#elif LJ_TARGET_MIPS
|
||||
/* -- MIPS calling conventions -------------------------------------------- */
|
||||
#elif LJ_TARGET_MIPS32
|
||||
/* -- MIPS o32 calling conventions ---------------------------------------- */
|
||||
|
||||
#define CCALL_HANDLE_STRUCTRET \
|
||||
cc->retref = 1; /* Return all structs by reference. */ \
|
||||
@ -515,6 +515,78 @@
|
||||
sp = (uint8_t *)&cc->fpr[0].f;
|
||||
#endif
|
||||
|
||||
#elif LJ_TARGET_MIPS64
|
||||
/* -- MIPS n64 calling conventions ---------------------------------------- */
|
||||
|
||||
#define CCALL_HANDLE_STRUCTRET \
|
||||
cc->retref = !(sz <= 16); \
|
||||
if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
|
||||
|
||||
#define CCALL_HANDLE_STRUCTRET2 \
|
||||
ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct));
|
||||
|
||||
#define CCALL_HANDLE_COMPLEXRET \
|
||||
/* Complex values are returned in 1 or 2 FPRs. */ \
|
||||
cc->retref = 0;
|
||||
|
||||
#if LJ_ABI_SOFTFP /* MIPS64 soft-float */
|
||||
|
||||
#define CCALL_HANDLE_COMPLEXRET2 \
|
||||
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
|
||||
((intptr_t *)dp)[0] = cc->gpr[0]; \
|
||||
} else { /* Copy complex double from GPRs. */ \
|
||||
((intptr_t *)dp)[0] = cc->gpr[0]; \
|
||||
((intptr_t *)dp)[1] = cc->gpr[1]; \
|
||||
}
|
||||
|
||||
#define CCALL_HANDLE_COMPLEXARG \
|
||||
/* Pass complex by value in 2 or 4 GPRs. */
|
||||
|
||||
/* Position of soft-float 'float' return value depends on endianness. */
|
||||
#define CCALL_HANDLE_RET \
|
||||
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
||||
sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4);
|
||||
|
||||
#else /* MIPS64 hard-float */
|
||||
|
||||
#define CCALL_HANDLE_COMPLEXRET2 \
|
||||
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
|
||||
((float *)dp)[0] = cc->fpr[0].f; \
|
||||
((float *)dp)[1] = cc->fpr[1].f; \
|
||||
} else { /* Copy complex double from FPRs. */ \
|
||||
((double *)dp)[0] = cc->fpr[0].d; \
|
||||
((double *)dp)[1] = cc->fpr[1].d; \
|
||||
}
|
||||
|
||||
#define CCALL_HANDLE_COMPLEXARG \
|
||||
if (sz == 2*sizeof(float)) { \
|
||||
isfp = 2; \
|
||||
if (ngpr < maxgpr) \
|
||||
sz *= 2; \
|
||||
}
|
||||
|
||||
#define CCALL_HANDLE_RET \
|
||||
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
||||
sp = (uint8_t *)&cc->fpr[0].f;
|
||||
|
||||
#endif
|
||||
|
||||
#define CCALL_HANDLE_STRUCTARG \
|
||||
/* Pass all structs by value in registers and/or on the stack. */
|
||||
|
||||
#define CCALL_HANDLE_REGARG \
|
||||
if (ngpr < maxgpr) { \
|
||||
dp = &cc->gpr[ngpr]; \
|
||||
if (ngpr + n > maxgpr) { \
|
||||
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
|
||||
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
|
||||
ngpr = maxgpr; \
|
||||
} else { \
|
||||
ngpr += n; \
|
||||
} \
|
||||
goto done; \
|
||||
}
|
||||
|
||||
#else
|
||||
#error "Missing calling convention definitions for this architecture"
|
||||
#endif
|
||||
@ -754,6 +826,78 @@ noth: /* Not a homogeneous float/double aggregate. */
|
||||
|
||||
#endif
|
||||
|
||||
/* -- MIPS64 ABI struct classification ---------------------------- */
|
||||
|
||||
#if LJ_TARGET_MIPS64
|
||||
|
||||
#define FTYPE_FLOAT 1
|
||||
#define FTYPE_DOUBLE 2
|
||||
|
||||
/* Classify FP fields (max. 2) and their types. */
|
||||
static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
|
||||
{
|
||||
int n = 0, ft = 0;
|
||||
if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION))
|
||||
goto noth;
|
||||
while (ct->sib) {
|
||||
CType *sct;
|
||||
ct = ctype_get(cts, ct->sib);
|
||||
if (n == 2) {
|
||||
goto noth;
|
||||
} else if (ctype_isfield(ct->info)) {
|
||||
sct = ctype_rawchild(cts, ct);
|
||||
if (ctype_isfp(sct->info)) {
|
||||
ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n;
|
||||
n++;
|
||||
} else {
|
||||
goto noth;
|
||||
}
|
||||
} else if (ctype_isbitfield(ct->info) ||
|
||||
ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
|
||||
goto noth;
|
||||
}
|
||||
}
|
||||
if (n <= 2)
|
||||
return ft;
|
||||
noth: /* Not a homogeneous float/double aggregate. */
|
||||
return 0; /* Struct is in GPRs. */
|
||||
}
|
||||
|
||||
/*
** Copy a struct result from the call state to its destination.
**
** cc   Call state holding the returned GPRs (cc->gpr) and FPRs (cc->fpr).
** ctr  Type of the returned struct; ctr->size bytes are copied in the
**      plain-copy case.
** dp   Destination buffer.
** sp   Default source for the plain copy.
** ft   Field classification as returned by ccall_classify_struct():
**      two bits per FP field (FTYPE_FLOAT or FTYPE_DOUBLE), first field in
**      the lowest bits, or 0 if the struct has no FP classification.
**
** File-local helper (in the visible code it is only referenced through
** CCALL_HANDLE_STRUCTRET2), so it has internal linkage like its sibling
** ccall_classify_struct().
*/
static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
			      int ft)
{
  /*
  ** Soft-float: any FP-classified struct is copied field by field.
  ** Hard-float: only if one of the (max. 2) fields is a float.
  */
  if (LJ_ABI_SOFTFP ? ft :
      ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
    int i, ofs = 0;
    for (i = 0; ft != 0; i++, ft >>= 2) {
      if ((ft & 3) == FTYPE_FLOAT) {
#if LJ_ABI_SOFTFP
	/* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */
	memcpy((uint8_t *)dp + ofs,
	       (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4);
#else
	*(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f;
#endif
	ofs += 4;
      } else {
	ofs = (ofs + 7) & ~7;  /* 64 bit alignment. */
#if LJ_ABI_SOFTFP
	*(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i];
#else
	*(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d;
#endif
	ofs += 8;
      }
    }
  } else {
#if !LJ_ABI_SOFTFP
    /* FP-classified struct without float fields: copy from the FPRs. */
    if (ft) sp = (uint8_t *)&cc->fpr[0];
#endif
    memcpy(dp, sp, ctr->size);
  }
}
|
||||
|
||||
#endif
|
||||
|
||||
/* -- Common C call handling ---------------------------------------------- */
|
||||
|
||||
/* Infer the destination CTypeID for a vararg argument. */
|
||||
@ -921,6 +1065,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
||||
*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
|
||||
(int32_t)*(int16_t *)dp;
|
||||
}
|
||||
#if LJ_TARGET_MIPS64
|
||||
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
|
||||
(isfp && nsp == 0)) && d->size <= 4) {
|
||||
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
|
||||
}
|
||||
#endif
|
||||
#if LJ_TARGET_X64 && LJ_ABI_WIN
|
||||
if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
|
||||
if (nfpr == ngpr)
|
||||
@ -936,7 +1086,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
||||
cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
|
||||
cc->fpr[nfpr-2].d[1] = 0;
|
||||
}
|
||||
#elif LJ_TARGET_ARM64
|
||||
#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP)
|
||||
if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
|
||||
/* Split float HFA or complex float into separate registers. */
|
||||
CTSize i = (sz >> 2) - 1;
|
||||
@ -983,7 +1133,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
|
||||
CCALL_HANDLE_COMPLEXRET2
|
||||
return 1; /* One GC step. */
|
||||
}
|
||||
if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR)
|
||||
if (LJ_BE && ctr->size < CTSIZE_PTR &&
|
||||
(ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info)))
|
||||
sp += (CTSIZE_PTR - ctr->size);
|
||||
#if CCALL_NUM_FPR
|
||||
if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))
|
||||
|
@ -104,11 +104,11 @@ typedef union FPRArg {
|
||||
typedef intptr_t GPRArg;
|
||||
typedef double FPRArg;
|
||||
|
||||
#elif LJ_TARGET_MIPS
|
||||
#elif LJ_TARGET_MIPS32
|
||||
|
||||
#define CCALL_NARG_GPR 4
|
||||
#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2)
|
||||
#define CCALL_NRET_GPR 2
|
||||
#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2)
|
||||
#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
|
||||
#define CCALL_SPS_EXTRA 7
|
||||
#define CCALL_SPS_FREE 1
|
||||
@ -119,6 +119,22 @@ typedef union FPRArg {
|
||||
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
|
||||
} FPRArg;
|
||||
|
||||
#elif LJ_TARGET_MIPS64
|
||||
|
||||
/* FP args are positional and overlay the GPR array. */
|
||||
#define CCALL_NARG_GPR 8
|
||||
#define CCALL_NARG_FPR 0
|
||||
#define CCALL_NRET_GPR 2
|
||||
#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
|
||||
#define CCALL_SPS_EXTRA 3
|
||||
#define CCALL_SPS_FREE 1
|
||||
|
||||
typedef intptr_t GPRArg;
|
||||
typedef union FPRArg {
|
||||
double d;
|
||||
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
|
||||
} FPRArg;
|
||||
|
||||
#else
|
||||
#error "Missing calling convention definitions for this architecture"
|
||||
#endif
|
||||
|
@ -67,9 +67,13 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
|
||||
#define CALLBACK_MCODE_HEAD 24
|
||||
#endif
|
||||
|
||||
#elif LJ_TARGET_MIPS
|
||||
#elif LJ_TARGET_MIPS32
|
||||
|
||||
#define CALLBACK_MCODE_HEAD 24
|
||||
#define CALLBACK_MCODE_HEAD 20
|
||||
|
||||
#elif LJ_TARGET_MIPS64
|
||||
|
||||
#define CALLBACK_MCODE_HEAD 52
|
||||
|
||||
#else
|
||||
|
||||
@ -221,14 +225,27 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
|
||||
static void callback_mcode_init(global_State *g, uint32_t *page)
|
||||
{
|
||||
uint32_t *p = page;
|
||||
void *target = (void *)lj_vm_ffi_callback;
|
||||
uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
|
||||
uintptr_t ug = (uintptr_t)(void *)g;
|
||||
MSize slot;
|
||||
*p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0;
|
||||
*p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16);
|
||||
*p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16);
|
||||
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff);
|
||||
#if LJ_TARGET_MIPS32
|
||||
*p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
|
||||
*p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
|
||||
#else
|
||||
*p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48);
|
||||
*p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48);
|
||||
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
|
||||
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
|
||||
*p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
|
||||
*p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
|
||||
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
|
||||
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
|
||||
*p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
|
||||
*p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
|
||||
#endif
|
||||
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
|
||||
*p++ = MIPSI_JR | MIPSF_S(RID_R3);
|
||||
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff);
|
||||
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
|
||||
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
|
||||
*p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
|
||||
p++;
|
||||
@ -440,7 +457,7 @@ void lj_ccallback_mcode_free(CTState *cts)
|
||||
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
||||
*(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
|
||||
|
||||
#elif LJ_TARGET_MIPS
|
||||
#elif LJ_TARGET_MIPS32
|
||||
|
||||
#define CALLBACK_HANDLE_GPR \
|
||||
if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
|
||||
@ -466,6 +483,29 @@ void lj_ccallback_mcode_free(CTState *cts)
|
||||
UNUSED(isfp);
|
||||
#endif
|
||||
|
||||
#define CALLBACK_HANDLE_RET \
|
||||
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
||||
((float *)dp)[1] = *(float *)dp;
|
||||
|
||||
#elif LJ_TARGET_MIPS64
|
||||
|
||||
#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */
|
||||
#define CALLBACK_HANDLE_REGARG \
|
||||
if (ngpr + n <= maxgpr) { \
|
||||
sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
|
||||
ngpr += n; \
|
||||
goto done; \
|
||||
}
|
||||
#else /* MIPS64 soft-float */
|
||||
#define CALLBACK_HANDLE_REGARG \
|
||||
if (ngpr + n <= maxgpr) { \
|
||||
UNUSED(isfp); \
|
||||
sp = (void*) &cts->cb.gpr[ngpr]; \
|
||||
ngpr += n; \
|
||||
goto done; \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define CALLBACK_HANDLE_RET \
|
||||
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
||||
((float *)dp)[1] = *(float *)dp;
|
||||
@ -557,7 +597,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
|
||||
nsp += n;
|
||||
|
||||
done:
|
||||
if (LJ_BE && cta->size < CTSIZE_PTR)
|
||||
if (LJ_BE && cta->size < CTSIZE_PTR
|
||||
#if LJ_TARGET_MIPS64
|
||||
&& !(isfp && nsp)
|
||||
#endif
|
||||
)
|
||||
sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
|
||||
gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
|
||||
}
|
||||
@ -608,6 +652,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
|
||||
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
|
||||
(int32_t)*(int16_t *)dp;
|
||||
}
|
||||
#if LJ_TARGET_MIPS64
|
||||
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
|
||||
if (ctr->size <= 4 &&
|
||||
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
|
||||
*(int64_t *)dp = (int64_t)*(int32_t *)dp;
|
||||
#endif
|
||||
#if LJ_TARGET_X86
|
||||
if (ctype_isfp(ctr->info))
|
||||
cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
|
||||
|
@ -93,11 +93,13 @@ void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
|
||||
setcdataV(L, &tmp, cd);
|
||||
lj_gc_anybarriert(L, t);
|
||||
tv = lj_tab_set(L, t, &tmp);
|
||||
setgcV(L, tv, obj, it);
|
||||
if (!tvisnil(tv))
|
||||
cd->marked |= LJ_GC_CDATA_FIN;
|
||||
else
|
||||
if (it == LJ_TNIL) {
|
||||
setnilV(tv);
|
||||
cd->marked &= ~LJ_GC_CDATA_FIN;
|
||||
} else {
|
||||
setgcV(L, tv, obj, it);
|
||||
cd->marked |= LJ_GC_CDATA_FIN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,13 +297,17 @@ static CPToken cp_next_(CPState *cp)
|
||||
else return '/';
|
||||
break;
|
||||
case '|':
|
||||
if (cp_get(cp) != '|') return '|'; cp_get(cp); return CTOK_OROR;
|
||||
if (cp_get(cp) != '|') return '|';
|
||||
cp_get(cp); return CTOK_OROR;
|
||||
case '&':
|
||||
if (cp_get(cp) != '&') return '&'; cp_get(cp); return CTOK_ANDAND;
|
||||
if (cp_get(cp) != '&') return '&';
|
||||
cp_get(cp); return CTOK_ANDAND;
|
||||
case '=':
|
||||
if (cp_get(cp) != '=') return '='; cp_get(cp); return CTOK_EQ;
|
||||
if (cp_get(cp) != '=') return '=';
|
||||
cp_get(cp); return CTOK_EQ;
|
||||
case '!':
|
||||
if (cp_get(cp) != '=') return '!'; cp_get(cp); return CTOK_NE;
|
||||
if (cp_get(cp) != '=') return '!';
|
||||
cp_get(cp); return CTOK_NE;
|
||||
case '<':
|
||||
if (cp_get(cp) == '=') { cp_get(cp); return CTOK_LE; }
|
||||
else if (cp->c == '<') { cp_get(cp); return CTOK_SHL; }
|
||||
@ -313,7 +317,8 @@ static CPToken cp_next_(CPState *cp)
|
||||
else if (cp->c == '>') { cp_get(cp); return CTOK_SHR; }
|
||||
return '>';
|
||||
case '-':
|
||||
if (cp_get(cp) != '>') return '-'; cp_get(cp); return CTOK_DEREF;
|
||||
if (cp_get(cp) != '>') return '-';
|
||||
cp_get(cp); return CTOK_DEREF;
|
||||
case '$':
|
||||
return cp_param(cp);
|
||||
case '\0': return CTOK_EOF;
|
||||
|
@ -712,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
|
||||
return tr;
|
||||
}
|
||||
|
||||
/* Tailcall to function. */
|
||||
static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv)
|
||||
{
|
||||
TRef kfunc = lj_ir_kfunc(J, funcV(tv));
|
||||
#if LJ_FR2
|
||||
J->base[-2] = kfunc;
|
||||
J->base[-1] = TREF_FRAME;
|
||||
#else
|
||||
J->base[-1] = kfunc | TREF_FRAME;
|
||||
#endif
|
||||
rd->nres = -1; /* Pending tailcall. */
|
||||
}
|
||||
|
||||
/* Record ctype __index/__newindex metamethods. */
|
||||
static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
|
||||
RecordFFData *rd)
|
||||
@ -721,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
|
||||
if (!tv)
|
||||
lj_trace_err(J, LJ_TRERR_BADTYPE);
|
||||
if (tvisfunc(tv)) {
|
||||
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
|
||||
rd->nres = -1; /* Pending tailcall. */
|
||||
crec_tailcall(J, rd, tv);
|
||||
} else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
|
||||
/* Specialize to result of __index lookup. */
|
||||
cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]);
|
||||
@ -1119,20 +1131,20 @@ static void crec_snap_caller(jit_State *J)
|
||||
lua_State *L = J->L;
|
||||
TValue *base = L->base, *top = L->top;
|
||||
const BCIns *pc = J->pc;
|
||||
TRef ftr = J->base[-1];
|
||||
TRef ftr = J->base[-1-LJ_FR2];
|
||||
ptrdiff_t delta;
|
||||
if (!frame_islua(base-1) || J->framedepth <= 0)
|
||||
lj_trace_err(J, LJ_TRERR_NYICALL);
|
||||
J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
|
||||
L->top = base; L->base = base - delta;
|
||||
J->base[-1] = TREF_FALSE;
|
||||
J->base[-1-LJ_FR2] = TREF_FALSE;
|
||||
J->base -= delta; J->baseslot -= (BCReg)delta;
|
||||
J->maxslot = (BCReg)delta; J->framedepth--;
|
||||
J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
|
||||
lj_snap_add(J);
|
||||
L->base = base; L->top = top;
|
||||
J->framedepth++; J->maxslot = 1;
|
||||
J->base += delta; J->baseslot += (BCReg)delta;
|
||||
J->base[-1] = ftr; J->pc = pc;
|
||||
J->base[-1-LJ_FR2] = ftr; J->pc = pc;
|
||||
}
|
||||
|
||||
/* Record function call. */
|
||||
@ -1224,8 +1236,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
|
||||
tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
|
||||
if (tv) {
|
||||
if (tvisfunc(tv)) {
|
||||
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
|
||||
rd->nres = -1; /* Pending tailcall. */
|
||||
crec_tailcall(J, rd, tv);
|
||||
return;
|
||||
}
|
||||
} else if (mm == MM_new) {
|
||||
@ -1238,7 +1249,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
|
||||
|
||||
static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
|
||||
{
|
||||
if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) {
|
||||
if (sp[0] && sp[1] && ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) {
|
||||
IRType dt;
|
||||
CTypeID id;
|
||||
TRef tr;
|
||||
@ -1296,6 +1307,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
|
||||
{
|
||||
CTState *cts = ctype_ctsG(J2G(J));
|
||||
CType *ctp = s[0];
|
||||
if (!(sp[0] && sp[1])) return 0;
|
||||
if (ctype_isptr(ctp->info) || ctype_isrefarray(ctp->info)) {
|
||||
if ((mm == MM_sub || mm == MM_eq || mm == MM_lt || mm == MM_le) &&
|
||||
(ctype_isptr(s[1]->info) || ctype_isrefarray(s[1]->info))) {
|
||||
@ -1373,8 +1385,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
|
||||
}
|
||||
if (tv) {
|
||||
if (tvisfunc(tv)) {
|
||||
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
|
||||
rd->nres = -1; /* Pending tailcall. */
|
||||
crec_tailcall(J, rd, tv);
|
||||
return 0;
|
||||
} /* NYI: non-function metamethods. */
|
||||
} else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */
|
||||
|
@ -42,18 +42,18 @@ LJ_STATIC_ASSERT(((int)CT_STRUCT & (int)CT_ARRAY) == CT_STRUCT);
|
||||
** ---------- info ------------
|
||||
** |type flags... A cid | size | sib | next | name |
|
||||
** +----------------------------+--------+-------+-------+-------+--
|
||||
** |NUM BFvcUL.. A | size | | type | |
|
||||
** |STRUCT ..vcU..V A | size | field | name? | name? |
|
||||
** |PTR ..vcR... A cid | size | | type | |
|
||||
** |ARRAY VCvc...V A cid | size | | type | |
|
||||
** |VOID ..vc.... A | size | | type | |
|
||||
** |NUM BFcvUL.. A | size | | type | |
|
||||
** |STRUCT ..cvU..V A | size | field | name? | name? |
|
||||
** |PTR ..cvR... A cid | size | | type | |
|
||||
** |ARRAY VCcv...V A cid | size | | type | |
|
||||
** |VOID ..cv.... A | size | | type | |
|
||||
** |ENUM A cid | size | const | name? | name? |
|
||||
** |FUNC ....VS.. cc cid | nargs | field | name? | name? |
|
||||
** |TYPEDEF cid | | | name | name |
|
||||
** |ATTRIB attrnum cid | attr | sib? | type? | |
|
||||
** |FIELD cid | offset | field | | name? |
|
||||
** |BITFIELD B.vcU csz bsz pos | offset | field | | name? |
|
||||
** |CONSTVAL c cid | value | const | name | name |
|
||||
** |BITFIELD B.cvU csz bsz pos | offset | field | | name? |
|
||||
** |CONSTVAL c cid | value | const | name | name |
|
||||
** |EXTERN cid | | sib? | name | name |
|
||||
** |KW tok | size | | name | name |
|
||||
** +----------------------------+--------+-------+-------+-------+--
|
||||
|
@ -95,6 +95,8 @@ typedef unsigned int uintptr_t;
|
||||
#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
|
||||
#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
|
||||
#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
|
||||
#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
|
||||
#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
|
||||
|
||||
#define checki8(x) ((x) == (int32_t)(int8_t)(x))
|
||||
#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
|
||||
|
@ -75,7 +75,7 @@ void lj_dispatch_init(GG_State *GG)
|
||||
for (i = 0; i < GG_NUM_ASMFF; i++)
|
||||
GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
|
||||
#if LJ_TARGET_MIPS
|
||||
memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4);
|
||||
memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
|
||||
|
||||
#if !LJ_SOFTFP
|
||||
/* Load a number constant into an FPR. */
|
||||
static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
|
||||
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
|
||||
{
|
||||
cTValue *tv = ir_knum(ir);
|
||||
int32_t i;
|
||||
if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
|
||||
uint32_t hi = tv->u32.hi;
|
||||
|
@ -35,7 +35,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
|
||||
|
||||
static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
|
||||
{
|
||||
if ((as->flags & JIT_F_MIPS32R2)) {
|
||||
if ((as->flags & JIT_F_MIPSXXR2)) {
|
||||
emit_dta(as, MIPSI_ROTR, dest, src, shift);
|
||||
} else {
|
||||
emit_dst(as, MIPSI_OR, dest, dest, tmp);
|
||||
@ -112,8 +112,8 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
|
||||
emit_tsi(as, mi, r, base, i);
|
||||
}
|
||||
|
||||
#define emit_loadn(as, r, tv) \
|
||||
emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR)
|
||||
#define emit_loadk64(as, r, ir) \
|
||||
emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
|
||||
|
||||
/* Get/set global_State fields. */
|
||||
static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
|
||||
@ -157,7 +157,8 @@ static void emit_call(ASMState *as, void *target, int needcfa)
|
||||
MCode *p = as->mcp;
|
||||
*--p = MIPSI_NOP;
|
||||
if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
|
||||
*--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
|
||||
*--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
|
||||
(((uintptr_t)target >>2) & 0x03ffffffu);
|
||||
} else { /* Target out of range: need indirect call. */
|
||||
*--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
|
||||
needcfa = 1;
|
||||
|
@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
|
||||
emit_tai(as, pi, r, base, i);
|
||||
}
|
||||
|
||||
#define emit_loadn(as, r, tv) \
|
||||
emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR)
|
||||
#define emit_loadk64(as, r, ir) \
|
||||
emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
|
||||
|
||||
/* Get/set global_State fields. */
|
||||
static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)
|
||||
|
@ -20,6 +20,11 @@
|
||||
#define REX_64 0
|
||||
#define VEX_64 0
|
||||
#endif
|
||||
#if LJ_GC64
|
||||
#define REX_GC64 REX_64
|
||||
#else
|
||||
#define REX_GC64 0
|
||||
#endif
|
||||
|
||||
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
|
||||
#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
|
||||
@ -94,26 +99,17 @@ static int32_t ptr2addr(const void *p)
|
||||
#define ptr2addr(p) (i32ptr((p)))
|
||||
#endif
|
||||
|
||||
/* op r, [addr] */
|
||||
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
|
||||
{
|
||||
MCode *p = as->mcp;
|
||||
*(int32_t *)(p-4) = ptr2addr(addr);
|
||||
#if LJ_64
|
||||
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
|
||||
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
|
||||
#else
|
||||
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* op r, [base+ofs] */
|
||||
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
|
||||
{
|
||||
MCode *p = as->mcp;
|
||||
x86Mode mode;
|
||||
if (ra_hasreg(rb)) {
|
||||
if (ofs == 0 && (rb&7) != RID_EBP) {
|
||||
if (LJ_GC64 && rb == RID_RIP) {
|
||||
mode = XM_OFS0;
|
||||
p -= 4;
|
||||
*(int32_t *)p = ofs;
|
||||
} else if (ofs == 0 && (rb&7) != RID_EBP) {
|
||||
mode = XM_OFS0;
|
||||
} else if (checki8(ofs)) {
|
||||
*--p = (MCode)ofs;
|
||||
@ -211,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
|
||||
*--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
|
||||
rb = RID_ESP;
|
||||
#endif
|
||||
} else if (LJ_GC64 && rb == RID_RIP) {
|
||||
lua_assert(as->mrm.idx == RID_NONE);
|
||||
mode = XM_OFS0;
|
||||
p -= 4;
|
||||
*(int32_t *)p = as->mrm.ofs;
|
||||
} else {
|
||||
if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
|
||||
mode = XM_OFS0;
|
||||
@ -264,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
|
||||
/* Get/set global_State fields. */
|
||||
#define emit_opgl(as, xo, r, field) \
|
||||
emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
|
||||
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field)
|
||||
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field)
|
||||
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
|
||||
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
|
||||
|
||||
#define emit_setvmstate(as, i) \
|
||||
(emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
|
||||
@ -288,9 +289,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
|
||||
}
|
||||
}
|
||||
|
||||
#if LJ_GC64
|
||||
#define dispofs(as, k) \
|
||||
((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
|
||||
#define mcpofs(as, k) \
|
||||
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
|
||||
#define mctopofs(as, k) \
|
||||
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
|
||||
/* mov r, addr */
|
||||
#define emit_loada(as, r, addr) \
|
||||
emit_loadu64(as, (r), (uintptr_t)(addr))
|
||||
#else
|
||||
/* mov r, addr */
|
||||
#define emit_loada(as, r, addr) \
|
||||
emit_loadi(as, (r), ptr2addr((addr)))
|
||||
#endif
|
||||
|
||||
#if LJ_64
|
||||
/* mov r, imm64 or shorter 32 bit extended load. */
|
||||
@ -302,6 +315,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
|
||||
MCode *p = as->mcp;
|
||||
*(int32_t *)(p-4) = (int32_t)u64;
|
||||
as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
|
||||
#if LJ_GC64
|
||||
} else if (checki32(dispofs(as, u64))) {
|
||||
emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
|
||||
} else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
|
||||
/* Since as->realign assumes the code size doesn't change, check
|
||||
** RIP-relative addressing reachability for both as->mcp and as->mctop.
|
||||
*/
|
||||
emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
|
||||
#endif
|
||||
} else { /* Full-size 64 bit load. */
|
||||
MCode *p = as->mcp;
|
||||
*(uint64_t *)(p-8) = u64;
|
||||
@ -313,13 +335,70 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* movsd r, [&tv->n] / xorps r, r */
|
||||
static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
|
||||
/* op r, [addr] */
|
||||
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
|
||||
{
|
||||
if (tvispzero(tv)) /* Use xor only for +0. */
|
||||
emit_rr(as, XO_XORPS, r, r);
|
||||
else
|
||||
emit_rma(as, XO_MOVSD, r, &tv->n);
|
||||
#if LJ_GC64
|
||||
if (checki32(dispofs(as, addr))) {
|
||||
emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
|
||||
} else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
|
||||
emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
|
||||
} else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
|
||||
emit_rmro(as, xo, rr, rr, 0);
|
||||
emit_loadu64(as, rr, (uintptr_t)addr);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
MCode *p = as->mcp;
|
||||
*(int32_t *)(p-4) = ptr2addr(addr);
|
||||
#if LJ_64
|
||||
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
|
||||
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
|
||||
#else
|
||||
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* Load 64 bit IR constant into register. */
|
||||
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
|
||||
{
|
||||
Reg r64;
|
||||
x86Op xo;
|
||||
const uint64_t *k = &ir_k64(ir)->u64;
|
||||
if (rset_test(RSET_FPR, r)) {
|
||||
r64 = r;
|
||||
xo = XO_MOVSD;
|
||||
} else {
|
||||
r64 = r | REX_64;
|
||||
xo = XO_MOV;
|
||||
}
|
||||
if (*k == 0) {
|
||||
emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
|
||||
#if LJ_GC64
|
||||
} else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
|
||||
(checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
|
||||
emit_rma(as, xo, r64, k);
|
||||
} else {
|
||||
if (ir->i) {
|
||||
lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
|
||||
} else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
|
||||
emit_loadu64(as, r, *k);
|
||||
return;
|
||||
} else {
|
||||
/* If all else fails, add the FP constant at the MCode area bottom. */
|
||||
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
|
||||
*(uint64_t *)as->mcbot = *k;
|
||||
ir->i = (int32_t)(as->mctop - as->mcbot);
|
||||
as->mcbot += 8;
|
||||
as->mclim = as->mcbot + MCLIM_REDZONE;
|
||||
}
|
||||
emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
|
||||
#else
|
||||
} else {
|
||||
emit_rma(as, xo, r64, k);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* -- Emit control-flow instructions -------------------------------------- */
|
||||
@ -460,9 +539,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
|
||||
{
|
||||
if (ofs) {
|
||||
if ((as->flags & JIT_F_LEA_AGU))
|
||||
emit_rmro(as, XO_LEA, r, r, ofs);
|
||||
emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
|
||||
else
|
||||
emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
|
||||
emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
|
||||
}
|
||||
}
|
||||
|
||||
|
42
src/lj_err.c
42
src/lj_err.c
@ -46,7 +46,8 @@
|
||||
** the wrapper function feature. Lua errors thrown through C++ frames
|
||||
** cannot be caught by C++ code and C++ destructors are not run.
|
||||
**
|
||||
** EXT is the default on x64 systems, INT is the default on all other systems.
|
||||
** EXT is the default on x64 systems and on Windows, INT is the default on all
|
||||
** other systems.
|
||||
**
|
||||
** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
|
||||
** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
|
||||
@ -55,7 +56,6 @@
|
||||
** and all C libraries that have callbacks which may be used to call back
|
||||
** into Lua. C++ code must *not* be compiled with -fno-exceptions.
|
||||
**
|
||||
** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH.
|
||||
** EXT is mandatory on WIN64 since the calling convention has an abundance
|
||||
** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
|
||||
** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
|
||||
@ -63,7 +63,7 @@
|
||||
|
||||
#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
|
||||
#define LJ_UNWIND_EXT 1
|
||||
#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
|
||||
#elif LJ_TARGET_WINDOWS
|
||||
#define LJ_UNWIND_EXT 1
|
||||
#endif
|
||||
|
||||
@ -384,7 +384,7 @@ static void err_raise_ext(int errcode)
|
||||
|
||||
#endif /* LJ_TARGET_ARM */
|
||||
|
||||
#elif LJ_TARGET_X64 && LJ_ABI_WIN
|
||||
#elif LJ_ABI_WIN
|
||||
|
||||
/*
|
||||
** Someone in Redmond owes me several days of my life. A lot of this is
|
||||
@ -402,6 +402,7 @@ static void err_raise_ext(int errcode)
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
#if LJ_TARGET_X64
|
||||
/* Taken from: http://www.nynaeve.net/?p=99 */
|
||||
typedef struct UndocumentedDispatcherContext {
|
||||
ULONG64 ControlPc;
|
||||
@ -416,11 +417,14 @@ typedef struct UndocumentedDispatcherContext {
|
||||
ULONG ScopeIndex;
|
||||
ULONG Fill0;
|
||||
} UndocumentedDispatcherContext;
|
||||
#else
|
||||
typedef void *UndocumentedDispatcherContext;
|
||||
#endif
|
||||
|
||||
/* Another wild guess. */
|
||||
extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
|
||||
|
||||
#ifdef MINGW_SDK_INIT
|
||||
#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
|
||||
/* Workaround for broken MinGW64 declaration. */
|
||||
VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
|
||||
#define RtlUnwindEx RtlUnwindEx_FIXED
|
||||
@ -434,10 +438,15 @@ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
|
||||
#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
|
||||
#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
|
||||
|
||||
/* Win64 exception handler for interpreter frame. */
|
||||
LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
|
||||
void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
|
||||
/* Windows exception handler for interpreter frame. */
|
||||
LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
|
||||
void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
|
||||
{
|
||||
#if LJ_TARGET_X64
|
||||
void *cf = f;
|
||||
#else
|
||||
void *cf = (char *)f - CFRAME_OFS_SEH;
|
||||
#endif
|
||||
lua_State *L = cframe_L(cf);
|
||||
int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
|
||||
LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
|
||||
@ -455,8 +464,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
|
||||
setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
|
||||
} else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
|
||||
/* Don't catch access violations etc. */
|
||||
return ExceptionContinueSearch;
|
||||
return 1; /* ExceptionContinueSearch */
|
||||
}
|
||||
#if LJ_TARGET_X64
|
||||
/* Unwind the stack and call all handlers for all lower C frames
|
||||
** (including ourselves) again with EH_UNWINDING set. Then set
|
||||
** rsp = cf, rax = errcode and jump to the specified target.
|
||||
@ -466,9 +476,21 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
|
||||
lj_vm_unwind_c_eh),
|
||||
rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
|
||||
/* RtlUnwindEx should never return. */
|
||||
#else
|
||||
UNUSED(ctx);
|
||||
UNUSED(dispatch);
|
||||
/* Call all handlers for all lower C frames (including ourselves) again
|
||||
** with EH_UNWINDING set. Then call the specified function, passing cf
|
||||
** and errcode.
|
||||
*/
|
||||
lj_vm_rtlunwind(cf, (void *)rec,
|
||||
(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
|
||||
(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
|
||||
/* lj_vm_rtlunwind does not return. */
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return ExceptionContinueSearch;
|
||||
return 1; /* ExceptionContinueSearch */
|
||||
}
|
||||
|
||||
/* Raise Windows exception. */
|
||||
|
@ -102,42 +102,41 @@ static void recff_stitch(jit_State *J)
|
||||
ASMFunction cont = lj_cont_stitch;
|
||||
lua_State *L = J->L;
|
||||
TValue *base = L->base;
|
||||
BCReg nslot = J->maxslot + 1 + LJ_FR2;
|
||||
TValue *nframe = base + 1 + LJ_FR2;
|
||||
const BCIns *pc = frame_pc(base-1);
|
||||
TValue *pframe = frame_prevl(base-1);
|
||||
TRef trcont;
|
||||
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */
|
||||
/* Move func + args up in Lua stack and insert continuation. */
|
||||
memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
|
||||
setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
|
||||
setcont(base, cont);
|
||||
memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
|
||||
setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
|
||||
setcont(base-LJ_FR2, cont);
|
||||
setframe_pc(base, pc);
|
||||
setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */
|
||||
L->base += 2;
|
||||
L->top += 2;
|
||||
setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */
|
||||
L->base += 2 + LJ_FR2;
|
||||
L->top += 2 + LJ_FR2;
|
||||
|
||||
/* Ditto for the IR. */
|
||||
memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
|
||||
#if LJ_64
|
||||
trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
|
||||
memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
|
||||
#if LJ_FR2
|
||||
J->base[2] = TREF_FRAME;
|
||||
J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
|
||||
J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
|
||||
#else
|
||||
trcont = lj_ir_kptr(J, (void *)cont);
|
||||
J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
|
||||
#endif
|
||||
J->base[0] = trcont | TREF_CONT;
|
||||
J->ktracep = lj_ir_k64_reserve(J);
|
||||
lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
|
||||
J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0);
|
||||
J->base += 2;
|
||||
J->baseslot += 2;
|
||||
J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
|
||||
J->base += 2 + LJ_FR2;
|
||||
J->baseslot += 2 + LJ_FR2;
|
||||
J->framedepth++;
|
||||
|
||||
lj_record_stop(J, LJ_TRLINK_STITCH, 0);
|
||||
|
||||
/* Undo Lua stack changes. */
|
||||
memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
|
||||
memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
|
||||
setframe_pc(base-1, pc);
|
||||
L->base -= 2;
|
||||
L->top -= 2;
|
||||
L->base -= 2 + LJ_FR2;
|
||||
L->top -= 2 + LJ_FR2;
|
||||
}
|
||||
|
||||
/* Fallback handler for fast functions that are not recorded (yet). */
|
||||
@ -179,7 +178,7 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
|
||||
/* Emit BUFHDR for the global temporary buffer. */
|
||||
static TRef recff_bufhdr(jit_State *J)
|
||||
{
|
||||
return emitir(IRT(IR_BUFHDR, IRT_P32),
|
||||
return emitir(IRT(IR_BUFHDR, IRT_PGC),
|
||||
lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
|
||||
}
|
||||
|
||||
@ -229,7 +228,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd)
|
||||
ix.tab = tr;
|
||||
copyTV(J->L, &ix.tabv, &rd->argv[0]);
|
||||
lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */
|
||||
fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META);
|
||||
fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META);
|
||||
mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
|
||||
emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
|
||||
if (!tref_isnil(mt))
|
||||
@ -295,7 +294,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
|
||||
if (strV(tv)->len == 1) {
|
||||
emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv)));
|
||||
} else {
|
||||
TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0));
|
||||
TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
|
||||
TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
|
||||
emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
|
||||
}
|
||||
@ -380,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
|
||||
int errcode;
|
||||
TValue argv0;
|
||||
/* Temporarily insert metamethod below object. */
|
||||
J->base[1] = J->base[0];
|
||||
J->base[1+LJ_FR2] = J->base[0];
|
||||
J->base[0] = ix.mobj;
|
||||
copyTV(J->L, &argv0, &rd->argv[0]);
|
||||
copyTV(J->L, &rd->argv[1], &rd->argv[0]);
|
||||
copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
|
||||
copyTV(J->L, &rd->argv[0], &ix.mobjv);
|
||||
/* Need to protect lj_record_tailcall because it may throw. */
|
||||
errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
|
||||
@ -450,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
|
||||
static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
|
||||
{
|
||||
if (J->maxslot >= 1) {
|
||||
#if LJ_FR2
|
||||
/* Shift function arguments up. */
|
||||
memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
|
||||
#endif
|
||||
lj_record_call(J, 0, J->maxslot - 1);
|
||||
rd->nres = -1; /* Pending call. */
|
||||
} /* else: Interpreter will throw. */
|
||||
@ -469,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
|
||||
TValue argv0, argv1;
|
||||
TRef tmp;
|
||||
int errcode;
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
|
||||
/* Swap function and traceback. */
|
||||
tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
|
||||
copyTV(J->L, &argv0, &rd->argv[0]);
|
||||
copyTV(J->L, &argv1, &rd->argv[1]);
|
||||
copyTV(J->L, &rd->argv[0], &argv1);
|
||||
copyTV(J->L, &rd->argv[1], &argv0);
|
||||
#if LJ_FR2
|
||||
/* Shift function arguments up. */
|
||||
memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
|
||||
#endif
|
||||
/* Need to protect lj_record_call because it may throw. */
|
||||
errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
|
||||
/* Always undo Lua stack swap to avoid confusing the interpreter. */
|
||||
@ -504,7 +510,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
|
||||
static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
|
||||
{
|
||||
TRef tr = lj_ir_tonum(J, J->base[0]);
|
||||
J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J));
|
||||
J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS));
|
||||
UNUSED(rd);
|
||||
}
|
||||
|
||||
@ -613,10 +619,8 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
|
||||
|
||||
static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
|
||||
{
|
||||
TRef tr = lj_ir_tonum(J, J->base[0]);
|
||||
if (!tref_isnumber_str(J->base[1]))
|
||||
lj_trace_err(J, LJ_TRERR_BADTYPE);
|
||||
J->base[0] = lj_opt_narrow_pow(J, tr, J->base[1], &rd->argv[1]);
|
||||
J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
|
||||
&rd->argv[0], &rd->argv[1]);
|
||||
UNUSED(rd);
|
||||
}
|
||||
|
||||
@ -822,7 +826,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
|
||||
/* Also handle empty range here, to avoid extra traces. */
|
||||
TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
|
||||
emitir(IRTGI(IR_GE), trslen, tr0);
|
||||
trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
|
||||
trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
|
||||
J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
|
||||
} else { /* Range underflow: return empty string. */
|
||||
emitir(IRTGI(IR_LT), trend, trstart);
|
||||
@ -838,7 +842,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
|
||||
rd->nres = len;
|
||||
for (i = 0; i < len; i++) {
|
||||
TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i));
|
||||
tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp);
|
||||
tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp);
|
||||
J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
|
||||
}
|
||||
} else { /* Empty range or range underflow: return no results. */
|
||||
@ -860,7 +864,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
|
||||
if (i > 1) { /* Concatenate the strings, if there's more than one. */
|
||||
TRef hdr = recff_bufhdr(J), tr = hdr;
|
||||
for (i = 0; J->base[i] != 0; i++)
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]);
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
|
||||
J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
|
||||
}
|
||||
UNUSED(rd);
|
||||
@ -877,14 +881,14 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
|
||||
emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
|
||||
if (vrep > 1) {
|
||||
TRef hdr2 = recff_bufhdr(J);
|
||||
TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep);
|
||||
tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str);
|
||||
TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep);
|
||||
tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str);
|
||||
str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
|
||||
}
|
||||
}
|
||||
tr = hdr = recff_bufhdr(J);
|
||||
if (str2) {
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str);
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str);
|
||||
str = str2;
|
||||
rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
|
||||
}
|
||||
@ -935,8 +939,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
|
||||
if ((J->base[2] && tref_istruecond(J->base[3])) ||
|
||||
(emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
|
||||
!lj_str_haspattern(pat))) { /* Search for fixed string. */
|
||||
TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
|
||||
TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0);
|
||||
TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
|
||||
TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0);
|
||||
TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
|
||||
TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
|
||||
TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
|
||||
@ -944,13 +948,13 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
|
||||
if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
|
||||
str->len-(MSize)start, pat->len)) {
|
||||
TRef pos;
|
||||
emitir(IRTG(IR_NE, IRT_P32), tr, trp0);
|
||||
pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0));
|
||||
emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
|
||||
pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0));
|
||||
J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
|
||||
J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
|
||||
rd->nres = 2;
|
||||
} else {
|
||||
emitir(IRTG(IR_EQ, IRT_P32), tr, trp0);
|
||||
emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0);
|
||||
J->base[0] = TREF_NIL;
|
||||
}
|
||||
} else { /* Search for pattern. */
|
||||
@ -977,7 +981,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
|
||||
IRCallID id;
|
||||
switch (STRFMT_TYPE(sf)) {
|
||||
case STRFMT_LIT:
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
|
||||
lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
|
||||
break;
|
||||
case STRFMT_INT:
|
||||
@ -986,7 +990,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
|
||||
if (!tref_isinteger(tra))
|
||||
goto handle_num;
|
||||
if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
|
||||
emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
|
||||
} else {
|
||||
#if LJ_HASFFI
|
||||
@ -1016,7 +1020,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
|
||||
return;
|
||||
}
|
||||
if (sf == STRFMT_STR) /* Shortcut for plain %s. */
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra);
|
||||
else if ((sf & STRFMT_T_QUOTED))
|
||||
tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
|
||||
else
|
||||
@ -1025,7 +1029,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
|
||||
case STRFMT_CHAR:
|
||||
tra = lj_opt_narrow_toint(J, tra);
|
||||
if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
|
||||
emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
|
||||
else
|
||||
tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
|
||||
@ -1110,8 +1114,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
|
||||
{
|
||||
TRef tr, ud, fp;
|
||||
if (id) { /* io.func() */
|
||||
#if LJ_GC64
|
||||
/* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
|
||||
ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
|
||||
#else
|
||||
tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
|
||||
ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
|
||||
#endif
|
||||
} else { /* fp:method() */
|
||||
ud = J->base[0];
|
||||
if (!tref_isudata(ud))
|
||||
@ -1133,7 +1142,7 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
|
||||
ptrdiff_t i = rd->data == 0 ? 1 : 0;
|
||||
for (; J->base[i]; i++) {
|
||||
TRef str = lj_ir_tostr(J, J->base[i]);
|
||||
TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
|
||||
TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero);
|
||||
TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
|
||||
if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
|
||||
IRIns *irs = IR(tref_ref(str));
|
||||
|
@ -116,6 +116,17 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
|
||||
|
||||
/* These definitions must match with the arch-specific *.dasc files. */
|
||||
#if LJ_TARGET_X86
|
||||
#if LJ_ABI_WIN
|
||||
#define CFRAME_OFS_ERRF (19*4)
|
||||
#define CFRAME_OFS_NRES (18*4)
|
||||
#define CFRAME_OFS_PREV (17*4)
|
||||
#define CFRAME_OFS_L (16*4)
|
||||
#define CFRAME_OFS_SEH (9*4)
|
||||
#define CFRAME_OFS_PC (6*4)
|
||||
#define CFRAME_OFS_MULTRES (5*4)
|
||||
#define CFRAME_SIZE (16*4)
|
||||
#define CFRAME_SHIFT_MULTRES 0
|
||||
#else
|
||||
#define CFRAME_OFS_ERRF (15*4)
|
||||
#define CFRAME_OFS_NRES (14*4)
|
||||
#define CFRAME_OFS_PREV (13*4)
|
||||
@ -124,6 +135,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
|
||||
#define CFRAME_OFS_MULTRES (5*4)
|
||||
#define CFRAME_SIZE (12*4)
|
||||
#define CFRAME_SHIFT_MULTRES 0
|
||||
#endif
|
||||
#elif LJ_TARGET_X64
|
||||
#if LJ_ABI_WIN
|
||||
#define CFRAME_OFS_PREV (13*8)
|
||||
@ -226,26 +238,41 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
|
||||
#define CFRAME_SIZE 272
|
||||
#define CFRAME_SHIFT_MULTRES 3
|
||||
#endif
|
||||
#elif LJ_TARGET_MIPS
|
||||
#elif LJ_TARGET_MIPS32
|
||||
#if LJ_ARCH_HASFPU
|
||||
#define CFRAME_OFS_ERRF 124
|
||||
#define CFRAME_OFS_NRES 120
|
||||
#define CFRAME_OFS_PREV 116
|
||||
#define CFRAME_OFS_L 112
|
||||
#define CFRAME_OFS_PC 20
|
||||
#define CFRAME_OFS_MULTRES 16
|
||||
#define CFRAME_SIZE 112
|
||||
#define CFRAME_SHIFT_MULTRES 3
|
||||
#else
|
||||
#define CFRAME_OFS_ERRF 76
|
||||
#define CFRAME_OFS_NRES 72
|
||||
#define CFRAME_OFS_PREV 68
|
||||
#define CFRAME_OFS_L 64
|
||||
#define CFRAME_SIZE 64
|
||||
#endif
|
||||
#define CFRAME_OFS_PC 20
|
||||
#define CFRAME_OFS_MULTRES 16
|
||||
#define CFRAME_SIZE 64
|
||||
#define CFRAME_SHIFT_MULTRES 3
|
||||
#elif LJ_TARGET_MIPS64
|
||||
#if LJ_ARCH_HASFPU
|
||||
#define CFRAME_OFS_ERRF 188
|
||||
#define CFRAME_OFS_NRES 184
|
||||
#define CFRAME_OFS_PREV 176
|
||||
#define CFRAME_OFS_L 168
|
||||
#define CFRAME_OFS_PC 160
|
||||
#define CFRAME_SIZE 192
|
||||
#else
|
||||
#define CFRAME_OFS_ERRF 124
|
||||
#define CFRAME_OFS_NRES 120
|
||||
#define CFRAME_OFS_PREV 112
|
||||
#define CFRAME_OFS_L 104
|
||||
#define CFRAME_OFS_PC 96
|
||||
#define CFRAME_SIZE 128
|
||||
#endif
|
||||
#define CFRAME_OFS_MULTRES 0
|
||||
#define CFRAME_SHIFT_MULTRES 3
|
||||
#else
|
||||
#error "Missing CFRAME_* definitions for this architecture"
|
||||
#endif
|
||||
|
@ -238,6 +238,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T)
|
||||
IRIns *ir = &T->ir[ref];
|
||||
if (ir->o == IR_KGC)
|
||||
gc_markobj(g, ir_kgc(ir));
|
||||
if (irt_is64(ir->t) && ir->o != IR_KNULL)
|
||||
ref++;
|
||||
}
|
||||
if (T->link) gc_marktrace(g, T->link);
|
||||
if (T->nextroot) gc_marktrace(g, T->nextroot);
|
||||
|
@ -719,6 +719,20 @@ static void gdbjit_buildobj(GDBJITctx *ctx)
|
||||
|
||||
/* -- Interface to GDB JIT API -------------------------------------------- */
|
||||
|
||||
/* Spin lock flag serializing updates of the GDB JIT descriptor chain.
** 0 = free, 1 = held.
*/
static int gdbjit_lock;

/* Acquire the GDB JIT lock, busy-waiting until it is free.
** __sync_lock_test_and_set() atomically stores 1 and returns the previous
** value, so the loop only exits for the caller that observed 0.
*/
static void gdbjit_lock_acquire(void)
{
  while (__sync_lock_test_and_set(&gdbjit_lock, 1)) {
    /* Just spin; futexes or pthreads aren't worth the portability cost. */
  }
}

/* Release the GDB JIT lock by atomically resetting the flag to 0. */
static void gdbjit_lock_release(void)
{
  __sync_lock_release(&gdbjit_lock);
}
|
||||
|
||||
/* Add new entry to GDB JIT symbol chain. */
|
||||
static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
|
||||
{
|
||||
@ -730,6 +744,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
|
||||
ctx->T->gdbjit_entry = (void *)eo;
|
||||
/* Link new entry to chain and register it. */
|
||||
eo->entry.prev_entry = NULL;
|
||||
gdbjit_lock_acquire();
|
||||
eo->entry.next_entry = __jit_debug_descriptor.first_entry;
|
||||
if (eo->entry.next_entry)
|
||||
eo->entry.next_entry->prev_entry = &eo->entry;
|
||||
@ -739,6 +754,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
|
||||
__jit_debug_descriptor.relevant_entry = &eo->entry;
|
||||
__jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
|
||||
__jit_debug_register_code();
|
||||
gdbjit_lock_release();
|
||||
}
|
||||
|
||||
/* Add debug info for newly compiled trace and notify GDB. */
|
||||
@ -770,6 +786,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
|
||||
{
|
||||
GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
|
||||
if (eo) {
|
||||
gdbjit_lock_acquire();
|
||||
if (eo->entry.prev_entry)
|
||||
eo->entry.prev_entry->next_entry = eo->entry.next_entry;
|
||||
else
|
||||
@ -779,6 +796,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
|
||||
__jit_debug_descriptor.relevant_entry = &eo->entry;
|
||||
__jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
|
||||
__jit_debug_register_code();
|
||||
gdbjit_lock_release();
|
||||
lj_mem_free(J2G(J), eo, eo->sz);
|
||||
}
|
||||
}
|
||||
|
161
src/lj_ir.c
161
src/lj_ir.c
@ -91,7 +91,7 @@ static void lj_ir_growbot(jit_State *J)
|
||||
IRIns *baseir = J->irbuf + J->irbotlim;
|
||||
MSize szins = J->irtoplim - J->irbotlim;
|
||||
lua_assert(szins != 0);
|
||||
lua_assert(J->cur.nk == J->irbotlim);
|
||||
lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim);
|
||||
if (J->cur.nins + (szins >> 1) < J->irtoplim) {
|
||||
/* More than half of the buffer is free on top: shift up by a quarter. */
|
||||
MSize ofs = szins >> 2;
|
||||
@ -145,6 +145,14 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
|
||||
return emitir(CCI_OPTYPE(ci), tr, id);
|
||||
}
|
||||
|
||||
/* Load field of type t from GG_State + offset. */
|
||||
LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
|
||||
{
|
||||
lua_assert(ofs >= IRFL__MAX && ofs < REF_BIAS);
|
||||
lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs);
|
||||
return lj_opt_fold(J);
|
||||
}
|
||||
|
||||
/* -- Interning of constants ---------------------------------------------- */
|
||||
|
||||
/*
|
||||
@ -165,6 +173,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
|
||||
return ref;
|
||||
}
|
||||
|
||||
/* Get ref of next 64 bit IR constant and optionally grow IR.
|
||||
** Note: this may invalidate all IRIns *!
|
||||
*/
|
||||
static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
|
||||
{
|
||||
IRRef ref = J->cur.nk - 2;
|
||||
lua_assert(J->state != LJ_TRACE_ASM);
|
||||
if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
|
||||
J->cur.nk = ref;
|
||||
return ref;
|
||||
}
|
||||
|
||||
#if LJ_GC64
|
||||
#define ir_nextkgc ir_nextk64
|
||||
#else
|
||||
#define ir_nextkgc ir_nextk
|
||||
#endif
|
||||
|
||||
/* Intern int32_t constant. */
|
||||
TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
|
||||
{
|
||||
@ -184,95 +210,21 @@ found:
|
||||
return TREF(ref, IRT_INT);
|
||||
}
|
||||
|
||||
/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the
|
||||
** 64 bit constant. The constants themselves are stored in a chained array
|
||||
** and shared across traces.
|
||||
**
|
||||
** Rationale for choosing this data structure:
|
||||
** - The address of the constants is embedded in the generated machine code
|
||||
** and must never move. A resizable array or hash table wouldn't work.
|
||||
** - Most apps need very few non-32 bit integer constants (less than a dozen).
|
||||
** - Linear search is hard to beat in terms of speed and low complexity.
|
||||
*/
|
||||
typedef struct K64Array {
|
||||
MRef next; /* Pointer to next list. */
|
||||
MSize numk; /* Number of used elements in this array. */
|
||||
TValue k[LJ_MIN_K64SZ]; /* Array of constants. */
|
||||
} K64Array;
|
||||
|
||||
/* Free all chained arrays. */
|
||||
void lj_ir_k64_freeall(jit_State *J)
|
||||
{
|
||||
K64Array *k;
|
||||
for (k = mref(J->k64, K64Array); k; ) {
|
||||
K64Array *next = mref(k->next, K64Array);
|
||||
lj_mem_free(J2G(J), k, sizeof(K64Array));
|
||||
k = next;
|
||||
}
|
||||
setmref(J->k64, NULL);
|
||||
}
|
||||
|
||||
/* Get new 64 bit constant slot. */
|
||||
static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64)
|
||||
{
|
||||
TValue *ntv;
|
||||
if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
|
||||
K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
|
||||
setmref(kn->next, NULL);
|
||||
kn->numk = 0;
|
||||
if (kp)
|
||||
setmref(kp->next, kn); /* Chain to the end of the list. */
|
||||
else
|
||||
setmref(J->k64, kn); /* Link first array. */
|
||||
kp = kn;
|
||||
}
|
||||
ntv = &kp->k[kp->numk++]; /* Add to current array. */
|
||||
ntv->u64 = u64;
|
||||
return ntv;
|
||||
}
|
||||
|
||||
/* Find 64 bit constant in chained array or add it. */
|
||||
cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
|
||||
{
|
||||
K64Array *k, *kp = NULL;
|
||||
MSize idx;
|
||||
/* Search for the constant in the whole chain of arrays. */
|
||||
for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
|
||||
kp = k; /* Remember previous element in list. */
|
||||
for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
|
||||
TValue *tv = &k->k[idx];
|
||||
if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
|
||||
return tv;
|
||||
}
|
||||
}
|
||||
/* Otherwise add a new constant. */
|
||||
return ir_k64_add(J, kp, u64);
|
||||
}
|
||||
|
||||
TValue *lj_ir_k64_reserve(jit_State *J)
|
||||
{
|
||||
K64Array *k, *kp = NULL;
|
||||
lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */
|
||||
/* Find last K64Array, if any. */
|
||||
for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k;
|
||||
return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */
|
||||
}
|
||||
|
||||
/* Intern 64 bit constant, given by its address. */
|
||||
TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
|
||||
/* Intern 64 bit constant, given by its 64 bit pattern. */
|
||||
TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64)
|
||||
{
|
||||
IRIns *ir, *cir = J->cur.ir;
|
||||
IRRef ref;
|
||||
IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
|
||||
for (ref = J->chain[op]; ref; ref = cir[ref].prev)
|
||||
if (ir_k64(&cir[ref]) == tv)
|
||||
if (ir_k64(&cir[ref])->u64 == u64)
|
||||
goto found;
|
||||
ref = ir_nextk(J);
|
||||
ref = ir_nextk64(J);
|
||||
ir = IR(ref);
|
||||
lua_assert(checkptrGC(tv));
|
||||
setmref(ir->ptr, tv);
|
||||
ir[1].tv.u64 = u64;
|
||||
ir->t.irt = t;
|
||||
ir->o = op;
|
||||
ir->op12 = 0;
|
||||
ir->prev = J->chain[op];
|
||||
J->chain[op] = (IRRef1)ref;
|
||||
found:
|
||||
@ -282,13 +234,13 @@ found:
|
||||
/* Intern FP constant, given by its 64 bit pattern. */
|
||||
TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
|
||||
{
|
||||
return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64));
|
||||
return lj_ir_k64(J, IR_KNUM, u64);
|
||||
}
|
||||
|
||||
/* Intern 64 bit integer constant. */
|
||||
TRef lj_ir_kint64(jit_State *J, uint64_t u64)
|
||||
{
|
||||
return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64));
|
||||
return lj_ir_k64(J, IR_KINT64, u64);
|
||||
}
|
||||
|
||||
/* Check whether a number is int and return it. -0 is NOT considered an int. */
|
||||
@ -323,15 +275,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
|
||||
{
|
||||
IRIns *ir, *cir = J->cur.ir;
|
||||
IRRef ref;
|
||||
lua_assert(!LJ_GC64); /* TODO_GC64: major changes required. */
|
||||
lua_assert(!isdead(J2G(J), o));
|
||||
for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
|
||||
if (ir_kgc(&cir[ref]) == o)
|
||||
goto found;
|
||||
ref = ir_nextk(J);
|
||||
ref = ir_nextkgc(J);
|
||||
ir = IR(ref);
|
||||
/* NOBARRIER: Current trace is a GC root. */
|
||||
setgcref(ir->gcr, o);
|
||||
ir->op12 = 0;
|
||||
setgcref(ir[LJ_GC64].gcr, o);
|
||||
ir->t.irt = (uint8_t)t;
|
||||
ir->o = IR_KGC;
|
||||
ir->prev = J->chain[IR_KGC];
|
||||
@ -340,24 +292,44 @@ found:
|
||||
return TREF(ref, t);
|
||||
}
|
||||
|
||||
/* Intern 32 bit pointer constant. */
|
||||
/* Allocate GCtrace constant placeholder (no interning). */
|
||||
TRef lj_ir_ktrace(jit_State *J)
|
||||
{
|
||||
IRRef ref = ir_nextkgc(J);
|
||||
IRIns *ir = IR(ref);
|
||||
lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
|
||||
ir->t.irt = IRT_P64;
|
||||
ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */
|
||||
ir->op12 = 0;
|
||||
ir->prev = 0;
|
||||
return TREF(ref, IRT_P64);
|
||||
}
|
||||
|
||||
/* Intern pointer constant. */
|
||||
TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
|
||||
{
|
||||
IRIns *ir, *cir = J->cur.ir;
|
||||
IRRef ref;
|
||||
lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr);
|
||||
#if LJ_64 && !LJ_GC64
|
||||
lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr);
|
||||
#endif
|
||||
for (ref = J->chain[op]; ref; ref = cir[ref].prev)
|
||||
if (mref(cir[ref].ptr, void) == ptr)
|
||||
if (ir_kptr(&cir[ref]) == ptr)
|
||||
goto found;
|
||||
#if LJ_GC64
|
||||
ref = ir_nextk64(J);
|
||||
#else
|
||||
ref = ir_nextk(J);
|
||||
#endif
|
||||
ir = IR(ref);
|
||||
setmref(ir->ptr, ptr);
|
||||
ir->t.irt = IRT_P32;
|
||||
ir->op12 = 0;
|
||||
setmref(ir[LJ_GC64].ptr, ptr);
|
||||
ir->t.irt = IRT_PGC;
|
||||
ir->o = op;
|
||||
ir->prev = J->chain[op];
|
||||
J->chain[op] = (IRRef1)ref;
|
||||
found:
|
||||
return TREF(ref, IRT_P32);
|
||||
return TREF(ref, IRT_PGC);
|
||||
}
|
||||
|
||||
/* Intern typed NULL constant. */
|
||||
@ -412,9 +384,8 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
|
||||
case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
|
||||
case IR_KINT: setintV(tv, ir->i); break;
|
||||
case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
|
||||
case IR_KPTR: case IR_KKPTR: case IR_KNULL:
|
||||
setlightudV(tv, mref(ir->ptr, void));
|
||||
break;
|
||||
case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break;
|
||||
case IR_KNULL: setlightudV(tv, NULL); break;
|
||||
case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
|
||||
#if LJ_HASFFI
|
||||
case IR_KINT64: {
|
||||
|
45
src/lj_ir.h
45
src/lj_ir.h
@ -220,7 +220,7 @@ IRFLDEF(FLENUM)
|
||||
|
||||
/* SLOAD mode bits, stored in op2. */
|
||||
#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
|
||||
#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */
|
||||
#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
|
||||
#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
|
||||
#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
|
||||
#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
|
||||
@ -294,7 +294,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
|
||||
|
||||
/* -- IR instruction types ------------------------------------------------ */
|
||||
|
||||
/* Map of itypes to non-negative numbers. ORDER LJ_T.
|
||||
#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4)
|
||||
|
||||
/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T.
|
||||
** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for
|
||||
** IRT_P32 and IRT_P64, which never escape the IR.
|
||||
** The various integers are only used in the IR and can only escape to
|
||||
@ -302,12 +304,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
|
||||
** contiguous and next to IRT_NUM (see the typerange macros below).
|
||||
*/
|
||||
#define IRTDEF(_) \
|
||||
_(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \
|
||||
_(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \
|
||||
_(TAB, 4) _(UDATA, 4) \
|
||||
_(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \
|
||||
_(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \
|
||||
_(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \
|
||||
_(UDATA, IRTSIZE_PGC) \
|
||||
_(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \
|
||||
_(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \
|
||||
_(SOFTFP, 4) /* There is room for 9 more types. */
|
||||
_(SOFTFP, 4) /* There is room for 8 more types. */
|
||||
|
||||
/* IR result type and flags (8 bit). */
|
||||
typedef enum {
|
||||
@ -318,9 +321,10 @@ IRTDEF(IRTENUM)
|
||||
|
||||
/* Native pointer type and the corresponding integer type. */
|
||||
IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
|
||||
IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32,
|
||||
IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT,
|
||||
IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
|
||||
IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
|
||||
/* TODO_GC64: major changes required for all uses of IRT_P32. */
|
||||
|
||||
/* Additional flags. */
|
||||
IRT_MARK = 0x20, /* Marker for misc. purposes. */
|
||||
@ -408,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
|
||||
|
||||
static LJ_AINLINE uint32_t irt_toitype_(IRType t)
|
||||
{
|
||||
lua_assert(!LJ_64 || t != IRT_LIGHTUD);
|
||||
lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD);
|
||||
if (LJ_DUALNUM && t > IRT_NUM) {
|
||||
return LJ_TISNUM;
|
||||
} else {
|
||||
@ -521,7 +525,9 @@ typedef uint32_t TRef;
|
||||
** +-------+-------+---+---+---+---+
|
||||
** | op1 | op2 | t | o | r | s |
|
||||
** +-------+-------+---+---+---+---+
|
||||
** | op12/i/gco | ot | prev | (alternative fields in union)
|
||||
** | op12/i/gco32 | ot | prev | (alternative fields in union)
|
||||
** +-------+-------+---+---+---+---+
|
||||
** | TValue/gco64 | (2nd IR slot for 64 bit constants)
|
||||
** +---------------+-------+-------+
|
||||
** 32 16 16
|
||||
**
|
||||
@ -549,22 +555,27 @@ typedef union IRIns {
|
||||
)
|
||||
};
|
||||
int32_t i; /* 32 bit signed integer literal (overlaps op12). */
|
||||
GCRef gcr; /* GCobj constant (overlaps op12). */
|
||||
MRef ptr; /* Pointer constant (overlaps op12). */
|
||||
GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */
|
||||
MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */
|
||||
TValue tv; /* TValue constant (overlaps entire slot). */
|
||||
} IRIns;
|
||||
|
||||
/* TODO_GC64: major changes required. */
|
||||
#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr))
|
||||
#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
|
||||
#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
|
||||
#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
|
||||
#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
|
||||
#define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
|
||||
#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue))
|
||||
#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
|
||||
#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
|
||||
#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
|
||||
#define ir_k64(ir) \
|
||||
check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
|
||||
check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
|
||||
(LJ_GC64 && \
|
||||
((ir)->o == IR_KGC || \
|
||||
(ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \
|
||||
&(ir)[1].tv)
|
||||
#define ir_kptr(ir) \
|
||||
check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void))
|
||||
check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
|
||||
mref((ir)[LJ_GC64].ptr, void))
|
||||
|
||||
/* A store or any other op with a non-weak guard has a side-effect. */
|
||||
static LJ_AINLINE int ir_sideeff(IRIns *ir)
|
||||
|
@ -78,13 +78,13 @@ typedef struct CCallInfo {
|
||||
#define IRCALLCOND_SOFTFP_FFI(x) NULL
|
||||
#endif
|
||||
|
||||
#if LJ_SOFTFP && LJ_TARGET_MIPS
|
||||
#if LJ_SOFTFP && LJ_TARGET_MIPS32
|
||||
#define IRCALLCOND_SOFTFP_MIPS(x) x
|
||||
#else
|
||||
#define IRCALLCOND_SOFTFP_MIPS(x) NULL
|
||||
#endif
|
||||
|
||||
#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
|
||||
#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32)
|
||||
|
||||
#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
|
||||
#define IRCALLCOND_FP64_FFI(x) x
|
||||
@ -104,12 +104,6 @@ typedef struct CCallInfo {
|
||||
#define IRCALLCOND_FFI32(x) NULL
|
||||
#endif
|
||||
|
||||
#if LJ_TARGET_X86
|
||||
#define CCI_RANDFPR 0 /* Clang on OSX/x86 is overzealous. */
|
||||
#else
|
||||
#define CCI_RANDFPR CCI_NOFPRCLOBBER
|
||||
#endif
|
||||
|
||||
#if LJ_SOFTFP
|
||||
#define XA_FP CCI_XA
|
||||
#define XA2_FP (CCI_XA+CCI_XA)
|
||||
@ -129,40 +123,40 @@ typedef struct CCallInfo {
|
||||
/* Function definitions for CALL* instructions. */
|
||||
#define IRCALLDEF(_) \
|
||||
_(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
|
||||
_(ANY, lj_str_find, 4, N, P32, 0) \
|
||||
_(ANY, lj_str_find, 4, N, PGC, 0) \
|
||||
_(ANY, lj_str_new, 3, S, STR, CCI_L) \
|
||||
_(ANY, lj_strscan_num, 2, FN, INT, 0) \
|
||||
_(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \
|
||||
_(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \
|
||||
_(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \
|
||||
_(ANY, lj_strfmt_putint, 2, FL, P32, 0) \
|
||||
_(ANY, lj_strfmt_putnum, 2, FL, P32, 0) \
|
||||
_(ANY, lj_strfmt_putquoted, 2, FL, P32, 0) \
|
||||
_(ANY, lj_strfmt_putfxint, 3, L, P32, XA_64) \
|
||||
_(ANY, lj_strfmt_putfnum_int, 3, L, P32, XA_FP) \
|
||||
_(ANY, lj_strfmt_putfnum_uint, 3, L, P32, XA_FP) \
|
||||
_(ANY, lj_strfmt_putfnum, 3, L, P32, XA_FP) \
|
||||
_(ANY, lj_strfmt_putfstr, 3, L, P32, 0) \
|
||||
_(ANY, lj_strfmt_putfchar, 3, L, P32, 0) \
|
||||
_(ANY, lj_buf_putmem, 3, S, P32, 0) \
|
||||
_(ANY, lj_buf_putstr, 2, FL, P32, 0) \
|
||||
_(ANY, lj_buf_putchar, 2, FL, P32, 0) \
|
||||
_(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \
|
||||
_(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \
|
||||
_(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \
|
||||
_(ANY, lj_buf_putstr_rep, 3, L, P32, 0) \
|
||||
_(ANY, lj_buf_puttab, 5, L, P32, 0) \
|
||||
_(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \
|
||||
_(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \
|
||||
_(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \
|
||||
_(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \
|
||||
_(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \
|
||||
_(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \
|
||||
_(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \
|
||||
_(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \
|
||||
_(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \
|
||||
_(ANY, lj_buf_putmem, 3, S, PGC, 0) \
|
||||
_(ANY, lj_buf_putstr, 2, FL, PGC, 0) \
|
||||
_(ANY, lj_buf_putchar, 2, FL, PGC, 0) \
|
||||
_(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \
|
||||
_(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \
|
||||
_(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \
|
||||
_(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \
|
||||
_(ANY, lj_buf_puttab, 5, L, PGC, 0) \
|
||||
_(ANY, lj_buf_tostr, 1, FL, STR, 0) \
|
||||
_(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \
|
||||
_(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
|
||||
_(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
|
||||
_(ANY, lj_tab_clear, 1, FS, NIL, 0) \
|
||||
_(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \
|
||||
_(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \
|
||||
_(ANY, lj_tab_len, 1, FL, INT, 0) \
|
||||
_(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
|
||||
_(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
|
||||
_(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \
|
||||
_(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_RANDFPR)\
|
||||
_(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \
|
||||
_(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \
|
||||
_(ANY, lj_vm_modi, 2, FN, INT, 0) \
|
||||
_(ANY, sinh, 1, N, NUM, XA_FP) \
|
||||
_(ANY, cosh, 1, N, NUM, XA_FP) \
|
||||
|
@ -36,12 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
|
||||
return ref;
|
||||
}
|
||||
|
||||
LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs);
|
||||
|
||||
/* Interning of constants. */
|
||||
LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
|
||||
LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
|
||||
LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
|
||||
LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J);
|
||||
LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
|
||||
LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64);
|
||||
LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
|
||||
LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
|
||||
LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
|
||||
@ -49,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
|
||||
LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr);
|
||||
LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
|
||||
LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
|
||||
LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
|
||||
|
||||
#if LJ_64
|
||||
#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k))
|
||||
@ -75,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
|
||||
#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000))
|
||||
|
||||
/* Special 128 bit SIMD constants. */
|
||||
#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS))
|
||||
#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG))
|
||||
#define lj_ir_ksimd(J, idx) \
|
||||
lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J))
|
||||
|
||||
/* Access to constants. */
|
||||
LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
|
||||
@ -143,8 +143,8 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key);
|
||||
LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
|
||||
TValue *vb, TValue *vc, IROp op);
|
||||
LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
|
||||
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc);
|
||||
LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc);
|
||||
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
|
||||
LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
|
||||
LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
|
||||
|
||||
/* Optimization passes. */
|
||||
|
63
src/lj_jit.h
63
src/lj_jit.h
@ -46,12 +46,16 @@
|
||||
#define JIT_F_CPU_FIRST JIT_F_SQRT
|
||||
#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
|
||||
#elif LJ_TARGET_MIPS
|
||||
#define JIT_F_MIPS32R2 0x00000010
|
||||
#define JIT_F_MIPSXXR2 0x00000010
|
||||
|
||||
/* Names for the CPU-specific flags. Must match the order above. */
|
||||
#define JIT_F_CPU_FIRST JIT_F_MIPS32R2
|
||||
#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2
|
||||
#if LJ_TARGET_MIPS32
|
||||
#define JIT_F_CPUSTRING "\010MIPS32R2"
|
||||
#else
|
||||
#define JIT_F_CPUSTRING "\010MIPS64R2"
|
||||
#endif
|
||||
#else
|
||||
#define JIT_F_CPU_FIRST 0
|
||||
#define JIT_F_CPUSTRING ""
|
||||
#endif
|
||||
@ -179,14 +183,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
|
||||
#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
|
||||
#define SNAP_TR(slot, tr) \
|
||||
(((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
|
||||
#if !LJ_FR2
|
||||
#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
|
||||
#endif
|
||||
#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
|
||||
#define snap_ref(sn) ((sn) & 0xffff)
|
||||
#define snap_slot(sn) ((BCReg)((sn) >> 24))
|
||||
#define snap_isframe(sn) ((sn) & SNAP_FRAME)
|
||||
#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
|
||||
#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
|
||||
|
||||
/* Recover the bytecode PC stored at snapshot map position sn.
** LJ_FR2: reads 8 bytes starting at sn and drops the low 8 bits.
** Otherwise: the entry at sn itself holds the PC.
*/
static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
{
#if LJ_FR2
  uint64_t packed;
  /* NOTE(review): memcpy presumably avoids a misaligned 64 bit load. */
  memcpy(&packed, sn, sizeof(packed));
  packed >>= 8;
  return (const BCIns *)packed;
#else
  const uintptr_t raw = (uintptr_t)*sn;
  return (const BCIns *)raw;
#endif
}
|
||||
|
||||
/* Snapshot and exit numbers. */
|
||||
typedef uint32_t SnapNo;
|
||||
typedef uint32_t ExitNo;
|
||||
@ -308,6 +324,37 @@ enum {
|
||||
LJ_KSIMD__MAX
|
||||
};
|
||||
|
||||
/* Indices of the common 8 byte constants used by backends (J->k64[]).
** The set of entries depends on the target; LJ_K64__MAX is the entry count.
*/
enum {
|
||||
#if LJ_TARGET_X86ORX64
|
||||
LJ_K64_TOBIT, /* 2^52 + 2^51 */
|
||||
LJ_K64_2P64, /* 2^64 */
|
||||
LJ_K64_M2P64, /* -2^64 */
|
||||
#if LJ_32
|
||||
LJ_K64_M2P64_31, /* -2^64 or -2^31 */
|
||||
#else
|
||||
LJ_K64_M2P64_31 = LJ_K64_M2P64, /* Alias when !LJ_32: no extra entry. */
|
||||
#endif
|
||||
#endif
|
||||
#if LJ_TARGET_MIPS
|
||||
LJ_K64_2P31, /* 2^31 */
|
||||
#endif
|
||||
LJ_K64__MAX,
|
||||
};
|
||||
|
||||
/* Indices of the common 4 byte constants used by backends (J->k32[]).
** The set of entries depends on the target; LJ_K32__MAX is the entry count.
*/
enum {
|
||||
#if LJ_TARGET_X86ORX64
|
||||
LJ_K32_M2P64_31, /* -2^64 or -2^31 */
|
||||
#endif
|
||||
#if LJ_TARGET_PPC
|
||||
LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
|
||||
LJ_K32_2P52, /* 2^52 */
|
||||
#endif
|
||||
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
|
||||
LJ_K32_2P31, /* 2^31 */
|
||||
#endif
|
||||
LJ_K32__MAX
|
||||
};
|
||||
|
||||
/* Get 16 byte aligned pointer to SIMD constant. */
|
||||
#define LJ_KSIMD(J, n) \
|
||||
((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
|
||||
@ -324,13 +371,14 @@ enum {
|
||||
/* Fold state is used to fold instructions on-the-fly. */
|
||||
typedef struct FoldState {
|
||||
IRIns ins; /* Currently emitted instruction. */
|
||||
IRIns left; /* Instruction referenced by left operand. */
|
||||
IRIns right; /* Instruction referenced by right operand. */
|
||||
IRIns left[2]; /* Instruction referenced by left operand. */
|
||||
IRIns right[2]; /* Instruction referenced by right operand. */
|
||||
} FoldState;
|
||||
|
||||
/* JIT compiler state. */
|
||||
typedef struct jit_State {
|
||||
GCtrace cur; /* Current trace. */
|
||||
GCtrace *curfinal; /* Final address of current trace (set during asm). */
|
||||
|
||||
lua_State *L; /* Current Lua state. */
|
||||
const BCIns *pc; /* Current PC. */
|
||||
@ -360,8 +408,9 @@ typedef struct jit_State {
|
||||
int32_t framedepth; /* Current frame depth. */
|
||||
int32_t retdepth; /* Return frame depth (count of RETF). */
|
||||
|
||||
MRef k64; /* Pointer to chained array of 64 bit constants. */
|
||||
TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
|
||||
TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */
|
||||
uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */
|
||||
|
||||
IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
|
||||
IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
|
||||
@ -382,7 +431,7 @@ typedef struct jit_State {
|
||||
GCRef *trace; /* Array of traces. */
|
||||
TraceNo freetrace; /* Start of scan for next free trace. */
|
||||
MSize sizetrace; /* Size of trace array. */
|
||||
TValue *ktracep; /* Pointer to K64Array slot with GCtrace pointer. */
|
||||
IRRef1 ktrace; /* Reference to KGC with GCtrace. */
|
||||
|
||||
IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
|
||||
TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
|
||||
|
@ -843,12 +843,16 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
|
||||
#endif
|
||||
|
||||
#if LJ_FR2
|
||||
#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)(void *)(f))
|
||||
#define contptr(f) ((void *)(f))
|
||||
#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f))
|
||||
#elif LJ_64
|
||||
#define contptr(f) \
|
||||
((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin))
|
||||
#define setcont(o, f) \
|
||||
((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
|
||||
#else
|
||||
#define setcont(o, f) setlightudV((o), (void *)(f))
|
||||
#define contptr(f) ((void *)(f))
|
||||
#define setcont(o, f) setlightudV((o), contptr(f))
|
||||
#endif
|
||||
|
||||
#define tvchecklive(L, o) \
|
||||
|
@ -136,8 +136,8 @@
|
||||
/* Some local macros to save typing. Undef'd at the end. */
|
||||
#define IR(ref) (&J->cur.ir[(ref)])
|
||||
#define fins (&J->fold.ins)
|
||||
#define fleft (&J->fold.left)
|
||||
#define fright (&J->fold.right)
|
||||
#define fleft (J->fold.left)
|
||||
#define fright (J->fold.right)
|
||||
#define knumleft (ir_knum(fleft)->n)
|
||||
#define knumright (ir_knum(fright)->n)
|
||||
|
||||
@ -502,7 +502,7 @@ LJFOLDF(kfold_strref_snew)
|
||||
PHIBARRIER(ir);
|
||||
fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
|
||||
fins->op1 = str;
|
||||
fins->ot = IRT(IR_STRREF, IRT_P32);
|
||||
fins->ot = IRT(IR_STRREF, IRT_PGC);
|
||||
return RETRYFOLD;
|
||||
}
|
||||
}
|
||||
@ -998,8 +998,10 @@ LJFOLDF(simplify_nummuldiv_k)
|
||||
if (n == 1.0) { /* x o 1 ==> x */
|
||||
return LEFTFOLD;
|
||||
} else if (n == -1.0) { /* x o -1 ==> -x */
|
||||
IRRef op1 = fins->op1;
|
||||
fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
|
||||
fins->op1 = op1;
|
||||
fins->o = IR_NEG;
|
||||
fins->op2 = (IRRef1)lj_ir_knum_neg(J);
|
||||
return RETRYFOLD;
|
||||
} else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
|
||||
fins->o = IR_ADD;
|
||||
@ -2393,10 +2395,14 @@ retry:
|
||||
if (fins->op1 >= J->cur.nk) {
|
||||
key += (uint32_t)IR(fins->op1)->o << 10;
|
||||
*fleft = *IR(fins->op1);
|
||||
if (fins->op1 < REF_TRUE)
|
||||
fleft[1] = IR(fins->op1)[1];
|
||||
}
|
||||
if (fins->op2 >= J->cur.nk) {
|
||||
key += (uint32_t)IR(fins->op2)->o;
|
||||
*fright = *IR(fins->op2);
|
||||
if (fins->op2 < REF_TRUE)
|
||||
fright[1] = IR(fins->op2)[1];
|
||||
} else {
|
||||
key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
|
||||
}
|
||||
|
@ -22,8 +22,8 @@
|
||||
/* Some local macros to save typing. Undef'd at the end. */
|
||||
#define IR(ref) (&J->cur.ir[(ref)])
|
||||
#define fins (&J->fold.ins)
|
||||
#define fleft (&J->fold.left)
|
||||
#define fright (&J->fold.right)
|
||||
#define fleft (J->fold.left)
|
||||
#define fright (J->fold.right)
|
||||
|
||||
/*
|
||||
** Caveat #1: return value is not always a TRef -- only use with tref_ref().
|
||||
|
@ -517,18 +517,24 @@ static int numisint(lua_Number n)
|
||||
return (n == (lua_Number)lj_num2int(n));
|
||||
}
|
||||
|
||||
/* Convert string to number. Error out for non-numeric string values. */
|
||||
static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o)
|
||||
{
|
||||
if (tref_isstr(tr)) {
|
||||
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
|
||||
/* Would need an inverted STRTO for this rare and useless case. */
|
||||
if (!lj_strscan_num(strV(o), o)) /* Convert in-place. Value used below. */
|
||||
lj_trace_err(J, LJ_TRERR_BADTYPE); /* Punt if non-numeric. */
|
||||
}
|
||||
return tr;
|
||||
}
|
||||
|
||||
/* Narrowing of arithmetic operations. */
|
||||
TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
|
||||
TValue *vb, TValue *vc, IROp op)
|
||||
{
|
||||
if (tref_isstr(rb)) {
|
||||
rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0);
|
||||
lj_strscan_num(strV(vb), vb);
|
||||
}
|
||||
if (tref_isstr(rc)) {
|
||||
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
|
||||
lj_strscan_num(strV(vc), vc);
|
||||
}
|
||||
rb = conv_str_tonum(J, rb, vb);
|
||||
rc = conv_str_tonum(J, rc, vc);
|
||||
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
|
||||
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
|
||||
tref_isinteger(rb) && tref_isinteger(rc) &&
|
||||
@ -543,24 +549,21 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
|
||||
/* Narrowing of unary minus operator. */
|
||||
TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
|
||||
{
|
||||
if (tref_isstr(rc)) {
|
||||
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
|
||||
lj_strscan_num(strV(vc), vc);
|
||||
}
|
||||
rc = conv_str_tonum(J, rc, vc);
|
||||
if (tref_isinteger(rc)) {
|
||||
if ((uint32_t)numberVint(vc) != 0x80000000u)
|
||||
return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
|
||||
rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
|
||||
}
|
||||
return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J));
|
||||
return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
|
||||
}
|
||||
|
||||
/* Narrowing of modulo operator. */
|
||||
TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc)
|
||||
TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
|
||||
{
|
||||
TRef tmp;
|
||||
if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc))
|
||||
lj_trace_err(J, LJ_TRERR_BADTYPE);
|
||||
rb = conv_str_tonum(J, rb, vb);
|
||||
rc = conv_str_tonum(J, rc, vc);
|
||||
if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) &&
|
||||
tref_isinteger(rb) && tref_isinteger(rc) &&
|
||||
(tvisint(vc) ? intV(vc) != 0 : !tviszero(vc))) {
|
||||
@ -577,10 +580,11 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc)
|
||||
}
|
||||
|
||||
/* Narrowing of power operator or math.pow. */
|
||||
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
|
||||
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
|
||||
{
|
||||
if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc))
|
||||
lj_trace_err(J, LJ_TRERR_BADTYPE);
|
||||
rb = conv_str_tonum(J, rb, vb);
|
||||
rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
|
||||
rc = conv_str_tonum(J, rc, vc);
|
||||
/* Narrowing must be unconditional to preserve (-x)^i semantics. */
|
||||
if (tvisint(vc) || numisint(numV(vc))) {
|
||||
int checkrange = 0;
|
||||
@ -591,8 +595,6 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
|
||||
checkrange = 1;
|
||||
}
|
||||
if (!tref_isinteger(rc)) {
|
||||
if (tref_isstr(rc))
|
||||
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
|
||||
/* Guarded conversion to integer! */
|
||||
rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
|
||||
}
|
||||
|
@ -153,10 +153,9 @@ static void sink_remark_phi(jit_State *J)
|
||||
remark = 0;
|
||||
for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
|
||||
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
|
||||
if (((irl->t.irt ^ irr->t.irt) & IRT_MARK))
|
||||
remark = 1;
|
||||
else if (irl->prev == irr->prev)
|
||||
if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev)
|
||||
continue;
|
||||
remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK);
|
||||
irt_setmark(IR(ir->op1)->t);
|
||||
irt_setmark(IR(ir->op2)->t);
|
||||
}
|
||||
@ -166,8 +165,8 @@ static void sink_remark_phi(jit_State *J)
|
||||
/* Sweep instructions and tag sunken allocations and stores. */
|
||||
static void sink_sweep_ins(jit_State *J)
|
||||
{
|
||||
IRIns *ir, *irfirst = IR(J->cur.nk);
|
||||
for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) {
|
||||
IRIns *ir, *irbase = IR(REF_BASE);
|
||||
for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
|
||||
switch (ir->o) {
|
||||
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
|
||||
IRIns *ira = sink_checkalloc(J, ir);
|
||||
@ -217,6 +216,12 @@ static void sink_sweep_ins(jit_State *J)
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (ir = IR(J->cur.nk); ir < irbase; ir++) {
|
||||
irt_clearmark(ir->t);
|
||||
ir->prev = REGSP_INIT;
|
||||
if (irt_is64(ir->t) && ir->o != IR_KNULL)
|
||||
ir++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocation sinking and store sinking.
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "lj_jit.h"
|
||||
#include "lj_ircall.h"
|
||||
#include "lj_iropt.h"
|
||||
#include "lj_dispatch.h"
|
||||
#include "lj_vm.h"
|
||||
|
||||
/* SPLIT pass:
|
||||
@ -353,6 +354,8 @@ static void split_ir(jit_State *J)
|
||||
ir->prev = ref; /* Identity substitution for loword. */
|
||||
hisubst[ref] = 0;
|
||||
}
|
||||
if (irt_is64(ir->t) && ir->o != IR_KNULL)
|
||||
ref++;
|
||||
}
|
||||
|
||||
/* Process old IR instructions. */
|
||||
@ -448,6 +451,11 @@ static void split_ir(jit_State *J)
|
||||
case IR_STRTO:
|
||||
hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
|
||||
break;
|
||||
case IR_FLOAD:
|
||||
lua_assert(ir->op1 == REF_NIL);
|
||||
hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
|
||||
nir->op2 += LJ_BE*4;
|
||||
break;
|
||||
case IR_XLOAD: {
|
||||
IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */
|
||||
J->cur.nins--;
|
||||
|
@ -2177,6 +2177,8 @@ static void assign_adjust(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e)
|
||||
bcemit_nil(fs, reg, (BCReg)extra);
|
||||
}
|
||||
}
|
||||
if (nexps > nvars)
|
||||
ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */
|
||||
}
|
||||
|
||||
/* Recursively parse assignment statement. */
|
||||
@ -2210,8 +2212,6 @@ static void parse_assignment(LexState *ls, LHSVarList *lh, BCReg nvars)
|
||||
return;
|
||||
}
|
||||
assign_adjust(ls, nvars, nexps, &e);
|
||||
if (nexps > nvars)
|
||||
ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */
|
||||
}
|
||||
/* Assign RHS to LHS and recurse downwards. */
|
||||
expr_init(&e, VNONRELOC, ls->fs->freereg-1);
|
||||
|
307
src/lj_record.c
307
src/lj_record.c
@ -51,7 +51,7 @@ static void rec_check_ir(jit_State *J)
|
||||
{
|
||||
IRRef i, nins = J->cur.nins, nk = J->cur.nk;
|
||||
lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
|
||||
for (i = nins-1; i >= nk; i--) {
|
||||
for (i = nk; i < nins; i++) {
|
||||
IRIns *ir = IR(i);
|
||||
uint32_t mode = lj_ir_mode[ir->o];
|
||||
IRRef op1 = ir->op1;
|
||||
@ -61,7 +61,10 @@ static void rec_check_ir(jit_State *J)
|
||||
case IRMref: lua_assert(op1 >= nk);
|
||||
lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
|
||||
case IRMlit: break;
|
||||
case IRMcst: lua_assert(i < REF_BIAS); continue;
|
||||
case IRMcst: lua_assert(i < REF_BIAS);
|
||||
if (irt_is64(ir->t) && ir->o != IR_KNULL)
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
switch (irm_op2(mode)) {
|
||||
case IRMnone: lua_assert(op2 == 0); break;
|
||||
@ -84,30 +87,48 @@ static void rec_check_slots(jit_State *J)
|
||||
BCReg s, nslots = J->baseslot + J->maxslot;
|
||||
int32_t depth = 0;
|
||||
cTValue *base = J->L->base - J->baseslot;
|
||||
lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS);
|
||||
lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME));
|
||||
lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS);
|
||||
lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME));
|
||||
lua_assert(nslots < LJ_MAX_JSLOTS);
|
||||
for (s = 0; s < nslots; s++) {
|
||||
TRef tr = J->slot[s];
|
||||
if (tr) {
|
||||
cTValue *tv = &base[s];
|
||||
IRRef ref = tref_ref(tr);
|
||||
IRIns *ir;
|
||||
lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
|
||||
ir = IR(ref);
|
||||
lua_assert(irt_t(ir->t) == tref_t(tr));
|
||||
IRIns *ir = NULL; /* Silence compiler. */
|
||||
if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
|
||||
lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
|
||||
ir = IR(ref);
|
||||
lua_assert(irt_t(ir->t) == tref_t(tr));
|
||||
}
|
||||
if (s == 0) {
|
||||
lua_assert(tref_isfunc(tr));
|
||||
#if LJ_FR2
|
||||
} else if (s == 1) {
|
||||
lua_assert(0);
|
||||
#endif
|
||||
} else if ((tr & TREF_FRAME)) {
|
||||
GCfunc *fn = gco2func(frame_gc(tv));
|
||||
BCReg delta = (BCReg)(tv - frame_prev(tv));
|
||||
#if LJ_FR2
|
||||
if (ref)
|
||||
lua_assert(ir_knum(ir)->u64 == tv->u64);
|
||||
tr = J->slot[s-1];
|
||||
ir = IR(tref_ref(tr));
|
||||
#endif
|
||||
lua_assert(tref_isfunc(tr));
|
||||
if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
|
||||
lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta));
|
||||
lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
|
||||
: (s == delta + LJ_FR2));
|
||||
depth++;
|
||||
} else if ((tr & TREF_CONT)) {
|
||||
#if LJ_FR2
|
||||
if (ref)
|
||||
lua_assert(ir_knum(ir)->u64 == tv->u64);
|
||||
#else
|
||||
lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
|
||||
lua_assert((J->slot[s+1] & TREF_FRAME));
|
||||
#endif
|
||||
lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME));
|
||||
depth++;
|
||||
} else {
|
||||
if (tvisnumber(tv))
|
||||
@ -159,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot)
|
||||
/* Get TRef for current function. */
|
||||
static TRef getcurrf(jit_State *J)
|
||||
{
|
||||
if (J->base[-1])
|
||||
return J->base[-1];
|
||||
lua_assert(J->baseslot == 1);
|
||||
return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY);
|
||||
if (J->base[-1-LJ_FR2])
|
||||
return J->base[-1-LJ_FR2];
|
||||
lua_assert(J->baseslot == 1+LJ_FR2);
|
||||
return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
|
||||
}
|
||||
|
||||
/* Compare for raw object equality.
|
||||
@ -506,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
|
||||
static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
|
||||
{
|
||||
BCReg ra = bc_a(iterins);
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
|
||||
if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
|
||||
J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
|
||||
J->maxslot = ra-1+bc_b(J->pc[-1]);
|
||||
@ -643,8 +663,8 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
|
||||
GCproto *pt = funcproto(fn);
|
||||
/* Too many closures created? Probably not a monomorphic function. */
|
||||
if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */
|
||||
TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC);
|
||||
emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt)));
|
||||
TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC);
|
||||
emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt)));
|
||||
(void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
|
||||
return tr;
|
||||
}
|
||||
@ -675,22 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
|
||||
{
|
||||
RecordIndex ix;
|
||||
TValue *functv = &J->L->base[func];
|
||||
TRef *fbase = &J->base[func];
|
||||
TRef kfunc, *fbase = &J->base[func];
|
||||
ptrdiff_t i;
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
|
||||
for (i = 0; i <= nargs; i++)
|
||||
(void)getslot(J, func+i); /* Ensure func and all args have a reference. */
|
||||
(void)getslot(J, func); /* Ensure func has a reference. */
|
||||
for (i = 1; i <= nargs; i++)
|
||||
(void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
|
||||
if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
|
||||
ix.tab = fbase[0];
|
||||
copyTV(J->L, &ix.tabv, functv);
|
||||
if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
|
||||
lj_trace_err(J, LJ_TRERR_NOMM);
|
||||
for (i = ++nargs; i > 0; i--) /* Shift arguments up. */
|
||||
fbase[i] = fbase[i-1];
|
||||
for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
|
||||
fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
|
||||
#if LJ_FR2
|
||||
fbase[2] = fbase[0];
|
||||
#endif
|
||||
fbase[0] = ix.mobj; /* Replace function. */
|
||||
functv = &ix.mobjv;
|
||||
}
|
||||
fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
|
||||
kfunc = rec_call_specialize(J, funcV(functv), fbase[0]);
|
||||
#if LJ_FR2
|
||||
fbase[0] = kfunc;
|
||||
fbase[1] = TREF_FRAME;
|
||||
#else
|
||||
fbase[0] = kfunc | TREF_FRAME;
|
||||
#endif
|
||||
J->maxslot = (BCReg)nargs;
|
||||
}
|
||||
|
||||
@ -700,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
|
||||
rec_call_setup(J, func, nargs);
|
||||
/* Bump frame. */
|
||||
J->framedepth++;
|
||||
J->base += func+1;
|
||||
J->baseslot += func+1;
|
||||
J->base += func+1+LJ_FR2;
|
||||
J->baseslot += func+1+LJ_FR2;
|
||||
}
|
||||
|
||||
/* Record tail call. */
|
||||
@ -717,7 +746,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
|
||||
func += cbase;
|
||||
}
|
||||
/* Move func + args down. */
|
||||
memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1));
|
||||
if (LJ_FR2 && J->baseslot == 2)
|
||||
J->base[func+1] = 0;
|
||||
memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
|
||||
/* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
|
||||
/* Tailcalls can form a loop, so count towards the loop unroll limit. */
|
||||
if (++J->tailcalled > J->loopunroll)
|
||||
@ -758,9 +789,9 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
(void)getslot(J, rbase+i); /* Ensure all results have a reference. */
|
||||
while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */
|
||||
BCReg cbase = (BCReg)frame_delta(frame);
|
||||
if (--J->framedepth < 0)
|
||||
if (--J->framedepth <= 0)
|
||||
lj_trace_err(J, LJ_TRERR_NYIRETL);
|
||||
lua_assert(J->baseslot > 1);
|
||||
lua_assert(J->baseslot > 1+LJ_FR2);
|
||||
gotresults++;
|
||||
rbase += cbase;
|
||||
J->baseslot -= (BCReg)cbase;
|
||||
@ -784,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
BCReg cbase = (BCReg)frame_delta(frame);
|
||||
if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
|
||||
lj_trace_err(J, LJ_TRERR_NYIRETL);
|
||||
lua_assert(J->baseslot > 1);
|
||||
lua_assert(J->baseslot > 1+LJ_FR2);
|
||||
rbase += cbase;
|
||||
J->baseslot -= (BCReg)cbase;
|
||||
J->base -= cbase;
|
||||
@ -794,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
BCIns callins = *(frame_pc(frame)-1);
|
||||
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
|
||||
BCReg cbase = bc_a(callins);
|
||||
GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2)));
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */
|
||||
GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
|
||||
if ((pt->flags & PROTO_NOJIT))
|
||||
lj_trace_err(J, LJ_TRERR_CJITOFF);
|
||||
if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
|
||||
@ -808,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
lj_snap_add(J);
|
||||
}
|
||||
for (i = 0; i < nresults; i++) /* Adjust results. */
|
||||
J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
|
||||
J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
|
||||
J->maxslot = cbase+(BCReg)nresults;
|
||||
if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
|
||||
J->framedepth--;
|
||||
lua_assert(J->baseslot > cbase+1);
|
||||
J->baseslot -= cbase+1;
|
||||
J->base -= cbase+1;
|
||||
lua_assert(J->baseslot > cbase+1+LJ_FR2);
|
||||
J->baseslot -= cbase+1+LJ_FR2;
|
||||
J->base -= cbase+1+LJ_FR2;
|
||||
} else if (J->parent == 0 && J->exitno == 0 &&
|
||||
!bc_isret(bc_op(J->cur.startins))) {
|
||||
/* Return to lower frame would leave the loop in a root trace. */
|
||||
@ -824,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
} else { /* Return to lower frame. Guard for the target we return to. */
|
||||
TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
|
||||
TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
|
||||
emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc);
|
||||
emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
|
||||
J->retdepth++;
|
||||
J->needsnap = 1;
|
||||
lua_assert(J->baseslot == 1);
|
||||
lua_assert(J->baseslot == 1+LJ_FR2);
|
||||
/* Shift result slots up and clear the slots of the new frame below. */
|
||||
memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults);
|
||||
memset(J->base-1, 0, sizeof(TRef)*(cbase+1));
|
||||
memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
|
||||
memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
|
||||
}
|
||||
} else if (frame_iscont(frame)) { /* Return to continuation frame. */
|
||||
ASMFunction cont = frame_contf(frame);
|
||||
@ -839,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
lj_trace_err(J, LJ_TRERR_NYIRETL);
|
||||
J->baseslot -= (BCReg)cbase;
|
||||
J->base -= cbase;
|
||||
J->maxslot = cbase-2;
|
||||
J->maxslot = cbase-(2<<LJ_FR2);
|
||||
if (cont == lj_cont_ra) {
|
||||
/* Copy result to destination slot. */
|
||||
BCReg dst = bc_a(*(frame_contpc(frame)-1));
|
||||
J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
|
||||
if (dst >= J->maxslot) J->maxslot = dst+1;
|
||||
if (dst >= J->maxslot) {
|
||||
J->maxslot = dst+1;
|
||||
}
|
||||
} else if (cont == lj_cont_nop) {
|
||||
/* Nothing to do here. */
|
||||
} else if (cont == lj_cont_cat) {
|
||||
BCReg bslot = bc_b(*(frame_contpc(frame)-1));
|
||||
TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
|
||||
if (bslot != cbase-2) { /* Concatenate the remainder. */
|
||||
if (bslot != J->maxslot) { /* Concatenate the remainder. */
|
||||
TValue *b = J->L->base, save; /* Simulate lower frame and result. */
|
||||
J->base[cbase-2] = tr;
|
||||
copyTV(J->L, &save, b-2);
|
||||
if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2);
|
||||
J->base[J->maxslot] = tr;
|
||||
copyTV(J->L, &save, b-(2<<LJ_FR2));
|
||||
if (gotresults)
|
||||
copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
|
||||
else
|
||||
setnilV(b-(2<<LJ_FR2));
|
||||
J->L->base = b - cbase;
|
||||
tr = rec_cat(J, bslot, cbase-2);
|
||||
tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
|
||||
b = J->L->base + cbase; /* Undo. */
|
||||
J->L->base = b;
|
||||
copyTV(J->L, b-2, &save);
|
||||
copyTV(J->L, b-(2<<LJ_FR2), &save);
|
||||
}
|
||||
if (tr) { /* Store final result. */
|
||||
BCReg dst = bc_a(*(frame_contpc(frame)-1));
|
||||
J->base[dst] = tr;
|
||||
if (dst >= J->maxslot) J->maxslot = dst+1;
|
||||
if (dst >= J->maxslot) {
|
||||
J->maxslot = dst+1;
|
||||
}
|
||||
} /* Otherwise continue with another __concat call. */
|
||||
} else {
|
||||
/* Result type already specialized. */
|
||||
@ -873,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
} else {
|
||||
lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
|
||||
}
|
||||
lua_assert(J->baseslot >= 1);
|
||||
lua_assert(J->baseslot >= 1+LJ_FR2);
|
||||
}
|
||||
|
||||
/* -- Metamethod handling ------------------------------------------------- */
|
||||
@ -882,16 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
|
||||
{
|
||||
BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
|
||||
#if LJ_64
|
||||
TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
|
||||
#if LJ_FR2
|
||||
J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
|
||||
J->base[top+1] = TREF_CONT;
|
||||
#else
|
||||
TRef trcont = lj_ir_kptr(J, (void *)cont);
|
||||
J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
|
||||
#endif
|
||||
J->base[top] = trcont | TREF_CONT;
|
||||
J->framedepth++;
|
||||
for (s = J->maxslot; s < top; s++)
|
||||
J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
|
||||
return top+1;
|
||||
return top+1+LJ_FR2;
|
||||
}
|
||||
|
||||
/* Record metamethod lookup. */
|
||||
@ -910,7 +947,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
|
||||
cTValue *mo;
|
||||
if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
|
||||
/* Specialize to the C library namespace object. */
|
||||
emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
|
||||
emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
|
||||
} else {
|
||||
/* Specialize to the type of userdata. */
|
||||
TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
|
||||
@ -939,7 +976,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
|
||||
}
|
||||
/* The cdata metatable is treated as immutable. */
|
||||
if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
|
||||
#if LJ_GC64
|
||||
/* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
|
||||
ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
|
||||
GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
|
||||
#else
|
||||
ix->mt = mix.tab = lj_ir_ktab(J, mt);
|
||||
#endif
|
||||
goto nocheck;
|
||||
}
|
||||
ix->mt = mt ? mix.tab : TREF_NIL;
|
||||
@ -969,9 +1012,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
|
||||
BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
|
||||
TRef *base = J->base + func;
|
||||
TValue *basev = J->L->base + func;
|
||||
base[1] = ix->tab; base[2] = ix->key;
|
||||
copyTV(J->L, basev+1, &ix->tabv);
|
||||
copyTV(J->L, basev+2, &ix->keyv);
|
||||
base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
|
||||
copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
|
||||
copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
|
||||
if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
|
||||
if (mm != MM_unm) {
|
||||
ix->tab = ix->key;
|
||||
@ -982,8 +1025,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
|
||||
lj_trace_err(J, LJ_TRERR_NOMM);
|
||||
}
|
||||
ok:
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
|
||||
base[0] = ix->mobj;
|
||||
#if LJ_FR2
|
||||
base[1] = 0;
|
||||
#endif
|
||||
copyTV(J->L, basev+0, &ix->mobjv);
|
||||
lj_record_call(J, func, 2);
|
||||
return 0; /* No result yet. */
|
||||
@ -999,8 +1044,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
|
||||
BCReg func = rec_mm_prep(J, lj_cont_ra);
|
||||
TRef *base = J->base + func;
|
||||
TValue *basev = J->L->base + func;
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
|
||||
base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
|
||||
base += LJ_FR2;
|
||||
basev += LJ_FR2;
|
||||
base[1] = tr; copyTV(J->L, basev+1, tv);
|
||||
#if LJ_52
|
||||
base[2] = tr; copyTV(J->L, basev+2, tv);
|
||||
@ -1020,11 +1066,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
|
||||
static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
|
||||
{
|
||||
BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
|
||||
TRef *base = J->base + func;
|
||||
TValue *tv = J->L->base + func;
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
|
||||
base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
|
||||
copyTV(J->L, tv+0, &ix->mobjv);
|
||||
TRef *base = J->base + func + LJ_FR2;
|
||||
TValue *tv = J->L->base + func + LJ_FR2;
|
||||
base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
|
||||
copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
|
||||
copyTV(J->L, tv+1, &ix->valv);
|
||||
copyTV(J->L, tv+2, &ix->keyv);
|
||||
lj_record_call(J, func, 2);
|
||||
@ -1257,8 +1302,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
|
||||
if ((MSize)k < t->asize) { /* Currently an array key? */
|
||||
TRef arrayref;
|
||||
rec_idx_abc(J, asizeref, ikey, t->asize);
|
||||
arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY);
|
||||
return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey);
|
||||
arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY);
|
||||
return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey);
|
||||
} else { /* Currently not in array (may be an array extension)? */
|
||||
emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
|
||||
if (k == 0 && tref_isk(key))
|
||||
@ -1298,13 +1343,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
|
||||
*rbguard = J->guardemit;
|
||||
hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
|
||||
emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
|
||||
node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE);
|
||||
node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE);
|
||||
kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
|
||||
return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot);
|
||||
return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot);
|
||||
}
|
||||
}
|
||||
/* Fall back to a regular hash lookup. */
|
||||
return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key);
|
||||
return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key);
|
||||
}
|
||||
|
||||
/* Determine whether a key is NOT one of the fast metamethod names. */
|
||||
@ -1341,11 +1386,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
|
||||
handlemm:
|
||||
if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
|
||||
BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
|
||||
TRef *base = J->base + func;
|
||||
TValue *tv = J->L->base + func;
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
|
||||
base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
|
||||
setfuncV(J->L, tv+0, funcV(&ix->mobjv));
|
||||
TRef *base = J->base + func + LJ_FR2;
|
||||
TValue *tv = J->L->base + func + LJ_FR2;
|
||||
base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
|
||||
setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
|
||||
copyTV(J->L, tv+1, &ix->tabv);
|
||||
copyTV(J->L, tv+2, &ix->keyv);
|
||||
if (ix->val) {
|
||||
@ -1387,7 +1431,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
|
||||
IRType t = itype2irt(oldv);
|
||||
TRef res;
|
||||
if (oldv == niltvg(J2G(J))) {
|
||||
emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
|
||||
emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
|
||||
res = TREF_NIL;
|
||||
} else {
|
||||
res = emitir(IRTG(loadop, t), xref, 0);
|
||||
@ -1417,7 +1461,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
|
||||
if (hasmm)
|
||||
emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
|
||||
else if (xrefop == IR_HREF)
|
||||
emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32),
|
||||
emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC),
|
||||
xref, lj_ir_kkptr(J, niltvg(J2G(J))));
|
||||
if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
|
||||
lua_assert(hasmm);
|
||||
@ -1428,7 +1472,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
|
||||
TRef key = ix->key;
|
||||
if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
|
||||
key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
|
||||
xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key);
|
||||
xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
|
||||
keybarrier = 0; /* NEWREF already takes care of the key barrier. */
|
||||
#ifdef LUAJIT_ENABLE_TABLE_BUMP
|
||||
if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */
|
||||
@ -1438,7 +1482,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
|
||||
} else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
|
||||
/* Cannot derive that the previous value was non-nil, must do checks. */
|
||||
if (xrefop == IR_HREF) /* Guard against store to niltv. */
|
||||
emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
|
||||
emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
|
||||
if (ix->idxchain) { /* Metamethod lookup required? */
|
||||
/* A check for NULL metatable is cheaper (hoistable) than a load. */
|
||||
if (!mt) {
|
||||
@ -1460,7 +1504,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
|
||||
emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
|
||||
/* Invalidate neg. metamethod cache for stores with certain string keys. */
|
||||
if (!nommstr(J, ix->key)) {
|
||||
TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM);
|
||||
TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM);
|
||||
emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
|
||||
}
|
||||
J->needsnap = 1;
|
||||
@ -1535,7 +1579,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
|
||||
goto noconstify;
|
||||
kfunc = lj_ir_kfunc(J, J->fn);
|
||||
emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
|
||||
J->base[-1] = TREF_FRAME | kfunc;
|
||||
#if LJ_FR2
|
||||
J->base[-2] = kfunc;
|
||||
#else
|
||||
J->base[-1] = kfunc | TREF_FRAME;
|
||||
#endif
|
||||
fn = kfunc;
|
||||
}
|
||||
tr = lj_record_constify(J, uvval(uvp));
|
||||
@ -1546,13 +1594,17 @@ noconstify:
|
||||
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */
|
||||
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
|
||||
if (!uvp->closed) {
|
||||
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
|
||||
/* In current stack? */
|
||||
if (uvval(uvp) >= tvref(J->L->stack) &&
|
||||
uvval(uvp) < tvref(J->L->maxstack)) {
|
||||
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
|
||||
if (slot >= 0) { /* Aliases an SSA slot? */
|
||||
emitir(IRTG(IR_EQ, IRT_PGC),
|
||||
REF_BASE,
|
||||
emitir(IRT(IR_ADD, IRT_PGC), uref,
|
||||
lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
|
||||
slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
|
||||
/* NYI: add IR to guard that it's still aliasing the same slot. */
|
||||
if (val == 0) {
|
||||
return getslot(J, slot);
|
||||
} else {
|
||||
@ -1562,10 +1614,12 @@ noconstify:
|
||||
}
|
||||
}
|
||||
}
|
||||
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv));
|
||||
emitir(IRTG(IR_UGT, IRT_PGC),
|
||||
emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
|
||||
lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
|
||||
} else {
|
||||
needbarrier = 1;
|
||||
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv));
|
||||
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
|
||||
}
|
||||
if (val == 0) { /* Upvalue load */
|
||||
IRType t = itype2irt(uvval(uvp));
|
||||
@ -1640,11 +1694,14 @@ static void rec_func_setup(jit_State *J)
|
||||
static void rec_func_vararg(jit_State *J)
|
||||
{
|
||||
GCproto *pt = J->pt;
|
||||
BCReg s, fixargs, vframe = J->maxslot+1;
|
||||
BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
|
||||
lua_assert((pt->flags & PROTO_VARARG));
|
||||
if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
|
||||
lj_trace_err(J, LJ_TRERR_STACKOV);
|
||||
J->base[vframe-1] = J->base[-1]; /* Copy function up. */
|
||||
J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
|
||||
#if LJ_FR2
|
||||
J->base[vframe-1] = TREF_FRAME;
|
||||
#endif
|
||||
/* Copy fixarg slots up and set their original slots to nil. */
|
||||
fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
|
||||
for (s = 0; s < fixargs; s++) {
|
||||
@ -1706,7 +1763,7 @@ static int select_detect(jit_State *J)
|
||||
static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
|
||||
{
|
||||
int32_t numparams = J->pt->numparams;
|
||||
ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1;
|
||||
ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
|
||||
lua_assert(frame_isvarg(J->L->base-1));
|
||||
if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
|
||||
ptrdiff_t i;
|
||||
@ -1718,10 +1775,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
|
||||
J->maxslot = dst + (BCReg)nresults;
|
||||
}
|
||||
for (i = 0; i < nresults; i++)
|
||||
J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL;
|
||||
J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
|
||||
} else { /* Unknown number of varargs passed to trace. */
|
||||
TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME);
|
||||
int32_t frofs = 8*(1+numparams)+FRAME_VARG;
|
||||
TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
|
||||
int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
|
||||
if (nresults >= 0) { /* Known fixed number of results. */
|
||||
ptrdiff_t i;
|
||||
if (nvararg > 0) {
|
||||
@ -1732,11 +1789,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
|
||||
else
|
||||
emitir(IRTGI(IR_EQ), fr,
|
||||
lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
|
||||
vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
|
||||
vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
|
||||
vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
|
||||
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
|
||||
for (i = 0; i < nload; i++) {
|
||||
IRType t = itype2irt(&J->L->base[i-1-nvararg]);
|
||||
TRef aref = emitir(IRT(IR_AREF, IRT_P32),
|
||||
IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
|
||||
TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
|
||||
vbase, lj_ir_kint(J, (int32_t)i));
|
||||
TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
|
||||
if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
|
||||
@ -1782,15 +1839,16 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
|
||||
}
|
||||
if (idx != 0 && idx <= nvararg) {
|
||||
IRType t;
|
||||
TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
|
||||
vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
|
||||
t = itype2irt(&J->L->base[idx-2-nvararg]);
|
||||
aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx);
|
||||
TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
|
||||
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
|
||||
lj_ir_kint(J, frofs-(8<<LJ_FR2)));
|
||||
t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
|
||||
aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
|
||||
tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
|
||||
if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
|
||||
}
|
||||
J->base[dst-2] = tr;
|
||||
J->maxslot = dst-1;
|
||||
J->base[dst-2-LJ_FR2] = tr;
|
||||
J->maxslot = dst-1-LJ_FR2;
|
||||
J->bcskip = 2; /* Skip CALLM + select. */
|
||||
} else {
|
||||
nyivarg:
|
||||
@ -1839,10 +1897,10 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
|
||||
break;
|
||||
}
|
||||
xbase = ++trp;
|
||||
tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32),
|
||||
tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
|
||||
lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
|
||||
do {
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++);
|
||||
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++);
|
||||
} while (trp <= top);
|
||||
tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
|
||||
J->maxslot = (BCReg)(xbase - J->base);
|
||||
@ -1883,7 +1941,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
|
||||
const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
|
||||
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
|
||||
/* Set PC to opposite target to avoid re-recording the comp. in side trace. */
|
||||
#if LJ_FR2
|
||||
SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
|
||||
uint64_t pcbase;
|
||||
memcpy(&pcbase, flink, sizeof(uint64_t));
|
||||
pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
|
||||
memcpy(flink, &pcbase, sizeof(uint64_t));
|
||||
#else
|
||||
J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
|
||||
#endif
|
||||
J->needsnap = 1;
|
||||
if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
|
||||
lj_snap_shrink(J); /* Shrink last snapshot if possible. */
|
||||
@ -2159,14 +2225,14 @@ void lj_record_ins(jit_State *J)
|
||||
case BC_MODVN: case BC_MODVV:
|
||||
recmod:
|
||||
if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
|
||||
rc = lj_opt_narrow_mod(J, rb, rc, rcv);
|
||||
rc = lj_opt_narrow_mod(J, rb, rc, rbv, rcv);
|
||||
else
|
||||
rc = rec_mm_arith(J, &ix, MM_mod);
|
||||
break;
|
||||
|
||||
case BC_POW:
|
||||
if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
|
||||
rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv);
|
||||
rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
|
||||
else
|
||||
rc = rec_mm_arith(J, &ix, MM_pow);
|
||||
break;
|
||||
@ -2181,7 +2247,13 @@ void lj_record_ins(jit_State *J)
|
||||
|
||||
case BC_MOV:
|
||||
/* Clear gap of method call to avoid resurrecting previous refs. */
|
||||
if (ra > J->maxslot) J->base[ra-1] = 0;
|
||||
if (ra > J->maxslot) {
|
||||
#if LJ_FR2
|
||||
memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
|
||||
#else
|
||||
J->base[ra-1] = 0;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case BC_KSTR: case BC_KNUM: case BC_KPRI:
|
||||
break;
|
||||
@ -2250,14 +2322,14 @@ void lj_record_ins(jit_State *J)
|
||||
/* -- Calls and vararg handling ----------------------------------------- */
|
||||
|
||||
case BC_ITERC:
|
||||
J->base[ra] = getslot(J, ra-3-LJ_FR2);
|
||||
J->base[ra+1] = getslot(J, ra-2-LJ_FR2);
|
||||
J->base[ra+2] = getslot(J, ra-1-LJ_FR2);
|
||||
J->base[ra] = getslot(J, ra-3);
|
||||
J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
|
||||
J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
|
||||
{ /* Do the actual copy now because lj_record_call needs the values. */
|
||||
TValue *b = &J->L->base[ra];
|
||||
copyTV(J->L, b, b-3-LJ_FR2);
|
||||
copyTV(J->L, b+1, b-2-LJ_FR2);
|
||||
copyTV(J->L, b+2, b-1-LJ_FR2);
|
||||
copyTV(J->L, b, b-3);
|
||||
copyTV(J->L, b+1+LJ_FR2, b-2);
|
||||
copyTV(J->L, b+2+LJ_FR2, b-1);
|
||||
}
|
||||
lj_record_call(J, ra, (ptrdiff_t)rc-1);
|
||||
break;
|
||||
@ -2380,7 +2452,12 @@ void lj_record_ins(jit_State *J)
|
||||
/* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
|
||||
if (bcmode_a(op) == BCMdst && rc) {
|
||||
J->base[ra] = rc;
|
||||
if (ra >= J->maxslot) J->maxslot = ra+1;
|
||||
if (ra >= J->maxslot) {
|
||||
#if LJ_FR2
|
||||
if (ra > J->maxslot) J->base[ra-1] = 0;
|
||||
#endif
|
||||
J->maxslot = ra+1;
|
||||
}
|
||||
}
|
||||
|
||||
#undef rav
|
||||
@ -2465,7 +2542,7 @@ void lj_record_setup(jit_State *J)
|
||||
J->scev.idx = REF_NIL;
|
||||
setmref(J->scev.pc, NULL);
|
||||
|
||||
J->baseslot = 1; /* Invoking function is at base[-1]. */
|
||||
J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
|
||||
J->base = J->slot + J->baseslot;
|
||||
J->maxslot = 0;
|
||||
J->framedepth = 0;
|
||||
@ -2480,7 +2557,7 @@ void lj_record_setup(jit_State *J)
|
||||
J->bc_extent = ~(MSize)0;
|
||||
|
||||
/* Emit instructions for fixed references. Also triggers initial IR alloc. */
|
||||
emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno);
|
||||
emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno);
|
||||
for (i = 0; i <= 2; i++) {
|
||||
IRIns *ir = IR(REF_NIL-i);
|
||||
ir->i = 0;
|
||||
|
@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
|
||||
for (s = 0; s < nslots; s++) {
|
||||
TRef tr = J->slot[s];
|
||||
IRRef ref = tref_ref(tr);
|
||||
#if LJ_FR2
|
||||
if (s == 1) continue;
|
||||
if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
|
||||
TValue *base = J->L->base - J->baseslot;
|
||||
tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
|
||||
ref = tref_ref(tr);
|
||||
}
|
||||
#endif
|
||||
if (ref) {
|
||||
SnapEntry sn = SNAP_TR(s, tr);
|
||||
IRIns *ir = &J->cur.ir[ref];
|
||||
if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
|
||||
if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
|
||||
ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
|
||||
/* No need to snapshot unmodified non-inherited slots. */
|
||||
if (!(ir->op2 & IRSLOAD_INHERIT))
|
||||
@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
|
||||
}
|
||||
|
||||
/* Add frame links at the end of the snapshot. */
|
||||
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
|
||||
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
|
||||
{
|
||||
cTValue *frame = J->L->base - 1;
|
||||
cTValue *lim = J->L->base - J->baseslot;
|
||||
cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
|
||||
GCfunc *fn = frame_func(frame);
|
||||
cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
|
||||
#if LJ_FR2
|
||||
uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
|
||||
lua_assert(2 <= J->baseslot && J->baseslot <= 257);
|
||||
memcpy(map, &pcbase, sizeof(uint64_t));
|
||||
#else
|
||||
MSize f = 0;
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
|
||||
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
|
||||
#endif
|
||||
while (frame > lim) { /* Backwards traversal of all frames above base. */
|
||||
if (frame_islua(frame)) {
|
||||
#if !LJ_FR2
|
||||
map[f++] = SNAP_MKPC(frame_pc(frame));
|
||||
#endif
|
||||
frame = frame_prevl(frame);
|
||||
} else if (frame_iscont(frame)) {
|
||||
#if !LJ_FR2
|
||||
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
|
||||
map[f++] = SNAP_MKPC(frame_contpc(frame));
|
||||
#endif
|
||||
frame = frame_prevd(frame);
|
||||
} else {
|
||||
lua_assert(!frame_isc(frame));
|
||||
#if !LJ_FR2
|
||||
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
|
||||
#endif
|
||||
frame = frame_prevd(frame);
|
||||
continue;
|
||||
}
|
||||
if (frame + funcproto(frame_func(frame))->framesize > ftop)
|
||||
ftop = frame + funcproto(frame_func(frame))->framesize;
|
||||
}
|
||||
*topslot = (uint8_t)(ftop - lim);
|
||||
#if LJ_FR2
|
||||
lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
|
||||
return 2;
|
||||
#else
|
||||
lua_assert(f == (MSize)(1 + J->framedepth));
|
||||
return (BCReg)(ftop - lim);
|
||||
return f;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Take a snapshot of the current stack. */
|
||||
@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
|
||||
MSize nent;
|
||||
SnapEntry *p;
|
||||
/* Conservative estimate. */
|
||||
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
|
||||
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
|
||||
p = &J->cur.snapmap[nsnapmap];
|
||||
nent = snapshot_slots(J, p, nslots);
|
||||
snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
|
||||
snap->nent = (uint8_t)nent;
|
||||
nent += snapshot_framelinks(J, p + nent, &snap->topslot);
|
||||
snap->mapofs = (uint16_t)nsnapmap;
|
||||
snap->ref = (IRRef1)J->cur.nins;
|
||||
snap->nent = (uint8_t)nent;
|
||||
snap->nslots = (uint8_t)nslots;
|
||||
snap->count = 0;
|
||||
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
|
||||
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
|
||||
}
|
||||
|
||||
/* Add or merge a snapshot. */
|
||||
@ -284,8 +309,8 @@ void lj_snap_shrink(jit_State *J)
|
||||
MSize n, m, nlim, nent = snap->nent;
|
||||
uint8_t udf[SNAP_USEDEF_SLOTS];
|
||||
BCReg maxslot = J->maxslot;
|
||||
BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
|
||||
BCReg baseslot = J->baseslot;
|
||||
BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
|
||||
maxslot += baseslot;
|
||||
minslot += baseslot;
|
||||
snap->nslots = (uint8_t)maxslot;
|
||||
@ -371,8 +396,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
|
||||
case IR_KPRI: return TREF_PRI(irt_type(ir->t));
|
||||
case IR_KINT: return lj_ir_kint(J, ir->i);
|
||||
case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
|
||||
case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
|
||||
case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
|
||||
case IR_KNUM: case IR_KINT64:
|
||||
return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
|
||||
case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
|
||||
default: lua_assert(0); return TREF_NIL; break;
|
||||
}
|
||||
@ -555,8 +580,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
|
||||
if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
|
||||
uint64_t k = (uint32_t)T->ir[irs->op2].i +
|
||||
((uint64_t)T->ir[(irs+1)->op2].i << 32);
|
||||
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
|
||||
lj_ir_k64_find(J, k));
|
||||
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
|
||||
} else {
|
||||
val = emitir_raw(IRT(IR_HIOP, t), val,
|
||||
snap_pref(J, T, map, nent, seen, (irs+1)->op2));
|
||||
@ -599,7 +623,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
}
|
||||
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
|
||||
rs = snap_renameref(T, snapno, ref, rs);
|
||||
lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */
|
||||
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
|
||||
int32_t *sps = &ex->spill[regsp_spill(rs)];
|
||||
if (irt_isinteger(t)) {
|
||||
@ -608,9 +631,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
} else if (irt_isnum(t)) {
|
||||
o->u64 = *(uint64_t *)sps;
|
||||
#endif
|
||||
} else if (LJ_64 && irt_islightud(t)) {
|
||||
#if LJ_64 && !LJ_GC64
|
||||
} else if (irt_islightud(t)) {
|
||||
/* 64 bit lightuserdata which may escape already has the tag bits. */
|
||||
o->u64 = *(uint64_t *)sps;
|
||||
#endif
|
||||
} else {
|
||||
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
|
||||
setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
|
||||
@ -628,9 +653,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
} else if (irt_isnum(t)) {
|
||||
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
|
||||
#endif
|
||||
} else if (LJ_64 && irt_is64(t)) {
|
||||
#if LJ_64 && !LJ_GC64
|
||||
} else if (irt_is64(t)) {
|
||||
/* 64 bit values that already have the tag bits. */
|
||||
o->u64 = ex->gpr[r-RID_MIN_GPR];
|
||||
#endif
|
||||
} else if (irt_ispri(t)) {
|
||||
setpriV(o, irt_toitype(t));
|
||||
} else {
|
||||
@ -651,7 +678,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
|
||||
uint64_t tmp;
|
||||
if (irref_isk(ref)) {
|
||||
if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
|
||||
src = mref(ir->ptr, int32_t);
|
||||
src = (int32_t *)&ir[1];
|
||||
} else if (sz == 8) {
|
||||
tmp = (uint64_t)(uint32_t)ir->i;
|
||||
src = (int32_t *)&tmp;
|
||||
@ -795,11 +822,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
|
||||
SnapShot *snap = &T->snap[snapno];
|
||||
MSize n, nent = snap->nent;
|
||||
SnapEntry *map = &T->snapmap[snap->mapofs];
|
||||
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
|
||||
#if !LJ_FR2 || defined(LUA_USE_ASSERT)
|
||||
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
|
||||
#endif
|
||||
#if !LJ_FR2
|
||||
ptrdiff_t ftsz0;
|
||||
#endif
|
||||
TValue *frame;
|
||||
BloomFilter rfilt = snap_renamefilter(T, snapno);
|
||||
const BCIns *pc = snap_pc(map[nent]);
|
||||
const BCIns *pc = snap_pc(&map[nent]);
|
||||
lua_State *L = J->L;
|
||||
|
||||
/* Set interpreter PC to the next PC to get correct error messages. */
|
||||
@ -812,8 +843,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
|
||||
}
|
||||
|
||||
/* Fill stack slots with data from the registers and spill slots. */
|
||||
frame = L->base-1;
|
||||
frame = L->base-1-LJ_FR2;
|
||||
#if !LJ_FR2
|
||||
ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
|
||||
#endif
|
||||
for (n = 0; n < nent; n++) {
|
||||
SnapEntry sn = map[n];
|
||||
if (!(sn & SNAP_NORESTORE)) {
|
||||
@ -836,14 +869,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
|
||||
TValue tmp;
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
|
||||
o->u32.hi = tmp.u32.lo;
|
||||
#if !LJ_FR2
|
||||
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
|
||||
lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
|
||||
/* Overwrite tag with frame link. */
|
||||
setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
|
||||
L->base = o+1;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
#if LJ_FR2
|
||||
L->base += (map[nent+LJ_BE] & 0xff);
|
||||
#endif
|
||||
lua_assert(map + nent == flinks);
|
||||
|
||||
/* Compute current stack top. */
|
||||
|
@ -180,7 +180,7 @@ static void close_state(lua_State *L)
|
||||
g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
|
||||
}
|
||||
|
||||
#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
|
||||
#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
|
||||
lua_State *lj_state_newstate(lua_Alloc f, void *ud)
|
||||
#else
|
||||
LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
|
||||
|
@ -98,11 +98,15 @@ char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
|
||||
uint32_t u = (uint32_t)k;
|
||||
if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
|
||||
if (u < 10000) {
|
||||
if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3;
|
||||
if (u < 10) goto dig1;
|
||||
if (u < 100) goto dig2;
|
||||
if (u < 1000) goto dig3;
|
||||
} else {
|
||||
uint32_t v = u / 10000; u -= v * 10000;
|
||||
if (v < 10000) {
|
||||
if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 1000) goto dig7;
|
||||
if (v < 10) goto dig5;
|
||||
if (v < 100) goto dig6;
|
||||
if (v < 1000) goto dig7;
|
||||
} else {
|
||||
uint32_t w = v / 10000; v -= w * 10000;
|
||||
if (w >= 10) WINT_R(w, 10, 10)
|
||||
|
@ -82,11 +82,15 @@ enum {
|
||||
#if LJ_SOFTFP
|
||||
#define RSET_FPR 0
|
||||
#else
|
||||
#if LJ_32
|
||||
#define RSET_FPR \
|
||||
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
|
||||
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
|
||||
RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
|
||||
RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
|
||||
#else
|
||||
#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
|
||||
#endif
|
||||
#endif
|
||||
#define RSET_ALL (RSET_GPR|RSET_FPR)
|
||||
#define RSET_INIT RSET_ALL
|
||||
@ -97,23 +101,37 @@ enum {
|
||||
#if LJ_SOFTFP
|
||||
#define RSET_SCRATCH_FPR 0
|
||||
#else
|
||||
#if LJ_32
|
||||
#define RSET_SCRATCH_FPR \
|
||||
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
|
||||
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
|
||||
RID2RSET(RID_F16)|RID2RSET(RID_F18))
|
||||
#else
|
||||
#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24)
|
||||
#endif
|
||||
#endif
|
||||
#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
|
||||
#define REGARG_FIRSTGPR RID_R4
|
||||
#if LJ_32
|
||||
#define REGARG_LASTGPR RID_R7
|
||||
#define REGARG_NUMGPR 4
|
||||
#else
|
||||
#define REGARG_LASTGPR RID_R11
|
||||
#define REGARG_NUMGPR 8
|
||||
#endif
|
||||
#if LJ_ABI_SOFTFP
|
||||
#define REGARG_FIRSTFPR 0
|
||||
#define REGARG_LASTFPR 0
|
||||
#define REGARG_NUMFPR 0
|
||||
#else
|
||||
#define REGARG_FIRSTFPR RID_F12
|
||||
#if LJ_32
|
||||
#define REGARG_LASTFPR RID_F14
|
||||
#define REGARG_NUMFPR 2
|
||||
#else
|
||||
#define REGARG_LASTFPR RID_F19
|
||||
#define REGARG_NUMFPR 8
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* -- Spill slots --------------------------------------------------------- */
|
||||
@ -125,7 +143,11 @@ enum {
|
||||
**
|
||||
** SPS_FIRST: First spill slot for general use.
|
||||
*/
|
||||
#if LJ_32
|
||||
#define SPS_FIXED 5
|
||||
#else
|
||||
#define SPS_FIXED 4
|
||||
#endif
|
||||
#define SPS_FIRST 4
|
||||
|
||||
#define SPOFS_TMP 0
|
||||
@ -140,7 +162,7 @@ typedef struct {
|
||||
#if !LJ_SOFTFP
|
||||
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
|
||||
#endif
|
||||
int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
|
||||
intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
|
||||
int32_t spill[256]; /* Spill slots. */
|
||||
} ExitState;
|
||||
|
||||
@ -172,7 +194,7 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
|
||||
|
||||
typedef enum MIPSIns {
|
||||
/* Integer instructions. */
|
||||
MIPSI_MOVE = 0x00000021,
|
||||
MIPSI_MOVE = 0x00000025,
|
||||
MIPSI_NOP = 0x00000000,
|
||||
|
||||
MIPSI_LI = 0x24000000,
|
||||
@ -204,19 +226,20 @@ typedef enum MIPSIns {
|
||||
MIPSI_SLL = 0x00000000,
|
||||
MIPSI_SRL = 0x00000002,
|
||||
MIPSI_SRA = 0x00000003,
|
||||
MIPSI_ROTR = 0x00200002, /* MIPS32R2 */
|
||||
MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */
|
||||
MIPSI_SLLV = 0x00000004,
|
||||
MIPSI_SRLV = 0x00000006,
|
||||
MIPSI_SRAV = 0x00000007,
|
||||
MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */
|
||||
MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
|
||||
|
||||
MIPSI_SEB = 0x7c000420, /* MIPS32R2 */
|
||||
MIPSI_SEH = 0x7c000620, /* MIPS32R2 */
|
||||
MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */
|
||||
MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
|
||||
MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
|
||||
MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
|
||||
|
||||
MIPSI_B = 0x10000000,
|
||||
MIPSI_J = 0x08000000,
|
||||
MIPSI_JAL = 0x0c000000,
|
||||
MIPSI_JALX = 0x74000000,
|
||||
MIPSI_JR = 0x00000008,
|
||||
MIPSI_JALR = 0x0000f809,
|
||||
|
||||
@ -241,6 +264,15 @@ typedef enum MIPSIns {
|
||||
MIPSI_LDC1 = 0xd4000000,
|
||||
MIPSI_SDC1 = 0xf4000000,
|
||||
|
||||
/* MIPS64 instructions. */
|
||||
MIPSI_DSLL = 0x00000038,
|
||||
MIPSI_LD = 0xdc000000,
|
||||
MIPSI_DADDIU = 0x64000000,
|
||||
MIPSI_SD = 0xfc000000,
|
||||
MIPSI_DMFC1 = 0x44200000,
|
||||
MIPSI_DSRA32 = 0x0000003f,
|
||||
MIPSI_MFHC1 = 0x44600000,
|
||||
|
||||
/* FP instructions. */
|
||||
MIPSI_MOV_S = 0x46000006,
|
||||
MIPSI_MOV_D = 0x46200006,
|
||||
|
@ -22,7 +22,7 @@
|
||||
_(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
|
||||
#endif
|
||||
#define VRIDDEF(_) \
|
||||
_(MRM)
|
||||
_(MRM) _(RIP)
|
||||
|
||||
#define RIDENUM(name) RID_##name,
|
||||
|
||||
@ -31,6 +31,7 @@ enum {
|
||||
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
|
||||
RID_MAX,
|
||||
RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
|
||||
RID_RIP = RID_MAX+1, /* Pseudo-id for RIP (x64 only). */
|
||||
|
||||
/* Calling conventions. */
|
||||
RID_SP = RID_ESP,
|
||||
@ -63,8 +64,10 @@ enum {
|
||||
|
||||
/* -- Register sets ------------------------------------------------------- */
|
||||
|
||||
/* Make use of all registers, except the stack pointer. */
|
||||
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
|
||||
/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
|
||||
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
|
||||
- RID2RSET(RID_ESP) \
|
||||
- LJ_GC64*RID2RSET(RID_DISPATCH))
|
||||
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
|
||||
#define RSET_ALL (RSET_GPR|RSET_FPR)
|
||||
#define RSET_INIT RSET_ALL
|
||||
@ -200,6 +203,7 @@ typedef struct {
|
||||
*/
|
||||
typedef enum {
|
||||
/* Fixed length opcodes. XI_* prefix. */
|
||||
XI_O16 = 0x66,
|
||||
XI_NOP = 0x90,
|
||||
XI_XCHGa = 0x90,
|
||||
XI_CALL = 0xe8,
|
||||
@ -217,6 +221,7 @@ typedef enum {
|
||||
XI_PUSHi8 = 0x6a,
|
||||
XI_TESTb = 0x84,
|
||||
XI_TEST = 0x85,
|
||||
XI_INT3 = 0xcc,
|
||||
XI_MOVmi = 0xc7,
|
||||
XI_GROUP5 = 0xff,
|
||||
|
||||
@ -243,6 +248,7 @@ typedef enum {
|
||||
XV_SHRX = XV_f20f38(f7),
|
||||
|
||||
/* Variable-length opcodes. XO_* prefix. */
|
||||
XO_OR = XO_(0b),
|
||||
XO_MOV = XO_(8b),
|
||||
XO_MOVto = XO_(89),
|
||||
XO_MOVtow = XO_66(89),
|
||||
|
@ -117,15 +117,26 @@ static void perftools_addtrace(GCtrace *T)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Allocate space for copy of trace. */
|
||||
static GCtrace *trace_save_alloc(jit_State *J)
|
||||
/* Allocate space for copy of T. */
|
||||
GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T)
|
||||
{
|
||||
size_t sztr = ((sizeof(GCtrace)+7)&~7);
|
||||
size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns);
|
||||
size_t szins = (T->nins-T->nk)*sizeof(IRIns);
|
||||
size_t sz = sztr + szins +
|
||||
J->cur.nsnap*sizeof(SnapShot) +
|
||||
J->cur.nsnapmap*sizeof(SnapEntry);
|
||||
return lj_mem_newt(J->L, (MSize)sz, GCtrace);
|
||||
T->nsnap*sizeof(SnapShot) +
|
||||
T->nsnapmap*sizeof(SnapEntry);
|
||||
GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace);
|
||||
char *p = (char *)T2 + sztr;
|
||||
T2->gct = ~LJ_TTRACE;
|
||||
T2->marked = 0;
|
||||
T2->traceno = 0;
|
||||
T2->ir = (IRIns *)p - T->nk;
|
||||
T2->nins = T->nins;
|
||||
T2->nk = T->nk;
|
||||
T2->nsnap = T->nsnap;
|
||||
T2->nsnapmap = T->nsnapmap;
|
||||
memcpy(p, T->ir + T->nk, szins);
|
||||
return T2;
|
||||
}
|
||||
|
||||
/* Save current trace by copying and compacting it. */
|
||||
@ -139,12 +150,12 @@ static void trace_save(jit_State *J, GCtrace *T)
|
||||
setgcrefp(J2G(J)->gc.root, T);
|
||||
newwhite(J2G(J), T);
|
||||
T->gct = ~LJ_TTRACE;
|
||||
T->ir = (IRIns *)p - J->cur.nk;
|
||||
memcpy(p, J->cur.ir+J->cur.nk, szins);
|
||||
T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */
|
||||
p += szins;
|
||||
TRACE_APPENDVEC(snap, nsnap, SnapShot)
|
||||
TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
|
||||
J->cur.traceno = 0;
|
||||
J->curfinal = NULL;
|
||||
setgcrefp(J->trace[T->traceno], T);
|
||||
lj_gc_barriertrace(J2G(J), T->traceno);
|
||||
lj_gdbjit_addtrace(J, T);
|
||||
@ -284,7 +295,6 @@ int lj_trace_flushall(lua_State *L)
|
||||
memset(J->penalty, 0, sizeof(J->penalty));
|
||||
/* Free the whole machine code and invalidate all exit stub groups. */
|
||||
lj_mcode_free(J);
|
||||
lj_ir_k64_freeall(J);
|
||||
memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
|
||||
lj_vmevent_send(L, TRACE,
|
||||
setstrV(L, L->top++, lj_str_newlit(L, "flush"));
|
||||
@ -297,13 +307,35 @@ void lj_trace_initstate(global_State *g)
|
||||
{
|
||||
jit_State *J = G2J(g);
|
||||
TValue *tv;
|
||||
/* Initialize SIMD constants. */
|
||||
|
||||
/* Initialize aligned SIMD constants. */
|
||||
tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
|
||||
tv[0].u64 = U64x(7fffffff,ffffffff);
|
||||
tv[1].u64 = U64x(7fffffff,ffffffff);
|
||||
tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
|
||||
tv[0].u64 = U64x(80000000,00000000);
|
||||
tv[1].u64 = U64x(80000000,00000000);
|
||||
|
||||
/* Initialize 32/64 bit constants. */
|
||||
#if LJ_TARGET_X86ORX64
|
||||
J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
|
||||
J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
|
||||
J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
|
||||
#if LJ_32
|
||||
J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
|
||||
#endif
|
||||
J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
|
||||
#endif
|
||||
#if LJ_TARGET_PPC
|
||||
J->k32[LJ_K32_2P52_2P31] = 0x59800004;
|
||||
J->k32[LJ_K32_2P52] = 0x59800000;
|
||||
#endif
|
||||
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
|
||||
J->k32[LJ_K32_2P31] = 0x4f000000;
|
||||
#endif
|
||||
#if LJ_TARGET_MIPS
|
||||
J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Free everything associated with the JIT compiler state. */
|
||||
@ -318,7 +350,6 @@ void lj_trace_freestate(global_State *g)
|
||||
}
|
||||
#endif
|
||||
lj_mcode_free(J);
|
||||
lj_ir_k64_freeall(J);
|
||||
lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
|
||||
lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
|
||||
lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
|
||||
@ -403,7 +434,7 @@ static void trace_start(jit_State *J)
|
||||
J->postproc = LJ_POST_NONE;
|
||||
lj_resetsplit(J);
|
||||
J->retryrec = 0;
|
||||
J->ktracep = NULL;
|
||||
J->ktrace = 0;
|
||||
setgcref(J->cur.startpt, obj2gco(J->pt));
|
||||
|
||||
L = J->L;
|
||||
@ -427,7 +458,7 @@ static void trace_stop(jit_State *J)
|
||||
BCOp op = bc_op(J->cur.startins);
|
||||
GCproto *pt = &gcref(J->cur.startpt)->pt;
|
||||
TraceNo traceno = J->cur.traceno;
|
||||
GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */
|
||||
GCtrace *T = J->curfinal;
|
||||
lua_State *L;
|
||||
|
||||
switch (op) {
|
||||
@ -479,9 +510,6 @@ static void trace_stop(jit_State *J)
|
||||
lj_mcode_commit(J, J->cur.mcode);
|
||||
J->postproc = LJ_POST_NONE;
|
||||
trace_save(J, T);
|
||||
if (J->ktracep) { /* Patch K64Array slot with the final GCtrace pointer. */
|
||||
setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE);
|
||||
}
|
||||
|
||||
L = J->L;
|
||||
lj_vmevent_send(L, TRACE,
|
||||
@ -515,6 +543,10 @@ static int trace_abort(jit_State *J)
|
||||
|
||||
J->postproc = LJ_POST_NONE;
|
||||
lj_mcode_abort(J);
|
||||
if (J->curfinal) {
|
||||
lj_trace_free(J2G(J), J->curfinal);
|
||||
J->curfinal = NULL;
|
||||
}
|
||||
if (tvisnumber(L->top-1))
|
||||
e = (TraceError)numberVint(L->top-1);
|
||||
if (e == LJ_TRERR_MCODELM) {
|
||||
|
@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
|
||||
LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
|
||||
|
||||
/* Trace management. */
|
||||
LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T);
|
||||
LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T);
|
||||
LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
|
||||
LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);
|
||||
|
@ -17,6 +17,10 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud,
|
||||
LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
|
||||
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode);
|
||||
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe);
|
||||
#if LJ_ABI_WIN && LJ_TARGET_X86
|
||||
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec,
|
||||
void *unwinder, int errcode);
|
||||
#endif
|
||||
LJ_ASMF void lj_vm_unwind_c_eh(void);
|
||||
LJ_ASMF void lj_vm_unwind_ff_eh(void);
|
||||
#if LJ_TARGET_X86ORX64
|
||||
|
105
src/luajit.c
105
src/luajit.c
@ -152,22 +152,15 @@ static void print_jit_status(lua_State *L)
|
||||
putc('\n', stdout);
|
||||
}
|
||||
|
||||
static int getargs(lua_State *L, char **argv, int n)
|
||||
static void createargtable(lua_State *L, char **argv, int argc, int argf)
|
||||
{
|
||||
int narg;
|
||||
int i;
|
||||
int argc = 0;
|
||||
while (argv[argc]) argc++; /* count total number of arguments */
|
||||
narg = argc - (n + 1); /* number of arguments to the script */
|
||||
luaL_checkstack(L, narg + 3, "too many arguments to script");
|
||||
for (i = n+1; i < argc; i++)
|
||||
lua_pushstring(L, argv[i]);
|
||||
lua_createtable(L, narg, n + 1);
|
||||
lua_createtable(L, argc - argf, argf);
|
||||
for (i = 0; i < argc; i++) {
|
||||
lua_pushstring(L, argv[i]);
|
||||
lua_rawseti(L, -2, i - n);
|
||||
lua_rawseti(L, -2, i - argf);
|
||||
}
|
||||
return narg;
|
||||
lua_setglobal(L, "arg");
|
||||
}
|
||||
|
||||
static int dofile(lua_State *L, const char *name)
|
||||
@ -273,21 +266,30 @@ static void dotty(lua_State *L)
|
||||
progname = oldprogname;
|
||||
}
|
||||
|
||||
static int handle_script(lua_State *L, char **argv, int n)
|
||||
static int handle_script(lua_State *L, char **argx)
|
||||
{
|
||||
int status;
|
||||
const char *fname;
|
||||
int narg = getargs(L, argv, n); /* collect arguments */
|
||||
lua_setglobal(L, "arg");
|
||||
fname = argv[n];
|
||||
if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
|
||||
const char *fname = argx[0];
|
||||
if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
|
||||
fname = NULL; /* stdin */
|
||||
status = luaL_loadfile(L, fname);
|
||||
lua_insert(L, -(narg+1));
|
||||
if (status == 0)
|
||||
if (status == 0) {
|
||||
/* Fetch args from arg table. LUA_INIT or -e might have changed them. */
|
||||
int narg = 0;
|
||||
lua_getglobal(L, "arg");
|
||||
if (lua_istable(L, -1)) {
|
||||
do {
|
||||
narg++;
|
||||
lua_rawgeti(L, -narg, narg);
|
||||
} while (!lua_isnil(L, -1));
|
||||
lua_pop(L, 1);
|
||||
lua_remove(L, -narg);
|
||||
narg--;
|
||||
} else {
|
||||
lua_pop(L, 1);
|
||||
}
|
||||
status = docall(L, narg, 0);
|
||||
else
|
||||
lua_pop(L, narg);
|
||||
}
|
||||
return report(L, status);
|
||||
}
|
||||
|
||||
@ -384,7 +386,8 @@ static int dobytecode(lua_State *L, char **argv)
|
||||
}
|
||||
for (argv++; *argv != NULL; narg++, argv++)
|
||||
lua_pushstring(L, *argv);
|
||||
return report(L, lua_pcall(L, narg, 0, 0));
|
||||
report(L, lua_pcall(L, narg, 0, 0));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* check that argument has no extra characters at the end */
|
||||
@ -405,7 +408,7 @@ static int collectargs(char **argv, int *flags)
|
||||
switch (argv[i][1]) { /* Check option. */
|
||||
case '-':
|
||||
notail(argv[i]);
|
||||
return (argv[i+1] != NULL ? i+1 : 0);
|
||||
return i+1;
|
||||
case '\0':
|
||||
return i;
|
||||
case 'i':
|
||||
@ -430,23 +433,23 @@ static int collectargs(char **argv, int *flags)
|
||||
case 'b': /* LuaJIT extension */
|
||||
if (*flags) return -1;
|
||||
*flags |= FLAGS_EXEC;
|
||||
return 0;
|
||||
return i+1;
|
||||
case 'E':
|
||||
*flags |= FLAGS_NOENV;
|
||||
break;
|
||||
default: return -1; /* invalid option */
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return i;
|
||||
}
|
||||
|
||||
static int runargs(lua_State *L, char **argv, int n)
|
||||
static int runargs(lua_State *L, char **argv, int argn)
|
||||
{
|
||||
int i;
|
||||
for (i = 1; i < n; i++) {
|
||||
for (i = 1; i < argn; i++) {
|
||||
if (argv[i] == NULL) continue;
|
||||
lua_assert(argv[i][0] == '-');
|
||||
switch (argv[i][1]) { /* option */
|
||||
switch (argv[i][1]) {
|
||||
case 'e': {
|
||||
const char *chunk = argv[i] + 2;
|
||||
if (*chunk == '\0') chunk = argv[++i];
|
||||
@ -460,10 +463,10 @@ static int runargs(lua_State *L, char **argv, int n)
|
||||
if (*filename == '\0') filename = argv[++i];
|
||||
lua_assert(filename != NULL);
|
||||
if (dolibrary(L, filename))
|
||||
return 1; /* stop if file fails */
|
||||
return 1;
|
||||
break;
|
||||
}
|
||||
case 'j': { /* LuaJIT extension */
|
||||
case 'j': { /* LuaJIT extension. */
|
||||
const char *cmd = argv[i] + 2;
|
||||
if (*cmd == '\0') cmd = argv[++i];
|
||||
lua_assert(cmd != NULL);
|
||||
@ -471,11 +474,11 @@ static int runargs(lua_State *L, char **argv, int n)
|
||||
return 1;
|
||||
break;
|
||||
}
|
||||
case 'O': /* LuaJIT extension */
|
||||
case 'O': /* LuaJIT extension. */
|
||||
if (dojitopt(L, argv[i] + 2))
|
||||
return 1;
|
||||
break;
|
||||
case 'b': /* LuaJIT extension */
|
||||
case 'b': /* LuaJIT extension. */
|
||||
return dobytecode(L, argv+i);
|
||||
default: break;
|
||||
}
|
||||
@ -508,45 +511,57 @@ static int pmain(lua_State *L)
|
||||
{
|
||||
struct Smain *s = &smain;
|
||||
char **argv = s->argv;
|
||||
int script;
|
||||
int argn;
|
||||
int flags = 0;
|
||||
globalL = L;
|
||||
if (argv[0] && argv[0][0]) progname = argv[0];
|
||||
LUAJIT_VERSION_SYM(); /* linker-enforced version check */
|
||||
script = collectargs(argv, &flags);
|
||||
if (script < 0) { /* invalid args? */
|
||||
|
||||
LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
|
||||
|
||||
argn = collectargs(argv, &flags);
|
||||
if (argn < 0) { /* Invalid args? */
|
||||
print_usage();
|
||||
s->status = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((flags & FLAGS_NOENV)) {
|
||||
lua_pushboolean(L, 1);
|
||||
lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
|
||||
}
|
||||
lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */
|
||||
luaL_openlibs(L); /* open libraries */
|
||||
|
||||
/* Stop collector during library initialization. */
|
||||
lua_gc(L, LUA_GCSTOP, 0);
|
||||
luaL_openlibs(L);
|
||||
lua_gc(L, LUA_GCRESTART, -1);
|
||||
|
||||
createargtable(L, argv, s->argc, argn);
|
||||
|
||||
if (!(flags & FLAGS_NOENV)) {
|
||||
s->status = handle_luainit(L);
|
||||
if (s->status != 0) return 0;
|
||||
}
|
||||
|
||||
if ((flags & FLAGS_VERSION)) print_version();
|
||||
s->status = runargs(L, argv, (script > 0) ? script : s->argc);
|
||||
|
||||
s->status = runargs(L, argv, argn);
|
||||
if (s->status != 0) return 0;
|
||||
if (script) {
|
||||
s->status = handle_script(L, argv, script);
|
||||
|
||||
if (s->argc > argn) {
|
||||
s->status = handle_script(L, argv + argn);
|
||||
if (s->status != 0) return 0;
|
||||
}
|
||||
|
||||
if ((flags & FLAGS_INTERACTIVE)) {
|
||||
print_jit_status(L);
|
||||
dotty(L);
|
||||
} else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
|
||||
} else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
|
||||
if (lua_stdin_is_tty()) {
|
||||
print_version();
|
||||
print_jit_status(L);
|
||||
dotty(L);
|
||||
} else {
|
||||
dofile(L, NULL); /* executes stdin as a file */
|
||||
dofile(L, NULL); /* Executes stdin as a file. */
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -555,7 +570,7 @@ static int pmain(lua_State *L)
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int status;
|
||||
lua_State *L = lua_open(); /* create state */
|
||||
lua_State *L = lua_open();
|
||||
if (L == NULL) {
|
||||
l_message(argv[0], "cannot create state: not enough memory");
|
||||
return EXIT_FAILURE;
|
||||
@ -565,6 +580,6 @@ int main(int argc, char **argv)
|
||||
status = lua_cpcall(L, pmain, NULL);
|
||||
report(L, status);
|
||||
lua_close(L);
|
||||
return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS;
|
||||
return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -57,7 +57,7 @@
|
||||
|.define TMP2, r14
|
||||
|.define TMP3, r15
|
||||
|
|
||||
|// Calling conventions.
|
||||
|// MIPS o32 calling convention.
|
||||
|.define CFUNCADDR, r25
|
||||
|.define CARG1, r4
|
||||
|.define CARG2, r5
|
||||
@ -4546,24 +4546,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
case BC_ISNEXT:
|
||||
| // RA = base*8, RD = target (points to ITERN)
|
||||
| addu RA, BASE, RA
|
||||
| lw TMP0, -24+HI(RA)
|
||||
| lw CFUNC:TMP1, -24+LO(RA)
|
||||
| lw TMP2, -16+HI(RA)
|
||||
| lw TMP3, -8+HI(RA)
|
||||
| srl TMP0, RD, 1
|
||||
| lw CARG1, -24+HI(RA)
|
||||
| lw CFUNC:CARG2, -24+LO(RA)
|
||||
| addu TMP0, PC, TMP0
|
||||
| lw CARG3, -16+HI(RA)
|
||||
| lw CARG4, -8+HI(RA)
|
||||
| li AT, LJ_TFUNC
|
||||
| bne TMP0, AT, >5
|
||||
|. addiu TMP2, TMP2, -LJ_TTAB
|
||||
| lbu TMP1, CFUNC:TMP1->ffid
|
||||
| addiu TMP3, TMP3, -LJ_TNIL
|
||||
| srl TMP0, RD, 1
|
||||
| or TMP2, TMP2, TMP3
|
||||
| addiu TMP1, TMP1, -FF_next_N
|
||||
| addu TMP0, PC, TMP0
|
||||
| or TMP1, TMP1, TMP2
|
||||
| bnez TMP1, >5
|
||||
|. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
|
||||
| bne CARG1, AT, >5
|
||||
|. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
|
||||
| lbu CARG2, CFUNC:CARG2->ffid
|
||||
| addiu CARG3, CARG3, -LJ_TTAB
|
||||
| addiu CARG4, CARG4, -LJ_TNIL
|
||||
| or CARG3, CARG3, CARG4
|
||||
| addiu CARG2, CARG2, -FF_next_N
|
||||
| or CARG2, CARG2, CARG3
|
||||
| bnez CARG2, >5
|
||||
|. lui TMP1, 0xfffe
|
||||
| addu PC, TMP0, TMP2
|
||||
| lui TMP1, 0xfffe
|
||||
| ori TMP1, TMP1, 0x7fff
|
||||
| sw r0, -8+LO(RA) // Initialize control var.
|
||||
| sw TMP1, -8+HI(RA)
|
||||
@ -4573,7 +4573,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| li TMP3, BC_JMP
|
||||
| li TMP1, BC_ITERC
|
||||
| sb TMP3, -4+OFS_OP(PC)
|
||||
| addu PC, TMP0, TMP2
|
||||
| addu PC, TMP0, TMP2
|
||||
| b <1
|
||||
|. sb TMP1, OFS_OP(PC)
|
||||
break;
|
||||
|
4849
src/vm_mips64.dasc
Normal file
4849
src/vm_mips64.dasc
Normal file
File diff suppressed because it is too large
Load Diff
@ -1105,11 +1105,11 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| mov BASE, L:RB->base
|
||||
| mov NARGS:RDd, TMP1d
|
||||
| mov LFUNC:RB, [RA-16]
|
||||
| cleartp LFUNC:RB
|
||||
| add NARGS:RDd, 1
|
||||
| // This is fragile. L->base must not move, KBASE must always be defined.
|
||||
| cmp KBASE, BASE // Continue with CALLT if flag set.
|
||||
| je ->BC_CALLT_Z
|
||||
| cleartp LFUNC:RB
|
||||
| mov BASE, RA
|
||||
| ins_call // Otherwise call resolved metamethod.
|
||||
|
|
||||
@ -2401,8 +2401,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| movzx RCd, byte [rbp-8] // Reconstruct exit number.
|
||||
| mov RCH, byte [rbp-16]
|
||||
| mov [rbp-8], r15; mov [rbp-16], r14
|
||||
| // Caveat: DISPATCH is rbx.
|
||||
| mov DISPATCH, [ebp]
|
||||
| // DISPATCH is preserved on-trace in LJ_GC64 mode.
|
||||
| mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
|
||||
| set_vmstate EXIT
|
||||
| mov [DISPATCH+DISPATCH_J(exitno)], RCd
|
||||
@ -3516,7 +3515,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| ins_AD // RA = level, RD = target
|
||||
| branchPC RD // Do this first to free RD.
|
||||
| mov L:RB, SAVE_L
|
||||
| cmp dword L:RB->openupval, 0
|
||||
| cmp aword L:RB->openupval, 0
|
||||
| je >1
|
||||
| mov L:RB->base, BASE
|
||||
| lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
|
||||
|
@ -121,19 +121,68 @@
|
||||
|//-----------------------------------------------------------------------
|
||||
|.if not X64 // x86 stack layout.
|
||||
|
|
||||
|.if WIN
|
||||
|
|
||||
|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
|
||||
|.macro saveregs_
|
||||
| push edi; push esi; push ebx
|
||||
| push extern lj_err_unwind_win
|
||||
| fs; push dword [0]
|
||||
| fs; mov [0], esp
|
||||
| sub esp, CFRAME_SPACE
|
||||
|.endmacro
|
||||
|.macro restoreregs
|
||||
| add esp, CFRAME_SPACE
|
||||
| fs; pop dword [0]
|
||||
| pop edi // Short for esp += 4.
|
||||
| pop ebx; pop esi; pop edi; pop ebp
|
||||
|.endmacro
|
||||
|
|
||||
|.else
|
||||
|
|
||||
|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
|
||||
|.macro saveregs_
|
||||
| push edi; push esi; push ebx
|
||||
| sub esp, CFRAME_SPACE
|
||||
|.endmacro
|
||||
|.macro saveregs
|
||||
| push ebp; saveregs_
|
||||
|.endmacro
|
||||
|.macro restoreregs
|
||||
| add esp, CFRAME_SPACE
|
||||
| pop ebx; pop esi; pop edi; pop ebp
|
||||
|.endmacro
|
||||
|
|
||||
|.endif
|
||||
|
|
||||
|.macro saveregs
|
||||
| push ebp; saveregs_
|
||||
|.endmacro
|
||||
|
|
||||
|.if WIN
|
||||
|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
|
||||
|.define SAVE_NRES, aword [esp+aword*18]
|
||||
|.define SAVE_CFRAME, aword [esp+aword*17]
|
||||
|.define SAVE_L, aword [esp+aword*16]
|
||||
|//----- 16 byte aligned, ^^^ arguments from C caller
|
||||
|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
|
||||
|.define SAVE_R4, aword [esp+aword*14]
|
||||
|.define SAVE_R3, aword [esp+aword*13]
|
||||
|.define SAVE_R2, aword [esp+aword*12]
|
||||
|//----- 16 byte aligned
|
||||
|.define SAVE_R1, aword [esp+aword*11]
|
||||
|.define SEH_FUNC, aword [esp+aword*10]
|
||||
|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
|
||||
|.define UNUSED2, aword [esp+aword*8]
|
||||
|//----- 16 byte aligned
|
||||
|.define UNUSED1, aword [esp+aword*7]
|
||||
|.define SAVE_PC, aword [esp+aword*6]
|
||||
|.define TMP2, aword [esp+aword*5]
|
||||
|.define TMP1, aword [esp+aword*4]
|
||||
|//----- 16 byte aligned
|
||||
|.define ARG4, aword [esp+aword*3]
|
||||
|.define ARG3, aword [esp+aword*2]
|
||||
|.define ARG2, aword [esp+aword*1]
|
||||
|.define ARG1, aword [esp] //<-- esp while in interpreter.
|
||||
|//----- 16 byte aligned, ^^^ arguments for C callee
|
||||
|.else
|
||||
|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
|
||||
|.define SAVE_NRES, aword [esp+aword*14]
|
||||
|.define SAVE_CFRAME, aword [esp+aword*13]
|
||||
@ -154,6 +203,7 @@
|
||||
|.define ARG2, aword [esp+aword*1]
|
||||
|.define ARG1, aword [esp] //<-- esp while in interpreter.
|
||||
|//----- 16 byte aligned, ^^^ arguments for C callee
|
||||
|.endif
|
||||
|
|
||||
|// FPARGx overlaps ARGx and ARG(x+1) on x86.
|
||||
|.define FPARG3, qword [esp+qword*1]
|
||||
@ -554,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|.else
|
||||
| mov eax, FCARG2 // Error return status for vm_pcall.
|
||||
| mov esp, FCARG1
|
||||
|.if WIN
|
||||
| lea FCARG1, SEH_NEXT
|
||||
| fs; mov [0], FCARG1
|
||||
|.endif
|
||||
|.endif
|
||||
|->vm_unwind_c_eh: // Landing pad for external unwinder.
|
||||
| mov L:RB, SAVE_L
|
||||
@ -577,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|.else
|
||||
| and FCARG1, CFRAME_RAWMASK
|
||||
| mov esp, FCARG1
|
||||
|.if WIN
|
||||
| lea FCARG1, SEH_NEXT
|
||||
| fs; mov [0], FCARG1
|
||||
|.endif
|
||||
|.endif
|
||||
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
|
||||
| mov L:RB, SAVE_L
|
||||
@ -590,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| set_vmstate INTERP
|
||||
| jmp ->vm_returnc // Increments RD/MULTRES and returns.
|
||||
|
|
||||
|.if WIN and not X64
|
||||
|->vm_rtlunwind@16: // Thin layer around RtlUnwind.
|
||||
| // (void *cframe, void *excptrec, void *unwinder, int errcode)
|
||||
| mov [esp], FCARG1 // Return value for RtlUnwind.
|
||||
| push FCARG2 // Exception record for RtlUnwind.
|
||||
| push 0 // Ignored by RtlUnwind.
|
||||
| push dword [FCARG1+CFRAME_OFS_SEH]
|
||||
| call extern RtlUnwind@16 // Violates ABI (clobbers too much).
|
||||
| mov FCARG1, eax
|
||||
| mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
|
||||
| ret // Jump to unwinder.
|
||||
|.endif
|
||||
|
|
||||
|//-----------------------------------------------------------------------
|
||||
|//-- Grow stack for calls -----------------------------------------------
|
||||
|//-----------------------------------------------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user