Merge remote-tracking branch 'upstream/v2.1' into ppc64-port

This commit is contained in:
Gustavo Serra Scalet 2016-09-05 10:17:50 -03:00
commit c00253828a
74 changed files with 7328 additions and 981 deletions

View File

@ -153,7 +153,7 @@ Contains the target OS name:
<h3 id="jit_arch"><tt>jit.arch</tt></h3>
<p>
Contains the target architecture name:
"x86", "x64", "arm", "ppc", or "mips".
"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".
</p>
<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>

View File

@ -349,6 +349,7 @@ break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
LuaJIT supports some extensions from Lua&nbsp;5.3:
<ul>
<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
</ul>
<h2 id="exceptions">C++ Exception Interoperability</h2>
@ -365,25 +366,30 @@ the toolchain used to compile LuaJIT:
</tr>
<tr class="odd separate">
<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
<td class="exccompiler">GCC 4.3+</td>
<td class="exccompiler">GCC 4.3+, Clang</td>
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr>
<tr class="even">
<td class="excplatform">Other platforms, DWARF2 unwinding</td>
<td class="exccompiler">GCC</td>
<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
<td class="exccompiler">GCC, Clang</td>
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr>
<tr class="odd">
<td class="excplatform">Other platforms, DWARF2 unwinding</td>
<td class="exccompiler">GCC, Clang</td>
<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
</tr>
<tr class="even">
<td class="excplatform">Windows/x64</td>
<td class="exccompiler">MSVC or WinSDK</td>
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr>
<tr class="even">
<tr class="odd">
<td class="excplatform">Windows/x86</td>
<td class="exccompiler">Any</td>
<td class="excinterop"><b style="color: #a00000;">No</b></td>
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr>
<tr class="odd">
<tr class="even">
<td class="excplatform">Other platforms</td>
<td class="exccompiler">Other compilers</td>
<td class="excinterop"><b style="color: #a00000;">No</b></td>
@ -432,14 +438,6 @@ C++ destructors.</li>
<li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
<li>Throwing Lua errors across C++ frames will <b>not</b> call
C++ destructors.</li>
<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
it's <b>not</b> safe to throw a Lua error across any frames containing
a C++ function with any try/catch construct or using variables with
(implicit) destructors. This also applies to any functions which may be
inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
is called inside or outside of a try/catch or whether any object actually
needs to be destroyed: the SEH chain is corrupted and this will eventually
lead to the termination of the process.</li>
</ul>
<br class="flush">
</div>

View File

@ -122,7 +122,7 @@ operating systems, CPUs and compilers:
<tr class="even">
<td class="compatcpu">x64 (64 bit)</td>
<td class="compatos">GCC 4.2+</td>
<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td>
<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td>
<td class="compatos">XCode 5.0+<br>Clang</td>
<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
</tr>
@ -148,7 +148,7 @@ operating systems, CPUs and compilers:
<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
</tr>
<tr class="even">
<td class="compatcpu"><a href="#cross2">MIPS</a></td>
<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td>
<td class="compatos">GCC 4.3+</td>
<td class="compatos">GCC 4.3+</td>
<td class="compatos compatno">&nbsp;</td>
@ -202,7 +202,7 @@ which is probably the default on your system, anyway. Simply run:
make
</pre>
<p>
This always builds a native x86, x64 or PPC binary, depending on the host OS
This always builds a native binary, depending on the host OS
you're running this command on. Check the section on
<a href="#cross">cross-compilation</a> for more options.
</p>
@ -333,25 +333,36 @@ directory where <tt>luajit.exe</tt> is installed
<h2 id="cross">Cross-compiling LuaJIT</h2>
<p>
The GNU Makefile-based build system allows cross-compiling on any host
for any supported target, as long as both architectures have the same
pointer size. If you want to cross-compile to any 32 bit target on an
x64 OS, you need to install the multilib development package (e.g.
<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
(<tt>HOST_CC="gcc -m32"</tt>).
First, let's clear up some terminology:
</p>
<ul>
<li>Host: This is your development system, usually based on a x64 or x86 CPU.</li>
<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
</ul>
<p>
The GNU Makefile-based build system allows cross-compiling on any host
for any supported target:
</p>
<ul>
<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
<li>Both host and target architectures must have the same pointer size.</li>
<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
<li>64 bit targets always require compilation on a 64 bit host.</li>
</ul>
<p>
You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
target OS differ, or you'll get assembler or linker errors. E.g. if
you're compiling on a Windows or OSX host for embedded Linux or Android,
you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
minimal target OS, you may need to disable the built-in allocator in
<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to
specify the same <tt>TARGET_SYS</tt> for the install step, too.
target OS differ, or you'll get assembler or linker errors:
</p>
<ul>
<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
</ul>
<p>
The examples below only show some popular targets &mdash; please check
the comments in <tt>src/Makefile</tt> for more details.
Here are some examples where host and target have the same CPU:
</p>
<pre class="code">
# Cross-compile to a 32 bit binary on a multilib x64 OS
@ -369,38 +380,47 @@ use the canonical toolchain triplets for Linux.
</p>
<p>
Since there's often no easy way to detect CPU features at runtime, it's
important to compile with the proper CPU or architecture settings. You
can specify these when building the toolchain yourself. Or add
<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For
ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting,
too. Otherwise LuaJIT may not run at the full performance of your target
CPU.
important to compile with the proper CPU or architecture settings:
</p>
<ul>
<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
</ul>
<p>
Here are some examples for targets with a different CPU than the host:
</p>
<pre class="code">
# ARM soft-float
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
TARGET_CFLAGS="-mfloat-abi=soft"
# ARM soft-float ABI with VFP (example for Cortex-A8)
# ARM soft-float ABI with VFP (example for Cortex-A9)
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp"
TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
# ARM hard-float ABI with VFP (armhf, requires recent toolchain)
# ARM hard-float ABI with VFP (armhf, most modern toolchains)
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
# ARM64 (requires x64 host)
# ARM64
make CROSS=aarch64-linux-
# PPC
make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
# MIPS big-endian
# MIPS32 big-endian
make HOST_CC="gcc -m32" CROSS=mips-linux-
# MIPS little-endian
# MIPS32 little-endian
make HOST_CC="gcc -m32" CROSS=mipsel-linux-
# MIPS64 big-endian
make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
# MIPS64 little-endian
make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
</pre>
<p>
You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/sdk/ndk/index.html"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/index.html">Android NDK</a>.
The environment variables need to match the install locations and the
desired target platform. E.g. Android&nbsp;4.0 corresponds to ABI level&nbsp;14.
For details check the folder <tt>docs</tt> in the NDK directory.
@ -414,7 +434,7 @@ to build/deploy or which lowest common denominator you want to pick:
# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
NDK=/opt/android/ndk
NDKABI=8
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@ -422,16 +442,16 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
NDK=/opt/android/ndk
NDKABI=14
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6
NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
# Android/MIPS, mips (MIPS32R1 hard-float), Android 4.0+ (ICS)
# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS)
NDK=/opt/android/ndk
NDKABI=14
NDKVER=$NDK/toolchains/mipsel-linux-android-4.6
NDKVER=$NDK/toolchains/mipsel-linux-android-4.9
NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@ -439,7 +459,7 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
NDK=/opt/android/ndk
NDKABI=14
NDKVER=$NDK/toolchains/x86-4.6
NDKVER=$NDK/toolchains/x86-4.9
NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
@ -459,14 +479,15 @@ Or use Android. :-p
ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
ICC=$(xcrun --sdk iphoneos --find clang)
ISDKF="-arch armv7 -isysroot $ISDKP"
make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \
TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \
CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
# iOS/ARM64
ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
ICC=$(xcrun --sdk iphoneos --find clang)
ISDKF="-arch arm64 -isysroot $ISDKP"
make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
</pre>
<h3 id="consoles">Cross-compiling for consoles</h3>

View File

@ -169,10 +169,10 @@ LuaJIT is Copyright &copy; 2005-2016 Mike Pall, released under the
<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
</table>
<table class="feature compiler">
<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
</table>
<table class="feature cpu">
<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr>
<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
</table>
<table class="feature fcompat">
<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>

View File

@ -91,12 +91,6 @@ hooks for non-Lua functions) and shows slightly different behavior
in LuaJIT (no per-coroutine hooks, no tail call counting).
</li>
<li>
Some checks are missing in the JIT-compiled code for obscure situations
with <b>open upvalues aliasing</b> one of the SSA slots later on (or
vice versa). Bonus points, if you can find a real world test case for
this.
</li>
<li>
Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
handled correctly. The error may fall through an on-trace
<tt>pcall</tt> or it may be passed on to the function set with

View File

@ -21,7 +21,7 @@ enum {
/* The following actions need a buffer position. */
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
/* The following actions also have an argument. */
DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
DASM__MAX
};
@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...)
*pl = -pos; /* Label exists now. */
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
case DASM_IMM:
case DASM_IMM: case DASM_IMMS:
#ifdef DASM_CHECKS
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
#endif
@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp)
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
case DASM_REL_LG: case DASM_REL_PC: pos++; break;
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
case DASM_IMM: pos++; break;
case DASM_IMM: case DASM_IMMS: pos++; break;
}
}
stop: (void)0;
@ -356,7 +356,7 @@ int dasm_encode(Dst_DECL, void *buffer)
if (ins & 2048)
n = n - (int)((char *)cp - base);
else
n = (n + (int)base) & 0x0fffffff;
n = (n + (int)(size_t)base) & 0x0fffffff;
patchrel:
CK((n & 3) == 0 &&
((n + ((ins & 2048) ? 0x00020000 : 0)) >>
@ -367,6 +367,9 @@ int dasm_encode(Dst_DECL, void *buffer)
ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
break;
case DASM_LABEL_PC: break;
case DASM_IMMS:
cp[-1] |= ((n>>3) & 4); n &= 0x1f;
/* fallthrough */
case DASM_IMM:
cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
break;

View File

@ -1,17 +1,19 @@
------------------------------------------------------------------------------
-- DynASM MIPS module.
-- DynASM MIPS32/MIPS64 module.
--
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
local mips64 = mips64
-- Module information:
local _info = {
arch = "mips",
description = "DynASM MIPS module",
arch = mips64 and "mips64" or "mips",
description = "DynASM MIPS32/MIPS64 module",
version = "1.4.0",
vernum = 10400,
release = "2015-10-18",
release = "2016-05-24",
author = "Mike Pall",
license = "MIT",
}
@ -27,7 +29,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local match, gmatch = _s.match, _s.gmatch
local concat, sort = table.concat, table.sort
local bit = bit or require("bit")
local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex
local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
local tohex = bit.tohex
-- Inherited tables and callbacks.
local g_opt, g_arch
@ -38,7 +41,7 @@ local wline, werror, wfatal, wwarn
local action_names = {
"STOP", "SECTION", "ESC", "REL_EXT",
"ALIGN", "REL_LG", "LABEL_LG",
"REL_PC", "LABEL_PC", "IMM",
"REL_PC", "LABEL_PC", "IMM", "IMMS",
}
-- Maximum number of section buffer positions for dasm_put().
@ -251,6 +254,10 @@ local map_op = {
bnel_3 = "54000000STB",
blezl_2 = "58000000SB",
bgtzl_2 = "5c000000SB",
daddi_3 = mips64 and "60000000TSI",
daddiu_3 = mips64 and "64000000TSI",
ldl_2 = mips64 and "68000000TO",
ldr_2 = mips64 and "6c000000TO",
lb_2 = "80000000TO",
lh_2 = "84000000TO",
lwl_2 = "88000000TO",
@ -258,23 +265,30 @@ local map_op = {
lbu_2 = "90000000TO",
lhu_2 = "94000000TO",
lwr_2 = "98000000TO",
lwu_2 = mips64 and "9c000000TO",
sb_2 = "a0000000TO",
sh_2 = "a4000000TO",
swl_2 = "a8000000TO",
sw_2 = "ac000000TO",
sdl_2 = mips64 and "b0000000TO",
sdr_2 = mips64 and "b4000000TO", -- SDR primary opcode is 0x2d (0x2d << 26).
swr_2 = "b8000000TO",
cache_2 = "bc000000NO",
ll_2 = "c0000000TO",
lwc1_2 = "c4000000HO",
pref_2 = "cc000000NO",
ldc1_2 = "d4000000HO",
ld_2 = mips64 and "dc000000TO",
sc_2 = "e0000000TO",
swc1_2 = "e4000000HO",
scd_2 = mips64 and "f0000000TO",
sdc1_2 = "f4000000HO",
sd_2 = mips64 and "fc000000TO",
-- Opcode SPECIAL.
nop_0 = "00000000",
sll_3 = "00000000DTA",
sextw_2 = "00000000DT",
movf_2 = "00000001DS",
movf_3 = "00000001DSC",
movt_2 = "00010001DS",
@ -285,6 +299,7 @@ local map_op = {
sllv_3 = "00000004DTS",
srlv_3 = "00000006DTS",
rotrv_3 = "00000046DTS",
drotrv_3 = mips64 and "00000056DTS",
srav_3 = "00000007DTS",
jr_1 = "00000008S",
jalr_1 = "0000f809S",
@ -300,15 +315,22 @@ local map_op = {
mthi_1 = "00000011S",
mflo_1 = "00000012D",
mtlo_1 = "00000013S",
dsllv_3 = mips64 and "00000014DTS",
dsrlv_3 = mips64 and "00000016DTS",
dsrav_3 = mips64 and "00000017DTS",
mult_2 = "00000018ST",
multu_2 = "00000019ST",
div_2 = "0000001aST",
divu_2 = "0000001bST",
dmult_2 = mips64 and "0000001cST",
dmultu_2 = mips64 and "0000001dST",
ddiv_2 = mips64 and "0000001eST",
ddivu_2 = mips64 and "0000001fST",
add_3 = "00000020DST",
move_2 = "00000021DS",
move_2 = mips64 and "00000025DS" or "00000021DS",
addu_3 = "00000021DST",
sub_3 = "00000022DST",
negu_2 = "00000023DT",
negu_2 = mips64 and "0000002fDT" or "00000023DT",
subu_3 = "00000023DST",
and_3 = "00000024DST",
or_3 = "00000025DST",
@ -317,6 +339,10 @@ local map_op = {
nor_3 = "00000027DST",
slt_3 = "0000002aDST",
sltu_3 = "0000002bDST",
dadd_3 = mips64 and "0000002cDST",
daddu_3 = mips64 and "0000002dDST",
dsub_3 = mips64 and "0000002eDST",
dsubu_3 = mips64 and "0000002fDST",
tge_2 = "00000030ST",
tge_3 = "00000030STZ",
tgeu_2 = "00000031ST",
@ -329,6 +355,14 @@ local map_op = {
teq_3 = "00000034STZ",
tne_2 = "00000036ST",
tne_3 = "00000036STZ",
dsll_3 = mips64 and "00000038DTa",
dsrl_3 = mips64 and "0000003aDTa",
drotr_3 = mips64 and "0020003aDTa",
dsra_3 = mips64 and "0000003bDTa",
dsll32_3 = mips64 and "0000003cDTA",
dsrl32_3 = mips64 and "0000003eDTA",
drotr32_3 = mips64 and "0020003eDTA",
dsra32_3 = mips64 and "0000003fDTA",
-- Opcode REGIMM.
bltz_2 = "04000000SB",
@ -356,13 +390,24 @@ local map_op = {
msubu_2 = "70000005ST",
clz_2 = "70000020DS=",
clo_2 = "70000021DS=",
dclz_2 = mips64 and "70000024DS=",
dclo_2 = mips64 and "70000025DS=",
sdbbp_0 = "7000003f",
sdbbp_1 = "7000003fY",
-- Opcode SPECIAL3.
ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1
zextw_2 = mips64 and "7c00f803TS",
ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1
dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33
dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1
wsbh_2 = "7c0000a0DT",
dsbh_2 = mips64 and "7c0000a4DT",
dshd_2 = mips64 and "7c000164DT",
seb_2 = "7c000420DT",
seh_2 = "7c000620DT",
rdhwr_2 = "7c00003bTD",
@ -370,8 +415,12 @@ local map_op = {
-- Opcode COP0.
mfc0_2 = "40000000TD",
mfc0_3 = "40000000TDW",
dmfc0_2 = mips64 and "40200000TD",
dmfc0_3 = mips64 and "40200000TDW",
mtc0_2 = "40800000TD",
mtc0_3 = "40800000TDW",
dmtc0_2 = mips64 and "40a00000TD",
dmtc0_3 = mips64 and "40a00000TDW",
rdpgpr_2 = "41400000DT",
di_0 = "41606000",
di_1 = "41606000T",
@ -388,9 +437,11 @@ local map_op = {
-- Opcode COP1.
mfc1_2 = "44000000TG",
dmfc1_2 = mips64 and "44200000TG",
cfc1_2 = "44400000TG",
mfhc1_2 = "44600000TG",
mtc1_2 = "44800000TG",
dmtc1_2 = mips64 and "44a00000TG",
ctc1_2 = "44c00000TG",
mthc1_2 = "44e00000TG",
@ -633,7 +684,7 @@ local function parse_fpr(expr)
werror("bad register name `"..expr.."'")
end
local function parse_imm(imm, bits, shift, scale, signed)
local function parse_imm(imm, bits, shift, scale, signed, action)
local n = tonumber(imm)
if n then
local m = sar(n, scale)
@ -651,7 +702,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
werror("expected immediate operand, got register")
else
waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
waction(action or "IMM",
(signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
return 0
end
end
@ -763,6 +815,9 @@ map_op[".template__"] = function(params, template, nparams)
n = n + 1
elseif p == "A" then
op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
elseif p == "a" then
local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
elseif p == "M" then
op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
elseif p == "N" then

12
dynasm/dasm_mips64.lua Normal file
View File

@ -0,0 +1,12 @@
------------------------------------------------------------------------------
-- DynASM MIPS64 module.
--
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
-- All the interesting stuff is there.
------------------------------------------------------------------------------
-- The flag has to be set before the require() below, since the combined
-- module is expected to look at the global `mips64` while it is being loaded.
mips64 = true -- Using a global is an ugly, but effective solution.
-- Pass on whatever the combined module returns as this module's result.
return require("dasm_mips")

View File

@ -121,8 +121,8 @@ XCFLAGS=
#
# Use the system provided memory allocator (realloc) instead of the
# bundled memory allocator. This is slower, but sometimes helpful for
# debugging. This option cannot be enabled on x64, since realloc usually
# doesn't return addresses in the right address range.
# debugging. This option cannot be enabled on x64 without GC64, since
# realloc usually doesn't return addresses in the right address range.
# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
# the only way to get useful results from it for all other architectures.
#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
@ -166,10 +166,6 @@ else
HOST_SYS= Windows
HOST_MSYS= cygwin
endif
# Use Clang for OSX host.
ifeq (Darwin,$(HOST_SYS))
DEFAULT_CC= clang
endif
endif
##############################################################################
@ -257,7 +253,11 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__MIPSEL__=1
endif
ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= mips64
else
TARGET_LJARCH= mips
endif
else
$(error Unsupported target architecture)
endif

View File

@ -163,7 +163,7 @@ lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
@ -215,19 +215,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
lib_init.c
lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \
lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
lib_ffi.c lib_init.c
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

View File

@ -110,7 +110,7 @@ static const char *sym_decorate(BuildCtx *ctx,
if (p) {
#if LJ_TARGET_X86ORX64
if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
name[0] = '@';
name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */
else
*p = '\0';
#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE

View File

@ -109,6 +109,8 @@ enum {
#if LJ_TARGET_X64
PEOBJ_SECT_PDATA,
PEOBJ_SECT_XDATA,
#elif LJ_TARGET_X86
PEOBJ_SECT_SXDATA,
#endif
PEOBJ_SECT_RDATA_Z,
PEOBJ_NSECTIONS
@ -208,6 +210,13 @@ void emit_peobj(BuildCtx *ctx)
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
#elif LJ_TARGET_X86
memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
/* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
#endif
memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
@ -232,7 +241,7 @@ void emit_peobj(BuildCtx *ctx)
nrsym = ctx->nrelocsym;
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
#if LJ_TARGET_X64
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
#endif
/* Write PE object header and all sections. */
@ -312,6 +321,19 @@ void emit_peobj(BuildCtx *ctx)
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_X86
/* Write .sxdata section. */
for (i = 0; i < nrsym; i++) {
if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
uint32_t symidx = 1+2+i;
owrite(ctx, &symidx, 4);
break;
}
}
if (i == nrsym) {
fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
exit(1);
}
#endif
/* Write .rdata$Z section. */
@ -333,8 +355,10 @@ void emit_peobj(BuildCtx *ctx)
#if LJ_TARGET_X64
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
emit_peobj_sym(ctx, "lj_err_unwind_win64", 0,
emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
#elif LJ_TARGET_X86
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
#endif
emit_peobj_sym(ctx, ctx->beginsym, 0,

View File

@ -157,11 +157,11 @@ local function merge_includes(src)
if includes[name] then return "" end
includes[name] = true
local fp = assert(io.open(LUA_SOURCE..name, "r"))
local src = fp:read("*a")
local inc = fp:read("*a")
assert(fp:close())
src = gsub(src, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "")
src = gsub(src, "#endif%s*$", "")
return merge_includes(src)
inc = gsub(inc, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "")
inc = gsub(inc, "#endif%s*$", "")
return merge_includes(inc)
end)
end

View File

@ -125,12 +125,12 @@ extern "C"
#ifdef _WIN32
__declspec(dllexport)
#endif
const char %s%s[] = {
const unsigned char %s%s[] = {
]], LJBC_PREFIX, ctx.modname))
else
fp:write(string.format([[
#define %s%s_SIZE %d
static const char %s%s[] = {
static const unsigned char %s%s[] = {
]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
end
local t, n, m = {}, 0, 0

View File

@ -12,7 +12,7 @@
local type = type
local sub, byte, format = string.sub, string.byte, string.format
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
local match, gmatch = string.match, string.gmatch
local concat = table.concat
local bit = require("bit")
local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex

View File

@ -11,8 +11,8 @@
------------------------------------------------------------------------------
local type = type
local sub, byte, format = string.sub, string.byte, string.format
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
local byte, format = string.byte, string.format
local match, gmatch = string.match, string.gmatch
local concat = table.concat
local bit = require("bit")
local band, bor, tohex = bit.band, bit.bor, bit.tohex
@ -38,7 +38,7 @@ local map_special = {
"multST", "multuST", "divST", "divuST",
false, false, false, false,
"addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
"andDST", "orDST", "xorDST", "nor|notDST0",
"andDST", "or|moveDST0", "xorDST", "nor|notDST0",
false, false, "sltDST", "sltuDST",
false, false, false, false,
"tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
@ -214,7 +214,7 @@ local map_pri = {
map_cop0, map_cop1, false, map_cop1x,
"beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
false, false, false, false,
map_special2, false, false, map_special3,
map_special2, "jalxJ", false, map_special3,
"lbTSO", "lhTSO", "lwlTSO", "lwTSO",
"lbuTSO", "lhuTSO", "lwrTSO", false,
"sbTSO", "shTSO", "swlTSO", "swTSO",

View File

@ -13,7 +13,7 @@
------------------------------------------------------------------------------
local type = type
local sub, byte, format = string.sub, string.byte, string.format
local byte, format = string.byte, string.format
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
local concat = table.concat
local bit = require("bit")

View File

@ -818,7 +818,7 @@ map_act = {
m = b%32; b = (b-m)/32
local nb = b%2; b = (b-nb)/2
if nb == 0 then ctx.rexb = true end
local nx = b%2; b = (b-nx)/2
local nx = b%2
if nx == 0 then ctx.rexx = true end
b = byte(ctx.code, pos, pos)
if not b then return incomplete(ctx) end

View File

@ -63,9 +63,9 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
local bit = require("bit")
local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
local band, shr, tohex = bit.band, bit.rshift, bit.tohex
local sub, gsub, format = string.sub, string.gsub, string.format
local byte, char, rep = string.byte, string.char, string.rep
local byte, rep = string.byte, string.rep
local type, tostring = type, tostring
local stdout, stderr = io.stdout, io.stderr
@ -213,7 +213,7 @@ local colortype_ansi = {
"\027[35m%s\027[m",
}
local function colorize_text(s, t)
local function colorize_text(s)
return s
end
@ -310,15 +310,17 @@ local function fmtfunc(func, pc)
end
end
local function formatk(tr, idx)
local function formatk(tr, idx, sn)
local k, t, slot = tracek(tr, idx)
local tn = type(k)
local s
if tn == "number" then
if k == 2^52+2^51 then
if band(sn or 0, 0x30000) ~= 0 then
s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
elseif k == 2^52+2^51 then
s = "bias"
else
s = format("%+.14g", k)
s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
end
elseif tn == "string" then
s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@ -354,7 +356,7 @@ local function printsnap(tr, snap)
n = n + 1
local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
if ref < 0 then
out:write(formatk(tr, ref))
out:write(formatk(tr, ref, sn))
elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
else

View File

@ -120,7 +120,7 @@ end
-- Show top N list.
local function prof_top(count1, count2, samples, indent)
local t, n = {}, 0
for k, v in pairs(count1) do
for k in pairs(count1) do
n = n + 1
t[n] = k
end

View File

@ -302,7 +302,7 @@ static int panic(lua_State *L)
#ifdef LUAJIT_USE_SYSMALLOC
#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
#error "Must use builtin allocator for 64 bit target"
#endif
@ -334,7 +334,7 @@ LUALIB_API lua_State *luaL_newstate(void)
lua_State *L;
void *ud = lj_alloc_create();
if (ud == NULL) return NULL;
#if LJ_64
#if LJ_64 && !LJ_GC64
L = lj_state_newstate(lj_alloc_f, ud);
#else
L = lua_newstate(lj_alloc_f, ud);
@ -343,7 +343,7 @@ LUALIB_API lua_State *luaL_newstate(void)
return L;
}
#if LJ_64
#if LJ_64 && !LJ_GC64
LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
{
UNUSED(f); UNUSED(ud);

View File

@ -715,15 +715,19 @@ static uint32_t jit_cpudetect(lua_State *L)
#if LJ_HASJIT
/* Compile-time MIPS CPU detection. */
#if LJ_ARCH_VERSION >= 20
flags |= JIT_F_MIPS32R2;
flags |= JIT_F_MIPSXXR2;
#endif
/* Runtime MIPS CPU detection. */
#if defined(__GNUC__)
if (!(flags & JIT_F_MIPS32R2)) {
if (!(flags & JIT_F_MIPSXXR2)) {
int x;
#ifdef __mips16
x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
#else
/* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
__asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */
#endif
if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
}
#endif
#endif

View File

@ -27,15 +27,15 @@
{
Optimized string compare
Memcheck:Addr4
fun:lj_str_fastcmp
fun:str_fastcmp
}
{
Optimized string compare
Memcheck:Addr1
fun:lj_str_fastcmp
fun:str_fastcmp
}
{
Optimized string compare
Memcheck:Cond
fun:lj_str_fastcmp
fun:str_fastcmp
}

View File

@ -72,13 +72,56 @@
#define IS_DIRECT_BIT (SIZE_T_ONE)
/* Determine system-specific block allocation method. */
#if LJ_TARGET_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#if LJ_64 && !LJ_GC64
#define LJ_ALLOC_VIRTUALALLOC 1
#if LJ_64 && !LJ_GC64
#define LJ_ALLOC_NTAVM 1
#endif
#else
#include <errno.h>
/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
#include <sys/mman.h>
#define LJ_ALLOC_MMAP 1
#if LJ_64
#define LJ_ALLOC_MMAP_PROBE 1
#if LJ_GC64
#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */
#elif LJ_TARGET_X64 && LJ_HASJIT
/* Due to limitations in the x64 compiler backend. */
#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */
#else
#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */
#endif
#endif
#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
#define LJ_ALLOC_MMAP32 1
#endif
#if LJ_TARGET_LINUX
#define LJ_ALLOC_MREMAP 1
#endif
#endif
#if LJ_ALLOC_VIRTUALALLOC
#if LJ_ALLOC_NTAVM
/* Undocumented, but hey, that's what we all love so much about Windows. */
typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
size_t *size, ULONG alloctype, ULONG prot);
@ -89,14 +132,15 @@ static PNTAVM ntavm;
*/
#define NTAVM_ZEROBITS 1
static void INIT_MMAP(void)
static void init_mmap(void)
{
ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
"NtAllocateVirtualMemory");
}
#define INIT_MMAP() init_mmap()
/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
static LJ_AINLINE void *CALL_MMAP(size_t size)
static void *CALL_MMAP(size_t size)
{
DWORD olderr = GetLastError();
void *ptr = NULL;
@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
}
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static LJ_AINLINE void *DIRECT_MMAP(size_t size)
static void *DIRECT_MMAP(size_t size)
{
DWORD olderr = GetLastError();
void *ptr = NULL;
@ -119,10 +163,8 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
#else
#define INIT_MMAP() ((void)0)
/* Win32 MMAP via VirtualAlloc */
static LJ_AINLINE void *CALL_MMAP(size_t size)
static void *CALL_MMAP(size_t size)
{
DWORD olderr = GetLastError();
void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
@ -131,7 +173,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
}
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static LJ_AINLINE void *DIRECT_MMAP(size_t size)
static void *DIRECT_MMAP(size_t size)
{
DWORD olderr = GetLastError();
void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
#endif
/* This function supports releasing coalesced segments */
static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
static int CALL_MUNMAP(void *ptr, size_t size)
{
DWORD olderr = GetLastError();
MEMORY_BASIC_INFORMATION minfo;
@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
return 0;
}
#else
#include <errno.h>
#include <sys/mman.h>
#elif LJ_ALLOC_MMAP
#define MMAP_PROT (PROT_READ|PROT_WRITE)
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@ -174,107 +213,151 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
#endif
#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
#if LJ_64 && !LJ_GC64
/* 64 bit mode with 32 bit pointers needs special support for allocating
** memory in the lower 2GB.
*/
#if LJ_ALLOC_MMAP_PROBE
#if defined(MAP_32BIT)
#if defined(__sun__)
#define MMAP_REGION_START ((uintptr_t)0x1000)
#ifdef MAP_TRYFIXED
#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED)
#else
/* Actually this only gives us max. 1GB in current Linux kernels. */
#define MMAP_REGION_START ((uintptr_t)0)
#define MMAP_FLAGS_PROBE MMAP_FLAGS
#endif
static LJ_AINLINE void *CALL_MMAP(size_t size)
#define LJ_ALLOC_MMAP_PROBE_MAX 30
#define LJ_ALLOC_MMAP_PROBE_LINEAR 5
#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
/* No point in a giant ifdef mess. Just try to open /dev/urandom.
** It doesn't really matter if this fails, since we get some ASLR bits from
** every unsuitable allocation, too. And we prefer linear allocation, anyway.
*/
#include <fcntl.h>
#include <unistd.h>
static uintptr_t mmap_probe_seed(void)
{
int olderr = errno;
void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
errno = olderr;
return ptr;
uintptr_t val;
int fd = open("/dev/urandom", O_RDONLY);
if (fd != -1) {
int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
(void)close(fd);
if (ok) return val;
}
return 1; /* Punt. */
}
#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN
/* OSX and FreeBSD mmap() use a naive first-fit linear search.
** That's perfect for us. Except that -pagezero_size must be set for OSX,
** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
** to be reduced to 250MB on FreeBSD.
*/
#if LJ_TARGET_OSX || defined(__DragonFly__)
#define MMAP_REGION_START ((uintptr_t)0x10000)
#elif LJ_TARGET_PS4
#define MMAP_REGION_START ((uintptr_t)0x4000)
#else
#define MMAP_REGION_START ((uintptr_t)0x10000000)
#endif
#define MMAP_REGION_END ((uintptr_t)0x80000000)
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
#include <sys/resource.h>
#endif
static LJ_AINLINE void *CALL_MMAP(size_t size)
static void *mmap_probe(size_t size)
{
int olderr = errno;
/* Hint for next allocation. Doesn't need to be thread-safe. */
static uintptr_t alloc_hint = MMAP_REGION_START;
int retry = 0;
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
static int rlimit_modified = 0;
if (LJ_UNLIKELY(rlimit_modified == 0)) {
struct rlimit rlim;
rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */
rlimit_modified = 1;
}
#endif
for (;;) {
void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
if ((uintptr_t)p >= MMAP_REGION_START &&
(uintptr_t)p + size < MMAP_REGION_END) {
alloc_hint = (uintptr_t)p + size;
static uintptr_t hint_addr = 0;
static uintptr_t hint_prng = 0;
int olderr = errno;
int retry;
for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
uintptr_t addr = (uintptr_t)p;
if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) {
/* We got a suitable address. Bump the hint address. */
hint_addr = addr + size;
errno = olderr;
return p;
}
if (p != CMFAIL) munmap(p, size);
#if defined(__sun__) || defined(__DragonFly__)
alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */
if (alloc_hint + size < MMAP_REGION_END) continue;
#endif
if (retry) break;
retry = 1;
alloc_hint = MMAP_REGION_START;
if (p != MFAIL) {
munmap(p, size);
} else if (errno == ENOMEM) {
return MFAIL;
}
if (hint_addr) {
/* First, try linear probing. */
if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
hint_addr += 0x1000000;
if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
hint_addr = 0;
continue;
} else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
/* Next, try a no-hint probe to get back an ASLR address. */
hint_addr = 0;
continue;
}
}
/* Finally, try pseudo-random probing. */
if (LJ_UNLIKELY(hint_prng == 0)) {
hint_prng = mmap_probe_seed();
}
/* The unsuitable address we got has some ASLR PRNG bits. */
hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
do { /* The PRNG itself is very weak, but see above. */
hint_prng = hint_prng * 1103515245 + 12345;
hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
} while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
}
errno = olderr;
return CMFAIL;
return MFAIL;
}
#else
#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
#endif
#else
#if LJ_ALLOC_MMAP32
/* 32 bit mode and GC64 mode is easy. */
static LJ_AINLINE void *CALL_MMAP(size_t size)
#if defined(__sun__)
#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
#else
#define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
#endif
/* Map `size` bytes of anonymous memory using the MAP_32BIT flag.
**
** Returns the pointer returned by mmap(). The caller's errno is preserved
** around the mmap() call made here.
**
** When LJ_ALLOC_MMAP_PROBE is enabled, the first MAP_32BIT failure flips a
** function-local static flag; that call and every later call are then
** served by mmap_probe() instead.
*/
static void *mmap_map32(size_t size)
{
#if LJ_ALLOC_MMAP_PROBE
/* Sticky switch: once set, MAP_32BIT is never tried again. */
static int fallback = 0;
if (fallback)
return mmap_probe(size);
#endif
{
/* Save errno so a failing mmap() does not clobber the caller's value. */
int olderr = errno;
void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
errno = olderr;
/* This only allows 1GB on Linux. So fall back to probing to get 2GB. */
#if LJ_ALLOC_MMAP_PROBE
if (ptr == MFAIL) {
fallback = 1;
return mmap_probe(size);
}
#endif
return ptr;
}
}
#endif
#if LJ_ALLOC_MMAP32
#define CALL_MMAP(size) mmap_map32(size)
#elif LJ_ALLOC_MMAP_PROBE
#define CALL_MMAP(size) mmap_probe(size)
#else
/* Map `size` bytes with a plain mmap() call: no address hint (NULL),
** MMAP_PROT protection and MMAP_FLAGS flags, no backing file (fd -1).
** Returns mmap()'s result unchanged; the caller's errno is preserved.
*/
static void *CALL_MMAP(size_t size)
{
/* Save errno so a failing mmap() does not clobber the caller's value. */
int olderr = errno;
void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
errno = olderr;
return ptr;
}
#endif
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
#include <sys/resource.h>
/* One-time allocator setup: set both the soft and the hard RLIMIT_DATA
** limit of the process to 0x10000 bytes.
** NOTE(review): presumably this keeps the data segment from occupying low
** addresses wanted by the allocator -- confirm against the probing code.
*/
static void init_mmap(void)
{
struct rlimit rlim;
/* Same value for the current (soft) and maximum (hard) limit. */
rlim.rlim_cur = rlim.rlim_max = 0x10000;
setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */
}
#define INIT_MMAP() init_mmap()
#endif
#define INIT_MMAP() ((void)0)
#define DIRECT_MMAP(s) CALL_MMAP(s)
static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
static int CALL_MUNMAP(void *ptr, size_t size)
{
int olderr = errno;
int ret = munmap(ptr, size);
@ -282,10 +365,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
return ret;
}
#if LJ_TARGET_LINUX
#if LJ_ALLOC_MREMAP
/* Need to define _GNU_SOURCE to get the mremap prototype. */
static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
int flags)
static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
{
int olderr = errno;
ptr = mremap(ptr, osz, nsz, flags);
@ -305,6 +387,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
#endif
#ifndef INIT_MMAP
#define INIT_MMAP() ((void)0)
#endif
#ifndef DIRECT_MMAP
#define DIRECT_MMAP(s) CALL_MMAP(s)
#endif
#ifndef CALL_MREMAP
#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
#endif

View File

@ -25,6 +25,10 @@
#define LUAJIT_ARCH_ppc 5
#define LUAJIT_ARCH_MIPS 6
#define LUAJIT_ARCH_mips 6
#define LUAJIT_ARCH_MIPS32 6
#define LUAJIT_ARCH_mips32 6
#define LUAJIT_ARCH_MIPS64 7
#define LUAJIT_ARCH_mips64 7
/* Target OS. */
#define LUAJIT_OS_OTHER 0
@ -47,8 +51,10 @@
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#else
#error "No support for this architecture (yet)"
#endif
@ -289,13 +295,21 @@
#define LJ_ARCH_XENON 1
#endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
#define LJ_ARCH_NAME "mipsel"
#else
#define LJ_ARCH_NAME "mips64el"
#endif
#define LJ_ARCH_ENDIAN LUAJIT_LE
#else
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
#define LJ_ARCH_NAME "mips"
#else
#define LJ_ARCH_NAME "mips64"
#endif
#define LJ_ARCH_ENDIAN LUAJIT_BE
#endif
@ -307,11 +321,6 @@
#endif
#endif
/* Temporarily disable features until the code has been merged. */
#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
#define LUAJIT_NO_UNWIND 1
#endif
#if !defined(LJ_ABI_SOFTFP)
#ifdef __mips_soft_float
#define LJ_ABI_SOFTFP 1
@ -320,7 +329,15 @@
#endif
#endif
#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
#define LJ_ARCH_BITS 32
#define LJ_TARGET_MIPS32 1
#else
#define LJ_ARCH_BITS 64
#define LJ_TARGET_MIPS64 1
#define LJ_TARGET_GC64 1
#define LJ_ARCH_NOJIT 1 /* NYI */
#endif
#define LJ_TARGET_MIPS 1
#define LJ_TARGET_EHRETREG 4
#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
@ -329,7 +346,7 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#if _MIPS_ARCH_MIPS32R2
#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
#define LJ_ARCH_VERSION 20
#else
#define LJ_ARCH_VERSION 10
@ -410,9 +427,13 @@
#ifdef __NO_FPRS__
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
#elif LJ_TARGET_MIPS
#if defined(_LP64)
#error "No support for MIPS64"
#elif LJ_TARGET_MIPS32
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
#error "Only o32 ABI supported for MIPS32"
#endif
#elif LJ_TARGET_MIPS64
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
#error "Only n64 ABI supported for MIPS64"
#endif
#endif
#endif
@ -453,7 +474,7 @@
#endif
/* Disable or enable the JIT compiler. */
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
#define LJ_HASJIT 0
#else
#define LJ_HASJIT 1
@ -524,6 +545,11 @@
#define LJ_NO_SYSTEM 1
#endif
#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
/* NYI: no support for compact unwind specification, yet. */
#define LUAJIT_NO_UNWIND 1
#endif
#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
#define LJ_NO_UNWIND 1
#endif

View File

@ -334,7 +334,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
RA_DBGX((as, "remat $i $r", ir, r));
#if !LJ_SOFTFP
if (ir->o == IR_KNUM) {
emit_loadn(as, r, ir_knum(ir));
emit_loadk64(as, r, ir);
} else
#endif
if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
#if LJ_64
} else if (ir->o == IR_KINT64) {
emit_loadu64(as, r, ir_kint64(ir)->u64);
#if LJ_GC64
} else if (ir->o == IR_KGC) {
emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
} else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
#endif
#endif
} else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@ -619,10 +625,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
return r;
}
/* Add a register rename to the IR.
**
** Sets up an IR_RENAME instruction of type IRT_NIL with `ref` and `snapno`
** as its two operands, emits it, and then records `down` in the register
** field of the new instruction and SPS_NONE in its spill slot field.
**
** The new instruction is addressed through as->J->cur.ir, not through the
** assembler's own IR pointer.
** NOTE(review): presumably because as->ir may refer to a separate copy of
** the IR while assembling -- confirm against lj_asm_trace().
*/
static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
{
IRRef ren;
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
/* Emitting yields a tagged reference; strip it down to the plain IR index. */
ren = tref_ref(lj_ir_emit(as->J));
as->J->cur.ir[ren].r = (uint8_t)down;
as->J->cur.ir[ren].s = SPS_NONE;
}
/* Rename register allocation and emit move. */
static void ra_rename(ASMState *as, Reg down, Reg up)
{
IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
IRIns *ir = IR(ref);
ir->r = (uint8_t)up;
as->cost[down] = 0;
@ -635,11 +651,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
ren = tref_ref(lj_ir_emit(as->J));
as->ir = as->T->ir; /* The IR may have been reallocated. */
IR(ren)->r = (uint8_t)down;
IR(ren)->s = SPS_NONE;
ra_addrename(as, down, ref, as->snapno);
}
}
@ -689,16 +701,20 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
if (ra_noreg(left)) {
if (irref_isk(lref)) {
if (ir->o == IR_KNUM) {
cTValue *tv = ir_knum(ir);
/* FP remat needs a load except for +0. Still better than eviction. */
if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
emit_loadn(as, dest, tv);
if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
emit_loadk64(as, dest, ir);
return;
}
#if LJ_64
} else if (ir->o == IR_KINT64) {
emit_loadu64(as, dest, ir_kint64(ir)->u64);
emit_loadk64(as, dest, ir);
return;
#if LJ_GC64
} else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
emit_loadk64(as, dest, ir);
return;
#endif
#endif
} else if (ir->o != IR_KPRI) {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@ -941,7 +957,7 @@ static void asm_snap_prep(ASMState *as)
} else {
/* Process any renames above the highwater mark. */
for (; as->snaprename < as->T->nins; as->snaprename++) {
IRIns *ir = IR(as->snaprename);
IRIns *ir = &as->T->ir[as->snaprename];
if (asm_snap_checkrename(as, ir->op1))
ir->op2 = REF_BIAS-1; /* Kill rename. */
}
@ -1055,7 +1071,7 @@ static void asm_bufhdr(ASMState *as, IRIns *ir)
}
} else {
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
/* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
/* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
}
@ -1472,12 +1488,7 @@ static void asm_phi_fixup(ASMState *as)
irt_clearmark(ir->t);
/* Left PHI gained a spill slot before the loop? */
if (ra_hasspill(ir->s)) {
IRRef ren;
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
ren = tref_ref(lj_ir_emit(as->J));
as->ir = as->T->ir; /* The IR may have been reallocated. */
IR(ren)->r = (uint8_t)r;
IR(ren)->s = SPS_NONE;
ra_addrename(as, r, lref, as->loopsnapno);
}
}
rset_clear(work, r);
@ -1888,7 +1899,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
SnapEntry sn = map[n-1];
if ((sn & SNAP_FRAME)) {
*gotframe = 1;
return snap_slot(sn);
return snap_slot(sn) - LJ_FR2;
}
}
return 0;
@ -1908,16 +1919,20 @@ static void asm_tail_link(ASMState *as)
if (as->T->link == 0) {
/* Setup fixed registers for exit to interpreter. */
const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
int32_t mres;
if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
if (bc_isret(bc_op(*retpc)))
pc = retpc;
}
#if LJ_GC64
emit_loadu64(as, RID_LPC, u64ptr(pc));
#else
ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
ra_allockreg(as, i32ptr(pc), RID_LPC);
mres = (int32_t)(snap->nslots - baseslot);
#endif
mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT:
mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
@ -1932,6 +1947,11 @@ static void asm_tail_link(ASMState *as)
}
emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */
setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
IR(as->J->ktrace)->o = IR_KGC;
}
/* Sync the interpreter state with the on-trace state. */
asm_stack_restore(as, snap);
@ -1957,17 +1977,22 @@ static void asm_setup_regsp(ASMState *as)
ra_setup(as);
/* Clear reg/sp for constants. */
for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
ir->prev = REGSP_INIT;
if (irt_is64(ir->t) && ir->o != IR_KNULL) {
#if LJ_GC64
ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
#else
/* Make life easier for backends by putting address of constant in i. */
ir->i = (int32_t)(intptr_t)(ir+1);
#endif
ir++;
}
}
/* REF_BASE is used for implicit references to the BASE register. */
lastir->prev = REGSP_HINT(RID_BASE);
ir = IR(nins-1);
if (ir->o == IR_RENAME) {
do { ir--; nins--; } while (ir->o == IR_RENAME);
T->nins = nins; /* Remove any renames left over from ASM restart. */
}
as->snaprename = nins;
as->snapref = nins;
as->snapno = T->nsnap;
@ -2199,14 +2224,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
ASMState *as = &as_;
MCode *origtop;
/* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
{
IRRef nins = T->nins;
IRIns *ir = &T->ir[nins-1];
if (ir->o == IR_NOP || ir->o == IR_RENAME) {
do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
T->nins = nins;
}
}
/* Ensure an initialized instruction beyond the last one for HIOP checks. */
J->cur.nins = lj_ir_nextins(J);
J->cur.ir[J->cur.nins].o = IR_NOP;
/* This also allows one RENAME to be added without reallocating curfinal. */
as->orignins = lj_ir_nextins(J);
J->cur.ir[as->orignins].o = IR_NOP;
/* Setup initial state. Copy some fields to reduce indirections. */
as->J = J;
as->T = T;
as->ir = T->ir;
J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
as->flags = J->flags;
as->loopref = J->loopref;
as->realign = NULL;
@ -2219,12 +2255,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->mclim = as->mcbot + MCLIM_REDZONE;
asm_setup_target(as);
do {
/*
** This is a loop, because the MCode may have to be (re-)assembled
** multiple times:
**
** 1. as->realign is set (and the assembly aborted), if the arch-specific
** backend wants the MCode to be aligned differently.
**
** This is currently only the case on x86/x64, where small loops get
** an aligned loop body plus a short branch. Not much effort is wasted,
** because the abort happens very quickly and only once.
**
** 2. The IR is immovable, since the MCode embeds pointers to various
** constants inside the IR. But RENAMEs may need to be added to the IR
** during assembly, which might grow and reallocate the IR. We check
** at the end if the IR (in J->cur.ir) has actually grown, resize the
** copy (in J->curfinal.ir) and try again.
**
** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
** always have one spare slot in the IR (see above), which means we
** have to redo the assembly for only ~2% of all traces.
**
** Very, very rarely, this needs to be done repeatedly, since the
** location of constants inside the IR (actually, reachability from
** a global pointer) may affect register allocation and thus the
** number of RENAMEs.
*/
for (;;) {
as->mcp = as->mctop;
#ifdef LUA_USE_ASSERT
as->mcp_prev = as->mcp;
#endif
as->curins = T->nins;
as->ir = J->curfinal->ir; /* Use the copied IR. */
as->curins = J->cur.nins = as->orignins;
RA_DBG_START();
RA_DBGX((as, "===== STOP ====="));
@ -2252,7 +2317,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
checkmclim(as);
asm_ir(as, ir);
}
} while (as->realign); /* Retry in case the MCode needs to be realigned. */
if (as->realign && J->curfinal->nins >= T->nins)
continue; /* Retry in case only the MCode needs to be realigned. */
/* Emit head of trace. */
RA_DBG_REF();
@ -2261,6 +2328,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->curins = as->T->snap[0].ref;
asm_snap_prep(as); /* The GC check is a guard. */
asm_gc_check(as);
as->curins = as->stopins;
}
ra_evictk(as);
if (as->parent)
@ -2269,6 +2337,21 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
asm_head_root(as);
asm_phi_fixup(as);
if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
lua_assert(J->curfinal->nk == T->nk);
memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
(T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
T->nins = J->curfinal->nins;
break; /* Done. */
}
/* Otherwise try again with a bigger IR. */
lj_trace_free(J2G(J), J->curfinal);
J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
J->curfinal = lj_trace_alloc(J->L, T);
as->realign = NULL;
}
RA_DBGX((as, "===== START ===="));
RA_DBG_FLUSH();
if (as->freeset != RSET_ALL)

View File

@ -909,7 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
/* NYI: Check that UREFO is still open and not aliasing a slot. */
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
@ -998,6 +997,9 @@ static ARMIns asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir)
{
if (ir->op1 == REF_NIL) {
lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
ARMIns ai = asm_fxloadins(ir);
@ -1015,6 +1017,7 @@ static void asm_fload(ASMState *as, IRIns *ir)
else
emit_lsox(as, ai, dest, idx, ofs);
}
}
static void asm_fstore(ASMState *as, IRIns *ir)
{

View File

@ -459,12 +459,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
dest, dest);
if (irt_isfloat(ir->t))
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
RSET_GPR);
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
else
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
RSET_GPR);
(void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
@ -494,12 +492,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
tmp, left, tmp);
if (st == IRT_FLOAT)
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
RSET_GPR);
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
else
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
RSET_GPR);
(void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
} else {
emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
@ -514,7 +510,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
if ((ir->op2 & IRCONV_SEXT)) {
if ((as->flags & JIT_F_MIPS32R2)) {
if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
} else {
uint32_t shift = st == IRT_I8 ? 24 : 16;
@ -743,7 +739,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
if ((as->flags & JIT_F_MIPS32R2)) {
if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
} else {
emit_dst(as, MIPSI_OR, dest, dest, tmp1);
@ -810,7 +806,6 @@ nolo:
static void asm_uref(ASMState *as, IRIns *ir)
{
/* NYI: Check that UREFO is still open and not aliasing a slot. */
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
@ -901,9 +896,14 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
MIPSIns mi = asm_fxloadins(ir);
Reg idx;
int32_t ofs;
if (ir->op1 == REF_NIL) {
idx = RID_JGL;
ofs = ir->op2 - 32768;
} else {
idx = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->op2 == IRFL_TAB_ARRAY) {
ofs = asm_fuseabase(as, ir->op1);
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
@ -912,6 +912,7 @@ static void asm_fload(ASMState *as, IRIns *ir)
}
}
ofs = field_ofs[ir->op2];
}
lua_assert(!irt_isfp(ir->t));
emit_tsi(as, mi, dest, idx, ofs);
}
@ -1456,7 +1457,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
if ((as->flags & JIT_F_MIPS32R2)) {
if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
} else {
@ -1512,7 +1513,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
static void asm_bror(ASMState *as, IRIns *ir)
{
if ((as->flags & JIT_F_MIPS32R2)) {
if ((as->flags & JIT_F_MIPSXXR2)) {
asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);

View File

@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_lsptr(as, PPCI_LFS, (fbias & 31),
(void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
RSET_GPR);
(void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_fb(as, PPCI_FCTIWZ, tmp, left);
}
@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg left = ra_alloc1(as, lref, allow);
Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
const float *kbias;
if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
emit_fab(as, PPCI_FSUB, dest, dest, fbias);
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
if (st == IRT_U32) kbias++;
emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
emit_lsptr(as, PPCI_LFS, (fbias & 31),
&as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
rset_clear(allow, hibias));
emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
RID_SP, SPOFS_TMPLO);
@ -472,8 +469,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
emit_fab(as, PPCI_FSUB, tmp, left, tmp);
emit_lsptr(as, PPCI_LFS, (tmp & 31),
(void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
RSET_GPR);
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
} else {
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
@ -717,7 +713,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
/* NYI: Check that UREFO is still open and not aliasing a slot. */
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
@ -809,9 +804,14 @@ static PPCIns asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
PPCIns pi = asm_fxloadins(ir);
Reg idx;
int32_t ofs;
if (ir->op1 == REF_NIL) {
idx = RID_JGL;
ofs = ir->op2 - 32768;
} else {
idx = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->op2 == IRFL_TAB_ARRAY) {
ofs = asm_fuseabase(as, ir->op1);
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
@ -820,6 +820,7 @@ static void asm_fload(ASMState *as, IRIns *ir)
}
}
ofs = field_ofs[ir->op2];
}
lua_assert(!irt_isi8(ir->t));
emit_tai(as, pi, dest, idx, ofs);
}
@ -975,7 +976,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
emit_fab(as, PPCI_FSUB, dest, dest, fbias);
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
emit_lsptr(as, PPCI_LFS, (fbias & 31),
(void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
(void *)&as->J->k32[LJ_K32_2P52_2P31],
rset_clear(allow, hibias));
emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);

File diff suppressed because it is too large Load Diff

View File

@ -439,8 +439,8 @@
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
#elif LJ_TARGET_MIPS
/* -- MIPS calling conventions -------------------------------------------- */
#elif LJ_TARGET_MIPS32
/* -- MIPS o32 calling conventions ---------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
@ -515,6 +515,78 @@
sp = (uint8_t *)&cc->fpr[0].f;
#endif
#elif LJ_TARGET_MIPS64
/* -- MIPS n64 calling conventions ---------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
cc->retref = !(sz <= 16); \
if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
#define CCALL_HANDLE_STRUCTRET2 \
ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct));
#define CCALL_HANDLE_COMPLEXRET \
/* Complex values are returned in 1 or 2 FPRs. */ \
cc->retref = 0;
#if LJ_ABI_SOFTFP /* MIPS64 soft-float */
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
((intptr_t *)dp)[0] = cc->gpr[0]; \
} else { /* Copy complex double from GPRs. */ \
((intptr_t *)dp)[0] = cc->gpr[0]; \
((intptr_t *)dp)[1] = cc->gpr[1]; \
}
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
/* Position of soft-float 'float' return value depends on endianness. */
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4);
#else /* MIPS64 hard-float */
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
((float *)dp)[0] = cc->fpr[0].f; \
((float *)dp)[1] = cc->fpr[1].f; \
} else { /* Copy complex double from FPRs. */ \
((double *)dp)[0] = cc->fpr[0].d; \
((double *)dp)[1] = cc->fpr[1].d; \
}
#define CCALL_HANDLE_COMPLEXARG \
if (sz == 2*sizeof(float)) { \
isfp = 2; \
if (ngpr < maxgpr) \
sz *= 2; \
}
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
#endif
#define CCALL_HANDLE_STRUCTARG \
/* Pass all structs by value in registers and/or on the stack. */
#define CCALL_HANDLE_REGARG \
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
} \
goto done; \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif
@ -754,6 +826,78 @@ noth: /* Not a homogeneous float/double aggregate. */
#endif
/* -- MIPS64 ABI struct classification ---------------------------- */
#if LJ_TARGET_MIPS64
#define FTYPE_FLOAT 1
#define FTYPE_DOUBLE 2
/* Classify FP fields (max. 2) and their types.
**
** Walks the sibling chain of 'ct' and builds a bit mask with two bits per
** FP field: field n occupies bits 2*n..2*n+1 and holds FTYPE_FLOAT for a
** 4 byte FP type or FTYPE_DOUBLE for any other FP size.
**
** Returns 0 (struct is handled via GPRs) if 'ctf' has CTF_VARARG set, if
** 'ct' is a union, if any field is not an FP type, if a bitfield or a
** CTA_SUBTYPE attribute is found, or if any further sibling follows the
** second FP field.
**
** NOTE(review): 'ctf' is presumably the called function's type -- the
** CCALL_HANDLE_STRUCTRET2 macro passes its 'ct' here; confirm at the caller.
*/
static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
{
int n = 0, ft = 0;
if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION))
goto noth;
while (ct->sib) {
CType *sct;
ct = ctype_get(cts, ct->sib);
if (n == 2) {
/* Already have two FP fields: anything more disqualifies the struct. */
goto noth;
} else if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct);
if (ctype_isfp(sct->info)) {
/* Record the type of FP field n in its own 2 bit slot. */
ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n;
n++;
} else {
goto noth;
}
} else if (ctype_isbitfield(ct->info) ||
ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
goto noth;
}
}
/* n cannot exceed 2 here, since the loop bails out before a 3rd field. */
if (n <= 2)
return ft;
noth: /* Not a struct consisting of only one or two FP fields. */
return 0; /* Struct is in GPRs. */
}
/* Copy a struct return value into the destination buffer 'dp'.
**
** 'ft' is the FP field mask produced by ccall_classify_struct() (two bits
** per field, 0 = no FP classification). 'sp' is the default source for a
** plain block copy of ctr->size bytes.
**
** Field-by-field copy is used for soft-float whenever ft is non-zero, and
** for hard-float whenever at least one of the two fields is a float.
** Otherwise the struct is copied as one block; for hard-float with a
** non-zero ft (only doubles) the source is redirected to cc->fpr[0].
**
** NOTE(review): this function has external linkage, but the only use
** visible here is the CCALL_HANDLE_STRUCTRET2 macro in the same file --
** consider making it static after checking for other users.
*/
void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft)
{
if (LJ_ABI_SOFTFP ? ft :
((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
/* i indexes the FP field, ofs is the byte offset written in dp. */
int i, ofs = 0;
for (i = 0; ft != 0; i++, ft >>= 2) {
if ((ft & 3) == FTYPE_FLOAT) {
#if LJ_ABI_SOFTFP
/* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */
/* Pick the 4 byte half of the 64 bit GPR slot that holds the float. */
memcpy((uint8_t *)dp + ofs,
(uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4);
#else
*(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f;
#endif
ofs += 4;
} else {
ofs = (ofs + 7) & ~7; /* 64 bit alignment. */
#if LJ_ABI_SOFTFP
*(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i];
#else
*(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d;
#endif
ofs += 8;
}
}
} else {
#if !LJ_ABI_SOFTFP
/* Only doubles: their in-memory layout matches consecutive FPR slots. */
if (ft) sp = (uint8_t *)&cc->fpr[0];
#endif
memcpy(dp, sp, ctr->size);
}
}
#endif
/* -- Common C call handling ---------------------------------------------- */
/* Infer the destination CTypeID for a vararg argument. */
@ -921,6 +1065,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
#if LJ_TARGET_MIPS64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
(isfp && nsp == 0)) && d->size <= 4) {
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
}
#endif
#if LJ_TARGET_X64 && LJ_ABI_WIN
if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
if (nfpr == ngpr)
@ -936,7 +1086,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
cc->fpr[nfpr-2].d[1] = 0;
}
#elif LJ_TARGET_ARM64
#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP)
if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
/* Split float HFA or complex float into separate registers. */
CTSize i = (sz >> 2) - 1;
@ -983,7 +1133,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_COMPLEXRET2
return 1; /* One GC step. */
}
if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR)
if (LJ_BE && ctr->size < CTSIZE_PTR &&
(ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info)))
sp += (CTSIZE_PTR - ctr->size);
#if CCALL_NUM_FPR
if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))

View File

@ -104,11 +104,11 @@ typedef union FPRArg {
typedef intptr_t GPRArg;
typedef double FPRArg;
#elif LJ_TARGET_MIPS
#elif LJ_TARGET_MIPS32
#define CCALL_NARG_GPR 4
#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2)
#define CCALL_NRET_GPR 2
#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2)
#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
#define CCALL_SPS_EXTRA 7
#define CCALL_SPS_FREE 1
@ -119,6 +119,22 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#elif LJ_TARGET_MIPS64
/* FP args are positional and overlay the GPR array. */
#define CCALL_NARG_GPR 8
#define CCALL_NARG_FPR 0
#define CCALL_NRET_GPR 2
#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
#define CCALL_SPS_EXTRA 3
#define CCALL_SPS_FREE 1
typedef intptr_t GPRArg;
typedef union FPRArg {
double d;
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#else
#error "Missing calling convention definitions for this architecture"
#endif

View File

@ -67,9 +67,13 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#define CALLBACK_MCODE_HEAD 24
#endif
#elif LJ_TARGET_MIPS
#elif LJ_TARGET_MIPS32
#define CALLBACK_MCODE_HEAD 24
#define CALLBACK_MCODE_HEAD 20
#elif LJ_TARGET_MIPS64
#define CALLBACK_MCODE_HEAD 52
#else
@ -221,14 +225,27 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
static void callback_mcode_init(global_State *g, uint32_t *page)
{
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
uintptr_t ug = (uintptr_t)(void *)g;
MSize slot;
*p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0;
*p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16);
*p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16);
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff);
#if LJ_TARGET_MIPS32
*p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
*p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
#else
*p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48);
*p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48);
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
*p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
*p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
*p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
*p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
#endif
*p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
*p++ = MIPSI_JR | MIPSF_S(RID_R3);
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff);
*p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
*p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
p++;
@ -440,7 +457,7 @@ void lj_ccallback_mcode_free(CTState *cts)
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
*(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
#elif LJ_TARGET_MIPS
#elif LJ_TARGET_MIPS32
#define CALLBACK_HANDLE_GPR \
if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
@ -466,6 +483,29 @@ void lj_ccallback_mcode_free(CTState *cts)
UNUSED(isfp);
#endif
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
#elif LJ_TARGET_MIPS64
#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */
#define CALLBACK_HANDLE_REGARG \
if (ngpr + n <= maxgpr) { \
sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
ngpr += n; \
goto done; \
}
#else /* MIPS64 soft-float */
#define CALLBACK_HANDLE_REGARG \
if (ngpr + n <= maxgpr) { \
UNUSED(isfp); \
sp = (void*) &cts->cb.gpr[ngpr]; \
ngpr += n; \
goto done; \
}
#endif
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
@ -557,7 +597,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
nsp += n;
done:
if (LJ_BE && cta->size < CTSIZE_PTR)
if (LJ_BE && cta->size < CTSIZE_PTR
#if LJ_TARGET_MIPS64
&& !(isfp && nsp)
#endif
)
sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
}
@ -608,6 +652,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
#if LJ_TARGET_MIPS64
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
*(int64_t *)dp = (int64_t)*(int32_t *)dp;
#endif
#if LJ_TARGET_X86
if (ctype_isfp(ctr->info))
cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;

View File

@ -93,11 +93,13 @@ void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
setcdataV(L, &tmp, cd);
lj_gc_anybarriert(L, t);
tv = lj_tab_set(L, t, &tmp);
setgcV(L, tv, obj, it);
if (!tvisnil(tv))
cd->marked |= LJ_GC_CDATA_FIN;
else
if (it == LJ_TNIL) {
setnilV(tv);
cd->marked &= ~LJ_GC_CDATA_FIN;
} else {
setgcV(L, tv, obj, it);
cd->marked |= LJ_GC_CDATA_FIN;
}
}
}

View File

@ -297,13 +297,17 @@ static CPToken cp_next_(CPState *cp)
else return '/';
break;
case '|':
if (cp_get(cp) != '|') return '|'; cp_get(cp); return CTOK_OROR;
if (cp_get(cp) != '|') return '|';
cp_get(cp); return CTOK_OROR;
case '&':
if (cp_get(cp) != '&') return '&'; cp_get(cp); return CTOK_ANDAND;
if (cp_get(cp) != '&') return '&';
cp_get(cp); return CTOK_ANDAND;
case '=':
if (cp_get(cp) != '=') return '='; cp_get(cp); return CTOK_EQ;
if (cp_get(cp) != '=') return '=';
cp_get(cp); return CTOK_EQ;
case '!':
if (cp_get(cp) != '=') return '!'; cp_get(cp); return CTOK_NE;
if (cp_get(cp) != '=') return '!';
cp_get(cp); return CTOK_NE;
case '<':
if (cp_get(cp) == '=') { cp_get(cp); return CTOK_LE; }
else if (cp->c == '<') { cp_get(cp); return CTOK_SHL; }
@ -313,7 +317,8 @@ static CPToken cp_next_(CPState *cp)
else if (cp->c == '>') { cp_get(cp); return CTOK_SHR; }
return '>';
case '-':
if (cp_get(cp) != '>') return '-'; cp_get(cp); return CTOK_DEREF;
if (cp_get(cp) != '>') return '-';
cp_get(cp); return CTOK_DEREF;
case '$':
return cp_param(cp);
case '\0': return CTOK_EOF;

View File

@ -712,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
return tr;
}
/* Record a pending tailcall to the function held in 'tv'.
**
** Stores the function constant plus the frame marker into the slot(s)
** just below J->base and flags the result count as a pending tailcall.
** With LJ_FR2 the function and the frame marker occupy two separate slots,
** otherwise they are combined into a single slot.
*/
static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv)
{
  TRef fn = lj_ir_kfunc(J, funcV(tv));
  rd->nres = -1;  /* Pending tailcall. */
#if LJ_FR2
  J->base[-1] = TREF_FRAME;
  J->base[-2] = fn;
#else
  J->base[-1] = fn | TREF_FRAME;
#endif
}
/* Record ctype __index/__newindex metamethods. */
static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
RecordFFData *rd)
@ -721,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
if (!tv)
lj_trace_err(J, LJ_TRERR_BADTYPE);
if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
rd->nres = -1; /* Pending tailcall. */
crec_tailcall(J, rd, tv);
} else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
/* Specialize to result of __index lookup. */
cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]);
@ -1119,20 +1131,20 @@ static void crec_snap_caller(jit_State *J)
lua_State *L = J->L;
TValue *base = L->base, *top = L->top;
const BCIns *pc = J->pc;
TRef ftr = J->base[-1];
TRef ftr = J->base[-1-LJ_FR2];
ptrdiff_t delta;
if (!frame_islua(base-1) || J->framedepth <= 0)
lj_trace_err(J, LJ_TRERR_NYICALL);
J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
L->top = base; L->base = base - delta;
J->base[-1] = TREF_FALSE;
J->base[-1-LJ_FR2] = TREF_FALSE;
J->base -= delta; J->baseslot -= (BCReg)delta;
J->maxslot = (BCReg)delta; J->framedepth--;
J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
lj_snap_add(J);
L->base = base; L->top = top;
J->framedepth++; J->maxslot = 1;
J->base += delta; J->baseslot += (BCReg)delta;
J->base[-1] = ftr; J->pc = pc;
J->base[-1-LJ_FR2] = ftr; J->pc = pc;
}
/* Record function call. */
@ -1224,8 +1236,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
if (tv) {
if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
rd->nres = -1; /* Pending tailcall. */
crec_tailcall(J, rd, tv);
return;
}
} else if (mm == MM_new) {
@ -1238,7 +1249,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
{
if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) {
if (sp[0] && sp[1] && ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) {
IRType dt;
CTypeID id;
TRef tr;
@ -1296,6 +1307,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
{
CTState *cts = ctype_ctsG(J2G(J));
CType *ctp = s[0];
if (!(sp[0] && sp[1])) return 0;
if (ctype_isptr(ctp->info) || ctype_isrefarray(ctp->info)) {
if ((mm == MM_sub || mm == MM_eq || mm == MM_lt || mm == MM_le) &&
(ctype_isptr(s[1]->info) || ctype_isrefarray(s[1]->info))) {
@ -1373,8 +1385,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
}
if (tv) {
if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
rd->nres = -1; /* Pending tailcall. */
crec_tailcall(J, rd, tv);
return 0;
} /* NYI: non-function metamethods. */
} else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */

View File

@ -42,17 +42,17 @@ LJ_STATIC_ASSERT(((int)CT_STRUCT & (int)CT_ARRAY) == CT_STRUCT);
** ---------- info ------------
** |type flags... A cid | size | sib | next | name |
** +----------------------------+--------+-------+-------+-------+--
** |NUM BFvcUL.. A | size | | type | |
** |STRUCT ..vcU..V A | size | field | name? | name? |
** |PTR ..vcR... A cid | size | | type | |
** |ARRAY VCvc...V A cid | size | | type | |
** |VOID ..vc.... A | size | | type | |
** |NUM BFcvUL.. A | size | | type | |
** |STRUCT ..cvU..V A | size | field | name? | name? |
** |PTR ..cvR... A cid | size | | type | |
** |ARRAY VCcv...V A cid | size | | type | |
** |VOID ..cv.... A | size | | type | |
** |ENUM A cid | size | const | name? | name? |
** |FUNC ....VS.. cc cid | nargs | field | name? | name? |
** |TYPEDEF cid | | | name | name |
** |ATTRIB attrnum cid | attr | sib? | type? | |
** |FIELD cid | offset | field | | name? |
** |BITFIELD B.vcU csz bsz pos | offset | field | | name? |
** |BITFIELD B.cvU csz bsz pos | offset | field | | name? |
** |CONSTVAL c cid | value | const | name | name |
** |EXTERN cid | | sib? | name | name |
** |KW tok | size | | name | name |

View File

@ -95,6 +95,8 @@ typedef unsigned int uintptr_t;
#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
#define checki8(x) ((x) == (int32_t)(int8_t)(x))
#define checku8(x) ((x) == (int32_t)(uint8_t)(x))

View File

@ -75,7 +75,7 @@ void lj_dispatch_init(GG_State *GG)
for (i = 0; i < GG_NUM_ASMFF; i++)
GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
#if LJ_TARGET_MIPS
memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4);
memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *));
#endif
}

View File

@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
#if !LJ_SOFTFP
/* Load a number constant into an FPR. */
static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
cTValue *tv = ir_knum(ir);
int32_t i;
if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
uint32_t hi = tv->u32.hi;

View File

@ -35,7 +35,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
{
if ((as->flags & JIT_F_MIPS32R2)) {
if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, src, shift);
} else {
emit_dst(as, MIPSI_OR, dest, dest, tmp);
@ -112,8 +112,8 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
emit_tsi(as, mi, r, base, i);
}
#define emit_loadn(as, r, tv) \
emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR)
#define emit_loadk64(as, r, ir) \
emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
/* Get/set global_State fields. */
static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
@ -157,7 +157,8 @@ static void emit_call(ASMState *as, void *target, int needcfa)
MCode *p = as->mcp;
*--p = MIPSI_NOP;
if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
*--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
*--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
(((uintptr_t)target >>2) & 0x03ffffffu);
} else { /* Target out of range: need indirect call. */
*--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
needcfa = 1;

View File

@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
emit_tai(as, pi, r, base, i);
}
#define emit_loadn(as, r, tv) \
emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR)
#define emit_loadk64(as, r, ir) \
emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
/* Get/set global_State fields. */
static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)

View File

@ -20,6 +20,11 @@
#define REX_64 0
#define VEX_64 0
#endif
/* REX_GC64 is REX_64 in LJ_GC64 builds and 0 otherwise. OR it into a
** register operand to make an operation 64 bit wide only in GC64 mode.
*/
#if LJ_GC64
#define REX_GC64 REX_64
#else
#define REX_GC64 0
#endif
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
@ -94,26 +99,17 @@ static int32_t ptr2addr(const void *p)
#define ptr2addr(p) (i32ptr((p)))
#endif
/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
MCode *p = as->mcp;
*(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}
/* op r, [base+ofs] */
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{
MCode *p = as->mcp;
x86Mode mode;
if (ra_hasreg(rb)) {
if (ofs == 0 && (rb&7) != RID_EBP) {
if (LJ_GC64 && rb == RID_RIP) {
mode = XM_OFS0;
p -= 4;
*(int32_t *)p = ofs;
} else if (ofs == 0 && (rb&7) != RID_EBP) {
mode = XM_OFS0;
} else if (checki8(ofs)) {
*--p = (MCode)ofs;
@ -211,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
*--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
rb = RID_ESP;
#endif
} else if (LJ_GC64 && rb == RID_RIP) {
lua_assert(as->mrm.idx == RID_NONE);
mode = XM_OFS0;
p -= 4;
*(int32_t *)p = as->mrm.ofs;
} else {
if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
mode = XM_OFS0;
@ -264,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
/* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \
emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
#define emit_setvmstate(as, i) \
(emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@ -288,9 +289,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
}
}
#if LJ_GC64
/* Signed distance from the dispatch table (J2GG(as->J)->dispatch) to k. */
#define dispofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
/* Signed distance from the current machine code pointer (as->mcp) to k. */
#define mcpofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
/* Signed distance from the top of the machine code area (as->mctop) to k. */
#define mctopofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
/* mov r, addr */
/* GC64: addresses are passed to emit_loadu64() as full 64 bit values. */
#define emit_loada(as, r, addr) \
emit_loadu64(as, (r), (uintptr_t)(addr))
#else
/* mov r, addr */
/* Non-GC64: the address is converted with ptr2addr() and loaded as imm32. */
#define emit_loada(as, r, addr) \
emit_loadi(as, (r), ptr2addr((addr)))
#endif
#if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */
@ -302,6 +315,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
MCode *p = as->mcp;
*(int32_t *)(p-4) = (int32_t)u64;
as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
#if LJ_GC64
} else if (checki32(dispofs(as, u64))) {
emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
} else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
/* Since as->realign assumes the code size doesn't change, check
** RIP-relative addressing reachability for both as->mcp and as->mctop.
*/
emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
#endif
} else { /* Full-size 64 bit load. */
MCode *p = as->mcp;
*(uint64_t *)(p-8) = u64;
@ -313,13 +335,70 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
}
#endif
/* movsd r, [&tv->n] / xorps r, r */
static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
if (tvispzero(tv)) /* Use xor only for +0. */
emit_rr(as, XO_XORPS, r, r);
else
emit_rma(as, XO_MOVSD, r, &tv->n);
#if LJ_GC64
if (checki32(dispofs(as, addr))) {
emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
} else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
} else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
emit_rmro(as, xo, rr, rr, 0);
emit_loadu64(as, rr, (uintptr_t)addr);
} else
#endif
{
MCode *p = as->mcp;
*(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}
}
/* Load 64 bit IR constant into register.
**
** 'r' may be an FPR (loaded with MOVSD) or a GPR (loaded with a 64 bit MOV).
** A zero constant is materialized by XORing the register with itself.
**
** In LJ_GC64 builds the constant is loaded from its own address if that
** address is reachable (32 bit absolute, dispatch-relative, or RIP-relative
** from both as->mcp and as->mctop). Otherwise a copy embedded in the
** machine code area is used and loaded RIP-relative; ir->i caches the
** distance of that copy below as->mctop.
*/
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
Reg r64;
x86Op xo;
const uint64_t *k = &ir_k64(ir)->u64;
if (rset_test(RSET_FPR, r)) {
r64 = r;
xo = XO_MOVSD;
} else {
r64 = r | REX_64;
xo = XO_MOV;
}
if (*k == 0) {
/* Zero: xorps for an FPR, integer xor for a GPR. */
emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
#if LJ_GC64
} else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
(checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
/* Constant is addressable in place; emit_rma() picks the encoding. */
emit_rma(as, xo, r64, k);
} else {
if (ir->i) {
/* A copy was embedded earlier: it must hold the same value. */
lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
} else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
/* GPR target under this condition: load as an immediate instead. */
emit_loadu64(as, r, *k);
return;
} else {
/* If all else fails, add the FP constant at the MCode area bottom. */
/* Pad to 8 byte alignment with INT3, then store the constant. */
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
*(uint64_t *)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
/* The bottom moved up, so the limit has to move with it. */
as->mclim = as->mcbot + MCLIM_REDZONE;
}
/* RIP-relative load from the embedded copy. */
emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
#else
} else {
emit_rma(as, xo, r64, k);
#endif
}
}
/* -- Emit control-flow instructions -------------------------------------- */
@ -460,9 +539,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
if (ofs) {
if ((as->flags & JIT_F_LEA_AGU))
emit_rmro(as, XO_LEA, r, r, ofs);
emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
else
emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
}
}

View File

@ -46,7 +46,8 @@
** the wrapper function feature. Lua errors thrown through C++ frames
** cannot be caught by C++ code and C++ destructors are not run.
**
** EXT is the default on x64 systems, INT is the default on all other systems.
** EXT is the default on x64 systems and on Windows, INT is the default on all
** other systems.
**
** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
@ -55,7 +56,6 @@
** and all C libraries that have callbacks which may be used to call back
** into Lua. C++ code must *not* be compiled with -fno-exceptions.
**
** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH.
** EXT is mandatory on WIN64 since the calling convention has an abundance
** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
@ -63,7 +63,7 @@
#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
#define LJ_UNWIND_EXT 1
#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
#elif LJ_TARGET_WINDOWS
#define LJ_UNWIND_EXT 1
#endif
@ -384,7 +384,7 @@ static void err_raise_ext(int errcode)
#endif /* LJ_TARGET_ARM */
#elif LJ_TARGET_X64 && LJ_ABI_WIN
#elif LJ_ABI_WIN
/*
** Someone in Redmond owes me several days of my life. A lot of this is
@ -402,6 +402,7 @@ static void err_raise_ext(int errcode)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#if LJ_TARGET_X64
/* Taken from: http://www.nynaeve.net/?p=99 */
typedef struct UndocumentedDispatcherContext {
ULONG64 ControlPc;
@ -416,11 +417,14 @@ typedef struct UndocumentedDispatcherContext {
ULONG ScopeIndex;
ULONG Fill0;
} UndocumentedDispatcherContext;
#else
typedef void *UndocumentedDispatcherContext;
#endif
/* Another wild guess. */
extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
#ifdef MINGW_SDK_INIT
#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
/* Workaround for broken MinGW64 declaration. */
VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
#define RtlUnwindEx RtlUnwindEx_FIXED
@ -434,10 +438,15 @@ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
/* Win64 exception handler for interpreter frame. */
LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
/* Windows exception handler for interpreter frame. */
LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
{
#if LJ_TARGET_X64
void *cf = f;
#else
void *cf = (char *)f - CFRAME_OFS_SEH;
#endif
lua_State *L = cframe_L(cf);
int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
@ -455,8 +464,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
} else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
/* Don't catch access violations etc. */
return ExceptionContinueSearch;
return 1; /* ExceptionContinueSearch */
}
#if LJ_TARGET_X64
/* Unwind the stack and call all handlers for all lower C frames
** (including ourselves) again with EH_UNWINDING set. Then set
** rsp = cf, rax = errcode and jump to the specified target.
@ -466,9 +476,21 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
lj_vm_unwind_c_eh),
rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
/* RtlUnwindEx should never return. */
#else
UNUSED(ctx);
UNUSED(dispatch);
/* Call all handlers for all lower C frames (including ourselves) again
** with EH_UNWINDING set. Then call the specified function, passing cf
** and errcode.
*/
lj_vm_rtlunwind(cf, (void *)rec,
(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
/* lj_vm_rtlunwind does not return. */
#endif
}
}
return ExceptionContinueSearch;
return 1; /* ExceptionContinueSearch */
}
/* Raise Windows exception. */

View File

@ -102,42 +102,41 @@ static void recff_stitch(jit_State *J)
ASMFunction cont = lj_cont_stitch;
lua_State *L = J->L;
TValue *base = L->base;
BCReg nslot = J->maxslot + 1 + LJ_FR2;
TValue *nframe = base + 1 + LJ_FR2;
const BCIns *pc = frame_pc(base-1);
TValue *pframe = frame_prevl(base-1);
TRef trcont;
lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */
/* Move func + args up in Lua stack and insert continuation. */
memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
setcont(base, cont);
memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
setcont(base-LJ_FR2, cont);
setframe_pc(base, pc);
setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */
L->base += 2;
L->top += 2;
setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */
L->base += 2 + LJ_FR2;
L->top += 2 + LJ_FR2;
/* Ditto for the IR. */
memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
#if LJ_64
trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
#if LJ_FR2
J->base[2] = TREF_FRAME;
J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
#else
trcont = lj_ir_kptr(J, (void *)cont);
J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
#endif
J->base[0] = trcont | TREF_CONT;
J->ktracep = lj_ir_k64_reserve(J);
lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0);
J->base += 2;
J->baseslot += 2;
J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
J->base += 2 + LJ_FR2;
J->baseslot += 2 + LJ_FR2;
J->framedepth++;
lj_record_stop(J, LJ_TRLINK_STITCH, 0);
/* Undo Lua stack changes. */
memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
setframe_pc(base-1, pc);
L->base -= 2;
L->top -= 2;
L->base -= 2 + LJ_FR2;
L->top -= 2 + LJ_FR2;
}
/* Fallback handler for fast functions that are not recorded (yet). */
@ -179,7 +178,7 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
/* Emit BUFHDR for the global temporary buffer. */
static TRef recff_bufhdr(jit_State *J)
{
return emitir(IRT(IR_BUFHDR, IRT_P32),
return emitir(IRT(IR_BUFHDR, IRT_PGC),
lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
}
@ -229,7 +228,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd)
ix.tab = tr;
copyTV(J->L, &ix.tabv, &rd->argv[0]);
lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */
fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META);
fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META);
mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
if (!tref_isnil(mt))
@ -295,7 +294,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
if (strV(tv)->len == 1) {
emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv)));
} else {
TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0));
TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
}
@ -380,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
int errcode;
TValue argv0;
/* Temporarily insert metamethod below object. */
J->base[1] = J->base[0];
J->base[1+LJ_FR2] = J->base[0];
J->base[0] = ix.mobj;
copyTV(J->L, &argv0, &rd->argv[0]);
copyTV(J->L, &rd->argv[1], &rd->argv[0]);
copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
copyTV(J->L, &rd->argv[0], &ix.mobjv);
/* Need to protect lj_record_tailcall because it may throw. */
errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
@ -450,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
{
if (J->maxslot >= 1) {
#if LJ_FR2
/* Shift function arguments up. */
memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
#endif
lj_record_call(J, 0, J->maxslot - 1);
rd->nres = -1; /* Pending call. */
} /* else: Interpreter will throw. */
@ -469,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
TValue argv0, argv1;
TRef tmp;
int errcode;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
/* Swap function and traceback. */
tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
copyTV(J->L, &argv0, &rd->argv[0]);
copyTV(J->L, &argv1, &rd->argv[1]);
copyTV(J->L, &rd->argv[0], &argv1);
copyTV(J->L, &rd->argv[1], &argv0);
#if LJ_FR2
/* Shift function arguments up. */
memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
#endif
/* Need to protect lj_record_call because it may throw. */
errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
/* Always undo Lua stack swap to avoid confusing the interpreter. */
@ -504,7 +510,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
{
TRef tr = lj_ir_tonum(J, J->base[0]);
J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J));
J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS));
UNUSED(rd);
}
@ -613,10 +619,8 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
{
TRef tr = lj_ir_tonum(J, J->base[0]);
if (!tref_isnumber_str(J->base[1]))
lj_trace_err(J, LJ_TRERR_BADTYPE);
J->base[0] = lj_opt_narrow_pow(J, tr, J->base[1], &rd->argv[1]);
J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
&rd->argv[0], &rd->argv[1]);
UNUSED(rd);
}
@ -822,7 +826,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
/* Also handle empty range here, to avoid extra traces. */
TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
emitir(IRTGI(IR_GE), trslen, tr0);
trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
} else { /* Range underflow: return empty string. */
emitir(IRTGI(IR_LT), trend, trstart);
@ -838,7 +842,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
rd->nres = len;
for (i = 0; i < len; i++) {
TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i));
tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp);
tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp);
J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
}
} else { /* Empty range or range underflow: return no results. */
@ -860,7 +864,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
if (i > 1) { /* Concatenate the strings, if there's more than one. */
TRef hdr = recff_bufhdr(J), tr = hdr;
for (i = 0; J->base[i] != 0; i++)
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]);
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
}
UNUSED(rd);
@ -877,14 +881,14 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
if (vrep > 1) {
TRef hdr2 = recff_bufhdr(J);
TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep);
tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str);
TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep);
tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str);
str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
}
}
tr = hdr = recff_bufhdr(J);
if (str2) {
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str);
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str);
str = str2;
rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
}
@ -935,8 +939,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
if ((J->base[2] && tref_istruecond(J->base[3])) ||
(emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
!lj_str_haspattern(pat))) { /* Search for fixed string. */
TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0);
TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0);
TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
@ -944,13 +948,13 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
str->len-(MSize)start, pat->len)) {
TRef pos;
emitir(IRTG(IR_NE, IRT_P32), tr, trp0);
pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0));
emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0));
J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
rd->nres = 2;
} else {
emitir(IRTG(IR_EQ, IRT_P32), tr, trp0);
emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0);
J->base[0] = TREF_NIL;
}
} else { /* Search for pattern. */
@ -977,7 +981,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
IRCallID id;
switch (STRFMT_TYPE(sf)) {
case STRFMT_LIT:
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
break;
case STRFMT_INT:
@ -986,7 +990,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
if (!tref_isinteger(tra))
goto handle_num;
if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
} else {
#if LJ_HASFFI
@ -1016,7 +1020,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
return;
}
if (sf == STRFMT_STR) /* Shortcut for plain %s. */
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra);
else if ((sf & STRFMT_T_QUOTED))
tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
else
@ -1025,7 +1029,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
case STRFMT_CHAR:
tra = lj_opt_narrow_toint(J, tra);
if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
else
tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
@ -1110,8 +1114,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
{
TRef tr, ud, fp;
if (id) { /* io.func() */
#if LJ_GC64
/* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
#else
tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
#endif
} else { /* fp:method() */
ud = J->base[0];
if (!tref_isudata(ud))
@ -1133,7 +1142,7 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
ptrdiff_t i = rd->data == 0 ? 1 : 0;
for (; J->base[i]; i++) {
TRef str = lj_ir_tostr(J, J->base[i]);
TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero);
TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
IRIns *irs = IR(tref_ref(str));

View File

@ -116,6 +116,17 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
/* These definitions must match with the arch-specific *.dasc files. */
#if LJ_TARGET_X86
#if LJ_ABI_WIN
#define CFRAME_OFS_ERRF (19*4)
#define CFRAME_OFS_NRES (18*4)
#define CFRAME_OFS_PREV (17*4)
#define CFRAME_OFS_L (16*4)
#define CFRAME_OFS_SEH (9*4)
#define CFRAME_OFS_PC (6*4)
#define CFRAME_OFS_MULTRES (5*4)
#define CFRAME_SIZE (16*4)
#define CFRAME_SHIFT_MULTRES 0
#else
#define CFRAME_OFS_ERRF (15*4)
#define CFRAME_OFS_NRES (14*4)
#define CFRAME_OFS_PREV (13*4)
@ -124,6 +135,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_OFS_MULTRES (5*4)
#define CFRAME_SIZE (12*4)
#define CFRAME_SHIFT_MULTRES 0
#endif
#elif LJ_TARGET_X64
#if LJ_ABI_WIN
#define CFRAME_OFS_PREV (13*8)
@ -226,26 +238,41 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_SIZE 272
#define CFRAME_SHIFT_MULTRES 3
#endif
#elif LJ_TARGET_MIPS
#elif LJ_TARGET_MIPS32
#if LJ_ARCH_HASFPU
#define CFRAME_OFS_ERRF 124
#define CFRAME_OFS_NRES 120
#define CFRAME_OFS_PREV 116
#define CFRAME_OFS_L 112
#define CFRAME_OFS_PC 20
#define CFRAME_OFS_MULTRES 16
#define CFRAME_SIZE 112
#define CFRAME_SHIFT_MULTRES 3
#else
#define CFRAME_OFS_ERRF 76
#define CFRAME_OFS_NRES 72
#define CFRAME_OFS_PREV 68
#define CFRAME_OFS_L 64
#define CFRAME_SIZE 64
#endif
#define CFRAME_OFS_PC 20
#define CFRAME_OFS_MULTRES 16
#define CFRAME_SIZE 64
#define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_MIPS64
#if LJ_ARCH_HASFPU
#define CFRAME_OFS_ERRF 188
#define CFRAME_OFS_NRES 184
#define CFRAME_OFS_PREV 176
#define CFRAME_OFS_L 168
#define CFRAME_OFS_PC 160
#define CFRAME_SIZE 192
#else
#define CFRAME_OFS_ERRF 124
#define CFRAME_OFS_NRES 120
#define CFRAME_OFS_PREV 112
#define CFRAME_OFS_L 104
#define CFRAME_OFS_PC 96
#define CFRAME_SIZE 128
#endif
#define CFRAME_OFS_MULTRES 0
#define CFRAME_SHIFT_MULTRES 3
#else
#error "Missing CFRAME_* definitions for this architecture"
#endif

View File

@ -238,6 +238,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T)
IRIns *ir = &T->ir[ref];
if (ir->o == IR_KGC)
gc_markobj(g, ir_kgc(ir));
if (irt_is64(ir->t) && ir->o != IR_KNULL)
ref++;
}
if (T->link) gc_marktrace(g, T->link);
if (T->nextroot) gc_marktrace(g, T->nextroot);

View File

@ -719,6 +719,20 @@ static void gdbjit_buildobj(GDBJITctx *ctx)
/* -- Interface to GDB JIT API -------------------------------------------- */
/* Spin lock word guarding updates of the GDB JIT descriptor chain:
** 0 = free, 1 = held.
*/
static int gdbjit_lock;

/* Acquire the GDB JIT lock, busy-waiting until it becomes free. */
static void gdbjit_lock_acquire(void)
{
  /* The builtin atomically stores 1 and returns the previous value, so a
  ** non-zero result means somebody else still holds the lock.
  */
  while (__sync_lock_test_and_set(&gdbjit_lock, 1)) {
    /* Just spin; futexes or pthreads aren't worth the portability cost. */
  }
}

/* Release the GDB JIT lock by atomically resetting the lock word to 0. */
static void gdbjit_lock_release(void)
{
  __sync_lock_release(&gdbjit_lock);
}
/* Add new entry to GDB JIT symbol chain. */
static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
{
@ -730,6 +744,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
ctx->T->gdbjit_entry = (void *)eo;
/* Link new entry to chain and register it. */
eo->entry.prev_entry = NULL;
gdbjit_lock_acquire();
eo->entry.next_entry = __jit_debug_descriptor.first_entry;
if (eo->entry.next_entry)
eo->entry.next_entry->prev_entry = &eo->entry;
@ -739,6 +754,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
__jit_debug_descriptor.relevant_entry = &eo->entry;
__jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
__jit_debug_register_code();
gdbjit_lock_release();
}
/* Add debug info for newly compiled trace and notify GDB. */
@ -770,6 +786,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
{
GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
if (eo) {
gdbjit_lock_acquire();
if (eo->entry.prev_entry)
eo->entry.prev_entry->next_entry = eo->entry.next_entry;
else
@ -779,6 +796,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
__jit_debug_descriptor.relevant_entry = &eo->entry;
__jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
__jit_debug_register_code();
gdbjit_lock_release();
lj_mem_free(J2G(J), eo, eo->sz);
}
}

View File

@ -91,7 +91,7 @@ static void lj_ir_growbot(jit_State *J)
IRIns *baseir = J->irbuf + J->irbotlim;
MSize szins = J->irtoplim - J->irbotlim;
lua_assert(szins != 0);
lua_assert(J->cur.nk == J->irbotlim);
lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim);
if (J->cur.nins + (szins >> 1) < J->irtoplim) {
/* More than half of the buffer is free on top: shift up by a quarter. */
MSize ofs = szins >> 2;
@ -145,6 +145,14 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
return emitir(CCI_OPTYPE(ci), tr, id);
}
/* Load field of type t from GG_State + offset.
** Sets up an IR_FLOAD of result type t with REF_NIL and ofs as its two
** operands and passes it through lj_opt_fold(). Returns the resulting TRef.
*/
LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
{
/* ofs must lie in [IRFL__MAX, REF_BIAS). NOTE(review): presumably the lower
** bound keeps it distinguishable from regular IRFL_* field IDs -- confirm.
*/
lua_assert(ofs >= IRFL__MAX && ofs < REF_BIAS);
lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs);
return lj_opt_fold(J);
}
/* -- Interning of constants ---------------------------------------------- */
/*
@ -165,6 +173,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
return ref;
}
/* Get ref of next 64 bit IR constant and optionally grow IR.
** Note: this may invalidate all IRIns *!
**
** Lowers J->cur.nk by 2 (a 64 bit constant takes two IR slots) and returns
** the new J->cur.nk, i.e. the ref of the lower of the two slots.
** Must not be called while J->state is LJ_TRACE_ASM (asserted).
*/
static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
{
IRRef ref = J->cur.nk - 2;
lua_assert(J->state != LJ_TRACE_ASM);
/* New ref is below the bottom limit of the IR buffer: make room first. */
if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
J->cur.nk = ref;
return ref;
}
/* Slot allocator for GC object constants: ir_nextk64 when LJ_GC64 is set,
** ir_nextk otherwise.
*/
#if LJ_GC64
#define ir_nextkgc ir_nextk64
#else
#define ir_nextkgc ir_nextk
#endif
/* Intern int32_t constant. */
TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
{
@ -184,95 +210,21 @@ found:
return TREF(ref, IRT_INT);
}
/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the
** 64 bit constant. The constants themselves are stored in a chained array
** and shared across traces.
**
** Rationale for choosing this data structure:
** - The address of the constants is embedded in the generated machine code
** and must never move. A resizable array or hash table wouldn't work.
** - Most apps need very few non-32 bit integer constants (less than a dozen).
** - Linear search is hard to beat in terms of speed and low complexity.
*/
/* One fixed-size link of the constant chain: holds up to LJ_MIN_K64SZ
** constants plus a reference to the following link.
*/
typedef struct K64Array {
MRef next; /* Pointer to next list. */
MSize numk; /* Number of used elements in this array. */
TValue k[LJ_MIN_K64SZ]; /* Array of constants. */
} K64Array;
/* Release every K64Array in the chain rooted at J->k64 and clear the root. */
void lj_ir_k64_freeall(jit_State *J)
{
  K64Array *cur = mref(J->k64, K64Array);
  while (cur != NULL) {
    /* Fetch the successor first: cur is gone after lj_mem_free(). */
    K64Array *succ = mref(cur->next, K64Array);
    lj_mem_free(J2G(J), cur, sizeof(K64Array));
    cur = succ;
  }
  setmref(J->k64, NULL);
}
/* Store u64 in a fresh constant slot and return the slot's address.
** kp is the array to append to (NULL if the chain is empty). A new array
** is allocated and linked in when kp is missing or already full.
*/
static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64)
{
  TValue *slot;
  if (kp == NULL || kp->numk >= LJ_MIN_K64SZ) {  /* No room: new array. */
    K64Array *fresh = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
    setmref(fresh->next, NULL);
    fresh->numk = 0;
    if (kp == NULL)
      setmref(J->k64, fresh);  /* Link first array. */
    else
      setmref(kp->next, fresh);  /* Chain to the end of the list. */
    kp = fresh;
  }
  slot = &kp->k[kp->numk];  /* Claim the next unused element. */
  kp->numk++;
  slot->u64 = u64;
  return slot;
}
/* Look up a 64 bit constant by its bit pattern in the chained arrays.
** Returns the existing slot on a match, otherwise adds the constant.
*/
cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
{
  K64Array *arr = mref(J->k64, K64Array);
  K64Array *tail = NULL;  /* Last array visited, NULL for an empty chain. */
  while (arr != NULL) {
    MSize i;
    for (i = 0; i < arr->numk; i++) {
      /* Compare raw bits: needed for +-0/NaN/absmask. */
      if (arr->k[i].u64 == u64)
        return &arr->k[i];
    }
    tail = arr;
    arr = mref(arr->next, K64Array);
  }
  /* Not present anywhere: append behind the last array. */
  return ir_k64_add(J, tail, u64);
}
/* Reserve a 64 bit constant slot, initialized to 0, and return it. */
TValue *lj_ir_k64_reserve(jit_State *J)
{
  K64Array *arr;
  K64Array *tail = NULL;
  /* Intern a dummy 0 beforehand: lookups of 0 scan from the head of the
  ** chain and thus hit the dummy instead of the slot reserved below.
  */
  lj_ir_k64_find(J, 0);
  /* Walk to the last K64Array, if any. */
  arr = mref(J->k64, K64Array);
  while (arr != NULL) {
    tail = arr;
    arr = mref(arr->next, K64Array);
  }
  /* Set to 0 for now. Final value is set later. */
  return ir_k64_add(J, tail, 0);
}
/* Intern 64 bit constant, given by its address. */
TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
/* Intern 64 bit constant, given by its 64 bit pattern. */
TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64)
{
IRIns *ir, *cir = J->cur.ir;
IRRef ref;
IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
for (ref = J->chain[op]; ref; ref = cir[ref].prev)
if (ir_k64(&cir[ref]) == tv)
if (ir_k64(&cir[ref])->u64 == u64)
goto found;
ref = ir_nextk(J);
ref = ir_nextk64(J);
ir = IR(ref);
lua_assert(checkptrGC(tv));
setmref(ir->ptr, tv);
ir[1].tv.u64 = u64;
ir->t.irt = t;
ir->o = op;
ir->op12 = 0;
ir->prev = J->chain[op];
J->chain[op] = (IRRef1)ref;
found:
@ -282,13 +234,13 @@ found:
/* Intern FP constant, given by its 64 bit pattern. */
TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
{
return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64));
return lj_ir_k64(J, IR_KNUM, u64);
}
/* Intern 64 bit integer constant. */
TRef lj_ir_kint64(jit_State *J, uint64_t u64)
{
return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64));
return lj_ir_k64(J, IR_KINT64, u64);
}
/* Check whether a number is int and return it. -0 is NOT considered an int. */
@ -323,15 +275,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
{
IRIns *ir, *cir = J->cur.ir;
IRRef ref;
lua_assert(!LJ_GC64); /* TODO_GC64: major changes required. */
lua_assert(!isdead(J2G(J), o));
for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
if (ir_kgc(&cir[ref]) == o)
goto found;
ref = ir_nextk(J);
ref = ir_nextkgc(J);
ir = IR(ref);
/* NOBARRIER: Current trace is a GC root. */
setgcref(ir->gcr, o);
ir->op12 = 0;
setgcref(ir[LJ_GC64].gcr, o);
ir->t.irt = (uint8_t)t;
ir->o = IR_KGC;
ir->prev = J->chain[IR_KGC];
@ -340,24 +292,44 @@ found:
return TREF(ref, t);
}
/* Intern 32 bit pointer constant. */
/* Allocate GCtrace constant placeholder (no interning).
** The slot gets type IRT_P64 and a stand-in opcode; it is not linked into
** any constant chain (prev = 0).
*/
TRef lj_ir_ktrace(jit_State *J)
{
  IRRef kref;
  IRIns *kins;
  kref = ir_nextkgc(J);
  kins = IR(kref);  /* Look up the slot only after it has been allocated. */
  lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
  /* Not IR_KGC yet, but same size. */
#if LJ_GC64
  kins->o = IR_KNUM;
#else
  kins->o = IR_KNULL;
#endif
  kins->t.irt = IRT_P64;
  kins->prev = 0;
  kins->op12 = 0;
  return TREF(kref, IRT_P64);
}
/* Intern pointer constant. */
TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
{
IRIns *ir, *cir = J->cur.ir;
IRRef ref;
lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr);
#if LJ_64 && !LJ_GC64
lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr);
#endif
for (ref = J->chain[op]; ref; ref = cir[ref].prev)
if (mref(cir[ref].ptr, void) == ptr)
if (ir_kptr(&cir[ref]) == ptr)
goto found;
#if LJ_GC64
ref = ir_nextk64(J);
#else
ref = ir_nextk(J);
#endif
ir = IR(ref);
setmref(ir->ptr, ptr);
ir->t.irt = IRT_P32;
ir->op12 = 0;
setmref(ir[LJ_GC64].ptr, ptr);
ir->t.irt = IRT_PGC;
ir->o = op;
ir->prev = J->chain[op];
J->chain[op] = (IRRef1)ref;
found:
return TREF(ref, IRT_P32);
return TREF(ref, IRT_PGC);
}
/* Intern typed NULL constant. */
@ -412,9 +384,8 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
case IR_KINT: setintV(tv, ir->i); break;
case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
case IR_KPTR: case IR_KKPTR: case IR_KNULL:
setlightudV(tv, mref(ir->ptr, void));
break;
case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break;
case IR_KNULL: setlightudV(tv, NULL); break;
case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
#if LJ_HASFFI
case IR_KINT64: {

View File

@ -220,7 +220,7 @@ IRFLDEF(FLENUM)
/* SLOAD mode bits, stored in op2. */
#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */
#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
@ -294,7 +294,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
/* -- IR instruction types ------------------------------------------------ */
/* Map of itypes to non-negative numbers. ORDER LJ_T.
#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4)
/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T.
** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for
** IRT_P32 and IRT_P64, which never escape the IR.
** The various integers are only used in the IR and can only escape to
@ -302,12 +304,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
** contiguous and next to IRT_NUM (see the typerange macros below).
*/
#define IRTDEF(_) \
_(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \
_(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \
_(TAB, 4) _(UDATA, 4) \
_(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \
_(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \
_(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \
_(UDATA, IRTSIZE_PGC) \
_(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \
_(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \
_(SOFTFP, 4) /* There is room for 9 more types. */
_(SOFTFP, 4) /* There is room for 8 more types. */
/* IR result type and flags (8 bit). */
typedef enum {
@ -318,9 +321,10 @@ IRTDEF(IRTENUM)
/* Native pointer type and the corresponding integer type. */
IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32,
IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT,
IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
/* TODO_GC64: major changes required for all uses of IRT_P32. */
/* Additional flags. */
IRT_MARK = 0x20, /* Marker for misc. purposes. */
@ -408,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
static LJ_AINLINE uint32_t irt_toitype_(IRType t)
{
lua_assert(!LJ_64 || t != IRT_LIGHTUD);
lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD);
if (LJ_DUALNUM && t > IRT_NUM) {
return LJ_TISNUM;
} else {
@ -521,7 +525,9 @@ typedef uint32_t TRef;
** +-------+-------+---+---+---+---+
** | op1 | op2 | t | o | r | s |
** +-------+-------+---+---+---+---+
** | op12/i/gco | ot | prev | (alternative fields in union)
** | op12/i/gco32 | ot | prev | (alternative fields in union)
** +-------+-------+---+---+---+---+
** | TValue/gco64 | (2nd IR slot for 64 bit constants)
** +---------------+-------+-------+
** 32 16 16
**
@ -549,22 +555,27 @@ typedef union IRIns {
)
};
int32_t i; /* 32 bit signed integer literal (overlaps op12). */
GCRef gcr; /* GCobj constant (overlaps op12). */
MRef ptr; /* Pointer constant (overlaps op12). */
GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */
MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */
TValue tv; /* TValue constant (overlaps entire slot). */
} IRIns;
/* TODO_GC64: major changes required. */
#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr))
#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
#define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue))
#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
#define ir_k64(ir) \
check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
(LJ_GC64 && \
((ir)->o == IR_KGC || \
(ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \
&(ir)[1].tv)
#define ir_kptr(ir) \
check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void))
check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
mref((ir)[LJ_GC64].ptr, void))
/* A store or any other op with a non-weak guard has a side-effect. */
static LJ_AINLINE int ir_sideeff(IRIns *ir)

View File

@ -78,13 +78,13 @@ typedef struct CCallInfo {
#define IRCALLCOND_SOFTFP_FFI(x) NULL
#endif
#if LJ_SOFTFP && LJ_TARGET_MIPS
#if LJ_SOFTFP && LJ_TARGET_MIPS32
#define IRCALLCOND_SOFTFP_MIPS(x) x
#else
#define IRCALLCOND_SOFTFP_MIPS(x) NULL
#endif
#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32)
#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
#define IRCALLCOND_FP64_FFI(x) x
@ -104,12 +104,6 @@ typedef struct CCallInfo {
#define IRCALLCOND_FFI32(x) NULL
#endif
#if LJ_TARGET_X86
#define CCI_RANDFPR 0 /* Clang on OSX/x86 is overzealous. */
#else
#define CCI_RANDFPR CCI_NOFPRCLOBBER
#endif
#if LJ_SOFTFP
#define XA_FP CCI_XA
#define XA2_FP (CCI_XA+CCI_XA)
@ -129,40 +123,40 @@ typedef struct CCallInfo {
/* Function definitions for CALL* instructions. */
#define IRCALLDEF(_) \
_(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
_(ANY, lj_str_find, 4, N, P32, 0) \
_(ANY, lj_str_find, 4, N, PGC, 0) \
_(ANY, lj_str_new, 3, S, STR, CCI_L) \
_(ANY, lj_strscan_num, 2, FN, INT, 0) \
_(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \
_(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \
_(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \
_(ANY, lj_strfmt_putint, 2, FL, P32, 0) \
_(ANY, lj_strfmt_putnum, 2, FL, P32, 0) \
_(ANY, lj_strfmt_putquoted, 2, FL, P32, 0) \
_(ANY, lj_strfmt_putfxint, 3, L, P32, XA_64) \
_(ANY, lj_strfmt_putfnum_int, 3, L, P32, XA_FP) \
_(ANY, lj_strfmt_putfnum_uint, 3, L, P32, XA_FP) \
_(ANY, lj_strfmt_putfnum, 3, L, P32, XA_FP) \
_(ANY, lj_strfmt_putfstr, 3, L, P32, 0) \
_(ANY, lj_strfmt_putfchar, 3, L, P32, 0) \
_(ANY, lj_buf_putmem, 3, S, P32, 0) \
_(ANY, lj_buf_putstr, 2, FL, P32, 0) \
_(ANY, lj_buf_putchar, 2, FL, P32, 0) \
_(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \
_(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \
_(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \
_(ANY, lj_buf_putstr_rep, 3, L, P32, 0) \
_(ANY, lj_buf_puttab, 5, L, P32, 0) \
_(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \
_(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \
_(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \
_(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \
_(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \
_(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \
_(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \
_(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \
_(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \
_(ANY, lj_buf_putmem, 3, S, PGC, 0) \
_(ANY, lj_buf_putstr, 2, FL, PGC, 0) \
_(ANY, lj_buf_putchar, 2, FL, PGC, 0) \
_(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \
_(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \
_(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \
_(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \
_(ANY, lj_buf_puttab, 5, L, PGC, 0) \
_(ANY, lj_buf_tostr, 1, FL, STR, 0) \
_(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \
_(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
_(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
_(ANY, lj_tab_clear, 1, FS, NIL, 0) \
_(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \
_(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \
_(ANY, lj_tab_len, 1, FL, INT, 0) \
_(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
_(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
_(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \
_(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_RANDFPR)\
_(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \
_(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \
_(ANY, lj_vm_modi, 2, FN, INT, 0) \
_(ANY, sinh, 1, N, NUM, XA_FP) \
_(ANY, cosh, 1, N, NUM, XA_FP) \

View File

@ -36,12 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
return ref;
}
LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs);
/* Interning of constants. */
LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J);
LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64);
LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
@ -49,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr);
LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
#if LJ_64
#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k))
@ -75,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000))
/* Special 128 bit SIMD constants. */
#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS))
#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG))
#define lj_ir_ksimd(J, idx) \
lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J))
/* Access to constants. */
LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
@ -143,8 +143,8 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key);
LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op);
LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
/* Optimization passes. */

View File

@ -46,12 +46,16 @@
#define JIT_F_CPU_FIRST JIT_F_SQRT
#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
#elif LJ_TARGET_MIPS
#define JIT_F_MIPS32R2 0x00000010
#define JIT_F_MIPSXXR2 0x00000010
/* Names for the CPU-specific flags. Must match the order above. */
#define JIT_F_CPU_FIRST JIT_F_MIPS32R2
#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2
#if LJ_TARGET_MIPS32
#define JIT_F_CPUSTRING "\010MIPS32R2"
#else
#define JIT_F_CPUSTRING "\010MIPS64R2"
#endif
#else
#define JIT_F_CPU_FIRST 0
#define JIT_F_CPUSTRING ""
#endif
@ -179,14 +183,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
#define SNAP_TR(slot, tr) \
(((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
#if !LJ_FR2
#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
#endif
#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
#define snap_ref(sn) ((sn) & 0xffff)
#define snap_slot(sn) ((BCReg)((sn) >> 24))
#define snap_isframe(sn) ((sn) & SNAP_FRAME)
#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
/* Recover the bytecode PC stored at a snapshot map position.
** With LJ_FR2, 64 bits are copied from sn (via memcpy, so no aligned or
** typed 64 bit load is required) and the low 8 bits are shifted out.
** Otherwise the single entry at sn is converted to the pointer directly.
*/
static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
{
#if !LJ_FR2
  uintptr_t raw = (uintptr_t)*sn;
  return (const BCIns *)raw;
#else
  uint64_t packed;
  memcpy(&packed, sn, sizeof(packed));
  return (const BCIns *)(packed >> 8);
#endif
}
/* Snapshot and exit numbers. */
typedef uint32_t SnapNo;
typedef uint32_t ExitNo;
@ -308,6 +324,37 @@ enum {
LJ_KSIMD__MAX
};
/* Indexes of the common 8 byte constants used by the backends.
** Only the entries for the selected target are compiled in, so the
** numeric values differ per target. LJ_K64__MAX is always last and
** gives the number of entries.
*/
enum {
#if LJ_TARGET_X86ORX64
  LJ_K64_TOBIT,		/* 2^52 + 2^51 */
  LJ_K64_2P64,		/* 2^64 */
  LJ_K64_M2P64,		/* -2^64 */
#if LJ_32
  LJ_K64_M2P64_31,	/* -2^64 or -2^31 */
#else
  /* Alias of LJ_K64_M2P64: does not take up an extra index. */
  LJ_K64_M2P64_31 = LJ_K64_M2P64,
#endif
#endif
#if LJ_TARGET_MIPS
  LJ_K64_2P31,		/* 2^31 */
#endif
  LJ_K64__MAX	/* No trailing comma: invalid in C90, matches LJ_K32__MAX. */
};
/* Indexes of 4 byte constants. Each entry is only compiled in for the
** targets named by the enclosing #if, so the numeric values differ per
** target. LJ_K32__MAX is always last and gives the number of entries.
*/
enum {
#if LJ_TARGET_X86ORX64
LJ_K32_M2P64_31, /* -2^64 or -2^31 */
#endif
#if LJ_TARGET_PPC
LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
LJ_K32_2P52, /* 2^52 */
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
LJ_K32_2P31, /* 2^31 */
#endif
LJ_K32__MAX
};
/* Get 16 byte aligned pointer to SIMD constant. */
#define LJ_KSIMD(J, n) \
((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
@ -324,13 +371,14 @@ enum {
/* Fold state is used to fold instructions on-the-fly. */
typedef struct FoldState {
IRIns ins; /* Currently emitted instruction. */
IRIns left; /* Instruction referenced by left operand. */
IRIns right; /* Instruction referenced by right operand. */
IRIns left[2]; /* Instruction referenced by left operand. */
IRIns right[2]; /* Instruction referenced by right operand. */
} FoldState;
/* JIT compiler state. */
typedef struct jit_State {
GCtrace cur; /* Current trace. */
GCtrace *curfinal; /* Final address of current trace (set during asm). */
lua_State *L; /* Current Lua state. */
const BCIns *pc; /* Current PC. */
@ -360,8 +408,9 @@ typedef struct jit_State {
int32_t framedepth; /* Current frame depth. */
int32_t retdepth; /* Return frame depth (count of RETF). */
MRef k64; /* Pointer to chained array of 64 bit constants. */
TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */
uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */
IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
@ -382,7 +431,7 @@ typedef struct jit_State {
GCRef *trace; /* Array of traces. */
TraceNo freetrace; /* Start of scan for next free trace. */
MSize sizetrace; /* Size of trace array. */
TValue *ktracep; /* Pointer to K64Array slot with GCtrace pointer. */
IRRef1 ktrace; /* Reference to KGC with GCtrace. */
IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */

View File

@ -843,12 +843,16 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
#endif
#if LJ_FR2
#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)(void *)(f))
#define contptr(f) ((void *)(f))
#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f))
#elif LJ_64
#define contptr(f) \
((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin))
#define setcont(o, f) \
((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
#else
#define setcont(o, f) setlightudV((o), (void *)(f))
#define contptr(f) ((void *)(f))
#define setcont(o, f) setlightudV((o), contptr(f))
#endif
#define tvchecklive(L, o) \

View File

@ -136,8 +136,8 @@
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins)
#define fleft (&J->fold.left)
#define fright (&J->fold.right)
#define fleft (J->fold.left)
#define fright (J->fold.right)
#define knumleft (ir_knum(fleft)->n)
#define knumright (ir_knum(fright)->n)
@ -502,7 +502,7 @@ LJFOLDF(kfold_strref_snew)
PHIBARRIER(ir);
fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
fins->op1 = str;
fins->ot = IRT(IR_STRREF, IRT_P32);
fins->ot = IRT(IR_STRREF, IRT_PGC);
return RETRYFOLD;
}
}
@ -998,8 +998,10 @@ LJFOLDF(simplify_nummuldiv_k)
if (n == 1.0) { /* x o 1 ==> x */
return LEFTFOLD;
} else if (n == -1.0) { /* x o -1 ==> -x */
IRRef op1 = fins->op1;
fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
fins->op1 = op1;
fins->o = IR_NEG;
fins->op2 = (IRRef1)lj_ir_knum_neg(J);
return RETRYFOLD;
} else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
fins->o = IR_ADD;
@ -2393,10 +2395,14 @@ retry:
if (fins->op1 >= J->cur.nk) {
key += (uint32_t)IR(fins->op1)->o << 10;
*fleft = *IR(fins->op1);
if (fins->op1 < REF_TRUE)
fleft[1] = IR(fins->op1)[1];
}
if (fins->op2 >= J->cur.nk) {
key += (uint32_t)IR(fins->op2)->o;
*fright = *IR(fins->op2);
if (fins->op2 < REF_TRUE)
fright[1] = IR(fins->op2)[1];
} else {
key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
}

View File

@ -22,8 +22,8 @@
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins)
#define fleft (&J->fold.left)
#define fright (&J->fold.right)
#define fleft (J->fold.left)
#define fright (J->fold.right)
/*
** Caveat #1: return value is not always a TRef -- only use with tref_ref().

View File

@ -517,18 +517,24 @@ static int numisint(lua_Number n)
return (n == (lua_Number)lj_num2int(n));
}
/* Convert a string operand to a number; non-string refs pass through.
** Emits a guarded STRTO for the IR and converts the runtime value *o
** in-place, since callers use the converted value afterwards.
** Aborts the trace with LJ_TRERR_BADTYPE if the string is not numeric.
*/
static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o)
{
  if (!tref_isstr(tr))
    return tr;  /* Nothing to convert. */
  tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
  /* Would need an inverted STRTO for this rare and useless case. */
  if (!lj_strscan_num(strV(o), o))  /* Convert in-place. */
    lj_trace_err(J, LJ_TRERR_BADTYPE);  /* Punt if non-numeric. */
  return tr;
}
/* Narrowing of arithmetic operations. */
TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op)
{
if (tref_isstr(rb)) {
rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0);
lj_strscan_num(strV(vb), vb);
}
if (tref_isstr(rc)) {
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
lj_strscan_num(strV(vc), vc);
}
rb = conv_str_tonum(J, rb, vb);
rc = conv_str_tonum(J, rc, vc);
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
tref_isinteger(rb) && tref_isinteger(rc) &&
@ -543,24 +549,21 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
/* Narrowing of unary minus operator. */
TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
{
if (tref_isstr(rc)) {
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
lj_strscan_num(strV(vc), vc);
}
rc = conv_str_tonum(J, rc, vc);
if (tref_isinteger(rc)) {
if ((uint32_t)numberVint(vc) != 0x80000000u)
return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
}
return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J));
return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
}
/* Narrowing of modulo operator. */
TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc)
TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
{
TRef tmp;
if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc))
lj_trace_err(J, LJ_TRERR_BADTYPE);
rb = conv_str_tonum(J, rb, vb);
rc = conv_str_tonum(J, rc, vc);
if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) &&
tref_isinteger(rb) && tref_isinteger(rc) &&
(tvisint(vc) ? intV(vc) != 0 : !tviszero(vc))) {
@ -577,10 +580,11 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc)
}
/* Narrowing of power operator or math.pow. */
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
{
if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc))
lj_trace_err(J, LJ_TRERR_BADTYPE);
rb = conv_str_tonum(J, rb, vb);
rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
rc = conv_str_tonum(J, rc, vc);
/* Narrowing must be unconditional to preserve (-x)^i semantics. */
if (tvisint(vc) || numisint(numV(vc))) {
int checkrange = 0;
@ -591,8 +595,6 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
checkrange = 1;
}
if (!tref_isinteger(rc)) {
if (tref_isstr(rc))
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
/* Guarded conversion to integer! */
rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
}

View File

@ -153,10 +153,9 @@ static void sink_remark_phi(jit_State *J)
remark = 0;
for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
if (((irl->t.irt ^ irr->t.irt) & IRT_MARK))
remark = 1;
else if (irl->prev == irr->prev)
if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev)
continue;
remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK);
irt_setmark(IR(ir->op1)->t);
irt_setmark(IR(ir->op2)->t);
}
@ -166,8 +165,8 @@ static void sink_remark_phi(jit_State *J)
/* Sweep instructions and tag sunken allocations and stores. */
static void sink_sweep_ins(jit_State *J)
{
IRIns *ir, *irfirst = IR(J->cur.nk);
for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) {
IRIns *ir, *irbase = IR(REF_BASE);
for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
switch (ir->o) {
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
IRIns *ira = sink_checkalloc(J, ir);
@ -217,6 +216,12 @@ static void sink_sweep_ins(jit_State *J)
break;
}
}
for (ir = IR(J->cur.nk); ir < irbase; ir++) {
irt_clearmark(ir->t);
ir->prev = REGSP_INIT;
if (irt_is64(ir->t) && ir->o != IR_KNULL)
ir++;
}
}
/* Allocation sinking and store sinking.

View File

@ -16,6 +16,7 @@
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_dispatch.h"
#include "lj_vm.h"
/* SPLIT pass:
@ -353,6 +354,8 @@ static void split_ir(jit_State *J)
ir->prev = ref; /* Identity substitution for loword. */
hisubst[ref] = 0;
}
if (irt_is64(ir->t) && ir->o != IR_KNULL)
ref++;
}
/* Process old IR instructions. */
@ -448,6 +451,11 @@ static void split_ir(jit_State *J)
case IR_STRTO:
hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
break;
case IR_FLOAD:
lua_assert(ir->op1 == REF_NIL);
hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
nir->op2 += LJ_BE*4;
break;
case IR_XLOAD: {
IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */
J->cur.nins--;

View File

@ -2177,6 +2177,8 @@ static void assign_adjust(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e)
bcemit_nil(fs, reg, (BCReg)extra);
}
}
if (nexps > nvars)
ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */
}
/* Recursively parse assignment statement. */
@ -2210,8 +2212,6 @@ static void parse_assignment(LexState *ls, LHSVarList *lh, BCReg nvars)
return;
}
assign_adjust(ls, nvars, nexps, &e);
if (nexps > nvars)
ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */
}
/* Assign RHS to LHS and recurse downwards. */
expr_init(&e, VNONRELOC, ls->fs->freereg-1);

View File

@ -51,7 +51,7 @@ static void rec_check_ir(jit_State *J)
{
IRRef i, nins = J->cur.nins, nk = J->cur.nk;
lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
for (i = nins-1; i >= nk; i--) {
for (i = nk; i < nins; i++) {
IRIns *ir = IR(i);
uint32_t mode = lj_ir_mode[ir->o];
IRRef op1 = ir->op1;
@ -61,7 +61,10 @@ static void rec_check_ir(jit_State *J)
case IRMref: lua_assert(op1 >= nk);
lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
case IRMlit: break;
case IRMcst: lua_assert(i < REF_BIAS); continue;
case IRMcst: lua_assert(i < REF_BIAS);
if (irt_is64(ir->t) && ir->o != IR_KNULL)
i++;
continue;
}
switch (irm_op2(mode)) {
case IRMnone: lua_assert(op2 == 0); break;
@ -84,30 +87,48 @@ static void rec_check_slots(jit_State *J)
BCReg s, nslots = J->baseslot + J->maxslot;
int32_t depth = 0;
cTValue *base = J->L->base - J->baseslot;
lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS);
lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME));
lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS);
lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME));
lua_assert(nslots < LJ_MAX_JSLOTS);
for (s = 0; s < nslots; s++) {
TRef tr = J->slot[s];
if (tr) {
cTValue *tv = &base[s];
IRRef ref = tref_ref(tr);
IRIns *ir;
IRIns *ir = NULL; /* Silence compiler. */
if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
ir = IR(ref);
lua_assert(irt_t(ir->t) == tref_t(tr));
}
if (s == 0) {
lua_assert(tref_isfunc(tr));
#if LJ_FR2
} else if (s == 1) {
lua_assert(0);
#endif
} else if ((tr & TREF_FRAME)) {
GCfunc *fn = gco2func(frame_gc(tv));
BCReg delta = (BCReg)(tv - frame_prev(tv));
#if LJ_FR2
if (ref)
lua_assert(ir_knum(ir)->u64 == tv->u64);
tr = J->slot[s-1];
ir = IR(tref_ref(tr));
#endif
lua_assert(tref_isfunc(tr));
if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta));
lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
: (s == delta + LJ_FR2));
depth++;
} else if ((tr & TREF_CONT)) {
#if LJ_FR2
if (ref)
lua_assert(ir_knum(ir)->u64 == tv->u64);
#else
lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
lua_assert((J->slot[s+1] & TREF_FRAME));
#endif
lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME));
depth++;
} else {
if (tvisnumber(tv))
@ -159,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot)
/* Get TRef for current function. */
static TRef getcurrf(jit_State *J)
{
if (J->base[-1])
return J->base[-1];
lua_assert(J->baseslot == 1);
return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY);
if (J->base[-1-LJ_FR2])
return J->base[-1-LJ_FR2];
lua_assert(J->baseslot == 1+LJ_FR2);
return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
}
/* Compare for raw object equality.
@ -506,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
{
BCReg ra = bc_a(iterins);
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
J->maxslot = ra-1+bc_b(J->pc[-1]);
@ -643,8 +663,8 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
GCproto *pt = funcproto(fn);
/* Too many closures created? Probably not a monomorphic function. */
if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */
TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC);
emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt)));
TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC);
emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt)));
(void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
return tr;
}
@ -675,22 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
{
RecordIndex ix;
TValue *functv = &J->L->base[func];
TRef *fbase = &J->base[func];
TRef kfunc, *fbase = &J->base[func];
ptrdiff_t i;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
for (i = 0; i <= nargs; i++)
(void)getslot(J, func+i); /* Ensure func and all args have a reference. */
(void)getslot(J, func); /* Ensure func has a reference. */
for (i = 1; i <= nargs; i++)
(void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
ix.tab = fbase[0];
copyTV(J->L, &ix.tabv, functv);
if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
lj_trace_err(J, LJ_TRERR_NOMM);
for (i = ++nargs; i > 0; i--) /* Shift arguments up. */
fbase[i] = fbase[i-1];
for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
#if LJ_FR2
fbase[2] = fbase[0];
#endif
fbase[0] = ix.mobj; /* Replace function. */
functv = &ix.mobjv;
}
fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
kfunc = rec_call_specialize(J, funcV(functv), fbase[0]);
#if LJ_FR2
fbase[0] = kfunc;
fbase[1] = TREF_FRAME;
#else
fbase[0] = kfunc | TREF_FRAME;
#endif
J->maxslot = (BCReg)nargs;
}
@ -700,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
rec_call_setup(J, func, nargs);
/* Bump frame. */
J->framedepth++;
J->base += func+1;
J->baseslot += func+1;
J->base += func+1+LJ_FR2;
J->baseslot += func+1+LJ_FR2;
}
/* Record tail call. */
@ -717,7 +746,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
func += cbase;
}
/* Move func + args down. */
memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1));
if (LJ_FR2 && J->baseslot == 2)
J->base[func+1] = 0;
memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
/* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
/* Tailcalls can form a loop, so count towards the loop unroll limit. */
if (++J->tailcalled > J->loopunroll)
@ -758,9 +789,9 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
(void)getslot(J, rbase+i); /* Ensure all results have a reference. */
while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */
BCReg cbase = (BCReg)frame_delta(frame);
if (--J->framedepth < 0)
if (--J->framedepth <= 0)
lj_trace_err(J, LJ_TRERR_NYIRETL);
lua_assert(J->baseslot > 1);
lua_assert(J->baseslot > 1+LJ_FR2);
gotresults++;
rbase += cbase;
J->baseslot -= (BCReg)cbase;
@ -784,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCReg cbase = (BCReg)frame_delta(frame);
if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
lj_trace_err(J, LJ_TRERR_NYIRETL);
lua_assert(J->baseslot > 1);
lua_assert(J->baseslot > 1+LJ_FR2);
rbase += cbase;
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
@ -794,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCIns callins = *(frame_pc(frame)-1);
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
BCReg cbase = bc_a(callins);
GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2)));
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */
GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
if ((pt->flags & PROTO_NOJIT))
lj_trace_err(J, LJ_TRERR_CJITOFF);
if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
@ -808,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lj_snap_add(J);
}
for (i = 0; i < nresults; i++) /* Adjust results. */
J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
J->maxslot = cbase+(BCReg)nresults;
if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
J->framedepth--;
lua_assert(J->baseslot > cbase+1);
J->baseslot -= cbase+1;
J->base -= cbase+1;
lua_assert(J->baseslot > cbase+1+LJ_FR2);
J->baseslot -= cbase+1+LJ_FR2;
J->base -= cbase+1+LJ_FR2;
} else if (J->parent == 0 && J->exitno == 0 &&
!bc_isret(bc_op(J->cur.startins))) {
/* Return to lower frame would leave the loop in a root trace. */
@ -824,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
} else { /* Return to lower frame. Guard for the target we return to. */
TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc);
emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
J->retdepth++;
J->needsnap = 1;
lua_assert(J->baseslot == 1);
lua_assert(J->baseslot == 1+LJ_FR2);
/* Shift result slots up and clear the slots of the new frame below. */
memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults);
memset(J->base-1, 0, sizeof(TRef)*(cbase+1));
memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
}
} else if (frame_iscont(frame)) { /* Return to continuation frame. */
ASMFunction cont = frame_contf(frame);
@ -839,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lj_trace_err(J, LJ_TRERR_NYIRETL);
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
J->maxslot = cbase-2;
J->maxslot = cbase-(2<<LJ_FR2);
if (cont == lj_cont_ra) {
/* Copy result to destination slot. */
BCReg dst = bc_a(*(frame_contpc(frame)-1));
J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
if (dst >= J->maxslot) J->maxslot = dst+1;
if (dst >= J->maxslot) {
J->maxslot = dst+1;
}
} else if (cont == lj_cont_nop) {
/* Nothing to do here. */
} else if (cont == lj_cont_cat) {
BCReg bslot = bc_b(*(frame_contpc(frame)-1));
TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
if (bslot != cbase-2) { /* Concatenate the remainder. */
if (bslot != J->maxslot) { /* Concatenate the remainder. */
TValue *b = J->L->base, save; /* Simulate lower frame and result. */
J->base[cbase-2] = tr;
copyTV(J->L, &save, b-2);
if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2);
J->base[J->maxslot] = tr;
copyTV(J->L, &save, b-(2<<LJ_FR2));
if (gotresults)
copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
else
setnilV(b-(2<<LJ_FR2));
J->L->base = b - cbase;
tr = rec_cat(J, bslot, cbase-2);
tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
b = J->L->base + cbase; /* Undo. */
J->L->base = b;
copyTV(J->L, b-2, &save);
copyTV(J->L, b-(2<<LJ_FR2), &save);
}
if (tr) { /* Store final result. */
BCReg dst = bc_a(*(frame_contpc(frame)-1));
J->base[dst] = tr;
if (dst >= J->maxslot) J->maxslot = dst+1;
if (dst >= J->maxslot) {
J->maxslot = dst+1;
}
} /* Otherwise continue with another __concat call. */
} else {
/* Result type already specialized. */
@ -873,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
} else {
lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
}
lua_assert(J->baseslot >= 1);
lua_assert(J->baseslot >= 1+LJ_FR2);
}
/* -- Metamethod handling ------------------------------------------------- */
@ -882,16 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
{
BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
#if LJ_64
TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
#if LJ_FR2
J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
J->base[top+1] = TREF_CONT;
#else
TRef trcont = lj_ir_kptr(J, (void *)cont);
J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
#endif
J->base[top] = trcont | TREF_CONT;
J->framedepth++;
for (s = J->maxslot; s < top; s++)
J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
return top+1;
return top+1+LJ_FR2;
}
/* Record metamethod lookup. */
@ -910,7 +947,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
cTValue *mo;
if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
/* Specialize to the C library namespace object. */
emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
} else {
/* Specialize to the type of userdata. */
TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
@ -939,7 +976,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
}
/* The cdata metatable is treated as immutable. */
if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
#if LJ_GC64
/* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
#else
ix->mt = mix.tab = lj_ir_ktab(J, mt);
#endif
goto nocheck;
}
ix->mt = mt ? mix.tab : TREF_NIL;
@ -969,9 +1012,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
TRef *base = J->base + func;
TValue *basev = J->L->base + func;
base[1] = ix->tab; base[2] = ix->key;
copyTV(J->L, basev+1, &ix->tabv);
copyTV(J->L, basev+2, &ix->keyv);
base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
if (mm != MM_unm) {
ix->tab = ix->key;
@ -982,8 +1025,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
lj_trace_err(J, LJ_TRERR_NOMM);
}
ok:
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj;
#if LJ_FR2
base[1] = 0;
#endif
copyTV(J->L, basev+0, &ix->mobjv);
lj_record_call(J, func, 2);
return 0; /* No result yet. */
@ -999,8 +1044,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
BCReg func = rec_mm_prep(J, lj_cont_ra);
TRef *base = J->base + func;
TValue *basev = J->L->base + func;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
base += LJ_FR2;
basev += LJ_FR2;
base[1] = tr; copyTV(J->L, basev+1, tv);
#if LJ_52
base[2] = tr; copyTV(J->L, basev+2, tv);
@ -1020,11 +1066,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
{
BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
TRef *base = J->base + func;
TValue *tv = J->L->base + func;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
copyTV(J->L, tv+0, &ix->mobjv);
TRef *base = J->base + func + LJ_FR2;
TValue *tv = J->L->base + func + LJ_FR2;
base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
copyTV(J->L, tv+1, &ix->valv);
copyTV(J->L, tv+2, &ix->keyv);
lj_record_call(J, func, 2);
@ -1257,8 +1302,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
if ((MSize)k < t->asize) { /* Currently an array key? */
TRef arrayref;
rec_idx_abc(J, asizeref, ikey, t->asize);
arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY);
return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey);
arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY);
return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey);
} else { /* Currently not in array (may be an array extension)? */
emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
if (k == 0 && tref_isk(key))
@ -1298,13 +1343,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
*rbguard = J->guardemit;
hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE);
node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE);
kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot);
return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot);
}
}
/* Fall back to a regular hash lookup. */
return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key);
return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key);
}
/* Determine whether a key is NOT one of the fast metamethod names. */
@ -1341,11 +1386,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
handlemm:
if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
TRef *base = J->base + func;
TValue *tv = J->L->base + func;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
setfuncV(J->L, tv+0, funcV(&ix->mobjv));
TRef *base = J->base + func + LJ_FR2;
TValue *tv = J->L->base + func + LJ_FR2;
base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
copyTV(J->L, tv+1, &ix->tabv);
copyTV(J->L, tv+2, &ix->keyv);
if (ix->val) {
@ -1387,7 +1431,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
IRType t = itype2irt(oldv);
TRef res;
if (oldv == niltvg(J2G(J))) {
emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
res = TREF_NIL;
} else {
res = emitir(IRTG(loadop, t), xref, 0);
@ -1417,7 +1461,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
if (hasmm)
emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
else if (xrefop == IR_HREF)
emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32),
emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC),
xref, lj_ir_kkptr(J, niltvg(J2G(J))));
if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
lua_assert(hasmm);
@ -1428,7 +1472,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
TRef key = ix->key;
if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key);
xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
keybarrier = 0; /* NEWREF already takes care of the key barrier. */
#ifdef LUAJIT_ENABLE_TABLE_BUMP
if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */
@ -1438,7 +1482,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
} else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
/* Cannot derive that the previous value was non-nil, must do checks. */
if (xrefop == IR_HREF) /* Guard against store to niltv. */
emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
if (ix->idxchain) { /* Metamethod lookup required? */
/* A check for NULL metatable is cheaper (hoistable) than a load. */
if (!mt) {
@ -1460,7 +1504,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
/* Invalidate neg. metamethod cache for stores with certain string keys. */
if (!nommstr(J, ix->key)) {
TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM);
TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM);
emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
}
J->needsnap = 1;
@ -1535,7 +1579,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
goto noconstify;
kfunc = lj_ir_kfunc(J, J->fn);
emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
J->base[-1] = TREF_FRAME | kfunc;
#if LJ_FR2
J->base[-2] = kfunc;
#else
J->base[-1] = kfunc | TREF_FRAME;
#endif
fn = kfunc;
}
tr = lj_record_constify(J, uvval(uvp));
@ -1546,13 +1594,17 @@ noconstify:
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
if (!uvp->closed) {
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
/* In current stack? */
if (uvval(uvp) >= tvref(J->L->stack) &&
uvval(uvp) < tvref(J->L->maxstack)) {
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
if (slot >= 0) { /* Aliases an SSA slot? */
emitir(IRTG(IR_EQ, IRT_PGC),
REF_BASE,
emitir(IRT(IR_ADD, IRT_PGC), uref,
lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
/* NYI: add IR to guard that it's still aliasing the same slot. */
if (val == 0) {
return getslot(J, slot);
} else {
@ -1562,10 +1614,12 @@ noconstify:
}
}
}
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv));
emitir(IRTG(IR_UGT, IRT_PGC),
emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
} else {
needbarrier = 1;
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv));
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
}
if (val == 0) { /* Upvalue load */
IRType t = itype2irt(uvval(uvp));
@ -1640,11 +1694,14 @@ static void rec_func_setup(jit_State *J)
static void rec_func_vararg(jit_State *J)
{
GCproto *pt = J->pt;
BCReg s, fixargs, vframe = J->maxslot+1;
BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
lua_assert((pt->flags & PROTO_VARARG));
if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV);
J->base[vframe-1] = J->base[-1]; /* Copy function up. */
J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
#if LJ_FR2
J->base[vframe-1] = TREF_FRAME;
#endif
/* Copy fixarg slots up and set their original slots to nil. */
fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
for (s = 0; s < fixargs; s++) {
@ -1706,7 +1763,7 @@ static int select_detect(jit_State *J)
static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
{
int32_t numparams = J->pt->numparams;
ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1;
ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
lua_assert(frame_isvarg(J->L->base-1));
if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
ptrdiff_t i;
@ -1718,10 +1775,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
J->maxslot = dst + (BCReg)nresults;
}
for (i = 0; i < nresults; i++)
J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL;
J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
} else { /* Unknown number of varargs passed to trace. */
TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME);
int32_t frofs = 8*(1+numparams)+FRAME_VARG;
TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
if (nresults >= 0) { /* Known fixed number of results. */
ptrdiff_t i;
if (nvararg > 0) {
@ -1732,11 +1789,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
else
emitir(IRTGI(IR_EQ), fr,
lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
for (i = 0; i < nload; i++) {
IRType t = itype2irt(&J->L->base[i-1-nvararg]);
TRef aref = emitir(IRT(IR_AREF, IRT_P32),
IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
vbase, lj_ir_kint(J, (int32_t)i));
TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
@ -1782,15 +1839,16 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
}
if (idx != 0 && idx <= nvararg) {
IRType t;
TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
t = itype2irt(&J->L->base[idx-2-nvararg]);
aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx);
TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
lj_ir_kint(J, frofs-(8<<LJ_FR2)));
t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
}
J->base[dst-2] = tr;
J->maxslot = dst-1;
J->base[dst-2-LJ_FR2] = tr;
J->maxslot = dst-1-LJ_FR2;
J->bcskip = 2; /* Skip CALLM + select. */
} else {
nyivarg:
@ -1839,10 +1897,10 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
break;
}
xbase = ++trp;
tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32),
tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
do {
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++);
tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++);
} while (trp <= top);
tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
J->maxslot = (BCReg)(xbase - J->base);
@ -1883,7 +1941,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
/* Set PC to opposite target to avoid re-recording the comp. in side trace. */
#if LJ_FR2
SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
uint64_t pcbase;
memcpy(&pcbase, flink, sizeof(uint64_t));
pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
memcpy(flink, &pcbase, sizeof(uint64_t));
#else
J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
#endif
J->needsnap = 1;
if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
lj_snap_shrink(J); /* Shrink last snapshot if possible. */
@ -2159,14 +2225,14 @@ void lj_record_ins(jit_State *J)
case BC_MODVN: case BC_MODVV:
recmod:
if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
rc = lj_opt_narrow_mod(J, rb, rc, rcv);
rc = lj_opt_narrow_mod(J, rb, rc, rbv, rcv);
else
rc = rec_mm_arith(J, &ix, MM_mod);
break;
case BC_POW:
if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv);
rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
else
rc = rec_mm_arith(J, &ix, MM_pow);
break;
@ -2181,7 +2247,13 @@ void lj_record_ins(jit_State *J)
case BC_MOV:
/* Clear gap of method call to avoid resurrecting previous refs. */
if (ra > J->maxslot) J->base[ra-1] = 0;
if (ra > J->maxslot) {
#if LJ_FR2
memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
#else
J->base[ra-1] = 0;
#endif
}
break;
case BC_KSTR: case BC_KNUM: case BC_KPRI:
break;
@ -2250,14 +2322,14 @@ void lj_record_ins(jit_State *J)
/* -- Calls and vararg handling ----------------------------------------- */
case BC_ITERC:
J->base[ra] = getslot(J, ra-3-LJ_FR2);
J->base[ra+1] = getslot(J, ra-2-LJ_FR2);
J->base[ra+2] = getslot(J, ra-1-LJ_FR2);
J->base[ra] = getslot(J, ra-3);
J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
{ /* Do the actual copy now because lj_record_call needs the values. */
TValue *b = &J->L->base[ra];
copyTV(J->L, b, b-3-LJ_FR2);
copyTV(J->L, b+1, b-2-LJ_FR2);
copyTV(J->L, b+2, b-1-LJ_FR2);
copyTV(J->L, b, b-3);
copyTV(J->L, b+1+LJ_FR2, b-2);
copyTV(J->L, b+2+LJ_FR2, b-1);
}
lj_record_call(J, ra, (ptrdiff_t)rc-1);
break;
@ -2380,7 +2452,12 @@ void lj_record_ins(jit_State *J)
/* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
if (bcmode_a(op) == BCMdst && rc) {
J->base[ra] = rc;
if (ra >= J->maxslot) J->maxslot = ra+1;
if (ra >= J->maxslot) {
#if LJ_FR2
if (ra > J->maxslot) J->base[ra-1] = 0;
#endif
J->maxslot = ra+1;
}
}
#undef rav
@ -2465,7 +2542,7 @@ void lj_record_setup(jit_State *J)
J->scev.idx = REF_NIL;
setmref(J->scev.pc, NULL);
J->baseslot = 1; /* Invoking function is at base[-1]. */
J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
J->base = J->slot + J->baseslot;
J->maxslot = 0;
J->framedepth = 0;
@ -2480,7 +2557,7 @@ void lj_record_setup(jit_State *J)
J->bc_extent = ~(MSize)0;
/* Emit instructions for fixed references. Also triggers initial IR alloc. */
emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno);
emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno);
for (i = 0; i <= 2; i++) {
IRIns *ir = IR(REF_NIL-i);
ir->i = 0;

View File

@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
for (s = 0; s < nslots; s++) {
TRef tr = J->slot[s];
IRRef ref = tref_ref(tr);
#if LJ_FR2
if (s == 1) continue;
if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
TValue *base = J->L->base - J->baseslot;
tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
ref = tref_ref(tr);
}
#endif
if (ref) {
SnapEntry sn = SNAP_TR(s, tr);
IRIns *ir = &J->cur.ir[ref];
if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
/* No need to snapshot unmodified non-inherited slots. */
if (!(ir->op2 & IRSLOAD_INHERIT))
@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
}
/*
** Add frame links at the end of the snapshot.
**
** LJ_FR2: one 64 bit word, (PC << 8) | (baseslot - 2), is copied into the
** map; it occupies two SnapEntry slots. Otherwise one SnapEntry is written
** for the current PC, followed by PC and/or frame size entries for the
** frames visited by the traversal below.
** The three-argument form stores the top slot (ftop - lim) through *topslot
** and returns the number of SnapEntry slots used for the links.
**
** NOTE(review): two signatures, two 'lim' initializers and two return
** statements are listed here. This looks like a before/after pair from a
** diff shown without +/- markers -- confirm which lines are live.
*/
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
cTValue *frame = J->L->base - 1;
cTValue *lim = J->L->base - J->baseslot;
cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
GCfunc *fn = frame_func(frame);
/* Initial top: end of the Lua function's frame, else the current L->top. */
cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
/* Low 8 bits hold baseslot-2 (range asserted below), the rest holds the PC. */
uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
lua_assert(2 <= J->baseslot && J->baseslot <= 257);
/* Copied with memcpy rather than stored through a uint64_t pointer. */
memcpy(map, &pcbase, sizeof(uint64_t));
#else
MSize f = 0;
lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
#endif
while (frame > lim) { /* Backwards traversal of all frames above base. */
if (frame_islua(frame)) {
#if !LJ_FR2
map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
frame = frame_prevl(frame);
} else if (frame_iscont(frame)) {
#if !LJ_FR2
/* Continuation frames contribute two entries: frame size, then PC. */
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
frame = frame_prevd(frame);
} else {
lua_assert(!frame_isc(frame));
#if !LJ_FR2
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
frame = frame_prevd(frame);
/* Skip the framesize check below for this kind of frame. */
continue;
}
/* Track the highest frame top seen so far. */
if (frame + funcproto(frame_func(frame))->framesize > ftop)
ftop = frame + funcproto(frame_func(frame))->framesize;
}
*topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
return 2; /* The 64 bit pcbase word takes two SnapEntry slots. */
#else
lua_assert(f == (MSize)(1 + J->framedepth));
return (BCReg)(ftop - lim);
return f;
#endif
}
/* Take a snapshot of the current stack. */
@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
MSize nent;
SnapEntry *p;
/* Conservative estimate. */
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
p = &J->cur.snapmap[nsnapmap];
nent = snapshot_slots(J, p, nslots);
snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
snap->nent = (uint8_t)nent;
nent += snapshot_framelinks(J, p + nent, &snap->topslot);
snap->mapofs = (uint16_t)nsnapmap;
snap->ref = (IRRef1)J->cur.nins;
snap->nent = (uint8_t)nent;
snap->nslots = (uint8_t)nslots;
snap->count = 0;
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
}
/* Add or merge a snapshot. */
@ -284,8 +309,8 @@ void lj_snap_shrink(jit_State *J)
MSize n, m, nlim, nent = snap->nent;
uint8_t udf[SNAP_USEDEF_SLOTS];
BCReg maxslot = J->maxslot;
BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
BCReg baseslot = J->baseslot;
BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
maxslot += baseslot;
minslot += baseslot;
snap->nslots = (uint8_t)maxslot;
@ -371,8 +396,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
case IR_KPRI: return TREF_PRI(irt_type(ir->t));
case IR_KINT: return lj_ir_kint(J, ir->i);
case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
case IR_KNUM: case IR_KINT64:
return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
default: lua_assert(0); return TREF_NIL; break;
}
@ -555,8 +580,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
uint64_t k = (uint32_t)T->ir[irs->op2].i +
((uint64_t)T->ir[(irs+1)->op2].i << 32);
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
lj_ir_k64_find(J, k));
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
} else {
val = emitir_raw(IRT(IR_HIOP, t), val,
snap_pref(J, T, map, nent, seen, (irs+1)->op2));
@ -599,7 +623,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
}
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs);
lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
int32_t *sps = &ex->spill[regsp_spill(rs)];
if (irt_isinteger(t)) {
@ -608,9 +631,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) {
o->u64 = *(uint64_t *)sps;
#endif
} else if (LJ_64 && irt_islightud(t)) {
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(t)) {
/* 64 bit lightuserdata which may escape already has the tag bits. */
o->u64 = *(uint64_t *)sps;
#endif
} else {
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
@ -628,9 +653,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
} else if (LJ_64 && irt_is64(t)) {
#if LJ_64 && !LJ_GC64
} else if (irt_is64(t)) {
/* 64 bit values that already have the tag bits. */
o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
} else if (irt_ispri(t)) {
setpriV(o, irt_toitype(t));
} else {
@ -651,7 +678,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
uint64_t tmp;
if (irref_isk(ref)) {
if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
src = mref(ir->ptr, int32_t);
src = (int32_t *)&ir[1];
} else if (sz == 8) {
tmp = (uint64_t)(uint32_t)ir->i;
src = (int32_t *)&tmp;
@ -795,11 +822,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
SnapShot *snap = &T->snap[snapno];
MSize n, nent = snap->nent;
SnapEntry *map = &T->snapmap[snap->mapofs];
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
#if !LJ_FR2 || defined(LUA_USE_ASSERT)
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
#endif
#if !LJ_FR2
ptrdiff_t ftsz0;
#endif
TValue *frame;
BloomFilter rfilt = snap_renamefilter(T, snapno);
const BCIns *pc = snap_pc(map[nent]);
const BCIns *pc = snap_pc(&map[nent]);
lua_State *L = J->L;
/* Set interpreter PC to the next PC to get correct error messages. */
@ -812,8 +843,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
}
/* Fill stack slots with data from the registers and spill slots. */
frame = L->base-1;
frame = L->base-1-LJ_FR2;
#if !LJ_FR2
ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
#endif
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
if (!(sn & SNAP_NORESTORE)) {
@ -836,14 +869,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
TValue tmp;
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
/* Overwrite tag with frame link. */
setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
L->base = o+1;
#endif
}
}
}
#if LJ_FR2
L->base += (map[nent+LJ_BE] & 0xff);
#endif
lua_assert(map + nent == flinks);
/* Compute current stack top. */

View File

@ -180,7 +180,7 @@ static void close_state(lua_State *L)
g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
}
#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
lua_State *lj_state_newstate(lua_Alloc f, void *ud)
#else
LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)

View File

@ -98,11 +98,15 @@ char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
uint32_t u = (uint32_t)k;
if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
if (u < 10000) {
if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3;
if (u < 10) goto dig1;
if (u < 100) goto dig2;
if (u < 1000) goto dig3;
} else {
uint32_t v = u / 10000; u -= v * 10000;
if (v < 10000) {
if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 1000) goto dig7;
if (v < 10) goto dig5;
if (v < 100) goto dig6;
if (v < 1000) goto dig7;
} else {
uint32_t w = v / 10000; v -= w * 10000;
if (w >= 10) WINT_R(w, 10, 10)

View File

@ -82,11 +82,15 @@ enum {
#if LJ_SOFTFP
#define RSET_FPR 0
#else
#if LJ_32
#define RSET_FPR \
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
#else
#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
#endif
#endif
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL
@ -97,23 +101,37 @@ enum {
#if LJ_SOFTFP
#define RSET_SCRATCH_FPR 0
#else
#if LJ_32
#define RSET_SCRATCH_FPR \
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
RID2RSET(RID_F16)|RID2RSET(RID_F18))
#else
#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24)
#endif
#endif
#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
#define REGARG_FIRSTGPR RID_R4
#if LJ_32
#define REGARG_LASTGPR RID_R7
#define REGARG_NUMGPR 4
#else
#define REGARG_LASTGPR RID_R11
#define REGARG_NUMGPR 8
#endif
#if LJ_ABI_SOFTFP
#define REGARG_FIRSTFPR 0
#define REGARG_LASTFPR 0
#define REGARG_NUMFPR 0
#else
#define REGARG_FIRSTFPR RID_F12
#if LJ_32
#define REGARG_LASTFPR RID_F14
#define REGARG_NUMFPR 2
#else
#define REGARG_LASTFPR RID_F19
#define REGARG_NUMFPR 8
#endif
#endif
/* -- Spill slots --------------------------------------------------------- */
@ -125,7 +143,11 @@ enum {
**
** SPS_FIRST: First spill slot for general use.
*/
#if LJ_32
#define SPS_FIXED 5
#else
#define SPS_FIXED 4
#endif
#define SPS_FIRST 4
#define SPOFS_TMP 0
@ -140,7 +162,7 @@ typedef struct {
#if !LJ_SOFTFP
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
#endif
int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
@ -172,7 +194,7 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
typedef enum MIPSIns {
/* Integer instructions. */
MIPSI_MOVE = 0x00000021,
MIPSI_MOVE = 0x00000025,
MIPSI_NOP = 0x00000000,
MIPSI_LI = 0x24000000,
@ -204,19 +226,20 @@ typedef enum MIPSIns {
MIPSI_SLL = 0x00000000,
MIPSI_SRL = 0x00000002,
MIPSI_SRA = 0x00000003,
MIPSI_ROTR = 0x00200002, /* MIPS32R2 */
MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */
MIPSI_SLLV = 0x00000004,
MIPSI_SRLV = 0x00000006,
MIPSI_SRAV = 0x00000007,
MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */
MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
MIPSI_SEB = 0x7c000420, /* MIPS32R2 */
MIPSI_SEH = 0x7c000620, /* MIPS32R2 */
MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */
MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
MIPSI_B = 0x10000000,
MIPSI_J = 0x08000000,
MIPSI_JAL = 0x0c000000,
MIPSI_JALX = 0x74000000,
MIPSI_JR = 0x00000008,
MIPSI_JALR = 0x0000f809,
@ -241,6 +264,15 @@ typedef enum MIPSIns {
MIPSI_LDC1 = 0xd4000000,
MIPSI_SDC1 = 0xf4000000,
/* MIPS64 instructions. */
MIPSI_DSLL = 0x00000038,
MIPSI_LD = 0xdc000000,
MIPSI_DADDIU = 0x64000000,
MIPSI_SD = 0xfc000000,
MIPSI_DMFC1 = 0x44200000,
MIPSI_DSRA32 = 0x0000003f,
MIPSI_MFHC1 = 0x44600000,
/* FP instructions. */
MIPSI_MOV_S = 0x46000006,
MIPSI_MOV_D = 0x46200006,

View File

@ -22,7 +22,7 @@
_(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#endif
#define VRIDDEF(_) \
_(MRM)
_(MRM) _(RIP)
#define RIDENUM(name) RID_##name,
@ -31,6 +31,7 @@ enum {
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
RID_RIP = RID_MAX+1, /* Pseudo-id for RIP (x64 only). */
/* Calling conventions. */
RID_SP = RID_ESP,
@ -63,8 +64,10 @@ enum {
/* -- Register sets ------------------------------------------------------- */
/* Make use of all registers, except the stack pointer. */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
- RID2RSET(RID_ESP) \
- LJ_GC64*RID2RSET(RID_DISPATCH))
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL
@ -200,6 +203,7 @@ typedef struct {
*/
typedef enum {
/* Fixed length opcodes. XI_* prefix. */
XI_O16 = 0x66,
XI_NOP = 0x90,
XI_XCHGa = 0x90,
XI_CALL = 0xe8,
@ -217,6 +221,7 @@ typedef enum {
XI_PUSHi8 = 0x6a,
XI_TESTb = 0x84,
XI_TEST = 0x85,
XI_INT3 = 0xcc,
XI_MOVmi = 0xc7,
XI_GROUP5 = 0xff,
@ -243,6 +248,7 @@ typedef enum {
XV_SHRX = XV_f20f38(f7),
/* Variable-length opcodes. XO_* prefix. */
XO_OR = XO_(0b),
XO_MOV = XO_(8b),
XO_MOVto = XO_(89),
XO_MOVtow = XO_66(89),

View File

@ -117,15 +117,26 @@ static void perftools_addtrace(GCtrace *T)
}
#endif
/* Allocate space for copy of trace. */
static GCtrace *trace_save_alloc(jit_State *J)
/*
** Allocate space for copy of T.
**
** One allocation holds the GCtrace header (rounded up to a multiple of 8
** bytes), the IR instructions in [nk, nins), the snapshots and the snapshot
** map. The header fields set below and the IR are filled in here; space for
** the snapshots and the snapshot map is only reserved.
**
** NOTE(review): lines using J->cur and lines using T are both listed. This
** looks like a before/after pair from a diff shown without +/- markers --
** confirm which lines are live.
*/
GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T)
{
size_t sztr = ((sizeof(GCtrace)+7)&~7); /* Header size, 8 byte aligned. */
size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns);
size_t szins = (T->nins-T->nk)*sizeof(IRIns);
size_t sz = sztr + szins +
J->cur.nsnap*sizeof(SnapShot) +
J->cur.nsnapmap*sizeof(SnapEntry);
return lj_mem_newt(J->L, (MSize)sz, GCtrace);
T->nsnap*sizeof(SnapShot) +
T->nsnapmap*sizeof(SnapEntry);
GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace);
char *p = (char *)T2 + sztr; /* The IR copy starts right after the header. */
T2->gct = ~LJ_TTRACE;
T2->marked = 0;
T2->traceno = 0;
/* Bias the IR pointer by nk, so T2->ir[T->nk] is the first copied ins. */
T2->ir = (IRIns *)p - T->nk;
T2->nins = T->nins;
T2->nk = T->nk;
T2->nsnap = T->nsnap;
T2->nsnapmap = T->nsnapmap;
memcpy(p, T->ir + T->nk, szins);
return T2;
}
/* Save current trace by copying and compacting it. */
@ -139,12 +150,12 @@ static void trace_save(jit_State *J, GCtrace *T)
setgcrefp(J2G(J)->gc.root, T);
newwhite(J2G(J), T);
T->gct = ~LJ_TTRACE;
T->ir = (IRIns *)p - J->cur.nk;
memcpy(p, J->cur.ir+J->cur.nk, szins);
T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */
p += szins;
TRACE_APPENDVEC(snap, nsnap, SnapShot)
TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
J->cur.traceno = 0;
J->curfinal = NULL;
setgcrefp(J->trace[T->traceno], T);
lj_gc_barriertrace(J2G(J), T->traceno);
lj_gdbjit_addtrace(J, T);
@ -284,7 +295,6 @@ int lj_trace_flushall(lua_State *L)
memset(J->penalty, 0, sizeof(J->penalty));
/* Free the whole machine code and invalidate all exit stub groups. */
lj_mcode_free(J);
lj_ir_k64_freeall(J);
memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
lj_vmevent_send(L, TRACE,
setstrV(L, L->top++, lj_str_newlit(L, "flush"));
@ -297,13 +307,35 @@ void lj_trace_initstate(global_State *g)
{
jit_State *J = G2J(g);
TValue *tv;
/* Initialize SIMD constants. */
/* Initialize aligned SIMD constants. */
/* ABS: every bit except bit 63 is set, in both 64 bit lanes. */
tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
tv[0].u64 = U64x(7fffffff,ffffffff);
tv[1].u64 = U64x(7fffffff,ffffffff);
/* NEG: only bit 63 is set, in both 64 bit lanes. */
tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
tv[0].u64 = U64x(80000000,00000000);
tv[1].u64 = U64x(80000000,00000000);

/* Initialize 32/64 bit constants. */
/* The values below are IEEE 754 bit patterns (k64: double, k32: float). */
#if LJ_TARGET_X86ORX64
J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); /* 2^52 + 2^51 */
J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); /* 2^64 */
J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); /* -2^64 */
#if LJ_32
J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); /* -2^31 */
#endif
/* Float -2^64 on 64 bit targets, float -2^31 otherwise. */
J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
#endif
#if LJ_TARGET_PPC
J->k32[LJ_K32_2P52_2P31] = 0x59800004; /* 2^52 + 2^31 */
J->k32[LJ_K32_2P52] = 0x59800000; /* 2^52 */
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
J->k32[LJ_K32_2P31] = 0x4f000000; /* 2^31 */
#endif
#if LJ_TARGET_MIPS
J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); /* 2^31 */
#endif
}
/* Free everything associated with the JIT compiler state. */
@ -318,7 +350,6 @@ void lj_trace_freestate(global_State *g)
}
#endif
lj_mcode_free(J);
lj_ir_k64_freeall(J);
lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
@ -403,7 +434,7 @@ static void trace_start(jit_State *J)
J->postproc = LJ_POST_NONE;
lj_resetsplit(J);
J->retryrec = 0;
J->ktracep = NULL;
J->ktrace = 0;
setgcref(J->cur.startpt, obj2gco(J->pt));
L = J->L;
@ -427,7 +458,7 @@ static void trace_stop(jit_State *J)
BCOp op = bc_op(J->cur.startins);
GCproto *pt = &gcref(J->cur.startpt)->pt;
TraceNo traceno = J->cur.traceno;
GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */
GCtrace *T = J->curfinal;
lua_State *L;
switch (op) {
@ -479,9 +510,6 @@ static void trace_stop(jit_State *J)
lj_mcode_commit(J, J->cur.mcode);
J->postproc = LJ_POST_NONE;
trace_save(J, T);
if (J->ktracep) { /* Patch K64Array slot with the final GCtrace pointer. */
setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE);
}
L = J->L;
lj_vmevent_send(L, TRACE,
@ -515,6 +543,10 @@ static int trace_abort(jit_State *J)
J->postproc = LJ_POST_NONE;
lj_mcode_abort(J);
if (J->curfinal) {
lj_trace_free(J2G(J), J->curfinal);
J->curfinal = NULL;
}
if (tvisnumber(L->top-1))
e = (TraceError)numberVint(L->top-1);
if (e == LJ_TRERR_MCODELM) {

View File

@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
/* Trace management. */
LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T);
LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T);
LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);

View File

@ -17,6 +17,10 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud,
LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode);
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe);
#if LJ_ABI_WIN && LJ_TARGET_X86
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec,
void *unwinder, int errcode);
#endif
LJ_ASMF void lj_vm_unwind_c_eh(void);
LJ_ASMF void lj_vm_unwind_ff_eh(void);
#if LJ_TARGET_X86ORX64

View File

@ -152,22 +152,15 @@ static void print_jit_status(lua_State *L)
putc('\n', stdout);
}
static int getargs(lua_State *L, char **argv, int n)
/*
** Build the global table "arg" from the command line.
**
** argv[i] is stored at index i - argf for every i in [0, argc), so
** argv[argf] ends up at arg[0] and everything before it at negative indexes.
** The table is pre-sized with argc - argf array slots and argf hash slots.
**
** NOTE(review): the getargs() signature and body lines are listed alongside
** createargtable(). This looks like a before/after pair from a diff shown
** without +/- markers -- confirm which lines are live.
*/
static void createargtable(lua_State *L, char **argv, int argc, int argf)
{
int narg;
int i;
int argc = 0;
while (argv[argc]) argc++; /* count total number of arguments */
narg = argc - (n + 1); /* number of arguments to the script */
luaL_checkstack(L, narg + 3, "too many arguments to script");
for (i = n+1; i < argc; i++)
lua_pushstring(L, argv[i]);
lua_createtable(L, narg, n + 1);
lua_createtable(L, argc - argf, argf);
for (i = 0; i < argc; i++) {
lua_pushstring(L, argv[i]);
lua_rawseti(L, -2, i - n);
lua_rawseti(L, -2, i - argf); /* Pops the string; table stays at -1. */
}
return narg;
lua_setglobal(L, "arg"); /* Pops the table and binds it to the global. */
}
static int dofile(lua_State *L, const char *name)
@ -273,21 +266,30 @@ static void dotty(lua_State *L)
progname = oldprogname;
}
static int handle_script(lua_State *L, char **argv, int n)
/*
** Load and run the script named by argx[0].
**
** A name of "-" selects stdin (fname = NULL), unless the preceding argument
** is "--". On a successful load the script arguments are read back from the
** global table "arg" (arg[1], arg[2], ... up to the first nil) and passed to
** the chunk. Returns the result of report() on the final status.
**
** NOTE(review): lines of the older (argv, n) variant are listed alongside
** the (argx) variant. This looks like a before/after pair from a diff shown
** without +/- markers -- confirm which lines are live.
*/
static int handle_script(lua_State *L, char **argx)
{
int status;
const char *fname;
int narg = getargs(L, argv, n); /* collect arguments */
lua_setglobal(L, "arg");
fname = argv[n];
if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
const char *fname = argx[0];
if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
fname = NULL; /* stdin */
status = luaL_loadfile(L, fname);
lua_insert(L, -(narg+1));
if (status == 0)
if (status == 0) {
/* Fetch args from arg table. LUA_INIT or -e might have changed them. */
int narg = 0;
lua_getglobal(L, "arg");
if (lua_istable(L, -1)) {
/* Iteration k: k-1 values sit above the table, so the table is at -k. */
do {
narg++;
lua_rawgeti(L, -narg, narg);
} while (!lua_isnil(L, -1));
lua_pop(L, 1); /* Drop the terminating nil. */
lua_remove(L, -narg); /* Remove the arg table from below the values. */
narg--; /* The nil was counted; narg is now the number of arguments. */
} else {
lua_pop(L, 1); /* "arg" is not a table: call with no arguments. */
}
status = docall(L, narg, 0);
else
lua_pop(L, narg);
}
return report(L, status);
}
@ -384,7 +386,8 @@ static int dobytecode(lua_State *L, char **argv)
}
for (argv++; *argv != NULL; narg++, argv++)
lua_pushstring(L, *argv);
return report(L, lua_pcall(L, narg, 0, 0));
report(L, lua_pcall(L, narg, 0, 0));
return -1;
}
/* check that argument has no extra characters at the end */
@ -405,7 +408,7 @@ static int collectargs(char **argv, int *flags)
switch (argv[i][1]) { /* Check option. */
case '-':
notail(argv[i]);
return (argv[i+1] != NULL ? i+1 : 0);
return i+1;
case '\0':
return i;
case 'i':
@ -430,23 +433,23 @@ static int collectargs(char **argv, int *flags)
case 'b': /* LuaJIT extension */
if (*flags) return -1;
*flags |= FLAGS_EXEC;
return 0;
return i+1;
case 'E':
*flags |= FLAGS_NOENV;
break;
default: return -1; /* invalid option */
}
}
return 0;
return i;
}
static int runargs(lua_State *L, char **argv, int n)
static int runargs(lua_State *L, char **argv, int argn)
{
int i;
for (i = 1; i < n; i++) {
for (i = 1; i < argn; i++) {
if (argv[i] == NULL) continue;
lua_assert(argv[i][0] == '-');
switch (argv[i][1]) { /* option */
switch (argv[i][1]) {
case 'e': {
const char *chunk = argv[i] + 2;
if (*chunk == '\0') chunk = argv[++i];
@ -460,10 +463,10 @@ static int runargs(lua_State *L, char **argv, int n)
if (*filename == '\0') filename = argv[++i];
lua_assert(filename != NULL);
if (dolibrary(L, filename))
return 1; /* stop if file fails */
return 1;
break;
}
case 'j': { /* LuaJIT extension */
case 'j': { /* LuaJIT extension. */
const char *cmd = argv[i] + 2;
if (*cmd == '\0') cmd = argv[++i];
lua_assert(cmd != NULL);
@ -471,11 +474,11 @@ static int runargs(lua_State *L, char **argv, int n)
return 1;
break;
}
case 'O': /* LuaJIT extension */
case 'O': /* LuaJIT extension. */
if (dojitopt(L, argv[i] + 2))
return 1;
break;
case 'b': /* LuaJIT extension */
case 'b': /* LuaJIT extension. */
return dobytecode(L, argv+i);
default: break;
}
@ -508,45 +511,57 @@ static int pmain(lua_State *L)
{
struct Smain *s = &smain;
char **argv = s->argv;
int script;
int argn;
int flags = 0;
globalL = L;
if (argv[0] && argv[0][0]) progname = argv[0];
LUAJIT_VERSION_SYM(); /* linker-enforced version check */
script = collectargs(argv, &flags);
if (script < 0) { /* invalid args? */
LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
argn = collectargs(argv, &flags);
if (argn < 0) { /* Invalid args? */
print_usage();
s->status = 1;
return 0;
}
if ((flags & FLAGS_NOENV)) {
lua_pushboolean(L, 1);
lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
}
lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */
luaL_openlibs(L); /* open libraries */
/* Stop collector during library initialization. */
lua_gc(L, LUA_GCSTOP, 0);
luaL_openlibs(L);
lua_gc(L, LUA_GCRESTART, -1);
createargtable(L, argv, s->argc, argn);
if (!(flags & FLAGS_NOENV)) {
s->status = handle_luainit(L);
if (s->status != 0) return 0;
}
if ((flags & FLAGS_VERSION)) print_version();
s->status = runargs(L, argv, (script > 0) ? script : s->argc);
s->status = runargs(L, argv, argn);
if (s->status != 0) return 0;
if (script) {
s->status = handle_script(L, argv, script);
if (s->argc > argn) {
s->status = handle_script(L, argv + argn);
if (s->status != 0) return 0;
}
if ((flags & FLAGS_INTERACTIVE)) {
print_jit_status(L);
dotty(L);
} else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
} else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
if (lua_stdin_is_tty()) {
print_version();
print_jit_status(L);
dotty(L);
} else {
dofile(L, NULL); /* executes stdin as a file */
dofile(L, NULL); /* Executes stdin as a file. */
}
}
return 0;
@ -555,7 +570,7 @@ static int pmain(lua_State *L)
int main(int argc, char **argv)
{
int status;
lua_State *L = lua_open(); /* create state */
lua_State *L = lua_open();
if (L == NULL) {
l_message(argv[0], "cannot create state: not enough memory");
return EXIT_FAILURE;
@ -565,6 +580,6 @@ int main(int argc, char **argv)
status = lua_cpcall(L, pmain, NULL);
report(L, status);
lua_close(L);
return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS;
return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS;
}

View File

@ -57,7 +57,7 @@
|.define TMP2, r14
|.define TMP3, r15
|
|// Calling conventions.
|// MIPS o32 calling convention.
|.define CFUNCADDR, r25
|.define CARG1, r4
|.define CARG2, r5
@ -4546,24 +4546,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISNEXT:
| // RA = base*8, RD = target (points to ITERN)
| addu RA, BASE, RA
| lw TMP0, -24+HI(RA)
| lw CFUNC:TMP1, -24+LO(RA)
| lw TMP2, -16+HI(RA)
| lw TMP3, -8+HI(RA)
| li AT, LJ_TFUNC
| bne TMP0, AT, >5
|. addiu TMP2, TMP2, -LJ_TTAB
| lbu TMP1, CFUNC:TMP1->ffid
| addiu TMP3, TMP3, -LJ_TNIL
| srl TMP0, RD, 1
| or TMP2, TMP2, TMP3
| addiu TMP1, TMP1, -FF_next_N
| lw CARG1, -24+HI(RA)
| lw CFUNC:CARG2, -24+LO(RA)
| addu TMP0, PC, TMP0
| or TMP1, TMP1, TMP2
| bnez TMP1, >5
| lw CARG3, -16+HI(RA)
| lw CARG4, -8+HI(RA)
| li AT, LJ_TFUNC
| bne CARG1, AT, >5
|. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
| lbu CARG2, CFUNC:CARG2->ffid
| addiu CARG3, CARG3, -LJ_TTAB
| addiu CARG4, CARG4, -LJ_TNIL
| or CARG3, CARG3, CARG4
| addiu CARG2, CARG2, -FF_next_N
| or CARG2, CARG2, CARG3
| bnez CARG2, >5
|. lui TMP1, 0xfffe
| addu PC, TMP0, TMP2
| lui TMP1, 0xfffe
| ori TMP1, TMP1, 0x7fff
| sw r0, -8+LO(RA) // Initialize control var.
| sw TMP1, -8+HI(RA)

4849
src/vm_mips64.dasc Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1105,11 +1105,11 @@ static void build_subroutines(BuildCtx *ctx)
| mov BASE, L:RB->base
| mov NARGS:RDd, TMP1d
| mov LFUNC:RB, [RA-16]
| cleartp LFUNC:RB
| add NARGS:RDd, 1
| // This is fragile. L->base must not move, KBASE must always be defined.
| cmp KBASE, BASE // Continue with CALLT if flag set.
| je ->BC_CALLT_Z
| cleartp LFUNC:RB
| mov BASE, RA
| ins_call // Otherwise call resolved metamethod.
|
@ -2401,8 +2401,7 @@ static void build_subroutines(BuildCtx *ctx)
| movzx RCd, byte [rbp-8] // Reconstruct exit number.
| mov RCH, byte [rbp-16]
| mov [rbp-8], r15; mov [rbp-16], r14
| // Caveat: DISPATCH is rbx.
| mov DISPATCH, [ebp]
| // DISPATCH is preserved on-trace in LJ_GC64 mode.
| mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
| set_vmstate EXIT
| mov [DISPATCH+DISPATCH_J(exitno)], RCd
@ -3516,7 +3515,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_AD // RA = level, RD = target
| branchPC RD // Do this first to free RD.
| mov L:RB, SAVE_L
| cmp dword L:RB->openupval, 0
| cmp aword L:RB->openupval, 0
| je >1
| mov L:RB->base, BASE
| lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE

View File

@ -121,19 +121,68 @@
|//-----------------------------------------------------------------------
|.if not X64 // x86 stack layout.
|
|.if WIN
|
|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--): the 9 slots ARG1..UNUSED2 below SEH_NEXT.
|.macro saveregs_ // WIN x86: save edi/esi/ebx, push an SEH_FUNC/SEH_NEXT pair, reserve the frame.
| push edi; push esi; push ebx // With ebp pushed first by saveregs, these fill SAVE_R3, SAVE_R2, SAVE_R1.
| push extern lj_err_unwind_win // Fills the SEH_FUNC slot.
| fs; push dword [0] // Fills the SEH_NEXT slot with the current fs:[0] value.
| fs; mov [0], esp // fs:[0] now points at the SEH_NEXT slot just pushed.
| sub esp, CFRAME_SPACE // Reserve the slots below SEH_NEXT.
|.endmacro
|.macro restoreregs // Inverse of saveregs: release the frame, restore fs:[0], reload registers.
| add esp, CFRAME_SPACE // esp is back at the SEH_NEXT slot.
| fs; pop dword [0] // Put the saved SEH_NEXT value back into fs:[0].
| pop edi // Short for esp += 4: discards SEH_FUNC, edi is reloaded on the next line.
| pop ebx; pop esi; pop edi; pop ebp // Reverse order of the pushes in saveregs/saveregs_.
|.endmacro
|
|.else
|
|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
|.macro saveregs_ // Non-WIN x86: save edi/esi/ebx and reserve the frame.
| push edi; push esi; push ebx
| sub esp, CFRAME_SPACE // Reserve CFRAME_SPACE bytes below the saved registers.
|.endmacro
|.macro saveregs // Full register save: ebp first, then the rest via saveregs_.
| push ebp; saveregs_
|.endmacro
|.macro restoreregs // Inverse of saveregs.
| add esp, CFRAME_SPACE // Release the reserved frame.
| pop ebx; pop esi; pop edi; pop ebp // Reverse order of the pushes.
|.endmacro
|
|.endif
|
|.macro saveregs // Defined after the WIN/non-WIN .if block, so it uses whichever saveregs_ was selected.
| push ebp; saveregs_ // ebp goes first, then edi/esi/ebx and the frame reservation.
|.endmacro
|
|.if WIN // WIN x86 frame layout: includes the SEH_FUNC/SEH_NEXT slots pushed by saveregs_.
|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
|.define SAVE_NRES, aword [esp+aword*18]
|.define SAVE_CFRAME, aword [esp+aword*17]
|.define SAVE_L, aword [esp+aword*16]
|//----- 16 byte aligned, ^^^ arguments from C caller
|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
|.define SAVE_R4, aword [esp+aword*14] // ebp, pushed by saveregs.
|.define SAVE_R3, aword [esp+aword*13] // edi, pushed by saveregs_.
|.define SAVE_R2, aword [esp+aword*12] // esi, pushed by saveregs_.
|//----- 16 byte aligned
|.define SAVE_R1, aword [esp+aword*11] // ebx, pushed by saveregs_.
|.define SEH_FUNC, aword [esp+aword*10] // Address of lj_err_unwind_win, pushed by saveregs_.
|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. Holds the fs:[0] value found on entry.
|.define UNUSED2, aword [esp+aword*8]
|//----- 16 byte aligned
|.define UNUSED1, aword [esp+aword*7]
|.define SAVE_PC, aword [esp+aword*6]
|.define TMP2, aword [esp+aword*5]
|.define TMP1, aword [esp+aword*4]
|//----- 16 byte aligned
|.define ARG4, aword [esp+aword*3]
|.define ARG3, aword [esp+aword*2]
|.define ARG2, aword [esp+aword*1]
|.define ARG1, aword [esp] //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee
|.else
|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
|.define SAVE_NRES, aword [esp+aword*14]
|.define SAVE_CFRAME, aword [esp+aword*13]
@ -154,6 +203,7 @@
|.define ARG2, aword [esp+aword*1]
|.define ARG1, aword [esp] //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee
|.endif
|
|// FPARGx overlaps ARGx and ARG(x+1) on x86.
|.define FPARG3, qword [esp+qword*1]
@ -554,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| mov eax, FCARG2 // Error return status for vm_pcall.
| mov esp, FCARG1
|.if WIN
| lea FCARG1, SEH_NEXT
| fs; mov [0], FCARG1
|.endif
|.endif
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| mov L:RB, SAVE_L
@ -577,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| and FCARG1, CFRAME_RAWMASK
| mov esp, FCARG1
|.if WIN
| lea FCARG1, SEH_NEXT
| fs; mov [0], FCARG1
|.endif
|.endif
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| mov L:RB, SAVE_L
@ -590,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx)
| set_vmstate INTERP
| jmp ->vm_returnc // Increments RD/MULTRES and returns.
|
|.if WIN and not X64
|->vm_rtlunwind@16: // Thin layer around RtlUnwind. Does not return to its caller: the final ret jumps to unwinder.
| // (void *cframe, void *excptrec, void *unwinder, int errcode): cframe in FCARG1, excptrec in FCARG2, the rest on the stack.
| mov [esp], FCARG1 // Return value for RtlUnwind. Stored in the [esp] slot, which becomes the 4th stacked argument after the three pushes below.
| push FCARG2 // Exception record for RtlUnwind.
| push 0 // Ignored by RtlUnwind.
| push dword [FCARG1+CFRAME_OFS_SEH] // Target frame for RtlUnwind, read from the C frame at CFRAME_OFS_SEH.
| call extern RtlUnwind@16 // Violates ABI (clobbers too much).
| mov FCARG1, eax // Pass the value left in eax on to the unwinder (presumably the cframe stored above; confirm against RtlUnwind docs).
| mov FCARG2, [esp+4] // errcode (for vm_unwind_c). Assumes RtlUnwind popped its 16 argument bytes, so [esp] is the unwinder address.
| ret // Jump to unwinder.
|.endif
|
|//-----------------------------------------------------------------------
|//-- Grow stack for calls -----------------------------------------------
|//-----------------------------------------------------------------------