mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-04-20 05:53:26 +00:00
Merge commit '0d313b243194a0b8d2399d8b549ca5a0ff234db5' into dontstarve
This commit is contained in:
commit
3e78833b0a
@ -160,13 +160,33 @@ passes any arguments after the error function to the function
|
||||
which is called in a protected context.
|
||||
</p>
|
||||
|
||||
<h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3>
|
||||
<h3 id="load"><tt>load*()</tt> handle UTF-8 source code</h3>
|
||||
<p>
|
||||
Non-ASCII characters are handled transparently by the Lua source code parser.
|
||||
This allows the use of UTF-8 characters in identifiers and strings.
|
||||
A UTF-8 BOM is skipped at the start of the source code.
|
||||
</p>
|
||||
|
||||
<h3 id="load_mode"><tt>load*()</tt> add a mode parameter</h3>
|
||||
<p>
|
||||
As an extension from Lua 5.2, the functions <tt>loadstring()</tt>,
|
||||
<tt>loadfile()</tt> and (new) <tt>load()</tt> add an optional
|
||||
<tt>mode</tt> parameter.
|
||||
</p>
|
||||
<p>
|
||||
The default mode string is <tt>"bt"</tt>, which allows loading of both
|
||||
source code and bytecode. Use <tt>"t"</tt> to allow only source code
|
||||
or <tt>"b"</tt> to allow only bytecode to be loaded.
|
||||
</p>
|
||||
<p>
|
||||
By default, the <tt>load*</tt> functions generate the native bytecode format.
|
||||
For cross-compilation purposes, add <tt>W</tt> to the mode string to
|
||||
force the 32 bit format and <tt>X</tt> to force the 64 bit format.
|
||||
Add both to force the opposite format. Note that non-native bytecode
|
||||
generated by <tt>load*</tt> cannot be run, but can still be passed
|
||||
to <tt>string.dump</tt>.
|
||||
</p>
|
||||
|
||||
<h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and ±Inf</h3>
|
||||
<p>
|
||||
All number-to-string conversions consistently convert non-finite numbers
|
||||
@ -186,26 +206,33 @@ works independently of the current locale and it supports hex floating-point
|
||||
numbers (e.g. <tt>0x1.5p-3</tt>).
|
||||
</p>
|
||||
|
||||
<h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3>
|
||||
<h3 id="string_dump"><tt>string.dump(f [,mode])</tt> generates portable bytecode</h3>
|
||||
<p>
|
||||
An extra argument has been added to <tt>string.dump()</tt>. If set to
|
||||
<tt>true</tt>, 'stripped' bytecode without debug information is
|
||||
generated. This speeds up later bytecode loading and reduces memory
|
||||
usage. See also the
|
||||
<tt>true</tt> or to a string which contains the character <tt>s</tt>,
|
||||
'stripped' bytecode without debug information is generated. This speeds
|
||||
up later bytecode loading and reduces memory usage. See also the
|
||||
<a href="running.html#opt_b"><tt>-b</tt> command line option</a>.
|
||||
</p>
|
||||
<p>
|
||||
The generated bytecode is portable and can be loaded on any architecture
|
||||
that LuaJIT supports, independent of word size or endianness. However, the
|
||||
bytecode compatibility versions must match. Bytecode stays compatible
|
||||
for dot releases (x.y.0 → x.y.1), but may change with major or
|
||||
minor releases (2.0 → 2.1) or between any beta release. Foreign
|
||||
bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
|
||||
that LuaJIT supports. However, the bytecode compatibility versions must
|
||||
match. Bytecode only stays compatible within a major+minor version
|
||||
(x.y.aaa → x.y.bbb), except for development branches. Foreign bytecode
|
||||
(e.g. from Lua 5.1) is incompatible and cannot be loaded.
|
||||
</p>
|
||||
<p>
|
||||
Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
|
||||
a different, incompatible bytecode format for all 64 bit ports. This may be
|
||||
rectified in the future.
|
||||
a different, incompatible bytecode format between 32 bit and 64 bit ports.
|
||||
This may be rectified in the future. In the meantime, use the <tt>W</tt>
|
||||
and <tt>X</tt> <a href="#load_mode">modes of the <tt>load*</tt> functions</a>
|
||||
for cross-compilation purposes.
|
||||
</p>
|
||||
<p>
|
||||
Due to VM hardening, bytecode is not deterministic. Add <tt>d</tt> to the
|
||||
mode string to dump it in a deterministic manner: identical source code
|
||||
always gives a byte-for-byte identical bytecode dump. This feature is
|
||||
mainly useful for reproducible builds.
|
||||
</p>
|
||||
|
||||
<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
|
||||
@ -286,7 +313,7 @@ enabled:
|
||||
</p>
|
||||
<ul>
|
||||
<li><tt>goto</tt> and <tt>::labels::</tt>.</li>
|
||||
<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li>
|
||||
<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\z'</tt> escape in strings.</li>
|
||||
<li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li>
|
||||
<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
|
||||
<li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
|
||||
@ -426,9 +453,7 @@ the toolchain used to compile LuaJIT:
|
||||
on the C stack. The contents of the C++ exception object
|
||||
pass through unmodified.</li>
|
||||
<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
|
||||
The corresponding Lua error message can be retrieved from the Lua stack.<br>
|
||||
For MSVC for Windows 64 bit this requires compilation of your C++ code
|
||||
with <tt>/EHa</tt>.</li>
|
||||
The corresponding Lua error message can be retrieved from the Lua stack.</li>
|
||||
<li>Throwing Lua errors across C++ frames is safe. C++ destructors
|
||||
will be called.</li>
|
||||
</ul>
|
||||
|
@ -203,7 +203,7 @@ Or install Microsoft's Visual Studio (MSVC).
|
||||
</p>
|
||||
<h3>Building with MSVC</h3>
|
||||
<p>
|
||||
Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the
|
||||
Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the
|
||||
directory with the source code and run these commands:
|
||||
</p>
|
||||
<pre class="code">
|
||||
@ -214,6 +214,9 @@ msvcbuild
|
||||
Check the <tt>msvcbuild.bat</tt> file for more options.
|
||||
Then follow the installation instructions below.
|
||||
</p>
|
||||
<p>
|
||||
For an x64 to ARM64 cross-build, run this first: <tt>vcvarsall.bat x64_arm64</tt>
|
||||
</p>
|
||||
<h3>Building with MinGW or Cygwin</h3>
|
||||
<p>
|
||||
Open a command prompt window and make sure the MinGW or Cygwin programs
|
||||
@ -266,6 +269,7 @@ for any supported target:
|
||||
<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
|
||||
<li>Both host and target architectures must have the same pointer size.</li>
|
||||
<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
|
||||
<li>On some distro versions, multilib conflicts with cross-compilers. The workaround is to install the x86 cross-compiler package <tt>gcc-i686-linux-gnu</tt> and use it to build the host part (<tt>HOST_CC=i686-linux-gnu-gcc</tt>).</li>
|
||||
<li>64 bit targets always require compilation on a 64 bit host.</li>
|
||||
</ul>
|
||||
<p>
|
||||
|
@ -106,6 +106,9 @@ are accepted:
|
||||
<li><tt>-l</tt> — Only list bytecode.</li>
|
||||
<li><tt>-s</tt> — Strip debug info (this is the default).</li>
|
||||
<li><tt>-g</tt> — Keep debug info.</li>
|
||||
<li><tt>-W</tt> — Generate 32 bit (non-GC64) bytecode.</li>
|
||||
<li><tt>-X</tt> — Generate 64 bit (GC64) bytecode.</li>
|
||||
<li><tt>-d</tt> — Generate bytecode in a deterministic manner.</li>
|
||||
<li><tt>-n name</tt> — Set module name (default: auto-detect from input name).</li>
|
||||
<li><tt>-t type</tt> — Set output file type (default: auto-detect from output name).</li>
|
||||
<li><tt>-a arch</tt> — Override architecture for object files (default: native).</li>
|
||||
@ -120,7 +123,8 @@ file name:
|
||||
</p>
|
||||
<ul>
|
||||
<li><tt>c</tt> — C source file, exported bytecode data.</li>
|
||||
<li><tt>h</tt> — C header file, static bytecode data.</li>
|
||||
<li><tt>cc</tt> — C++ source file, exported bytecode data.</li>
|
||||
<li><tt>h</tt> — C/C++ header file, static bytecode data.</li>
|
||||
<li><tt>obj</tt> or <tt>o</tt> — Object file, exported bytecode data
|
||||
(OS- and architecture-specific).</li>
|
||||
<li><tt>raw</tt> or any other extension — Raw bytecode file (portable).
|
||||
|
@ -549,7 +549,7 @@ end
|
||||
local function parse_load_pair(params, nparams, n, op)
|
||||
if params[n+2] then werror("too many operands") end
|
||||
local pn, p2 = params[n], params[n+1]
|
||||
local scale = shr(op, 30) == 0 and 2 or 3
|
||||
local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
|
||||
local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
|
||||
if not p1 then
|
||||
if not p2 then
|
||||
@ -806,8 +806,8 @@ map_op = {
|
||||
["ldrsw_*"] = "98000000DxB|b8800000DxL",
|
||||
-- NOTE: ldur etc. are handled by ldr et al.
|
||||
|
||||
["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
|
||||
["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
|
||||
["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
|
||||
["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
|
||||
["ldpsw_*"] = "68400000DAxP",
|
||||
|
||||
-- Branches.
|
||||
@ -942,7 +942,7 @@ local function parse_template(params, template, nparams, pos)
|
||||
werror("bad register type")
|
||||
end
|
||||
parse_reg_type = false
|
||||
elseif p == "x" or p == "w" or p == "d" or p == "s" then
|
||||
elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
|
||||
if parse_reg_type ~= p then
|
||||
werror("register size mismatch")
|
||||
end
|
||||
|
@ -627,7 +627,11 @@ local function wputmrmsib(t, imark, s, vsreg, psz, sk)
|
||||
werror("NYI: rip-relative displacement followed by immediate")
|
||||
end
|
||||
-- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
|
||||
if disp[2] == "iPJ" then
|
||||
waction("REL_A", disp[1])
|
||||
else
|
||||
wputlabel("REL_", disp[1], 2)
|
||||
end
|
||||
else
|
||||
wputdarg(disp)
|
||||
end
|
||||
@ -744,9 +748,9 @@ local function dispexpr(expr)
|
||||
return imm*map_opsizenum[ops]
|
||||
end
|
||||
local mode, iexpr = immexpr(dispt)
|
||||
if mode == "iJ" then
|
||||
if mode == "iJ" or mode == "iPJ" then
|
||||
if c == "-" then werror("cannot invert label reference") end
|
||||
return { iexpr }
|
||||
return { iexpr, mode }
|
||||
end
|
||||
return expr -- Need to return original signed expression.
|
||||
end
|
||||
@ -1147,6 +1151,8 @@ local map_op = {
|
||||
rep_0 = "F3",
|
||||
repe_0 = "F3",
|
||||
repz_0 = "F3",
|
||||
endbr32_0 = "F30F1EFB",
|
||||
endbr64_0 = "F30F1EFA",
|
||||
-- F4: *hlt
|
||||
cmc_0 = "F5",
|
||||
-- F6: test... mb,i; div... mb
|
||||
|
@ -75,7 +75,7 @@ local function wline(line, needindent)
|
||||
g_synclineno = g_synclineno + 1
|
||||
end
|
||||
|
||||
-- Write assembler line as a comment, if requestd.
|
||||
-- Write assembler line as a comment, if requested.
|
||||
local function wcomment(aline)
|
||||
if g_opt.comment then
|
||||
wline(g_opt.comment..aline..g_opt.endcomment, true)
|
||||
|
@ -233,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG
|
||||
TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
|
||||
TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
|
||||
|
||||
TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
|
||||
TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
|
||||
ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
|
||||
TARGET_LJARCH= x64
|
||||
else
|
||||
@ -475,7 +475,11 @@ DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
|
||||
DASM_DASC= vm_$(DASM_ARCH).dasc
|
||||
|
||||
GIT= git
|
||||
ifeq (Windows,$(HOST_SYS)$(HOST_MSYS))
|
||||
GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
|
||||
else
|
||||
GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
|
||||
endif
|
||||
GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*)
|
||||
|
||||
BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include "buildvm.h"
|
||||
#include "lj_bc.h"
|
||||
|
||||
#if LJ_TARGET_X86ORX64
|
||||
#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
|
||||
|
||||
/* Context for PE object emitter. */
|
||||
static char *strtab;
|
||||
@ -93,6 +93,17 @@ typedef struct PEsymaux {
|
||||
#define PEOBJ_RELOC_ADDR32NB 0x03
|
||||
#define PEOBJ_RELOC_OFS 0
|
||||
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
|
||||
#define PEOBJ_PDATA_NRELOC 6
|
||||
#define PEOBJ_XDATA_SIZE (8*2+4+6*2)
|
||||
#elif LJ_TARGET_ARM64
|
||||
#define PEOBJ_ARCH_TARGET 0xaa64
|
||||
#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */
|
||||
#define PEOBJ_RELOC_DIR32 0x01
|
||||
#define PEOBJ_RELOC_ADDR32NB 0x02
|
||||
#define PEOBJ_RELOC_OFS (-4)
|
||||
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
|
||||
#define PEOBJ_PDATA_NRELOC 4
|
||||
#define PEOBJ_XDATA_SIZE (4+24+4 +4+8)
|
||||
#endif
|
||||
|
||||
/* Section numbers (0-based). */
|
||||
@ -100,7 +111,7 @@ enum {
|
||||
PEOBJ_SECT_ABS = -2,
|
||||
PEOBJ_SECT_UNDEF = -1,
|
||||
PEOBJ_SECT_TEXT,
|
||||
#if LJ_TARGET_X64
|
||||
#ifdef PEOBJ_PDATA_NRELOC
|
||||
PEOBJ_SECT_PDATA,
|
||||
PEOBJ_SECT_XDATA,
|
||||
#elif LJ_TARGET_X86
|
||||
@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx)
|
||||
uint32_t sofs;
|
||||
int i, nrsym;
|
||||
union { uint8_t b; uint32_t u; } host_endian;
|
||||
#ifdef PEOBJ_PDATA_NRELOC
|
||||
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
|
||||
#endif
|
||||
|
||||
sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
|
||||
|
||||
@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx)
|
||||
/* Flags: 60 = read+execute, 50 = align16, 20 = code. */
|
||||
pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
|
||||
|
||||
#if LJ_TARGET_X64
|
||||
#ifdef PEOBJ_PDATA_NRELOC
|
||||
memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
|
||||
pesect[PEOBJ_SECT_PDATA].ofs = sofs;
|
||||
sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4);
|
||||
sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4);
|
||||
pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
|
||||
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE;
|
||||
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE;
|
||||
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
|
||||
pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
|
||||
|
||||
memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
|
||||
pesect[PEOBJ_SECT_XDATA].ofs = sofs;
|
||||
sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */
|
||||
sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */
|
||||
pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
|
||||
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
|
||||
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
|
||||
@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx)
|
||||
*/
|
||||
nrsym = ctx->nrelocsym;
|
||||
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
|
||||
#if LJ_TARGET_X64
|
||||
#ifdef PEOBJ_PDATA_NRELOC
|
||||
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
|
||||
#endif
|
||||
|
||||
@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx)
|
||||
|
||||
#if LJ_TARGET_X64
|
||||
{ /* Write .pdata section. */
|
||||
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
|
||||
uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */
|
||||
PEreloc reloc;
|
||||
pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
|
||||
@ -308,6 +321,87 @@ void emit_peobj(BuildCtx *ctx)
|
||||
reloc.type = PEOBJ_RELOC_ADDR32NB;
|
||||
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
|
||||
}
|
||||
#elif LJ_TARGET_ARM64
|
||||
/* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
|
||||
{ /* Write .pdata section. */
|
||||
uint32_t pdata[4];
|
||||
PEreloc reloc;
|
||||
pdata[0] = 0;
|
||||
pdata[1] = 0;
|
||||
pdata[2] = fcofs;
|
||||
pdata[3] = 4+24+4;
|
||||
owrite(ctx, &pdata, sizeof(pdata));
|
||||
/* Start of .text and start of .xdata. */
|
||||
reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
|
||||
reloc.type = PEOBJ_RELOC_ADDR32NB;
|
||||
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
|
||||
reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
|
||||
reloc.type = PEOBJ_RELOC_ADDR32NB;
|
||||
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
|
||||
/* Start of vm_ffi_call and start of second part of .xdata. */
|
||||
reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
|
||||
reloc.type = PEOBJ_RELOC_ADDR32NB;
|
||||
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
|
||||
reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
|
||||
reloc.type = PEOBJ_RELOC_ADDR32NB;
|
||||
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
|
||||
}
|
||||
{ /* Write .xdata section. */
|
||||
uint32_t u32;
|
||||
uint8_t *p, uwc[24];
|
||||
PEreloc reloc;
|
||||
|
||||
#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
|
||||
#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */
|
||||
#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */
|
||||
#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
|
||||
#define CSAVE_REGS(r1,r2,o1) do { \
|
||||
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
|
||||
} while (0)
|
||||
#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
|
||||
#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
|
||||
#define CSAVE_FREGS(r1,r2,o1) do { \
|
||||
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
|
||||
} while (0)
|
||||
#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */
|
||||
#define CODE_NOP 0xe3
|
||||
#define CODE_END 0xe4
|
||||
#define CEND_ALIGN do { \
|
||||
*p++ = CODE_END; \
|
||||
while ((p - uwc) & 3) *p++ = CODE_NOP; \
|
||||
} while (0)
|
||||
|
||||
/* Unwind codes for .text section with handler. */
|
||||
p = uwc;
|
||||
CADD_FP(192); /* +2 */
|
||||
CSAVE_REGS(19, 28, 176); /* +5*2 */
|
||||
CSAVE_FREGS(8, 15, 96); /* +4*2 */
|
||||
CSAVE_FPLR(192); /* +1 */
|
||||
CALLOC_S(208); /* +1 */
|
||||
CEND_ALIGN; /* +1 +1 -> 24 */
|
||||
|
||||
u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
|
||||
owrite(ctx, &u32, 4);
|
||||
owrite(ctx, &uwc, 24);
|
||||
|
||||
u32 = 0; /* Handler RVA to be relocated at 4 + 24. */
|
||||
owrite(ctx, &u32, 4);
|
||||
|
||||
/* Unwind codes for vm_ffi_call without handler. */
|
||||
p = uwc;
|
||||
CADD_FP(16); /* +2 */
|
||||
CSAVE_FPLR(16); /* +1 */
|
||||
CSAVE_REGPX(19, -32); /* +2 */
|
||||
CEND_ALIGN; /* +1 +2 -> 8 */
|
||||
|
||||
u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
|
||||
owrite(ctx, &u32, 4);
|
||||
owrite(ctx, &uwc, 8);
|
||||
|
||||
reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
|
||||
reloc.type = PEOBJ_RELOC_ADDR32NB;
|
||||
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
|
||||
}
|
||||
#elif LJ_TARGET_X86
|
||||
/* Write .sxdata section. */
|
||||
for (i = 0; i < nrsym; i++) {
|
||||
@ -339,7 +433,7 @@ void emit_peobj(BuildCtx *ctx)
|
||||
emit_peobj_sym(ctx, ctx->relocsym[i], 0,
|
||||
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
|
||||
|
||||
#if LJ_TARGET_X64
|
||||
#ifdef PEOBJ_PDATA_NRELOC
|
||||
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
|
||||
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
|
||||
emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
|
||||
|
@ -138,23 +138,23 @@ local function fixup_dump(dump, fixup)
|
||||
return { dump = ndump, startbc = startbc, sizebc = sizebc }
|
||||
end
|
||||
|
||||
local function find_defs(src)
|
||||
local function find_defs(src, mode)
|
||||
local defs = {}
|
||||
for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
|
||||
local env = {}
|
||||
local tcode, fixup = transform_lua(code)
|
||||
local func = assert(load(tcode, "", nil, env))()
|
||||
defs[name] = fixup_dump(string.dump(func, true), fixup)
|
||||
local func = assert(load(tcode, "", mode))
|
||||
defs[name] = fixup_dump(string.dump(func, mode), fixup)
|
||||
defs[#defs+1] = name
|
||||
end
|
||||
return defs
|
||||
end
|
||||
|
||||
local function gen_header(defs)
|
||||
local function gen_header(defs32, defs64)
|
||||
local t = {}
|
||||
local function w(x) t[#t+1] = x end
|
||||
w("/* This is a generated file. DO NOT EDIT! */\n\n")
|
||||
w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
|
||||
for j,defs in ipairs{defs64, defs32} do
|
||||
local s, sb = "", ""
|
||||
for i,name in ipairs(defs) do
|
||||
local d = defs[name]
|
||||
@ -163,7 +163,11 @@ local function gen_header(defs)
|
||||
.. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
|
||||
.. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
|
||||
end
|
||||
w("static const uint8_t libbc_code[] = {\n")
|
||||
if j == 1 then
|
||||
w("static const uint8_t libbc_code[] = {\n#if LJ_FR2\n")
|
||||
else
|
||||
w("\n#else\n")
|
||||
end
|
||||
local n = 0
|
||||
for i=1,#s do
|
||||
local x = string.byte(s, i)
|
||||
@ -189,14 +193,18 @@ local function gen_header(defs)
|
||||
end
|
||||
w(",")
|
||||
end
|
||||
w("\n0\n};\n\n")
|
||||
w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
|
||||
local m = 0
|
||||
for _,name in ipairs(defs) do
|
||||
w('{"'); w(name); w('",'); w(m) w('},\n')
|
||||
m = m + #defs[name].dump
|
||||
end
|
||||
w("{NULL,"); w(m); w("}\n};\n\n")
|
||||
w("\n#endif\n0\n};\n\n")
|
||||
w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
|
||||
local m32, m64 = 0, 0
|
||||
for i,name in ipairs(defs32) do
|
||||
assert(name == defs64[i])
|
||||
w('{"'); w(name); w('",'); w(m32) w('},\n')
|
||||
m32 = m32 + #defs32[name].dump
|
||||
m64 = m64 + #defs64[name].dump
|
||||
assert(m32 == m64)
|
||||
end
|
||||
w("{NULL,"); w(m32); w("}\n};\n\n")
|
||||
return table.concat(t)
|
||||
end
|
||||
|
||||
@ -219,7 +227,8 @@ end
|
||||
|
||||
local outfile = parse_arg(arg)
|
||||
local src = read_files(arg)
|
||||
local defs = find_defs(src)
|
||||
local hdr = gen_header(defs)
|
||||
local defs32 = find_defs(src, "Wdts")
|
||||
local defs64 = find_defs(src, "Xdts")
|
||||
local hdr = gen_header(defs32, defs64)
|
||||
write_file(outfile, hdr)
|
||||
|
||||
|
@ -5,9 +5,10 @@
|
||||
-- Released under the MIT license. See Copyright Notice in luajit.h
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
local FILE_ROLLING_H = "luajit_rolling.h"
|
||||
local FILE_RELVER_TXT = "luajit_relver.txt"
|
||||
local FILE_LUAJIT_H = "luajit.h"
|
||||
local arg = {...}
|
||||
local FILE_ROLLING_H = arg[1] or "luajit_rolling.h"
|
||||
local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt"
|
||||
local FILE_LUAJIT_H = arg[3] or "luajit.h"
|
||||
|
||||
local function file_read(file)
|
||||
local fp = assert(io.open(file, "rb"), "run from the wrong directory")
|
||||
@ -28,7 +29,7 @@ local function file_write_mod(file, data)
|
||||
assert(fp:close())
|
||||
end
|
||||
|
||||
local text = file_read(FILE_ROLLING_H)
|
||||
local text = file_read(FILE_ROLLING_H):gsub("#error.-\n", "")
|
||||
local relver = file_read(FILE_RELVER_TXT):match("(%d+)")
|
||||
|
||||
if relver then
|
||||
|
@ -29,6 +29,9 @@ Save LuaJIT bytecode: luajit -b[options] input output
|
||||
-l Only list bytecode.
|
||||
-s Strip debug info (default).
|
||||
-g Keep debug info.
|
||||
-W Generate 32 bit (non-GC64) bytecode.
|
||||
-X Generate 64 bit (GC64) bytecode.
|
||||
-d Generate bytecode in deterministic manner.
|
||||
-n name Set module name (default: auto-detect from input name).
|
||||
-t type Set output file type (default: auto-detect from output name).
|
||||
-a arch Override architecture for object files (default: native).
|
||||
@ -38,7 +41,7 @@ Save LuaJIT bytecode: luajit -b[options] input output
|
||||
-- Stop handling options.
|
||||
- Use stdin as input and/or stdout as output.
|
||||
|
||||
File types: c h obj o raw (default)
|
||||
File types: c cc h obj o raw (default)
|
||||
]]
|
||||
os.exit(1)
|
||||
end
|
||||
@ -51,8 +54,9 @@ local function check(ok, ...)
|
||||
end
|
||||
|
||||
local function readfile(ctx, input)
|
||||
if type(input) == "function" then return input end
|
||||
if ctx.filename then
|
||||
if ctx.string then
|
||||
return check(loadstring(input, nil, ctx.mode))
|
||||
elseif ctx.filename then
|
||||
local data
|
||||
if input == "-" then
|
||||
data = io.stdin:read("*a")
|
||||
@ -61,10 +65,10 @@ local function readfile(ctx, input)
|
||||
data = assert(fp:read("*a"))
|
||||
assert(fp:close())
|
||||
end
|
||||
return check(load(data, ctx.filename))
|
||||
return check(load(data, ctx.filename, ctx.mode))
|
||||
else
|
||||
if input == "-" then input = nil end
|
||||
return check(loadfile(input))
|
||||
return check(loadfile(input, ctx.mode))
|
||||
end
|
||||
end
|
||||
|
||||
@ -81,7 +85,7 @@ end
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
local map_type = {
|
||||
raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
|
||||
raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
|
||||
}
|
||||
|
||||
local map_arch = {
|
||||
@ -624,7 +628,7 @@ end
|
||||
|
||||
local function bcsave(ctx, input, output)
|
||||
local f = readfile(ctx, input)
|
||||
local s = string.dump(f, ctx.strip)
|
||||
local s = string.dump(f, ctx.mode)
|
||||
local t = ctx.type
|
||||
if not t then
|
||||
t = detecttype(output)
|
||||
@ -647,9 +651,11 @@ local function docmd(...)
|
||||
local n = 1
|
||||
local list = false
|
||||
local ctx = {
|
||||
strip = true, arch = jit.arch, os = jit.os:lower(),
|
||||
type = false, modname = false,
|
||||
mode = "bt", arch = jit.arch, os = jit.os:lower(),
|
||||
type = false, modname = false, string = false,
|
||||
}
|
||||
local strip = "s"
|
||||
local gc64 = ""
|
||||
while n <= #arg do
|
||||
local a = arg[n]
|
||||
if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
|
||||
@ -660,14 +666,18 @@ local function docmd(...)
|
||||
if opt == "l" then
|
||||
list = true
|
||||
elseif opt == "s" then
|
||||
ctx.strip = true
|
||||
strip = "s"
|
||||
elseif opt == "g" then
|
||||
ctx.strip = false
|
||||
strip = ""
|
||||
elseif opt == "W" or opt == "X" then
|
||||
gc64 = opt
|
||||
elseif opt == "d" then
|
||||
ctx.mode = ctx.mode .. opt
|
||||
else
|
||||
if arg[n] == nil or m ~= #a then usage() end
|
||||
if opt == "e" then
|
||||
if n ~= 1 then usage() end
|
||||
arg[1] = check(loadstring(arg[1]))
|
||||
ctx.string = true
|
||||
elseif opt == "n" then
|
||||
ctx.modname = checkmodname(tremove(arg, n))
|
||||
elseif opt == "t" then
|
||||
@ -687,6 +697,7 @@ local function docmd(...)
|
||||
n = n + 1
|
||||
end
|
||||
end
|
||||
ctx.mode = ctx.mode .. strip .. gc64
|
||||
if list then
|
||||
if #arg == 0 or #arg > 2 then usage() end
|
||||
bclist(ctx, arg[1], arg[2] or "-")
|
||||
|
@ -107,24 +107,20 @@ local map_logsr = { -- Logical, shifted register.
|
||||
[0] = {
|
||||
shift = 29, mask = 3,
|
||||
[0] = {
|
||||
shift = 21, mask = 7,
|
||||
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
|
||||
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
|
||||
shift = 21, mask = 1,
|
||||
[0] = "andDNMSg", "bicDNMSg"
|
||||
},
|
||||
{
|
||||
shift = 21, mask = 7,
|
||||
[0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
|
||||
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
|
||||
shift = 21, mask = 1,
|
||||
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
|
||||
},
|
||||
{
|
||||
shift = 21, mask = 7,
|
||||
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
|
||||
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
|
||||
shift = 21, mask = 1,
|
||||
[0] = "eorDNMSg", "eonDNMSg"
|
||||
},
|
||||
{
|
||||
shift = 21, mask = 7,
|
||||
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
|
||||
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
|
||||
shift = 21, mask = 1,
|
||||
[0] = "ands|tstD0NMSg", "bicsDNMSg"
|
||||
}
|
||||
},
|
||||
false -- unallocated
|
||||
@ -132,24 +128,20 @@ local map_logsr = { -- Logical, shifted register.
|
||||
{
|
||||
shift = 29, mask = 3,
|
||||
[0] = {
|
||||
shift = 21, mask = 7,
|
||||
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
|
||||
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
|
||||
shift = 21, mask = 1,
|
||||
[0] = "andDNMSg", "bicDNMSg"
|
||||
},
|
||||
{
|
||||
shift = 21, mask = 7,
|
||||
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
|
||||
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
|
||||
shift = 21, mask = 1,
|
||||
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
|
||||
},
|
||||
{
|
||||
shift = 21, mask = 7,
|
||||
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
|
||||
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
|
||||
shift = 21, mask = 1,
|
||||
[0] = "eorDNMSg", "eonDNMSg"
|
||||
},
|
||||
{
|
||||
shift = 21, mask = 7,
|
||||
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
|
||||
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
|
||||
shift = 21, mask = 1,
|
||||
[0] = "ands|tstD0NMSg", "bicsDNMSg"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -735,7 +727,7 @@ local map_cond = {
|
||||
"hi", "ls", "ge", "lt", "gt", "le", "al",
|
||||
}
|
||||
|
||||
local map_shift = { [0] = "lsl", "lsr", "asr", }
|
||||
local map_shift = { [0] = "lsl", "lsr", "asr", "ror"}
|
||||
|
||||
local map_extend = {
|
||||
[0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
|
||||
@ -956,7 +948,7 @@ local function disass_ins(ctx)
|
||||
elseif p == "U" then
|
||||
local rn = map_regs.x[band(rshift(op, 5), 31)]
|
||||
local sz = band(rshift(op, 30), 3)
|
||||
local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
|
||||
local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
|
||||
if imm12 ~= 0 then
|
||||
x = "["..rn..", #"..imm12.."]"
|
||||
else
|
||||
@ -993,8 +985,7 @@ local function disass_ins(ctx)
|
||||
x = x.."]"
|
||||
end
|
||||
elseif p == "P" then
|
||||
local opcv, sh = rshift(op, 26), 2
|
||||
if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
|
||||
local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
|
||||
local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
|
||||
local rn = map_regs.x[band(rshift(op, 5), 31)]
|
||||
local ind = band(rshift(op, 23), 3)
|
||||
|
@ -360,7 +360,11 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
|
||||
static int load_aux(lua_State *L, int status, int envarg)
|
||||
{
|
||||
if (status == LUA_OK) {
|
||||
if (tvistab(L->base+envarg-1)) {
|
||||
/*
|
||||
** Set environment table for top-level function.
|
||||
** Don't do this for non-native bytecode, which returns a prototype.
|
||||
*/
|
||||
if (tvistab(L->base+envarg-1) && tvisfunc(L->top-1)) {
|
||||
GCfunc *fn = funcV(L->top-1);
|
||||
GCtab *t = tabV(L->base+envarg-1);
|
||||
setgcref(fn->c.env, obj2gco(t));
|
||||
@ -697,7 +701,10 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
|
||||
setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
|
||||
return FFH_RES(2);
|
||||
}
|
||||
lj_state_growstack(co, (MSize)(L->top - L->base));
|
||||
if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) {
|
||||
cTValue *msg = --co->top;
|
||||
lj_err_callermsg(L, strVdata(msg));
|
||||
}
|
||||
return FFH_RETRY;
|
||||
}
|
||||
|
||||
|
@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
|
||||
"\003win"
|
||||
#endif
|
||||
#if LJ_ABI_PAUTH
|
||||
"\007pauth"
|
||||
"\005pauth"
|
||||
#endif
|
||||
#if LJ_TARGET_UWP
|
||||
"\003uwp"
|
||||
|
@ -161,24 +161,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(version)
|
||||
|
||||
/* -- Reflection API for Lua functions ------------------------------------ */
|
||||
|
||||
/* Return prototype of first argument (Lua function or prototype object) */
|
||||
static GCproto *check_Lproto(lua_State *L, int nolua)
|
||||
{
|
||||
TValue *o = L->base;
|
||||
if (L->top > o) {
|
||||
if (tvisproto(o)) {
|
||||
return protoV(o);
|
||||
} else if (tvisfunc(o)) {
|
||||
if (isluafunc(funcV(o)))
|
||||
return funcproto(funcV(o));
|
||||
else if (nolua)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
lj_err_argt(L, 1, LUA_TFUNCTION);
|
||||
return NULL; /* unreachable */
|
||||
}
|
||||
|
||||
static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
|
||||
{
|
||||
setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
|
||||
@ -187,7 +169,7 @@ static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
|
||||
/* local info = jit.util.funcinfo(func [,pc]) */
|
||||
LJLIB_CF(jit_util_funcinfo)
|
||||
{
|
||||
GCproto *pt = check_Lproto(L, 1);
|
||||
GCproto *pt = lj_lib_checkLproto(L, 1, 1);
|
||||
if (pt) {
|
||||
BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
|
||||
GCtab *t;
|
||||
@ -229,7 +211,7 @@ LJLIB_CF(jit_util_funcinfo)
|
||||
/* local ins, m = jit.util.funcbc(func, pc) */
|
||||
LJLIB_CF(jit_util_funcbc)
|
||||
{
|
||||
GCproto *pt = check_Lproto(L, 0);
|
||||
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
|
||||
BCPos pc = (BCPos)lj_lib_checkint(L, 2);
|
||||
if (pc < pt->sizebc) {
|
||||
BCIns ins = proto_bc(pt)[pc];
|
||||
@ -246,7 +228,7 @@ LJLIB_CF(jit_util_funcbc)
|
||||
/* local k = jit.util.funck(func, idx) */
|
||||
LJLIB_CF(jit_util_funck)
|
||||
{
|
||||
GCproto *pt = check_Lproto(L, 0);
|
||||
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
|
||||
ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2);
|
||||
if (idx >= 0) {
|
||||
if (idx < (ptrdiff_t)pt->sizekn) {
|
||||
@ -266,7 +248,7 @@ LJLIB_CF(jit_util_funck)
|
||||
/* local name = jit.util.funcuvname(func, idx) */
|
||||
LJLIB_CF(jit_util_funcuvname)
|
||||
{
|
||||
GCproto *pt = check_Lproto(L, 0);
|
||||
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
|
||||
uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
|
||||
if (idx < pt->sizeuv) {
|
||||
setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx)));
|
||||
|
@ -122,11 +122,25 @@ static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
|
||||
|
||||
LJLIB_CF(string_dump)
|
||||
{
|
||||
GCfunc *fn = lj_lib_checkfunc(L, 1);
|
||||
int strip = L->base+1 < L->top && tvistruecond(L->base+1);
|
||||
SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
|
||||
GCproto *pt = lj_lib_checkLproto(L, 1, 1);
|
||||
uint32_t flags = 0;
|
||||
SBuf *sb;
|
||||
TValue *o = L->base+1;
|
||||
if (o < L->top) {
|
||||
if (tvisstr(o)) {
|
||||
const char *mode = strVdata(o);
|
||||
char c;
|
||||
while ((c = *mode++)) {
|
||||
if (c == 's') flags |= BCDUMP_F_STRIP;
|
||||
if (c == 'd') flags |= BCDUMP_F_DETERMINISTIC;
|
||||
}
|
||||
} else if (tvistruecond(o)) {
|
||||
flags |= BCDUMP_F_STRIP;
|
||||
}
|
||||
}
|
||||
sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
|
||||
L->top = L->base+1;
|
||||
if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
|
||||
if (!pt || lj_bcwrite(L, pt, writer_buf, sb, flags))
|
||||
lj_err_caller(L, LJ_ERR_STRDUMP);
|
||||
setstrV(L, L->top-1, lj_buf_str(L, sb));
|
||||
lj_gc_check(L);
|
||||
|
@ -104,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size)
|
||||
if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
|
||||
return 0; /* Stack overflow. */
|
||||
} else if (size > 0) {
|
||||
lj_state_checkstack(L, (MSize)size);
|
||||
int avail = (int)(mref(L->maxstack, TValue) - L->top);
|
||||
if (size > avail &&
|
||||
lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
|
||||
L->top--;
|
||||
return 0; /* Out of memory. */
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
@ -57,7 +57,7 @@
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_X64
|
||||
#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
|
||||
#elif defined(__aarch64__)
|
||||
#elif defined(__aarch64__) || defined(_M_ARM64)
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
|
||||
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
|
||||
@ -66,7 +66,7 @@
|
||||
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
|
||||
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
|
||||
#else
|
||||
#error "No support for this architecture (yet)"
|
||||
#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -237,7 +237,7 @@
|
||||
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
|
||||
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
|
||||
|
||||
#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
|
||||
#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
|
||||
#define LJ_ARCH_VERSION 80
|
||||
#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
|
||||
#define LJ_ARCH_VERSION 70
|
||||
@ -331,6 +331,7 @@
|
||||
#define LJ_ARCH_NOFFI 1
|
||||
#elif LJ_ARCH_BITS == 64
|
||||
#error "No support for PPC64"
|
||||
#undef LJ_TARGET_PPC
|
||||
#endif
|
||||
|
||||
#if _ARCH_PWR7
|
||||
@ -490,36 +491,45 @@
|
||||
#elif LJ_TARGET_ARM
|
||||
#if defined(__ARMEB__)
|
||||
#error "No support for big-endian ARM"
|
||||
#undef LJ_TARGET_ARM
|
||||
#endif
|
||||
#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
|
||||
#error "No support for Cortex-M CPUs"
|
||||
#undef LJ_TARGET_ARM
|
||||
#endif
|
||||
#if !(__ARM_EABI__ || LJ_TARGET_IOS)
|
||||
#error "Only ARM EABI or iOS 3.0+ ABI is supported"
|
||||
#undef LJ_TARGET_ARM
|
||||
#endif
|
||||
#elif LJ_TARGET_ARM64
|
||||
#if defined(_ILP32)
|
||||
#error "No support for ILP32 model on ARM64"
|
||||
#undef LJ_TARGET_ARM64
|
||||
#endif
|
||||
#elif LJ_TARGET_PPC
|
||||
#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
|
||||
#error "No support for little-endian PPC32"
|
||||
#undef LJ_TARGET_PPC
|
||||
#endif
|
||||
#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
|
||||
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
|
||||
#error "No support for PPC/e500, use LuaJIT 2.0"
|
||||
#undef LJ_TARGET_PPC
|
||||
#endif
|
||||
#elif LJ_TARGET_MIPS32
|
||||
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
|
||||
#error "Only o32 ABI supported for MIPS32"
|
||||
#undef LJ_TARGET_MIPS
|
||||
#endif
|
||||
#if LJ_TARGET_MIPSR6
|
||||
/* Not that useful, since most available r6 CPUs are 64 bit. */
|
||||
#error "No support for MIPS32R6"
|
||||
#undef LJ_TARGET_MIPS
|
||||
#endif
|
||||
#elif LJ_TARGET_MIPS64
|
||||
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
|
||||
/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
|
||||
#error "Only n64 ABI supported for MIPS64"
|
||||
#undef LJ_TARGET_MIPS
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
@ -606,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
|
||||
IRIns *ir = IR(ref);
|
||||
if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
|
||||
#if LJ_GC64
|
||||
#if LJ_TARGET_ARM64
|
||||
(ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
|
||||
#else
|
||||
(ir->o == IR_KINT && k == ir->i) ||
|
||||
#endif
|
||||
(ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
|
||||
((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
|
||||
k == (intptr_t)ir_kptr(ir))
|
||||
|
@ -969,26 +969,34 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
if (irref_isk(ir->op1)) {
|
||||
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
|
||||
if (irref_isk(ir->op1) && !guarded) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
|
||||
emit_lsptr(as, ARMI_LDR, dest, v);
|
||||
} else {
|
||||
Reg uv = ra_scratch(as, RSET_GPR);
|
||||
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
if (ir->o == IR_UREFC) {
|
||||
asm_guardcc(as, CC_NE);
|
||||
if (guarded) {
|
||||
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
|
||||
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
|
||||
emit_opk(as, ARMI_ADD, dest, uv,
|
||||
(int32_t)offsetof(GCupval, tv), RSET_GPR);
|
||||
emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
|
||||
} else {
|
||||
emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
|
||||
}
|
||||
emit_lso(as, ARMI_LDR, uv, func,
|
||||
if (ir->o == IR_UREFC)
|
||||
emit_opk(as, ARMI_ADD, dest, dest,
|
||||
(int32_t)offsetof(GCupval, tv), RSET_GPR);
|
||||
else
|
||||
emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
|
||||
if (guarded)
|
||||
emit_lso(as, ARMI_LDRB, RID_TMP, dest,
|
||||
(int32_t)offsetof(GCupval, closed));
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
|
||||
emit_loadi(as, dest, k);
|
||||
} else {
|
||||
emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
|
||||
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_fref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@ -1990,6 +1998,7 @@ static void asm_prof(ASMState *as, IRIns *ir)
|
||||
static void asm_stack_check(ASMState *as, BCReg topslot,
|
||||
IRIns *irp, RegSet allow, ExitNo exitno)
|
||||
{
|
||||
int savereg = 0;
|
||||
Reg pbase;
|
||||
uint32_t k;
|
||||
if (irp) {
|
||||
@ -2000,12 +2009,14 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
|
||||
pbase = rset_pickbot(allow);
|
||||
} else {
|
||||
pbase = RID_RET;
|
||||
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
|
||||
savereg = 1;
|
||||
}
|
||||
} else {
|
||||
pbase = RID_BASE;
|
||||
}
|
||||
emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
|
||||
if (savereg)
|
||||
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
|
||||
k = emit_isk12(0, (int32_t)(8*topslot));
|
||||
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
|
||||
emit_n(as, ARMI_CMP^k, RID_TMP);
|
||||
@ -2017,7 +2028,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
|
||||
if (ra_hasspill(irp->s))
|
||||
emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
|
||||
emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
|
||||
if (ra_hasspill(irp->s) && !allow)
|
||||
if (savereg)
|
||||
emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
|
||||
emit_loadi(as, RID_TMP, (i & ~4095));
|
||||
} else {
|
||||
@ -2031,11 +2042,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
|
||||
SnapEntry *map = &as->T->snapmap[snap->mapofs];
|
||||
SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
|
||||
MSize n, nent = snap->nent;
|
||||
int32_t bias = 0;
|
||||
/* Store the value of all modified slots to the Lua stack. */
|
||||
for (n = 0; n < nent; n++) {
|
||||
SnapEntry sn = map[n];
|
||||
BCReg s = snap_slot(sn);
|
||||
int32_t ofs = 8*((int32_t)s-1);
|
||||
int32_t ofs = 8*((int32_t)s-1) - bias;
|
||||
IRRef ref = snap_ref(sn);
|
||||
IRIns *ir = IR(ref);
|
||||
if ((sn & SNAP_NORESTORE))
|
||||
@ -2054,6 +2066,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
|
||||
emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);
|
||||
#else
|
||||
Reg src = ra_alloc1(as, ref, RSET_FPR);
|
||||
if (LJ_UNLIKELY(ofs < -1020 || ofs > 1020)) {
|
||||
int32_t adj = ofs & 0xffffff00; /* K12-friendly. */
|
||||
bias += adj;
|
||||
ofs -= adj;
|
||||
emit_addptr(as, RID_BASE, -adj);
|
||||
}
|
||||
emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
|
||||
#endif
|
||||
} else {
|
||||
@ -2082,6 +2100,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
|
||||
}
|
||||
checkmclim(as);
|
||||
}
|
||||
emit_addptr(as, RID_BASE, bias);
|
||||
lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
|
||||
}
|
||||
|
||||
@ -2252,7 +2271,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
||||
}
|
||||
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
|
||||
as->evenspill = nslots;
|
||||
return REGSP_HINT(RID_RET);
|
||||
return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
|
||||
}
|
||||
|
||||
static void asm_setup_target(ASMState *as)
|
||||
|
@ -84,18 +84,23 @@ static void asm_guardcc(ASMState *as, A64CC cc)
|
||||
emit_cond_branch(as, cc, target);
|
||||
}
|
||||
|
||||
/* Emit test and branch instruction to exit for guard. */
|
||||
static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
|
||||
/* Emit test and branch instruction to exit for guard, if in range. */
|
||||
static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
|
||||
{
|
||||
MCode *target = asm_exitstub_addr(as, as->snapno);
|
||||
MCode *p = as->mcp;
|
||||
ptrdiff_t delta = target - p;
|
||||
if (LJ_UNLIKELY(p == as->invmcp)) {
|
||||
if (as->orignins > 1023) return 0; /* Delta might end up too large. */
|
||||
as->loopinv = 1;
|
||||
*p = A64I_B | A64F_S26(target-p);
|
||||
emit_tnb(as, ai^0x01000000u, r, bit, p-1);
|
||||
return;
|
||||
*p = A64I_B | A64F_S26(delta);
|
||||
ai ^= 0x01000000u;
|
||||
target = p-1;
|
||||
} else if (LJ_UNLIKELY(delta >= 0x1fff)) {
|
||||
return 0;
|
||||
}
|
||||
emit_tnb(as, ai, r, bit, target);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Emit compare and branch instruction to exit for guard. */
|
||||
@ -211,16 +216,14 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
|
||||
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
|
||||
{
|
||||
IRIns *ir = IR(ref);
|
||||
int logical = (ai & 0x1f000000) == 0x0a000000;
|
||||
if (ra_hasreg(ir->r)) {
|
||||
ra_noweak(as, ir->r);
|
||||
return A64F_M(ir->r);
|
||||
} else if (irref_isk(ref)) {
|
||||
uint32_t m;
|
||||
int64_t k = get_k64val(as, ref);
|
||||
if ((ai & 0x1f000000) == 0x0a000000)
|
||||
m = emit_isk13(k, irt_is64(ir->t));
|
||||
else
|
||||
m = emit_isk12(k);
|
||||
uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
|
||||
emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
|
||||
if (m)
|
||||
return m;
|
||||
} else if (mayfuse(as, ref)) {
|
||||
@ -232,7 +235,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
|
||||
(IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
|
||||
IRIns *irl = IR(ir->op1);
|
||||
if (sh == A64SH_LSL &&
|
||||
irl->o == IR_CONV &&
|
||||
irl->o == IR_CONV && !logical &&
|
||||
irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
|
||||
shift <= 4 &&
|
||||
canfuse(as, irl)) {
|
||||
@ -242,7 +245,11 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
|
||||
Reg m = ra_alloc1(as, ir->op1, allow);
|
||||
return A64F_M(m) | A64F_SH(sh, shift);
|
||||
}
|
||||
} else if (ir->o == IR_CONV &&
|
||||
} else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) {
|
||||
Reg m = ra_alloc1(as, ir->op1, allow);
|
||||
int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
|
||||
return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
|
||||
} else if (ir->o == IR_CONV && !logical &&
|
||||
ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
|
||||
Reg m = ra_alloc1(as, ir->op1, allow);
|
||||
return A64F_M(m) | A64F_EX(A64EX_SXTW);
|
||||
@ -426,6 +433,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
||||
for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
|
||||
as->cost[gpr] = REGCOST(~0u, ASMREF_L);
|
||||
gpr = REGARG_FIRSTGPR;
|
||||
#if LJ_HASFFI && LJ_ABI_WIN
|
||||
if ((ci->flags & CCI_VARARG)) {
|
||||
fpr = REGARG_LASTFPR+1;
|
||||
}
|
||||
#endif
|
||||
for (n = 0; n < nargs; n++) { /* Setup args. */
|
||||
IRRef ref = args[n];
|
||||
IRIns *ir = IR(ref);
|
||||
@ -436,6 +448,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
||||
"reg %d not free", fpr); /* Must have been evicted. */
|
||||
ra_leftov(as, fpr, ref);
|
||||
fpr++;
|
||||
#if LJ_HASFFI && LJ_ABI_WIN
|
||||
} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
|
||||
Reg rf = ra_alloc1(as, ref, RSET_FPR);
|
||||
emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
|
||||
#endif
|
||||
} else {
|
||||
Reg r = ra_alloc1(as, ref, RSET_FPR);
|
||||
int32_t al = spalign;
|
||||
@ -541,8 +558,6 @@ static void asm_retf(ASMState *as, IRIns *ir)
|
||||
as->topslot -= (BCReg)delta;
|
||||
if ((int32_t)as->topslot < 0) as->topslot = 0;
|
||||
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
|
||||
/* Need to force a spill on REF_BASE now to update the stack slot. */
|
||||
emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
|
||||
emit_setgl(as, base, jit_base);
|
||||
emit_addptr(as, base, -8*delta);
|
||||
asm_guardcc(as, CC_NE);
|
||||
@ -666,25 +681,22 @@ static void asm_strto(ASMState *as, IRIns *ir)
|
||||
{
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
|
||||
IRRef args[2];
|
||||
Reg dest = 0, tmp;
|
||||
int destused = ra_used(ir);
|
||||
Reg tmp;
|
||||
int32_t ofs = 0;
|
||||
ra_evictset(as, RSET_SCRATCH);
|
||||
if (destused) {
|
||||
if (ra_used(ir)) {
|
||||
if (ra_hasspill(ir->s)) {
|
||||
ofs = sps_scale(ir->s);
|
||||
destused = 0;
|
||||
if (ra_hasreg(ir->r)) {
|
||||
ra_free(as, ir->r);
|
||||
ra_modified(as, ir->r);
|
||||
emit_spload(as, ir, ir->r, ofs);
|
||||
}
|
||||
} else {
|
||||
dest = ra_dest(as, ir, RSET_FPR);
|
||||
}
|
||||
}
|
||||
if (destused)
|
||||
Reg dest = ra_dest(as, ir, RSET_FPR);
|
||||
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
|
||||
}
|
||||
}
|
||||
asm_guardcnb(as, A64I_CBZ, RID_RET);
|
||||
args[0] = ir->op1; /* GCstr *str */
|
||||
args[1] = ASMREF_TMP1; /* TValue *n */
|
||||
@ -775,113 +787,75 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
||||
int destused = ra_used(ir);
|
||||
Reg dest = ra_dest(as, ir, allow);
|
||||
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
|
||||
Reg key = 0, tmp = RID_TMP;
|
||||
Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
|
||||
Reg tmp = RID_TMP, type = RID_NONE, key, tkey;
|
||||
IRRef refkey = ir->op2;
|
||||
IRIns *irkey = IR(refkey);
|
||||
int isk = irref_isk(ir->op2);
|
||||
int isk = irref_isk(refkey);
|
||||
IRType1 kt = irkey->t;
|
||||
uint32_t k = 0;
|
||||
uint32_t khash;
|
||||
MCLabel l_end, l_loop, l_next;
|
||||
MCLabel l_end, l_loop;
|
||||
rset_clear(allow, tab);
|
||||
|
||||
if (!isk) {
|
||||
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
|
||||
rset_clear(allow, key);
|
||||
if (!irt_isstr(kt)) {
|
||||
tmp = ra_scratch(as, allow);
|
||||
rset_clear(allow, tmp);
|
||||
}
|
||||
} else if (irt_isnum(kt)) {
|
||||
int64_t val = (int64_t)ir_knum(irkey)->u64;
|
||||
if (!(k = emit_isk12(val))) {
|
||||
key = ra_allock(as, val, allow);
|
||||
rset_clear(allow, key);
|
||||
}
|
||||
} else if (!irt_ispri(kt)) {
|
||||
if (!(k = emit_isk12(irkey->i))) {
|
||||
key = ra_alloc1(as, refkey, allow);
|
||||
rset_clear(allow, key);
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate constants early. */
|
||||
if (irt_isnum(kt)) {
|
||||
if (!isk) {
|
||||
tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
|
||||
ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
|
||||
rset_clear(allow, tisnum);
|
||||
}
|
||||
} else if (irt_isaddr(kt)) {
|
||||
/* Allocate register for tkey outside of the loop. */
|
||||
if (isk) {
|
||||
int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
|
||||
scr = ra_allock(as, kk, allow);
|
||||
} else {
|
||||
scr = ra_scratch(as, allow);
|
||||
}
|
||||
rset_clear(allow, scr);
|
||||
int64_t kk;
|
||||
if (irt_isaddr(kt)) {
|
||||
kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
|
||||
} else if (irt_isnum(kt)) {
|
||||
kk = (int64_t)ir_knum(irkey)->u64;
|
||||
/* Assumes -0.0 is already canonicalized to +0.0. */
|
||||
} else {
|
||||
lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
|
||||
type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
|
||||
scr = ra_scratch(as, rset_clear(allow, type));
|
||||
rset_clear(allow, scr);
|
||||
kk = ~((int64_t)~irt_toitype(kt) << 47);
|
||||
}
|
||||
k = emit_isk12(kk);
|
||||
tkey = k ? 0 : ra_allock(as, kk, allow);
|
||||
} else {
|
||||
tkey = ra_scratch(as, allow);
|
||||
}
|
||||
|
||||
/* Key not found in chain: jump to exit (if merged) or load niltv. */
|
||||
l_end = emit_label(as);
|
||||
as->invmcp = NULL;
|
||||
if (merge == IR_NE)
|
||||
if (merge == IR_NE) {
|
||||
asm_guardcc(as, CC_AL);
|
||||
else if (destused)
|
||||
emit_loada(as, dest, niltvg(J2G(as->J)));
|
||||
} else if (destused) {
|
||||
uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val));
|
||||
lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
|
||||
emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
|
||||
}
|
||||
|
||||
/* Follow hash chain until the end. */
|
||||
l_loop = --as->mcp;
|
||||
emit_n(as, A64I_CMPx^A64I_K12^0, dest);
|
||||
if (destused)
|
||||
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
|
||||
l_next = emit_label(as);
|
||||
|
||||
/* Type and value comparison. */
|
||||
if (merge == IR_EQ)
|
||||
asm_guardcc(as, CC_EQ);
|
||||
else
|
||||
emit_cond_branch(as, CC_EQ, l_end);
|
||||
emit_nm(as, A64I_CMPx^k, tmp, tkey);
|
||||
if (!destused)
|
||||
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
|
||||
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
|
||||
*l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
|
||||
|
||||
/* Construct tkey as canonicalized or tagged key. */
|
||||
if (!isk) {
|
||||
if (irt_isnum(kt)) {
|
||||
if (isk) {
|
||||
/* Assumes -0.0 is already canonicalized to +0.0. */
|
||||
if (k)
|
||||
emit_n(as, A64I_CMPx^k, tmp);
|
||||
else
|
||||
emit_nm(as, A64I_CMPx, key, tmp);
|
||||
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
|
||||
key = ra_alloc1(as, refkey, RSET_FPR);
|
||||
emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
|
||||
/* A64I_FMOV_R_D from key to tkey done below. */
|
||||
} else {
|
||||
emit_nm(as, A64I_FCMPd, key, ftmp);
|
||||
emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
|
||||
emit_cond_branch(as, CC_LO, l_next);
|
||||
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
|
||||
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
|
||||
lj_assertA(irt_isaddr(kt), "bad HREF key type");
|
||||
key = ra_alloc1(as, refkey, allow);
|
||||
type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
|
||||
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
|
||||
}
|
||||
} else if (irt_isaddr(kt)) {
|
||||
if (isk) {
|
||||
emit_nm(as, A64I_CMPx, scr, tmp);
|
||||
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
|
||||
} else {
|
||||
emit_nm(as, A64I_CMPx, tmp, scr);
|
||||
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
|
||||
}
|
||||
} else {
|
||||
emit_nm(as, A64I_CMPx, scr, type);
|
||||
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
|
||||
}
|
||||
|
||||
*l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
|
||||
if (!isk && irt_isaddr(kt)) {
|
||||
type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
|
||||
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
|
||||
rset_clear(allow, type);
|
||||
}
|
||||
/* Load main position relative to tab->node into dest. */
|
||||
khash = isk ? ir_khash(as, irkey) : 1;
|
||||
if (khash == 0) {
|
||||
@ -895,7 +869,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
||||
emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
|
||||
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
|
||||
} else if (irt_isstr(kt)) {
|
||||
/* Fetch of str->sid is cheaper than ra_allock. */
|
||||
emit_dnm(as, A64I_ANDw, dest, dest, tmp);
|
||||
emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
|
||||
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
|
||||
@ -904,23 +877,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
||||
emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
|
||||
emit_dnm(as, A64I_SUBw, dest, dest, tmp);
|
||||
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
|
||||
emit_dnm(as, A64I_EORw, dest, dest, tmp);
|
||||
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
|
||||
emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
|
||||
emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
|
||||
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
|
||||
emit_dnm(as, A64I_EORw, tmp, tmp, dest);
|
||||
if (irt_isnum(kt)) {
|
||||
emit_dnm(as, A64I_EORw, tmp, tkey, dest);
|
||||
emit_dnm(as, A64I_ADDw, dest, dest, dest);
|
||||
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
|
||||
emit_dm(as, A64I_MOVw, tmp, dest);
|
||||
emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
|
||||
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
|
||||
emit_nm(as, A64I_FCMPZd, (key & 31), 0);
|
||||
emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
|
||||
} else {
|
||||
checkmclim(as);
|
||||
emit_dm(as, A64I_MOVw, tmp, key);
|
||||
emit_dnm(as, A64I_EORw, dest, dest,
|
||||
ra_allock(as, irt_toitype(kt) << 15, allow));
|
||||
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
|
||||
emit_dm(as, A64I_MOVx, dest, key);
|
||||
emit_dnm(as, A64I_EORw, tmp, key, dest);
|
||||
emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -935,7 +903,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
int bigofs = !emit_checkofs(A64I_LDRx, kofs);
|
||||
Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
|
||||
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
Reg key, idx = node;
|
||||
Reg idx = node;
|
||||
RegSet allow = rset_exclude(RSET_GPR, node);
|
||||
uint64_t k;
|
||||
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
|
||||
@ -954,9 +922,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
} else {
|
||||
k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
|
||||
}
|
||||
key = ra_scratch(as, allow);
|
||||
emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
|
||||
emit_lso(as, A64I_LDRx, key, idx, kofs);
|
||||
emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
|
||||
emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
|
||||
if (bigofs)
|
||||
emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
|
||||
}
|
||||
@ -964,26 +931,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
if (irref_isk(ir->op1)) {
|
||||
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
|
||||
if (irref_isk(ir->op1) && !guarded) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
|
||||
emit_lsptr(as, A64I_LDRx, dest, v);
|
||||
} else {
|
||||
Reg uv = ra_scratch(as, RSET_GPR);
|
||||
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
if (ir->o == IR_UREFC) {
|
||||
asm_guardcc(as, CC_NE);
|
||||
emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
|
||||
emit_opk(as, A64I_ADDx, dest, uv,
|
||||
if (guarded)
|
||||
asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
|
||||
if (ir->o == IR_UREFC)
|
||||
emit_opk(as, A64I_ADDx, dest, dest,
|
||||
(int32_t)offsetof(GCupval, tv), RSET_GPR);
|
||||
emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
|
||||
else
|
||||
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
|
||||
if (guarded)
|
||||
emit_lso(as, A64I_LDRB, RID_TMP, dest,
|
||||
(int32_t)offsetof(GCupval, closed));
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
|
||||
emit_loadu64(as, dest, k);
|
||||
} else {
|
||||
emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
|
||||
}
|
||||
emit_lso(as, A64I_LDRx, uv, func,
|
||||
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
|
||||
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_fref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@ -1086,7 +1059,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg idx, tmp, type;
|
||||
Reg idx, tmp;
|
||||
int32_t ofs = 0;
|
||||
RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
|
||||
lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
|
||||
@ -1105,8 +1078,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
} else {
|
||||
tmp = ra_scratch(as, gpr);
|
||||
}
|
||||
type = ra_scratch(as, rset_clear(gpr, tmp));
|
||||
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
|
||||
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
|
||||
rset_clear(gpr, idx);
|
||||
if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
|
||||
if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
|
||||
@ -1118,8 +1090,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
|
||||
ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
|
||||
} else if (irt_isaddr(ir->t)) {
|
||||
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
|
||||
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
|
||||
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
|
||||
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
|
||||
} else if (irt_isnil(ir->t)) {
|
||||
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
|
||||
} else {
|
||||
@ -1242,9 +1214,8 @@ dotypecheck:
|
||||
emit_nm(as, A64I_CMPx,
|
||||
ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
|
||||
} else {
|
||||
Reg type = ra_scratch(as, allow);
|
||||
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
|
||||
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
|
||||
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
|
||||
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
|
||||
}
|
||||
emit_lso(as, A64I_LDRx, tmp, base, ofs);
|
||||
return;
|
||||
@ -1330,7 +1301,6 @@ static void asm_obar(ASMState *as, IRIns *ir)
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
|
||||
IRRef args[2];
|
||||
MCLabel l_end;
|
||||
RegSet allow = RSET_GPR;
|
||||
Reg obj, val, tmp;
|
||||
/* No need for other object barriers (yet). */
|
||||
lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
|
||||
@ -1341,9 +1311,8 @@ static void asm_obar(ASMState *as, IRIns *ir)
|
||||
asm_gencall(as, ci, args);
|
||||
emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
|
||||
obj = IR(ir->op1)->r;
|
||||
tmp = ra_scratch(as, rset_exclude(allow, obj));
|
||||
emit_cond_branch(as, CC_EQ, l_end);
|
||||
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
|
||||
tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
|
||||
emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end);
|
||||
emit_cond_branch(as, CC_EQ, l_end);
|
||||
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
|
||||
val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
|
||||
@ -1390,12 +1359,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
|
||||
if (irref_isk(lref))
|
||||
return 1; /* But swap constants to the right. */
|
||||
ir = IR(rref);
|
||||
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
|
||||
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
|
||||
(ir->o == IR_ADD && ir->op1 == ir->op2) ||
|
||||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
|
||||
return 0; /* Don't swap fusable operands to the left. */
|
||||
ir = IR(lref);
|
||||
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
|
||||
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
|
||||
(ir->o == IR_ADD && ir->op1 == ir->op2) ||
|
||||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
|
||||
return 1; /* But swap fusable operands to the right. */
|
||||
@ -1446,8 +1415,7 @@ static void asm_intmul(ASMState *as, IRIns *ir)
|
||||
if (irt_isguard(ir->t)) { /* IR_MULOV */
|
||||
asm_guardcc(as, CC_NE);
|
||||
emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
|
||||
emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
|
||||
emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
|
||||
emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest);
|
||||
emit_dnm(as, A64I_SMULL, dest, right, left);
|
||||
} else {
|
||||
emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
|
||||
@ -1707,16 +1675,15 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
|
||||
if (asm_swapops(as, blref, brref)) {
|
||||
Reg tmp = blref; blref = brref; brref = tmp;
|
||||
}
|
||||
bleft = ra_alloc1(as, blref, RSET_GPR);
|
||||
if (irref_isk(brref)) {
|
||||
uint64_t k = get_k64val(as, brref);
|
||||
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
|
||||
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
|
||||
ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
|
||||
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) &&
|
||||
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft,
|
||||
emit_ctz64(k)))
|
||||
return;
|
||||
}
|
||||
m2 = emit_isk13(k, irt_is64(irl->t));
|
||||
}
|
||||
bleft = ra_alloc1(as, blref, RSET_GPR);
|
||||
ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
|
||||
if (!m2)
|
||||
m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
|
||||
@ -1791,37 +1758,28 @@ static void asm_prof(ASMState *as, IRIns *ir)
|
||||
static void asm_stack_check(ASMState *as, BCReg topslot,
|
||||
IRIns *irp, RegSet allow, ExitNo exitno)
|
||||
{
|
||||
Reg pbase;
|
||||
uint32_t k;
|
||||
Reg pbase = RID_BASE;
|
||||
if (irp) {
|
||||
if (!ra_hasspill(irp->s)) {
|
||||
pbase = irp->r;
|
||||
lj_assertA(ra_hasreg(pbase), "base reg lost");
|
||||
} else if (allow) {
|
||||
pbase = rset_pickbot(allow);
|
||||
} else {
|
||||
pbase = RID_RET;
|
||||
emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
|
||||
}
|
||||
} else {
|
||||
pbase = RID_BASE;
|
||||
if (!ra_hasreg(pbase))
|
||||
pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET);
|
||||
}
|
||||
emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
|
||||
if (pbase & 0x80) /* Restore temp. register. */
|
||||
emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
|
||||
k = emit_isk12((8*topslot));
|
||||
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
|
||||
emit_n(as, A64I_CMPx^k, RID_TMP);
|
||||
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
|
||||
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31));
|
||||
emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
|
||||
(int32_t)offsetof(lua_State, maxstack));
|
||||
if (irp) { /* Must not spill arbitrary registers in head of side trace. */
|
||||
if (ra_hasspill(irp->s))
|
||||
emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
|
||||
emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
|
||||
if (ra_hasspill(irp->s) && !allow)
|
||||
emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
|
||||
} else {
|
||||
emit_getgl(as, RID_TMP, cur_L);
|
||||
if (pbase & 0x40) {
|
||||
emit_getgl(as, (pbase & 31), jit_base);
|
||||
if (pbase & 0x80) /* Save temp register. */
|
||||
emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0);
|
||||
}
|
||||
emit_getgl(as, RID_TMP, cur_L);
|
||||
}
|
||||
|
||||
/* Restore Lua stack from on-trace state. */
|
||||
@ -1863,7 +1821,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
|
||||
|
||||
/* Marker to prevent patching the GC check exit. */
|
||||
#define ARM64_NOPATCH_GC_CHECK \
|
||||
(A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
|
||||
(A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
|
||||
|
||||
/* Check GC threshold and do one or more GC steps. */
|
||||
static void asm_gc_check(ASMState *as)
|
||||
@ -1918,47 +1876,41 @@ static void asm_loop_tail_fixup(ASMState *as)
|
||||
|
||||
/* -- Head of trace ------------------------------------------------------- */
|
||||
|
||||
/* Reload L register from g->cur_L. */
|
||||
static void asm_head_lreg(ASMState *as)
|
||||
{
|
||||
IRIns *ir = IR(ASMREF_L);
|
||||
if (ra_used(ir)) {
|
||||
Reg r = ra_dest(as, ir, RSET_GPR);
|
||||
emit_getgl(as, r, cur_L);
|
||||
ra_evictk(as);
|
||||
}
|
||||
}
|
||||
|
||||
/* Coalesce BASE register for a root trace. */
|
||||
static void asm_head_root_base(ASMState *as)
|
||||
{
|
||||
IRIns *ir;
|
||||
asm_head_lreg(as);
|
||||
ir = IR(REF_BASE);
|
||||
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
|
||||
ra_spill(as, ir);
|
||||
ra_destreg(as, ir, RID_BASE);
|
||||
IRIns *ir = IR(REF_BASE);
|
||||
Reg r = ir->r;
|
||||
if (ra_hasreg(r)) {
|
||||
ra_free(as, r);
|
||||
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
|
||||
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
|
||||
if (r != RID_BASE)
|
||||
emit_movrr(as, ir, r, RID_BASE);
|
||||
}
|
||||
}
|
||||
|
||||
/* Coalesce BASE register for a side trace. */
|
||||
static Reg asm_head_side_base(ASMState *as, IRIns *irp)
|
||||
{
|
||||
IRIns *ir;
|
||||
asm_head_lreg(as);
|
||||
ir = IR(REF_BASE);
|
||||
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
|
||||
ra_spill(as, ir);
|
||||
if (ra_hasspill(irp->s)) {
|
||||
return ra_dest(as, ir, RSET_GPR);
|
||||
IRIns *ir = IR(REF_BASE);
|
||||
Reg r = ir->r;
|
||||
if (ra_hasreg(r)) {
|
||||
ra_free(as, r);
|
||||
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
|
||||
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
|
||||
if (irp->r == r) {
|
||||
return r; /* Same BASE register already coalesced. */
|
||||
} else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
|
||||
/* Move from coalesced parent reg. */
|
||||
emit_movrr(as, ir, r, irp->r);
|
||||
return irp->r;
|
||||
} else {
|
||||
Reg r = irp->r;
|
||||
lj_assertA(ra_hasreg(r), "base reg lost");
|
||||
if (r != ir->r && !rset_test(as->freeset, r))
|
||||
ra_restore(as, regcost_ref(as->cost[r]));
|
||||
ra_destreg(as, ir, r);
|
||||
return r;
|
||||
emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
|
||||
}
|
||||
}
|
||||
return RID_NONE;
|
||||
}
|
||||
|
||||
/* -- Tail of trace ------------------------------------------------------- */
|
||||
|
||||
@ -2009,6 +1961,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
||||
int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
|
||||
int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
|
||||
asm_collectargs(as, ir, ci, args);
|
||||
#if LJ_ABI_WIN
|
||||
if ((ci->flags & CCI_VARARG)) nfpr = 0;
|
||||
#endif
|
||||
for (i = 0; i < nargs; i++) {
|
||||
int al = spalign;
|
||||
if (!args[i]) {
|
||||
@ -2020,7 +1975,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
||||
#endif
|
||||
} else if (irt_isfp(IR(args[i])->t)) {
|
||||
if (nfpr > 0) { nfpr--; continue; }
|
||||
#if LJ_TARGET_OSX
|
||||
#if LJ_ABI_WIN
|
||||
if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
|
||||
#elif LJ_TARGET_OSX
|
||||
al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
|
||||
#endif
|
||||
} else {
|
||||
@ -2036,7 +1993,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
||||
as->evenspill = nslots;
|
||||
}
|
||||
#endif
|
||||
return REGSP_HINT(RID_RET);
|
||||
return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
|
||||
}
|
||||
|
||||
static void asm_setup_target(ASMState *as)
|
||||
|
@ -656,8 +656,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
|
||||
emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
|
||||
#else
|
||||
emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
|
||||
emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
|
||||
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
|
||||
emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
|
||||
#endif
|
||||
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
|
||||
(void *)&as->J->k64[LJ_K64_2P63],
|
||||
@ -673,8 +673,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
|
||||
emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
|
||||
#else
|
||||
emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
|
||||
emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
|
||||
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
|
||||
emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
|
||||
#endif
|
||||
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
|
||||
(void *)&as->J->k32[LJ_K32_2P63],
|
||||
@ -690,8 +690,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
MIPSIns mi = irt_is64(ir->t) ?
|
||||
(st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
|
||||
(st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
|
||||
emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left);
|
||||
emit_fg(as, mi, left, left);
|
||||
emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, tmp);
|
||||
emit_fg(as, mi, tmp, left);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -1207,24 +1207,31 @@ nolo:
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
if (irref_isk(ir->op1)) {
|
||||
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
|
||||
if (irref_isk(ir->op1) && !guarded) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
|
||||
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
|
||||
} else {
|
||||
Reg uv = ra_scratch(as, RSET_GPR);
|
||||
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
if (ir->o == IR_UREFC) {
|
||||
asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
|
||||
emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
|
||||
emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
|
||||
if (guarded)
|
||||
asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
|
||||
if (ir->o == IR_UREFC)
|
||||
emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
|
||||
else
|
||||
emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
|
||||
if (guarded)
|
||||
emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
|
||||
emit_loada(as, dest, o);
|
||||
} else {
|
||||
emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
|
||||
}
|
||||
emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
|
||||
emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
|
||||
(int32_t)offsetof(GCfuncL, uvptr) +
|
||||
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_fref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
|
@ -840,25 +840,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
if (irref_isk(ir->op1)) {
|
||||
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
|
||||
if (irref_isk(ir->op1) && !guarded) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
|
||||
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
|
||||
} else {
|
||||
Reg uv = ra_scratch(as, RSET_GPR);
|
||||
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
if (ir->o == IR_UREFC) {
|
||||
asm_guardcc(as, CC_NE);
|
||||
if (guarded) {
|
||||
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
|
||||
emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
|
||||
emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
|
||||
emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
|
||||
} else {
|
||||
emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
|
||||
}
|
||||
emit_tai(as, PPCI_LWZ, uv, func,
|
||||
if (ir->o == IR_UREFC)
|
||||
emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
|
||||
else
|
||||
emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
|
||||
if (guarded)
|
||||
emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
|
||||
emit_loadi(as, dest, k);
|
||||
} else {
|
||||
emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
|
||||
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_fref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
|
@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
|
||||
/* Check if there's no conflicting instruction between curins and ref.
|
||||
** Also avoid fusing loads if there are multiple references.
|
||||
*/
|
||||
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
|
||||
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
|
||||
{
|
||||
IRIns *ir = as->ir;
|
||||
IRRef i = as->curins;
|
||||
@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
|
||||
while (--i > ref) {
|
||||
if (ir[i].o == conflict)
|
||||
return 0; /* Conflict found. */
|
||||
else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
|
||||
else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
|
||||
return 0;
|
||||
else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
|
||||
return 0;
|
||||
}
|
||||
return 1; /* Ok, no conflict. */
|
||||
@ -134,13 +136,14 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
|
||||
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
|
||||
/* We can avoid the FLOAD of t->array for colocated arrays. */
|
||||
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
|
||||
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
|
||||
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
|
||||
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
|
||||
return irb->op1; /* Table obj. */
|
||||
}
|
||||
} else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
|
||||
/* Fuse base offset (vararg load). */
|
||||
as->mrm.ofs = IR(irb->op2)->i;
|
||||
IRIns *irk = IR(irb->op2);
|
||||
as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
|
||||
return irb->op1;
|
||||
}
|
||||
return ref; /* Otherwise use the given array base. */
|
||||
@ -455,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
|
||||
if (ir->o == IR_SLOAD) {
|
||||
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
|
||||
noconflict(as, ref, IR_RETF, 0) &&
|
||||
noconflict(as, ref, IR_RETF, 2) &&
|
||||
!(LJ_GC64 && irt_isaddr(ir->t))) {
|
||||
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
|
||||
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
|
||||
@ -466,12 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
} else if (ir->o == IR_FLOAD) {
|
||||
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
|
||||
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
|
||||
noconflict(as, ref, IR_FSTORE, 0)) {
|
||||
noconflict(as, ref, IR_FSTORE, 2)) {
|
||||
asm_fusefref(as, ir, xallow);
|
||||
return RID_MRM;
|
||||
}
|
||||
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
|
||||
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
|
||||
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
|
||||
!(LJ_GC64 && irt_isaddr(ir->t))) {
|
||||
asm_fuseahuref(as, ir->op1, xallow);
|
||||
return RID_MRM;
|
||||
@ -481,7 +484,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
|
||||
*/
|
||||
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
|
||||
noconflict(as, ref, IR_XSTORE, 0)) {
|
||||
noconflict(as, ref, IR_XSTORE, 2)) {
|
||||
asm_fusexref(as, ir->op1, xallow);
|
||||
return RID_MRM;
|
||||
}
|
||||
@ -814,6 +817,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
|
||||
emit_rr(as, XO_UCOMISD, left, tmp);
|
||||
emit_rr(as, XO_CVTSI2SD, tmp, dest);
|
||||
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
|
||||
checkmclim(as);
|
||||
emit_rr(as, XO_CVTTSD2SI, dest, left);
|
||||
/* Can't fuse since left is needed twice. */
|
||||
}
|
||||
@ -856,6 +860,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||||
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
|
||||
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
|
||||
emit_rma(as, XO_MOVSD, bias, k);
|
||||
checkmclim(as);
|
||||
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
|
||||
return;
|
||||
} else { /* Integer to FP conversion. */
|
||||
@ -1172,6 +1177,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
||||
asm_guardcc(as, CC_E);
|
||||
else
|
||||
emit_sjcc(as, CC_E, l_end);
|
||||
checkmclim(as);
|
||||
if (irt_isnum(kt)) {
|
||||
if (isk) {
|
||||
/* Assumes -0.0 is already canonicalized to +0.0. */
|
||||
@ -1231,7 +1237,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
||||
#endif
|
||||
}
|
||||
emit_sfixup(as, l_loop);
|
||||
checkmclim(as);
|
||||
#if LJ_GC64
|
||||
if (!isk && irt_isaddr(kt)) {
|
||||
emit_rr(as, XO_OR, tmp|REX_64, key);
|
||||
@ -1258,6 +1263,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
||||
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
|
||||
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
|
||||
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
|
||||
checkmclim(as);
|
||||
emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
|
||||
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
|
||||
emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
|
||||
@ -1275,7 +1281,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
||||
} else {
|
||||
emit_rr(as, XO_MOV, tmp, key);
|
||||
#if LJ_GC64
|
||||
checkmclim(as);
|
||||
emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
|
||||
if ((as->flags & JIT_F_BMI2)) {
|
||||
emit_i8(as, 32);
|
||||
@ -1372,26 +1377,33 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
if (irref_isk(ir->op1)) {
|
||||
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
|
||||
if (irref_isk(ir->op1) && !guarded) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
|
||||
emit_rma(as, XO_MOV, dest|REX_GC64, v);
|
||||
} else {
|
||||
Reg uv = ra_scratch(as, RSET_GPR);
|
||||
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
if (ir->o == IR_UREFC) {
|
||||
if (ir->o == IR_UREFC)
|
||||
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
|
||||
asm_guardcc(as, CC_NE);
|
||||
emit_i8(as, 1);
|
||||
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
|
||||
} else {
|
||||
else
|
||||
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
|
||||
if (guarded) {
|
||||
asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
|
||||
emit_i8(as, 0);
|
||||
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
|
||||
}
|
||||
emit_rmro(as, XO_MOV, uv|REX_GC64, func,
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
||||
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
|
||||
emit_loada(as, uv, o);
|
||||
} else {
|
||||
emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
|
||||
(int32_t)offsetof(GCfuncL, uvptr) +
|
||||
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_fref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@ -1546,6 +1558,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
if (irt_islightud(ir->t)) {
|
||||
Reg dest = asm_load_lightud64(as, ir, 1);
|
||||
if (ra_hasreg(dest)) {
|
||||
checkmclim(as);
|
||||
asm_fuseahuref(as, ir->op1, RSET_GPR);
|
||||
if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
|
||||
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
|
||||
@ -1593,6 +1606,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
|
||||
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
|
||||
"bad load type %d", irt_type(ir->t));
|
||||
checkmclim(as);
|
||||
#if LJ_GC64
|
||||
emit_u32(as, LJ_TISNUM << 15);
|
||||
#else
|
||||
|
@ -46,6 +46,8 @@
|
||||
|
||||
#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
|
||||
|
||||
#define BCDUMP_F_DETERMINISTIC 0x80000000
|
||||
|
||||
/* Type codes for the GC constants of a prototype. Plus length for strings. */
|
||||
enum {
|
||||
BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64,
|
||||
@ -61,7 +63,7 @@ enum {
|
||||
/* -- Bytecode reader/writer ---------------------------------------------- */
|
||||
|
||||
LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
|
||||
void *data, int strip);
|
||||
void *data, uint32_t flags);
|
||||
LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
|
||||
LJ_FUNC GCproto *lj_bcread(LexState *ls);
|
||||
|
||||
|
@ -281,8 +281,11 @@ static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
|
||||
static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
|
||||
{
|
||||
BCIns *bc = proto_bc(pt);
|
||||
bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
|
||||
pt->framesize, 0);
|
||||
BCIns op;
|
||||
if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */
|
||||
else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV;
|
||||
else op = BC_FUNCF;
|
||||
bc[0] = BCINS_AD(op, pt->framesize, 0);
|
||||
bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
|
||||
/* Swap bytecode instructions if the endianness differs. */
|
||||
if (bcread_swap(ls)) {
|
||||
@ -395,7 +398,7 @@ static int bcread_header(LexState *ls)
|
||||
bcread_byte(ls) != BCDUMP_VERSION) return 0;
|
||||
bcread_flags(ls) = flags = bcread_uleb128(ls);
|
||||
if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
|
||||
if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
|
||||
if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0;
|
||||
if ((flags & BCDUMP_F_FFI)) {
|
||||
#if LJ_HASFFI
|
||||
lua_State *L = ls->L;
|
||||
|
101
src/lj_bcwrite.c
101
src/lj_bcwrite.c
@ -27,7 +27,9 @@ typedef struct BCWriteCtx {
|
||||
GCproto *pt; /* Root prototype. */
|
||||
lua_Writer wfunc; /* Writer callback. */
|
||||
void *wdata; /* Writer callback data. */
|
||||
int strip; /* Strip debug info. */
|
||||
TValue **heap; /* Heap used for deterministic sorting. */
|
||||
uint32_t heapsz; /* Size of heap. */
|
||||
uint32_t flags; /* BCDUMP_F_* flags. */
|
||||
int status; /* Status from writer callback. */
|
||||
#ifdef LUA_USE_ASSERT
|
||||
global_State *g;
|
||||
@ -76,6 +78,75 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
|
||||
ctx->sb.w = p;
|
||||
}
|
||||
|
||||
/* Compare two template table keys. */
|
||||
static LJ_AINLINE int bcwrite_ktabk_lt(TValue *a, TValue *b)
|
||||
{
|
||||
uint32_t at = itype(a), bt = itype(b);
|
||||
if (at != bt) { /* This also handles false and true keys. */
|
||||
return at < bt;
|
||||
} else if (at == LJ_TSTR) {
|
||||
return lj_str_cmp(strV(a), strV(b)) < 0;
|
||||
} else {
|
||||
return a->u64 < b->u64; /* This works for numbers and integers. */
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert key into a sorted heap. */
|
||||
static void bcwrite_ktabk_heap_insert(TValue **heap, MSize idx, MSize end,
|
||||
TValue *key)
|
||||
{
|
||||
MSize child;
|
||||
while ((child = idx * 2 + 1) < end) {
|
||||
/* Find lower of the two children. */
|
||||
TValue *c0 = heap[child];
|
||||
if (child + 1 < end) {
|
||||
TValue *c1 = heap[child + 1];
|
||||
if (bcwrite_ktabk_lt(c1, c0)) {
|
||||
c0 = c1;
|
||||
child++;
|
||||
}
|
||||
}
|
||||
if (bcwrite_ktabk_lt(key, c0)) break; /* Key lower? Found our position. */
|
||||
heap[idx] = c0; /* Move lower child up. */
|
||||
idx = child; /* Descend. */
|
||||
}
|
||||
heap[idx] = key; /* Insert key here. */
|
||||
}
|
||||
|
||||
/* Resize heap, dropping content. */
|
||||
static void bcwrite_heap_resize(BCWriteCtx *ctx, uint32_t nsz)
|
||||
{
|
||||
lua_State *L = sbufL(&ctx->sb);
|
||||
if (ctx->heapsz) {
|
||||
lj_mem_freevec(G(L), ctx->heap, ctx->heapsz, TValue *);
|
||||
ctx->heapsz = 0;
|
||||
}
|
||||
if (nsz) {
|
||||
ctx->heap = lj_mem_newvec(L, nsz, TValue *);
|
||||
ctx->heapsz = nsz;
|
||||
}
|
||||
}
|
||||
|
||||
/* Write hash part of template table in sorted order. */
|
||||
static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash)
|
||||
{
|
||||
TValue **heap = ctx->heap;
|
||||
MSize i = nhash;
|
||||
for (;; node--) { /* Build heap. */
|
||||
if (!tvisnil(&node->key)) {
|
||||
bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key);
|
||||
if (i == 0) break;
|
||||
}
|
||||
}
|
||||
do { /* Drain heap. */
|
||||
TValue *key = heap[0]; /* Output lowest key from top. */
|
||||
bcwrite_ktabk(ctx, key, 0);
|
||||
bcwrite_ktabk(ctx, (TValue *)((char *)key - offsetof(Node, key)), 1);
|
||||
key = heap[--nhash]; /* Remove last key. */
|
||||
bcwrite_ktabk_heap_insert(heap, 0, nhash, key); /* Re-insert. */
|
||||
} while (nhash);
|
||||
}
|
||||
|
||||
/* Write a template table. */
|
||||
static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
|
||||
{
|
||||
@ -92,7 +163,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
|
||||
MSize i, hmask = t->hmask;
|
||||
Node *node = noderef(t->node);
|
||||
for (i = 0; i <= hmask; i++)
|
||||
nhash += !tvisnil(&node[i].val);
|
||||
nhash += !tvisnil(&node[i].key);
|
||||
}
|
||||
/* Write number of array slots and hash slots. */
|
||||
p = lj_strfmt_wuleb128(p, narray);
|
||||
@ -105,16 +176,22 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
|
||||
bcwrite_ktabk(ctx, o, 1);
|
||||
}
|
||||
if (nhash) { /* Write hash entries. */
|
||||
MSize i = nhash;
|
||||
Node *node = noderef(t->node) + t->hmask;
|
||||
if ((ctx->flags & BCDUMP_F_DETERMINISTIC) && nhash > 1) {
|
||||
if (ctx->heapsz < nhash)
|
||||
bcwrite_heap_resize(ctx, t->hmask + 1);
|
||||
bcwrite_ktab_sorted_hash(ctx, node, nhash);
|
||||
} else {
|
||||
MSize i = nhash;
|
||||
for (;; node--)
|
||||
if (!tvisnil(&node->val)) {
|
||||
if (!tvisnil(&node->key)) {
|
||||
bcwrite_ktabk(ctx, &node->key, 0);
|
||||
bcwrite_ktabk(ctx, &node->val, 1);
|
||||
if (--i == 0) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Write GC constants of a prototype. */
|
||||
static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
|
||||
@ -269,7 +346,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
|
||||
p = lj_strfmt_wuleb128(p, pt->sizekgc);
|
||||
p = lj_strfmt_wuleb128(p, pt->sizekn);
|
||||
p = lj_strfmt_wuleb128(p, pt->sizebc-1);
|
||||
if (!ctx->strip) {
|
||||
if (!(ctx->flags & BCDUMP_F_STRIP)) {
|
||||
if (proto_lineinfo(pt))
|
||||
sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
|
||||
p = lj_strfmt_wuleb128(p, sizedbg);
|
||||
@ -317,11 +394,10 @@ static void bcwrite_header(BCWriteCtx *ctx)
|
||||
*p++ = BCDUMP_HEAD2;
|
||||
*p++ = BCDUMP_HEAD3;
|
||||
*p++ = BCDUMP_VERSION;
|
||||
*p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
|
||||
*p++ = (ctx->flags & (BCDUMP_F_STRIP | BCDUMP_F_FR2)) +
|
||||
LJ_BE*BCDUMP_F_BE +
|
||||
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
|
||||
LJ_FR2*BCDUMP_F_FR2;
|
||||
if (!ctx->strip) {
|
||||
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
|
||||
if (!(ctx->flags & BCDUMP_F_STRIP)) {
|
||||
p = lj_strfmt_wuleb128(p, len);
|
||||
p = lj_buf_wmem(p, name, len);
|
||||
}
|
||||
@ -352,14 +428,16 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
|
||||
|
||||
/* Write bytecode for a prototype. */
|
||||
int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
|
||||
int strip)
|
||||
uint32_t flags)
|
||||
{
|
||||
BCWriteCtx ctx;
|
||||
int status;
|
||||
ctx.pt = pt;
|
||||
ctx.wfunc = writer;
|
||||
ctx.wdata = data;
|
||||
ctx.strip = strip;
|
||||
ctx.heapsz = 0;
|
||||
if ((bc_op(proto_bc(pt)[0]) != BC_NOT) == LJ_FR2) flags |= BCDUMP_F_FR2;
|
||||
ctx.flags = flags;
|
||||
ctx.status = 0;
|
||||
#ifdef LUA_USE_ASSERT
|
||||
ctx.g = G(L);
|
||||
@ -368,6 +446,7 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
|
||||
status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
|
||||
if (status == 0) status = ctx.status;
|
||||
lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
|
||||
bcwrite_heap_resize(&ctx, 0);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
@ -44,9 +44,13 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
|
||||
p = (uint8_t *)cdata_getptr(p, ct->size);
|
||||
if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
|
||||
} else if (ctype_isfunc(ct->info)) {
|
||||
CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0;
|
||||
p = (uint8_t *)*(void **)p;
|
||||
ct = ctype_get(cts,
|
||||
lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
|
||||
if (i) { /* cts->tab may have been reallocated. */
|
||||
ca->ct[0] = ctype_get(cts, id0);
|
||||
}
|
||||
}
|
||||
if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
|
||||
ca->ct[i] = ct;
|
||||
|
@ -985,6 +985,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
||||
fid = ctf->sib;
|
||||
}
|
||||
|
||||
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
|
||||
if ((ct->info & CTF_VARARG)) {
|
||||
nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
|
||||
ngpr = maxgpr;
|
||||
nfpr = CCALL_NARG_FPR;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Walk through all passed arguments. */
|
||||
for (o = L->base+1, narg = 1; o < top; o++, narg++) {
|
||||
CTypeID did;
|
||||
@ -1035,9 +1043,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
||||
align = CTSIZE_PTR-1;
|
||||
nsp = (nsp + align) & ~align;
|
||||
}
|
||||
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
|
||||
/* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
|
||||
dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
|
||||
#else
|
||||
dp = ((uint8_t *)cc->stack) + nsp;
|
||||
#endif
|
||||
nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
|
||||
if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
|
||||
if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
|
||||
err_nyi:
|
||||
lj_err_caller(L, LJ_ERR_FFI_NYICALL);
|
||||
}
|
||||
@ -1099,6 +1112,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
||||
#endif
|
||||
}
|
||||
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
|
||||
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
|
||||
if ((int32_t)nsp < 0) nsp = 0;
|
||||
#endif
|
||||
|
||||
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
|
||||
cc->nfpr = nfpr; /* Required for vararg functions. */
|
||||
|
@ -1768,9 +1768,11 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
|
||||
cp_check(cp, '(');
|
||||
if (cp->tok == CTOK_IDENT) {
|
||||
if (cp_str_is(cp->str, "push")) {
|
||||
if (cp->curpack < CPARSE_MAX_PACKSTACK) {
|
||||
if (cp->curpack < CPARSE_MAX_PACKSTACK-1) {
|
||||
cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
|
||||
cp->curpack++;
|
||||
} else {
|
||||
cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS);
|
||||
}
|
||||
} else if (cp_str_is(cp->str, "pop")) {
|
||||
if (cp->curpack > 0) cp->curpack--;
|
||||
|
@ -1118,12 +1118,8 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
|
||||
ngpr = 1;
|
||||
else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
|
||||
ngpr = 2;
|
||||
#elif LJ_TARGET_ARM64
|
||||
#if LJ_ABI_WIN
|
||||
#error "NYI: ARM64 Windows ABI calling conventions"
|
||||
#elif LJ_TARGET_OSX
|
||||
#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
|
||||
int ngpr = CCALL_NARG_GPR;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Skip initial attributes. */
|
||||
|
@ -276,6 +276,8 @@ typedef struct CTState {
|
||||
#define CTTYDEFP(_)
|
||||
#endif
|
||||
|
||||
#define CTF_LONG_IF8 (CTF_LONG * (sizeof(long) == 8))
|
||||
|
||||
/* Common types. */
|
||||
#define CTTYDEF(_) \
|
||||
_(NONE, 0, CT_ATTRIB, CTATTRIB(CTA_BAD)) \
|
||||
@ -289,8 +291,8 @@ typedef struct CTState {
|
||||
_(UINT16, 2, CT_NUM, CTF_UNSIGNED|CTALIGN(1)) \
|
||||
_(INT32, 4, CT_NUM, CTALIGN(2)) \
|
||||
_(UINT32, 4, CT_NUM, CTF_UNSIGNED|CTALIGN(2)) \
|
||||
_(INT64, 8, CT_NUM, CTF_LONG|CTALIGN(3)) \
|
||||
_(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG|CTALIGN(3)) \
|
||||
_(INT64, 8, CT_NUM, CTF_LONG_IF8|CTALIGN(3)) \
|
||||
_(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG_IF8|CTALIGN(3)) \
|
||||
_(FLOAT, 4, CT_NUM, CTF_FP|CTALIGN(2)) \
|
||||
_(DOUBLE, 8, CT_NUM, CTF_FP|CTALIGN(3)) \
|
||||
_(COMPLEX_FLOAT, 8, CT_ARRAY, CTF_COMPLEX|CTALIGN(2)|CTID_FLOAT) \
|
||||
|
@ -64,6 +64,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
|
||||
if (cf == NULL || (char *)cframe_pc(cf) == (char *)cframe_L(cf))
|
||||
return NO_BCPOS;
|
||||
ins = cframe_pc(cf); /* Only happens during error/hook handling. */
|
||||
if (!ins) return NO_BCPOS;
|
||||
} else {
|
||||
if (frame_islua(nextframe)) {
|
||||
ins = frame_pc(nextframe);
|
||||
|
29
src/lj_def.h
29
src/lj_def.h
@ -69,7 +69,7 @@ typedef unsigned int uintptr_t;
|
||||
#define LJ_MAX_UPVAL 160 /* Max. # of upvalues. */
|
||||
|
||||
#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
|
||||
#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
|
||||
#define LJ_STACK_EXTRA (5+3*LJ_FR2) /* Extra stack space (metamethods). */
|
||||
|
||||
#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
|
||||
|
||||
@ -146,15 +146,9 @@ typedef uintptr_t BloomFilter;
|
||||
#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
#define lj_ffs(x) ((uint32_t)__builtin_ctz(x))
|
||||
/* Don't ask ... */
|
||||
#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__))
|
||||
static LJ_AINLINE uint32_t lj_fls(uint32_t x)
|
||||
{
|
||||
uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r;
|
||||
}
|
||||
#else
|
||||
#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31))
|
||||
#endif
|
||||
#define lj_ffs64(x) ((uint32_t)__builtin_ctzll(x))
|
||||
#define lj_fls64(x) ((uint32_t)(__builtin_clzll(x)^63))
|
||||
|
||||
#if defined(__arm__)
|
||||
static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
|
||||
@ -277,6 +271,23 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
|
||||
{
|
||||
unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
|
||||
}
|
||||
|
||||
#if defined(_M_X64) || defined(_M_ARM64)
|
||||
unsigned char _BitScanForward64(unsigned long *, uint64_t);
|
||||
unsigned char _BitScanReverse64(unsigned long *, uint64_t);
|
||||
#pragma intrinsic(_BitScanForward64)
|
||||
#pragma intrinsic(_BitScanReverse64)
|
||||
|
||||
static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
|
||||
{
|
||||
unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r;
|
||||
}
|
||||
|
||||
static LJ_AINLINE uint32_t lj_fls64(uint64_t x)
|
||||
{
|
||||
unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
unsigned long _byteswap_ulong(unsigned long);
|
||||
|
@ -453,7 +453,7 @@ static int call_init(lua_State *L, GCfunc *fn)
|
||||
int numparams = pt->numparams;
|
||||
int gotparams = (int)(L->top - L->base);
|
||||
int need = pt->framesize;
|
||||
if ((pt->flags & PROTO_VARARG)) need += 1+gotparams;
|
||||
if ((pt->flags & PROTO_VARARG)) need += 1+LJ_FR2+gotparams;
|
||||
lj_state_checkstack(L, (MSize)need);
|
||||
numparams -= gotparams;
|
||||
return numparams >= 0 ? numparams : 0;
|
||||
|
@ -20,7 +20,7 @@ static uint64_t get_k64val(ASMState *as, IRRef ref)
|
||||
} else {
|
||||
lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
|
||||
"bad 64 bit const IR op %d", ir->o);
|
||||
return ir->i; /* Sign-extended. */
|
||||
return (uint32_t)ir->i; /* Zero-extended. */
|
||||
}
|
||||
}
|
||||
|
||||
@ -30,39 +30,31 @@ static uint32_t emit_isk12(int64_t n)
|
||||
uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n;
|
||||
uint32_t m = n < 0 ? 0x40000000 : 0;
|
||||
if (k < 0x1000) {
|
||||
return A64I_K12|m|A64F_U12(k);
|
||||
return (uint32_t)(A64I_K12|m|A64F_U12(k));
|
||||
} else if ((k & 0xfff000) == k) {
|
||||
return A64I_K12|m|0x400000|A64F_U12(k>>12);
|
||||
return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define emit_clz64(n) __builtin_clzll(n)
|
||||
#define emit_ctz64(n) __builtin_ctzll(n)
|
||||
#define emit_clz64(n) (lj_fls64(n)^63)
|
||||
#define emit_ctz64(n) lj_ffs64(n)
|
||||
|
||||
/* Encode constant in K13 format for logical data processing instructions. */
|
||||
static uint32_t emit_isk13(uint64_t n, int is64)
|
||||
{
|
||||
int inv = 0, w = 128, lz, tz;
|
||||
if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */
|
||||
if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */
|
||||
do { /* Find the repeat width. */
|
||||
if (is64 && (uint32_t)(n^(n>>32))) break;
|
||||
n = (uint32_t)n;
|
||||
if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */
|
||||
w = 32; if ((n^(n>>16)) & 0xffff) break;
|
||||
n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
|
||||
n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
|
||||
n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
|
||||
n = n & 0x3; w = 2;
|
||||
} while (0);
|
||||
lz = emit_clz64(n);
|
||||
tz = emit_ctz64(n);
|
||||
if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */
|
||||
if (inv)
|
||||
return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
|
||||
else
|
||||
return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
|
||||
/* Thanks to: https://dougallj.wordpress.com/2021/10/30/ */
|
||||
int rot, ones, size, immr, imms;
|
||||
if (!is64) n = ((uint64_t)n << 32) | (uint32_t)n;
|
||||
if ((n+1u) <= 1u) return 0; /* Neither all-zero nor all-ones are allowed. */
|
||||
rot = (n & (n+1u)) ? emit_ctz64(n & (n+1u)) : 64;
|
||||
n = lj_ror(n, rot & 63);
|
||||
ones = emit_ctz64(~n);
|
||||
size = emit_clz64(n) + ones;
|
||||
if (lj_ror(n, size & 63) != n) return 0; /* Non-repeating? */
|
||||
immr = -rot & (size - 1);
|
||||
imms = (-(size << 1) | (ones - 1)) & 63;
|
||||
return A64I_K13 | A64F_IMMR(immr | (size & 64)) | A64F_IMMS(imms);
|
||||
}
|
||||
|
||||
static uint32_t emit_isfpk64(uint64_t n)
|
||||
@ -121,9 +113,20 @@ static int emit_checkofs(A64Ins ai, int64_t ofs)
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
|
||||
static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc)
|
||||
{
|
||||
int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
|
||||
if (ofs >= 0) {
|
||||
return ai | A64F_U12(ofs>>sc); /* Subsequent lj_ror checks ofs. */
|
||||
} else if (ofs >= -256) {
|
||||
return (ai^A64I_LS_U) | A64F_S9(ofs & 0x1ff);
|
||||
} else {
|
||||
return A64F_D(31); /* Will mismatch prev. */
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64)
|
||||
{
|
||||
int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64;
|
||||
lj_assertA(ot, "load/store offset %d out of range", ofs);
|
||||
/* Combine LDR/STR pairs to LDP/STP. */
|
||||
if ((sc == 2 || sc == 3) &&
|
||||
@ -132,11 +135,9 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
|
||||
uint32_t prev = *as->mcp & ~A64F_D(31);
|
||||
int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
|
||||
A64Ins aip;
|
||||
if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
|
||||
prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
|
||||
if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsm, sc)) {
|
||||
aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
|
||||
} else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
|
||||
prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
|
||||
} else if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsp, sc)) {
|
||||
aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
|
||||
ofsm = ofs;
|
||||
} else {
|
||||
@ -158,13 +159,12 @@ nopair:
|
||||
/* -- Emit loads/stores --------------------------------------------------- */
|
||||
|
||||
/* Prefer rematerialization of BASE/L from global_State over spills. */
|
||||
#define emit_canremat(ref) ((ref) <= ASMREF_L)
|
||||
#define emit_canremat(ref) ((ref) <= REF_BASE)
|
||||
|
||||
/* Try to find an N-step delta relative to other consts with N < lim. */
|
||||
static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
|
||||
/* Try to find a one-step delta relative to other consts. */
|
||||
static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64)
|
||||
{
|
||||
RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
|
||||
if (lim <= 1) return 0; /* Can't beat that. */
|
||||
while (work) {
|
||||
Reg r = rset_picktop(work);
|
||||
IRRef ref = regcost_ref(as->cost[r]);
|
||||
@ -173,13 +173,14 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
|
||||
uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
|
||||
get_k64val(as, ref);
|
||||
int64_t delta = (int64_t)(k - kx);
|
||||
if (!is64) delta = (int64_t)(int32_t)delta; /* Sign-extend. */
|
||||
if (delta == 0) {
|
||||
emit_dm(as, A64I_MOVx, rd, r);
|
||||
emit_dm(as, is64|A64I_MOVw, rd, r);
|
||||
return 1;
|
||||
} else {
|
||||
uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
|
||||
if (k12) {
|
||||
emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
|
||||
emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r);
|
||||
return 1;
|
||||
}
|
||||
/* Do other ops or multi-step deltas pay off? Probably not.
|
||||
@ -192,54 +193,6 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
|
||||
return 0; /* Failed. */
|
||||
}
|
||||
|
||||
static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
|
||||
{
|
||||
int i, zeros = 0, ones = 0, neg;
|
||||
if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
|
||||
/* Count homogeneous 16 bit fragments. */
|
||||
for (i = 0; i < 4; i++) {
|
||||
uint64_t frag = (u64 >> i*16) & 0xffff;
|
||||
zeros += (frag == 0);
|
||||
ones += (frag == 0xffff);
|
||||
}
|
||||
neg = ones > zeros; /* Use MOVN if it pays off. */
|
||||
if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */
|
||||
uint32_t k13 = emit_isk13(u64, is64);
|
||||
if (k13) {
|
||||
emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
|
||||
int shift = 0, lshift = 0;
|
||||
uint64_t n64 = neg ? ~u64 : u64;
|
||||
if (n64 != 0) {
|
||||
/* Find first/last fragment to be filled. */
|
||||
shift = (63-emit_clz64(n64)) & ~15;
|
||||
lshift = emit_ctz64(n64) & ~15;
|
||||
}
|
||||
/* MOVK requires the original value (u64). */
|
||||
while (shift > lshift) {
|
||||
uint32_t u16 = (u64 >> shift) & 0xffff;
|
||||
/* Skip fragments that are correctly filled by MOVN/MOVZ. */
|
||||
if (u16 != (neg ? 0xffff : 0))
|
||||
emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
|
||||
shift -= 16;
|
||||
}
|
||||
/* But MOVN needs an inverted value (n64). */
|
||||
emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
|
||||
A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
|
||||
}
|
||||
}
|
||||
|
||||
/* Load a 32 bit constant into a GPR. */
|
||||
#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0)
|
||||
|
||||
/* Load a 64 bit constant into a GPR. */
|
||||
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)
|
||||
|
||||
#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))
|
||||
|
||||
#define glofs(as, k) \
|
||||
((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
|
||||
#define mcpofs(as, k) \
|
||||
@ -247,25 +200,95 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
|
||||
#define checkmcpofs(as, k) \
|
||||
(A64F_S_OK(mcpofs(as, k)>>2, 19))
|
||||
|
||||
/* Try to form a const as ADR or ADRP or ADRP + ADD. */
|
||||
static int emit_kadrp(ASMState *as, Reg rd, uint64_t k)
|
||||
{
|
||||
A64Ins ai = A64I_ADR;
|
||||
int64_t ofs = mcpofs(as, k);
|
||||
if (!A64F_S_OK((uint64_t)ofs, 21)) {
|
||||
uint64_t kpage = k & ~0xfffull;
|
||||
MCode *adrp = as->mcp - 1 - (k != kpage);
|
||||
ofs = (int64_t)(kpage - ((uint64_t)adrp & ~0xfffull)) >> 12;
|
||||
if (!A64F_S_OK(ofs, 21))
|
||||
return 0; /* Failed. */
|
||||
if (k != kpage)
|
||||
emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd);
|
||||
ai = A64I_ADRP;
|
||||
}
|
||||
emit_d(as, ai|(((uint32_t)ofs&3)<<29)|A64F_S19(ofs>>2), rd);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
|
||||
{
|
||||
int zeros = 0, ones = 0, neg, lshift = 0;
|
||||
int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2;
|
||||
/* Count non-homogeneous 16 bit fragments. */
|
||||
while (--i >= 0) {
|
||||
uint32_t frag = (u64 >> i*16) & 0xffff;
|
||||
zeros += (frag != 0);
|
||||
ones += (frag != 0xffff);
|
||||
}
|
||||
neg = ones < zeros; /* Use MOVN if it pays off. */
|
||||
if ((neg ? ones : zeros) > 1) { /* Need 2+ ins. Try 1 ins encodings. */
|
||||
uint32_t k13 = emit_isk13(u64, is64);
|
||||
if (k13) {
|
||||
emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
|
||||
return;
|
||||
}
|
||||
if (emit_kdelta(as, rd, u64, is64)) {
|
||||
return;
|
||||
}
|
||||
if (emit_kadrp(as, rd, u64)) { /* Either 1 or 2 ins. */
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (neg) {
|
||||
u64 = ~u64;
|
||||
if (!is64) u64 = (uint32_t)u64;
|
||||
}
|
||||
if (u64) {
|
||||
/* Find first/last fragment to be filled. */
|
||||
int shift = (63-emit_clz64(u64)) & ~15;
|
||||
lshift = emit_ctz64(u64) & ~15;
|
||||
for (; shift > lshift; shift -= 16) {
|
||||
uint32_t frag = (u64 >> shift) & 0xffff;
|
||||
if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */
|
||||
if (neg) frag ^= 0xffff; /* MOVK requires the original value. */
|
||||
emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd);
|
||||
}
|
||||
}
|
||||
/* But MOVN needs an inverted value. */
|
||||
emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) |
|
||||
A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
|
||||
}
|
||||
|
||||
/* Load a 32 bit constant into a GPR. */
|
||||
#define emit_loadi(as, rd, i) emit_loadk(as, rd, (uint32_t)i)
|
||||
|
||||
/* Load a 64 bit constant into a GPR. */
|
||||
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i)
|
||||
|
||||
static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
|
||||
|
||||
/* Get/set from constant pointer. */
|
||||
static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
|
||||
{
|
||||
/* First, check if ip + offset is in range. */
|
||||
if ((ai & 0x00400000) && checkmcpofs(as, p)) {
|
||||
emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
|
||||
} else {
|
||||
Reg base = RID_GL; /* Next, try GL + offset. */
|
||||
Reg base = RID_GL;
|
||||
int64_t ofs = glofs(as, p);
|
||||
if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */
|
||||
if (emit_checkofs(ai, ofs)) {
|
||||
/* GL + offset, might subsequently fuse to LDP/STP. */
|
||||
} else if (ai == A64I_LDRx && checkmcpofs(as, p)) {
|
||||
/* IP + offset is cheaper than allock, but address must be in range. */
|
||||
emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
|
||||
return;
|
||||
} else { /* Split up into base reg + offset. */
|
||||
int64_t i64 = i64ptr(p);
|
||||
base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
|
||||
ofs = i64 & 0x7fffull;
|
||||
}
|
||||
emit_lso(as, ai, r, base, ofs);
|
||||
}
|
||||
}
|
||||
|
||||
/* Load 64 bit IR constant into register. */
|
||||
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
|
||||
|
70
src/lj_err.c
70
src/lj_err.c
@ -174,12 +174,15 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
|
||||
case FRAME_PCALL: /* FF pcall() frame. */
|
||||
case FRAME_PCALLH: /* FF pcall() frame inside hook. */
|
||||
if (errcode) {
|
||||
global_State *g;
|
||||
if (errcode == LUA_YIELD) {
|
||||
frame = frame_prevd(frame);
|
||||
break;
|
||||
}
|
||||
g = G(L);
|
||||
setgcref(g->cur_L, obj2gco(L));
|
||||
if (frame_typep(frame) == FRAME_PCALL)
|
||||
hook_leave(G(L));
|
||||
hook_leave(g);
|
||||
L->base = frame_prevd(frame) + 1;
|
||||
L->cframe = cf;
|
||||
unwindstack(L, L->base);
|
||||
@ -209,11 +212,6 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
|
||||
** from 3rd party docs or must be found by trial-and-error. They really
|
||||
** don't want you to write your own language-specific exception handler
|
||||
** or to interact gracefully with MSVC. :-(
|
||||
**
|
||||
** Apparently MSVC doesn't call C++ destructors for foreign exceptions
|
||||
** unless you compile your C++ code with /EHa. Unfortunately this means
|
||||
** catch (...) also catches things like access violations. The use of
|
||||
** _set_se_translator doesn't really help, because it requires /EHa, too.
|
||||
*/
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
@ -261,6 +259,8 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
|
||||
{
|
||||
#if LJ_TARGET_X86
|
||||
void *cf = (char *)f - CFRAME_OFS_SEH;
|
||||
#elif LJ_TARGET_ARM64
|
||||
void *cf = (char *)f - CFRAME_SIZE;
|
||||
#else
|
||||
void *cf = f;
|
||||
#endif
|
||||
@ -268,11 +268,25 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
|
||||
int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
|
||||
LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
|
||||
if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */
|
||||
if (rec->ExceptionCode == STATUS_LONGJUMP &&
|
||||
rec->ExceptionRecord &&
|
||||
LJ_EXCODE_CHECK(rec->ExceptionRecord->ExceptionCode)) {
|
||||
errcode = LJ_EXCODE_ERRCODE(rec->ExceptionRecord->ExceptionCode);
|
||||
if ((rec->ExceptionFlags & 0x20)) { /* EH_TARGET_UNWIND */
|
||||
/* Unwinding is about to finish; revert the ExceptionCode so that
|
||||
** RtlRestoreContext does not try to restore from a _JUMP_BUFFER.
|
||||
*/
|
||||
rec->ExceptionCode = 0;
|
||||
}
|
||||
}
|
||||
/* Unwind internal frames. */
|
||||
err_unwind(L, cf, errcode);
|
||||
} else {
|
||||
void *cf2 = err_unwind(L, cf, 0);
|
||||
if (cf2) { /* We catch it, so start unwinding the upper frames. */
|
||||
#if !LJ_TARGET_X86
|
||||
EXCEPTION_RECORD rec2;
|
||||
#endif
|
||||
if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
|
||||
rec->ExceptionCode == LJ_GCC_EXCODE) {
|
||||
#if !LJ_TARGET_CYGWIN
|
||||
@ -295,14 +309,29 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
|
||||
(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
|
||||
/* lj_vm_rtlunwind does not return. */
|
||||
#else
|
||||
if (LJ_EXCODE_CHECK(rec->ExceptionCode)) {
|
||||
/* For unwind purposes, wrap the EXCEPTION_RECORD in something that
|
||||
** looks like a longjmp, so that MSVC will execute C++ destructors in
|
||||
** the frames we unwind over. ExceptionInformation[0] should really
|
||||
** contain a _JUMP_BUFFER*, but hopefully nobody is looking too closely
|
||||
** at this point.
|
||||
*/
|
||||
rec2.ExceptionCode = STATUS_LONGJUMP;
|
||||
rec2.ExceptionRecord = rec;
|
||||
rec2.ExceptionAddress = 0;
|
||||
rec2.NumberParameters = 1;
|
||||
rec2.ExceptionInformation[0] = (ULONG_PTR)ctx;
|
||||
rec = &rec2;
|
||||
}
|
||||
/* Unwind the stack and call all handlers for all lower C frames
|
||||
** (including ourselves) again with EH_UNWINDING set. Then set
|
||||
** stack pointer = cf, result = errcode and jump to the specified target.
|
||||
** stack pointer = f, result = errcode and jump to the specified target.
|
||||
*/
|
||||
RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
|
||||
RtlUnwindEx(f, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
|
||||
lj_vm_unwind_ff_eh :
|
||||
lj_vm_unwind_c_eh),
|
||||
rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
|
||||
rec, (void *)(uintptr_t)errcode, dispatch->ContextRecord,
|
||||
dispatch->HistoryTable);
|
||||
/* RtlUnwindEx should never return. */
|
||||
#endif
|
||||
}
|
||||
@ -789,7 +818,14 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
|
||||
TValue *base = tvref(G(L)->jit_base);
|
||||
if (base) L->base = base;
|
||||
}
|
||||
if (curr_funcisL(L)) L->top = curr_topL(L);
|
||||
if (curr_funcisL(L)) {
|
||||
L->top = curr_topL(L);
|
||||
if (LJ_UNLIKELY(L->top > tvref(L->maxstack))) {
|
||||
/* The current Lua frame violates the stack. Replace it with a dummy. */
|
||||
L->top = L->base;
|
||||
setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD);
|
||||
}
|
||||
}
|
||||
setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM));
|
||||
lj_err_throw(L, LUA_ERRMEM);
|
||||
}
|
||||
@ -850,9 +886,11 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
|
||||
{
|
||||
ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
|
||||
if (ef) {
|
||||
TValue *errfunc = restorestack(L, ef);
|
||||
TValue *top = L->top;
|
||||
TValue *errfunc, *top;
|
||||
lj_state_checkstack(L, LUA_MINSTACK * 2); /* Might raise new error. */
|
||||
lj_trace_abort(G(L));
|
||||
errfunc = restorestack(L, ef);
|
||||
top = L->top;
|
||||
if (!tvisfunc(errfunc) || L->status == LUA_ERRERR) {
|
||||
setstrV(L, top-1, lj_err_str(L, LJ_ERR_ERRERR));
|
||||
lj_err_throw(L, LUA_ERRERR);
|
||||
@ -867,7 +905,15 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
|
||||
lj_err_throw(L, LUA_ERRRUN);
|
||||
}
|
||||
|
||||
/* Stack overflow error. */
|
||||
void LJ_FASTCALL lj_err_stkov(lua_State *L)
|
||||
{
|
||||
lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL);
|
||||
lj_err_run(L);
|
||||
}
|
||||
|
||||
#if LJ_HASJIT
|
||||
/* Rethrow error after doing a trace exit. */
|
||||
LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
|
||||
{
|
||||
if (errcode == LUA_ERRRUN)
|
||||
|
@ -23,6 +23,7 @@ LJ_DATA const char *lj_err_allmsg;
|
||||
LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
|
||||
LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
|
||||
LJ_FUNC_NORET void lj_err_mem(lua_State *L);
|
||||
LJ_FUNC_NORET void LJ_FASTCALL lj_err_stkov(lua_State *L);
|
||||
LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
|
||||
#if LJ_HASJIT
|
||||
LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
|
||||
|
@ -1130,7 +1130,7 @@ static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg)
|
||||
/* Emit BUFHDR for write to extended string buffer. */
|
||||
static TRef recff_sbufx_write(jit_State *J, TRef ud)
|
||||
{
|
||||
TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata)));
|
||||
TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kintpgc(J, sizeof(GCudata)));
|
||||
return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
|
||||
}
|
||||
|
||||
@ -1164,20 +1164,19 @@ static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd
|
||||
SBufExt *sbx = bufV(&rd->argv[0]);
|
||||
int iscow = (int)sbufiscow(sbx);
|
||||
TRef trl = recff_sbufx_get_L(J, ud);
|
||||
TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW));
|
||||
TRef zero = lj_ir_kint(J, 0);
|
||||
emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero);
|
||||
TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
|
||||
TRef zeropgc = lj_ir_kintpgc(J, 0);
|
||||
emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zeropgc);
|
||||
if (iscow) {
|
||||
trl = emitir(IRT(IR_BXOR, IRT_IGC), trl,
|
||||
LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) :
|
||||
lj_ir_kint(J, SBUF_FLAG_COW));
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero);
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero);
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero);
|
||||
TRef zerop = lj_ir_kintp(J, 0);
|
||||
trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zerop);
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zerop);
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zerop);
|
||||
recff_sbufx_set_L(J, ud, trl);
|
||||
emitir(IRT(IR_FSTORE, IRT_PGC),
|
||||
emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero);
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero);
|
||||
emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zeropgc);
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zerop);
|
||||
} else {
|
||||
TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
|
||||
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);
|
||||
@ -1205,6 +1204,12 @@ static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd)
|
||||
if (tref_isstr(tr)) {
|
||||
TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
|
||||
TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
|
||||
IRIns *irp = IR(tref_ref(trp));
|
||||
/* trp must point into the anchored obj, even after folding. */
|
||||
if (irp->o == IR_STRREF)
|
||||
tr = irp->op1;
|
||||
else if (!tref_isk(tr))
|
||||
trp = emitir(IRT(IR_ADD, IRT_PGC), tr, lj_ir_kintpgc(J, sizeof(GCstr)));
|
||||
lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
|
||||
#if LJ_HASFFI
|
||||
} else if (tref_iscdata(tr)) {
|
||||
@ -1445,6 +1450,15 @@ static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
|
||||
{
|
||||
TRef tra = lj_opt_narrow_toint(J, J->base[0]);
|
||||
TRef trh = lj_opt_narrow_toint(J, J->base[1]);
|
||||
if (tref_isk(tra) && tref_isk(trh)) {
|
||||
int32_t a = IR(tref_ref(tra))->i;
|
||||
if (a < 0x7fff) {
|
||||
uint32_t hbits = hsize2hbits(IR(tref_ref(trh))->i);
|
||||
a = a > 0 ? a+1 : 0;
|
||||
J->base[0] = emitir(IRTG(IR_TNEW, IRT_TAB), (uint32_t)a, hbits);
|
||||
return;
|
||||
}
|
||||
}
|
||||
J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
|
||||
UNUSED(rd);
|
||||
}
|
||||
|
@ -637,7 +637,7 @@ static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx)
|
||||
DUV(DW_AT_low_pc); DUV(DW_FORM_addr);
|
||||
DUV(DW_AT_high_pc); DUV(DW_FORM_addr);
|
||||
DUV(DW_AT_stmt_list); DUV(DW_FORM_data4);
|
||||
DB(0); DB(0);
|
||||
DB(0); DB(0); DB(0);
|
||||
|
||||
ctx->p = p;
|
||||
}
|
||||
|
@ -76,8 +76,8 @@
|
||||
\
|
||||
_(ABS, N , ref, ref) \
|
||||
_(LDEXP, N , ref, ref) \
|
||||
_(MIN, C , ref, ref) \
|
||||
_(MAX, C , ref, ref) \
|
||||
_(MIN, N , ref, ref) \
|
||||
_(MAX, N , ref, ref) \
|
||||
_(FPMATH, N , ref, lit) \
|
||||
\
|
||||
/* Overflow-checking arithmetic ops. */ \
|
||||
@ -383,6 +383,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
|
||||
#define irt_isu32(t) (irt_type(t) == IRT_U32)
|
||||
#define irt_isi64(t) (irt_type(t) == IRT_I64)
|
||||
#define irt_isu64(t) (irt_type(t) == IRT_U64)
|
||||
#define irt_isp32(t) (irt_type(t) == IRT_P32)
|
||||
|
||||
#define irt_isfp(t) (irt_isnum(t) || irt_isfloat(t))
|
||||
#define irt_isinteger(t) (irt_typerange((t), IRT_I8, IRT_INT))
|
||||
|
@ -63,7 +63,7 @@ typedef struct CCallInfo {
|
||||
/* Helpers for conditional function definitions. */
|
||||
#define IRCALLCOND_ANY(x) x
|
||||
|
||||
#if LJ_TARGET_X86ORX64
|
||||
#if LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64
|
||||
#define IRCALLCOND_FPMATH(x) NULL
|
||||
#else
|
||||
#define IRCALLCOND_FPMATH(x) x
|
||||
|
@ -56,6 +56,12 @@ LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
|
||||
#define lj_ir_kintp(J, k) lj_ir_kint(J, (int32_t)(k))
|
||||
#endif
|
||||
|
||||
#if LJ_GC64
|
||||
#define lj_ir_kintpgc lj_ir_kintp
|
||||
#else
|
||||
#define lj_ir_kintpgc lj_ir_kint
|
||||
#endif
|
||||
|
||||
static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
|
||||
{
|
||||
TValue tv;
|
||||
|
@ -415,6 +415,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
|
||||
ls->linenumber = 1;
|
||||
ls->lastline = 1;
|
||||
ls->endmark = 0;
|
||||
ls->fr2 = LJ_FR2; /* Generate native bytecode by default. */
|
||||
lex_next(ls); /* Read-ahead first char. */
|
||||
if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
|
||||
(uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
|
||||
|
@ -74,6 +74,7 @@ typedef struct LexState {
|
||||
MSize sizebcstack; /* Size of bytecode stack. */
|
||||
uint32_t level; /* Syntactical nesting level. */
|
||||
int endmark; /* Trust bytecode end marker, even if not at EOF. */
|
||||
int fr2; /* Generate bytecode for LJ_FR2 mode. */
|
||||
} LexState;
|
||||
|
||||
LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
|
||||
|
18
src/lj_lib.c
18
src/lj_lib.c
@ -62,6 +62,7 @@ static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
|
||||
ls.pe = (const char *)~(uintptr_t)0;
|
||||
ls.c = -1;
|
||||
ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
|
||||
ls.fr2 = LJ_FR2;
|
||||
ls.chunkname = name;
|
||||
pt = lj_bcread_proto(&ls);
|
||||
pt->firstline = ~(BCLine)0;
|
||||
@ -266,6 +267,23 @@ GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
|
||||
return funcV(o);
|
||||
}
|
||||
|
||||
GCproto *lj_lib_checkLproto(lua_State *L, int narg, int nolua)
|
||||
{
|
||||
TValue *o = L->base + narg-1;
|
||||
if (L->top > o) {
|
||||
if (tvisproto(o)) {
|
||||
return protoV(o);
|
||||
} else if (tvisfunc(o)) {
|
||||
if (isluafunc(funcV(o)))
|
||||
return funcproto(funcV(o));
|
||||
else if (nolua)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
lj_err_argt(L, narg, LUA_TFUNCTION);
|
||||
return NULL; /* unreachable */
|
||||
}
|
||||
|
||||
GCtab *lj_lib_checktab(lua_State *L, int narg)
|
||||
{
|
||||
TValue *o = L->base + narg-1;
|
||||
|
@ -42,6 +42,7 @@ LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
|
||||
LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
|
||||
LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
|
||||
LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
|
||||
LJ_FUNC GCproto *lj_lib_checkLproto(lua_State *L, int narg, int nolua);
|
||||
LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
|
||||
LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
|
||||
LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
|
||||
|
@ -34,14 +34,28 @@ static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud)
|
||||
UNUSED(dummy);
|
||||
cframe_errfunc(L->cframe) = -1; /* Inherit error function. */
|
||||
bc = lj_lex_setup(L, ls);
|
||||
if (ls->mode && !strchr(ls->mode, bc ? 'b' : 't')) {
|
||||
if (ls->mode) {
|
||||
int xmode = 1;
|
||||
const char *mode = ls->mode;
|
||||
char c;
|
||||
while ((c = *mode++)) {
|
||||
if (c == (bc ? 'b' : 't')) xmode = 0;
|
||||
if (c == (LJ_FR2 ? 'W' : 'X')) ls->fr2 = !LJ_FR2;
|
||||
}
|
||||
if (xmode) {
|
||||
setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XMODE));
|
||||
lj_err_throw(L, LUA_ERRSYNTAX);
|
||||
}
|
||||
}
|
||||
pt = bc ? lj_bcread(ls) : lj_parse(ls);
|
||||
if (ls->fr2 == LJ_FR2) {
|
||||
fn = lj_func_newL_empty(L, pt, tabref(L->env));
|
||||
/* Don't combine above/below into one statement. */
|
||||
setfuncV(L, L->top++, fn);
|
||||
} else {
|
||||
/* Non-native generation returns a dumpable, but non-runnable prototype. */
|
||||
setprotoV(L, L->top++, pt);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -245,9 +259,10 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s)
|
||||
LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data)
|
||||
{
|
||||
cTValue *o = L->top-1;
|
||||
uint32_t flags = LJ_FR2*BCDUMP_F_FR2; /* Default mode for legacy C API. */
|
||||
lj_checkapi(L->top > L->base, "top slot empty");
|
||||
if (tvisfunc(o) && isluafunc(funcV(o)))
|
||||
return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0);
|
||||
return lj_bcwrite(L, funcproto(funcV(o)), writer, data, flags);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
@ -29,6 +29,11 @@
|
||||
#include <valgrind/valgrind.h>
|
||||
#endif
|
||||
|
||||
#if LJ_TARGET_WINDOWS
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#if LJ_TARGET_IOS
|
||||
void sys_icache_invalidate(void *start, size_t len);
|
||||
#endif
|
||||
@ -41,6 +46,8 @@ void lj_mcode_sync(void *start, void *end)
|
||||
#endif
|
||||
#if LJ_TARGET_X86ORX64
|
||||
UNUSED(start); UNUSED(end);
|
||||
#elif LJ_TARGET_WINDOWS
|
||||
FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start);
|
||||
#elif LJ_TARGET_IOS
|
||||
sys_icache_invalidate(start, (char *)end-(char *)start);
|
||||
#elif LJ_TARGET_PPC
|
||||
@ -58,9 +65,6 @@ void lj_mcode_sync(void *start, void *end)
|
||||
|
||||
#if LJ_TARGET_WINDOWS
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
#define MCPROT_RW PAGE_READWRITE
|
||||
#define MCPROT_RX PAGE_EXECUTE_READ
|
||||
#define MCPROT_RWX PAGE_EXECUTE_READWRITE
|
||||
@ -363,7 +367,7 @@ void lj_mcode_limiterr(jit_State *J, size_t need)
|
||||
sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10;
|
||||
sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
|
||||
maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10;
|
||||
if ((size_t)need > sizemcode)
|
||||
if (need * sizeof(MCode) > sizemcode)
|
||||
lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */
|
||||
if (J->szallmcarea + sizemcode > maxmcode)
|
||||
lj_trace_err(J, LJ_TRERR_MCODEAL);
|
||||
|
@ -44,12 +44,12 @@ static void dce_propagate(jit_State *J)
|
||||
IRIns *ir = IR(ins);
|
||||
if (irt_ismarked(ir->t)) {
|
||||
irt_clearmark(ir->t);
|
||||
pchain[ir->o] = &ir->prev;
|
||||
} else if (!ir_sideeff(ir)) {
|
||||
*pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */
|
||||
lj_ir_nop(ir);
|
||||
continue;
|
||||
}
|
||||
pchain[ir->o] = &ir->prev;
|
||||
if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
|
||||
if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t);
|
||||
}
|
||||
|
@ -377,10 +377,10 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
|
||||
case IR_BOR: k1 |= k2; break;
|
||||
case IR_BXOR: k1 ^= k2; break;
|
||||
case IR_BSHL: k1 <<= (k2 & 63); break;
|
||||
case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
|
||||
case IR_BSAR: k1 >>= (k2 & 63); break;
|
||||
case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
|
||||
case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
|
||||
case IR_BSHR: k1 >>= (k2 & 63); break;
|
||||
case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
|
||||
case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
|
||||
case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
|
||||
default: lj_assertJ(0, "bad IR op %d", op); break;
|
||||
}
|
||||
#else
|
||||
@ -1972,7 +1972,10 @@ LJFOLD(NE any any)
|
||||
LJFOLDF(comm_equal)
|
||||
{
|
||||
/* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
|
||||
if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
|
||||
if (fins->op1 == fins->op2 &&
|
||||
(!irt_isnum(fins->t) ||
|
||||
(fleft->o == IR_CONV && /* Converted integers cannot be NaN. */
|
||||
(uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8))))
|
||||
return CONDFOLD(fins->o == IR_EQ);
|
||||
return fold_comm_swap(J);
|
||||
}
|
||||
@ -2131,8 +2134,26 @@ LJFOLDX(lj_opt_fwd_uload)
|
||||
LJFOLD(ALEN any any)
|
||||
LJFOLDX(lj_opt_fwd_alen)
|
||||
|
||||
/* Try to merge UREFO/UREFC into referenced instruction.
** ref/ir identify the earlier instruction the caller matched against the
** current fold instruction (fins).
** Returns EMITFOLD if ir is a guarded UREFO and gcstep_barrier() reports a
** barrier for ref. Otherwise returns ref, after possibly changing the type
** of a guarded UREFO from IRT_IGC to IRT_PGC when fins carries
** IRT_GUARD+IRT_PGC.
*/
|
||||
static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
|
||||
{
|
||||
if (ir->o == IR_UREFO && irt_isguard(ir->t)) {
|
||||
/* Might be pointing to some other coroutine's stack.
|
||||
** And GC might shrink said stack, thereby repointing the upvalue.
|
||||
** GC might even collect said coroutine, thereby closing the upvalue.
|
||||
*/
|
||||
if (gcstep_barrier(J, ref))
|
||||
return EMITFOLD; /* So cannot merge. */
|
||||
/* Current fins wants a check, but ir doesn't have one. */
|
||||
if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
|
||||
irt_type(ir->t) == IRT_IGC)
|
||||
ir->t.irt += IRT_PGC-IRT_IGC; /* So install a check. */
|
||||
}
|
||||
return ref; /* Not a TRef, but the caller doesn't care. */
|
||||
}
|
||||
|
||||
/* Upvalue refs are really loads, but there are no corresponding stores.
|
||||
** So CSE is ok for them, except for UREFO across a GC step (see below).
|
||||
** So CSE is ok for them, except for guarded UREFO across a GC step.
|
||||
** If the referenced function is const, its upvalue addresses are const, too.
|
||||
** This can be used to improve CSE by looking for the same address,
|
||||
** even if the upvalues originate from a different function.
|
||||
@ -2150,9 +2171,7 @@ LJFOLDF(cse_uref)
|
||||
if (irref_isk(ir->op1)) {
|
||||
GCfunc *fn2 = ir_kfunc(IR(ir->op1));
|
||||
if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
|
||||
if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
|
||||
break;
|
||||
return ref;
|
||||
return merge_uref(J, ref, ir);
|
||||
}
|
||||
}
|
||||
ref = ir->prev;
|
||||
@ -2161,6 +2180,24 @@ LJFOLDF(cse_uref)
|
||||
return EMITFOLD;
|
||||
}
|
||||
|
||||
/* Custom CSE for UREFO.
** Only active when the JIT_F_OPT_CSE flag is set. Walks the IR_UREFO chain
** backwards for as long as the reference is above fins->op1, looking for an
** instruction with identical operands. A match is handed to merge_uref(),
** which decides whether it can be reused. Without a match (or with CSE
** disabled) the result is EMITFOLD.
*/
|
||||
LJFOLD(UREFO any any)
|
||||
LJFOLDF(cse_urefo)
|
||||
{
|
||||
if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
|
||||
IRRef ref = J->chain[IR_UREFO];
|
||||
IRRef lim = fins->op1;  /* Stop the search at the first operand. */
|
||||
IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);  /* Both operands combined for a single compare. */
|
||||
while (ref > lim) {
|
||||
IRIns *ir = IR(ref);
|
||||
if (ir->op12 == op12)
|
||||
return merge_uref(J, ref, ir);
|
||||
ref = ir->prev;  /* Follow the chain to the previous UREFO. */
|
||||
}
|
||||
}
|
||||
return EMITFOLD;
|
||||
}
|
||||
|
||||
LJFOLD(HREFK any any)
|
||||
LJFOLDX(lj_opt_fwd_hrefk)
|
||||
|
||||
@ -2381,14 +2418,9 @@ LJFOLDF(fold_base)
|
||||
|
||||
/* Write barriers are amenable to CSE, but not across any incremental
|
||||
** GC steps.
|
||||
**
|
||||
** The same logic applies to open upvalue references, because a stack
|
||||
** may be resized during a GC step (not the current stack, but maybe that
|
||||
** of a coroutine).
|
||||
*/
|
||||
LJFOLD(TBAR any)
|
||||
LJFOLD(OBAR any any)
|
||||
LJFOLD(UREFO any any)
|
||||
LJFOLDF(barrier_tab)
|
||||
{
|
||||
TRef tr = lj_opt_cse(J);
|
||||
|
@ -217,25 +217,23 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
|
||||
}
|
||||
ref = store->prev;
|
||||
}
|
||||
if (ir->o == IR_TNEW && !irt_isnil(fins->t))
|
||||
return 0; /* Type instability in loop-carried dependency. */
|
||||
if (irt_ispri(fins->t)) {
|
||||
return TREF_PRI(irt_type(fins->t));
|
||||
} else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) ||
|
||||
irt_isstr(fins->t)) {
|
||||
/* Simplified here: let loop_unroll() figure out any type instability. */
|
||||
if (ir->o == IR_TNEW) {
|
||||
return TREF_NIL;
|
||||
} else {
|
||||
TValue keyv;
|
||||
cTValue *tv;
|
||||
IRIns *key = IR(xr->op2);
|
||||
if (key->o == IR_KSLOT) key = IR(key->op1);
|
||||
lj_ir_kvalue(J->L, &keyv, key);
|
||||
tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
|
||||
if (itype2irt(tv) != irt_type(fins->t))
|
||||
return 0; /* Type instability in loop-carried dependency. */
|
||||
if (irt_isnum(fins->t))
|
||||
if (tvispri(tv))
|
||||
return TREF_PRI(itype2irt(tv));
|
||||
else if (tvisnum(tv))
|
||||
return lj_ir_knum_u64(J, tv->u64);
|
||||
else if (LJ_DUALNUM && irt_isint(fins->t))
|
||||
else if (tvisint(tv))
|
||||
return lj_ir_kint(J, intV(tv));
|
||||
else
|
||||
else if (tvisgcv(tv))
|
||||
return lj_ir_kstr(J, strV(tv));
|
||||
}
|
||||
/* Otherwise: don't intern as a constant. */
|
||||
@ -464,18 +462,23 @@ doemit:
|
||||
*/
|
||||
static AliasRet aa_uref(IRIns *refa, IRIns *refb)
|
||||
{
|
||||
if (refa->o != refb->o)
|
||||
return ALIAS_NO; /* Different UREFx type. */
|
||||
if (refa->op1 == refb->op1) { /* Same function. */
|
||||
if (refa->op2 == refb->op2)
|
||||
return ALIAS_MUST; /* Same function, same upvalue idx. */
|
||||
else
|
||||
return ALIAS_NO; /* Same function, different upvalue idx. */
|
||||
} else { /* Different functions, check disambiguation hash values. */
|
||||
if (((refa->op2 ^ refb->op2) & 0xff))
|
||||
if (((refa->op2 ^ refb->op2) & 0xff)) {
|
||||
return ALIAS_NO; /* Upvalues with different hash values cannot alias. */
|
||||
else
|
||||
return ALIAS_MAY; /* No conclusion can be drawn for same hash value. */
|
||||
} else if (refa->o != refb->o) {
|
||||
/* Different UREFx type, but need to confirm the UREFO really is open. */
|
||||
if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC;
|
||||
else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC;
|
||||
return ALIAS_NO;
|
||||
} else {
|
||||
/* No conclusion can be drawn for same hash value and same UREFx type. */
|
||||
return ALIAS_MAY;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -957,6 +960,8 @@ int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref)
|
||||
if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref))
|
||||
return 0; /* A nil store with same const key or var key MAY alias. */
|
||||
/* Different const keys CANNOT alias. */
|
||||
} else if (irt_isp32(IR(skref)->t) != irt_isp32(IR(xkref)->t)) {
|
||||
return 0; /* HREF and HREFK MAY alias. */
|
||||
} /* Different key types CANNOT alias. */
|
||||
} /* Other non-nil stores MAY alias. */
|
||||
ref = store->prev;
|
||||
|
@ -670,19 +670,20 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
|
||||
/* Emit method lookup expression. */
|
||||
static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
|
||||
{
|
||||
BCReg idx, func, obj = expr_toanyreg(fs, e);
|
||||
BCReg idx, func, fr2, obj = expr_toanyreg(fs, e);
|
||||
expr_free(fs, e);
|
||||
func = fs->freereg;
|
||||
bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */
|
||||
fr2 = fs->ls->fr2;
|
||||
bcemit_AD(fs, BC_MOV, func+1+fr2, obj); /* Copy object to 1st argument. */
|
||||
lj_assertFS(expr_isstrk(key), "bad usage");
|
||||
idx = const_str(fs, key);
|
||||
if (idx <= BCMAX_C) {
|
||||
bcreg_reserve(fs, 2+LJ_FR2);
|
||||
bcreg_reserve(fs, 2+fr2);
|
||||
bcemit_ABC(fs, BC_TGETS, func, obj, idx);
|
||||
} else {
|
||||
bcreg_reserve(fs, 3+LJ_FR2);
|
||||
bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx);
|
||||
bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2);
|
||||
bcreg_reserve(fs, 3+fr2);
|
||||
bcemit_AD(fs, BC_KSTR, func+2+fr2, idx);
|
||||
bcemit_ABC(fs, BC_TGETV, func, obj, func+2+fr2);
|
||||
fs->freereg--;
|
||||
}
|
||||
e->u.s.info = func;
|
||||
@ -1336,9 +1337,12 @@ static void fs_fixup_bc(FuncState *fs, GCproto *pt, BCIns *bc, MSize n)
|
||||
{
|
||||
BCInsLine *base = fs->bcbase;
|
||||
MSize i;
|
||||
BCIns op;
|
||||
pt->sizebc = n;
|
||||
bc[0] = BCINS_AD((fs->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
|
||||
fs->framesize, 0);
|
||||
if (fs->ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */
|
||||
else if ((fs->flags & PROTO_VARARG)) op = BC_FUNCV;
|
||||
else op = BC_FUNCF;
|
||||
bc[0] = BCINS_AD(op, fs->framesize, 0);
|
||||
for (i = 1; i < n; i++)
|
||||
bc[i] = base[i].ins;
|
||||
}
|
||||
@ -1981,11 +1985,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
|
||||
lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
|
||||
base = e->u.s.info; /* Base register for call. */
|
||||
if (args.k == VCALL) {
|
||||
ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2);
|
||||
ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - ls->fr2);
|
||||
} else {
|
||||
if (args.k != VVOID)
|
||||
expr_tonextreg(fs, &args);
|
||||
ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2);
|
||||
ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - ls->fr2);
|
||||
}
|
||||
expr_init(e, VCALL, bcemit_INS(fs, ins));
|
||||
e->u.s.aux = base;
|
||||
@ -2025,7 +2029,7 @@ static void expr_primary(LexState *ls, ExpDesc *v)
|
||||
parse_args(ls, v);
|
||||
} else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
|
||||
expr_tonextreg(fs, v);
|
||||
if (LJ_FR2) bcreg_reserve(fs, 1);
|
||||
if (ls->fr2) bcreg_reserve(fs, 1);
|
||||
parse_args(ls, v);
|
||||
} else {
|
||||
break;
|
||||
@ -2610,7 +2614,7 @@ static void parse_for_iter(LexState *ls, GCstr *indexname)
|
||||
line = ls->linenumber;
|
||||
assign_adjust(ls, 3, expr_list(ls, &e), &e);
|
||||
/* The iterator needs another 3 [4] slots (func [pc] | state ctl). */
|
||||
bcreg_bump(fs, 3+LJ_FR2);
|
||||
bcreg_bump(fs, 3+ls->fr2);
|
||||
isnext = (nvars <= 5 && predict_next(ls, fs, exprpc));
|
||||
var_add(ls, 3); /* Hidden control variables. */
|
||||
lex_check(ls, TK_do);
|
||||
|
@ -976,6 +976,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
|
||||
emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
|
||||
J->retdepth++;
|
||||
J->needsnap = 1;
|
||||
J->scev.idx = REF_NIL;
|
||||
lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
|
||||
/* Shift result slots up and clear the slots of the new frame below. */
|
||||
memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
|
||||
@ -1599,10 +1600,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
|
||||
lj_assertJ(!hasmm, "inconsistent metamethod handling");
|
||||
if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
|
||||
TRef key = ix->key;
|
||||
if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
|
||||
if (tref_isinteger(key)) { /* NEWREF needs a TValue as a key. */
|
||||
key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
|
||||
else if (tref_isnumber(key) && tref_isk(key) && tvismzero(&ix->keyv))
|
||||
} else if (tref_isnum(key)) {
|
||||
if (tref_isk(key)) {
|
||||
if (tvismzero(&ix->keyv))
|
||||
key = lj_ir_knum_zero(J); /* Canonicalize -0.0 to +0.0. */
|
||||
} else {
|
||||
emitir(IRTG(IR_EQ, IRT_NUM), key, key); /* Check for !NaN. */
|
||||
}
|
||||
}
|
||||
xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
|
||||
keybarrier = 0; /* NEWREF already takes care of the key barrier. */
|
||||
#ifdef LUAJIT_ENABLE_TABLE_BUMP
|
||||
@ -1766,16 +1773,16 @@ noconstify:
|
||||
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */
|
||||
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
|
||||
if (!uvp->closed) {
|
||||
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
|
||||
/* In current stack? */
|
||||
if (uvval(uvp) >= tvref(J->L->stack) &&
|
||||
uvval(uvp) < tvref(J->L->maxstack)) {
|
||||
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
|
||||
if (slot >= 0) { /* Aliases an SSA slot? */
|
||||
uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv));
|
||||
emitir(IRTG(IR_EQ, IRT_PGC),
|
||||
REF_BASE,
|
||||
emitir(IRT(IR_ADD, IRT_PGC), uref,
|
||||
lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
|
||||
lj_ir_kintpgc(J, (slot - 1 - LJ_FR2) * -8)));
|
||||
slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
|
||||
if (val == 0) {
|
||||
return getslot(J, slot);
|
||||
@ -1786,12 +1793,21 @@ noconstify:
|
||||
}
|
||||
}
|
||||
}
|
||||
/* IR_UREFO+IRT_IGC is not checked for open-ness at runtime.
|
||||
** Always marked as a guard, since it might get promoted to IRT_PGC later.
|
||||
*/
|
||||
uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv);
|
||||
uref = tref_ref(uref);
|
||||
emitir(IRTG(IR_UGT, IRT_PGC),
|
||||
emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
|
||||
lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
|
||||
lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
|
||||
} else {
|
||||
/* If fn is constant, then so is the GCupval*, and the upvalue cannot
|
||||
** transition back to open, so no guard is required in this case.
|
||||
*/
|
||||
IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC;
|
||||
uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv));
|
||||
needbarrier = 1;
|
||||
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
|
||||
}
|
||||
if (val == 0) { /* Upvalue load */
|
||||
IRType t = itype2irt(uvval(uvp));
|
||||
@ -1966,7 +1982,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
|
||||
emitir(IRTGI(IR_EQ), fr,
|
||||
lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
|
||||
vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
|
||||
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8*(1+LJ_FR2)));
|
||||
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
|
||||
lj_ir_kintpgc(J, frofs-8*(1+LJ_FR2)));
|
||||
for (i = 0; i < nload; i++) {
|
||||
IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
|
||||
J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t);
|
||||
@ -1985,8 +2002,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
|
||||
TRef tr = TREF_NIL;
|
||||
ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]);
|
||||
if (idx < 0) goto nyivarg;
|
||||
if (idx != 0 && !tref_isinteger(tridx))
|
||||
if (idx != 0 && !tref_isinteger(tridx)) {
|
||||
if (tref_isstr(tridx))
|
||||
tridx = emitir(IRTG(IR_STRTO, IRT_NUM), tridx, 0);
|
||||
tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX);
|
||||
}
|
||||
if (idx != 0 && tref_isk(tridx)) {
|
||||
emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT),
|
||||
fr, lj_ir_kint(J, frofs+8*(int32_t)idx));
|
||||
@ -2014,7 +2034,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
|
||||
IRType t;
|
||||
TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
|
||||
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
|
||||
lj_ir_kint(J, frofs-(8<<LJ_FR2)));
|
||||
lj_ir_kintpgc(J, frofs-(8<<LJ_FR2)));
|
||||
t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
|
||||
aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
|
||||
tr = lj_record_vload(J, aref, 0, t);
|
||||
|
@ -453,6 +453,7 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
|
||||
case IR_KNUM: case IR_KINT64:
|
||||
return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
|
||||
case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
|
||||
case IR_KNULL: return lj_ir_knull(J, irt_type(ir->t));
|
||||
default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
|
||||
}
|
||||
}
|
||||
@ -557,13 +558,15 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
|
||||
IRRef refp = snap_ref(sn);
|
||||
IRIns *ir = &T->ir[refp];
|
||||
if (regsp_reg(ir->r) == RID_SUNK) {
|
||||
uint8_t m;
|
||||
if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
|
||||
pass23 = 1;
|
||||
lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
|
||||
ir->o == IR_CNEW || ir->o == IR_CNEWI,
|
||||
"sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
|
||||
if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
|
||||
if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
|
||||
m = lj_ir_mode[ir->o];
|
||||
if (irm_op1(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op1);
|
||||
if (irm_op2(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op2);
|
||||
if (LJ_HASFFI && ir->o == IR_CNEWI) {
|
||||
if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
|
||||
snap_pref(J, T, map, nent, seen, (ir+1)->op2);
|
||||
@ -591,14 +594,16 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
|
||||
IRIns *ir = &T->ir[refp];
|
||||
if (regsp_reg(ir->r) == RID_SUNK) {
|
||||
TRef op1, op2;
|
||||
uint8_t m;
|
||||
if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
|
||||
J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
|
||||
continue;
|
||||
}
|
||||
op1 = ir->op1;
|
||||
if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
|
||||
m = lj_ir_mode[ir->o];
|
||||
if (irm_op1(m) == IRMref) op1 = snap_pref(J, T, map, nent, seen, op1);
|
||||
op2 = ir->op2;
|
||||
if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
|
||||
if (irm_op2(m) == IRMref) op2 = snap_pref(J, T, map, nent, seen, op2);
|
||||
if (LJ_HASFFI && ir->o == IR_CNEWI) {
|
||||
if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
|
||||
lj_needsplit(J); /* Emit joining HIOP. */
|
||||
@ -624,9 +629,25 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
|
||||
if (irr->o == IR_HREFK || irr->o == IR_AREF) {
|
||||
IRIns *irf = &T->ir[irr->op1];
|
||||
tmp = emitir(irf->ot, tmp, irf->op2);
|
||||
} else if (irr->o == IR_NEWREF) {
|
||||
IRRef allocref = tref_ref(tr);
|
||||
IRRef keyref = tref_ref(key);
|
||||
IRRef newref_ref = J->chain[IR_NEWREF];
|
||||
IRIns *newref = &J->cur.ir[newref_ref];
|
||||
lj_assertJ(irref_isk(keyref),
|
||||
"sunk store for parent IR %04d with bad key %04d",
|
||||
refp - REF_BIAS, keyref - REF_BIAS);
|
||||
if (newref_ref > allocref && newref->op2 == keyref) {
|
||||
lj_assertJ(newref->op1 == allocref,
|
||||
"sunk store for parent IR %04d with bad tab %04d",
|
||||
refp - REF_BIAS, allocref - REF_BIAS);
|
||||
tmp = newref_ref;
|
||||
goto skip_newref;
|
||||
}
|
||||
}
|
||||
}
|
||||
tmp = emitir(irr->ot, tmp, key);
|
||||
skip_newref:
|
||||
val = snap_pref(J, T, map, nent, seen, irs->op2);
|
||||
if (val == 0) {
|
||||
IRIns *irc = &T->ir[irs->op2];
|
||||
@ -882,9 +903,13 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
if (irk->o == IR_FREF) {
|
||||
switch (irk->op2) {
|
||||
case IRFL_TAB_META:
|
||||
if (T->ir[irs->op2].o == IR_KNULL) {
|
||||
setgcrefnull(t->metatable);
|
||||
} else {
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
|
||||
/* NOBARRIER: The table is new (marked white). */
|
||||
setgcref(t->metatable, obj2gco(tabV(&tmp)));
|
||||
}
|
||||
break;
|
||||
case IRFL_TAB_NOMM:
|
||||
/* Negative metamethod cache invalidated by lj_tab_set() below. */
|
||||
|
@ -102,20 +102,49 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
|
||||
/* Try to grow stack. */
|
||||
void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
|
||||
{
|
||||
MSize n;
|
||||
if (L->stacksize > LJ_STACK_MAXEX) /* Overflow while handling overflow? */
|
||||
lj_err_throw(L, LUA_ERRERR);
|
||||
n = L->stacksize + need;
|
||||
if (n > LJ_STACK_MAX) {
|
||||
n += 2*LUA_MINSTACK;
|
||||
} else if (n < 2*L->stacksize) {
|
||||
MSize n = L->stacksize + need;
|
||||
if (LJ_LIKELY(n < LJ_STACK_MAX)) { /* The stack can grow as requested. */
|
||||
if (n < 2 * L->stacksize) { /* Try to double the size. */
|
||||
n = 2 * L->stacksize;
|
||||
if (n >= LJ_STACK_MAX)
|
||||
if (n > LJ_STACK_MAX)
|
||||
n = LJ_STACK_MAX;
|
||||
}
|
||||
resizestack(L, n);
|
||||
if (L->stacksize >= LJ_STACK_MAXEX)
|
||||
lj_err_msg(L, LJ_ERR_STKOV);
|
||||
} else { /* Request would overflow. Raise a stack overflow error. */
|
||||
if (LJ_HASJIT) {
|
||||
TValue *base = tvref(G(L)->jit_base);
|
||||
if (base) L->base = base;
|
||||
}
|
||||
if (curr_funcisL(L)) {
|
||||
L->top = curr_topL(L);
|
||||
if (L->top > tvref(L->maxstack)) {
|
||||
/* The current Lua frame violates the stack, so replace it with a
|
||||
** dummy. This can happen when BC_IFUNCF is trying to grow the stack.
|
||||
*/
|
||||
L->top = L->base;
|
||||
setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD);
|
||||
}
|
||||
}
|
||||
if (L->stacksize <= LJ_STACK_MAXEX) {
|
||||
/* An error handler might want to inspect the stack overflow error, but
|
||||
** will need some stack space to run in. We give it a stack size beyond
|
||||
** the normal limit in order to do so, then rely on lj_state_relimitstack
|
||||
** calls during unwinding to bring us back to a conventional stack size.
|
||||
** The + 1 is space for the error message, and 2 * LUA_MINSTACK is for
|
||||
** the lj_state_checkstack() call in lj_err_run().
|
||||
*/
|
||||
resizestack(L, LJ_STACK_MAX + 1 + 2 * LUA_MINSTACK);
|
||||
lj_err_stkov(L); /* May invoke an error handler. */
|
||||
} else {
|
||||
/* If we're here, then the stack overflow error handler is requesting
|
||||
** to grow the stack even further. We have no choice but to abort the
|
||||
** error handler.
|
||||
*/
|
||||
GCstr *em = lj_err_str(L, LJ_ERR_STKOV); /* Might OOM. */
|
||||
setstrV(L, L->top++, em); /* There is always space to push an error. */
|
||||
lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LJ_FASTCALL lj_state_growstack1(lua_State *L)
|
||||
@ -123,6 +152,18 @@ void LJ_FASTCALL lj_state_growstack1(lua_State *L)
|
||||
lj_state_growstack(L, 1);
|
||||
}
|
||||
|
||||
/* Callback passed to lj_vm_cpcall() by lj_state_cpgrowstack().
** ud points to the MSize number of slots to grow the stack of co by.
** The dummy argument is unused. Always returns NULL.
*/
static TValue *cpgrowstack(lua_State *co, lua_CFunction dummy, void *ud)
|
||||
{
|
||||
UNUSED(dummy);
|
||||
lj_state_growstack(co, *(MSize *)ud);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Grow the stack of L by need slots, with lj_state_growstack() invoked
** through lj_vm_cpcall() and the cpgrowstack() callback.
** Returns the result of lj_vm_cpcall().
** NOTE(review): presumably a status code, with errors raised while growing
** reported there instead of propagating -- confirm against lj_vm_cpcall().
*/
int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need)
|
||||
{
|
||||
return lj_vm_cpcall(L, NULL, &need, cpgrowstack);
|
||||
}
|
||||
|
||||
/* Allocate basic stack for new state. */
|
||||
static void stack_init(lua_State *L1, lua_State *L)
|
||||
{
|
||||
@ -327,8 +368,11 @@ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
|
||||
lj_assertG(L != mainthread(g), "free of main thread");
|
||||
if (obj2gco(L) == gcref(g->cur_L))
|
||||
setgcrefnull(g->cur_L);
|
||||
if (gcref(L->openupval) != NULL) {
|
||||
lj_func_closeuv(L, tvref(L->stack));
|
||||
lj_trace_abort(g); /* For aa_uref soundness. */
|
||||
lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
|
||||
}
|
||||
lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
|
||||
lj_mem_freet(g, L);
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L);
|
||||
LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
|
||||
LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need);
|
||||
LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L);
|
||||
LJ_FUNC int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need);
|
||||
|
||||
static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
|
||||
{
|
||||
|
@ -454,7 +454,8 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
|
||||
prec--;
|
||||
if (!i) {
|
||||
if (ndlo == ndhi) { prec = 0; break; }
|
||||
lj_strfmt_wuint9(tail, nd[++ndlo]);
|
||||
ndlo = (ndlo + 1) & 0x3f;
|
||||
lj_strfmt_wuint9(tail, nd[ndlo]);
|
||||
i = 9;
|
||||
}
|
||||
}
|
||||
|
@ -58,9 +58,13 @@ typedef uint32_t RegSP;
|
||||
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
|
||||
typedef uint64_t RegSet;
|
||||
#define RSET_BITS 6
|
||||
#define rset_picktop_(rs) ((Reg)lj_fls64(rs))
|
||||
#define rset_pickbot_(rs) ((Reg)lj_ffs64(rs))
|
||||
#else
|
||||
typedef uint32_t RegSet;
|
||||
#define RSET_BITS 5
|
||||
#define rset_picktop_(rs) ((Reg)lj_fls(rs))
|
||||
#define rset_pickbot_(rs) ((Reg)lj_ffs(rs))
|
||||
#endif
|
||||
|
||||
#define RID2RSET(r) (((RegSet)1) << (r))
|
||||
@ -71,13 +75,6 @@ typedef uint32_t RegSet;
|
||||
#define rset_set(rs, r) (rs |= RID2RSET(r))
|
||||
#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
|
||||
#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
|
||||
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
|
||||
#define rset_picktop_(rs) ((Reg)(__builtin_clzll(rs)^63))
|
||||
#define rset_pickbot_(rs) ((Reg)__builtin_ctzll(rs))
|
||||
#else
|
||||
#define rset_picktop_(rs) ((Reg)lj_fls(rs))
|
||||
#define rset_pickbot_(rs) ((Reg)lj_ffs(rs))
|
||||
#endif
|
||||
|
||||
/* -- Register allocation cost -------------------------------------------- */
|
||||
|
||||
|
@ -234,6 +234,8 @@ typedef enum A64Ins {
|
||||
A64I_MOVZx = 0xd2800000,
|
||||
A64I_MOVNw = 0x12800000,
|
||||
A64I_MOVNx = 0x92800000,
|
||||
A64I_ADR = 0x10000000,
|
||||
A64I_ADRP = 0x90000000,
|
||||
|
||||
A64I_LDRB = 0x39400000,
|
||||
A64I_LDRH = 0x79400000,
|
||||
|
@ -613,21 +613,27 @@ static int trace_abort(jit_State *J)
|
||||
J->cur.link = 0;
|
||||
J->cur.linktype = LJ_TRLINK_NONE;
|
||||
lj_vmevent_send(L, TRACE,
|
||||
TValue *frame;
|
||||
cTValue *bot = tvref(L->stack)+LJ_FR2;
|
||||
cTValue *frame;
|
||||
const BCIns *pc;
|
||||
GCfunc *fn;
|
||||
BCPos pos = 0;
|
||||
setstrV(L, L->top++, lj_str_newlit(L, "abort"));
|
||||
setintV(L->top++, traceno);
|
||||
/* Find original Lua function call to generate a better error message. */
|
||||
frame = J->L->base-1;
|
||||
pc = J->pc;
|
||||
while (!isluafunc(frame_func(frame))) {
|
||||
pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1;
|
||||
frame = frame_prev(frame);
|
||||
for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) {
|
||||
if (isluafunc(frame_func(frame))) {
|
||||
pos = proto_bcpos(funcproto(frame_func(frame)), pc);
|
||||
break;
|
||||
} else if (frame_prev(frame) <= bot) {
|
||||
break;
|
||||
} else if (frame_iscont(frame)) {
|
||||
pc = frame_contpc(frame) - 1;
|
||||
} else {
|
||||
pc = frame_pc(frame) - 1;
|
||||
}
|
||||
fn = frame_func(frame);
|
||||
setfuncV(L, L->top++, fn);
|
||||
setintV(L->top++, proto_bcpos(funcproto(fn), pc));
|
||||
}
|
||||
setfuncV(L, L->top++, frame_func(frame));
|
||||
setintV(L->top++, pos);
|
||||
copyTV(L, L->top++, restorestack(L, errobj));
|
||||
copyTV(L, L->top++, &J->errinfo);
|
||||
);
|
||||
@ -922,7 +928,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
|
||||
} else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
|
||||
if (!(G(L)->hookmask & HOOK_GC))
|
||||
lj_gc_step(L); /* Exited because of GC: drive GC forward. */
|
||||
} else {
|
||||
} else if ((J->flags & JIT_F_ON)) {
|
||||
trace_hotside(J, pc);
|
||||
}
|
||||
/* Return MULTRES or 0 or -17. */
|
||||
|
@ -76,4 +76,5 @@ LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
|
||||
/* Enforce (dynamic) linker error for version mismatches. Call from main. */
|
||||
LUA_API void LUAJIT_VERSION_SYM(void);
|
||||
|
||||
#error "DO NOT USE luajit_rolling.h -- only include build-generated luajit.h"
|
||||
#endif
|
||||
|
@ -16,6 +16,7 @@
|
||||
@rem Add more debug flags here, e.g. DEBUGCFLAGS=/DLUA_USE_APICHECK
|
||||
@set DEBUGCFLAGS= /DLUA_USE_APICHECK /DLUA_USE_ASSERT /DLUAJIT_USE_SYSMALLOC /fsanitize=address
|
||||
@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline /DLUAJIT_NUMMODE=2
|
||||
@set LJDYNBUILD=/MD /DLUA_BUILD_AS_DLL
|
||||
@set LJLINK=link /nologo
|
||||
@set LJMT=mt /nologo
|
||||
@set LJLIB=lib /nologo /nodefaultlib
|
||||
@ -27,39 +28,52 @@
|
||||
@set BUILDTYPE=release
|
||||
@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
|
||||
|
||||
@setlocal
|
||||
@call :SETHOSTVARS
|
||||
%LJCOMPILE% host\minilua.c
|
||||
@if errorlevel 1 goto :BAD
|
||||
%LJLINK% /out:minilua.exe minilua.obj
|
||||
@if errorlevel 1 goto :BAD
|
||||
if exist minilua.exe.manifest^
|
||||
%LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
|
||||
@endlocal
|
||||
|
||||
@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64
|
||||
@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64
|
||||
@set LJARCH=x64
|
||||
@minilua
|
||||
@if errorlevel 8 goto :X64
|
||||
@if errorlevel 8 goto :NO32
|
||||
@set DASC=vm_x86.dasc
|
||||
@set DASMFLAGS=-D WIN -D JIT -D FFI
|
||||
@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU
|
||||
@set LJARCH=x86
|
||||
@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
|
||||
@goto :DA
|
||||
:NO32
|
||||
@if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64
|
||||
@set DASC=vm_arm64.dasc
|
||||
@set DASMTARGET=-D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
|
||||
@set LJARCH=arm64
|
||||
@goto :DA
|
||||
:X64
|
||||
@if "%1" neq "nogc64" goto :GC64
|
||||
@if "%1" neq "nogc64" goto :DA
|
||||
@shift
|
||||
@set DASC=vm_x86.dasc
|
||||
@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
|
||||
:GC64
|
||||
:DA
|
||||
minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
|
||||
@if errorlevel 1 goto :BAD
|
||||
|
||||
if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
|
||||
minilua host\genversion.lua
|
||||
|
||||
%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
|
||||
@setlocal
|
||||
@call :SETHOSTVARS
|
||||
%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% host\buildvm*.c
|
||||
@if errorlevel 1 goto :BAD
|
||||
%LJLINK% /out:buildvm.exe buildvm*.obj
|
||||
@if errorlevel 1 goto :BAD
|
||||
if exist buildvm.exe.manifest^
|
||||
%LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
|
||||
@endlocal
|
||||
|
||||
buildvm -m peobj -o lj_vm.obj
|
||||
@if errorlevel 1 goto :BAD
|
||||
@ -80,12 +94,13 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
|
||||
@shift
|
||||
@set BUILDTYPE=debug
|
||||
@set LJCOMPILE=%LJCOMPILE% /Od /Zi %DEBUGCFLAGS%
|
||||
@set LJDYNBUILD=/MDd /DLUA_BUILD_AS_DLL
|
||||
@set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no
|
||||
:NODEBUG
|
||||
@set LJLINK=%LJLINK% /%BUILDTYPE%
|
||||
@if "%1"=="amalg" goto :AMALGDLL
|
||||
@if "%1"=="static" goto :STATIC
|
||||
%LJCOMPILE% /MD /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
|
||||
%LJCOMPILE% %LJDYNBUILD% lj_*.c lib_*.c
|
||||
@if errorlevel 1 goto :BAD
|
||||
%LJLINK% /DLL /out:%LJDLLNAME% lj_*.obj lib_*.obj
|
||||
@if errorlevel 1 goto :BAD
|
||||
@ -97,7 +112,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
|
||||
@if errorlevel 1 goto :BAD
|
||||
@goto :MTDLL
|
||||
:AMALGDLL
|
||||
%LJCOMPILE% /MD /DLUA_BUILD_AS_DLL ljamalg.c
|
||||
%LJCOMPILE% %LJDYNBUILD% ljamalg.c
|
||||
@if errorlevel 1 goto :BAD
|
||||
%LJLINK% /DLL /out:%LJDLLNAME% ljamalg.obj lj_vm.obj
|
||||
@if errorlevel 1 goto :BAD
|
||||
@ -118,6 +133,12 @@ if exist luajit.exe.manifest^
|
||||
@echo.
|
||||
@echo === Successfully built LuaJIT for Windows/%LJARCH%[%BUILDTYPE%] ===
|
||||
|
||||
@goto :END
|
||||
:SETHOSTVARS
|
||||
@if "%VSCMD_ARG_HOST_ARCH%_%VSCMD_ARG_TGT_ARCH%" equ "x64_arm64" (
|
||||
call "%VSINSTALLDIR%Common7\Tools\VsDevCmd.bat" -arch=%VSCMD_ARG_HOST_ARCH% -no_logo
|
||||
echo on
|
||||
)
|
||||
@goto :END
|
||||
:BAD
|
||||
@echo.
|
||||
|
@ -1195,8 +1195,11 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|//-- Base library: catch errors ----------------------------------------
|
||||
|
|
||||
|.ffunc pcall
|
||||
| ldr RB, L->maxstack
|
||||
| add INS, BASE, NARGS8:RC
|
||||
| ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
|
||||
| cmp NARGS8:RC, #8
|
||||
| cmphs RB, INS
|
||||
| blo ->fff_fallback
|
||||
| tst RA, #HOOK_ACTIVE // Remember active hook before pcall.
|
||||
| mov RB, BASE
|
||||
@ -1207,7 +1210,11 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| b ->vm_call_dispatch
|
||||
|
|
||||
|.ffunc_2 xpcall
|
||||
| ldr RB, L->maxstack
|
||||
| add INS, BASE, NARGS8:RC
|
||||
| ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
|
||||
| cmp RB, INS
|
||||
| blo ->fff_fallback
|
||||
| checkfunc CARG4, ->fff_fallback // Traceback must be a function.
|
||||
| mov RB, BASE
|
||||
| strd CARG12, [BASE, #8] // Swap function and traceback.
|
||||
|
@ -113,13 +113,37 @@
|
||||
|
|
||||
|.define TMPDofs, #24
|
||||
|
|
||||
|.if WIN
|
||||
|// Windows unwind data is suited to r1 stored first.
|
||||
|.macro stp_unwind, r1, r2, where
|
||||
| stp r1, r2, where
|
||||
|.endmacro
|
||||
|.macro ldp_unwind, r1, r2, where
|
||||
| ldp r1, r2, where
|
||||
|.endmacro
|
||||
|.macro ldp_unwind, r1, r2, where, post_index
|
||||
| ldp r1, r2, where, post_index
|
||||
|.endmacro
|
||||
|.else
|
||||
|// Otherwise store r2 first for compact unwind info (OSX).
|
||||
|.macro stp_unwind, r1, r2, where
|
||||
| stp r2, r1, where
|
||||
|.endmacro
|
||||
|.macro ldp_unwind, r1, r2, where
|
||||
| ldp r2, r1, where
|
||||
|.endmacro
|
||||
|.macro ldp_unwind, r1, r2, where, post_index
|
||||
| ldp r2, r1, where, post_index
|
||||
|.endmacro
|
||||
|.endif
|
||||
|
|
||||
|.macro save_, gpr1, gpr2, fpr1, fpr2
|
||||
| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
|
||||
| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
|
||||
| stp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
|
||||
| stp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
|
||||
|.endmacro
|
||||
|.macro rest_, gpr1, gpr2, fpr1, fpr2
|
||||
| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
|
||||
| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
|
||||
| ldp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
|
||||
| ldp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
|
||||
|.endmacro
|
||||
|
|
||||
|.macro saveregs
|
||||
@ -127,14 +151,14 @@
|
||||
| sub sp, sp, # CFRAME_SPACE
|
||||
| stp fp, lr, [sp, # SAVE_FP_LR_]
|
||||
| add fp, sp, # SAVE_FP_LR_
|
||||
| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
|
||||
| stp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
|
||||
| save_ 21, 22, 8, 9
|
||||
| save_ 23, 24, 10, 11
|
||||
| save_ 25, 26, 12, 13
|
||||
| save_ 27, 28, 14, 15
|
||||
|.endmacro
|
||||
|.macro restoreregs
|
||||
| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
|
||||
| ldp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
|
||||
| rest_ 21, 22, 8, 9
|
||||
| rest_ 23, 24, 10, 11
|
||||
| rest_ 25, 26, 12, 13
|
||||
@ -267,8 +291,17 @@
|
||||
| blo target
|
||||
|.endmacro
|
||||
|
|
||||
|.macro init_constants
|
||||
| movn TISNIL, #0
|
||||
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
|
||||
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
|
||||
|.endmacro
|
||||
|
|
||||
|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
|
||||
|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
|
||||
|.macro mov_nil, reg; mov reg, TISNIL; .endmacro
|
||||
|.macro cmp_nil, reg; cmp reg, TISNIL; .endmacro
|
||||
|.macro add_TISNUM, dst, src; add dst, src, TISNUM; .endmacro
|
||||
|
|
||||
#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
|
||||
|
|
||||
@ -406,26 +439,26 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|
|
||||
|->vm_unwind_c: // Unwind C stack, return from vm_pcall.
|
||||
| // (void *cframe, int errcode)
|
||||
| add fp, CARG1, # SAVE_FP_LR_
|
||||
| mov sp, CARG1
|
||||
| mov CRET1, CARG2
|
||||
|->vm_unwind_c_eh: // Landing pad for external unwinder.
|
||||
| ldr L, SAVE_L
|
||||
| mv_vmstate TMP0w, C
|
||||
| ldr GL, L->glref
|
||||
|->vm_unwind_c_eh: // Landing pad for external unwinder.
|
||||
| mv_vmstate TMP0w, C
|
||||
| st_vmstate TMP0w
|
||||
| b ->vm_leave_unw
|
||||
|
|
||||
|->vm_unwind_ff: // Unwind C stack, return from ff pcall.
|
||||
| // (void *cframe)
|
||||
| and sp, CARG1, #CFRAME_RAWMASK
|
||||
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
|
||||
| add fp, CARG1, # SAVE_FP_LR_
|
||||
| mov sp, CARG1
|
||||
| ldr L, SAVE_L
|
||||
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
|
||||
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
|
||||
| movn TISNIL, #0
|
||||
| init_constants
|
||||
| ldr GL, L->glref // Setup pointer to global state.
|
||||
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
|
||||
| mov RC, #16 // 2 results: false + error message.
|
||||
| ldr BASE, L->base
|
||||
| ldr GL, L->glref // Setup pointer to global state.
|
||||
| mov_false TMP0
|
||||
| sub RA, BASE, #8 // Results start at BASE-8.
|
||||
| ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
|
||||
@ -486,11 +519,9 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| str L, GL->cur_L
|
||||
| mov RA, BASE
|
||||
| ldp BASE, CARG1, L->base
|
||||
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
|
||||
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
|
||||
| init_constants
|
||||
| ldr PC, [BASE, FRAME_PC]
|
||||
| strb wzr, L->status
|
||||
| movn TISNIL, #0
|
||||
| sub RC, CARG1, BASE
|
||||
| ands CARG1, PC, #FRAME_TYPE
|
||||
| add RC, RC, #8
|
||||
@ -526,10 +557,8 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
|
||||
| str L, GL->cur_L
|
||||
| ldp RB, CARG1, L->base // RB = old base (for vmeta_call).
|
||||
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
|
||||
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
|
||||
| add PC, PC, BASE
|
||||
| movn TISNIL, #0
|
||||
| init_constants
|
||||
| sub PC, PC, RB // PC = frame delta + frame type
|
||||
| sub NARGS8:RC, CARG1, BASE
|
||||
| st_vmstate ST_INTERP
|
||||
@ -638,7 +667,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| b >1
|
||||
|
|
||||
|->vmeta_tgetb: // RB = table, RC = index
|
||||
| add RC, RC, TISNUM
|
||||
| add_TISNUM RC, RC
|
||||
| add CARG2, BASE, RB, lsl #3
|
||||
| add CARG3, sp, TMPDofs
|
||||
| str RC, TMPD
|
||||
@ -673,7 +702,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| sxtw CARG2, TMP1w
|
||||
| bl extern lj_tab_getinth // (GCtab *t, int32_t key)
|
||||
| // Returns cTValue * or NULL.
|
||||
| mov TMP0, TISNIL
|
||||
| mov_nil TMP0
|
||||
| cbz CRET1, ->BC_TGETR_Z
|
||||
| ldr TMP0, [CRET1]
|
||||
| b ->BC_TGETR_Z
|
||||
@ -696,7 +725,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| b >1
|
||||
|
|
||||
|->vmeta_tsetb: // RB = table, RC = index
|
||||
| add RC, RC, TISNUM
|
||||
| add_TISNUM RC, RC
|
||||
| add CARG2, BASE, RB, lsl #3
|
||||
| add CARG3, sp, TMPDofs
|
||||
| str RC, TMPD
|
||||
@ -1010,7 +1039,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|1: // Field metatable must be at same offset for GCtab and GCudata!
|
||||
| ldr TAB:RB, TAB:CARG1->metatable
|
||||
|2:
|
||||
| mov CARG1, TISNIL
|
||||
| mov_nil CARG1
|
||||
| ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
|
||||
| cbz TAB:RB, ->fff_restv
|
||||
| ldr TMP1w, TAB:RB->hmask
|
||||
@ -1032,7 +1061,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
|
||||
| b ->fff_restv
|
||||
|5:
|
||||
| cmp TMP0, TISNIL
|
||||
| cmp_nil TMP0
|
||||
| bne ->fff_restv
|
||||
| b <4
|
||||
|
|
||||
@ -1132,8 +1161,8 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| cbnz TAB:CARG2, ->fff_fallback
|
||||
#endif
|
||||
| mov RC, #(3+1)*8
|
||||
| stp CARG1, TISNIL, [BASE, #-8]
|
||||
| str CFUNC:CARG4, [BASE, #-16]
|
||||
| stp CFUNC:CARG4, CARG1, [BASE, #-16]
|
||||
| str TISNIL, [BASE]
|
||||
| b ->fff_res
|
||||
|
|
||||
|.ffunc_2 ipairs_aux
|
||||
@ -1145,14 +1174,14 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| add CARG2w, CARG2w, #1
|
||||
| cmp CARG2w, TMP1w
|
||||
| ldr PC, [BASE, FRAME_PC]
|
||||
| add TMP2, CARG2, TISNUM
|
||||
| add_TISNUM TMP2, CARG2
|
||||
| mov RC, #(0+1)*8
|
||||
| str TMP2, [BASE, #-16]
|
||||
| bhs >2 // Not in array part?
|
||||
| ldr TMP0, [CARG3, CARG2, lsl #3]
|
||||
|1:
|
||||
| mov TMP1, #(2+1)*8
|
||||
| cmp TMP0, TISNIL
|
||||
| cmp_nil TMP0
|
||||
| str TMP0, [BASE, #-8]
|
||||
| csel RC, RC, TMP1, eq
|
||||
| b ->fff_res
|
||||
@ -1175,13 +1204,17 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| cbnz TAB:CARG2, ->fff_fallback
|
||||
#endif
|
||||
| mov RC, #(3+1)*8
|
||||
| stp CARG1, TISNUM, [BASE, #-8]
|
||||
| str CFUNC:CARG4, [BASE, #-16]
|
||||
| stp CFUNC:CARG4, CARG1, [BASE, #-16]
|
||||
| str TISNUM, [BASE]
|
||||
| b ->fff_res
|
||||
|
|
||||
|//-- Base library: catch errors ----------------------------------------
|
||||
|
|
||||
|.ffunc pcall
|
||||
| ldr TMP1, L->maxstack
|
||||
| add TMP2, BASE, NARGS8:RC
|
||||
| cmp TMP1, TMP2
|
||||
| blo ->fff_fallback
|
||||
| cmp NARGS8:RC, #8
|
||||
| ldrb TMP0w, GL->hookmask
|
||||
| blo ->fff_fallback
|
||||
@ -1201,6 +1234,10 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| b ->vm_call_dispatch
|
||||
|
|
||||
|.ffunc xpcall
|
||||
| ldr TMP1, L->maxstack
|
||||
| add TMP2, BASE, NARGS8:RC
|
||||
| cmp TMP1, TMP2
|
||||
| blo ->fff_fallback
|
||||
| ldp CARG1, CARG2, [BASE]
|
||||
| ldrb TMP0w, GL->hookmask
|
||||
| subs NARGS8:TMP1, NARGS8:RC, #16
|
||||
@ -1366,7 +1403,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| eor CARG2w, CARG1w, CARG1w, asr #31
|
||||
| movz CARG3, #0x41e0, lsl #48 // 2^31.
|
||||
| subs CARG1w, CARG2w, CARG1w, asr #31
|
||||
| add CARG1, CARG1, TISNUM
|
||||
| add_TISNUM CARG1, CARG1
|
||||
| csel CARG1, CARG1, CARG3, pl
|
||||
| // Fallthrough.
|
||||
|
|
||||
@ -1457,7 +1494,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| ldr PC, [BASE, FRAME_PC]
|
||||
| str d0, [BASE, #-16]
|
||||
| mov RC, #(2+1)*8
|
||||
| add CARG2, CARG2, TISNUM
|
||||
| add_TISNUM CARG2, CARG2
|
||||
| str CARG2, [BASE, #-8]
|
||||
| b ->fff_res
|
||||
|
|
||||
@ -1523,7 +1560,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| bne ->fff_fallback
|
||||
| ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end).
|
||||
| ldr CARG3w, STR:CARG1->len
|
||||
| add TMP0, TMP0, TISNUM
|
||||
| add_TISNUM TMP0, TMP0
|
||||
| str TMP0, [BASE, #-16]
|
||||
| mov RC, #(0+1)*8
|
||||
| cbz CARG3, ->fff_res
|
||||
@ -1669,17 +1706,17 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|.ffunc_bit tobit
|
||||
| mov TMP0w, CARG1w
|
||||
|9: // Label reused by .ffunc_bit_op users.
|
||||
| add CARG1, TMP0, TISNUM
|
||||
| add_TISNUM CARG1, TMP0
|
||||
| b ->fff_restv
|
||||
|
|
||||
|.ffunc_bit bswap
|
||||
| rev TMP0w, CARG1w
|
||||
| add CARG1, TMP0, TISNUM
|
||||
| add_TISNUM CARG1, TMP0
|
||||
| b ->fff_restv
|
||||
|
|
||||
|.ffunc_bit bnot
|
||||
| mvn TMP0w, CARG1w
|
||||
| add CARG1, TMP0, TISNUM
|
||||
| add_TISNUM CARG1, TMP0
|
||||
| b ->fff_restv
|
||||
|
|
||||
|.macro .ffunc_bit_sh, name, ins, shmod
|
||||
@ -1700,7 +1737,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| checkint CARG1, ->vm_tobit_fb
|
||||
|2:
|
||||
| ins TMP0w, CARG1w, TMP1w
|
||||
| add CARG1, TMP0, TISNUM
|
||||
| add_TISNUM CARG1, TMP0
|
||||
| b ->fff_restv
|
||||
|.endmacro
|
||||
|
|
||||
@ -1889,8 +1926,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| and CARG3, CARG3, #LJ_GCVMASK
|
||||
| beq >2
|
||||
|1: // Move results down.
|
||||
| ldr CARG1, [RA]
|
||||
| add RA, RA, #8
|
||||
| ldr CARG1, [RA], #8
|
||||
| subs RB, RB, #8
|
||||
| str CARG1, [BASE, RC, lsl #3]
|
||||
| add RC, RC, #1
|
||||
@ -2005,13 +2041,11 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|.if JIT
|
||||
| ldr L, SAVE_L
|
||||
|1:
|
||||
| init_constants
|
||||
| cmn CARG1w, #LUA_ERRERR
|
||||
| bhs >9 // Check for error from exit.
|
||||
| lsl RC, CARG1, #3
|
||||
| ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
|
||||
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
|
||||
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
|
||||
| movn TISNIL, #0
|
||||
| lsl RC, CARG1, #3
|
||||
| and LFUNC:CARG2, CARG2, #LJ_GCVMASK
|
||||
| str RCw, SAVE_MULTRES
|
||||
| str BASE, L->base
|
||||
@ -2162,7 +2196,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|//-----------------------------------------------------------------------
|
||||
|
|
||||
|// Handler for callback functions.
|
||||
|// Saveregs already performed. Callback slot number in [sp], g in r12.
|
||||
|// Saveregs already performed. Callback slot number in w9, g in x10.
|
||||
|->vm_ffi_callback:
|
||||
|.if FFI
|
||||
|.type CTSTATE, CTState, PC
|
||||
@ -2186,9 +2220,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| bl extern lj_ccallback_enter // (CTState *cts, void *cf)
|
||||
| // Returns lua_State *.
|
||||
| ldp BASE, RC, L:CRET1->base
|
||||
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
|
||||
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
|
||||
| movn TISNIL, #0
|
||||
| init_constants
|
||||
| mov L, CRET1
|
||||
| ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
|
||||
| sub RC, RC, BASE
|
||||
@ -2215,7 +2247,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|.if FFI
|
||||
| .type CCSTATE, CCallState, x19
|
||||
| sp_auth
|
||||
| stp x20, CCSTATE, [sp, #-32]!
|
||||
| stp_unwind CCSTATE, x20, [sp, #-32]!
|
||||
| stp fp, lr, [sp, #16]
|
||||
| add fp, sp, #16
|
||||
| mov CCSTATE, x0
|
||||
@ -2247,7 +2279,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| stp d0, d1, CCSTATE->fpr[0]
|
||||
| stp d2, d3, CCSTATE->fpr[2]
|
||||
| ldp fp, lr, [sp, #16]
|
||||
| ldp x20, CCSTATE, [sp], #32
|
||||
| ldp_unwind CCSTATE, x20, [sp], #32
|
||||
| ret_auth
|
||||
|.endif
|
||||
|// Note: vm_ffi_call must be the last function in this object file!
|
||||
@ -2567,7 +2599,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| bne >5
|
||||
| negs TMP0w, TMP0w
|
||||
| movz CARG3, #0x41e0, lsl #48 // 2^31.
|
||||
| add TMP0, TMP0, TISNUM
|
||||
| add_TISNUM TMP0, TMP0
|
||||
| csel TMP0, TMP0, CARG3, vc
|
||||
|5:
|
||||
| str TMP0, [BASE, RA, lsl #3]
|
||||
@ -2582,7 +2614,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| bne >2
|
||||
| ldr CARG1w, STR:CARG1->len
|
||||
|1:
|
||||
| add CARG1, CARG1, TISNUM
|
||||
| add_TISNUM CARG1, CARG1
|
||||
| str CARG1, [BASE, RA, lsl #3]
|
||||
| ins_next
|
||||
|
|
||||
@ -2690,7 +2722,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| intins CARG1w, CARG1w, CARG2w
|
||||
| ins_arithfallback bvs
|
||||
|.endif
|
||||
| add CARG1, CARG1, TISNUM
|
||||
| add_TISNUM CARG1, CARG1
|
||||
| str CARG1, [BASE, RA, lsl #3]
|
||||
|4:
|
||||
| ins_next
|
||||
@ -2783,7 +2815,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
case BC_KSHORT:
|
||||
| // RA = dst, RC = int16_literal
|
||||
| sxth RCw, RCw
|
||||
| add TMP0, RC, TISNUM
|
||||
| add_TISNUM TMP0, RC
|
||||
| str TMP0, [BASE, RA, lsl #3]
|
||||
| ins_next
|
||||
break;
|
||||
@ -3006,7 +3038,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| cmp TMP1w, CARG1w // In array part?
|
||||
| bhs ->vmeta_tgetv
|
||||
| ldr TMP0, [CARG3]
|
||||
| cmp TMP0, TISNIL
|
||||
| cmp_nil TMP0
|
||||
| beq >5
|
||||
|1:
|
||||
| str TMP0, [BASE, RA, lsl #3]
|
||||
@ -3049,7 +3081,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| ldr NODE:CARG3, NODE:CARG3->next
|
||||
| cmp CARG1, CARG4
|
||||
| bne >4
|
||||
| cmp TMP0, TISNIL
|
||||
| cmp_nil TMP0
|
||||
| beq >5
|
||||
|3:
|
||||
| str TMP0, [BASE, RA, lsl #3]
|
||||
@ -3058,7 +3090,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
|4: // Follow hash chain.
|
||||
| cbnz NODE:CARG3, <1
|
||||
| // End of hash chain: key not found, nil result.
|
||||
| mov TMP0, TISNIL
|
||||
| mov_nil TMP0
|
||||
|
|
||||
|5: // Check for __index if table value is nil.
|
||||
| ldr TAB:CARG1, TAB:CARG2->metatable
|
||||
@ -3079,7 +3111,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| cmp RCw, CARG1w // In array part?
|
||||
| bhs ->vmeta_tgetb
|
||||
| ldr TMP0, [CARG3]
|
||||
| cmp TMP0, TISNIL
|
||||
| cmp_nil TMP0
|
||||
| beq >5
|
||||
|1:
|
||||
| str TMP0, [BASE, RA, lsl #3]
|
||||
@ -3126,7 +3158,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| ldr TMP1, [CARG3]
|
||||
| ldr TMP0, [BASE, RA, lsl #3]
|
||||
| ldrb TMP2w, TAB:CARG2->marked
|
||||
| cmp TMP1, TISNIL // Previous value is nil?
|
||||
| cmp_nil TMP1 // Previous value is nil?
|
||||
| beq >5
|
||||
|1:
|
||||
| str TMP0, [CARG3]
|
||||
@ -3178,7 +3210,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| cmp CARG1, CARG4
|
||||
| bne >5
|
||||
| ldr TMP0, [BASE, RA, lsl #3]
|
||||
| cmp TMP1, TISNIL // Previous value is nil?
|
||||
| cmp_nil TMP1 // Previous value is nil?
|
||||
| beq >4
|
||||
|2:
|
||||
| str TMP0, NODE:CARG3->val
|
||||
@ -3237,7 +3269,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| ldr TMP1, [CARG3]
|
||||
| ldr TMP0, [BASE, RA, lsl #3]
|
||||
| ldrb TMP2w, TAB:CARG2->marked
|
||||
| cmp TMP1, TISNIL // Previous value is nil?
|
||||
| cmp_nil TMP1 // Previous value is nil?
|
||||
| beq >5
|
||||
|1:
|
||||
| str TMP0, [CARG3]
|
||||
@ -3336,9 +3368,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
|->BC_CALL_Z:
|
||||
| mov RB, BASE // Save old BASE for vmeta_call.
|
||||
| add BASE, BASE, RA, lsl #3
|
||||
| ldr CARG3, [BASE]
|
||||
| ldr CARG3, [BASE], #16
|
||||
| sub NARGS8:RC, NARGS8:RC, #8
|
||||
| add BASE, BASE, #16
|
||||
| checkfunc CARG3, ->vmeta_call
|
||||
| ins_call
|
||||
break;
|
||||
@ -3354,9 +3385,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| // RA = base, (RB = 0,) RC = (nargs+1)*8
|
||||
|->BC_CALLT1_Z:
|
||||
| add RA, BASE, RA, lsl #3
|
||||
| ldr TMP1, [RA]
|
||||
| ldr TMP1, [RA], #16
|
||||
| sub NARGS8:RC, NARGS8:RC, #8
|
||||
| add RA, RA, #16
|
||||
| checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
|
||||
| ldr PC, [BASE, FRAME_PC]
|
||||
|->BC_CALLT2_Z:
|
||||
@ -3436,10 +3466,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| add CARG3, CARG2, CARG1, lsl #3
|
||||
| bhs >5 // Index points after array part?
|
||||
| ldr TMP0, [CARG3]
|
||||
| cmp TMP0, TISNIL
|
||||
| cmp_nil TMP0
|
||||
| cinc CARG1, CARG1, eq // Skip holes in array part.
|
||||
| beq <1
|
||||
| add CARG1, CARG1, TISNUM
|
||||
| add_TISNUM CARG1, CARG1
|
||||
| stp CARG1, TMP0, [RA]
|
||||
| add CARG1, CARG1, #1
|
||||
|3:
|
||||
@ -3457,7 +3487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
|
||||
| bhi <4
|
||||
| ldp TMP0, CARG1, NODE:CARG3->val
|
||||
| cmp TMP0, TISNIL
|
||||
| cmp_nil TMP0
|
||||
| add RC, RC, #1
|
||||
| beq <6 // Skip holes in hash part.
|
||||
| stp CARG1, TMP0, [RA]
|
||||
@ -3475,8 +3505,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| checkfunc CFUNC:CARG1, >5
|
||||
| asr TMP0, TAB:CARG3, #47
|
||||
| ldrb TMP1w, CFUNC:CARG1->ffid
|
||||
| cmn TMP0, #-LJ_TTAB
|
||||
| ccmp CARG4, TISNIL, #0, eq
|
||||
| cmp_nil CARG4
|
||||
| ccmn TMP0, #-LJ_TTAB, #0, eq
|
||||
| ccmp TMP1w, #FF_next_N, #0, eq
|
||||
| bne >5
|
||||
| mov TMP0w, #0xfffe7fff // LJ_KEYINDEX
|
||||
@ -3516,51 +3546,51 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| and RC, RC, #255
|
||||
| // RA = base, RB = (nresults+1), RC = numparams
|
||||
| ldr TMP1, [BASE, FRAME_PC]
|
||||
| add RC, BASE, RC, lsl #3
|
||||
| add RA, BASE, RA, lsl #3
|
||||
| add RC, RC, #FRAME_VARG
|
||||
| add TMP2, RA, RB, lsl #3
|
||||
| sub RC, RC, TMP1 // RC = vbase
|
||||
| // Note: RC may now be even _above_ BASE if nargs was < numparams.
|
||||
| add TMP0, BASE, RC, lsl #3
|
||||
| add RC, BASE, RA, lsl #3 // RC = destination
|
||||
| add TMP0, TMP0, #FRAME_VARG
|
||||
| add TMP2, RC, RB, lsl #3
|
||||
| sub RA, TMP0, TMP1 // RA = vbase
|
||||
| // Note: RA may now be even _above_ BASE if nargs was < numparams.
|
||||
| sub TMP3, BASE, #16 // TMP3 = vtop
|
||||
| cbz RB, >5
|
||||
| sub TMP2, TMP2, #16
|
||||
|1: // Copy vararg slots to destination slots.
|
||||
| cmp RC, TMP3
|
||||
| ldr TMP0, [RC], #8
|
||||
| csel TMP0, TMP0, TISNIL, lo
|
||||
| cmp RA, TMP2
|
||||
| str TMP0, [RA], #8
|
||||
| cmp RA, TMP3
|
||||
| ldr TMP0, [RA], #8
|
||||
| csinv TMP0, TMP0, xzr, lo // TISNIL = ~xzr
|
||||
| cmp RC, TMP2
|
||||
| str TMP0, [RC], #8
|
||||
| blo <1
|
||||
|2:
|
||||
| ins_next
|
||||
|
|
||||
|5: // Copy all varargs.
|
||||
| ldr TMP0, L->maxstack
|
||||
| subs TMP2, TMP3, RC
|
||||
| subs TMP2, TMP3, RA
|
||||
| csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8
|
||||
| add RB, RB, #8
|
||||
| add TMP1, RA, TMP2
|
||||
| add TMP1, RC, TMP2
|
||||
| str RBw, SAVE_MULTRES
|
||||
| ble <2 // Nothing to copy.
|
||||
| cmp TMP1, TMP0
|
||||
| bhi >7
|
||||
|6:
|
||||
| ldr TMP0, [RC], #8
|
||||
| str TMP0, [RA], #8
|
||||
| cmp RC, TMP3
|
||||
| ldr TMP0, [RA], #8
|
||||
| str TMP0, [RC], #8
|
||||
| cmp RA, TMP3
|
||||
| blo <6
|
||||
| b <2
|
||||
|
|
||||
|7: // Grow stack for varargs.
|
||||
| lsr CARG2, TMP2, #3
|
||||
| stp BASE, RA, L->base
|
||||
| stp BASE, RC, L->base
|
||||
| mov CARG1, L
|
||||
| sub RC, RC, BASE // Need delta, because BASE may change.
|
||||
| sub RA, RA, BASE // Need delta, because BASE may change.
|
||||
| str PC, SAVE_PC
|
||||
| bl extern lj_state_growstack // (lua_State *L, int n)
|
||||
| ldp BASE, RA, L->base
|
||||
| add RC, BASE, RC
|
||||
| ldp BASE, RC, L->base
|
||||
| add RA, BASE, RA
|
||||
| sub TMP3, BASE, #16
|
||||
| b <6
|
||||
break;
|
||||
@ -3704,7 +3734,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
} else {
|
||||
| adds CARG1w, CARG1w, CARG3w
|
||||
| bvs >2
|
||||
| add TMP0, CARG1, TISNUM
|
||||
| add_TISNUM TMP0, CARG1
|
||||
| tbnz CARG3w, #31, >4
|
||||
| cmp CARG1w, CARG2w
|
||||
}
|
||||
@ -3783,7 +3813,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| // RA = base, RC = target
|
||||
| ldr CARG1, [BASE, RA, lsl #3]
|
||||
| add TMP1, BASE, RA, lsl #3
|
||||
| cmp CARG1, TISNIL
|
||||
| cmp_nil CARG1
|
||||
| beq >1 // Stop if iterator returned nil.
|
||||
if (op == BC_JITERL) {
|
||||
| str CARG1, [TMP1, #-8]
|
||||
@ -3816,9 +3846,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
|.if JIT
|
||||
| // RA = base (ignored), RC = traceno
|
||||
| ldr CARG1, [GL, #GL_J(trace)]
|
||||
| mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0.
|
||||
| st_vmstate wzr // Traces on ARM64 don't store the trace #, so use 0.
|
||||
| ldr TRACE:RC, [CARG1, RC, lsl #3]
|
||||
| st_vmstate CARG2w
|
||||
|.if PAUTH
|
||||
| ldr RA, TRACE:RC->mcauth
|
||||
|.else
|
||||
@ -3893,6 +3922,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| add TMP2, BASE, RC
|
||||
| add LFUNC:CARG3, CARG3, TMP0, lsl #47
|
||||
| add RA, RA, RC
|
||||
| sub CARG1, CARG1, #8
|
||||
| add TMP0, RC, #16+FRAME_VARG
|
||||
| str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC.
|
||||
| ldr KBASE, [PC, #-4+PC2PROTO(k)]
|
||||
|
@ -1374,9 +1374,13 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|//-- Base library: catch errors ----------------------------------------
|
||||
|
|
||||
|.ffunc pcall
|
||||
| lw TMP1, L->maxstack
|
||||
| addu TMP2, BASE, NARGS8:RC
|
||||
| lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
|
||||
| beqz NARGS8:RC, ->fff_fallback
|
||||
| move TMP2, BASE
|
||||
|. sltu AT, TMP1, TMP2
|
||||
| bnez AT, ->fff_fallback
|
||||
|. move TMP2, BASE
|
||||
| addiu BASE, BASE, 8
|
||||
| // Remember active hook before pcall.
|
||||
| srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
|
||||
@ -1386,8 +1390,12 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|. addiu NARGS8:RC, NARGS8:RC, -8
|
||||
|
|
||||
|.ffunc xpcall
|
||||
| lw TMP1, L->maxstack
|
||||
| addu TMP2, BASE, NARGS8:RC
|
||||
| sltiu AT, NARGS8:RC, 16
|
||||
| lw CARG4, 8+HI(BASE)
|
||||
| sltu TMP1, TMP1, TMP2
|
||||
| or AT, AT, TMP1
|
||||
| bnez AT, ->fff_fallback
|
||||
|. lw CARG3, 8+LO(BASE)
|
||||
| lw CARG1, LO(BASE)
|
||||
|
@ -1415,8 +1415,12 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|//-- Base library: catch errors ----------------------------------------
|
||||
|
|
||||
|.ffunc pcall
|
||||
| ld TMP1, L->maxstack
|
||||
| daddu TMP2, BASE, NARGS8:RC
|
||||
| sltu AT, TMP1, TMP2
|
||||
| bnez AT, ->fff_fallback
|
||||
|. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
|
||||
| daddiu NARGS8:RC, NARGS8:RC, -8
|
||||
| lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
|
||||
| bltz NARGS8:RC, ->fff_fallback
|
||||
|. move TMP2, BASE
|
||||
| daddiu BASE, BASE, 16
|
||||
@ -1437,8 +1441,12 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|. nop
|
||||
|
|
||||
|.ffunc xpcall
|
||||
| ld TMP1, L->maxstack
|
||||
| daddu TMP2, BASE, NARGS8:RC
|
||||
| sltu AT, TMP1, TMP2
|
||||
| bnez AT, ->fff_fallback
|
||||
|. ld CARG1, 0(BASE)
|
||||
| daddiu NARGS8:TMP0, NARGS8:RC, -16
|
||||
| ld CARG1, 0(BASE)
|
||||
| ld CARG2, 8(BASE)
|
||||
| bltz NARGS8:TMP0, ->fff_fallback
|
||||
|. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
|
||||
@ -5396,6 +5404,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| settp LFUNC:RB, TMP0
|
||||
| daddu TMP0, RA, RC
|
||||
| sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
|
||||
| daddiu TMP2, TMP2, -8
|
||||
| daddiu TMP3, RC, 16+FRAME_VARG
|
||||
| sltu AT, TMP0, TMP2
|
||||
| ld KBASE, -4+PC2PROTO(k)(PC)
|
||||
|
@ -1735,8 +1735,12 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|//-- Base library: catch errors ----------------------------------------
|
||||
|
|
||||
|.ffunc pcall
|
||||
| lwz TMP1, L->maxstack
|
||||
| add TMP2, BASE, NARGS8:RC
|
||||
| cmplwi NARGS8:RC, 8
|
||||
| lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
|
||||
| cmplw cr1, TMP1, TMP2
|
||||
| cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
||||
| blt ->fff_fallback
|
||||
| mr TMP2, BASE
|
||||
| la BASE, 8(BASE)
|
||||
@ -1747,14 +1751,19 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| b ->vm_call_dispatch
|
||||
|
|
||||
|.ffunc xpcall
|
||||
| lwz TMP1, L->maxstack
|
||||
| add TMP2, BASE, NARGS8:RC
|
||||
| cmplwi NARGS8:RC, 16
|
||||
| lwz CARG3, 8(BASE)
|
||||
| cmplw cr1, TMP1, TMP2
|
||||
|.if FPU
|
||||
| lfd FARG2, 8(BASE)
|
||||
| cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
||||
| lfd FARG1, 0(BASE)
|
||||
|.else
|
||||
| lwz CARG1, 0(BASE)
|
||||
| lwz CARG2, 4(BASE)
|
||||
| cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
||||
| lwz CARG4, 12(BASE)
|
||||
|.endif
|
||||
| blt ->fff_fallback
|
||||
|
@ -1463,6 +1463,9 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|//-- Base library: catch errors ----------------------------------------
|
||||
|
|
||||
|.ffunc_1 pcall
|
||||
| mov L:RB, SAVE_L
|
||||
| lea RA, [BASE+NARGS:RD*8]
|
||||
| cmp RA, L:RB->maxstack; ja ->fff_fallback
|
||||
| lea RA, [BASE+16]
|
||||
| sub NARGS:RDd, 1
|
||||
| mov PCd, 16+FRAME_PCALL
|
||||
@ -1481,6 +1484,9 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| jmp ->vm_call_dispatch
|
||||
|
|
||||
|.ffunc_2 xpcall
|
||||
| mov L:RB, SAVE_L
|
||||
| lea RA, [BASE+NARGS:RD*8]
|
||||
| cmp RA, L:RB->maxstack; ja ->fff_fallback
|
||||
| mov LFUNC:RA, [BASE+8]
|
||||
| checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
|
||||
| mov LFUNC:RB, [BASE] // Swap function and traceback.
|
||||
|
@ -1369,7 +1369,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| mov LFUNC:RB, [RA-8]
|
||||
| add NARGS:RD, 1
|
||||
| // This is fragile. L->base must not move, KBASE must always be defined.
|
||||
|.if x64
|
||||
|.if X64
|
||||
| cmp KBASEa, rdx // Continue with CALLT if flag set.
|
||||
|.else
|
||||
| cmp KBASE, BASE // Continue with CALLT if flag set.
|
||||
@ -1793,6 +1793,9 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|//-- Base library: catch errors ----------------------------------------
|
||||
|
|
||||
|.ffunc_1 pcall
|
||||
| mov L:RB, SAVE_L
|
||||
| lea RA, [BASE+NARGS:RD*8]
|
||||
| cmp RA, L:RB->maxstack; ja ->fff_fallback
|
||||
| lea RA, [BASE+8]
|
||||
| sub NARGS:RD, 1
|
||||
| mov PC, 8+FRAME_PCALL
|
||||
@ -1804,6 +1807,9 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| jmp ->vm_call_dispatch
|
||||
|
|
||||
|.ffunc_2 xpcall
|
||||
| mov L:RB, SAVE_L
|
||||
| lea RA, [BASE+NARGS:RD*8]
|
||||
| cmp RA, L:RB->maxstack; ja ->fff_fallback
|
||||
| cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
|
||||
| mov RB, [BASE+4] // Swap function and traceback.
|
||||
| mov [BASE+12], RB
|
||||
|
Loading…
Reference in New Issue
Block a user