Compare commits

..

No commits in common. "v2.1" and "v2.1.ROLLING" have entirely different histories.

217 changed files with 1501 additions and 2481 deletions

1
.gitattributes vendored
View File

@ -1 +0,0 @@
/.relver export-subst

View File

@ -1 +0,0 @@
$Format:%ct$

View File

@ -1,7 +1,7 @@
===============================================================================
LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/
Copyright (C) 2005-2025 Mike Pall. All rights reserved.
Copyright (C) 2005-2023 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -10,7 +10,7 @@
# For MSVC, please follow the instructions given in src/msvcbuild.bat.
# For MinGW and Cygwin, cd to src and run make with the Makefile there.
#
# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
##############################################################################
MAJVER= 2
@ -37,13 +37,12 @@ export MULTILIB= lib
DPREFIX= $(DESTDIR)$(PREFIX)
INSTALL_BIN= $(DPREFIX)/bin
INSTALL_LIB= $(DPREFIX)/$(MULTILIB)
INSTALL_SHARE_= $(PREFIX)/share
INSTALL_SHARE= $(DESTDIR)$(INSTALL_SHARE_)
INSTALL_SHARE= $(DPREFIX)/share
INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION)
INSTALL_INC= $(INSTALL_DEFINC)
export INSTALL_LJLIBD= $(INSTALL_SHARE_)/luajit-$(MMVERSION)
INSTALL_JITLIB= $(DESTDIR)$(INSTALL_LJLIBD)/jit
INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION)
INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit
INSTALL_LMODD= $(INSTALL_SHARE)/lua
INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER)
INSTALL_CMODD= $(INSTALL_LIB)/lua
@ -72,7 +71,7 @@ INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME)
INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \
$(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD)
UNINSTALL_DIRS= $(INSTALL_JITLIB) $(DESTDIR)$(INSTALL_LJLIBD) $(INSTALL_INC) \
UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \
$(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD)
RM= rm -f
@ -110,12 +109,11 @@ else
endif
TARGET_SYS?= $(HOST_SYS)
ifneq (,$(filter $(TARGET_SYS),Darwin iOS))
ifeq (Darwin,$(TARGET_SYS))
INSTALL_SONAME= $(INSTALL_DYLIBNAME)
INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1)
INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2)
LDCONFIG= :
SED_PC+= -e "s| -Wl,-E||"
endif
##############################################################################
@ -144,12 +142,18 @@ install: $(INSTALL_DEP)
$(RM) $(FILE_PC).tmp
cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
@echo ""
@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
@echo "You can do this now by running this command (with sudo):"
@echo ""
@echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
@echo ""
uninstall:
@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
for file in $(FILES_JITLIB); do \
$(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
done

2
README
View File

@ -5,7 +5,7 @@ LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
Project Homepage: https://luajit.org/
LuaJIT is Copyright (C) 2005-2025 Mike Pall.
LuaJIT is Copyright (C) 2005-2023 Mike Pall.
LuaJIT is free software, released under the MIT license.
See full Copyright Notice in the COPYRIGHT file or in luajit.h.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2025 Mike Pall.
/* Copyright (C) 2004-2023 Mike Pall.
*
* You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2025 Mike Pall.
/* Copyright (C) 2004-2023 Mike Pall.
*
* You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme.

View File

@ -3,7 +3,7 @@
<head>
<title>Contact</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -94,7 +94,7 @@ don't like that, please complain to Google or Microsoft, not me.
<h2>Copyright</h2>
<p>
All documentation is
Copyright &copy; 2005-2025 Mike Pall.
Copyright &copy; 2005-2023 Mike Pall.
</p>
@ -102,7 +102,7 @@ Copyright &copy; 2005-2025 Mike Pall.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>String Buffer Library</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -85,7 +85,7 @@ operations.
</p>
<p>
The string buffer library also includes a high-performance
<a href="#serialize">serializer</a> for Lua objects.
<a href="serialize">serializer</a> for Lua objects.
</p>
<h2 id="use">Using the String Buffer Library</h2>
@ -588,9 +588,9 @@ num → 0x07 double.L
tab → 0x08 // Empty table
| 0x09 h.U h*{object object} // Key/value hash
| 0x0a a.U a*object // 0-based array
| 0x0b a.U h.U a*object h*{object object} // Mixed
| 0x0b a.U a*object h.U h*{object object} // Mixed
| 0x0c a.U (a-1)*object // 1-based array
| 0x0d a.U h.U (a-1)*object h*{object object} // Mixed
| 0x0d a.U (a-1)*object h.U h*{object object} // Mixed
tab_mt → 0x0e (index-1).U tab // Metatable dict entry
int64 → 0x10 int.L // FFI int64_t
@ -679,7 +679,7 @@ mappings of files are OK, but only if the file does not change.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>Lua/C API Extensions</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -173,7 +173,7 @@ Also note that this mechanism is not without overhead.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>FFI Library</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -316,7 +316,7 @@ without undue conversion penalties.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>ffi.* API Functions</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -558,7 +558,7 @@ named <tt>i</tt>.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>FFI Semantics</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -440,19 +440,6 @@ If you don't do this, the default Lua number &rarr; <tt>double</tt>
conversion rule applies. A vararg C&nbsp;function expecting an integer
will see a garbled or uninitialized value.
</p>
<p>
Note: this is the only place where creating a boxed scalar number type is
actually useful. <b>Never use <tt>ffi.new("int")</tt>, <tt>ffi.new("float")</tt>
etc. anywhere else!</b>
</p>
<p style="font-size: 8pt;">
Ditto for <tt>ffi.cast()</tt>. Explicitly boxing scalars <b>does not</b>
improve performance or force <tt>int</tt> or <tt>float</tt> arithmetic! It
just adds costly boxing, unboxing and conversions steps. And it may lead
to surprise results, because
<a href="#cdata_arith">cdata arithmetic on scalar numbers</a>
is always performed on 64 bit integers.
</p>
<h2 id="init">Initializers</h2>
<p>
@ -1259,7 +1246,7 @@ compiled.</li>
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>FFI Tutorial</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -587,7 +587,7 @@ it to a local variable in the function scope is unnecessary.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>jit.* Library</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -187,7 +187,7 @@ if you want to know more.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>Profiler</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -349,7 +349,7 @@ use.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>Extensions</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -160,33 +160,13 @@ passes any arguments after the error function to the function
which is called in a protected context.
</p>
<h3 id="load"><tt>load*()</tt> handle UTF-8 source code</h3>
<h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3>
<p>
Non-ASCII characters are handled transparently by the Lua source code parser.
This allows the use of UTF-8 characters in identifiers and strings.
A UTF-8 BOM is skipped at the start of the source code.
</p>
<h3 id="load_mode"><tt>load*()</tt> add a mode parameter</h3>
<p>
As an extension from Lua 5.2, the functions <tt>loadstring()</tt>,
<tt>loadfile()</tt> and (new) <tt>load()</tt> add an optional
<tt>mode</tt> parameter.
</p>
<p>
The default mode string is <tt>"bt"</tt>, which allows loading of both
source code and bytecode. Use <tt>"t"</tt> to allow only source code
or <tt>"b"</tt> to allow only bytecode to be loaded.
</p>
<p>
By default, the <tt>load*</tt> functions generate the native bytecode format.
For cross-compilation purposes, add <tt>W</tt> to the mode string to
force the 32 bit format and <tt>X</tt> to force the 64 bit format.
Add both to force the opposite format. Note that non-native bytecode
generated by <tt>load*</tt> cannot be run, but can still be passed
to <tt>string.dump</tt>.
</p>
<h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3>
<p>
All number-to-string conversions consistently convert non-finite numbers
@ -206,33 +186,26 @@ works independently of the current locale and it supports hex floating-point
numbers (e.g. <tt>0x1.5p-3</tt>).
</p>
<h3 id="string_dump"><tt>string.dump(f [,mode])</tt> generates portable bytecode</h3>
<h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3>
<p>
An extra argument has been added to <tt>string.dump()</tt>. If set to
<tt>true</tt> or to a string which contains the character <tt>s</tt>,
'stripped' bytecode without debug information is generated. This speeds
up later bytecode loading and reduces memory usage. See also the
<tt>true</tt>, 'stripped' bytecode without debug information is
generated. This speeds up later bytecode loading and reduces memory
usage. See also the
<a href="running.html#opt_b"><tt>-b</tt> command line option</a>.
</p>
<p>
The generated bytecode is portable and can be loaded on any architecture
that LuaJIT supports. However, the bytecode compatibility versions must
match. Bytecode only stays compatible within a major+minor version
(x.y.aaa &rarr; x.y.bbb), except for development branches. Foreign bytecode
(e.g. from Lua 5.1) is incompatible and cannot be loaded.
that LuaJIT supports, independent of word size or endianess. However, the
bytecode compatibility versions must match. Bytecode stays compatible
for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
</p>
<p>
Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
a different, incompatible bytecode format between 32 bit and 64 bit ports.
This may be rectified in the future. In the meantime, use the <tt>W</tt>
and </tt>X</tt> <a href="#load_mode">modes of the <tt>load*</tt> functions</a>
for cross-compilation purposes.
</p>
<p>
Due to VM hardening, bytecode is not deterministic. Add <tt>d</tt> to the
mode string to dump it in a deterministic manner: identical source code
always gives a byte-for-byte identical bytecode dump. This feature is
mainly useful for reproducible builds.
a different, incompatible bytecode format for all 64 bit ports. This may be
rectified in the future.
</p>
<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
@ -265,7 +238,7 @@ and let the GC do its work.
LuaJIT uses a Tausworthe PRNG with period 2^223 to implement
<tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of
the PRNG results is much superior compared to the standard Lua
implementation, which uses the platform-specific ANSI <tt>rand()</tt>.
implementation, which uses the platform-specific ANSI rand().
</p>
<p>
The PRNG generates the same sequences from the same seeds on all
@ -276,10 +249,6 @@ It's correctly scaled up and rounded for <tt>math.random(n&nbsp;[,m])</tt> to
preserve uniformity.
</p>
<p>
Call <tt>math.randomseed()</tt> without any arguments to seed it from
system entropy.
</p>
<p>
Important: Neither this nor any other PRNG based on the simplistic
<tt>math.random()</tt> API is suitable for cryptographic use.
</p>
@ -317,7 +286,7 @@ enabled:
</p>
<ul>
<li><tt>goto</tt> and <tt>::labels::</tt>.</li>
<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\z'</tt> escape in strings.</li>
<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li>
<li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li>
<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
<li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
@ -457,7 +426,9 @@ the toolchain used to compile LuaJIT:
on the C&nbsp;stack. The contents of the C++&nbsp;exception object
pass through unmodified.</li>
<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
The corresponding Lua error message can be retrieved from the Lua stack.</li>
The corresponding Lua error message can be retrieved from the Lua stack.<br>
For MSVC for Windows 64 bit this requires compilation of your C++ code
with <tt>/EHa</tt>.</li>
<li>Throwing Lua errors across C++ frames is safe. C++ destructors
will be called.</li>
</ul>
@ -492,7 +463,7 @@ C++ destructors.</li>
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>Installation</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -117,7 +117,7 @@ hold all user-configurable settings:
<li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX
only).</li>
<li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT
under POSIX or MinGW.</li>
under POSIX, MinGW or Cygwin.</li>
<li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
MSVC (Visual Studio).</li>
</ul>
@ -195,13 +195,15 @@ Obviously the prefixes given during build and installation need to be the same.
<h2 id="windows">Windows Systems</h2>
<h3>Prerequisites</h3>
<p>
Either install the open source SDK <a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a>,
which comes with a modified GCC plus the required development headers.
Either install one of the open source SDKs
(<a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a> or
<a href="https://www.cygwin.com/"><span class="ext">&raquo;</span>&nbsp;Cygwin</a>), which come with a modified
GCC plus the required development headers.
Or install Microsoft's Visual Studio (MSVC).
</p>
<h3>Building with MSVC</h3>
<p>
Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the
Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the
directory with the source code and run these commands:
</p>
<pre class="code">
@ -212,12 +214,9 @@ msvcbuild
Check the <tt>msvcbuild.bat</tt> file for more options.
Then follow the installation instructions below.
</p>
<h3>Building with MinGW or Cygwin</h3>
<p>
For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt>
</p>
<h3>Building with MinGW</h3>
<p>
Open a command prompt window and make sure the MinGW programs
Open a command prompt window and make sure the MinGW or Cygwin programs
are in your path. Then <tt>cd</tt> to the directory of the git repository.
Then run this command for MinGW:
</p>
@ -225,6 +224,12 @@ Then run this command for MinGW:
mingw32-make
</pre>
<p>
Or this command for Cygwin:
</p>
<pre class="code">
make
</pre>
<p>
Then follow the installation instructions below.
</p>
<h3>Installing LuaJIT</h3>
@ -241,19 +246,6 @@ absolute path names &mdash; all modules are loaded relative to the
directory where <tt>luajit.exe</tt> is installed
(see <tt>src/luaconf.h</tt>).
</p>
<p>
The final directory layout should look like this:
</p>
<pre class="code">
├── luajit.exe
├── lua51.dll
├── <- put your own classic Lua/C API modules (*.dll) here
└── lua
├── <- put your own Lua modules (*.lua) here
└── jit
├── bc.lua
└── (etc …)
</pre>
<h2 id="cross">Cross-compiling LuaJIT</h2>
<p>
@ -274,7 +266,6 @@ for any supported target:
<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
<li>Both host and target architectures must have the same pointer size.</li>
<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
<li>On some distro versions, multilib conflicts with cross-compilers. The workaround is to install the x86 cross-compiler package <tt>gcc-i686-linux-gnu</tt> and use it to build the host part (<tt>HOST_CC=i686-linux-gnu-gcc</tt>).</li>
<li>64 bit targets always require compilation on a 64 bit host.</li>
</ul>
<p>
@ -577,7 +568,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>LuaJIT</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -122,7 +122,7 @@ Lua is a powerful, dynamic and light-weight programming language.
It may be embedded or used as a general-purpose, stand-alone language.
</p>
<p>
LuaJIT is Copyright &copy; 2005-2025 Mike Pall, released under the
LuaJIT is Copyright &copy; 2005-2023 Mike Pall, released under the
<a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
</p>
<p>
@ -193,7 +193,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>Running LuaJIT</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -106,9 +106,6 @@ are accepted:
<li><tt>-l</tt> &mdash; Only list bytecode.</li>
<li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li>
<li><tt>-g</tt> &mdash; Keep debug info.</li>
<li><tt>-W</tt> &mdash; Generate 32 bit (non-GC64) bytecode.</li>
<li><tt>-X</tt> &mdash; Generate 64 bit (GC64) bytecode.</li>
<li><tt>-d</tt> &mdash; Generate bytecode in deterministic manner.</li>
<li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li>
<li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li>
<li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li>
@ -123,8 +120,7 @@ file name:
</p>
<ul>
<li><tt>c</tt> &mdash; C source file, exported bytecode data.</li>
<li><tt>cc</tt> &mdash; C++ source file, exported bytecode data.</li>
<li><tt>h</tt> &mdash; C/C++ header file, static bytecode data.</li>
<li><tt>h</tt> &mdash; C header file, static bytecode data.</li>
<li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data
(OS- and architecture-specific).</li>
<li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable).
@ -307,7 +303,7 @@ Here are the parameters and their default settings:
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2025
Copyright &copy; 2005-2023
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -1,6 +1,6 @@
/*
** DynASM ARM encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM ARM module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

View File

@ -1,6 +1,6 @@
/*
** DynASM ARM64 encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM ARM64 module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
@ -549,7 +549,7 @@ end
local function parse_load_pair(params, nparams, n, op)
if params[n+2] then werror("too many operands") end
local pn, p2 = params[n], params[n+1]
local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
local scale = shr(op, 30) == 0 and 2 or 3
local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
if not p1 then
if not p2 then
@ -806,8 +806,8 @@ map_op = {
["ldrsw_*"] = "98000000DxB|b8800000DxL",
-- NOTE: ldur etc. are handled by ldr et al.
["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
["ldpsw_*"] = "68400000DAxP",
-- Branches.
@ -942,7 +942,7 @@ local function parse_template(params, template, nparams, pos)
werror("bad register type")
end
parse_reg_type = false
elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
elseif p == "x" or p == "w" or p == "d" or p == "s" then
if parse_reg_type ~= p then
werror("register size mismatch")
end

View File

@ -1,6 +1,6 @@
/*
** DynASM MIPS encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM MIPS32/MIPS64 module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM MIPS64 module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.

View File

@ -1,6 +1,6 @@
/*
** DynASM PPC/PPC64 encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM PPC/PPC64 module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
--
-- Support for various extensions contributed by Caio Souza Oliveira.

View File

@ -1,6 +1,6 @@
/*
** DynASM encoding engine prototypes.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM x64 module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined x86/x64 module.

View File

@ -1,6 +1,6 @@
/*
** DynASM x86 encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM x86/x64 module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
@ -627,11 +627,7 @@ local function wputmrmsib(t, imark, s, vsreg, psz, sk)
werror("NYI: rip-relative displacement followed by immediate")
end
-- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
if disp[2] == "iPJ" then
waction("REL_A", disp[1])
else
wputlabel("REL_", disp[1], 2)
end
else
wputdarg(disp)
end
@ -748,9 +744,9 @@ local function dispexpr(expr)
return imm*map_opsizenum[ops]
end
local mode, iexpr = immexpr(dispt)
if mode == "iJ" or mode == "iPJ" then
if mode == "iJ" then
if c == "-" then werror("cannot invert label reference") end
return { iexpr, mode }
return { iexpr }
end
return expr -- Need to return original signed expression.
end
@ -1151,8 +1147,6 @@ local map_op = {
rep_0 = "F3",
repe_0 = "F3",
repz_0 = "F3",
endbr32_0 = "F30F1EFB",
endbr64_0 = "F30F1EFA",
-- F4: *hlt
cmc_0 = "F5",
-- F6: test... mb,i; div... mb

View File

@ -2,7 +2,7 @@
-- DynASM. A dynamic assembler for code generation engines.
-- Originally designed and implemented for LuaJIT.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See below for full copyright notice.
------------------------------------------------------------------------------
@ -17,7 +17,7 @@ local _info = {
url = "https://luajit.org/dynasm.html",
license = "MIT",
copyright = [[
Copyright (C) 2005-2025 Mike Pall. All rights reserved.
Copyright (C) 2005-2023 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -75,7 +75,7 @@ local function wline(line, needindent)
g_synclineno = g_synclineno + 1
end
-- Write assembler line as a comment, if requested.
-- Write assembler line as a comment, if requestd.
local function wcomment(aline)
if g_opt.comment then
wline(g_opt.comment..aline..g_opt.endcomment, true)

View File

@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
Runs some nested loops and shows the resulting traces.
.SH COPYRIGHT
.PP
\fBLuaJIT\fR is Copyright \(co 2005-2025 Mike Pall.
\fBLuaJIT\fR is Copyright \(co 2005-2023 Mike Pall.
.br
\fBLuaJIT\fR is open source software, released under the MIT license.
.SH SEE ALSO

View File

@ -7,7 +7,7 @@
# Also works with MinGW and Cygwin on Windows.
# Please check msvcbuild.bat for building with MSVC on Windows.
#
# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
##############################################################################
MAJVER= 2
@ -233,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG
TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= x64
else
@ -299,12 +299,6 @@ endif
ifneq (,$(LMULTILIB))
TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
endif
ifneq (,$(INSTALL_LJLIBD))
TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\"
endif
ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1))
TARGET_XCFLAGS+= -fno-strict-float-cast-overflow
endif
##############################################################################
# Target system detection.
@ -326,13 +320,13 @@ ifeq (Darwin,$(TARGET_SYS))
endif
TARGET_STRIP+= -x
TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
else
ifeq (iOS,$(TARGET_SYS))
TARGET_STRIP+= -x
TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
ifeq (arm64,$(TARGET_LJARCH))
@ -481,11 +475,7 @@ DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
DASM_DASC= vm_$(DASM_ARCH).dasc
GIT= git
ifeq (Windows,$(HOST_SYS)$(HOST_MSYS))
GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
else
GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
endif
GIT_RELVER= [ -d ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*)
BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \

View File

@ -25,15 +25,14 @@ lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_strscan.h lj_libdef.h
lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_vm.h lj_prng.h \
lj_libdef.h
lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h
lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
lj_libdef.h
@ -56,7 +55,7 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
lj_prng.h lj_emit_*.h lj_asm_*.h
lj_emit_*.h lj_asm_*.h
lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
lj_bcdef.h
@ -98,7 +97,7 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
lj_crecord.h lj_strfmt.h lj_strscan.h
lj_crecord.h lj_strfmt.h
lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
lj_ccallback.h lj_buf.h

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** This is a tool to build the hand-tuned assembler code required for
** LuaJIT's bytecode interpreter. It supports a variety of output formats

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _BUILDVM_H

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: Assembler source code emitter.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"
@ -339,10 +339,6 @@ void emit_asm(BuildCtx *ctx)
fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident);
break;
case BUILD_machasm:
#if defined(__apple_build_version__) && __apple_build_version__ >= 15000000 && __apple_build_version__ < 15000300
/* Workaround for XCode 15.0 - 15.2. */
fprintf(ctx->fp, "\t.subsections_via_symbols\n");
#endif
fprintf(ctx->fp,
"\t.cstring\n"
"\t.ascii \"%s\\0\"\n", ctx->dasm_ident);

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: IR folding hash table generator.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: library definition compiler.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: PE object emitter.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Only used for building on Windows, since we cannot assume the presence
** of a suitable assembler. The host and target byte order must match.
@ -9,7 +9,7 @@
#include "buildvm.h"
#include "lj_bc.h"
#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
#if LJ_TARGET_X86ORX64
/* Context for PE object emitter. */
static char *strtab;
@ -93,17 +93,6 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_ADDR32NB 0x03
#define PEOBJ_RELOC_OFS 0
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 6
#define PEOBJ_XDATA_SIZE (8*2+4+6*2)
#elif LJ_TARGET_ARM64
#define PEOBJ_ARCH_TARGET 0xaa64
#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */
#define PEOBJ_RELOC_DIR32 0x01
#define PEOBJ_RELOC_ADDR32NB 0x02
#define PEOBJ_RELOC_OFS (-4)
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 4
#define PEOBJ_XDATA_SIZE (4+24+4 +4+8)
#endif
/* Section numbers (0-based). */
@ -111,7 +100,7 @@ enum {
PEOBJ_SECT_ABS = -2,
PEOBJ_SECT_UNDEF = -1,
PEOBJ_SECT_TEXT,
#ifdef PEOBJ_PDATA_NRELOC
#if LJ_TARGET_X64
PEOBJ_SECT_PDATA,
PEOBJ_SECT_XDATA,
#elif LJ_TARGET_X86
@ -186,9 +175,6 @@ void emit_peobj(BuildCtx *ctx)
uint32_t sofs;
int i, nrsym;
union { uint8_t b; uint32_t u; } host_endian;
#ifdef PEOBJ_PDATA_NRELOC
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
#endif
sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
@ -202,18 +188,18 @@ void emit_peobj(BuildCtx *ctx)
/* Flags: 60 = read+execute, 50 = align16, 20 = code. */
pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
#ifdef PEOBJ_PDATA_NRELOC
#if LJ_TARGET_X64
memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
pesect[PEOBJ_SECT_PDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4);
sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4);
pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE;
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
pesect[PEOBJ_SECT_XDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */
sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */
pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
@ -248,7 +234,7 @@ void emit_peobj(BuildCtx *ctx)
*/
nrsym = ctx->nrelocsym;
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
#ifdef PEOBJ_PDATA_NRELOC
#if LJ_TARGET_X64
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
#endif
@ -273,6 +259,7 @@ void emit_peobj(BuildCtx *ctx)
#if LJ_TARGET_X64
{ /* Write .pdata section. */
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */
PEreloc reloc;
pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
@ -321,87 +308,6 @@ void emit_peobj(BuildCtx *ctx)
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_ARM64
/* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
{ /* Write .pdata section. */
uint32_t pdata[4];
PEreloc reloc;
pdata[0] = 0;
pdata[1] = 0;
pdata[2] = fcofs;
pdata[3] = 4+24+4;
owrite(ctx, &pdata, sizeof(pdata));
/* Start of .text and start of .xdata. */
reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
/* Start of vm_ffi_call and start of second part of .xdata. */
reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
{ /* Write .xdata section. */
uint32_t u32;
uint8_t *p, uwc[24];
PEreloc reloc;
#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */
#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */
#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
#define CSAVE_REGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
} while (0)
#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
#define CSAVE_FREGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
} while (0)
#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */
#define CODE_NOP 0xe3
#define CODE_END 0xe4
#define CEND_ALIGN do { \
*p++ = CODE_END; \
while ((p - uwc) & 3) *p++ = CODE_NOP; \
} while (0)
/* Unwind codes for .text section with handler. */
p = uwc;
CADD_FP(192); /* +2 */
CSAVE_REGS(19, 28, 176); /* +5*2 */
CSAVE_FREGS(8, 15, 96); /* +4*2 */
CSAVE_FPLR(192); /* +1 */
CALLOC_S(208); /* +1 */
CEND_ALIGN; /* +1 +1 -> 24 */
u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 24);
u32 = 0; /* Handler RVA to be relocated at 4 + 24. */
owrite(ctx, &u32, 4);
/* Unwind codes for vm_ffi_call without handler. */
p = uwc;
CADD_FP(16); /* +2 */
CSAVE_FPLR(16); /* +1 */
CSAVE_REGPX(19, -32); /* +2 */
CEND_ALIGN; /* +1 +2 -> 8 */
u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 8);
reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_X86
/* Write .sxdata section. */
for (i = 0; i < nrsym; i++) {
@ -433,7 +339,7 @@ void emit_peobj(BuildCtx *ctx)
emit_peobj_sym(ctx, ctx->relocsym[i], 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
#ifdef PEOBJ_PDATA_NRELOC
#if LJ_TARGET_X64
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
emit_peobj_sym(ctx, "lj_err_unwind_win", 0,

View File

@ -2,7 +2,7 @@
-- Lua script to dump the bytecode of the library functions written in Lua.
-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
----------------------------------------------------------------------------
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
@ -138,23 +138,23 @@ local function fixup_dump(dump, fixup)
return { dump = ndump, startbc = startbc, sizebc = sizebc }
end
local function find_defs(src, mode)
local function find_defs(src)
local defs = {}
for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
local env = {}
local tcode, fixup = transform_lua(code)
local func = assert(load(tcode, "", mode))
defs[name] = fixup_dump(string.dump(func, mode), fixup)
local func = assert(load(tcode, "", nil, env))()
defs[name] = fixup_dump(string.dump(func, true), fixup)
defs[#defs+1] = name
end
return defs
end
local function gen_header(defs32, defs64)
local function gen_header(defs)
local t = {}
local function w(x) t[#t+1] = x end
w("/* This is a generated file. DO NOT EDIT! */\n\n")
w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
for j,defs in ipairs{defs64, defs32} do
local s, sb = "", ""
for i,name in ipairs(defs) do
local d = defs[name]
@ -163,11 +163,7 @@ local function gen_header(defs32, defs64)
.. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
.. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
end
if j == 1 then
w("static const uint8_t libbc_code[] = {\n#if LJ_FR2\n")
else
w("\n#else\n")
end
w("static const uint8_t libbc_code[] = {\n")
local n = 0
for i=1,#s do
local x = string.byte(s, i)
@ -193,18 +189,14 @@ local function gen_header(defs32, defs64)
end
w(",")
end
end
w("\n#endif\n0\n};\n\n")
w("\n0\n};\n\n")
w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
local m32, m64 = 0, 0
for i,name in ipairs(defs32) do
assert(name == defs64[i])
w('{"'); w(name); w('",'); w(m32) w('},\n')
m32 = m32 + #defs32[name].dump
m64 = m64 + #defs64[name].dump
assert(m32 == m64)
local m = 0
for _,name in ipairs(defs) do
w('{"'); w(name); w('",'); w(m) w('},\n')
m = m + #defs[name].dump
end
w("{NULL,"); w(m32); w("}\n};\n\n")
w("{NULL,"); w(m); w("}\n};\n\n")
return table.concat(t)
end
@ -227,8 +219,7 @@ end
local outfile = parse_arg(arg)
local src = read_files(arg)
local defs32 = find_defs(src, "Wdts")
local defs64 = find_defs(src, "Xdts")
local hdr = gen_header(defs32, defs64)
local defs = find_defs(src)
local hdr = gen_header(defs)
write_file(outfile, hdr)

View File

@ -2,7 +2,7 @@
-- Lua script to generate a customized, minified version of Lua.
-- The resulting 'minilua' is used for the build process of LuaJIT.
----------------------------------------------------------------------------
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------

View File

@ -1,14 +1,13 @@
----------------------------------------------------------------------------
-- Lua script to embed the rolling release version in luajit.h.
----------------------------------------------------------------------------
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
local arg = {...}
local FILE_ROLLING_H = arg[1] or "luajit_rolling.h"
local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt"
local FILE_LUAJIT_H = arg[3] or "luajit.h"
local FILE_INPUT_H = "luajit_rolling.h"
local FILE_INPUT_R = "luajit_relver.txt"
local FILE_OUTPUT_H = "luajit.h"
local function file_read(file)
local fp = assert(io.open(file, "rb"), "run from the wrong directory")
@ -29,8 +28,8 @@ local function file_write_mod(file, data)
assert(fp:close())
end
local text = file_read(FILE_ROLLING_H):gsub("#error.-\n", "")
local relver = file_read(FILE_RELVER_TXT):match("(%d+)")
local text = file_read(FILE_INPUT_H)
local relver = file_read(FILE_INPUT_R):match("(%d+)")
if relver then
text = text:gsub("ROLLING", relver)
@ -39,7 +38,6 @@ else
**** WARNING Cannot determine rolling release version from git log.
**** WARNING The 'git' command must be available during the build.
]])
file_write_mod(FILE_RELVER_TXT, "ROLLING\n") -- Fallback for install target.
end
file_write_mod(FILE_LUAJIT_H, text)
file_write_mod(FILE_OUTPUT_H, text)

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT bytecode listing module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT module to save/list bytecode.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@ -29,9 +29,6 @@ Save LuaJIT bytecode: luajit -b[options] input output
-l Only list bytecode.
-s Strip debug info (default).
-g Keep debug info.
-W Generate 32 bit (non-GC64) bytecode.
-X Generate 64 bit (GC64) bytecode.
-d Generate bytecode in deterministic manner.
-n name Set module name (default: auto-detect from input name).
-t type Set output file type (default: auto-detect from output name).
-a arch Override architecture for object files (default: native).
@ -41,7 +38,7 @@ Save LuaJIT bytecode: luajit -b[options] input output
-- Stop handling options.
- Use stdin as input and/or stdout as output.
File types: c cc h obj o raw (default)
File types: c h obj o raw (default)
]]
os.exit(1)
end
@ -54,9 +51,8 @@ local function check(ok, ...)
end
local function readfile(ctx, input)
if ctx.string then
return check(loadstring(input, nil, ctx.mode))
elseif ctx.filename then
if type(input) == "function" then return input end
if ctx.filename then
local data
if input == "-" then
data = io.stdin:read("*a")
@ -65,10 +61,10 @@ local function readfile(ctx, input)
data = assert(fp:read("*a"))
assert(fp:close())
end
return check(load(data, ctx.filename, ctx.mode))
return check(load(data, ctx.filename))
else
if input == "-" then input = nil end
return check(loadfile(input, ctx.mode))
return check(loadfile(input))
end
end
@ -85,7 +81,7 @@ end
------------------------------------------------------------------------------
local map_type = {
raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
}
local map_arch = {
@ -439,12 +435,24 @@ typedef struct
{
mach_header; uint32_t reserved;
} mach_header_64;
typedef struct {
uint32_t cmd, cmdsize;
char segname[16];
uint32_t vmaddr, vmsize, fileoff, filesize;
uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command;
typedef struct {
uint32_t cmd, cmdsize;
char segname[16];
uint64_t vmaddr, vmsize, fileoff, filesize;
uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command_64;
typedef struct {
char sectname[16], segname[16];
uint32_t addr, size;
uint32_t offset, align, reloff, nreloc, flags;
uint32_t reserved1, reserved2;
} mach_section;
typedef struct {
char sectname[16], segname[16];
uint64_t addr, size;
@ -454,64 +462,139 @@ typedef struct {
typedef struct {
uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize;
} mach_symtab_command;
typedef struct {
int32_t strx;
uint8_t type, sect;
int16_t desc;
uint32_t value;
} mach_nlist;
typedef struct {
int32_t strx;
uint8_t type, sect;
uint16_t desc;
uint64_t value;
} mach_nlist_64;
typedef struct
{
int32_t magic, nfat_arch;
} mach_fat_header;
typedef struct
{
int32_t cputype, cpusubtype, offset, size, align;
} mach_fat_arch;
typedef struct {
struct {
mach_header hdr;
mach_segment_command seg;
mach_section sec;
mach_symtab_command sym;
} arch[1];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_obj;
typedef struct {
struct {
mach_header_64 hdr;
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
} arch[1];
mach_nlist_64 sym_entry;
uint8_t space[4096];
} mach_obj_64;
typedef struct {
mach_fat_header fat;
mach_fat_arch fat_arch[2];
struct {
mach_header hdr;
mach_segment_command seg;
mach_section sec;
mach_symtab_command sym;
} arch[2];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_fat_obj;
typedef struct {
mach_fat_header fat;
mach_fat_arch fat_arch[2];
struct {
mach_header_64 hdr;
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
} arch[2];
mach_nlist_64 sym_entry;
uint8_t space[4096];
} mach_fat_obj_64;
]]
local symname = '_'..LJBC_PREFIX..ctx.modname
local cputype, cpusubtype = 0x01000007, 3
if ctx.arch ~= "x64" then
check(ctx.arch == "arm64", "unsupported architecture for OSX")
cputype, cpusubtype = 0x0100000c, 0
local isfat, is64, align, mobj = false, false, 4, "mach_obj"
if ctx.arch == "x64" then
is64, align, mobj = true, 8, "mach_obj_64"
elseif ctx.arch == "arm" then
isfat, mobj = true, "mach_fat_obj"
elseif ctx.arch == "arm64" then
is64, align, isfat, mobj = true, 8, true, "mach_fat_obj_64"
else
check(ctx.arch == "x86", "unsupported architecture for OSX")
end
local function aligned(v, a) return bit.band(v+a-1, -a) end
local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE.
-- Create Mach-O object and fill in header.
local o = ffi.new("mach_obj_64")
local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8)
local o = ffi.new(mobj)
local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
if isfat then
o.fat.magic = be32(0xcafebabe)
o.fat.nfat_arch = be32(#cpusubtype)
end
-- Fill in sections and symbols.
o.hdr.magic = 0xfeedfacf
o.hdr.cputype = cputype
o.hdr.cpusubtype = cpusubtype
o.hdr.filetype = 1
o.hdr.ncmds = 2
o.hdr.sizeofcmds = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)+ffi.sizeof(o.sym)
o.seg.cmd = 0x19
o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)
o.seg.vmsize = #s
o.seg.fileoff = mach_size
o.seg.filesize = #s
o.seg.maxprot = 1
o.seg.initprot = 1
o.seg.nsects = 1
ffi.copy(o.sec.sectname, "__data")
ffi.copy(o.sec.segname, "__DATA")
o.sec.size = #s
o.sec.offset = mach_size
o.sym.cmd = 2
o.sym.cmdsize = ffi.sizeof(o.sym)
o.sym.symoff = ffi.offsetof(o, "sym_entry")
o.sym.nsyms = 1
o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)
o.sym.strsize = aligned(#symname+2, 8)
for i=0,#cpusubtype-1 do
local ofs = 0
if isfat then
local a = o.fat_arch[i]
a.cputype = be32(cputype[i+1])
a.cpusubtype = be32(cpusubtype[i+1])
-- Subsequent slices overlap each other to share data.
ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0])
a.offset = be32(ofs)
a.size = be32(mach_size-ofs+#s)
end
local a = o.arch[i]
a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface
a.hdr.cputype = cputype[i+1]
a.hdr.cpusubtype = cpusubtype[i+1]
a.hdr.filetype = 1
a.hdr.ncmds = 2
a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym)
a.seg.cmd = is64 and 0x19 or 0x1
a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)
a.seg.vmsize = #s
a.seg.fileoff = mach_size-ofs
a.seg.filesize = #s
a.seg.maxprot = 1
a.seg.initprot = 1
a.seg.nsects = 1
ffi.copy(a.sec.sectname, "__data")
ffi.copy(a.sec.segname, "__DATA")
a.sec.size = #s
a.sec.offset = mach_size-ofs
a.sym.cmd = 2
a.sym.cmdsize = ffi.sizeof(a.sym)
a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs
a.sym.nsyms = 1
a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs
a.sym.strsize = aligned(#symname+2, align)
end
o.sym_entry.type = 0xf
o.sym_entry.sect = 1
o.sym_entry.strx = 1
ffi.copy(o.space+1, symname)
-- Write Mach-O object file.
-- Write Macho-O object file.
local fp = savefile(output, "wb")
fp:write(ffi.string(o, mach_size))
bcsave_tail(fp, output, s)
@ -541,7 +624,7 @@ end
local function bcsave(ctx, input, output)
local f = readfile(ctx, input)
local s = string.dump(f, ctx.mode)
local s = string.dump(f, ctx.strip)
local t = ctx.type
if not t then
t = detecttype(output)
@ -564,11 +647,9 @@ local function docmd(...)
local n = 1
local list = false
local ctx = {
mode = "bt", arch = jit.arch, os = jit.os:lower(),
type = false, modname = false, string = false,
strip = true, arch = jit.arch, os = jit.os:lower(),
type = false, modname = false,
}
local strip = "s"
local gc64 = ""
while n <= #arg do
local a = arg[n]
if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
@ -579,18 +660,14 @@ local function docmd(...)
if opt == "l" then
list = true
elseif opt == "s" then
strip = "s"
ctx.strip = true
elseif opt == "g" then
strip = ""
elseif opt == "W" or opt == "X" then
gc64 = opt
elseif opt == "d" then
ctx.mode = ctx.mode .. opt
ctx.strip = false
else
if arg[n] == nil or m ~= #a then usage() end
if opt == "e" then
if n ~= 1 then usage() end
ctx.string = true
arg[1] = check(loadstring(arg[1]))
elseif opt == "n" then
ctx.modname = checkmodname(tremove(arg, n))
elseif opt == "t" then
@ -610,7 +687,6 @@ local function docmd(...)
n = n + 1
end
end
ctx.mode = ctx.mode .. strip .. gc64
if list then
if #arg == 0 or #arg > 2 then usage() end
bclist(ctx, arg[1], arg[2] or "-")

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT ARM disassembler module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT ARM64 disassembler module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
--
-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
@ -107,20 +107,24 @@ local map_logsr = { -- Logical, shifted register.
[0] = {
shift = 29, mask = 3,
[0] = {
shift = 21, mask = 1,
[0] = "andDNMSg", "bicDNMSg"
shift = 21, mask = 7,
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
},
{
shift = 21, mask = 1,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
shift = 21, mask = 7,
[0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
},
{
shift = 21, mask = 1,
[0] = "eorDNMSg", "eonDNMSg"
shift = 21, mask = 7,
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
},
{
shift = 21, mask = 1,
[0] = "ands|tstD0NMSg", "bicsDNMSg"
shift = 21, mask = 7,
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
}
},
false -- unallocated
@ -128,20 +132,24 @@ local map_logsr = { -- Logical, shifted register.
{
shift = 29, mask = 3,
[0] = {
shift = 21, mask = 1,
[0] = "andDNMSg", "bicDNMSg"
shift = 21, mask = 7,
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
},
{
shift = 21, mask = 1,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
shift = 21, mask = 7,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
},
{
shift = 21, mask = 1,
[0] = "eorDNMSg", "eonDNMSg"
shift = 21, mask = 7,
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
},
{
shift = 21, mask = 1,
[0] = "ands|tstD0NMSg", "bicsDNMSg"
shift = 21, mask = 7,
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
}
}
}
@ -658,10 +666,6 @@ local map_datafp = { -- Data processing, SIMD and FP.
}
}
}
},
{ -- 010
shift = 0, mask = 0x81f8fc00,
[0x100e400] = "moviDdG"
}
}
@ -731,7 +735,7 @@ local map_cond = {
"hi", "ls", "ge", "lt", "gt", "le", "al",
}
local map_shift = { [0] = "lsl", "lsr", "asr", "ror"}
local map_shift = { [0] = "lsl", "lsr", "asr", }
local map_extend = {
[0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
@ -836,20 +840,6 @@ local function parse_fpimm8(op)
return sign * frac * 2^exp
end
local function decode_fpmovi(op)
local lo = rshift(op, 5)
local hi = rshift(op, 9)
lo = bor(band(lo, 1) * 0xff, band(lo, 2) * 0x7f80, band(lo, 4) * 0x3fc000,
band(lo, 8) * 0x1fe00000)
hi = bor(band(hi, 1) * 0xff, band(hi, 0x80) * 0x1fe,
band(hi, 0x100) * 0xff00, band(hi, 0x200) * 0x7f8000)
if hi ~= 0 then
return fmt_hex32(hi)..tohex(lo)
else
return fmt_hex32(lo)
end
end
local function prefer_bfx(sf, uns, imms, immr)
if imms < immr or imms == 31 or imms == 63 then
return false
@ -966,7 +956,7 @@ local function disass_ins(ctx)
elseif p == "U" then
local rn = map_regs.x[band(rshift(op, 5), 31)]
local sz = band(rshift(op, 30), 3)
local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
if imm12 ~= 0 then
x = "["..rn..", #"..imm12.."]"
else
@ -1003,7 +993,8 @@ local function disass_ins(ctx)
x = x.."]"
end
elseif p == "P" then
local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
local opcv, sh = rshift(op, 26), 2
if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
local rn = map_regs.x[band(rshift(op, 5), 31)]
local ind = band(rshift(op, 23), 3)
@ -1149,8 +1140,6 @@ local function disass_ins(ctx)
x = 0
elseif p == "F" then
x = parse_fpimm8(op)
elseif p == "G" then
x = "#0x"..decode_fpmovi(op)
elseif p == "g" or p == "f" or p == "x" or p == "w" or
p == "d" or p == "s" then
-- These are handled in D/N/M/A.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT ARM64BE disassembler wrapper module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- ARM64 instructions are always little-endian. So just forward to the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS disassembler module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64 disassembler wrapper module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the big-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64EL disassembler wrapper module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64R6 disassembler wrapper module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the r6 big-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64R6EL disassembler wrapper module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the r6 little-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPSEL disassembler wrapper module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT PPC disassembler module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT x64 disassembler wrapper module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the 64 bit functions from the combined

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT x86/x64 disassembler module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT compiler dump module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@ -552,12 +552,7 @@ local recdepth = 0
local function fmterr(err, info)
if type(err) == "number" then
if type(info) == "function" then info = fmtfunc(info) end
local fmt = vmdef.traceerr[err]
if fmt == "NYI: bytecode %s" then
local oidx = 6 * info
info = sub(vmdef.bcnames, oidx+1, oidx+6)
end
err = format(fmt, info)
err = format(vmdef.traceerr[err], info)
end
return err
end

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT profiler.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@ -227,7 +227,9 @@ local function prof_finish()
local samples = prof_samples
if samples == 0 then
if prof_raw ~= true then out:write("[No samples collected]\n") end
elseif prof_ann then
return
end
if prof_ann then
prof_annotate(prof_count1, samples)
else
prof_top(prof_count1, prof_count2, samples, "")

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- Verbose mode of the LuaJIT compiler.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@ -62,7 +62,7 @@ local jit = require("jit")
local jutil = require("jit.util")
local vmdef = require("jit.vmdef")
local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
local type, sub, format = type, string.sub, string.format
local type, format = type, string.format
local stdout, stderr = io.stdout, io.stderr
-- Active flag and output file handle.
@ -89,12 +89,7 @@ end
local function fmterr(err, info)
if type(err) == "number" then
if type(info) == "function" then info = fmtfunc(info) end
local fmt = vmdef.traceerr[err]
if fmt == "NYI: bytecode %s" then
local oidx = 6 * info
info = sub(vmdef.bcnames, oidx+1, oidx+6)
end
err = format(fmt, info)
err = format(vmdef.traceerr[err], info)
end
return err
end

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT profiler zones.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--

View File

@ -1,6 +1,6 @@
/*
** Auxiliary library for the Lua/C API.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major parts taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** Base and coroutine library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -146,8 +146,6 @@ LJLIB_CF(getfenv) LJLIB_REC(.)
cTValue *o = L->base;
if (!(o < L->top && tvisfunc(o))) {
int level = lj_lib_optint(L, 1, 1);
if (level < 0)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
o = lj_debug_frame(L, level, &level);
if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
@ -170,8 +168,6 @@ LJLIB_CF(setfenv)
setgcref(L->env, obj2gco(t));
return 0;
}
if (level < 0)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
o = lj_debug_frame(L, level, &level);
if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
@ -364,11 +360,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
static int load_aux(lua_State *L, int status, int envarg)
{
if (status == LUA_OK) {
/*
** Set environment table for top-level function.
** Don't do this for non-native bytecode, which returns a prototype.
*/
if (tvistab(L->base+envarg-1) && tvisfunc(L->top-1)) {
if (tvistab(L->base+envarg-1)) {
GCfunc *fn = funcV(L->top-1);
GCtab *t = tabV(L->base+envarg-1);
setgcref(fn->c.env, obj2gco(t));
@ -624,10 +616,7 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
return FFH_RES(2);
}
if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) {
cTValue *msg = --co->top;
lj_err_callermsg(L, strVdata(msg));
}
lj_state_growstack(co, (MSize)(L->top - L->base));
return FFH_RETRY;
}

View File

@ -1,6 +1,6 @@
/*
** Bit manipulation library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_bit_c
@ -98,7 +98,7 @@ LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
return bit_result64(L, id, x);
}
setintV(L->base+1, sh);
if (id2) setintV(L->base+1, sh);
return FFH_RETRY;
#else
lj_lib_checknumber(L, 1);

View File

@ -1,6 +1,6 @@
/*
** Buffer library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_buffer_c

View File

@ -1,6 +1,6 @@
/*
** Debug library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** FFI library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_ffi_c
@ -305,7 +305,7 @@ LJLIB_CF(ffi_meta___tostring)
p = *(void **)p;
} else if (ctype_isenum(ct->info)) {
msg = "cdata<%s>: %d";
p = (void *)(uintptr_t)*(uint32_t *)p;
p = (void *)(uintptr_t)*(uint32_t **)p;
} else {
if (ctype_isptr(ct->info)) {
p = cdata_getptr(p, ct->size);
@ -513,7 +513,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
/* Handle ctype __gc metamethod. Use the fast lookup here. */
cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) {
GCtab *t = tabref(G(L)->gcroot[GCROOT_FFI_FIN]);
GCtab *t = cts->finalizer;
if (gcref(t->metatable)) {
/* Add to finalizer table, if still enabled. */
copyTV(L, lj_tab_set(L, t, o-1), tv);
@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
"\003win"
#endif
#if LJ_ABI_PAUTH
"\005pauth"
"\007pauth"
#endif
#if LJ_TARGET_UWP
"\003uwp"
@ -765,7 +765,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
return 1;
}
LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to miscmap table. */
LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
LJLIB_CF(ffi_metatype)
{
@ -791,6 +791,8 @@ LJLIB_CF(ffi_metatype)
return 1;
}
LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to finalizer table. */
LJLIB_CF(ffi_gc) LJLIB_REC(.)
{
GCcdata *cd = ffi_checkcdata(L, 1);
@ -823,6 +825,19 @@ LJLIB_PUSH(top-2) LJLIB_SET(arch)
/* ------------------------------------------------------------------------ */
/* Create special weak-keyed finalizer table. */
static GCtab *ffi_finalizer(lua_State *L)
{
/* NOBARRIER: The table is new (marked white). */
GCtab *t = lj_tab_new(L, 0, 1);
settabV(L, L->top++, t);
setgcref(t->metatable, obj2gco(t));
setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
lj_str_newlit(L, "k"));
t->nomm = (uint8_t)(~(1u<<MM_mode));
return t;
}
/* Register FFI module as loaded. */
static void ffi_register_module(lua_State *L)
{
@ -838,6 +853,7 @@ LUALIB_API int luaopen_ffi(lua_State *L)
{
CTState *cts = lj_ctype_init(L);
settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1)));
cts->finalizer = ffi_finalizer(L);
LJ_LIB_REG(L, NULL, ffi_meta);
/* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1)));

View File

@ -1,6 +1,6 @@
/*
** Library initialization.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major parts taken verbatim from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** I/O library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -25,7 +25,6 @@
#include "lj_strfmt.h"
#include "lj_ff.h"
#include "lj_lib.h"
#include "lj_strscan.h"
/* Userdata payload for I/O file. */
typedef struct IOFileUD {
@ -324,14 +323,13 @@ LJLIB_CF(io_method_seek)
FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
int64_t ofs = 0;
TValue *o;
cTValue *o;
int res;
if (opt == 0) opt = SEEK_SET;
else if (opt == 1) opt = SEEK_CUR;
else if (opt == 2) opt = SEEK_END;
o = L->base+2;
if (o < L->top) {
if (tvisstr(o)) lj_strscan_num(strV(o), o);
if (tvisint(o))
ofs = (int64_t)intV(o);
else if (tvisnum(o))

View File

@ -1,6 +1,6 @@
/*
** JIT library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_jit_c
@ -161,6 +161,24 @@ LJLIB_PUSH(top-2) LJLIB_SET(version)
/* -- Reflection API for Lua functions ------------------------------------ */
/* Return prototype of first argument (Lua function or prototype object) */
static GCproto *check_Lproto(lua_State *L, int nolua)
{
TValue *o = L->base;
if (L->top > o) {
if (tvisproto(o)) {
return protoV(o);
} else if (tvisfunc(o)) {
if (isluafunc(funcV(o)))
return funcproto(funcV(o));
else if (nolua)
return NULL;
}
}
lj_err_argt(L, 1, LUA_TFUNCTION);
return NULL; /* unreachable */
}
static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
{
setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
@ -169,7 +187,7 @@ static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
/* local info = jit.util.funcinfo(func [,pc]) */
LJLIB_CF(jit_util_funcinfo)
{
GCproto *pt = lj_lib_checkLproto(L, 1, 1);
GCproto *pt = check_Lproto(L, 1);
if (pt) {
BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
GCtab *t;
@ -211,7 +229,7 @@ LJLIB_CF(jit_util_funcinfo)
/* local ins, m = jit.util.funcbc(func, pc) */
LJLIB_CF(jit_util_funcbc)
{
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
GCproto *pt = check_Lproto(L, 0);
BCPos pc = (BCPos)lj_lib_checkint(L, 2);
if (pc < pt->sizebc) {
BCIns ins = proto_bc(pt)[pc];
@ -228,7 +246,7 @@ LJLIB_CF(jit_util_funcbc)
/* local k = jit.util.funck(func, idx) */
LJLIB_CF(jit_util_funck)
{
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
GCproto *pt = check_Lproto(L, 0);
ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2);
if (idx >= 0) {
if (idx < (ptrdiff_t)pt->sizekn) {
@ -248,7 +266,7 @@ LJLIB_CF(jit_util_funck)
/* local name = jit.util.funcuvname(func, idx) */
LJLIB_CF(jit_util_funcuvname)
{
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
GCproto *pt = check_Lproto(L, 0);
uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
if (idx < pt->sizeuv) {
setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx)));

View File

@ -1,6 +1,6 @@
/*
** Math library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#include <math.h>
@ -13,7 +13,6 @@
#include "lualib.h"
#include "lj_obj.h"
#include "lj_err.h"
#include "lj_lib.h"
#include "lj_vm.h"
#include "lj_prng.h"
@ -184,10 +183,7 @@ LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
LJLIB_CF(math_randomseed)
{
PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
if (L->base != L->top)
random_seed(rs, lj_lib_checknum(L, 1));
else if (!lj_prng_seed_secure(rs))
lj_err_caller(L, LJ_ERR_PRNGSD);
return 0;
}

View File

@ -1,6 +1,6 @@
/*
** OS library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** Package library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** String library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -122,25 +122,11 @@ static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
LJLIB_CF(string_dump)
{
GCproto *pt = lj_lib_checkLproto(L, 1, 1);
uint32_t flags = 0;
SBuf *sb;
TValue *o = L->base+1;
if (o < L->top) {
if (tvisstr(o)) {
const char *mode = strVdata(o);
char c;
while ((c = *mode++)) {
if (c == 's') flags |= BCDUMP_F_STRIP;
if (c == 'd') flags |= BCDUMP_F_DETERMINISTIC;
}
} else if (tvistruecond(o)) {
flags |= BCDUMP_F_STRIP;
}
}
sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
GCfunc *fn = lj_lib_checkfunc(L, 1);
int strip = L->base+1 < L->top && tvistruecond(L->base+1);
SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
L->top = L->base+1;
if (!pt || lj_bcwrite(L, pt, writer_buf, sb, flags))
if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
lj_err_caller(L, LJ_ERR_STRDUMP);
setstrV(L, L->top-1, lj_buf_str(L, sb));
lj_gc_check(L);

View File

@ -1,6 +1,6 @@
/*
** Table library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1057,7 +1057,7 @@ static size_t release_unused_segments(mstate m)
mchunkptr p = align_as_chunk(base);
size_t psize = chunksize(p);
/* Can unmap if first chunk holds entire segment and not pinned */
if (!cinuse(p) && (char *)p + psize == (char *)mem2chunk(sp)) {
if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
tchunkptr tp = (tchunkptr)p;
if (p == m->dv) {
m->dv = 0;

View File

@ -1,6 +1,6 @@
/*
** Public Lua/C API.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -104,12 +104,7 @@ LUA_API int lua_checkstack(lua_State *L, int size)
if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
return 0; /* Stack overflow. */
} else if (size > 0) {
int avail = (int)(mref(L->maxstack, TValue) - L->top);
if (size > avail &&
lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
L->top--;
return 0; /* Out of memory. */
}
lj_state_checkstack(L, (MSize)size);
}
return 1;
}
@ -1052,7 +1047,6 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
/* Flush cache, since traces specialize to basemt. But not during __gc. */
if (lj_trace_flushall(L))
lj_err_caller(L, LJ_ERR_NOGCMM);
o = index2adr(L, idx); /* Stack may have been reallocated. */
if (tvisbool(o)) {
/* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));

View File

@ -1,6 +1,6 @@
/*
** Target architecture selection.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_ARCH_H
@ -57,7 +57,7 @@
#define LUAJIT_TARGET LUAJIT_ARCH_X64
#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__) || defined(_M_ARM64)
#elif defined(__aarch64__)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
@ -66,7 +66,7 @@
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#else
#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
#error "No support for this architecture (yet)"
#endif
#endif
@ -124,7 +124,7 @@
#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
#if TARGET_OS_IPHONE
#define LJ_TARGET_IOS 1
#else
#define LJ_TARGET_IOS 0
@ -237,7 +237,7 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#define LJ_ARCH_VERSION 80
#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
#define LJ_ARCH_VERSION 70
@ -331,7 +331,6 @@
#define LJ_ARCH_NOFFI 1
#elif LJ_ARCH_BITS == 64
#error "No support for PPC64"
#undef LJ_TARGET_PPC
#endif
#if _ARCH_PWR7
@ -491,45 +490,36 @@
#elif LJ_TARGET_ARM
#if defined(__ARMEB__)
#error "No support for big-endian ARM"
#undef LJ_TARGET_ARM
#endif
#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
#error "No support for Cortex-M CPUs"
#undef LJ_TARGET_ARM
#endif
#if !(__ARM_EABI__ || LJ_TARGET_IOS)
#error "Only ARM EABI or iOS 3.0+ ABI is supported"
#undef LJ_TARGET_ARM
#endif
#elif LJ_TARGET_ARM64
#if defined(_ILP32)
#error "No support for ILP32 model on ARM64"
#undef LJ_TARGET_ARM64
#endif
#elif LJ_TARGET_PPC
#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
#error "No support for little-endian PPC32"
#undef LJ_TARGET_PPC
#endif
#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500, use LuaJIT 2.0"
#undef LJ_TARGET_PPC
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
#elif LJ_TARGET_MIPS32
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
#error "Only o32 ABI supported for MIPS32"
#undef LJ_TARGET_MIPS
#endif
#if LJ_TARGET_MIPSR6
/* Not that useful, since most available r6 CPUs are 64 bit. */
#error "No support for MIPS32R6"
#undef LJ_TARGET_MIPS
#endif
#elif LJ_TARGET_MIPS64
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
#error "Only n64 ABI supported for MIPS64"
#undef LJ_TARGET_MIPS
#endif
#endif
#endif

View File

@ -1,6 +1,6 @@
/*
** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_asm_c
@ -29,7 +29,6 @@
#include "lj_dispatch.h"
#include "lj_vm.h"
#include "lj_target.h"
#include "lj_prng.h"
#ifdef LUA_USE_ASSERT
#include <stdio.h>
@ -94,12 +93,6 @@ typedef struct ASMState {
MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
MCode *realign; /* Realign loop if not NULL. */
#ifdef LUAJIT_RANDOM_RA
/* Randomize register allocation. OK for fuzz testing, not for production. */
uint64_t prngbits;
PRNGState prngstate;
#endif
#ifdef RID_NUM_KREF
intptr_t krefk[RID_NUM_KREF];
#endif
@ -180,41 +173,6 @@ IRFLDEF(FLOFS)
0
};
#ifdef LUAJIT_RANDOM_RA
/* Return a fixed number of random bits from the local PRNG state. */
static uint32_t ra_random_bits(ASMState *as, uint32_t nbits) {
uint64_t b = as->prngbits;
uint32_t res = (1u << nbits) - 1u;
if (b <= res) b = lj_prng_u64(&as->prngstate) | (1ull << 63);
res &= (uint32_t)b;
as->prngbits = b >> nbits;
return res;
}
/* Pick a random register from a register set. */
static Reg rset_pickrandom(ASMState *as, RegSet rs)
{
Reg r = rset_pickbot_(rs);
rs >>= r;
if (rs > 1) { /* More than one bit set? */
while (1) {
/* We need to sample max. the GPR or FPR half of the set. */
uint32_t d = ra_random_bits(as, RSET_BITS-1);
if ((rs >> d) & 1) {
r += d;
break;
}
}
}
return r;
}
#define rset_picktop(rs) rset_pickrandom(as, rs)
#define rset_pickbot(rs) rset_pickrandom(as, rs)
#else
#define rset_picktop(rs) rset_picktop_(rs)
#define rset_pickbot(rs) rset_pickbot_(rs)
#endif
/* -- Target-specific instruction emitter --------------------------------- */
#if LJ_TARGET_X86ORX64
@ -606,11 +564,7 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
IRIns *ir = IR(ref);
if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
#if LJ_GC64
#if LJ_TARGET_ARM64
(ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
#else
(ir->o == IR_KINT && k == ir->i) ||
#endif
(ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
k == (intptr_t)ir_kptr(ir))
@ -949,11 +903,11 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
static void asm_snap_alloc1(ASMState *as, IRRef ref)
{
IRIns *ir = IR(ref);
if (!irref_isk(ref)) {
if (!irref_isk(ref) && ir->r != RID_SUNK) {
bloomset(as->snapfilt1, ref);
bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
if (ra_used(ir)) return;
if (ir->r == RID_SINK || ir->r == RID_SUNK) {
if (ir->r == RID_SINK) {
ir->r = RID_SUNK;
#if LJ_HASFFI
if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */
@ -2488,9 +2442,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->realign = NULL;
as->loopinv = 0;
as->parent = J->parent ? traceref(J, J->parent) : NULL;
#ifdef LUAJIT_RANDOM_RA
(void)lj_prng_u64(&J2G(J)->prng); /* Ensure PRNG step between traces. */
#endif
/* Reserve MCode memory. */
as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
@ -2532,10 +2483,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
#endif
as->ir = J->curfinal->ir; /* Use the copied IR. */
as->curins = J->cur.nins = as->orignins;
#ifdef LUAJIT_RANDOM_RA
as->prngstate = J2G(J)->prng; /* Must (re)start from identical state. */
as->prngbits = 0;
#endif
RA_DBG_START();
RA_DBGX((as, "===== STOP ====="));

View File

@ -1,6 +1,6 @@
/*
** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_ASM_H

View File

@ -1,6 +1,6 @@
/*
** ARM IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@ -969,32 +969,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, ARMI_LDR, dest, v);
} else {
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
}
if (ir->o == IR_UREFC)
emit_opk(as, ARMI_ADD, dest, dest,
emit_opk(as, ARMI_ADD, dest, uv,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, ARMI_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_lso(as, ARMI_LDR, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
@ -1927,7 +1919,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
} else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
as->curins--; /* Always skip the loword min/max. */
if (uselo || usehi)
asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS);
asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
return;
#elif LJ_HASFFI
} else if ((ir-1)->o == IR_CONV) {
@ -1998,7 +1990,6 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno)
{
int savereg = 0;
Reg pbase;
uint32_t k;
if (irp) {
@ -2009,14 +2000,12 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
pbase = rset_pickbot(allow);
} else {
pbase = RID_RET;
savereg = 1;
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
}
} else {
pbase = RID_BASE;
}
emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
if (savereg)
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
k = emit_isk12(0, (int32_t)(8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, ARMI_CMP^k, RID_TMP);
@ -2028,7 +2017,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
if (ra_hasspill(irp->s))
emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
if (savereg)
if (ra_hasspill(irp->s) && !allow)
emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
emit_loadi(as, RID_TMP, (i & ~4095));
} else {
@ -2042,12 +2031,11 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
SnapEntry *map = &as->T->snapmap[snap->mapofs];
SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
MSize n, nent = snap->nent;
int32_t bias = 0;
/* Store the value of all modified slots to the Lua stack. */
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
BCReg s = snap_slot(sn);
int32_t ofs = 8*((int32_t)s-1) - bias;
int32_t ofs = 8*((int32_t)s-1);
IRRef ref = snap_ref(sn);
IRIns *ir = IR(ref);
if ((sn & SNAP_NORESTORE))
@ -2066,12 +2054,6 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);
#else
Reg src = ra_alloc1(as, ref, RSET_FPR);
if (LJ_UNLIKELY(ofs < -1020 || ofs > 1020)) {
int32_t adj = ofs & 0xffffff00; /* K12-friendly. */
bias += adj;
ofs -= adj;
emit_addptr(as, RID_BASE, -adj);
}
emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
#endif
} else {
@ -2100,7 +2082,6 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
}
checkmclim(as);
}
emit_addptr(as, RID_BASE, bias);
lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}
@ -2271,7 +2252,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
}
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
return REGSP_HINT(RID_RET);
}
static void asm_setup_target(ASMState *as)

View File

@ -1,6 +1,6 @@
/*
** ARM64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
** Sponsored by Cisco Systems, Inc.
@ -84,23 +84,18 @@ static void asm_guardcc(ASMState *as, A64CC cc)
emit_cond_branch(as, cc, target);
}
/* Emit test and branch instruction to exit for guard, if in range. */
static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
/* Emit test and branch instruction to exit for guard. */
static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
{
MCode *target = asm_exitstub_addr(as, as->snapno);
MCode *p = as->mcp;
ptrdiff_t delta = target - p;
if (LJ_UNLIKELY(p == as->invmcp)) {
if (as->orignins > 1023) return 0; /* Delta might end up too large. */
as->loopinv = 1;
*p = A64I_B | A64F_S26(delta);
ai ^= 0x01000000u;
target = p-1;
} else if (LJ_UNLIKELY(delta >= 0x1fff)) {
return 0;
*p = A64I_B | A64F_S26(target-p);
emit_tnb(as, ai^0x01000000u, r, bit, p-1);
return;
}
emit_tnb(as, ai, r, bit, target);
return 1;
}
/* Emit compare and branch instruction to exit for guard. */
@ -216,14 +211,16 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{
IRIns *ir = IR(ref);
int logical = (ai & 0x1f000000) == 0x0a000000;
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
return A64F_M(ir->r);
} else if (irref_isk(ref)) {
uint32_t m;
int64_t k = get_k64val(as, ref);
uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
if ((ai & 0x1f000000) == 0x0a000000)
m = emit_isk13(k, irt_is64(ir->t));
else
m = emit_isk12(k);
if (m)
return m;
} else if (mayfuse(as, ref)) {
@ -235,7 +232,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
(IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
IRIns *irl = IR(ir->op1);
if (sh == A64SH_LSL &&
irl->o == IR_CONV && !logical &&
irl->o == IR_CONV &&
irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
shift <= 4 &&
canfuse(as, irl)) {
@ -245,11 +242,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_SH(sh, shift);
}
} else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) {
Reg m = ra_alloc1(as, ir->op1, allow);
int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
} else if (ir->o == IR_CONV && !logical &&
} else if (ir->o == IR_CONV &&
ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_EX(A64EX_SXTW);
@ -426,18 +419,13 @@ static int asm_fuseorshift(ASMState *as, IRIns *ir)
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
uint32_t n, nargs = CCI_XNARGS(ci);
int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7;
int32_t ofs = 0;
Reg gpr, fpr = REGARG_FIRSTFPR;
if (ci->func)
emit_call(as, ci->func);
for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
as->cost[gpr] = REGCOST(~0u, ASMREF_L);
gpr = REGARG_FIRSTGPR;
#if LJ_HASFFI && LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) {
fpr = REGARG_LASTFPR+1;
}
#endif
for (n = 0; n < nargs; n++) { /* Setup args. */
IRRef ref = args[n];
IRIns *ir = IR(ref);
@ -448,21 +436,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
"reg %d not free", fpr); /* Must have been evicted. */
ra_leftov(as, fpr, ref);
fpr++;
#if LJ_HASFFI && LJ_ABI_WIN
} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
Reg rf = ra_alloc1(as, ref, RSET_FPR);
emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
#endif
} else {
Reg r = ra_alloc1(as, ref, RSET_FPR);
int32_t al = spalign;
#if LJ_HASFFI && LJ_TARGET_OSX
al |= irt_isnum(ir->t) ? 7 : 3;
#endif
spofs = (spofs + al) & ~al;
if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4;
emit_spstore(as, ir, r, spofs);
spofs += al + 1;
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
ofs += 8;
}
} else {
if (gpr <= REGARG_LASTGPR) {
@ -472,27 +449,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
gpr++;
} else {
Reg r = ra_alloc1(as, ref, RSET_GPR);
int32_t al = spalign;
#if LJ_HASFFI && LJ_TARGET_OSX
al |= irt_size(ir->t) - 1;
#endif
spofs = (spofs + al) & ~al;
if (al >= 3) {
if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4;
emit_spstore(as, ir, r, spofs);
} else {
lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1);
emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs);
}
spofs += al + 1;
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
ofs += 8;
}
}
#if LJ_HASFFI && LJ_TARGET_OSX
} else { /* Marker for start of varargs. */
gpr = REGARG_LASTGPR+1;
fpr = REGARG_LASTFPR+1;
spalign = 7;
#endif
}
}
}
@ -558,6 +518,8 @@ static void asm_retf(ASMState *as, IRIns *ir)
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
/* Need to force a spill on REF_BASE now to update the stack slot. */
emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
emit_setgl(as, base, jit_base);
emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE);
@ -681,22 +643,25 @@ static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
Reg tmp;
Reg dest = 0, tmp;
int destused = ra_used(ir);
int32_t ofs = 0;
ra_evictset(as, RSET_SCRATCH);
if (ra_used(ir)) {
if (destused) {
if (ra_hasspill(ir->s)) {
ofs = sps_scale(ir->s);
destused = 0;
if (ra_hasreg(ir->r)) {
ra_free(as, ir->r);
ra_modified(as, ir->r);
emit_spload(as, ir, ir->r, ofs);
}
} else {
Reg dest = ra_dest(as, ir, RSET_FPR);
dest = ra_dest(as, ir, RSET_FPR);
}
}
if (destused)
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
}
}
asm_guardcnb(as, A64I_CBZ, RID_RET);
args[0] = ir->op1; /* GCstr *str */
args[1] = ASMREF_TMP1; /* TValue *n */
@ -787,75 +752,113 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
int destused = ra_used(ir);
Reg dest = ra_dest(as, ir, allow);
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
Reg tmp = RID_TMP, type = RID_NONE, key = RID_NONE, tkey;
Reg key = 0, tmp = RID_TMP;
Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
IRRef refkey = ir->op2;
IRIns *irkey = IR(refkey);
int isk = irref_isk(refkey);
int isk = irref_isk(ir->op2);
IRType1 kt = irkey->t;
uint32_t k = 0;
uint32_t khash;
MCLabel l_end, l_loop;
MCLabel l_end, l_loop, l_next;
rset_clear(allow, tab);
/* Allocate register for tkey outside of the loop. */
if (isk) {
int64_t kk;
if (irt_isaddr(kt)) {
kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
if (!isk) {
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
rset_clear(allow, key);
if (!irt_isstr(kt)) {
tmp = ra_scratch(as, allow);
rset_clear(allow, tmp);
}
} else if (irt_isnum(kt)) {
kk = (int64_t)ir_knum(irkey)->u64;
/* Assumes -0.0 is already canonicalized to +0.0. */
int64_t val = (int64_t)ir_knum(irkey)->u64;
if (!(k = emit_isk12(val))) {
key = ra_allock(as, val, allow);
rset_clear(allow, key);
}
} else if (!irt_ispri(kt)) {
if (!(k = emit_isk12(irkey->i))) {
key = ra_alloc1(as, refkey, allow);
rset_clear(allow, key);
}
}
/* Allocate constants early. */
if (irt_isnum(kt)) {
if (!isk) {
tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
rset_clear(allow, tisnum);
}
} else if (irt_isaddr(kt)) {
if (isk) {
int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
scr = ra_allock(as, kk, allow);
} else {
scr = ra_scratch(as, allow);
}
rset_clear(allow, scr);
} else {
lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
kk = ~((int64_t)~irt_toitype(kt) << 47);
}
k = emit_isk12(kk);
tkey = k ? 0 : ra_allock(as, kk, allow);
} else {
tkey = ra_scratch(as, allow);
type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
scr = ra_scratch(as, rset_clear(allow, type));
rset_clear(allow, scr);
}
/* Key not found in chain: jump to exit (if merged) or load niltv. */
l_end = emit_label(as);
as->invmcp = NULL;
if (merge == IR_NE) {
if (merge == IR_NE)
asm_guardcc(as, CC_AL);
} else if (destused) {
uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val));
lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
}
else if (destused)
emit_loada(as, dest, niltvg(J2G(as->J)));
/* Follow hash chain until the end. */
l_loop = --as->mcp;
if (destused)
emit_n(as, A64I_CMPx^A64I_K12^0, dest);
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
l_next = emit_label(as);
/* Type and value comparison. */
if (merge == IR_EQ)
asm_guardcc(as, CC_EQ);
else
emit_cond_branch(as, CC_EQ, l_end);
emit_nm(as, A64I_CMPx^k, tmp, tkey);
if (!destused)
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
*l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
/* Construct tkey as canonicalized or tagged key. */
if (!isk) {
if (irt_isnum(kt)) {
key = ra_alloc1(as, refkey, RSET_FPR);
emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
/* A64I_FMOV_R_D from key to tkey done below. */
if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */
if (k)
emit_n(as, A64I_CMPx^k, tmp);
else
emit_nm(as, A64I_CMPx, key, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
} else {
lj_assertA(irt_isaddr(kt), "bad HREF key type");
key = ra_alloc1(as, refkey, allow);
type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
emit_nm(as, A64I_FCMPd, key, ftmp);
emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
emit_cond_branch(as, CC_LO, l_next);
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
}
} else if (irt_isaddr(kt)) {
if (isk) {
emit_nm(as, A64I_CMPx, scr, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
} else {
emit_nm(as, A64I_CMPx, tmp, scr);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
}
} else {
emit_nm(as, A64I_CMPx, scr, type);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
}
*l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
if (!isk && irt_isaddr(kt)) {
type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
rset_clear(allow, type);
}
/* Load main position relative to tab->node into dest. */
khash = isk ? ir_khash(as, irkey) : 1;
if (khash == 0) {
@ -869,6 +872,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
} else if (irt_isstr(kt)) {
/* Fetch of str->sid is cheaper than ra_allock. */
emit_dnm(as, A64I_ANDw, dest, dest, tmp);
emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
@ -877,18 +881,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
emit_dnm(as, A64I_SUBw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
emit_dnm(as, A64I_EORw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
emit_dnm(as, A64I_EORw, tmp, tmp, dest);
if (irt_isnum(kt)) {
emit_dnm(as, A64I_EORw, tmp, tkey, dest);
emit_dnm(as, A64I_ADDw, dest, dest, dest);
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
emit_nm(as, A64I_FCMPZd, (key & 31), 0);
emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_dm(as, A64I_MOVw, tmp, dest);
emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
} else {
emit_dnm(as, A64I_EORw, tmp, key, dest);
emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
checkmclim(as);
emit_dm(as, A64I_MOVw, tmp, key);
emit_dnm(as, A64I_EORw, dest, dest,
ra_allock(as, irt_toitype(kt) << 15, allow));
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_dm(as, A64I_MOVx, dest, key);
}
}
}
@ -903,7 +912,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
int bigofs = !emit_checkofs(A64I_LDRx, kofs);
Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
Reg idx = node;
Reg key, idx = node;
RegSet allow = rset_exclude(RSET_GPR, node);
uint64_t k;
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@ -922,8 +931,9 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
} else {
k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
}
emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
key = ra_scratch(as, allow);
emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
emit_lso(as, A64I_LDRx, key, idx, kofs);
if (bigofs)
emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
}
@ -931,30 +941,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v);
} else {
if (guarded)
asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
if (ir->o == IR_UREFC)
emit_opk(as, A64I_ADDx, dest, dest,
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
emit_opk(as, A64I_ADDx, dest, uv,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadu64(as, dest, k);
emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_lso(as, A64I_LDRx, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
}
@ -1059,7 +1063,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
Reg idx, tmp;
Reg idx, tmp, type;
int32_t ofs = 0;
RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
@ -1078,9 +1082,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
} else {
tmp = ra_scratch(as, gpr);
}
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
rset_clear(gpr, idx);
if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
type = ra_scratch(as, rset_clear(gpr, tmp));
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
/* Always do the type check, even if the load result is unused. */
asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
@ -1088,10 +1091,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t));
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
} else if (irt_isaddr(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
} else if (irt_isnil(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
} else {
@ -1214,8 +1217,9 @@ dotypecheck:
emit_nm(as, A64I_CMPx,
ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
} else {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
Reg type = ra_scratch(as, allow);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
}
emit_lso(as, A64I_LDRx, tmp, base, ofs);
return;
@ -1285,9 +1289,8 @@ static void asm_tbar(ASMState *as, IRIns *ir)
Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
Reg mark = RID_TMP;
MCLabel l_end = emit_label(as);
emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
/* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
emit_setgl(as, tab, gc.grayagain);
emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
emit_getgl(as, link, gc.grayagain);
@ -1301,6 +1304,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
IRRef args[2];
MCLabel l_end;
RegSet allow = RSET_GPR;
Reg obj, val, tmp;
/* No need for other object barriers (yet). */
lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
@ -1311,8 +1315,9 @@ static void asm_obar(ASMState *as, IRIns *ir)
asm_gencall(as, ci, args);
emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
obj = IR(ir->op1)->r;
tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end);
tmp = ra_scratch(as, rset_exclude(allow, obj));
emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
@ -1359,12 +1364,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
if (irref_isk(lref))
return 1; /* But swap constants to the right. */
ir = IR(rref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 0; /* Don't swap fusable operands to the left. */
ir = IR(lref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 1; /* But swap fusable operands to the right. */
@ -1410,12 +1415,13 @@ static void asm_intneg(ASMState *as, IRIns *ir)
static void asm_intmul(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
if (irt_isguard(ir->t)) { /* IR_MULOV */
asm_guardcc(as, CC_NE);
emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest);
emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
emit_dnm(as, A64I_SMULL, dest, right, left);
} else {
emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
@ -1675,15 +1681,16 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
if (asm_swapops(as, blref, brref)) {
Reg tmp = blref; blref = brref; brref = tmp;
}
bleft = ra_alloc1(as, blref, RSET_GPR);
if (irref_isk(brref)) {
uint64_t k = get_k64val(as, brref);
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) &&
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft,
emit_ctz64(k)))
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
return;
}
m2 = emit_isk13(k, irt_is64(irl->t));
}
bleft = ra_alloc1(as, blref, RSET_GPR);
ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
if (!m2)
m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
@ -1758,29 +1765,38 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno)
{
Reg pbase;
uint32_t k;
Reg pbase = RID_BASE;
if (irp) {
if (!ra_hasspill(irp->s)) {
pbase = irp->r;
if (!ra_hasreg(pbase))
pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET);
lj_assertA(ra_hasreg(pbase), "base reg lost");
} else if (allow) {
pbase = rset_pickbot(allow);
} else {
pbase = RID_RET;
emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
}
} else {
pbase = RID_BASE;
}
emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
if (pbase & 0x80) /* Restore temp. register. */
emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
k = emit_isk12((8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, A64I_CMPx^k, RID_TMP);
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31));
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
(int32_t)offsetof(lua_State, maxstack));
if (pbase & 0x40) {
emit_getgl(as, (pbase & 31), jit_base);
if (pbase & 0x80) /* Save temp register. */
emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0);
}
if (irp) { /* Must not spill arbitrary registers in head of side trace. */
if (ra_hasspill(irp->s))
emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
if (ra_hasspill(irp->s) && !allow)
emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
} else {
emit_getgl(as, RID_TMP, cur_L);
}
}
/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
@ -1821,7 +1837,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
/* Marker to prevent patching the GC check exit. */
#define ARM64_NOPATCH_GC_CHECK \
(A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
(A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
@ -1876,41 +1892,47 @@ static void asm_loop_tail_fixup(ASMState *as)
/* -- Head of trace ------------------------------------------------------- */
/* Reload L register from g->cur_L. */
static void asm_head_lreg(ASMState *as)
{
IRIns *ir = IR(ASMREF_L);
if (ra_used(ir)) {
Reg r = ra_dest(as, ir, RSET_GPR);
emit_getgl(as, r, cur_L);
ra_evictk(as);
}
}
/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
if (ra_hasreg(r)) {
ra_free(as, r);
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (r != RID_BASE)
emit_movrr(as, ir, r, RID_BASE);
}
IRIns *ir;
asm_head_lreg(as);
ir = IR(REF_BASE);
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
ra_spill(as, ir);
ra_destreg(as, ir, RID_BASE);
}
/* Coalesce BASE register for a side trace. */
static Reg asm_head_side_base(ASMState *as, IRIns *irp)
{
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
if (ra_hasreg(r)) {
ra_free(as, r);
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (irp->r == r) {
return r; /* Same BASE register already coalesced. */
} else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
/* Move from coalesced parent reg. */
emit_movrr(as, ir, r, irp->r);
return irp->r;
IRIns *ir;
asm_head_lreg(as);
ir = IR(REF_BASE);
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
ra_spill(as, ir);
if (ra_hasspill(irp->s)) {
return ra_dest(as, ir, RSET_GPR);
} else {
emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
Reg r = irp->r;
lj_assertA(ra_hasreg(r), "base reg lost");
if (r != ir->r && !rset_test(as->freeset, r))
ra_restore(as, regcost_ref(as->cost[r]));
ra_destreg(as, ir, r);
return r;
}
}
return RID_NONE;
}
/* -- Tail of trace ------------------------------------------------------- */
@ -1953,47 +1975,20 @@ static void asm_tail_prep(ASMState *as)
/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
#if LJ_HASFFI
uint32_t i, nargs = CCI_XNARGS(ci);
if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) ||
(LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) {
IRRef args[CCI_NARGS_MAX*2];
int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
uint32_t i, nargs = CCI_XNARGS(ci);
int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
asm_collectargs(as, ir, ci, args);
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) nfpr = 0;
#endif
for (i = 0; i < nargs; i++) {
int al = spalign;
if (!args[i]) {
#if LJ_TARGET_OSX
/* Marker for start of varaargs. */
nfpr = 0;
ngpr = 0;
spalign = 7;
#endif
} else if (irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) { nfpr--; continue; }
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
#elif LJ_TARGET_OSX
al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
#endif
if (args[i] && irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) nfpr--; else nslots += 2;
} else {
if (ngpr > 0) { ngpr--; continue; }
#if LJ_TARGET_OSX
al |= irt_size(IR(args[i])->t) - 1;
#endif
if (ngpr > 0) ngpr--; else nslots += 2;
}
spofs = (spofs + 2*al+1) & ~al; /* Align and bump stack pointer. */
}
nslots = (spofs + 3) >> 2;
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
}
#endif
return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
return REGSP_HINT(RID_RET);
}
static void asm_setup_target(ASMState *as)

View File

@ -1,6 +1,6 @@
/*
** MIPS IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@ -456,7 +456,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_addptr(as, base, -8*delta);
asm_guard(as, MIPSI_BNE, RID_TMP,
ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
emit_tsi(as, MIPSI_AL, RID_TMP, base, (LJ_BE || LJ_FR2) ? -8 : -4);
emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
}
/* -- Buffer operations --------------------------------------------------- */
@ -656,8 +656,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
#else
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
#endif
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)&as->J->k64[LJ_K64_2P63],
@ -673,8 +673,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
#else
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
#endif
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)&as->J->k32[LJ_K32_2P63],
@ -690,8 +690,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
MIPSIns mi = irt_is64(ir->t) ?
(st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
(st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, tmp);
emit_fg(as, mi, tmp, left);
emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left);
emit_fg(as, mi, left, left);
#endif
}
}
@ -1207,29 +1207,22 @@ nolo:
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else {
if (guarded)
asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
if (ir->o == IR_UREFC)
emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, dest, o);
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}

View File

@ -1,6 +1,6 @@
/*
** PPC IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@ -840,30 +840,23 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
} else {
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
}
if (ir->o == IR_UREFC)
emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_tai(as, PPCI_LWZ, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}

View File

@ -1,6 +1,6 @@
/*
** x86/x64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Guard handling ------------------------------------------------------ */
@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
/* Check if there's no conflicting instruction between curins and ref.
** Also avoid fusing loads if there are multiple references.
*/
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
{
IRIns *ir = as->ir;
IRRef i = as->curins;
@ -118,9 +118,7 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
while (--i > ref) {
if (ir[i].o == conflict)
return 0; /* Conflict found. */
else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
return 0;
else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
return 0;
}
return 1; /* Ok, no conflict. */
@ -136,14 +134,13 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
/* We can avoid the FLOAD of t->array for colocated arrays. */
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
return irb->op1; /* Table obj. */
}
} else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
/* Fuse base offset (vararg load). */
IRIns *irk = IR(irb->op2);
as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
as->mrm.ofs = IR(irb->op2)->i;
return irb->op1;
}
return ref; /* Otherwise use the given array base. */
@ -458,7 +455,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 2) &&
noconflict(as, ref, IR_RETF, 0) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@ -469,12 +466,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
} else if (ir->o == IR_FLOAD) {
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
noconflict(as, ref, IR_FSTORE, 2)) {
noconflict(as, ref, IR_FSTORE, 0)) {
asm_fusefref(as, ir, xallow);
return RID_MRM;
}
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
@ -484,7 +481,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
noconflict(as, ref, IR_XSTORE, 2)) {
noconflict(as, ref, IR_XSTORE, 0)) {
asm_fusexref(as, ir->op1, xallow);
return RID_MRM;
}
@ -817,7 +814,6 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_rr(as, XO_UCOMISD, left, tmp);
emit_rr(as, XO_CVTSI2SD, tmp, dest);
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
checkmclim(as);
emit_rr(as, XO_CVTTSD2SI, dest, left);
/* Can't fuse since left is needed twice. */
}
@ -860,7 +856,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
emit_rma(as, XO_MOVSD, bias, k);
checkmclim(as);
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
return;
} else { /* Integer to FP conversion. */
@ -1177,7 +1172,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
asm_guardcc(as, CC_E);
else
emit_sjcc(as, CC_E, l_end);
checkmclim(as);
if (irt_isnum(kt)) {
if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */
@ -1237,6 +1231,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
#endif
}
emit_sfixup(as, l_loop);
checkmclim(as);
#if LJ_GC64
if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key);
@ -1263,7 +1258,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
checkmclim(as);
emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
@ -1281,6 +1275,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
} else {
emit_rr(as, XO_MOV, tmp, key);
#if LJ_GC64
checkmclim(as);
emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 32);
@ -1377,33 +1372,26 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
if (ir->o == IR_UREFC)
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
else
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
emit_i8(as, 0);
asm_guardcc(as, CC_NE);
emit_i8(as, 1);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
}
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, uv, o);
} else {
emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
}
emit_rmro(as, XO_MOV, uv|REX_GC64, func,
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
}
static void asm_fref(ASMState *as, IRIns *ir)
{
@ -1558,7 +1546,6 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {
checkmclim(as);
asm_fuseahuref(as, ir->op1, RSET_GPR);
if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
@ -1606,7 +1593,6 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t));
checkmclim(as);
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else

View File

@ -1,6 +1,6 @@
/*
** Internal assertions.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_assert_c

View File

@ -1,6 +1,6 @@
/*
** Bytecode instruction modes.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bc_c

View File

@ -1,6 +1,6 @@
/*
** Bytecode instruction format.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BC_H

View File

@ -1,6 +1,6 @@
/*
** Bytecode dump definitions.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BCDUMP_H
@ -46,8 +46,6 @@
#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
#define BCDUMP_F_DETERMINISTIC 0x80000000
/* Type codes for the GC constants of a prototype. Plus length for strings. */
enum {
BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64,
@ -63,7 +61,7 @@ enum {
/* -- Bytecode reader/writer ---------------------------------------------- */
LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
void *data, uint32_t flags);
void *data, int strip);
LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
LJ_FUNC GCproto *lj_bcread(LexState *ls);

View File

@ -1,6 +1,6 @@
/*
** Bytecode reader.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bcread_c
@ -179,7 +179,7 @@ static const void *bcread_varinfo(GCproto *pt)
}
/* Read a single constant key/value of a template table. */
static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t)
static void bcread_ktabk(LexState *ls, TValue *o)
{
MSize tp = bcread_uleb128(ls);
if (tp >= BCDUMP_KTAB_STR) {
@ -191,8 +191,6 @@ static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t)
} else if (tp == BCDUMP_KTAB_NUM) {
o->u32.lo = bcread_uleb128(ls);
o->u32.hi = bcread_uleb128(ls);
} else if (t && tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */
settabV(ls->L, o, t);
} else {
lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
setpriV(o, ~tp);
@ -209,15 +207,15 @@ static GCtab *bcread_ktab(LexState *ls)
MSize i;
TValue *o = tvref(t->array);
for (i = 0; i < narray; i++, o++)
bcread_ktabk(ls, o, NULL);
bcread_ktabk(ls, o);
}
if (nhash) { /* Read hash entries. */
MSize i;
for (i = 0; i < nhash; i++) {
TValue key;
bcread_ktabk(ls, &key, NULL);
bcread_ktabk(ls, &key);
lj_assertLS(!tvisnil(&key), "nil key");
bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t);
bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
}
}
return t;
@ -283,11 +281,8 @@ static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
{
BCIns *bc = proto_bc(pt);
BCIns op;
if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */
else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV;
else op = BC_FUNCF;
bc[0] = BCINS_AD(op, pt->framesize, 0);
bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
pt->framesize, 0);
bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
/* Swap bytecode instructions if the endianess differs. */
if (bcread_swap(ls)) {
@ -400,7 +395,7 @@ static int bcread_header(LexState *ls)
bcread_byte(ls) != BCDUMP_VERSION) return 0;
bcread_flags(ls) = flags = bcread_uleb128(ls);
if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0;
if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
if ((flags & BCDUMP_F_FFI)) {
#if LJ_HASFFI
lua_State *L = ls->L;
@ -410,7 +405,7 @@ static int bcread_header(LexState *ls)
#endif
}
if ((flags & BCDUMP_F_STRIP)) {
ls->chunkname = lj_str_newz(ls->L, *ls->chunkarg == BCDUMP_HEAD1 ? "=?" : ls->chunkarg);
ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
} else {
MSize len = bcread_uleb128(ls);
bcread_need(ls, len);

View File

@ -1,6 +1,6 @@
/*
** Bytecode writer.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bcwrite_c
@ -27,9 +27,7 @@ typedef struct BCWriteCtx {
GCproto *pt; /* Root prototype. */
lua_Writer wfunc; /* Writer callback. */
void *wdata; /* Writer callback data. */
TValue **heap; /* Heap used for deterministic sorting. */
uint32_t heapsz; /* Size of heap. */
uint32_t flags; /* BCDUMP_F_* flags. */
int strip; /* Strip debug info. */
int status; /* Status from writer callback. */
#ifdef LUA_USE_ASSERT
global_State *g;
@ -71,8 +69,6 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
*p++ = BCDUMP_KTAB_NUM;
p = lj_strfmt_wuleb128(p, o->u32.lo);
p = lj_strfmt_wuleb128(p, o->u32.hi);
} else if (tvistab(o)) { /* Write the nil value marker as a nil. */
*p++ = BCDUMP_KTAB_NIL;
} else {
lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
*p++ = BCDUMP_KTAB_NIL+~itype(o);
@ -80,75 +76,6 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
ctx->sb.w = p;
}
/* Compare two template table keys. */
static LJ_AINLINE int bcwrite_ktabk_lt(TValue *a, TValue *b)
{
uint32_t at = itype(a), bt = itype(b);
if (at != bt) { /* This also handles false and true keys. */
return at < bt;
} else if (at == LJ_TSTR) {
return lj_str_cmp(strV(a), strV(b)) < 0;
} else {
return a->u64 < b->u64; /* This works for numbers and integers. */
}
}
/* Insert key into a sorted heap. */
static void bcwrite_ktabk_heap_insert(TValue **heap, MSize idx, MSize end,
TValue *key)
{
MSize child;
while ((child = idx * 2 + 1) < end) {
/* Find lower of the two children. */
TValue *c0 = heap[child];
if (child + 1 < end) {
TValue *c1 = heap[child + 1];
if (bcwrite_ktabk_lt(c1, c0)) {
c0 = c1;
child++;
}
}
if (bcwrite_ktabk_lt(key, c0)) break; /* Key lower? Found our position. */
heap[idx] = c0; /* Move lower child up. */
idx = child; /* Descend. */
}
heap[idx] = key; /* Insert key here. */
}
/* Resize heap, dropping content. */
static void bcwrite_heap_resize(BCWriteCtx *ctx, uint32_t nsz)
{
lua_State *L = sbufL(&ctx->sb);
if (ctx->heapsz) {
lj_mem_freevec(G(L), ctx->heap, ctx->heapsz, TValue *);
ctx->heapsz = 0;
}
if (nsz) {
ctx->heap = lj_mem_newvec(L, nsz, TValue *);
ctx->heapsz = nsz;
}
}
/* Write hash part of template table in sorted order. */
static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash)
{
TValue **heap = ctx->heap;
MSize i = nhash;
for (;; node--) { /* Build heap. */
if (!tvisnil(&node->val)) {
bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key);
if (i == 0) break;
}
}
do { /* Drain heap. */
TValue *key = heap[0]; /* Output lowest key from top. */
bcwrite_ktabk(ctx, key, 0);
bcwrite_ktabk(ctx, (TValue *)((char *)key - offsetof(Node, key)), 1);
key = heap[--nhash]; /* Remove last key. */
bcwrite_ktabk_heap_insert(heap, 0, nhash, key); /* Re-insert. */
} while (nhash);
}
/* Write a template table. */
static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
{
@ -178,13 +105,8 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
bcwrite_ktabk(ctx, o, 1);
}
if (nhash) { /* Write hash entries. */
Node *node = noderef(t->node) + t->hmask;
if ((ctx->flags & BCDUMP_F_DETERMINISTIC) && nhash > 1) {
if (ctx->heapsz < nhash)
bcwrite_heap_resize(ctx, t->hmask + 1);
bcwrite_ktab_sorted_hash(ctx, node, nhash);
} else {
MSize i = nhash;
Node *node = noderef(t->node) + t->hmask;
for (;; node--)
if (!tvisnil(&node->val)) {
bcwrite_ktabk(ctx, &node->key, 0);
@ -193,7 +115,6 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
}
}
}
}
/* Write GC constants of a prototype. */
static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
@ -348,7 +269,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
p = lj_strfmt_wuleb128(p, pt->sizekgc);
p = lj_strfmt_wuleb128(p, pt->sizekn);
p = lj_strfmt_wuleb128(p, pt->sizebc-1);
if (!(ctx->flags & BCDUMP_F_STRIP)) {
if (!ctx->strip) {
if (proto_lineinfo(pt))
sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
p = lj_strfmt_wuleb128(p, sizedbg);
@ -396,10 +317,11 @@ static void bcwrite_header(BCWriteCtx *ctx)
*p++ = BCDUMP_HEAD2;
*p++ = BCDUMP_HEAD3;
*p++ = BCDUMP_VERSION;
*p++ = (ctx->flags & (BCDUMP_F_STRIP | BCDUMP_F_FR2)) +
*p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
LJ_BE*BCDUMP_F_BE +
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
if (!(ctx->flags & BCDUMP_F_STRIP)) {
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
LJ_FR2*BCDUMP_F_FR2;
if (!ctx->strip) {
p = lj_strfmt_wuleb128(p, len);
p = lj_buf_wmem(p, name, len);
}
@ -430,16 +352,14 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
/* Write bytecode for a prototype. */
int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
uint32_t flags)
int strip)
{
BCWriteCtx ctx;
int status;
ctx.pt = pt;
ctx.wfunc = writer;
ctx.wdata = data;
ctx.heapsz = 0;
if ((bc_op(proto_bc(pt)[0]) != BC_NOT) == LJ_FR2) flags |= BCDUMP_F_FR2;
ctx.flags = flags;
ctx.strip = strip;
ctx.status = 0;
#ifdef LUA_USE_ASSERT
ctx.g = G(L);
@ -448,7 +368,6 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
if (status == 0) status = ctx.status;
lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
bcwrite_heap_resize(&ctx, 0);
return status;
}

View File

@ -1,6 +1,6 @@
/*
** Buffer handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_buf_c
@ -92,8 +92,10 @@ void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
char *b = sb->b;
MSize osz = (MSize)(sb->e - b);
if (osz > 2*LJ_MIN_SBUF) {
MSize n = (MSize)(sb->w - b);
b = lj_mem_realloc(L, b, osz, (osz >> 1));
sb->w = sb->b = b; /* Not supposed to keep data across shrinks. */
sb->b = b;
sb->w = b + n;
sb->e = b + (osz >> 1);
}
lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");

View File

@ -1,6 +1,6 @@
/*
** Buffer handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BUF_H

View File

@ -1,6 +1,6 @@
/*
** C data arithmetic.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
@ -44,13 +44,9 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
p = (uint8_t *)cdata_getptr(p, ct->size);
if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
} else if (ctype_isfunc(ct->info)) {
CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0;
p = (uint8_t *)*(void **)p;
ct = ctype_get(cts,
lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
if (i) { /* cts->tab may have been reallocated. */
ca->ct[0] = ctype_get(cts, id0);
}
}
if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
ca->ct[i] = ct;
@ -349,7 +345,9 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
if (LJ_LIKELY(tvisint(o))) {
return (uint32_t)intV(o);
} else {
return (uint32_t)lj_num2bit(numV(o));
int32_t i = lj_num2bit(numV(o));
if (LJ_DUALNUM) setintV(o, i);
return (uint32_t)i;
}
}

View File

@ -1,6 +1,6 @@
/*
** C data arithmetic.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CARITH_H

View File

@ -1,6 +1,6 @@
/*
** FFI C call handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
@ -20,15 +20,12 @@
#if LJ_TARGET_X86
/* -- x86 calling conventions --------------------------------------------- */
#define CCALL_PUSH(arg) \
*(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
#if LJ_ABI_WIN
#define CCALL_HANDLE_STRUCTRET \
/* Return structs bigger than 8 by reference (on stack only). */ \
cc->retref = (sz > 8); \
if (cc->retref) CCALL_PUSH(dp);
if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
@ -43,7 +40,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
CCALL_PUSH(dp); \
cc->stack[nsp++] = (GPRArg)dp; \
} else { /* Struct with single FP field ends up in FPR. */ \
cc->resx87 = ccall_classify_struct(cts, ctr); \
}
@ -59,7 +56,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
CCALL_PUSH(dp);
cc->stack[nsp++] = (GPRArg)dp;
#endif
@ -70,7 +67,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
CCALL_PUSH(dp); \
cc->stack[nsp++] = (GPRArg)dp; \
}
#endif
@ -281,8 +278,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@ -348,6 +345,7 @@
goto done; \
} else { \
nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \
} else { /* Try to pass argument in GPRs. */ \
if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
@ -358,6 +356,7 @@
goto done; \
} else { \
ngpr = maxgpr; /* Prevent reordering. */ \
if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \
}
@ -472,8 +471,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@ -566,8 +565,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@ -699,11 +698,10 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
if (ccall_struct_reg(cc, cts, dp, rcl)) {
/* Register overflow? Pass on stack. */
MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
if (nsp + sz > CCALL_SIZE_STACK)
return 1; /* Too many arguments. */
cc->nsp = nsp + sz;
memcpy((uint8_t *)cc->stack + nsp, dp, sz);
MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
cc->nsp = nsp + n;
memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
}
return 0; /* Ok. */
}
@ -781,24 +779,17 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{
CTSize sz = ct->size;
unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
while (ct->sib && n <= 4) {
unsigned int m = 1;
while (ct->sib) {
CType *sct;
ct = ctype_get(cts, ct->sib);
if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct);
if (ctype_isarray(sct->info)) {
CType *cct = ctype_rawchild(cts, sct);
if (!cct->size) continue;
m = sct->size / cct->size;
sct = cct;
}
if (ctype_isfp(sct->info)) {
r |= sct->size;
if (!isu) n += m; else if (n < m) n = m;
if (!isu) n++; else if (n == 0) n = 1;
} else if (ctype_iscomplex(sct->info)) {
r |= (sct->size >> 1);
if (!isu) n += 2*m; else if (n < 2*m) n = 2*m;
if (!isu) n += 2; else if (n < 2) n = 2;
} else if (ctype_isstruct(sct->info)) {
goto substruct;
} else {
@ -810,11 +801,10 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
sct = ctype_rawchild(cts, ct);
substruct:
if (sct->size > 0) {
unsigned int s = ccall_classify_struct(cts, sct), sn;
unsigned int s = ccall_classify_struct(cts, sct);
if (s <= 1) goto noth;
r |= (s & 255);
sn = (s >> 8) * m;
if (!isu) n += sn; else if (n < sn) n = sn;
if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
}
}
}
@ -993,14 +983,6 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
fid = ctf->sib;
}
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((ct->info & CTF_VARARG)) {
nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
ngpr = maxgpr;
nfpr = CCALL_NARG_FPR;
}
#endif
/* Walk through all passed arguments. */
for (o = L->base+1, narg = 1; o < top; o++, narg++) {
CTypeID did;
@ -1037,31 +1019,25 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_STRUCTARG
} else if (ctype_iscomplex(d->info)) {
CCALL_HANDLE_COMPLEXARG
} else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) {
} else {
sz = CTSIZE_PTR;
}
n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */
if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
MSize align = (1u << ctype_align(d->info)) - 1;
if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1))
align = CTSIZE_PTR-1;
nsp = (nsp + align) & ~align;
if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
nsp = (nsp + align) & ~align; /* Align argument on stack. */
}
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
/* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
#else
dp = ((uint8_t *)cc->stack) + nsp;
#endif
nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */
err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL);
}
dp = &cc->stack[nsp];
nsp += n;
isva = 0;
done:
@ -1072,8 +1048,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
}
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
/* Extend passed integers to 32 bits at least. */
if (ctype_isinteger_or_bool(d->info) && d->size < 4 &&
(!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) { /* Assumes LJ_LE. */
if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
if (d->info & CTF_UNSIGNED)
*(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp :
(uint32_t)*(uint16_t *)dp;
@ -1120,17 +1095,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#endif
}
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((int32_t)nsp < 0) nsp = 0;
#endif
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
cc->nsp = nsp;
cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
if (nsp > CCALL_SPS_FREE)
cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
return gcsteps;
}

View File

@ -1,6 +1,6 @@
/*
** FFI C call handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CCALL_H
@ -75,9 +75,6 @@ typedef union FPRArg {
#define CCALL_NARG_FPR 8
#define CCALL_NRET_FPR 4
#define CCALL_SPS_FREE 0
#if LJ_TARGET_OSX
#define CCALL_PACK_STACKARG 1
#endif
typedef intptr_t GPRArg;
typedef union FPRArg {
@ -142,9 +139,6 @@ typedef union FPRArg {
#ifndef CCALL_ALIGN_STACKARG
#define CCALL_ALIGN_STACKARG 1
#endif
#ifndef CCALL_PACK_STACKARG
#define CCALL_PACK_STACKARG 0
#endif
#ifndef CCALL_ALIGN_CALLSTATE
#define CCALL_ALIGN_CALLSTATE 8
#endif
@ -158,15 +152,14 @@ typedef union FPRArg {
LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
#define CCALL_NUM_STACK 31
#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
#define CCALL_MAXSTACK 32
/* -- C call state -------------------------------------------------------- */
typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
void (*func)(void); /* Pointer to called function. */
uint32_t spadj; /* Stack pointer adjustment. */
uint8_t nsp; /* Number of bytes on stack. */
uint8_t nsp; /* Number of stack slots. */
uint8_t retref; /* Return value by reference. */
#if LJ_TARGET_X64
uint8_t ngpr; /* Number of arguments in GPRs. */
@ -185,7 +178,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
#endif
GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */
} CCallState;
/* -- C call handling ----------------------------------------------------- */

Some files were not shown because too many files have changed in this diff Show More