Compare commits

..

No commits in common. "v2.1" and "v2.1.ROLLING" have entirely different histories.

217 changed files with 1501 additions and 2481 deletions

1
.gitattributes vendored
View File

@ -1 +0,0 @@
/.relver export-subst

View File

@ -1 +0,0 @@
$Format:%ct$

View File

@ -1,7 +1,7 @@
=============================================================================== ===============================================================================
LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/ LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/
Copyright (C) 2005-2025 Mike Pall. All rights reserved. Copyright (C) 2005-2023 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View File

@ -10,7 +10,7 @@
# For MSVC, please follow the instructions given in src/msvcbuild.bat. # For MSVC, please follow the instructions given in src/msvcbuild.bat.
# For MinGW and Cygwin, cd to src and run make with the Makefile there. # For MinGW and Cygwin, cd to src and run make with the Makefile there.
# #
# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h # Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
############################################################################## ##############################################################################
MAJVER= 2 MAJVER= 2
@ -37,13 +37,12 @@ export MULTILIB= lib
DPREFIX= $(DESTDIR)$(PREFIX) DPREFIX= $(DESTDIR)$(PREFIX)
INSTALL_BIN= $(DPREFIX)/bin INSTALL_BIN= $(DPREFIX)/bin
INSTALL_LIB= $(DPREFIX)/$(MULTILIB) INSTALL_LIB= $(DPREFIX)/$(MULTILIB)
INSTALL_SHARE_= $(PREFIX)/share INSTALL_SHARE= $(DPREFIX)/share
INSTALL_SHARE= $(DESTDIR)$(INSTALL_SHARE_)
INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION)
INSTALL_INC= $(INSTALL_DEFINC) INSTALL_INC= $(INSTALL_DEFINC)
export INSTALL_LJLIBD= $(INSTALL_SHARE_)/luajit-$(MMVERSION) INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION)
INSTALL_JITLIB= $(DESTDIR)$(INSTALL_LJLIBD)/jit INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit
INSTALL_LMODD= $(INSTALL_SHARE)/lua INSTALL_LMODD= $(INSTALL_SHARE)/lua
INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER)
INSTALL_CMODD= $(INSTALL_LIB)/lua INSTALL_CMODD= $(INSTALL_LIB)/lua
@ -72,7 +71,7 @@ INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME)
INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \
$(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD)
UNINSTALL_DIRS= $(INSTALL_JITLIB) $(DESTDIR)$(INSTALL_LJLIBD) $(INSTALL_INC) \ UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \
$(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD)
RM= rm -f RM= rm -f
@ -110,12 +109,11 @@ else
endif endif
TARGET_SYS?= $(HOST_SYS) TARGET_SYS?= $(HOST_SYS)
ifneq (,$(filter $(TARGET_SYS),Darwin iOS)) ifeq (Darwin,$(TARGET_SYS))
INSTALL_SONAME= $(INSTALL_DYLIBNAME) INSTALL_SONAME= $(INSTALL_DYLIBNAME)
INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1) INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1)
INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2) INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2)
LDCONFIG= : LDCONFIG= :
SED_PC+= -e "s| -Wl,-E||"
endif endif
############################################################################## ##############################################################################
@ -144,12 +142,18 @@ install: $(INSTALL_DEP)
$(RM) $(FILE_PC).tmp $(RM) $(FILE_PC).tmp
cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
@echo ""
@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
@echo "You can do this now by running this command (with sudo):"
@echo ""
@echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
@echo ""
uninstall: uninstall:
@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
for file in $(FILES_JITLIB); do \ for file in $(FILES_JITLIB); do \
$(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
done done

2
README
View File

@ -5,7 +5,7 @@ LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
Project Homepage: https://luajit.org/ Project Homepage: https://luajit.org/
LuaJIT is Copyright (C) 2005-2025 Mike Pall. LuaJIT is Copyright (C) 2005-2023 Mike Pall.
LuaJIT is free software, released under the MIT license. LuaJIT is free software, released under the MIT license.
See full Copyright Notice in the COPYRIGHT file or in luajit.h. See full Copyright Notice in the COPYRIGHT file or in luajit.h.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2025 Mike Pall. /* Copyright (C) 2004-2023 Mike Pall.
* *
* You are welcome to use the general ideas of this design for your own sites. * You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme. * But please do not steal the stylesheet, the layout or the color scheme.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2025 Mike Pall. /* Copyright (C) 2004-2023 Mike Pall.
* *
* You are welcome to use the general ideas of this design for your own sites. * You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme. * But please do not steal the stylesheet, the layout or the color scheme.

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Contact</title> <title>Contact</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -94,7 +94,7 @@ don't like that, please complain to Google or Microsoft, not me.
<h2>Copyright</h2> <h2>Copyright</h2>
<p> <p>
All documentation is All documentation is
Copyright &copy; 2005-2025 Mike Pall. Copyright &copy; 2005-2023 Mike Pall.
</p> </p>
@ -102,7 +102,7 @@ Copyright &copy; 2005-2025 Mike Pall.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>String Buffer Library</title> <title>String Buffer Library</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -85,7 +85,7 @@ operations.
</p> </p>
<p> <p>
The string buffer library also includes a high-performance The string buffer library also includes a high-performance
<a href="#serialize">serializer</a> for Lua objects. <a href="serialize">serializer</a> for Lua objects.
</p> </p>
<h2 id="use">Using the String Buffer Library</h2> <h2 id="use">Using the String Buffer Library</h2>
@ -588,9 +588,9 @@ num → 0x07 double.L
tab → 0x08 // Empty table tab → 0x08 // Empty table
| 0x09 h.U h*{object object} // Key/value hash | 0x09 h.U h*{object object} // Key/value hash
| 0x0a a.U a*object // 0-based array | 0x0a a.U a*object // 0-based array
| 0x0b a.U h.U a*object h*{object object} // Mixed | 0x0b a.U a*object h.U h*{object object} // Mixed
| 0x0c a.U (a-1)*object // 1-based array | 0x0c a.U (a-1)*object // 1-based array
| 0x0d a.U h.U (a-1)*object h*{object object} // Mixed | 0x0d a.U (a-1)*object h.U h*{object object} // Mixed
tab_mt → 0x0e (index-1).U tab // Metatable dict entry tab_mt → 0x0e (index-1).U tab // Metatable dict entry
int64 → 0x10 int.L // FFI int64_t int64 → 0x10 int.L // FFI int64_t
@ -679,7 +679,7 @@ mappings of files are OK, but only if the file does not change.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Lua/C API Extensions</title> <title>Lua/C API Extensions</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -173,7 +173,7 @@ Also note that this mechanism is not without overhead.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>FFI Library</title> <title>FFI Library</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -316,7 +316,7 @@ without undue conversion penalties.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>ffi.* API Functions</title> <title>ffi.* API Functions</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -558,7 +558,7 @@ named <tt>i</tt>.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>FFI Semantics</title> <title>FFI Semantics</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -440,19 +440,6 @@ If you don't do this, the default Lua number &rarr; <tt>double</tt>
conversion rule applies. A vararg C&nbsp;function expecting an integer conversion rule applies. A vararg C&nbsp;function expecting an integer
will see a garbled or uninitialized value. will see a garbled or uninitialized value.
</p> </p>
<p>
Note: this is the only place where creating a boxed scalar number type is
actually useful. <b>Never use <tt>ffi.new("int")</tt>, <tt>ffi.new("float")</tt>
etc. anywhere else!</b>
</p>
<p style="font-size: 8pt;">
Ditto for <tt>ffi.cast()</tt>. Explicitly boxing scalars <b>does not</b>
improve performance or force <tt>int</tt> or <tt>float</tt> arithmetic! It
just adds costly boxing, unboxing and conversion steps. And it may lead
to surprising results, because
<a href="#cdata_arith">cdata arithmetic on scalar numbers</a>
is always performed on 64 bit integers.
</p>
<h2 id="init">Initializers</h2> <h2 id="init">Initializers</h2>
<p> <p>
@ -1259,7 +1246,7 @@ compiled.</li>
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>FFI Tutorial</title> <title>FFI Tutorial</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -587,7 +587,7 @@ it to a local variable in the function scope is unnecessary.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>jit.* Library</title> <title>jit.* Library</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -187,7 +187,7 @@ if you want to know more.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Profiler</title> <title>Profiler</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -349,7 +349,7 @@ use.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Extensions</title> <title>Extensions</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -160,33 +160,13 @@ passes any arguments after the error function to the function
which is called in a protected context. which is called in a protected context.
</p> </p>
<h3 id="load"><tt>load*()</tt> handle UTF-8 source code</h3> <h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3>
<p> <p>
Non-ASCII characters are handled transparently by the Lua source code parser. Non-ASCII characters are handled transparently by the Lua source code parser.
This allows the use of UTF-8 characters in identifiers and strings. This allows the use of UTF-8 characters in identifiers and strings.
A UTF-8 BOM is skipped at the start of the source code. A UTF-8 BOM is skipped at the start of the source code.
</p> </p>
<h3 id="load_mode"><tt>load*()</tt> add a mode parameter</h3>
<p>
As an extension from Lua 5.2, the functions <tt>loadstring()</tt>,
<tt>loadfile()</tt> and (new) <tt>load()</tt> add an optional
<tt>mode</tt> parameter.
</p>
<p>
The default mode string is <tt>"bt"</tt>, which allows loading of both
source code and bytecode. Use <tt>"t"</tt> to allow only source code
or <tt>"b"</tt> to allow only bytecode to be loaded.
</p>
<p>
By default, the <tt>load*</tt> functions generate the native bytecode format.
For cross-compilation purposes, add <tt>W</tt> to the mode string to
force the 32 bit format and <tt>X</tt> to force the 64 bit format.
Add both to force the opposite format. Note that non-native bytecode
generated by <tt>load*</tt> cannot be run, but can still be passed
to <tt>string.dump</tt>.
</p>
<h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3> <h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3>
<p> <p>
All number-to-string conversions consistently convert non-finite numbers All number-to-string conversions consistently convert non-finite numbers
@ -206,33 +186,26 @@ works independently of the current locale and it supports hex floating-point
numbers (e.g. <tt>0x1.5p-3</tt>). numbers (e.g. <tt>0x1.5p-3</tt>).
</p> </p>
<h3 id="string_dump"><tt>string.dump(f [,mode])</tt> generates portable bytecode</h3> <h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3>
<p> <p>
An extra argument has been added to <tt>string.dump()</tt>. If set to An extra argument has been added to <tt>string.dump()</tt>. If set to
<tt>true</tt> or to a string which contains the character <tt>s</tt>, <tt>true</tt>, 'stripped' bytecode without debug information is
'stripped' bytecode without debug information is generated. This speeds generated. This speeds up later bytecode loading and reduces memory
up later bytecode loading and reduces memory usage. See also the usage. See also the
<a href="running.html#opt_b"><tt>-b</tt> command line option</a>. <a href="running.html#opt_b"><tt>-b</tt> command line option</a>.
</p> </p>
<p> <p>
The generated bytecode is portable and can be loaded on any architecture The generated bytecode is portable and can be loaded on any architecture
that LuaJIT supports. However, the bytecode compatibility versions must that LuaJIT supports, independent of word size or endianness. However, the
match. Bytecode only stays compatible within a major+minor version bytecode compatibility versions must match. Bytecode stays compatible
(x.y.aaa &rarr; x.y.bbb), except for development branches. Foreign bytecode for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
(e.g. from Lua 5.1) is incompatible and cannot be loaded. minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
</p> </p>
<p> <p>
Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
a different, incompatible bytecode format between 32 bit and 64 bit ports. a different, incompatible bytecode format for all 64 bit ports. This may be
This may be rectified in the future. In the meantime, use the <tt>W</tt> rectified in the future.
and <tt>X</tt> <a href="#load_mode">modes of the <tt>load*</tt> functions</a>
for cross-compilation purposes.
</p>
<p>
Due to VM hardening, bytecode is not deterministic. Add <tt>d</tt> to the
mode string to dump it in a deterministic manner: identical source code
always gives a byte-for-byte identical bytecode dump. This feature is
mainly useful for reproducible builds.
</p> </p>
<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3> <h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
@ -265,7 +238,7 @@ and let the GC do its work.
LuaJIT uses a Tausworthe PRNG with period 2^223 to implement LuaJIT uses a Tausworthe PRNG with period 2^223 to implement
<tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of <tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of
the PRNG results is much superior compared to the standard Lua the PRNG results is much superior compared to the standard Lua
implementation, which uses the platform-specific ANSI <tt>rand()</tt>. implementation, which uses the platform-specific ANSI rand().
</p> </p>
<p> <p>
The PRNG generates the same sequences from the same seeds on all The PRNG generates the same sequences from the same seeds on all
@ -276,10 +249,6 @@ It's correctly scaled up and rounded for <tt>math.random(n&nbsp;[,m])</tt> to
preserve uniformity. preserve uniformity.
</p> </p>
<p> <p>
Call <tt>math.randomseed()</tt> without any arguments to seed it from
system entropy.
</p>
<p>
Important: Neither this nor any other PRNG based on the simplistic Important: Neither this nor any other PRNG based on the simplistic
<tt>math.random()</tt> API is suitable for cryptographic use. <tt>math.random()</tt> API is suitable for cryptographic use.
</p> </p>
@ -317,7 +286,7 @@ enabled:
</p> </p>
<ul> <ul>
<li><tt>goto</tt> and <tt>::labels::</tt>.</li> <li><tt>goto</tt> and <tt>::labels::</tt>.</li>
<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\z'</tt> escape in strings.</li> <li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li>
<li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li> <li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li>
<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li> <li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
<li><tt>loadfile(filename [,mode [,env]])</tt>.</li> <li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
@ -457,7 +426,9 @@ the toolchain used to compile LuaJIT:
on the C&nbsp;stack. The contents of the C++&nbsp;exception object on the C&nbsp;stack. The contents of the C++&nbsp;exception object
pass through unmodified.</li> pass through unmodified.</li>
<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>. <li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
The corresponding Lua error message can be retrieved from the Lua stack.</li> The corresponding Lua error message can be retrieved from the Lua stack.<br>
For MSVC for Windows 64 bit this requires compilation of your C++ code
with <tt>/EHa</tt>.</li>
<li>Throwing Lua errors across C++ frames is safe. C++ destructors <li>Throwing Lua errors across C++ frames is safe. C++ destructors
will be called.</li> will be called.</li>
</ul> </ul>
@ -492,7 +463,7 @@ C++ destructors.</li>
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Installation</title> <title>Installation</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -117,7 +117,7 @@ hold all user-configurable settings:
<li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX <li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX
only).</li> only).</li>
<li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT <li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT
under POSIX or MinGW.</li> under POSIX, MinGW or Cygwin.</li>
<li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with <li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
MSVC (Visual Studio).</li> MSVC (Visual Studio).</li>
</ul> </ul>
@ -195,13 +195,15 @@ Obviously the prefixes given during build and installation need to be the same.
<h2 id="windows">Windows Systems</h2> <h2 id="windows">Windows Systems</h2>
<h3>Prerequisites</h3> <h3>Prerequisites</h3>
<p> <p>
Either install the open source SDK <a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a>, Either install one of the open source SDKs
which comes with a modified GCC plus the required development headers. (<a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a> or
<a href="https://www.cygwin.com/"><span class="ext">&raquo;</span>&nbsp;Cygwin</a>), which come with a modified
GCC plus the required development headers.
Or install Microsoft's Visual Studio (MSVC). Or install Microsoft's Visual Studio (MSVC).
</p> </p>
<h3>Building with MSVC</h3> <h3>Building with MSVC</h3>
<p> <p>
Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the
directory with the source code and run these commands: directory with the source code and run these commands:
</p> </p>
<pre class="code"> <pre class="code">
@ -212,12 +214,9 @@ msvcbuild
Check the <tt>msvcbuild.bat</tt> file for more options. Check the <tt>msvcbuild.bat</tt> file for more options.
Then follow the installation instructions below. Then follow the installation instructions below.
</p> </p>
<h3>Building with MinGW or Cygwin</h3>
<p> <p>
For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt> Open a command prompt window and make sure the MinGW or Cygwin programs
</p>
<h3>Building with MinGW</h3>
<p>
Open a command prompt window and make sure the MinGW programs
are in your path. Then <tt>cd</tt> to the directory of the git repository. are in your path. Then <tt>cd</tt> to the directory of the git repository.
Then run this command for MinGW: Then run this command for MinGW:
</p> </p>
@ -225,6 +224,12 @@ Then run this command for MinGW:
mingw32-make mingw32-make
</pre> </pre>
<p> <p>
Or this command for Cygwin:
</p>
<pre class="code">
make
</pre>
<p>
Then follow the installation instructions below. Then follow the installation instructions below.
</p> </p>
<h3>Installing LuaJIT</h3> <h3>Installing LuaJIT</h3>
@ -241,19 +246,6 @@ absolute path names &mdash; all modules are loaded relative to the
directory where <tt>luajit.exe</tt> is installed directory where <tt>luajit.exe</tt> is installed
(see <tt>src/luaconf.h</tt>). (see <tt>src/luaconf.h</tt>).
</p> </p>
<p>
The final directory layout should look like this:
</p>
<pre class="code">
├── luajit.exe
├── lua51.dll
├── <- put your own classic Lua/C API modules (*.dll) here
└── lua
├── <- put your own Lua modules (*.lua) here
└── jit
├── bc.lua
└── (etc …)
</pre>
<h2 id="cross">Cross-compiling LuaJIT</h2> <h2 id="cross">Cross-compiling LuaJIT</h2>
<p> <p>
@ -274,7 +266,6 @@ for any supported target:
<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li> <li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
<li>Both host and target architectures must have the same pointer size.</li> <li>Both host and target architectures must have the same pointer size.</li>
<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li> <li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
<li>On some distro versions, multilib conflicts with cross-compilers. The workaround is to install the x86 cross-compiler package <tt>gcc-i686-linux-gnu</tt> and use it to build the host part (<tt>HOST_CC=i686-linux-gnu-gcc</tt>).</li>
<li>64 bit targets always require compilation on a 64 bit host.</li> <li>64 bit targets always require compilation on a 64 bit host.</li>
</ul> </ul>
<p> <p>
@ -577,7 +568,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>LuaJIT</title> <title>LuaJIT</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -122,7 +122,7 @@ Lua is a powerful, dynamic and light-weight programming language.
It may be embedded or used as a general-purpose, stand-alone language. It may be embedded or used as a general-purpose, stand-alone language.
</p> </p>
<p> <p>
LuaJIT is Copyright &copy; 2005-2025 Mike Pall, released under the LuaJIT is Copyright &copy; 2005-2023 Mike Pall, released under the
<a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>. <a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
</p> </p>
<p> <p>
@ -193,7 +193,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Running LuaJIT</title> <title>Running LuaJIT</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -106,9 +106,6 @@ are accepted:
<li><tt>-l</tt> &mdash; Only list bytecode.</li> <li><tt>-l</tt> &mdash; Only list bytecode.</li>
<li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li> <li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li>
<li><tt>-g</tt> &mdash; Keep debug info.</li> <li><tt>-g</tt> &mdash; Keep debug info.</li>
<li><tt>-W</tt> &mdash; Generate 32 bit (non-GC64) bytecode.</li>
<li><tt>-X</tt> &mdash; Generate 64 bit (GC64) bytecode.</li>
<li><tt>-d</tt> &mdash; Generate bytecode in deterministic manner.</li>
<li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li> <li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li>
<li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li> <li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li>
<li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li> <li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li>
@ -123,8 +120,7 @@ file name:
</p> </p>
<ul> <ul>
<li><tt>c</tt> &mdash; C source file, exported bytecode data.</li> <li><tt>c</tt> &mdash; C source file, exported bytecode data.</li>
<li><tt>cc</tt> &mdash; C++ source file, exported bytecode data.</li> <li><tt>h</tt> &mdash; C header file, static bytecode data.</li>
<li><tt>h</tt> &mdash; C/C++ header file, static bytecode data.</li>
<li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data <li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data
(OS- and architecture-specific).</li> (OS- and architecture-specific).</li>
<li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable). <li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable).
@ -307,7 +303,7 @@ Here are the parameters and their default settings:
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2025 Copyright &copy; 2005-2023
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -1,6 +1,6 @@
/* /*
** DynASM ARM encoding engine. ** DynASM ARM encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM ARM module. -- DynASM ARM module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------

View File

@ -1,6 +1,6 @@
/* /*
** DynASM ARM64 encoding engine. ** DynASM ARM64 encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM ARM64 module. -- DynASM ARM64 module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -549,7 +549,7 @@ end
local function parse_load_pair(params, nparams, n, op) local function parse_load_pair(params, nparams, n, op)
if params[n+2] then werror("too many operands") end if params[n+2] then werror("too many operands") end
local pn, p2 = params[n], params[n+1] local pn, p2 = params[n], params[n+1]
local scale = 2 + shr(op, 31 - band(shr(op, 26), 1)) local scale = shr(op, 30) == 0 and 2 or 3
local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
if not p1 then if not p1 then
if not p2 then if not p2 then
@ -806,8 +806,8 @@ map_op = {
["ldrsw_*"] = "98000000DxB|b8800000DxL", ["ldrsw_*"] = "98000000DxB|b8800000DxL",
-- NOTE: ldur etc. are handled by ldr et al. -- NOTE: ldur etc. are handled by ldr et al.
["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP", ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP", ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
["ldpsw_*"] = "68400000DAxP", ["ldpsw_*"] = "68400000DAxP",
-- Branches. -- Branches.
@ -942,7 +942,7 @@ local function parse_template(params, template, nparams, pos)
werror("bad register type") werror("bad register type")
end end
parse_reg_type = false parse_reg_type = false
elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then elseif p == "x" or p == "w" or p == "d" or p == "s" then
if parse_reg_type ~= p then if parse_reg_type ~= p then
werror("register size mismatch") werror("register size mismatch")
end end

View File

@ -1,6 +1,6 @@
/* /*
** DynASM MIPS encoding engine. ** DynASM MIPS encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM MIPS32/MIPS64 module. -- DynASM MIPS32/MIPS64 module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM MIPS64 module. -- DynASM MIPS64 module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.

View File

@ -1,6 +1,6 @@
/* /*
** DynASM PPC/PPC64 encoding engine. ** DynASM PPC/PPC64 encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM PPC/PPC64 module. -- DynASM PPC/PPC64 module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
-- --
-- Support for various extensions contributed by Caio Souza Oliveira. -- Support for various extensions contributed by Caio Souza Oliveira.

View File

@ -1,6 +1,6 @@
/* /*
** DynASM encoding engine prototypes. ** DynASM encoding engine prototypes.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM x64 module. -- DynASM x64 module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined x86/x64 module. -- This module just sets 64 bit mode for the combined x86/x64 module.

View File

@ -1,6 +1,6 @@
/* /*
** DynASM x86 encoding engine. ** DynASM x86 encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM x86/x64 module. -- DynASM x86/x64 module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -627,11 +627,7 @@ local function wputmrmsib(t, imark, s, vsreg, psz, sk)
werror("NYI: rip-relative displacement followed by immediate") werror("NYI: rip-relative displacement followed by immediate")
end end
-- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
if disp[2] == "iPJ" then
waction("REL_A", disp[1])
else
wputlabel("REL_", disp[1], 2) wputlabel("REL_", disp[1], 2)
end
else else
wputdarg(disp) wputdarg(disp)
end end
@ -748,9 +744,9 @@ local function dispexpr(expr)
return imm*map_opsizenum[ops] return imm*map_opsizenum[ops]
end end
local mode, iexpr = immexpr(dispt) local mode, iexpr = immexpr(dispt)
if mode == "iJ" or mode == "iPJ" then if mode == "iJ" then
if c == "-" then werror("cannot invert label reference") end if c == "-" then werror("cannot invert label reference") end
return { iexpr, mode } return { iexpr }
end end
return expr -- Need to return original signed expression. return expr -- Need to return original signed expression.
end end
@ -1151,8 +1147,6 @@ local map_op = {
rep_0 = "F3", rep_0 = "F3",
repe_0 = "F3", repe_0 = "F3",
repz_0 = "F3", repz_0 = "F3",
endbr32_0 = "F30F1EFB",
endbr64_0 = "F30F1EFA",
-- F4: *hlt -- F4: *hlt
cmc_0 = "F5", cmc_0 = "F5",
-- F6: test... mb,i; div... mb -- F6: test... mb,i; div... mb

View File

@ -2,7 +2,7 @@
-- DynASM. A dynamic assembler for code generation engines. -- DynASM. A dynamic assembler for code generation engines.
-- Originally designed and implemented for LuaJIT. -- Originally designed and implemented for LuaJIT.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See below for full copyright notice. -- See below for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -17,7 +17,7 @@ local _info = {
url = "https://luajit.org/dynasm.html", url = "https://luajit.org/dynasm.html",
license = "MIT", license = "MIT",
copyright = [[ copyright = [[
Copyright (C) 2005-2025 Mike Pall. All rights reserved. Copyright (C) 2005-2023 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -75,7 +75,7 @@ local function wline(line, needindent)
g_synclineno = g_synclineno + 1 g_synclineno = g_synclineno + 1
end end
-- Write assembler line as a comment, if requested. -- Write assembler line as a comment, if requested.
local function wcomment(aline) local function wcomment(aline)
if g_opt.comment then if g_opt.comment then
wline(g_opt.comment..aline..g_opt.endcomment, true) wline(g_opt.comment..aline..g_opt.endcomment, true)

View File

@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
Runs some nested loops and shows the resulting traces. Runs some nested loops and shows the resulting traces.
.SH COPYRIGHT .SH COPYRIGHT
.PP .PP
\fBLuaJIT\fR is Copyright \(co 2005-2025 Mike Pall. \fBLuaJIT\fR is Copyright \(co 2005-2023 Mike Pall.
.br .br
\fBLuaJIT\fR is open source software, released under the MIT license. \fBLuaJIT\fR is open source software, released under the MIT license.
.SH SEE ALSO .SH SEE ALSO

View File

@ -7,7 +7,7 @@
# Also works with MinGW and Cygwin on Windows. # Also works with MinGW and Cygwin on Windows.
# Please check msvcbuild.bat for building with MSVC on Windows. # Please check msvcbuild.bat for building with MSVC on Windows.
# #
# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h # Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
############################################################################## ##############################################################################
MAJVER= 2 MAJVER= 2
@ -233,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG
TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= x64 TARGET_LJARCH= x64
else else
@ -299,12 +299,6 @@ endif
ifneq (,$(LMULTILIB)) ifneq (,$(LMULTILIB))
TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\" TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
endif endif
ifneq (,$(INSTALL_LJLIBD))
TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\"
endif
ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1))
TARGET_XCFLAGS+= -fno-strict-float-cast-overflow
endif
############################################################################## ##############################################################################
# Target system detection. # Target system detection.
@ -326,13 +320,13 @@ ifeq (Darwin,$(TARGET_SYS))
endif endif
TARGET_STRIP+= -x TARGET_STRIP+= -x
TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS= TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
else else
ifeq (iOS,$(TARGET_SYS)) ifeq (iOS,$(TARGET_SYS))
TARGET_STRIP+= -x TARGET_STRIP+= -x
TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS= TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
ifeq (arm64,$(TARGET_LJARCH)) ifeq (arm64,$(TARGET_LJARCH))
@ -481,11 +475,7 @@ DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
DASM_DASC= vm_$(DASM_ARCH).dasc DASM_DASC= vm_$(DASM_ARCH).dasc
GIT= git GIT= git
ifeq (Windows,$(HOST_SYS)$(HOST_MSYS)) GIT_RELVER= [ -d ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
else
GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
endif
GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*) GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*)
BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \

View File

@ -25,15 +25,14 @@ lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_strscan.h lj_libdef.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_vm.h lj_prng.h \ lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h
lj_libdef.h
lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
lj_libdef.h lj_libdef.h
@ -56,7 +55,7 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \ lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \ lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
lj_prng.h lj_emit_*.h lj_asm_*.h lj_emit_*.h lj_asm_*.h
lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
lj_bcdef.h lj_bcdef.h
@ -98,7 +97,7 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \ lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
lj_crecord.h lj_strfmt.h lj_strscan.h lj_crecord.h lj_strfmt.h
lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
lj_ccallback.h lj_buf.h lj_ccallback.h lj_buf.h

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder. ** LuaJIT VM builder.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** This is a tool to build the hand-tuned assembler code required for ** This is a tool to build the hand-tuned assembler code required for
** LuaJIT's bytecode interpreter. It supports a variety of output formats ** LuaJIT's bytecode interpreter. It supports a variety of output formats

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder. ** LuaJIT VM builder.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _BUILDVM_H #ifndef _BUILDVM_H

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder: Assembler source code emitter. ** LuaJIT VM builder: Assembler source code emitter.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "buildvm.h" #include "buildvm.h"
@ -339,10 +339,6 @@ void emit_asm(BuildCtx *ctx)
fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident); fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident);
break; break;
case BUILD_machasm: case BUILD_machasm:
#if defined(__apple_build_version__) && __apple_build_version__ >= 15000000 && __apple_build_version__ < 15000300
/* Workaround for XCode 15.0 - 15.2. */
fprintf(ctx->fp, "\t.subsections_via_symbols\n");
#endif
fprintf(ctx->fp, fprintf(ctx->fp,
"\t.cstring\n" "\t.cstring\n"
"\t.ascii \"%s\\0\"\n", ctx->dasm_ident); "\t.ascii \"%s\\0\"\n", ctx->dasm_ident);

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder: IR folding hash table generator. ** LuaJIT VM builder: IR folding hash table generator.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "buildvm.h" #include "buildvm.h"

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder: library definition compiler. ** LuaJIT VM builder: library definition compiler.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "buildvm.h" #include "buildvm.h"

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder: PE object emitter. ** LuaJIT VM builder: PE object emitter.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Only used for building on Windows, since we cannot assume the presence ** Only used for building on Windows, since we cannot assume the presence
** of a suitable assembler. The host and target byte order must match. ** of a suitable assembler. The host and target byte order must match.
@ -9,7 +9,7 @@
#include "buildvm.h" #include "buildvm.h"
#include "lj_bc.h" #include "lj_bc.h"
#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN #if LJ_TARGET_X86ORX64
/* Context for PE object emitter. */ /* Context for PE object emitter. */
static char *strtab; static char *strtab;
@ -93,17 +93,6 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_ADDR32NB 0x03 #define PEOBJ_RELOC_ADDR32NB 0x03
#define PEOBJ_RELOC_OFS 0 #define PEOBJ_RELOC_OFS 0
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 6
#define PEOBJ_XDATA_SIZE (8*2+4+6*2)
#elif LJ_TARGET_ARM64
#define PEOBJ_ARCH_TARGET 0xaa64
#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */
#define PEOBJ_RELOC_DIR32 0x01
#define PEOBJ_RELOC_ADDR32NB 0x02
#define PEOBJ_RELOC_OFS (-4)
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 4
#define PEOBJ_XDATA_SIZE (4+24+4 +4+8)
#endif #endif
/* Section numbers (0-based). */ /* Section numbers (0-based). */
@ -111,7 +100,7 @@ enum {
PEOBJ_SECT_ABS = -2, PEOBJ_SECT_ABS = -2,
PEOBJ_SECT_UNDEF = -1, PEOBJ_SECT_UNDEF = -1,
PEOBJ_SECT_TEXT, PEOBJ_SECT_TEXT,
#ifdef PEOBJ_PDATA_NRELOC #if LJ_TARGET_X64
PEOBJ_SECT_PDATA, PEOBJ_SECT_PDATA,
PEOBJ_SECT_XDATA, PEOBJ_SECT_XDATA,
#elif LJ_TARGET_X86 #elif LJ_TARGET_X86
@ -186,9 +175,6 @@ void emit_peobj(BuildCtx *ctx)
uint32_t sofs; uint32_t sofs;
int i, nrsym; int i, nrsym;
union { uint8_t b; uint32_t u; } host_endian; union { uint8_t b; uint32_t u; } host_endian;
#ifdef PEOBJ_PDATA_NRELOC
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
#endif
sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
@ -202,18 +188,18 @@ void emit_peobj(BuildCtx *ctx)
/* Flags: 60 = read+execute, 50 = align16, 20 = code. */ /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
#ifdef PEOBJ_PDATA_NRELOC #if LJ_TARGET_X64
memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
pesect[PEOBJ_SECT_PDATA].ofs = sofs; pesect[PEOBJ_SECT_PDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4); sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4);
pesect[PEOBJ_SECT_PDATA].relocofs = sofs; pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE; sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */ /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
pesect[PEOBJ_SECT_XDATA].ofs = sofs; pesect[PEOBJ_SECT_XDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */ sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */
pesect[PEOBJ_SECT_XDATA].relocofs = sofs; pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */ /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
@ -248,7 +234,7 @@ void emit_peobj(BuildCtx *ctx)
*/ */
nrsym = ctx->nrelocsym; nrsym = ctx->nrelocsym;
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
#ifdef PEOBJ_PDATA_NRELOC #if LJ_TARGET_X64
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
#endif #endif
@ -273,6 +259,7 @@ void emit_peobj(BuildCtx *ctx)
#if LJ_TARGET_X64 #if LJ_TARGET_X64
{ /* Write .pdata section. */ { /* Write .pdata section. */
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */
PEreloc reloc; PEreloc reloc;
pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
@ -321,87 +308,6 @@ void emit_peobj(BuildCtx *ctx)
reloc.type = PEOBJ_RELOC_ADDR32NB; reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
} }
#elif LJ_TARGET_ARM64
/* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
{ /* Write .pdata section. */
uint32_t pdata[4];
PEreloc reloc;
pdata[0] = 0;
pdata[1] = 0;
pdata[2] = fcofs;
pdata[3] = 4+24+4;
owrite(ctx, &pdata, sizeof(pdata));
/* Start of .text and start of .xdata. */
reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
/* Start of vm_ffi_call and start of second part of .xdata. */
reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
{ /* Write .xdata section. */
uint32_t u32;
uint8_t *p, uwc[24];
PEreloc reloc;
#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */
#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */
#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
#define CSAVE_REGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
} while (0)
#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
#define CSAVE_FREGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
} while (0)
#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */
#define CODE_NOP 0xe3
#define CODE_END 0xe4
#define CEND_ALIGN do { \
*p++ = CODE_END; \
while ((p - uwc) & 3) *p++ = CODE_NOP; \
} while (0)
/* Unwind codes for .text section with handler. */
p = uwc;
CADD_FP(192); /* +2 */
CSAVE_REGS(19, 28, 176); /* +5*2 */
CSAVE_FREGS(8, 15, 96); /* +4*2 */
CSAVE_FPLR(192); /* +1 */
CALLOC_S(208); /* +1 */
CEND_ALIGN; /* +1 +1 -> 24 */
u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 24);
u32 = 0; /* Handler RVA to be relocated at 4 + 24. */
owrite(ctx, &u32, 4);
/* Unwind codes for vm_ffi_call without handler. */
p = uwc;
CADD_FP(16); /* +2 */
CSAVE_FPLR(16); /* +1 */
CSAVE_REGPX(19, -32); /* +2 */
CEND_ALIGN; /* +1 +2 -> 8 */
u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 8);
reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_X86 #elif LJ_TARGET_X86
/* Write .sxdata section. */ /* Write .sxdata section. */
for (i = 0; i < nrsym; i++) { for (i = 0; i < nrsym; i++) {
@ -433,7 +339,7 @@ void emit_peobj(BuildCtx *ctx)
emit_peobj_sym(ctx, ctx->relocsym[i], 0, emit_peobj_sym(ctx, ctx->relocsym[i], 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
#ifdef PEOBJ_PDATA_NRELOC #if LJ_TARGET_X64
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
emit_peobj_sym(ctx, "lj_err_unwind_win", 0, emit_peobj_sym(ctx, "lj_err_unwind_win", 0,

View File

@ -2,7 +2,7 @@
-- Lua script to dump the bytecode of the library functions written in Lua. -- Lua script to dump the bytecode of the library functions written in Lua.
-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@ -138,23 +138,23 @@ local function fixup_dump(dump, fixup)
return { dump = ndump, startbc = startbc, sizebc = sizebc } return { dump = ndump, startbc = startbc, sizebc = sizebc }
end end
local function find_defs(src, mode) local function find_defs(src)
local defs = {} local defs = {}
for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
local env = {}
local tcode, fixup = transform_lua(code) local tcode, fixup = transform_lua(code)
local func = assert(load(tcode, "", mode)) local func = assert(load(tcode, "", nil, env))()
defs[name] = fixup_dump(string.dump(func, mode), fixup) defs[name] = fixup_dump(string.dump(func, true), fixup)
defs[#defs+1] = name defs[#defs+1] = name
end end
return defs return defs
end end
local function gen_header(defs32, defs64) local function gen_header(defs)
local t = {} local t = {}
local function w(x) t[#t+1] = x end local function w(x) t[#t+1] = x end
w("/* This is a generated file. DO NOT EDIT! */\n\n") w("/* This is a generated file. DO NOT EDIT! */\n\n")
w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n") w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
for j,defs in ipairs{defs64, defs32} do
local s, sb = "", "" local s, sb = "", ""
for i,name in ipairs(defs) do for i,name in ipairs(defs) do
local d = defs[name] local d = defs[name]
@ -163,11 +163,7 @@ local function gen_header(defs32, defs64)
.. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc) .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
.. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4) .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
end end
if j == 1 then w("static const uint8_t libbc_code[] = {\n")
w("static const uint8_t libbc_code[] = {\n#if LJ_FR2\n")
else
w("\n#else\n")
end
local n = 0 local n = 0
for i=1,#s do for i=1,#s do
local x = string.byte(s, i) local x = string.byte(s, i)
@ -193,18 +189,14 @@ local function gen_header(defs32, defs64)
end end
w(",") w(",")
end end
end w("\n0\n};\n\n")
w("\n#endif\n0\n};\n\n")
w("static const struct { const char *name; int ofs; } libbc_map[] = {\n") w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
local m32, m64 = 0, 0 local m = 0
for i,name in ipairs(defs32) do for _,name in ipairs(defs) do
assert(name == defs64[i]) w('{"'); w(name); w('",'); w(m) w('},\n')
w('{"'); w(name); w('",'); w(m32) w('},\n') m = m + #defs[name].dump
m32 = m32 + #defs32[name].dump
m64 = m64 + #defs64[name].dump
assert(m32 == m64)
end end
w("{NULL,"); w(m32); w("}\n};\n\n") w("{NULL,"); w(m); w("}\n};\n\n")
return table.concat(t) return table.concat(t)
end end
@ -227,8 +219,7 @@ end
local outfile = parse_arg(arg) local outfile = parse_arg(arg)
local src = read_files(arg) local src = read_files(arg)
local defs32 = find_defs(src, "Wdts") local defs = find_defs(src)
local defs64 = find_defs(src, "Xdts") local hdr = gen_header(defs)
local hdr = gen_header(defs32, defs64)
write_file(outfile, hdr) write_file(outfile, hdr)

View File

@ -2,7 +2,7 @@
-- Lua script to generate a customized, minified version of Lua. -- Lua script to generate a customized, minified version of Lua.
-- The resulting 'minilua' is used for the build process of LuaJIT. -- The resulting 'minilua' is used for the build process of LuaJIT.
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------

View File

@ -1,14 +1,13 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Lua script to embed the rolling release version in luajit.h. -- Lua script to embed the rolling release version in luajit.h.
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
local arg = {...} local FILE_INPUT_H = "luajit_rolling.h"
local FILE_ROLLING_H = arg[1] or "luajit_rolling.h" local FILE_INPUT_R = "luajit_relver.txt"
local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt" local FILE_OUTPUT_H = "luajit.h"
local FILE_LUAJIT_H = arg[3] or "luajit.h"
local function file_read(file) local function file_read(file)
local fp = assert(io.open(file, "rb"), "run from the wrong directory") local fp = assert(io.open(file, "rb"), "run from the wrong directory")
@ -29,8 +28,8 @@ local function file_write_mod(file, data)
assert(fp:close()) assert(fp:close())
end end
local text = file_read(FILE_ROLLING_H):gsub("#error.-\n", "") local text = file_read(FILE_INPUT_H)
local relver = file_read(FILE_RELVER_TXT):match("(%d+)") local relver = file_read(FILE_INPUT_R):match("(%d+)")
if relver then if relver then
text = text:gsub("ROLLING", relver) text = text:gsub("ROLLING", relver)
@ -39,7 +38,6 @@ else
**** WARNING Cannot determine rolling release version from git log. **** WARNING Cannot determine rolling release version from git log.
**** WARNING The 'git' command must be available during the build. **** WARNING The 'git' command must be available during the build.
]]) ]])
file_write_mod(FILE_RELVER_TXT, "ROLLING\n") -- Fallback for install target.
end end
file_write_mod(FILE_LUAJIT_H, text) file_write_mod(FILE_OUTPUT_H, text)

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT bytecode listing module. -- LuaJIT bytecode listing module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT module to save/list bytecode. -- LuaJIT module to save/list bytecode.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --
@ -29,9 +29,6 @@ Save LuaJIT bytecode: luajit -b[options] input output
-l Only list bytecode. -l Only list bytecode.
-s Strip debug info (default). -s Strip debug info (default).
-g Keep debug info. -g Keep debug info.
-W Generate 32 bit (non-GC64) bytecode.
-X Generate 64 bit (GC64) bytecode.
-d Generate bytecode in deterministic manner.
-n name Set module name (default: auto-detect from input name). -n name Set module name (default: auto-detect from input name).
-t type Set output file type (default: auto-detect from output name). -t type Set output file type (default: auto-detect from output name).
-a arch Override architecture for object files (default: native). -a arch Override architecture for object files (default: native).
@ -41,7 +38,7 @@ Save LuaJIT bytecode: luajit -b[options] input output
-- Stop handling options. -- Stop handling options.
- Use stdin as input and/or stdout as output. - Use stdin as input and/or stdout as output.
File types: c cc h obj o raw (default) File types: c h obj o raw (default)
]] ]]
os.exit(1) os.exit(1)
end end
@ -54,9 +51,8 @@ local function check(ok, ...)
end end
local function readfile(ctx, input) local function readfile(ctx, input)
if ctx.string then if type(input) == "function" then return input end
return check(loadstring(input, nil, ctx.mode)) if ctx.filename then
elseif ctx.filename then
local data local data
if input == "-" then if input == "-" then
data = io.stdin:read("*a") data = io.stdin:read("*a")
@ -65,10 +61,10 @@ local function readfile(ctx, input)
data = assert(fp:read("*a")) data = assert(fp:read("*a"))
assert(fp:close()) assert(fp:close())
end end
return check(load(data, ctx.filename, ctx.mode)) return check(load(data, ctx.filename))
else else
if input == "-" then input = nil end if input == "-" then input = nil end
return check(loadfile(input, ctx.mode)) return check(loadfile(input))
end end
end end
@ -85,7 +81,7 @@ end
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
local map_type = { local map_type = {
raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj", raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
} }
local map_arch = { local map_arch = {
@ -439,12 +435,24 @@ typedef struct
{ {
mach_header; uint32_t reserved; mach_header; uint32_t reserved;
} mach_header_64; } mach_header_64;
typedef struct {
uint32_t cmd, cmdsize;
char segname[16];
uint32_t vmaddr, vmsize, fileoff, filesize;
uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command;
typedef struct { typedef struct {
uint32_t cmd, cmdsize; uint32_t cmd, cmdsize;
char segname[16]; char segname[16];
uint64_t vmaddr, vmsize, fileoff, filesize; uint64_t vmaddr, vmsize, fileoff, filesize;
uint32_t maxprot, initprot, nsects, flags; uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command_64; } mach_segment_command_64;
typedef struct {
char sectname[16], segname[16];
uint32_t addr, size;
uint32_t offset, align, reloff, nreloc, flags;
uint32_t reserved1, reserved2;
} mach_section;
typedef struct { typedef struct {
char sectname[16], segname[16]; char sectname[16], segname[16];
uint64_t addr, size; uint64_t addr, size;
@ -454,64 +462,139 @@ typedef struct {
typedef struct { typedef struct {
uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize; uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize;
} mach_symtab_command; } mach_symtab_command;
typedef struct {
int32_t strx;
uint8_t type, sect;
int16_t desc;
uint32_t value;
} mach_nlist;
typedef struct { typedef struct {
int32_t strx; int32_t strx;
uint8_t type, sect; uint8_t type, sect;
uint16_t desc; uint16_t desc;
uint64_t value; uint64_t value;
} mach_nlist_64; } mach_nlist_64;
typedef struct
{
int32_t magic, nfat_arch;
} mach_fat_header;
typedef struct
{
int32_t cputype, cpusubtype, offset, size, align;
} mach_fat_arch;
typedef struct { typedef struct {
struct {
mach_header hdr;
mach_segment_command seg;
mach_section sec;
mach_symtab_command sym;
} arch[1];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_obj;
typedef struct {
struct {
mach_header_64 hdr; mach_header_64 hdr;
mach_segment_command_64 seg; mach_segment_command_64 seg;
mach_section_64 sec; mach_section_64 sec;
mach_symtab_command sym; mach_symtab_command sym;
} arch[1];
mach_nlist_64 sym_entry; mach_nlist_64 sym_entry;
uint8_t space[4096]; uint8_t space[4096];
} mach_obj_64; } mach_obj_64;
typedef struct {
mach_fat_header fat;
mach_fat_arch fat_arch[2];
struct {
mach_header hdr;
mach_segment_command seg;
mach_section sec;
mach_symtab_command sym;
} arch[2];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_fat_obj;
typedef struct {
mach_fat_header fat;
mach_fat_arch fat_arch[2];
struct {
mach_header_64 hdr;
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
} arch[2];
mach_nlist_64 sym_entry;
uint8_t space[4096];
} mach_fat_obj_64;
]] ]]
local symname = '_'..LJBC_PREFIX..ctx.modname local symname = '_'..LJBC_PREFIX..ctx.modname
local cputype, cpusubtype = 0x01000007, 3 local isfat, is64, align, mobj = false, false, 4, "mach_obj"
if ctx.arch ~= "x64" then if ctx.arch == "x64" then
check(ctx.arch == "arm64", "unsupported architecture for OSX") is64, align, mobj = true, 8, "mach_obj_64"
cputype, cpusubtype = 0x0100000c, 0 elseif ctx.arch == "arm" then
isfat, mobj = true, "mach_fat_obj"
elseif ctx.arch == "arm64" then
is64, align, isfat, mobj = true, 8, true, "mach_fat_obj_64"
else
check(ctx.arch == "x86", "unsupported architecture for OSX")
end end
local function aligned(v, a) return bit.band(v+a-1, -a) end local function aligned(v, a) return bit.band(v+a-1, -a) end
local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE.
-- Create Mach-O object and fill in header. -- Create Mach-O object and fill in header.
local o = ffi.new("mach_obj_64") local o = ffi.new(mobj)
local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8) local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
if isfat then
o.fat.magic = be32(0xcafebabe)
o.fat.nfat_arch = be32(#cpusubtype)
end
-- Fill in sections and symbols. -- Fill in sections and symbols.
o.hdr.magic = 0xfeedfacf for i=0,#cpusubtype-1 do
o.hdr.cputype = cputype local ofs = 0
o.hdr.cpusubtype = cpusubtype if isfat then
o.hdr.filetype = 1 local a = o.fat_arch[i]
o.hdr.ncmds = 2 a.cputype = be32(cputype[i+1])
o.hdr.sizeofcmds = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)+ffi.sizeof(o.sym) a.cpusubtype = be32(cpusubtype[i+1])
o.seg.cmd = 0x19 -- Subsequent slices overlap each other to share data.
o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec) ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0])
o.seg.vmsize = #s a.offset = be32(ofs)
o.seg.fileoff = mach_size a.size = be32(mach_size-ofs+#s)
o.seg.filesize = #s end
o.seg.maxprot = 1 local a = o.arch[i]
o.seg.initprot = 1 a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface
o.seg.nsects = 1 a.hdr.cputype = cputype[i+1]
ffi.copy(o.sec.sectname, "__data") a.hdr.cpusubtype = cpusubtype[i+1]
ffi.copy(o.sec.segname, "__DATA") a.hdr.filetype = 1
o.sec.size = #s a.hdr.ncmds = 2
o.sec.offset = mach_size a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym)
o.sym.cmd = 2 a.seg.cmd = is64 and 0x19 or 0x1
o.sym.cmdsize = ffi.sizeof(o.sym) a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)
o.sym.symoff = ffi.offsetof(o, "sym_entry") a.seg.vmsize = #s
o.sym.nsyms = 1 a.seg.fileoff = mach_size-ofs
o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry) a.seg.filesize = #s
o.sym.strsize = aligned(#symname+2, 8) a.seg.maxprot = 1
a.seg.initprot = 1
a.seg.nsects = 1
ffi.copy(a.sec.sectname, "__data")
ffi.copy(a.sec.segname, "__DATA")
a.sec.size = #s
a.sec.offset = mach_size-ofs
a.sym.cmd = 2
a.sym.cmdsize = ffi.sizeof(a.sym)
a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs
a.sym.nsyms = 1
a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs
a.sym.strsize = aligned(#symname+2, align)
end
o.sym_entry.type = 0xf o.sym_entry.type = 0xf
o.sym_entry.sect = 1 o.sym_entry.sect = 1
o.sym_entry.strx = 1 o.sym_entry.strx = 1
ffi.copy(o.space+1, symname) ffi.copy(o.space+1, symname)
-- Write Mach-O object file. -- Write Macho-O object file.
local fp = savefile(output, "wb") local fp = savefile(output, "wb")
fp:write(ffi.string(o, mach_size)) fp:write(ffi.string(o, mach_size))
bcsave_tail(fp, output, s) bcsave_tail(fp, output, s)
@ -541,7 +624,7 @@ end
local function bcsave(ctx, input, output) local function bcsave(ctx, input, output)
local f = readfile(ctx, input) local f = readfile(ctx, input)
local s = string.dump(f, ctx.mode) local s = string.dump(f, ctx.strip)
local t = ctx.type local t = ctx.type
if not t then if not t then
t = detecttype(output) t = detecttype(output)
@ -564,11 +647,9 @@ local function docmd(...)
local n = 1 local n = 1
local list = false local list = false
local ctx = { local ctx = {
mode = "bt", arch = jit.arch, os = jit.os:lower(), strip = true, arch = jit.arch, os = jit.os:lower(),
type = false, modname = false, string = false, type = false, modname = false,
} }
local strip = "s"
local gc64 = ""
while n <= #arg do while n <= #arg do
local a = arg[n] local a = arg[n]
if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
@ -579,18 +660,14 @@ local function docmd(...)
if opt == "l" then if opt == "l" then
list = true list = true
elseif opt == "s" then elseif opt == "s" then
strip = "s" ctx.strip = true
elseif opt == "g" then elseif opt == "g" then
strip = "" ctx.strip = false
elseif opt == "W" or opt == "X" then
gc64 = opt
elseif opt == "d" then
ctx.mode = ctx.mode .. opt
else else
if arg[n] == nil or m ~= #a then usage() end if arg[n] == nil or m ~= #a then usage() end
if opt == "e" then if opt == "e" then
if n ~= 1 then usage() end if n ~= 1 then usage() end
ctx.string = true arg[1] = check(loadstring(arg[1]))
elseif opt == "n" then elseif opt == "n" then
ctx.modname = checkmodname(tremove(arg, n)) ctx.modname = checkmodname(tremove(arg, n))
elseif opt == "t" then elseif opt == "t" then
@ -610,7 +687,6 @@ local function docmd(...)
n = n + 1 n = n + 1
end end
end end
ctx.mode = ctx.mode .. strip .. gc64
if list then if list then
if #arg == 0 or #arg > 2 then usage() end if #arg == 0 or #arg > 2 then usage() end
bclist(ctx, arg[1], arg[2] or "-") bclist(ctx, arg[1], arg[2] or "-")

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT ARM disassembler module. -- LuaJIT ARM disassembler module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module. -- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT ARM64 disassembler module. -- LuaJIT ARM64 disassembler module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
-- --
-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
@ -107,20 +107,24 @@ local map_logsr = { -- Logical, shifted register.
[0] = { [0] = {
shift = 29, mask = 3, shift = 29, mask = 3,
[0] = { [0] = {
shift = 21, mask = 1, shift = 21, mask = 7,
[0] = "andDNMSg", "bicDNMSg" [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
}, },
{ {
shift = 21, mask = 1, shift = 21, mask = 7,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg" [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
}, },
{ {
shift = 21, mask = 1, shift = 21, mask = 7,
[0] = "eorDNMSg", "eonDNMSg" [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
}, },
{ {
shift = 21, mask = 1, shift = 21, mask = 7,
[0] = "ands|tstD0NMSg", "bicsDNMSg" [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
} }
}, },
false -- unallocated false -- unallocated
@ -128,20 +132,24 @@ local map_logsr = { -- Logical, shifted register.
{ {
shift = 29, mask = 3, shift = 29, mask = 3,
[0] = { [0] = {
shift = 21, mask = 1, shift = 21, mask = 7,
[0] = "andDNMSg", "bicDNMSg" [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
}, },
{ {
shift = 21, mask = 1, shift = 21, mask = 7,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg" [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
}, },
{ {
shift = 21, mask = 1, shift = 21, mask = 7,
[0] = "eorDNMSg", "eonDNMSg" [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
}, },
{ {
shift = 21, mask = 1, shift = 21, mask = 7,
[0] = "ands|tstD0NMSg", "bicsDNMSg" [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
} }
} }
} }
@ -658,10 +666,6 @@ local map_datafp = { -- Data processing, SIMD and FP.
} }
} }
} }
},
{ -- 010
shift = 0, mask = 0x81f8fc00,
[0x100e400] = "moviDdG"
} }
} }
@ -731,7 +735,7 @@ local map_cond = {
"hi", "ls", "ge", "lt", "gt", "le", "al", "hi", "ls", "ge", "lt", "gt", "le", "al",
} }
local map_shift = { [0] = "lsl", "lsr", "asr", "ror"} local map_shift = { [0] = "lsl", "lsr", "asr", }
local map_extend = { local map_extend = {
[0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
@ -836,20 +840,6 @@ local function parse_fpimm8(op)
return sign * frac * 2^exp return sign * frac * 2^exp
end end
-- Decode a byte-mask immediate from an opcode into a hex string.
-- Eight opcode bits each select one byte (0x00 or 0xff) of a 64 bit value:
-- op bits 5..8 control bytes 0..3 (low word), op bit 9 controls byte 4 and
-- op bits 16..18 control bytes 5..7 (high word).
-- Returns the high word followed by the low word, or only the low word
-- when the high word is zero.
local function decode_fpmovi(op)
  local lbits, hbits = rshift(op, 5), rshift(op, 9)
  -- Multiplying an isolated bit by the matching constant spreads it over
  -- all eight bits of its target byte.
  local lo = bor(
    band(lbits, 1) * 0xff,
    band(lbits, 2) * 0x7f80,
    band(lbits, 4) * 0x3fc000,
    band(lbits, 8) * 0x1fe00000)
  local hi = bor(
    band(hbits, 1) * 0xff,
    band(hbits, 0x80) * 0x1fe,
    band(hbits, 0x100) * 0xff00,
    band(hbits, 0x200) * 0x7f8000)
  if hi == 0 then
    -- Empty high word: print the low word only.
    return fmt_hex32(lo)
  end
  return fmt_hex32(hi)..tohex(lo)
end
local function prefer_bfx(sf, uns, imms, immr) local function prefer_bfx(sf, uns, imms, immr)
if imms < immr or imms == 31 or imms == 63 then if imms < immr or imms == 31 or imms == 63 then
return false return false
@ -966,7 +956,7 @@ local function disass_ins(ctx)
elseif p == "U" then elseif p == "U" then
local rn = map_regs.x[band(rshift(op, 5), 31)] local rn = map_regs.x[band(rshift(op, 5), 31)]
local sz = band(rshift(op, 30), 3) local sz = band(rshift(op, 30), 3)
local imm12 = lshift(rshift(lshift(op, 10), 20), sz) local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
if imm12 ~= 0 then if imm12 ~= 0 then
x = "["..rn..", #"..imm12.."]" x = "["..rn..", #"..imm12.."]"
else else
@ -1003,7 +993,8 @@ local function disass_ins(ctx)
x = x.."]" x = x.."]"
end end
elseif p == "P" then elseif p == "P" then
local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1)) local opcv, sh = rshift(op, 26), 2
if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
local imm7 = lshift(arshift(lshift(op, 10), 25), sh) local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
local rn = map_regs.x[band(rshift(op, 5), 31)] local rn = map_regs.x[band(rshift(op, 5), 31)]
local ind = band(rshift(op, 23), 3) local ind = band(rshift(op, 23), 3)
@ -1149,8 +1140,6 @@ local function disass_ins(ctx)
x = 0 x = 0
elseif p == "F" then elseif p == "F" then
x = parse_fpimm8(op) x = parse_fpimm8(op)
elseif p == "G" then
x = "#0x"..decode_fpmovi(op)
elseif p == "g" or p == "f" or p == "x" or p == "w" or elseif p == "g" or p == "f" or p == "x" or p == "w" or
p == "d" or p == "s" then p == "d" or p == "s" then
-- These are handled in D/N/M/A. -- These are handled in D/N/M/A.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT ARM64BE disassembler wrapper module. -- LuaJIT ARM64BE disassembler wrapper module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- ARM64 instructions are always little-endian. So just forward to the -- ARM64 instructions are always little-endian. So just forward to the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS disassembler module. -- LuaJIT MIPS disassembler module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h -- Released under the MIT/X license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module. -- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS64 disassembler wrapper module. -- LuaJIT MIPS64 disassembler wrapper module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the big-endian functions from the -- This module just exports the big-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS64EL disassembler wrapper module. -- LuaJIT MIPS64EL disassembler wrapper module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the -- This module just exports the little-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS64R6 disassembler wrapper module. -- LuaJIT MIPS64R6 disassembler wrapper module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the r6 big-endian functions from the -- This module just exports the r6 big-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS64R6EL disassembler wrapper module. -- LuaJIT MIPS64R6EL disassembler wrapper module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the r6 little-endian functions from the -- This module just exports the r6 little-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPSEL disassembler wrapper module. -- LuaJIT MIPSEL disassembler wrapper module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the -- This module just exports the little-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT PPC disassembler module. -- LuaJIT PPC disassembler module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h -- Released under the MIT/X license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module. -- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT x64 disassembler wrapper module. -- LuaJIT x64 disassembler wrapper module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the 64 bit functions from the combined -- This module just exports the 64 bit functions from the combined

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT x86/x64 disassembler module. -- LuaJIT x86/x64 disassembler module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module. -- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT compiler dump module. -- LuaJIT compiler dump module.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --
@ -552,12 +552,7 @@ local recdepth = 0
local function fmterr(err, info) local function fmterr(err, info)
if type(err) == "number" then if type(err) == "number" then
if type(info) == "function" then info = fmtfunc(info) end if type(info) == "function" then info = fmtfunc(info) end
local fmt = vmdef.traceerr[err] err = format(vmdef.traceerr[err], info)
if fmt == "NYI: bytecode %s" then
local oidx = 6 * info
info = sub(vmdef.bcnames, oidx+1, oidx+6)
end
err = format(fmt, info)
end end
return err return err
end end

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT profiler. -- LuaJIT profiler.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --
@ -227,7 +227,9 @@ local function prof_finish()
local samples = prof_samples local samples = prof_samples
if samples == 0 then if samples == 0 then
if prof_raw ~= true then out:write("[No samples collected]\n") end if prof_raw ~= true then out:write("[No samples collected]\n") end
elseif prof_ann then return
end
if prof_ann then
prof_annotate(prof_count1, samples) prof_annotate(prof_count1, samples)
else else
prof_top(prof_count1, prof_count2, samples, "") prof_top(prof_count1, prof_count2, samples, "")

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Verbose mode of the LuaJIT compiler. -- Verbose mode of the LuaJIT compiler.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --
@ -62,7 +62,7 @@ local jit = require("jit")
local jutil = require("jit.util") local jutil = require("jit.util")
local vmdef = require("jit.vmdef") local vmdef = require("jit.vmdef")
local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
local type, sub, format = type, string.sub, string.format local type, format = type, string.format
local stdout, stderr = io.stdout, io.stderr local stdout, stderr = io.stdout, io.stderr
-- Active flag and output file handle. -- Active flag and output file handle.
@ -89,12 +89,7 @@ end
local function fmterr(err, info) local function fmterr(err, info)
if type(err) == "number" then if type(err) == "number" then
if type(info) == "function" then info = fmtfunc(info) end if type(info) == "function" then info = fmtfunc(info) end
local fmt = vmdef.traceerr[err] err = format(vmdef.traceerr[err], info)
if fmt == "NYI: bytecode %s" then
local oidx = 6 * info
info = sub(vmdef.bcnames, oidx+1, oidx+6)
end
err = format(fmt, info)
end end
return err return err
end end

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT profiler zones. -- LuaJIT profiler zones.
-- --
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --

View File

@ -1,6 +1,6 @@
/* /*
** Auxiliary library for the Lua/C API. ** Auxiliary library for the Lua/C API.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major parts taken verbatim or adapted from the Lua interpreter. ** Major parts taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** Base and coroutine library. ** Base and coroutine library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -146,8 +146,6 @@ LJLIB_CF(getfenv) LJLIB_REC(.)
cTValue *o = L->base; cTValue *o = L->base;
if (!(o < L->top && tvisfunc(o))) { if (!(o < L->top && tvisfunc(o))) {
int level = lj_lib_optint(L, 1, 1); int level = lj_lib_optint(L, 1, 1);
if (level < 0)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
o = lj_debug_frame(L, level, &level); o = lj_debug_frame(L, level, &level);
if (o == NULL) if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL); lj_err_arg(L, 1, LJ_ERR_INVLVL);
@ -170,8 +168,6 @@ LJLIB_CF(setfenv)
setgcref(L->env, obj2gco(t)); setgcref(L->env, obj2gco(t));
return 0; return 0;
} }
if (level < 0)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
o = lj_debug_frame(L, level, &level); o = lj_debug_frame(L, level, &level);
if (o == NULL) if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL); lj_err_arg(L, 1, LJ_ERR_INVLVL);
@ -364,11 +360,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
static int load_aux(lua_State *L, int status, int envarg) static int load_aux(lua_State *L, int status, int envarg)
{ {
if (status == LUA_OK) { if (status == LUA_OK) {
/* if (tvistab(L->base+envarg-1)) {
** Set environment table for top-level function.
** Don't do this for non-native bytecode, which returns a prototype.
*/
if (tvistab(L->base+envarg-1) && tvisfunc(L->top-1)) {
GCfunc *fn = funcV(L->top-1); GCfunc *fn = funcV(L->top-1);
GCtab *t = tabV(L->base+envarg-1); GCtab *t = tabV(L->base+envarg-1);
setgcref(fn->c.env, obj2gco(t)); setgcref(fn->c.env, obj2gco(t));
@ -624,10 +616,7 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
setstrV(L, L->base-LJ_FR2, lj_err_str(L, em)); setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
return FFH_RES(2); return FFH_RES(2);
} }
if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) { lj_state_growstack(co, (MSize)(L->top - L->base));
cTValue *msg = --co->top;
lj_err_callermsg(L, strVdata(msg));
}
return FFH_RETRY; return FFH_RETRY;
} }

View File

@ -1,6 +1,6 @@
/* /*
** Bit manipulation library. ** Bit manipulation library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lib_bit_c #define lib_bit_c
@ -98,7 +98,7 @@ LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift); x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
return bit_result64(L, id, x); return bit_result64(L, id, x);
} }
setintV(L->base+1, sh); if (id2) setintV(L->base+1, sh);
return FFH_RETRY; return FFH_RETRY;
#else #else
lj_lib_checknumber(L, 1); lj_lib_checknumber(L, 1);

View File

@ -1,6 +1,6 @@
/* /*
** Buffer library. ** Buffer library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lib_buffer_c #define lib_buffer_c

View File

@ -1,6 +1,6 @@
/* /*
** Debug library. ** Debug library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** FFI library. ** FFI library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lib_ffi_c #define lib_ffi_c
@ -305,7 +305,7 @@ LJLIB_CF(ffi_meta___tostring)
p = *(void **)p; p = *(void **)p;
} else if (ctype_isenum(ct->info)) { } else if (ctype_isenum(ct->info)) {
msg = "cdata<%s>: %d"; msg = "cdata<%s>: %d";
p = (void *)(uintptr_t)*(uint32_t *)p; p = (void *)(uintptr_t)*(uint32_t **)p;
} else { } else {
if (ctype_isptr(ct->info)) { if (ctype_isptr(ct->info)) {
p = cdata_getptr(p, ct->size); p = cdata_getptr(p, ct->size);
@ -513,7 +513,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
/* Handle ctype __gc metamethod. Use the fast lookup here. */ /* Handle ctype __gc metamethod. Use the fast lookup here. */
cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id); cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) { if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) {
GCtab *t = tabref(G(L)->gcroot[GCROOT_FFI_FIN]); GCtab *t = cts->finalizer;
if (gcref(t->metatable)) { if (gcref(t->metatable)) {
/* Add to finalizer table, if still enabled. */ /* Add to finalizer table, if still enabled. */
copyTV(L, lj_tab_set(L, t, o-1), tv); copyTV(L, lj_tab_set(L, t, o-1), tv);
@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
"\003win" "\003win"
#endif #endif
#if LJ_ABI_PAUTH #if LJ_ABI_PAUTH
"\005pauth" "\007pauth"
#endif #endif
#if LJ_TARGET_UWP #if LJ_TARGET_UWP
"\003uwp" "\003uwp"
@ -765,7 +765,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
return 1; return 1;
} }
LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to miscmap table. */ LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
LJLIB_CF(ffi_metatype) LJLIB_CF(ffi_metatype)
{ {
@ -791,6 +791,8 @@ LJLIB_CF(ffi_metatype)
return 1; return 1;
} }
LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to finalizer table. */
LJLIB_CF(ffi_gc) LJLIB_REC(.) LJLIB_CF(ffi_gc) LJLIB_REC(.)
{ {
GCcdata *cd = ffi_checkcdata(L, 1); GCcdata *cd = ffi_checkcdata(L, 1);
@ -823,6 +825,19 @@ LJLIB_PUSH(top-2) LJLIB_SET(arch)
/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */
/* Create special weak-keyed finalizer table. */
static GCtab *ffi_finalizer(lua_State *L)
{
/* NOBARRIER: The table is new (marked white). */
GCtab *t = lj_tab_new(L, 0, 1);
settabV(L, L->top++, t);
setgcref(t->metatable, obj2gco(t));
setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
lj_str_newlit(L, "k"));
t->nomm = (uint8_t)(~(1u<<MM_mode));
return t;
}
/* Register FFI module as loaded. */ /* Register FFI module as loaded. */
static void ffi_register_module(lua_State *L) static void ffi_register_module(lua_State *L)
{ {
@ -838,6 +853,7 @@ LUALIB_API int luaopen_ffi(lua_State *L)
{ {
CTState *cts = lj_ctype_init(L); CTState *cts = lj_ctype_init(L);
settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1))); settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1)));
cts->finalizer = ffi_finalizer(L);
LJ_LIB_REG(L, NULL, ffi_meta); LJ_LIB_REG(L, NULL, ffi_meta);
/* NOBARRIER: basemt is a GC root. */ /* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1))); setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1)));

View File

@ -1,6 +1,6 @@
/* /*
** Library initialization. ** Library initialization.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major parts taken verbatim from the Lua interpreter. ** Major parts taken verbatim from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** I/O library. ** I/O library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -25,7 +25,6 @@
#include "lj_strfmt.h" #include "lj_strfmt.h"
#include "lj_ff.h" #include "lj_ff.h"
#include "lj_lib.h" #include "lj_lib.h"
#include "lj_strscan.h"
/* Userdata payload for I/O file. */ /* Userdata payload for I/O file. */
typedef struct IOFileUD { typedef struct IOFileUD {
@ -324,14 +323,13 @@ LJLIB_CF(io_method_seek)
FILE *fp = io_tofile(L)->fp; FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
int64_t ofs = 0; int64_t ofs = 0;
TValue *o; cTValue *o;
int res; int res;
if (opt == 0) opt = SEEK_SET; if (opt == 0) opt = SEEK_SET;
else if (opt == 1) opt = SEEK_CUR; else if (opt == 1) opt = SEEK_CUR;
else if (opt == 2) opt = SEEK_END; else if (opt == 2) opt = SEEK_END;
o = L->base+2; o = L->base+2;
if (o < L->top) { if (o < L->top) {
if (tvisstr(o)) lj_strscan_num(strV(o), o);
if (tvisint(o)) if (tvisint(o))
ofs = (int64_t)intV(o); ofs = (int64_t)intV(o);
else if (tvisnum(o)) else if (tvisnum(o))

View File

@ -1,6 +1,6 @@
/* /*
** JIT library. ** JIT library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lib_jit_c #define lib_jit_c
@ -161,6 +161,24 @@ LJLIB_PUSH(top-2) LJLIB_SET(version)
/* -- Reflection API for Lua functions ------------------------------------ */ /* -- Reflection API for Lua functions ------------------------------------ */
/* Return prototype of first argument (Lua function or prototype object) */
static GCproto *check_Lproto(lua_State *L, int nolua)
{
TValue *o = L->base;
if (L->top > o) {
if (tvisproto(o)) {
return protoV(o);
} else if (tvisfunc(o)) {
if (isluafunc(funcV(o)))
return funcproto(funcV(o));
else if (nolua)
return NULL;
}
}
lj_err_argt(L, 1, LUA_TFUNCTION);
return NULL; /* unreachable */
}
static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
{ {
setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val); setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
@ -169,7 +187,7 @@ static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
/* local info = jit.util.funcinfo(func [,pc]) */ /* local info = jit.util.funcinfo(func [,pc]) */
LJLIB_CF(jit_util_funcinfo) LJLIB_CF(jit_util_funcinfo)
{ {
GCproto *pt = lj_lib_checkLproto(L, 1, 1); GCproto *pt = check_Lproto(L, 1);
if (pt) { if (pt) {
BCPos pc = (BCPos)lj_lib_optint(L, 2, 0); BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
GCtab *t; GCtab *t;
@ -211,7 +229,7 @@ LJLIB_CF(jit_util_funcinfo)
/* local ins, m = jit.util.funcbc(func, pc) */ /* local ins, m = jit.util.funcbc(func, pc) */
LJLIB_CF(jit_util_funcbc) LJLIB_CF(jit_util_funcbc)
{ {
GCproto *pt = lj_lib_checkLproto(L, 1, 0); GCproto *pt = check_Lproto(L, 0);
BCPos pc = (BCPos)lj_lib_checkint(L, 2); BCPos pc = (BCPos)lj_lib_checkint(L, 2);
if (pc < pt->sizebc) { if (pc < pt->sizebc) {
BCIns ins = proto_bc(pt)[pc]; BCIns ins = proto_bc(pt)[pc];
@ -228,7 +246,7 @@ LJLIB_CF(jit_util_funcbc)
/* local k = jit.util.funck(func, idx) */ /* local k = jit.util.funck(func, idx) */
LJLIB_CF(jit_util_funck) LJLIB_CF(jit_util_funck)
{ {
GCproto *pt = lj_lib_checkLproto(L, 1, 0); GCproto *pt = check_Lproto(L, 0);
ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2); ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2);
if (idx >= 0) { if (idx >= 0) {
if (idx < (ptrdiff_t)pt->sizekn) { if (idx < (ptrdiff_t)pt->sizekn) {
@ -248,7 +266,7 @@ LJLIB_CF(jit_util_funck)
/* local name = jit.util.funcuvname(func, idx) */ /* local name = jit.util.funcuvname(func, idx) */
LJLIB_CF(jit_util_funcuvname) LJLIB_CF(jit_util_funcuvname)
{ {
GCproto *pt = lj_lib_checkLproto(L, 1, 0); GCproto *pt = check_Lproto(L, 0);
uint32_t idx = (uint32_t)lj_lib_checkint(L, 2); uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
if (idx < pt->sizeuv) { if (idx < pt->sizeuv) {
setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx))); setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx)));

View File

@ -1,6 +1,6 @@
/* /*
** Math library. ** Math library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include <math.h> #include <math.h>
@ -13,7 +13,6 @@
#include "lualib.h" #include "lualib.h"
#include "lj_obj.h" #include "lj_obj.h"
#include "lj_err.h"
#include "lj_lib.h" #include "lj_lib.h"
#include "lj_vm.h" #include "lj_vm.h"
#include "lj_prng.h" #include "lj_prng.h"
@ -184,10 +183,7 @@ LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
LJLIB_CF(math_randomseed) LJLIB_CF(math_randomseed)
{ {
PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
if (L->base != L->top)
random_seed(rs, lj_lib_checknum(L, 1)); random_seed(rs, lj_lib_checknum(L, 1));
else if (!lj_prng_seed_secure(rs))
lj_err_caller(L, LJ_ERR_PRNGSD);
return 0; return 0;
} }

View File

@ -1,6 +1,6 @@
/* /*
** OS library. ** OS library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** Package library. ** Package library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** String library. ** String library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -122,25 +122,11 @@ static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
LJLIB_CF(string_dump) LJLIB_CF(string_dump)
{ {
GCproto *pt = lj_lib_checkLproto(L, 1, 1); GCfunc *fn = lj_lib_checkfunc(L, 1);
uint32_t flags = 0; int strip = L->base+1 < L->top && tvistruecond(L->base+1);
SBuf *sb; SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
TValue *o = L->base+1;
if (o < L->top) {
if (tvisstr(o)) {
const char *mode = strVdata(o);
char c;
while ((c = *mode++)) {
if (c == 's') flags |= BCDUMP_F_STRIP;
if (c == 'd') flags |= BCDUMP_F_DETERMINISTIC;
}
} else if (tvistruecond(o)) {
flags |= BCDUMP_F_STRIP;
}
}
sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
L->top = L->base+1; L->top = L->base+1;
if (!pt || lj_bcwrite(L, pt, writer_buf, sb, flags)) if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
lj_err_caller(L, LJ_ERR_STRDUMP); lj_err_caller(L, LJ_ERR_STRDUMP);
setstrV(L, L->top-1, lj_buf_str(L, sb)); setstrV(L, L->top-1, lj_buf_str(L, sb));
lj_gc_check(L); lj_gc_check(L);

View File

@ -1,6 +1,6 @@
/* /*
** Table library. ** Table library.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1057,7 +1057,7 @@ static size_t release_unused_segments(mstate m)
mchunkptr p = align_as_chunk(base); mchunkptr p = align_as_chunk(base);
size_t psize = chunksize(p); size_t psize = chunksize(p);
/* Can unmap if first chunk holds entire segment and not pinned */ /* Can unmap if first chunk holds entire segment and not pinned */
if (!cinuse(p) && (char *)p + psize == (char *)mem2chunk(sp)) { if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
tchunkptr tp = (tchunkptr)p; tchunkptr tp = (tchunkptr)p;
if (p == m->dv) { if (p == m->dv) {
m->dv = 0; m->dv = 0;

View File

@ -1,6 +1,6 @@
/* /*
** Public Lua/C API. ** Public Lua/C API.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -104,12 +104,7 @@ LUA_API int lua_checkstack(lua_State *L, int size)
if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) { if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
return 0; /* Stack overflow. */ return 0; /* Stack overflow. */
} else if (size > 0) { } else if (size > 0) {
int avail = (int)(mref(L->maxstack, TValue) - L->top); lj_state_checkstack(L, (MSize)size);
if (size > avail &&
lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
L->top--;
return 0; /* Out of memory. */
}
} }
return 1; return 1;
} }
@ -1052,7 +1047,6 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
/* Flush cache, since traces specialize to basemt. But not during __gc. */ /* Flush cache, since traces specialize to basemt. But not during __gc. */
if (lj_trace_flushall(L)) if (lj_trace_flushall(L))
lj_err_caller(L, LJ_ERR_NOGCMM); lj_err_caller(L, LJ_ERR_NOGCMM);
o = index2adr(L, idx); /* Stack may have been reallocated. */
if (tvisbool(o)) { if (tvisbool(o)) {
/* NOBARRIER: basemt is a GC root. */ /* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt)); setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));

View File

@ -1,6 +1,6 @@
/* /*
** Target architecture selection. ** Target architecture selection.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_ARCH_H #ifndef _LJ_ARCH_H
@ -57,7 +57,7 @@
#define LUAJIT_TARGET LUAJIT_ARCH_X64 #define LUAJIT_TARGET LUAJIT_ARCH_X64
#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM #define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__) || defined(_M_ARM64) #elif defined(__aarch64__)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC #define LUAJIT_TARGET LUAJIT_ARCH_PPC
@ -66,7 +66,7 @@
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#else #else
#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" #error "No support for this architecture (yet)"
#endif #endif
#endif #endif
@ -124,7 +124,7 @@
#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE #if TARGET_OS_IPHONE
#define LJ_TARGET_IOS 1 #define LJ_TARGET_IOS 1
#else #else
#define LJ_TARGET_IOS 0 #define LJ_TARGET_IOS 0
@ -237,7 +237,7 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#define LJ_ARCH_VERSION 80 #define LJ_ARCH_VERSION 80
#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
#define LJ_ARCH_VERSION 70 #define LJ_ARCH_VERSION 70
@ -331,7 +331,6 @@
#define LJ_ARCH_NOFFI 1 #define LJ_ARCH_NOFFI 1
#elif LJ_ARCH_BITS == 64 #elif LJ_ARCH_BITS == 64
#error "No support for PPC64" #error "No support for PPC64"
#undef LJ_TARGET_PPC
#endif #endif
#if _ARCH_PWR7 #if _ARCH_PWR7
@ -491,45 +490,36 @@
#elif LJ_TARGET_ARM #elif LJ_TARGET_ARM
#if defined(__ARMEB__) #if defined(__ARMEB__)
#error "No support for big-endian ARM" #error "No support for big-endian ARM"
#undef LJ_TARGET_ARM
#endif #endif
#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ #if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
#error "No support for Cortex-M CPUs" #error "No support for Cortex-M CPUs"
#undef LJ_TARGET_ARM
#endif #endif
#if !(__ARM_EABI__ || LJ_TARGET_IOS) #if !(__ARM_EABI__ || LJ_TARGET_IOS)
#error "Only ARM EABI or iOS 3.0+ ABI is supported" #error "Only ARM EABI or iOS 3.0+ ABI is supported"
#undef LJ_TARGET_ARM
#endif #endif
#elif LJ_TARGET_ARM64 #elif LJ_TARGET_ARM64
#if defined(_ILP32) #if defined(_ILP32)
#error "No support for ILP32 model on ARM64" #error "No support for ILP32 model on ARM64"
#undef LJ_TARGET_ARM64
#endif #endif
#elif LJ_TARGET_PPC #elif LJ_TARGET_PPC
#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
#error "No support for little-endian PPC32" #error "No support for little-endian PPC32"
#undef LJ_TARGET_PPC
#endif #endif
#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500, use LuaJIT 2.0" #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#undef LJ_TARGET_PPC
#endif #endif
#elif LJ_TARGET_MIPS32 #elif LJ_TARGET_MIPS32
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
#error "Only o32 ABI supported for MIPS32" #error "Only o32 ABI supported for MIPS32"
#undef LJ_TARGET_MIPS
#endif #endif
#if LJ_TARGET_MIPSR6 #if LJ_TARGET_MIPSR6
/* Not that useful, since most available r6 CPUs are 64 bit. */ /* Not that useful, since most available r6 CPUs are 64 bit. */
#error "No support for MIPS32R6" #error "No support for MIPS32R6"
#undef LJ_TARGET_MIPS
#endif #endif
#elif LJ_TARGET_MIPS64 #elif LJ_TARGET_MIPS64
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
#error "Only n64 ABI supported for MIPS64" #error "Only n64 ABI supported for MIPS64"
#undef LJ_TARGET_MIPS
#endif #endif
#endif #endif
#endif #endif

View File

@ -1,6 +1,6 @@
/* /*
** IR assembler (SSA IR -> machine code). ** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_asm_c #define lj_asm_c
@ -29,7 +29,6 @@
#include "lj_dispatch.h" #include "lj_dispatch.h"
#include "lj_vm.h" #include "lj_vm.h"
#include "lj_target.h" #include "lj_target.h"
#include "lj_prng.h"
#ifdef LUA_USE_ASSERT #ifdef LUA_USE_ASSERT
#include <stdio.h> #include <stdio.h>
@ -94,12 +93,6 @@ typedef struct ASMState {
MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
MCode *realign; /* Realign loop if not NULL. */ MCode *realign; /* Realign loop if not NULL. */
#ifdef LUAJIT_RANDOM_RA
/* Randomize register allocation. OK for fuzz testing, not for production. */
uint64_t prngbits;
PRNGState prngstate;
#endif
#ifdef RID_NUM_KREF #ifdef RID_NUM_KREF
intptr_t krefk[RID_NUM_KREF]; intptr_t krefk[RID_NUM_KREF];
#endif #endif
@ -180,41 +173,6 @@ IRFLDEF(FLOFS)
0 0
}; };
#ifdef LUAJIT_RANDOM_RA
/* Return a fixed number of random bits from the local PRNG state. */
static uint32_t ra_random_bits(ASMState *as, uint32_t nbits) {
uint64_t b = as->prngbits;
uint32_t res = (1u << nbits) - 1u;
if (b <= res) b = lj_prng_u64(&as->prngstate) | (1ull << 63);
res &= (uint32_t)b;
as->prngbits = b >> nbits;
return res;
}
/* Pick a random register from a register set. */
static Reg rset_pickrandom(ASMState *as, RegSet rs)
{
Reg r = rset_pickbot_(rs);
rs >>= r;
if (rs > 1) { /* More than one bit set? */
while (1) {
/* We need to sample max. the GPR or FPR half of the set. */
uint32_t d = ra_random_bits(as, RSET_BITS-1);
if ((rs >> d) & 1) {
r += d;
break;
}
}
}
return r;
}
#define rset_picktop(rs) rset_pickrandom(as, rs)
#define rset_pickbot(rs) rset_pickrandom(as, rs)
#else
#define rset_picktop(rs) rset_picktop_(rs)
#define rset_pickbot(rs) rset_pickbot_(rs)
#endif
/* -- Target-specific instruction emitter --------------------------------- */ /* -- Target-specific instruction emitter --------------------------------- */
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_X86ORX64
@ -606,11 +564,7 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
#if LJ_GC64 #if LJ_GC64
#if LJ_TARGET_ARM64
(ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
#else
(ir->o == IR_KINT && k == ir->i) || (ir->o == IR_KINT && k == ir->i) ||
#endif
(ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
((ir->o == IR_KPTR || ir->o == IR_KKPTR) && ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
k == (intptr_t)ir_kptr(ir)) k == (intptr_t)ir_kptr(ir))
@ -949,11 +903,11 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
static void asm_snap_alloc1(ASMState *as, IRRef ref) static void asm_snap_alloc1(ASMState *as, IRRef ref)
{ {
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
if (!irref_isk(ref)) { if (!irref_isk(ref) && ir->r != RID_SUNK) {
bloomset(as->snapfilt1, ref); bloomset(as->snapfilt1, ref);
bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS)); bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
if (ra_used(ir)) return; if (ra_used(ir)) return;
if (ir->r == RID_SINK || ir->r == RID_SUNK) { if (ir->r == RID_SINK) {
ir->r = RID_SUNK; ir->r = RID_SUNK;
#if LJ_HASFFI #if LJ_HASFFI
if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */ if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */
@ -2488,9 +2442,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->realign = NULL; as->realign = NULL;
as->loopinv = 0; as->loopinv = 0;
as->parent = J->parent ? traceref(J, J->parent) : NULL; as->parent = J->parent ? traceref(J, J->parent) : NULL;
#ifdef LUAJIT_RANDOM_RA
(void)lj_prng_u64(&J2G(J)->prng); /* Ensure PRNG step between traces. */
#endif
/* Reserve MCode memory. */ /* Reserve MCode memory. */
as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot); as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
@ -2532,10 +2483,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
#endif #endif
as->ir = J->curfinal->ir; /* Use the copied IR. */ as->ir = J->curfinal->ir; /* Use the copied IR. */
as->curins = J->cur.nins = as->orignins; as->curins = J->cur.nins = as->orignins;
#ifdef LUAJIT_RANDOM_RA
as->prngstate = J2G(J)->prng; /* Must (re)start from identical state. */
as->prngbits = 0;
#endif
RA_DBG_START(); RA_DBG_START();
RA_DBGX((as, "===== STOP =====")); RA_DBGX((as, "===== STOP ====="));

View File

@ -1,6 +1,6 @@
/* /*
** IR assembler (SSA IR -> machine code). ** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_ASM_H #ifndef _LJ_ASM_H

View File

@ -1,6 +1,6 @@
/* /*
** ARM IR assembler (SSA IR -> machine code). ** ARM IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
/* -- Register allocator extensions --------------------------------------- */ /* -- Register allocator extensions --------------------------------------- */
@ -969,32 +969,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); if (irref_isk(ir->op1)) {
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, ARMI_LDR, dest, v); emit_lsptr(as, ARMI_LDR, dest, v);
} else { } else {
if (guarded) { Reg uv = ra_scratch(as, RSET_GPR);
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ); Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
} emit_opk(as, ARMI_ADD, dest, uv,
if (ir->o == IR_UREFC)
emit_opk(as, ARMI_ADD, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR); (int32_t)offsetof(GCupval, tv), RSET_GPR);
else emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, ARMI_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else { } else {
emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR), emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
} }
emit_lso(as, ARMI_LDR, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
} }
} }
@ -1927,7 +1919,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
} else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
as->curins--; /* Always skip the loword min/max. */ as->curins--; /* Always skip the loword min/max. */
if (uselo || usehi) if (uselo || usehi)
asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS); asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
return; return;
#elif LJ_HASFFI #elif LJ_HASFFI
} else if ((ir-1)->o == IR_CONV) { } else if ((ir-1)->o == IR_CONV) {
@ -1998,7 +1990,6 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno) IRIns *irp, RegSet allow, ExitNo exitno)
{ {
int savereg = 0;
Reg pbase; Reg pbase;
uint32_t k; uint32_t k;
if (irp) { if (irp) {
@ -2009,14 +2000,12 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
pbase = rset_pickbot(allow); pbase = rset_pickbot(allow);
} else { } else {
pbase = RID_RET; pbase = RID_RET;
savereg = 1; emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
} }
} else { } else {
pbase = RID_BASE; pbase = RID_BASE;
} }
emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
if (savereg)
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
k = emit_isk12(0, (int32_t)(8*topslot)); k = emit_isk12(0, (int32_t)(8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, ARMI_CMP^k, RID_TMP); emit_n(as, ARMI_CMP^k, RID_TMP);
@ -2028,7 +2017,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
if (ra_hasspill(irp->s)) if (ra_hasspill(irp->s))
emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
if (savereg) if (ra_hasspill(irp->s) && !allow)
emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
emit_loadi(as, RID_TMP, (i & ~4095)); emit_loadi(as, RID_TMP, (i & ~4095));
} else { } else {
@ -2042,12 +2031,11 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
SnapEntry *map = &as->T->snapmap[snap->mapofs]; SnapEntry *map = &as->T->snapmap[snap->mapofs];
SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
MSize n, nent = snap->nent; MSize n, nent = snap->nent;
int32_t bias = 0;
/* Store the value of all modified slots to the Lua stack. */ /* Store the value of all modified slots to the Lua stack. */
for (n = 0; n < nent; n++) { for (n = 0; n < nent; n++) {
SnapEntry sn = map[n]; SnapEntry sn = map[n];
BCReg s = snap_slot(sn); BCReg s = snap_slot(sn);
int32_t ofs = 8*((int32_t)s-1) - bias; int32_t ofs = 8*((int32_t)s-1);
IRRef ref = snap_ref(sn); IRRef ref = snap_ref(sn);
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
if ((sn & SNAP_NORESTORE)) if ((sn & SNAP_NORESTORE))
@ -2066,12 +2054,6 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);
#else #else
Reg src = ra_alloc1(as, ref, RSET_FPR); Reg src = ra_alloc1(as, ref, RSET_FPR);
if (LJ_UNLIKELY(ofs < -1020 || ofs > 1020)) {
int32_t adj = ofs & 0xffffff00; /* K12-friendly. */
bias += adj;
ofs -= adj;
emit_addptr(as, RID_BASE, -adj);
}
emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
#endif #endif
} else { } else {
@ -2100,7 +2082,6 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
} }
checkmclim(as); checkmclim(as);
} }
emit_addptr(as, RID_BASE, bias);
lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
} }
@ -2271,7 +2252,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
} }
if (nslots > as->evenspill) /* Leave room for args in stack slots. */ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots; as->evenspill = nslots;
return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); return REGSP_HINT(RID_RET);
} }
static void asm_setup_target(ASMState *as) static void asm_setup_target(ASMState *as)

View File

@ -1,6 +1,6 @@
/* /*
** ARM64 IR assembler (SSA IR -> machine code). ** ARM64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** **
** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
** Sponsored by Cisco Systems, Inc. ** Sponsored by Cisco Systems, Inc.
@ -84,23 +84,18 @@ static void asm_guardcc(ASMState *as, A64CC cc)
emit_cond_branch(as, cc, target); emit_cond_branch(as, cc, target);
} }
/* Emit test and branch instruction to exit for guard, if in range. */ /* Emit test and branch instruction to exit for guard. */
static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
{ {
MCode *target = asm_exitstub_addr(as, as->snapno); MCode *target = asm_exitstub_addr(as, as->snapno);
MCode *p = as->mcp; MCode *p = as->mcp;
ptrdiff_t delta = target - p;
if (LJ_UNLIKELY(p == as->invmcp)) { if (LJ_UNLIKELY(p == as->invmcp)) {
if (as->orignins > 1023) return 0; /* Delta might end up too large. */
as->loopinv = 1; as->loopinv = 1;
*p = A64I_B | A64F_S26(delta); *p = A64I_B | A64F_S26(target-p);
ai ^= 0x01000000u; emit_tnb(as, ai^0x01000000u, r, bit, p-1);
target = p-1; return;
} else if (LJ_UNLIKELY(delta >= 0x1fff)) {
return 0;
} }
emit_tnb(as, ai, r, bit, target); emit_tnb(as, ai, r, bit, target);
return 1;
} }
/* Emit compare and branch instruction to exit for guard. */ /* Emit compare and branch instruction to exit for guard. */
@ -216,14 +211,16 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{ {
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
int logical = (ai & 0x1f000000) == 0x0a000000;
if (ra_hasreg(ir->r)) { if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r); ra_noweak(as, ir->r);
return A64F_M(ir->r); return A64F_M(ir->r);
} else if (irref_isk(ref)) { } else if (irref_isk(ref)) {
uint32_t m;
int64_t k = get_k64val(as, ref); int64_t k = get_k64val(as, ref);
uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) : if ((ai & 0x1f000000) == 0x0a000000)
emit_isk12(irt_is64(ir->t) ? k : (int32_t)k); m = emit_isk13(k, irt_is64(ir->t));
else
m = emit_isk12(k);
if (m) if (m)
return m; return m;
} else if (mayfuse(as, ref)) { } else if (mayfuse(as, ref)) {
@ -235,7 +232,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
(IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
IRIns *irl = IR(ir->op1); IRIns *irl = IR(ir->op1);
if (sh == A64SH_LSL && if (sh == A64SH_LSL &&
irl->o == IR_CONV && !logical && irl->o == IR_CONV &&
irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
shift <= 4 && shift <= 4 &&
canfuse(as, irl)) { canfuse(as, irl)) {
@ -245,11 +242,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
Reg m = ra_alloc1(as, ir->op1, allow); Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_SH(sh, shift); return A64F_M(m) | A64F_SH(sh, shift);
} }
} else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) { } else if (ir->o == IR_CONV &&
Reg m = ra_alloc1(as, ir->op1, allow);
int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
} else if (ir->o == IR_CONV && !logical &&
ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) { ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
Reg m = ra_alloc1(as, ir->op1, allow); Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_EX(A64EX_SXTW); return A64F_M(m) | A64F_EX(A64EX_SXTW);
@ -426,18 +419,13 @@ static int asm_fuseorshift(ASMState *as, IRIns *ir)
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{ {
uint32_t n, nargs = CCI_XNARGS(ci); uint32_t n, nargs = CCI_XNARGS(ci);
int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7; int32_t ofs = 0;
Reg gpr, fpr = REGARG_FIRSTFPR; Reg gpr, fpr = REGARG_FIRSTFPR;
if (ci->func) if (ci->func)
emit_call(as, ci->func); emit_call(as, ci->func);
for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
as->cost[gpr] = REGCOST(~0u, ASMREF_L); as->cost[gpr] = REGCOST(~0u, ASMREF_L);
gpr = REGARG_FIRSTGPR; gpr = REGARG_FIRSTGPR;
#if LJ_HASFFI && LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) {
fpr = REGARG_LASTFPR+1;
}
#endif
for (n = 0; n < nargs; n++) { /* Setup args. */ for (n = 0; n < nargs; n++) { /* Setup args. */
IRRef ref = args[n]; IRRef ref = args[n];
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
@ -448,21 +436,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
"reg %d not free", fpr); /* Must have been evicted. */ "reg %d not free", fpr); /* Must have been evicted. */
ra_leftov(as, fpr, ref); ra_leftov(as, fpr, ref);
fpr++; fpr++;
#if LJ_HASFFI && LJ_ABI_WIN
} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
Reg rf = ra_alloc1(as, ref, RSET_FPR);
emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
#endif
} else { } else {
Reg r = ra_alloc1(as, ref, RSET_FPR); Reg r = ra_alloc1(as, ref, RSET_FPR);
int32_t al = spalign; emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
#if LJ_HASFFI && LJ_TARGET_OSX ofs += 8;
al |= irt_isnum(ir->t) ? 7 : 3;
#endif
spofs = (spofs + al) & ~al;
if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4;
emit_spstore(as, ir, r, spofs);
spofs += al + 1;
} }
} else { } else {
if (gpr <= REGARG_LASTGPR) { if (gpr <= REGARG_LASTGPR) {
@ -472,27 +449,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
gpr++; gpr++;
} else { } else {
Reg r = ra_alloc1(as, ref, RSET_GPR); Reg r = ra_alloc1(as, ref, RSET_GPR);
int32_t al = spalign; emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
#if LJ_HASFFI && LJ_TARGET_OSX ofs += 8;
al |= irt_size(ir->t) - 1;
#endif
spofs = (spofs + al) & ~al;
if (al >= 3) {
if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4;
emit_spstore(as, ir, r, spofs);
} else {
lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1);
emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs);
}
spofs += al + 1;
} }
} }
#if LJ_HASFFI && LJ_TARGET_OSX
} else { /* Marker for start of varargs. */
gpr = REGARG_LASTGPR+1;
fpr = REGARG_LASTFPR+1;
spalign = 7;
#endif
} }
} }
} }
@ -558,6 +518,8 @@ static void asm_retf(ASMState *as, IRIns *ir)
as->topslot -= (BCReg)delta; as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0; if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
/* Need to force a spill on REF_BASE now to update the stack slot. */
emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
emit_setgl(as, base, jit_base); emit_setgl(as, base, jit_base);
emit_addptr(as, base, -8*delta); emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE); asm_guardcc(as, CC_NE);
@ -681,22 +643,25 @@ static void asm_strto(ASMState *as, IRIns *ir)
{ {
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2]; IRRef args[2];
Reg tmp; Reg dest = 0, tmp;
int destused = ra_used(ir);
int32_t ofs = 0; int32_t ofs = 0;
ra_evictset(as, RSET_SCRATCH); ra_evictset(as, RSET_SCRATCH);
if (ra_used(ir)) { if (destused) {
if (ra_hasspill(ir->s)) { if (ra_hasspill(ir->s)) {
ofs = sps_scale(ir->s); ofs = sps_scale(ir->s);
destused = 0;
if (ra_hasreg(ir->r)) { if (ra_hasreg(ir->r)) {
ra_free(as, ir->r); ra_free(as, ir->r);
ra_modified(as, ir->r); ra_modified(as, ir->r);
emit_spload(as, ir, ir->r, ofs); emit_spload(as, ir, ir->r, ofs);
} }
} else { } else {
Reg dest = ra_dest(as, ir, RSET_FPR); dest = ra_dest(as, ir, RSET_FPR);
}
}
if (destused)
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
}
}
asm_guardcnb(as, A64I_CBZ, RID_RET); asm_guardcnb(as, A64I_CBZ, RID_RET);
args[0] = ir->op1; /* GCstr *str */ args[0] = ir->op1; /* GCstr *str */
args[1] = ASMREF_TMP1; /* TValue *n */ args[1] = ASMREF_TMP1; /* TValue *n */
@ -787,75 +752,113 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
int destused = ra_used(ir); int destused = ra_used(ir);
Reg dest = ra_dest(as, ir, allow); Reg dest = ra_dest(as, ir, allow);
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
Reg tmp = RID_TMP, type = RID_NONE, key = RID_NONE, tkey; Reg key = 0, tmp = RID_TMP;
Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
IRRef refkey = ir->op2; IRRef refkey = ir->op2;
IRIns *irkey = IR(refkey); IRIns *irkey = IR(refkey);
int isk = irref_isk(refkey); int isk = irref_isk(ir->op2);
IRType1 kt = irkey->t; IRType1 kt = irkey->t;
uint32_t k = 0; uint32_t k = 0;
uint32_t khash; uint32_t khash;
MCLabel l_end, l_loop; MCLabel l_end, l_loop, l_next;
rset_clear(allow, tab); rset_clear(allow, tab);
/* Allocate register for tkey outside of the loop. */ if (!isk) {
if (isk) { key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
int64_t kk; rset_clear(allow, key);
if (irt_isaddr(kt)) { if (!irt_isstr(kt)) {
kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; tmp = ra_scratch(as, allow);
rset_clear(allow, tmp);
}
} else if (irt_isnum(kt)) { } else if (irt_isnum(kt)) {
kk = (int64_t)ir_knum(irkey)->u64; int64_t val = (int64_t)ir_knum(irkey)->u64;
/* Assumes -0.0 is already canonicalized to +0.0. */ if (!(k = emit_isk12(val))) {
key = ra_allock(as, val, allow);
rset_clear(allow, key);
}
} else if (!irt_ispri(kt)) {
if (!(k = emit_isk12(irkey->i))) {
key = ra_alloc1(as, refkey, allow);
rset_clear(allow, key);
}
}
/* Allocate constants early. */
if (irt_isnum(kt)) {
if (!isk) {
tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
rset_clear(allow, tisnum);
}
} else if (irt_isaddr(kt)) {
if (isk) {
int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
scr = ra_allock(as, kk, allow);
} else {
scr = ra_scratch(as, allow);
}
rset_clear(allow, scr);
} else { } else {
lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
kk = ~((int64_t)~irt_toitype(kt) << 47); type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
} scr = ra_scratch(as, rset_clear(allow, type));
k = emit_isk12(kk); rset_clear(allow, scr);
tkey = k ? 0 : ra_allock(as, kk, allow);
} else {
tkey = ra_scratch(as, allow);
} }
/* Key not found in chain: jump to exit (if merged) or load niltv. */ /* Key not found in chain: jump to exit (if merged) or load niltv. */
l_end = emit_label(as); l_end = emit_label(as);
as->invmcp = NULL; as->invmcp = NULL;
if (merge == IR_NE) { if (merge == IR_NE)
asm_guardcc(as, CC_AL); asm_guardcc(as, CC_AL);
} else if (destused) { else if (destused)
uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val)); emit_loada(as, dest, niltvg(J2G(as->J)));
lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
}
/* Follow hash chain until the end. */ /* Follow hash chain until the end. */
l_loop = --as->mcp; l_loop = --as->mcp;
if (destused) emit_n(as, A64I_CMPx^A64I_K12^0, dest);
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
l_next = emit_label(as);
/* Type and value comparison. */ /* Type and value comparison. */
if (merge == IR_EQ) if (merge == IR_EQ)
asm_guardcc(as, CC_EQ); asm_guardcc(as, CC_EQ);
else else
emit_cond_branch(as, CC_EQ, l_end); emit_cond_branch(as, CC_EQ, l_end);
emit_nm(as, A64I_CMPx^k, tmp, tkey);
if (!destused)
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
*l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
/* Construct tkey as canonicalized or tagged key. */
if (!isk) {
if (irt_isnum(kt)) { if (irt_isnum(kt)) {
key = ra_alloc1(as, refkey, RSET_FPR); if (isk) {
emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey); /* Assumes -0.0 is already canonicalized to +0.0. */
/* A64I_FMOV_R_D from key to tkey done below. */ if (k)
emit_n(as, A64I_CMPx^k, tmp);
else
emit_nm(as, A64I_CMPx, key, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
} else { } else {
lj_assertA(irt_isaddr(kt), "bad HREF key type"); emit_nm(as, A64I_FCMPd, key, ftmp);
key = ra_alloc1(as, refkey, allow); emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key)); emit_cond_branch(as, CC_LO, l_next);
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type); emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
} }
} else if (irt_isaddr(kt)) {
if (isk) {
emit_nm(as, A64I_CMPx, scr, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
} else {
emit_nm(as, A64I_CMPx, tmp, scr);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
}
} else {
emit_nm(as, A64I_CMPx, scr, type);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
} }
*l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
if (!isk && irt_isaddr(kt)) {
type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
rset_clear(allow, type);
}
/* Load main position relative to tab->node into dest. */ /* Load main position relative to tab->node into dest. */
khash = isk ? ir_khash(as, irkey) : 1; khash = isk ? ir_khash(as, irkey) : 1;
if (khash == 0) { if (khash == 0) {
@ -869,6 +872,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_dnm(as, A64I_ANDw, dest, dest, tmphash); emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
} else if (irt_isstr(kt)) { } else if (irt_isstr(kt)) {
/* Fetch of str->sid is cheaper than ra_allock. */
emit_dnm(as, A64I_ANDw, dest, dest, tmp); emit_dnm(as, A64I_ANDw, dest, dest, tmp);
emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
@ -877,18 +881,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
emit_dnm(as, A64I_SUBw, dest, dest, tmp); emit_dnm(as, A64I_SUBw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest); emit_dnm(as, A64I_EORw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
emit_dnm(as, A64I_SUBw, tmp, tmp, dest); emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
emit_dnm(as, A64I_EORw, tmp, tmp, dest);
if (irt_isnum(kt)) { if (irt_isnum(kt)) {
emit_dnm(as, A64I_EORw, tmp, tkey, dest);
emit_dnm(as, A64I_ADDw, dest, dest, dest); emit_dnm(as, A64I_ADDw, dest, dest, dest);
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey); emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_nm(as, A64I_FCMPZd, (key & 31), 0); emit_dm(as, A64I_MOVw, tmp, dest);
emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31)); emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
} else { } else {
emit_dnm(as, A64I_EORw, tmp, key, dest); checkmclim(as);
emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key); emit_dm(as, A64I_MOVw, tmp, key);
emit_dnm(as, A64I_EORw, dest, dest,
ra_allock(as, irt_toitype(kt) << 15, allow));
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_dm(as, A64I_MOVx, dest, key);
} }
} }
} }
@ -903,7 +912,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
int bigofs = !emit_checkofs(A64I_LDRx, kofs); int bigofs = !emit_checkofs(A64I_LDRx, kofs);
Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
Reg idx = node; Reg key, idx = node;
RegSet allow = rset_exclude(RSET_GPR, node); RegSet allow = rset_exclude(RSET_GPR, node);
uint64_t k; uint64_t k;
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@ -922,8 +931,9 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
} else { } else {
k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
} }
emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow)); key = ra_scratch(as, allow);
emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs); emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
emit_lso(as, A64I_LDRx, key, idx, kofs);
if (bigofs) if (bigofs)
emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node)); emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
} }
@ -931,30 +941,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); if (irref_isk(ir->op1)) {
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v); emit_lsptr(as, A64I_LDRx, dest, v);
} else { } else {
if (guarded) Reg uv = ra_scratch(as, RSET_GPR);
asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP); Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) if (ir->o == IR_UREFC) {
emit_opk(as, A64I_ADDx, dest, dest, asm_guardcc(as, CC_NE);
emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
emit_opk(as, A64I_ADDx, dest, uv,
(int32_t)offsetof(GCupval, tv), RSET_GPR); (int32_t)offsetof(GCupval, tv), RSET_GPR);
else emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadu64(as, dest, k);
} else { } else {
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR), emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
} }
emit_lso(as, A64I_LDRx, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
} }
} }
@ -1059,7 +1063,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
static void asm_ahuvload(ASMState *as, IRIns *ir) static void asm_ahuvload(ASMState *as, IRIns *ir)
{ {
Reg idx, tmp; Reg idx, tmp, type;
int32_t ofs = 0; int32_t ofs = 0;
RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
@ -1078,9 +1082,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
} else { } else {
tmp = ra_scratch(as, gpr); tmp = ra_scratch(as, gpr);
} }
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx); type = ra_scratch(as, rset_clear(gpr, tmp));
rset_clear(gpr, idx); idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
/* Always do the type check, even if the load result is unused. */ /* Always do the type check, even if the load result is unused. */
asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
@ -1088,10 +1091,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t)); "bad load type %d", irt_type(ir->t));
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
ra_allock(as, LJ_TISNUM << 15, gpr), tmp); ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
} else if (irt_isaddr(ir->t)) { } else if (irt_isaddr(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP); emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp); emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
} else if (irt_isnil(ir->t)) { } else if (irt_isnil(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
} else { } else {
@ -1214,8 +1217,9 @@ dotypecheck:
emit_nm(as, A64I_CMPx, emit_nm(as, A64I_CMPx,
ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
} else { } else {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP); Reg type = ra_scratch(as, allow);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp); emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
} }
emit_lso(as, A64I_LDRx, tmp, base, ofs); emit_lso(as, A64I_LDRx, tmp, base, ofs);
return; return;
@ -1285,9 +1289,8 @@ static void asm_tbar(ASMState *as, IRIns *ir)
Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
Reg mark = RID_TMP; Reg mark = RID_TMP;
MCLabel l_end = emit_label(as); MCLabel l_end = emit_label(as);
emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
/* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
emit_setgl(as, tab, gc.grayagain); emit_setgl(as, tab, gc.grayagain);
emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
emit_getgl(as, link, gc.grayagain); emit_getgl(as, link, gc.grayagain);
@ -1301,6 +1304,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
IRRef args[2]; IRRef args[2];
MCLabel l_end; MCLabel l_end;
RegSet allow = RSET_GPR;
Reg obj, val, tmp; Reg obj, val, tmp;
/* No need for other object barriers (yet). */ /* No need for other object barriers (yet). */
lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
@ -1311,8 +1315,9 @@ static void asm_obar(ASMState *as, IRIns *ir)
asm_gencall(as, ci, args); asm_gencall(as, ci, args);
emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
obj = IR(ir->op1)->r; obj = IR(ir->op1)->r;
tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); tmp = ra_scratch(as, rset_exclude(allow, obj));
emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end); emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
emit_cond_branch(as, CC_EQ, l_end); emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
@ -1359,12 +1364,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
if (irref_isk(lref)) if (irref_isk(lref))
return 1; /* But swap constants to the right. */ return 1; /* But swap constants to the right. */
ir = IR(rref); ir = IR(rref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 0; /* Don't swap fusable operands to the left. */ return 0; /* Don't swap fusable operands to the left. */
ir = IR(lref); ir = IR(lref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 1; /* But swap fusable operands to the right. */ return 1; /* But swap fusable operands to the right. */
@ -1410,12 +1415,13 @@ static void asm_intneg(ASMState *as, IRIns *ir)
static void asm_intmul(ASMState *as, IRIns *ir) static void asm_intmul(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
if (irt_isguard(ir->t)) { /* IR_MULOV */ if (irt_isguard(ir->t)) { /* IR_MULOV */
asm_guardcc(as, CC_NE); asm_guardcc(as, CC_NE);
emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest); emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
emit_dnm(as, A64I_SMULL, dest, right, left); emit_dnm(as, A64I_SMULL, dest, right, left);
} else { } else {
emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
@ -1675,15 +1681,16 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
if (asm_swapops(as, blref, brref)) { if (asm_swapops(as, blref, brref)) {
Reg tmp = blref; blref = brref; brref = tmp; Reg tmp = blref; blref = brref; brref = tmp;
} }
bleft = ra_alloc1(as, blref, RSET_GPR);
if (irref_isk(brref)) { if (irref_isk(brref)) {
uint64_t k = get_k64val(as, brref); uint64_t k = get_k64val(as, brref);
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) && if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft, asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
emit_ctz64(k))) ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
return; return;
}
m2 = emit_isk13(k, irt_is64(irl->t)); m2 = emit_isk13(k, irt_is64(irl->t));
} }
bleft = ra_alloc1(as, blref, RSET_GPR);
ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
if (!m2) if (!m2)
m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
@ -1758,28 +1765,37 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno) IRIns *irp, RegSet allow, ExitNo exitno)
{ {
Reg pbase;
uint32_t k; uint32_t k;
Reg pbase = RID_BASE;
if (irp) { if (irp) {
if (!ra_hasspill(irp->s)) {
pbase = irp->r; pbase = irp->r;
if (!ra_hasreg(pbase)) lj_assertA(ra_hasreg(pbase), "base reg lost");
pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET); } else if (allow) {
pbase = rset_pickbot(allow);
} else {
pbase = RID_RET;
emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
}
} else {
pbase = RID_BASE;
} }
emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
if (pbase & 0x80) /* Restore temp. register. */
emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
k = emit_isk12((8*topslot)); k = emit_isk12((8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, A64I_CMPx^k, RID_TMP); emit_n(as, A64I_CMPx^k, RID_TMP);
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31)); emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
(int32_t)offsetof(lua_State, maxstack)); (int32_t)offsetof(lua_State, maxstack));
if (pbase & 0x40) { if (irp) { /* Must not spill arbitrary registers in head of side trace. */
emit_getgl(as, (pbase & 31), jit_base); if (ra_hasspill(irp->s))
if (pbase & 0x80) /* Save temp register. */ emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0); emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
} if (ra_hasspill(irp->s) && !allow)
emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
} else {
emit_getgl(as, RID_TMP, cur_L); emit_getgl(as, RID_TMP, cur_L);
}
} }
/* Restore Lua stack from on-trace state. */ /* Restore Lua stack from on-trace state. */
@ -1821,7 +1837,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
/* Marker to prevent patching the GC check exit. */ /* Marker to prevent patching the GC check exit. */
#define ARM64_NOPATCH_GC_CHECK \ #define ARM64_NOPATCH_GC_CHECK \
(A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO)) (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
/* Check GC threshold and do one or more GC steps. */ /* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as) static void asm_gc_check(ASMState *as)
@ -1876,40 +1892,46 @@ static void asm_loop_tail_fixup(ASMState *as)
/* -- Head of trace ------------------------------------------------------- */ /* -- Head of trace ------------------------------------------------------- */
/* Reload L register from g->cur_L. */
static void asm_head_lreg(ASMState *as)
{
IRIns *ir = IR(ASMREF_L);
if (ra_used(ir)) {
Reg r = ra_dest(as, ir, RSET_GPR);
emit_getgl(as, r, cur_L);
ra_evictk(as);
}
}
/* Coalesce BASE register for a root trace. */ /* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as) static void asm_head_root_base(ASMState *as)
{ {
IRIns *ir = IR(REF_BASE); IRIns *ir;
Reg r = ir->r; asm_head_lreg(as);
if (ra_hasreg(r)) { ir = IR(REF_BASE);
ra_free(as, r); if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ra_spill(as, ir);
ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ra_destreg(as, ir, RID_BASE);
if (r != RID_BASE)
emit_movrr(as, ir, r, RID_BASE);
}
} }
/* Coalesce BASE register for a side trace. */ /* Coalesce BASE register for a side trace. */
static Reg asm_head_side_base(ASMState *as, IRIns *irp) static Reg asm_head_side_base(ASMState *as, IRIns *irp)
{ {
IRIns *ir = IR(REF_BASE); IRIns *ir;
Reg r = ir->r; asm_head_lreg(as);
if (ra_hasreg(r)) { ir = IR(REF_BASE);
ra_free(as, r); if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ra_spill(as, ir);
ir->r = RID_INIT; /* No inheritance for modified BASE register. */ if (ra_hasspill(irp->s)) {
if (irp->r == r) { return ra_dest(as, ir, RSET_GPR);
return r; /* Same BASE register already coalesced. */
} else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
/* Move from coalesced parent reg. */
emit_movrr(as, ir, r, irp->r);
return irp->r;
} else { } else {
emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ Reg r = irp->r;
lj_assertA(ra_hasreg(r), "base reg lost");
if (r != ir->r && !rset_test(as->freeset, r))
ra_restore(as, regcost_ref(as->cost[r]));
ra_destreg(as, ir, r);
return r;
} }
}
return RID_NONE;
} }
/* -- Tail of trace ------------------------------------------------------- */ /* -- Tail of trace ------------------------------------------------------- */
@ -1953,47 +1975,20 @@ static void asm_tail_prep(ASMState *as)
/* Ensure there are enough stack slots for call arguments. */ /* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{ {
#if LJ_HASFFI
uint32_t i, nargs = CCI_XNARGS(ci);
if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) ||
(LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) {
IRRef args[CCI_NARGS_MAX*2]; IRRef args[CCI_NARGS_MAX*2];
int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; uint32_t i, nargs = CCI_XNARGS(ci);
int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots; int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
asm_collectargs(as, ir, ci, args); asm_collectargs(as, ir, ci, args);
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) nfpr = 0;
#endif
for (i = 0; i < nargs; i++) { for (i = 0; i < nargs; i++) {
int al = spalign; if (args[i] && irt_isfp(IR(args[i])->t)) {
if (!args[i]) { if (nfpr > 0) nfpr--; else nslots += 2;
#if LJ_TARGET_OSX
/* Marker for start of varaargs. */
nfpr = 0;
ngpr = 0;
spalign = 7;
#endif
} else if (irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) { nfpr--; continue; }
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
#elif LJ_TARGET_OSX
al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
#endif
} else { } else {
if (ngpr > 0) { ngpr--; continue; } if (ngpr > 0) ngpr--; else nslots += 2;
#if LJ_TARGET_OSX
al |= irt_size(IR(args[i])->t) - 1;
#endif
} }
spofs = (spofs + 2*al+1) & ~al; /* Align and bump stack pointer. */
} }
nslots = (spofs + 3) >> 2;
if (nslots > as->evenspill) /* Leave room for args in stack slots. */ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots; as->evenspill = nslots;
} return REGSP_HINT(RID_RET);
#endif
return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
} }
static void asm_setup_target(ASMState *as) static void asm_setup_target(ASMState *as)

View File

@ -1,6 +1,6 @@
/* /*
** MIPS IR assembler (SSA IR -> machine code). ** MIPS IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
/* -- Register allocator extensions --------------------------------------- */ /* -- Register allocator extensions --------------------------------------- */
@ -456,7 +456,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_addptr(as, base, -8*delta); emit_addptr(as, base, -8*delta);
asm_guard(as, MIPSI_BNE, RID_TMP, asm_guard(as, MIPSI_BNE, RID_TMP,
ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
emit_tsi(as, MIPSI_AL, RID_TMP, base, (LJ_BE || LJ_FR2) ? -8 : -4); emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
} }
/* -- Buffer operations --------------------------------------------------- */ /* -- Buffer operations --------------------------------------------------- */
@ -656,8 +656,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
#else #else
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp); emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
#endif #endif
emit_lsptr(as, MIPSI_LDC1, (tmp & 31), emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)&as->J->k64[LJ_K64_2P63], (void *)&as->J->k64[LJ_K64_2P63],
@ -673,8 +673,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
#else #else
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp); emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
#endif #endif
emit_lsptr(as, MIPSI_LWC1, (tmp & 31), emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)&as->J->k32[LJ_K32_2P63], (void *)&as->J->k32[LJ_K32_2P63],
@ -690,8 +690,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
MIPSIns mi = irt_is64(ir->t) ? MIPSIns mi = irt_is64(ir->t) ?
(st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
(st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, tmp); emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left);
emit_fg(as, mi, tmp, left); emit_fg(as, mi, left, left);
#endif #endif
} }
} }
@ -1207,29 +1207,22 @@ nolo:
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); if (irref_isk(ir->op1)) {
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else { } else {
if (guarded) Reg uv = ra_scratch(as, RSET_GPR);
asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO); Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) if (ir->o == IR_UREFC) {
emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv)); asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
else emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v)); emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
if (guarded)
emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, dest, o);
} else { } else {
emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR), emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
} }
emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
} }
} }

View File

@ -1,6 +1,6 @@
/* /*
** PPC IR assembler (SSA IR -> machine code). ** PPC IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
/* -- Register allocator extensions --------------------------------------- */ /* -- Register allocator extensions --------------------------------------- */
@ -840,30 +840,23 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); if (irref_isk(ir->op1)) {
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
} else { } else {
if (guarded) { Reg uv = ra_scratch(as, RSET_GPR);
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ); Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_ai(as, PPCI_CMPWI, RID_TMP, 1); emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
} emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
if (ir->o == IR_UREFC) emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else { } else {
emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR), emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
} }
emit_tai(as, PPCI_LWZ, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
} }
} }

View File

@ -1,6 +1,6 @@
/* /*
** x86/x64 IR assembler (SSA IR -> machine code). ** x86/x64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
/* -- Guard handling ------------------------------------------------------ */ /* -- Guard handling ------------------------------------------------------ */
@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
/* Check if there's no conflicting instruction between curins and ref. /* Check if there's no conflicting instruction between curins and ref.
** Also avoid fusing loads if there are multiple references. ** Also avoid fusing loads if there are multiple references.
*/ */
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check) static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
{ {
IRIns *ir = as->ir; IRIns *ir = as->ir;
IRRef i = as->curins; IRRef i = as->curins;
@ -118,9 +118,7 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
while (--i > ref) { while (--i > ref) {
if (ir[i].o == conflict) if (ir[i].o == conflict)
return 0; /* Conflict found. */ return 0; /* Conflict found. */
else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS)) else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
return 0;
else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
return 0; return 0;
} }
return 1; /* Ok, no conflict. */ return 1; /* Ok, no conflict. */
@ -136,14 +134,13 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY"); lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
/* We can avoid the FLOAD of t->array for colocated arrays. */ /* We can avoid the FLOAD of t->array for colocated arrays. */
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) { !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
return irb->op1; /* Table obj. */ return irb->op1; /* Table obj. */
} }
} else if (irb->o == IR_ADD && irref_isk(irb->op2)) { } else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
/* Fuse base offset (vararg load). */ /* Fuse base offset (vararg load). */
IRIns *irk = IR(irb->op2); as->mrm.ofs = IR(irb->op2)->i;
as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
return irb->op1; return irb->op1;
} }
return ref; /* Otherwise use the given array base. */ return ref; /* Otherwise use the given array base. */
@ -458,7 +455,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) { if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 2) && noconflict(as, ref, IR_RETF, 0) &&
!(LJ_GC64 && irt_isaddr(ir->t))) { !(LJ_GC64 && irt_isaddr(ir->t))) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@ -469,12 +466,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
} else if (ir->o == IR_FLOAD) { } else if (ir->o == IR_FLOAD) {
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) && if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
noconflict(as, ref, IR_FSTORE, 2)) { noconflict(as, ref, IR_FSTORE, 0)) {
asm_fusefref(as, ir, xallow); asm_fusefref(as, ir, xallow);
return RID_MRM; return RID_MRM;
} }
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) && if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
!(LJ_GC64 && irt_isaddr(ir->t))) { !(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow); asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM; return RID_MRM;
@ -484,7 +481,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
** Fusing unaligned memory operands is ok on x86 (except for SIMD types). ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/ */
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
noconflict(as, ref, IR_XSTORE, 2)) { noconflict(as, ref, IR_XSTORE, 0)) {
asm_fusexref(as, ir->op1, xallow); asm_fusexref(as, ir->op1, xallow);
return RID_MRM; return RID_MRM;
} }
@ -817,7 +814,6 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_rr(as, XO_UCOMISD, left, tmp); emit_rr(as, XO_UCOMISD, left, tmp);
emit_rr(as, XO_CVTSI2SD, tmp, dest); emit_rr(as, XO_CVTSI2SD, tmp, dest);
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
checkmclim(as);
emit_rr(as, XO_CVTTSD2SI, dest, left); emit_rr(as, XO_CVTTSD2SI, dest, left);
/* Can't fuse since left is needed twice. */ /* Can't fuse since left is needed twice. */
} }
@ -860,7 +856,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
emit_rma(as, XO_MOVSD, bias, k); emit_rma(as, XO_MOVSD, bias, k);
checkmclim(as);
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
return; return;
} else { /* Integer to FP conversion. */ } else { /* Integer to FP conversion. */
@ -1177,7 +1172,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
asm_guardcc(as, CC_E); asm_guardcc(as, CC_E);
else else
emit_sjcc(as, CC_E, l_end); emit_sjcc(as, CC_E, l_end);
checkmclim(as);
if (irt_isnum(kt)) { if (irt_isnum(kt)) {
if (isk) { if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */ /* Assumes -0.0 is already canonicalized to +0.0. */
@ -1237,6 +1231,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
#endif #endif
} }
emit_sfixup(as, l_loop); emit_sfixup(as, l_loop);
checkmclim(as);
#if LJ_GC64 #if LJ_GC64
if (!isk && irt_isaddr(kt)) { if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key); emit_rr(as, XO_OR, tmp|REX_64, key);
@ -1263,7 +1258,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp); emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3); emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp); emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
checkmclim(as);
emit_shifti(as, XOg_ROL, dest, HASH_ROT2); emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest); emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
emit_shifti(as, XOg_ROL, dest, HASH_ROT1); emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
@ -1281,6 +1275,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
} else { } else {
emit_rr(as, XO_MOV, tmp, key); emit_rr(as, XO_MOV, tmp, key);
#if LJ_GC64 #if LJ_GC64
checkmclim(as);
emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
if ((as->flags & JIT_F_BMI2)) { if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 32); emit_i8(as, 32);
@ -1377,32 +1372,25 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); if (irref_isk(ir->op1)) {
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest|REX_GC64, v); emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else { } else {
Reg uv = ra_scratch(as, RSET_GPR); Reg uv = ra_scratch(as, RSET_GPR);
if (ir->o == IR_UREFC) Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
else asm_guardcc(as, CC_NE);
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); emit_i8(as, 1);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
emit_i8(as, 0);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
}
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, uv, o);
} else { } else {
emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR), emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
}
emit_rmro(as, XO_MOV, uv|REX_GC64, func,
(int32_t)offsetof(GCfuncL, uvptr) + (int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
} }
}
} }
static void asm_fref(ASMState *as, IRIns *ir) static void asm_fref(ASMState *as, IRIns *ir)
@ -1558,7 +1546,6 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (irt_islightud(ir->t)) { if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1); Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) { if (ra_hasreg(dest)) {
checkmclim(as);
asm_fuseahuref(as, ir->op1, RSET_GPR); asm_fuseahuref(as, ir->op1, RSET_GPR);
if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
@ -1606,7 +1593,6 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t)); "bad load type %d", irt_type(ir->t));
checkmclim(as);
#if LJ_GC64 #if LJ_GC64
emit_u32(as, LJ_TISNUM << 15); emit_u32(as, LJ_TISNUM << 15);
#else #else

View File

@ -1,6 +1,6 @@
/* /*
** Internal assertions. ** Internal assertions.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_assert_c #define lj_assert_c

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode instruction modes. ** Bytecode instruction modes.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_bc_c #define lj_bc_c

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode instruction format. ** Bytecode instruction format.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_BC_H #ifndef _LJ_BC_H

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode dump definitions. ** Bytecode dump definitions.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_BCDUMP_H #ifndef _LJ_BCDUMP_H
@ -46,8 +46,6 @@
#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1) #define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
#define BCDUMP_F_DETERMINISTIC 0x80000000
/* Type codes for the GC constants of a prototype. Plus length for strings. */ /* Type codes for the GC constants of a prototype. Plus length for strings. */
enum { enum {
BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64, BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64,
@ -63,7 +61,7 @@ enum {
/* -- Bytecode reader/writer ---------------------------------------------- */ /* -- Bytecode reader/writer ---------------------------------------------- */
LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
void *data, uint32_t flags); void *data, int strip);
LJ_FUNC GCproto *lj_bcread_proto(LexState *ls); LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
LJ_FUNC GCproto *lj_bcread(LexState *ls); LJ_FUNC GCproto *lj_bcread(LexState *ls);

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode reader. ** Bytecode reader.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_bcread_c #define lj_bcread_c
@ -179,7 +179,7 @@ static const void *bcread_varinfo(GCproto *pt)
} }
/* Read a single constant key/value of a template table. */ /* Read a single constant key/value of a template table. */
static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t) static void bcread_ktabk(LexState *ls, TValue *o)
{ {
MSize tp = bcread_uleb128(ls); MSize tp = bcread_uleb128(ls);
if (tp >= BCDUMP_KTAB_STR) { if (tp >= BCDUMP_KTAB_STR) {
@ -191,8 +191,6 @@ static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t)
} else if (tp == BCDUMP_KTAB_NUM) { } else if (tp == BCDUMP_KTAB_NUM) {
o->u32.lo = bcread_uleb128(ls); o->u32.lo = bcread_uleb128(ls);
o->u32.hi = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls);
} else if (t && tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */
settabV(ls->L, o, t);
} else { } else {
lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
setpriV(o, ~tp); setpriV(o, ~tp);
@ -209,15 +207,15 @@ static GCtab *bcread_ktab(LexState *ls)
MSize i; MSize i;
TValue *o = tvref(t->array); TValue *o = tvref(t->array);
for (i = 0; i < narray; i++, o++) for (i = 0; i < narray; i++, o++)
bcread_ktabk(ls, o, NULL); bcread_ktabk(ls, o);
} }
if (nhash) { /* Read hash entries. */ if (nhash) { /* Read hash entries. */
MSize i; MSize i;
for (i = 0; i < nhash; i++) { for (i = 0; i < nhash; i++) {
TValue key; TValue key;
bcread_ktabk(ls, &key, NULL); bcread_ktabk(ls, &key);
lj_assertLS(!tvisnil(&key), "nil key"); lj_assertLS(!tvisnil(&key), "nil key");
bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t); bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
} }
} }
return t; return t;
@ -283,11 +281,8 @@ static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc) static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
{ {
BCIns *bc = proto_bc(pt); BCIns *bc = proto_bc(pt);
BCIns op; bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */ pt->framesize, 0);
else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV;
else op = BC_FUNCF;
bc[0] = BCINS_AD(op, pt->framesize, 0);
bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns)); bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
/* Swap bytecode instructions if the endianess differs. */ /* Swap bytecode instructions if the endianess differs. */
if (bcread_swap(ls)) { if (bcread_swap(ls)) {
@ -400,7 +395,7 @@ static int bcread_header(LexState *ls)
bcread_byte(ls) != BCDUMP_VERSION) return 0; bcread_byte(ls) != BCDUMP_VERSION) return 0;
bcread_flags(ls) = flags = bcread_uleb128(ls); bcread_flags(ls) = flags = bcread_uleb128(ls);
if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0; if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
if ((flags & BCDUMP_F_FFI)) { if ((flags & BCDUMP_F_FFI)) {
#if LJ_HASFFI #if LJ_HASFFI
lua_State *L = ls->L; lua_State *L = ls->L;
@ -410,7 +405,7 @@ static int bcread_header(LexState *ls)
#endif #endif
} }
if ((flags & BCDUMP_F_STRIP)) { if ((flags & BCDUMP_F_STRIP)) {
ls->chunkname = lj_str_newz(ls->L, *ls->chunkarg == BCDUMP_HEAD1 ? "=?" : ls->chunkarg); ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
} else { } else {
MSize len = bcread_uleb128(ls); MSize len = bcread_uleb128(ls);
bcread_need(ls, len); bcread_need(ls, len);

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode writer. ** Bytecode writer.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_bcwrite_c #define lj_bcwrite_c
@ -27,9 +27,7 @@ typedef struct BCWriteCtx {
GCproto *pt; /* Root prototype. */ GCproto *pt; /* Root prototype. */
lua_Writer wfunc; /* Writer callback. */ lua_Writer wfunc; /* Writer callback. */
void *wdata; /* Writer callback data. */ void *wdata; /* Writer callback data. */
TValue **heap; /* Heap used for deterministic sorting. */ int strip; /* Strip debug info. */
uint32_t heapsz; /* Size of heap. */
uint32_t flags; /* BCDUMP_F_* flags. */
int status; /* Status from writer callback. */ int status; /* Status from writer callback. */
#ifdef LUA_USE_ASSERT #ifdef LUA_USE_ASSERT
global_State *g; global_State *g;
@ -71,8 +69,6 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
*p++ = BCDUMP_KTAB_NUM; *p++ = BCDUMP_KTAB_NUM;
p = lj_strfmt_wuleb128(p, o->u32.lo); p = lj_strfmt_wuleb128(p, o->u32.lo);
p = lj_strfmt_wuleb128(p, o->u32.hi); p = lj_strfmt_wuleb128(p, o->u32.hi);
} else if (tvistab(o)) { /* Write the nil value marker as a nil. */
*p++ = BCDUMP_KTAB_NIL;
} else { } else {
lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
*p++ = BCDUMP_KTAB_NIL+~itype(o); *p++ = BCDUMP_KTAB_NIL+~itype(o);
@ -80,75 +76,6 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
ctx->sb.w = p; ctx->sb.w = p;
} }
/* Compare two template table keys. */
static LJ_AINLINE int bcwrite_ktabk_lt(TValue *a, TValue *b)
{
uint32_t at = itype(a), bt = itype(b);
if (at != bt) { /* This also handles false and true keys. */
return at < bt;
} else if (at == LJ_TSTR) {
return lj_str_cmp(strV(a), strV(b)) < 0;
} else {
return a->u64 < b->u64; /* This works for numbers and integers. */
}
}
/* Insert key into a sorted heap. */
static void bcwrite_ktabk_heap_insert(TValue **heap, MSize idx, MSize end,
TValue *key)
{
MSize child;
while ((child = idx * 2 + 1) < end) {
/* Find lower of the two children. */
TValue *c0 = heap[child];
if (child + 1 < end) {
TValue *c1 = heap[child + 1];
if (bcwrite_ktabk_lt(c1, c0)) {
c0 = c1;
child++;
}
}
if (bcwrite_ktabk_lt(key, c0)) break; /* Key lower? Found our position. */
heap[idx] = c0; /* Move lower child up. */
idx = child; /* Descend. */
}
heap[idx] = key; /* Insert key here. */
}
/* Resize heap, dropping content. */
static void bcwrite_heap_resize(BCWriteCtx *ctx, uint32_t nsz)
{
lua_State *L = sbufL(&ctx->sb);
if (ctx->heapsz) {
lj_mem_freevec(G(L), ctx->heap, ctx->heapsz, TValue *);
ctx->heapsz = 0;
}
if (nsz) {
ctx->heap = lj_mem_newvec(L, nsz, TValue *);
ctx->heapsz = nsz;
}
}
/* Write hash part of template table in sorted order. */
static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash)
{
TValue **heap = ctx->heap;
MSize i = nhash;
for (;; node--) { /* Build heap. */
if (!tvisnil(&node->val)) {
bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key);
if (i == 0) break;
}
}
do { /* Drain heap. */
TValue *key = heap[0]; /* Output lowest key from top. */
bcwrite_ktabk(ctx, key, 0);
bcwrite_ktabk(ctx, (TValue *)((char *)key - offsetof(Node, key)), 1);
key = heap[--nhash]; /* Remove last key. */
bcwrite_ktabk_heap_insert(heap, 0, nhash, key); /* Re-insert. */
} while (nhash);
}
/* Write a template table. */ /* Write a template table. */
static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
{ {
@ -178,13 +105,8 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
bcwrite_ktabk(ctx, o, 1); bcwrite_ktabk(ctx, o, 1);
} }
if (nhash) { /* Write hash entries. */ if (nhash) { /* Write hash entries. */
Node *node = noderef(t->node) + t->hmask;
if ((ctx->flags & BCDUMP_F_DETERMINISTIC) && nhash > 1) {
if (ctx->heapsz < nhash)
bcwrite_heap_resize(ctx, t->hmask + 1);
bcwrite_ktab_sorted_hash(ctx, node, nhash);
} else {
MSize i = nhash; MSize i = nhash;
Node *node = noderef(t->node) + t->hmask;
for (;; node--) for (;; node--)
if (!tvisnil(&node->val)) { if (!tvisnil(&node->val)) {
bcwrite_ktabk(ctx, &node->key, 0); bcwrite_ktabk(ctx, &node->key, 0);
@ -192,7 +114,6 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
if (--i == 0) break; if (--i == 0) break;
} }
} }
}
} }
/* Write GC constants of a prototype. */ /* Write GC constants of a prototype. */
@ -348,7 +269,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
p = lj_strfmt_wuleb128(p, pt->sizekgc); p = lj_strfmt_wuleb128(p, pt->sizekgc);
p = lj_strfmt_wuleb128(p, pt->sizekn); p = lj_strfmt_wuleb128(p, pt->sizekn);
p = lj_strfmt_wuleb128(p, pt->sizebc-1); p = lj_strfmt_wuleb128(p, pt->sizebc-1);
if (!(ctx->flags & BCDUMP_F_STRIP)) { if (!ctx->strip) {
if (proto_lineinfo(pt)) if (proto_lineinfo(pt))
sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
p = lj_strfmt_wuleb128(p, sizedbg); p = lj_strfmt_wuleb128(p, sizedbg);
@ -396,10 +317,11 @@ static void bcwrite_header(BCWriteCtx *ctx)
*p++ = BCDUMP_HEAD2; *p++ = BCDUMP_HEAD2;
*p++ = BCDUMP_HEAD3; *p++ = BCDUMP_HEAD3;
*p++ = BCDUMP_VERSION; *p++ = BCDUMP_VERSION;
*p++ = (ctx->flags & (BCDUMP_F_STRIP | BCDUMP_F_FR2)) + *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
LJ_BE*BCDUMP_F_BE + LJ_BE*BCDUMP_F_BE +
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0); ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
if (!(ctx->flags & BCDUMP_F_STRIP)) { LJ_FR2*BCDUMP_F_FR2;
if (!ctx->strip) {
p = lj_strfmt_wuleb128(p, len); p = lj_strfmt_wuleb128(p, len);
p = lj_buf_wmem(p, name, len); p = lj_buf_wmem(p, name, len);
} }
@ -430,16 +352,14 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
/* Write bytecode for a prototype. */ /* Write bytecode for a prototype. */
int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
uint32_t flags) int strip)
{ {
BCWriteCtx ctx; BCWriteCtx ctx;
int status; int status;
ctx.pt = pt; ctx.pt = pt;
ctx.wfunc = writer; ctx.wfunc = writer;
ctx.wdata = data; ctx.wdata = data;
ctx.heapsz = 0; ctx.strip = strip;
if ((bc_op(proto_bc(pt)[0]) != BC_NOT) == LJ_FR2) flags |= BCDUMP_F_FR2;
ctx.flags = flags;
ctx.status = 0; ctx.status = 0;
#ifdef LUA_USE_ASSERT #ifdef LUA_USE_ASSERT
ctx.g = G(L); ctx.g = G(L);
@ -448,7 +368,6 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
if (status == 0) status = ctx.status; if (status == 0) status = ctx.status;
lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb); lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
bcwrite_heap_resize(&ctx, 0);
return status; return status;
} }

View File

@ -1,6 +1,6 @@
/* /*
** Buffer handling. ** Buffer handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_buf_c #define lj_buf_c
@ -92,8 +92,10 @@ void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
char *b = sb->b; char *b = sb->b;
MSize osz = (MSize)(sb->e - b); MSize osz = (MSize)(sb->e - b);
if (osz > 2*LJ_MIN_SBUF) { if (osz > 2*LJ_MIN_SBUF) {
MSize n = (MSize)(sb->w - b);
b = lj_mem_realloc(L, b, osz, (osz >> 1)); b = lj_mem_realloc(L, b, osz, (osz >> 1));
sb->w = sb->b = b; /* Not supposed to keep data across shrinks. */ sb->b = b;
sb->w = b + n;
sb->e = b + (osz >> 1); sb->e = b + (osz >> 1);
} }
lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt"); lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");

View File

@ -1,6 +1,6 @@
/* /*
** Buffer handling. ** Buffer handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_BUF_H #ifndef _LJ_BUF_H

View File

@ -1,6 +1,6 @@
/* /*
** C data arithmetic. ** C data arithmetic.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "lj_obj.h" #include "lj_obj.h"
@ -44,13 +44,9 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
p = (uint8_t *)cdata_getptr(p, ct->size); p = (uint8_t *)cdata_getptr(p, ct->size);
if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct); if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
} else if (ctype_isfunc(ct->info)) { } else if (ctype_isfunc(ct->info)) {
CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0;
p = (uint8_t *)*(void **)p; p = (uint8_t *)*(void **)p;
ct = ctype_get(cts, ct = ctype_get(cts,
lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR)); lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
if (i) { /* cts->tab may have been reallocated. */
ca->ct[0] = ctype_get(cts, id0);
}
} }
if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
ca->ct[i] = ct; ca->ct[i] = ct;
@ -349,7 +345,9 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
if (LJ_LIKELY(tvisint(o))) { if (LJ_LIKELY(tvisint(o))) {
return (uint32_t)intV(o); return (uint32_t)intV(o);
} else { } else {
return (uint32_t)lj_num2bit(numV(o)); int32_t i = lj_num2bit(numV(o));
if (LJ_DUALNUM) setintV(o, i);
return (uint32_t)i;
} }
} }

View File

@ -1,6 +1,6 @@
/* /*
** C data arithmetic. ** C data arithmetic.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_CARITH_H #ifndef _LJ_CARITH_H

View File

@ -1,6 +1,6 @@
/* /*
** FFI C call handling. ** FFI C call handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "lj_obj.h" #include "lj_obj.h"
@ -20,15 +20,12 @@
#if LJ_TARGET_X86 #if LJ_TARGET_X86
/* -- x86 calling conventions --------------------------------------------- */ /* -- x86 calling conventions --------------------------------------------- */
#define CCALL_PUSH(arg) \
*(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
#if LJ_ABI_WIN #if LJ_ABI_WIN
#define CCALL_HANDLE_STRUCTRET \ #define CCALL_HANDLE_STRUCTRET \
/* Return structs bigger than 8 by reference (on stack only). */ \ /* Return structs bigger than 8 by reference (on stack only). */ \
cc->retref = (sz > 8); \ cc->retref = (sz > 8); \
if (cc->retref) CCALL_PUSH(dp); if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
@ -43,7 +40,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
CCALL_PUSH(dp); \ cc->stack[nsp++] = (GPRArg)dp; \
} else { /* Struct with single FP field ends up in FPR. */ \ } else { /* Struct with single FP field ends up in FPR. */ \
cc->resx87 = ccall_classify_struct(cts, ctr); \ cc->resx87 = ccall_classify_struct(cts, ctr); \
} }
@ -59,7 +56,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
CCALL_PUSH(dp); cc->stack[nsp++] = (GPRArg)dp;
#endif #endif
@ -70,7 +67,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
CCALL_PUSH(dp); \ cc->stack[nsp++] = (GPRArg)dp; \
} }
#endif #endif
@ -281,8 +278,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -348,6 +345,7 @@
goto done; \ goto done; \
} else { \ } else { \
nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \ } \
} else { /* Try to pass argument in GPRs. */ \ } else { /* Try to pass argument in GPRs. */ \
if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
@ -358,6 +356,7 @@
goto done; \ goto done; \
} else { \ } else { \
ngpr = maxgpr; /* Prevent reordering. */ \ ngpr = maxgpr; /* Prevent reordering. */ \
if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \ } \
} }
@ -472,8 +471,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -566,8 +565,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -699,11 +698,10 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
if (ccall_struct_reg(cc, cts, dp, rcl)) { if (ccall_struct_reg(cc, cts, dp, rcl)) {
/* Register overflow? Pass on stack. */ /* Register overflow? Pass on stack. */
MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR; MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
if (nsp + sz > CCALL_SIZE_STACK) if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
return 1; /* Too many arguments. */ cc->nsp = nsp + n;
cc->nsp = nsp + sz; memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
memcpy((uint8_t *)cc->stack + nsp, dp, sz);
} }
return 0; /* Ok. */ return 0; /* Ok. */
} }
@ -781,24 +779,17 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{ {
CTSize sz = ct->size; CTSize sz = ct->size;
unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
while (ct->sib && n <= 4) { while (ct->sib) {
unsigned int m = 1;
CType *sct; CType *sct;
ct = ctype_get(cts, ct->sib); ct = ctype_get(cts, ct->sib);
if (ctype_isfield(ct->info)) { if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct); sct = ctype_rawchild(cts, ct);
if (ctype_isarray(sct->info)) {
CType *cct = ctype_rawchild(cts, sct);
if (!cct->size) continue;
m = sct->size / cct->size;
sct = cct;
}
if (ctype_isfp(sct->info)) { if (ctype_isfp(sct->info)) {
r |= sct->size; r |= sct->size;
if (!isu) n += m; else if (n < m) n = m; if (!isu) n++; else if (n == 0) n = 1;
} else if (ctype_iscomplex(sct->info)) { } else if (ctype_iscomplex(sct->info)) {
r |= (sct->size >> 1); r |= (sct->size >> 1);
if (!isu) n += 2*m; else if (n < 2*m) n = 2*m; if (!isu) n += 2; else if (n < 2) n = 2;
} else if (ctype_isstruct(sct->info)) { } else if (ctype_isstruct(sct->info)) {
goto substruct; goto substruct;
} else { } else {
@ -810,11 +801,10 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
sct = ctype_rawchild(cts, ct); sct = ctype_rawchild(cts, ct);
substruct: substruct:
if (sct->size > 0) { if (sct->size > 0) {
unsigned int s = ccall_classify_struct(cts, sct), sn; unsigned int s = ccall_classify_struct(cts, sct);
if (s <= 1) goto noth; if (s <= 1) goto noth;
r |= (s & 255); r |= (s & 255);
sn = (s >> 8) * m; if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
if (!isu) n += sn; else if (n < sn) n = sn;
} }
} }
} }
@ -993,14 +983,6 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
fid = ctf->sib; fid = ctf->sib;
} }
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((ct->info & CTF_VARARG)) {
nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
ngpr = maxgpr;
nfpr = CCALL_NARG_FPR;
}
#endif
/* Walk through all passed arguments. */ /* Walk through all passed arguments. */
for (o = L->base+1, narg = 1; o < top; o++, narg++) { for (o = L->base+1, narg = 1; o < top; o++, narg++) {
CTypeID did; CTypeID did;
@ -1037,31 +1019,25 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_STRUCTARG CCALL_HANDLE_STRUCTARG
} else if (ctype_iscomplex(d->info)) { } else if (ctype_iscomplex(d->info)) {
CCALL_HANDLE_COMPLEXARG CCALL_HANDLE_COMPLEXARG
} else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) { } else {
sz = CTSIZE_PTR; sz = CTSIZE_PTR;
} }
n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */ sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
CCALL_HANDLE_REGARG /* Handle register arguments. */ CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */ /* Otherwise pass argument on stack. */
if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */ if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
MSize align = (1u << ctype_align(d->info)) - 1; MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1)) nsp = (nsp + align) & ~align; /* Align argument on stack. */
align = CTSIZE_PTR-1;
nsp = (nsp + align) & ~align;
} }
#if LJ_TARGET_ARM64 && LJ_ABI_WIN if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */
/* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
#else
dp = ((uint8_t *)cc->stack) + nsp;
#endif
nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
err_nyi: err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL); lj_err_caller(L, LJ_ERR_FFI_NYICALL);
} }
dp = &cc->stack[nsp];
nsp += n;
isva = 0; isva = 0;
done: done:
@ -1072,8 +1048,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
} }
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
/* Extend passed integers to 32 bits at least. */ /* Extend passed integers to 32 bits at least. */
if (ctype_isinteger_or_bool(d->info) && d->size < 4 && if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
(!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) { /* Assumes LJ_LE. */
if (d->info & CTF_UNSIGNED) if (d->info & CTF_UNSIGNED)
*(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp : *(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp :
(uint32_t)*(uint16_t *)dp; (uint32_t)*(uint16_t *)dp;
@ -1120,17 +1095,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#endif #endif
} }
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((int32_t)nsp < 0) nsp = 0;
#endif
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */ cc->nfpr = nfpr; /* Required for vararg functions. */
#endif #endif
cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); cc->nsp = nsp;
cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR; cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR) if (nsp > CCALL_SPS_FREE)
cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u); cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
return gcsteps; return gcsteps;
} }

View File

@ -1,6 +1,6 @@
/* /*
** FFI C call handling. ** FFI C call handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_CCALL_H #ifndef _LJ_CCALL_H
@ -75,9 +75,6 @@ typedef union FPRArg {
#define CCALL_NARG_FPR 8 #define CCALL_NARG_FPR 8
#define CCALL_NRET_FPR 4 #define CCALL_NRET_FPR 4
#define CCALL_SPS_FREE 0 #define CCALL_SPS_FREE 0
#if LJ_TARGET_OSX
#define CCALL_PACK_STACKARG 1
#endif
typedef intptr_t GPRArg; typedef intptr_t GPRArg;
typedef union FPRArg { typedef union FPRArg {
@ -142,9 +139,6 @@ typedef union FPRArg {
#ifndef CCALL_ALIGN_STACKARG #ifndef CCALL_ALIGN_STACKARG
#define CCALL_ALIGN_STACKARG 1 #define CCALL_ALIGN_STACKARG 1
#endif #endif
#ifndef CCALL_PACK_STACKARG
#define CCALL_PACK_STACKARG 0
#endif
#ifndef CCALL_ALIGN_CALLSTATE #ifndef CCALL_ALIGN_CALLSTATE
#define CCALL_ALIGN_CALLSTATE 8 #define CCALL_ALIGN_CALLSTATE 8
#endif #endif
@ -158,15 +152,14 @@ typedef union FPRArg {
LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR); LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR); LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
#define CCALL_NUM_STACK 31 #define CCALL_MAXSTACK 32
#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
/* -- C call state -------------------------------------------------------- */ /* -- C call state -------------------------------------------------------- */
typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
void (*func)(void); /* Pointer to called function. */ void (*func)(void); /* Pointer to called function. */
uint32_t spadj; /* Stack pointer adjustment. */ uint32_t spadj; /* Stack pointer adjustment. */
uint8_t nsp; /* Number of bytes on stack. */ uint8_t nsp; /* Number of stack slots. */
uint8_t retref; /* Return value by reference. */ uint8_t retref; /* Return value by reference. */
#if LJ_TARGET_X64 #if LJ_TARGET_X64
uint8_t ngpr; /* Number of arguments in GPRs. */ uint8_t ngpr; /* Number of arguments in GPRs. */
@ -185,7 +178,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
#endif #endif
GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */ GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */
} CCallState; } CCallState;
/* -- C call handling ----------------------------------------------------- */ /* -- C call handling ----------------------------------------------------- */

Some files were not shown because too many files have changed in this diff Show More