Compare commits

...

260 Commits

Author SHA1 Message Date
Mike Pall
eec7a8016c Prevent Clang UB 'optimization' which breaks integerness checks.
Thanks to Kacper Michajłow. #1351 #1355
2025-04-10 22:53:50 +02:00
Mike Pall
51d4c26ec7 ARM: Fix soft-float math.min()/math.max().
Reported by Dong Jianqiang. #1356
2025-04-10 22:45:38 +02:00
Mike Pall
c262976486 ARM64: Fix pass-by-value struct calling conventions.
Reported by AnthonyK213. #1357
2025-04-10 22:06:47 +02:00
Mike Pall
e0a7ea8a92 Merge branch 'master' into v2.1 2025-04-07 10:33:15 +02:00
Mike Pall
e76bb50d44 Fix error generation in load*.
Reported by Sergey Kaplun. #1353
2025-04-07 10:27:40 +02:00
Mike Pall
e9e4b6d302 Initialize unused value when specializing to cdata metatable.
Reported by jakitliang. #1354
2025-04-07 09:22:07 +02:00
Mike Pall
538a82133a Change handling of nil value markers in template tables.
Reported by Bernhard M. Wiedemann. #1348 #1155
Fixes from Peter Cawley, Christian Clason, Lewis Russell.
2025-03-11 23:04:30 +01:00
Mike Pall
84cb21ffaf REVERT: Change handling of nil value markers in template tables. 2025-03-10 02:56:07 +01:00
Mike Pall
4f2bb199fe macOS: Fix Apple hardened runtime support and put behind build option.
Reported by vanc. #1334
2025-03-10 02:53:20 +01:00
Mike Pall
e3c70a7d81 macOS: Fix support for Apple hardened runtime.
Reported by Christian Clason. #1334
2025-03-10 00:05:08 +01:00
Mike Pall
7db2d1b12a Fix handling of nil value markers in template tables.
Thanks to Peter Cawley. #1348 #1155
2025-03-09 23:11:05 +01:00
Mike Pall
e0551670c9 Merge branch 'master' into v2.1 2025-03-09 23:09:02 +01:00
Mike Pall
85c3f2fb6f Avoid unpatching bytecode twice after a trace flush.
Reported by Sergey Kaplun. #1345
2025-03-09 23:04:23 +01:00
Mike Pall
eee16efa77 Fix state restore when recording __concat metamethod.
Reported by Sergey Kaplun. #1338 #1298
2025-03-09 21:28:17 +01:00
Mike Pall
4219efae43 Windows: Allow mixed builds with msvcbuild.bat.
Suggested by alex4814. #1341
2025-03-09 21:05:06 +01:00
Mike Pall
0254770582 macOS: Add support for Apple hardened runtime.
Thanks to Peter Cawley. #1334
2025-03-09 20:45:22 +01:00
Mike Pall
f14556234c Merge branch 'master' into v2.1 2025-03-09 16:25:34 +01:00
Mike Pall
d508715ab6 Add compatibility string coercion for fp:seek() argument.
Reported by Magnus Wibeck. #1343
2025-03-09 16:21:29 +01:00
Mike Pall
e27ee68817 Windows: Clarify installation directory layout.
Suggested by eabase. #1346
2025-03-09 16:10:22 +01:00
Mike Pall
55a42da36e Remove Cygwin from docs, since it's not a supported target. 2025-03-09 16:09:36 +01:00
Mike Pall
423ac2144b Improve CLI signal handling on POSIX. 2025-03-09 15:50:01 +01:00
Mike Pall
54dc2fa5d7 FFI: Add pre-declared int128_t, uint128_t, __int128 types.
Note: Only declaration and copy (interpreted only) are implemented.
2025-03-09 15:37:35 +01:00
Mike Pall
b1179ea5f7 Use dylib extension for iOS installs, too.
Reported by Andrey Filipenkov. #1336
2025-03-09 15:00:15 +01:00
Mike Pall
5eb9509468 Change handling of nil value markers in template tables.
Reported by Bernhard M. Wiedemann. #1348 #1155
2025-03-09 14:44:57 +01:00
Mike Pall
a4f56a459a Merge branch 'master' into v2.1 2025-01-13 16:22:22 +01:00
Mike Pall
62e362afbb Fix recording of BC_VARG.
Reported by Bachir Bendrissou.
2025-01-13 16:19:57 +01:00
Mike Pall
9d777346bc Reject negative getfenv()/setfenv() levels to prevent compiler warning.
Thanks to Sergey Kaplun. #1329
2025-01-13 16:16:27 +01:00
Mike Pall
8358eb0cce Merge branch 'master' into v2.1 2025-01-13 16:15:19 +01:00
Mike Pall
e8236561d4 Bump copyright date. 2025-01-13 15:59:10 +01:00
Mike Pall
f73e649a95 Merge branch 'master' into v2.1 2024-12-16 14:32:07 +01:00
Mike Pall
e2e0b1dd2d Force fallback source name for stripped bytecode.
Reported by Lyrth. #1319
2024-12-16 14:30:10 +01:00
Mike Pall
cd8d0a437d Remove dependency on <limits.h>.
Reported by yupengda002. #1318
2024-12-16 14:27:58 +01:00
Mike Pall
19878ec05c Restore state when recording __concat metamethod throws OOM.
Reported by Sergey Kaplun. #1298 #1234
2024-11-28 18:07:58 +01:00
Mike Pall
35a4dd6f79 MIPS64: Fix pcall() error case.
Thanks to Sergey Kaplun. #1308
2024-11-28 16:33:18 +01:00
Mike Pall
4788e6f92a Merge branch 'master' into v2.1 2024-11-28 16:28:51 +01:00
Mike Pall
811e448daa Fix detection of inconsistent renames due to sunk values.
Thanks to Sergey Kaplun. #1295 #584
2024-11-28 16:26:10 +01:00
Mike Pall
fe71d0fb54 Windows: Allow amalgamated static builds with msvcbuild.bat.
Reported by Naman Dixit. #1289
2024-11-14 17:21:00 +01:00
Mike Pall
fca66335d1 Always close profiler output file.
Reported by Guilherme Batalheiro. #1304
2024-11-14 17:13:58 +01:00
Mike Pall
9ce8f1ff8e Fix override of INSTALL_LJLIBD in the presence of DESTDIR.
Reported by faithanalog. #1239 #1303
2024-11-14 17:09:07 +01:00
Mike Pall
69bbf3c1b0 Fix bit op coercion for shifts in DUALNUM builds.
Reported by Junlong Li. Followup to #1273
2024-11-13 09:18:32 +01:00
Mike Pall
97813fb924 macOS: Remove obsolete -single_module flag.
Thanks to dundargoc. #1284
2024-10-02 13:59:42 +02:00
Mike Pall
b2915e9ab5 macOS: Workaround for buggy XCode 15.0 - 15.2 linker.
Thanks to Carlo Cabrera. #1283
2024-10-02 12:12:56 +02:00
Mike Pall
2240d84464 macOS: Fix macOS 15 / Clang 16 build.
Note: The -Wl,-no_deduplicate workaround is NOT needed anymore.
Thanks to fxcoudert, corsix, clason, baconpaul, mvf. #1275 #1266
2024-10-02 02:06:25 +02:00
Mike Pall
f5fd22203e Fix bit op coercion in DUALNUM builds.
Thanks to Sergey Kaplun. #1273
2024-09-29 16:46:29 +02:00
Mike Pall
0ae532c9aa Merge branch 'master' into v2.1 2024-09-29 16:11:15 +02:00
Mike Pall
5141cbc20c Fix compilation of getmetatable() for UDTYPE_IO_FILE.
Reported by Sergey Bronnikov. #1279
2024-09-29 16:03:37 +02:00
Mike Pall
c63a160706 Remove ancient RtlUnwindEx workaround for MinGW64.
Thanks to Kacper Michajłow. #1272
2024-09-29 15:33:32 +02:00
Mike Pall
87ae18af97 Drop unused function wrapper.
Follow-up to #1247.
2024-09-04 14:32:08 +02:00
Mike Pall
f725e44cda Merge branch 'master' into v2.1 2024-08-24 17:14:51 +02:00
Mike Pall
e45fd4cb71 Fix limit check in narrow_conv_backprop().
Thanks to Sergey Kaplun. #1262
2024-08-24 17:11:45 +02:00
Mike Pall
9bb6b35f7f Always use IRT_NIL for IR_TBAR.
Thanks to Peter Cawley. #1258
2024-08-24 17:03:17 +02:00
Mike Pall
c68711cc87 ARM64: Use ldr literal to load FP constants.
Thanks to Peter Cawley. #1255
2024-08-21 11:31:29 +02:00
Mike Pall
304da39cc5 FFI: Add missing coercion when recording 64-bit bit.*().
Thanks to Peter Cawley. #1252
2024-08-20 19:13:59 +02:00
Mike Pall
cdc2db3aea ARM64: Make tobit conversions match JIT backend behavior.
Thanks to Peter Cawley. #1253
2024-08-20 19:01:51 +02:00
Mike Pall
f4fa5646a8 Merge branch 'master' into v2.1 2024-08-20 19:01:38 +02:00
Mike Pall
32a683d226 ARM: Make hard-float tobit conversions match JIT backend behavior.
Reported by Peter Cawley. #1253
2024-08-20 19:00:47 +02:00
Mike Pall
fb22d0f80f FFI: Drop finalizer table rehash after GC cycle.
Reported by Sergey Kaplun. #1247
2024-08-19 20:00:21 +02:00
Mike Pall
fb5e1c9f0d Merge branch 'master' into v2.1 2024-08-19 17:33:23 +02:00
Mike Pall
ab39082fdd Fix another potential file descriptor leak in luaL_loadfile*().
Reported by Peter Cawley. #1249
2024-08-19 17:31:15 +02:00
Mike Pall
fddc9650d8 Merge branch 'master' into v2.1 2024-08-19 16:22:55 +02:00
Mike Pall
bcc6cbb188 MIPS32: Fix little-endian IR_RETF.
Thanks to Peter Cawley. #1250
2024-08-19 16:17:44 +02:00
Mike Pall
5ca25ee83e Correctly close VM state after early OOM during open.
Reported by Assumeru. #1248
2024-08-19 16:14:55 +02:00
Mike Pall
19db4e9b7c Fix potential file descriptor leak in luaL_loadfile*().
Reported by Assumeru. #1249
2024-08-19 16:11:36 +02:00
Mike Pall
ae4735f621 Reflect override of INSTALL_LJLIBD in package.path.
Suggested by GitSparTV. #1239
2024-08-15 00:38:43 +02:00
Mike Pall
6f834087d0 ARM64: Use movi to materialize FP constants.
Thanks to Peter Cawley. #1245
2024-08-15 00:22:47 +02:00
Mike Pall
2d54213e7c Add more FOLD rules for integer conversions.
Thanks to Peter Cawley. #1246
2024-08-15 00:20:54 +02:00
Mike Pall
833600390c Merge branch 'master' into v2.1 2024-08-15 00:19:35 +02:00
Mike Pall
86e7123bb1 Different fix for partial snapshot restore due to stack overflow.
Reported by Junlong Li. Fixed by Peter Cawley. #1196
2024-08-15 00:17:19 +02:00
Mike Pall
7369eff67d Fix IR_ABC hoisting.
Reported by pwnhacker0x18. Fixed by Peter Cawley. #1194
2024-08-15 00:10:01 +02:00
Mike Pall
3bdc6498c4 Limit CSE for IR_CARG to fix loop optimizations.
Thanks to Peter Cawley. #1244
2024-08-15 00:07:34 +02:00
Mike Pall
04dca7911e Call math.randomseed() without arguments to seed from system entropy.
Reminder: the math.random() PRNG is NOT SUITABLE FOR CRYPTOGRAPHIC USE.
2024-07-04 01:26:29 +02:00
Mike Pall
7421a1b33c Restore state when recording __concat metamethod throws an error.
Thanks to Sergey Kaplun. #1234
2024-07-04 00:48:49 +02:00
Mike Pall
510f88d468 Add build flag LUAJIT_DISABLE_TAILCALL to disable tailcall generation.
Only use this for debugging purposes. NEVER set it for regular builds
or distro builds! In Lua, tailcalls are a language guarantee.
Suggested by Steve Vermeulen. #1220
2024-07-04 00:13:58 +02:00
Mike Pall
444c8ff19a Clarify that lj_buf_shrink() does not keep any buffer data.
https://www.freelists.org/post/luajit/lj-buf-shrink-may-truncate-the-data-and-sbw-point-over-the-end-of-the-buffer,1
Thanks to Junlong Li.
2024-07-04 00:03:40 +02:00
Mike Pall
747fc02eb9 OSX: Fix installed luajit.pc.
Reported by leleliu008. #1221
2024-07-03 23:59:59 +02:00
Mike Pall
8038430110 Merge branch 'master' into v2.1 2024-07-03 23:59:03 +02:00
Mike Pall
7a608e4425 FFI: Fix various issues in recff_cdata_arith.
Thanks to Sergey Kaplun. #1224
2024-07-03 23:46:47 +02:00
Mike Pall
f602f0154b Fix predict_next() in parser (for real now).
Reported by Sergey Kaplun. #1226 #1054
2024-07-03 23:45:16 +02:00
Mike Pall
f2a1cd4328 FFI: Fix __tostring metamethod access to enum cdata value.
Thanks to Sergey Kaplun. #1232
2024-07-03 23:43:57 +02:00
Mike Pall
0f8b878e2f Fix typo.
Reported by Sergey Bronnikov. #1223
2024-07-03 23:43:29 +02:00
Mike Pall
6885efb73e Merge branch 'master' into v2.1 2024-07-03 23:42:38 +02:00
Mike Pall
811c5322c8 Handle partial snapshot restore due to stack overflow.
Reported by pwnhacker0x18. Fixed by Peter Cawley. #1196
2024-07-03 21:42:21 +02:00
Mike Pall
93e87998b2 Update Nintendo Switch build script.
Thanks to IoriBranford. #1214
2024-05-25 19:01:18 +02:00
Mike Pall
f5587f5eb3 Merge branch 'master' into v2.1 2024-05-25 16:41:53 +02:00
Mike Pall
4a22050df9 Prevent sanitizer warning in snap_restoredata().
Thanks to Sergey Kaplun. #1193
2024-05-25 16:38:05 +02:00
Mike Pall
80c1c65bce Typo. 2024-05-25 16:25:35 +02:00
Mike Pall
4fc48c50fe Limit number of string format elements to compile.
Reported by pwnhacker0x18. #1203
2024-05-25 16:22:39 +02:00
Mike Pall
a6386bdabe FFI: Clarify scalar boxing behavior.
Prevent misunderstandings like in #1216
2024-05-25 15:48:07 +02:00
Mike Pall
9398123383 Fix internal link in docs.
Thanks to GitSparTV. #1219
2024-05-25 14:56:15 +02:00
Mike Pall
5790d25397 OSX/iOS: Fix SDK incompatibility.
Thanks to Ryan Carsten Schmidt. #1189
2024-04-22 10:06:42 +02:00
Mike Pall
b3e4987389 Windows/MSVC: Cleanup msvcbuild.bat and always generate PDB.
Thanks to Miku AuahDark. #1127
2024-04-19 11:01:13 +02:00
Mike Pall
75e9277798 Merge branch 'master' into v2.1 2024-04-19 01:47:48 +02:00
Mike Pall
9b5e837ac2 Fix segment release check in internal memory allocator.
Thanks to Jinji Zeng. #1179 #1157
2024-04-19 01:44:19 +02:00
Mike Pall
d032c637b1 Fix compiler warning. 2024-04-19 01:41:12 +02:00
Mike Pall
f5affaa6c4 FFI: Turn FFI finalizer table into a proper GC root.
Reported by Sergey Bronnikov. #1168
2024-04-19 01:33:19 +02:00
Mike Pall
7110b93567 OSX/iOS: Always generate 64 bit non-FAT Mach-O object files.
Reported by Sergey Bronnikov. #1181
2024-04-19 00:31:06 +02:00
Mike Pall
d2fe2a6d46 Show name of NYI bytecode in -jv and -jdump.
Suggested by Sergey Kaplun. #1176 #567
2024-04-19 00:12:22 +02:00
Mike Pall
b8b49bf395 Use generic trace error for OOM during trace stitching.
Thanks to Sergey Kaplun. #1166
2024-04-18 23:57:53 +02:00
Mike Pall
243b7682a5 Fix serialization format docs.
Reported by nounwind.
2024-04-18 23:49:43 +02:00
Mike Pall
d06beb0480 Handle all types of errors during trace stitching.
Thanks to Sergey Kaplun and Peter Cawley. #1166 #720
2024-03-10 17:29:48 +01:00
Mike Pall
bcc5125a91 Fix recording of __concat metamethod.
Thanks to Sergey Kaplun. #1164
2024-03-10 17:26:36 +01:00
Mike Pall
913df6a945 Merge branch 'master' into v2.1 2024-03-10 17:26:03 +01:00
Mike Pall
cae361187e Prevent down-recursion for side traces.
Thanks to Sergey Kaplun. #1169
2024-03-10 17:23:21 +01:00
Mike Pall
302366a338 Check frame size limit before returning to a lower frame.
Thanks to Sergey Kaplun. #1173
2024-03-10 17:19:29 +01:00
Mike Pall
dda1ac273a FFI: Treat cdata finalizer table as a GC root.
Thanks to Sergey Bronnikov. #1168
2024-03-10 17:16:41 +01:00
Mike Pall
88ed9fdbbb Handle stack reallocation in debug.setmetatable() and lua_setmetatable().
Thanks to Sergey Kaplun. #1172
2024-03-10 17:13:28 +01:00
Mike Pall
0d313b2431 Merge branch 'master' into v2.1 2024-02-04 16:47:14 +01:00
Mike Pall
defe61a567 Rework stack overflow handling.
Reported by pwnhacker0x18. Fixed by Peter Cawley. #1152
2024-02-04 16:34:30 +01:00
Mike Pall
9cc2e42b17 Merge branch 'master' into v2.1 2024-01-31 14:39:50 +01:00
Mike Pall
9cdd5a9479 Preserve keys with dynamic values in template tables when saving bytecode.
Reported by Lyrthras. Fixed by Peter Cawley. #1155
2024-01-31 14:32:04 +01:00
Mike Pall
5e5d542c99 Merge branch 'master' into v2.1 2024-01-31 14:31:40 +01:00
Mike Pall
14987af80a Prevent include of luajit_rolling.h.
Thanks to Peter Cawley. #1145
2024-01-31 14:29:23 +01:00
Mike Pall
21a46723d8 Merge branch 'master' into v2.1 2024-01-26 23:18:02 +01:00
Mike Pall
e6c0ade97c Fix documentation bug about '\z' string escape. 2024-01-26 23:17:33 +01:00
Mike Pall
343ce0edaf Fix zero stripping in %g number formatting.
Reported by pwnhacker0x18. #1149
2024-01-25 13:23:48 +01:00
Mike Pall
f2336c48fa Merge branch 'master' into v2.1 2024-01-23 19:01:46 +01:00
Mike Pall
85b4fed0b0 Fix unsinking of IR_FSTORE for NULL metatable.
Reported by pwnhacker0x18. #1147
2024-01-23 18:58:52 +01:00
Mike Pall
3ca0a80711 DynASM/x86: Add endbr instruction.
Thanks to Dmitry Stogov. #1143 #1142
2024-01-22 19:17:45 +01:00
Mike Pall
2f35cb45fd MIPS64 R2/R6: Fix FP to integer conversions.
Thanks to Peter Cawley. #1146
2024-01-22 19:12:13 +01:00
Mike Pall
4b90f6c4d7 Add cross-32/64 bit and deterministic bytecode generation.
Contributed by Peter Cawley. #993 #1008
2024-01-22 19:06:36 +01:00
Mike Pall
c525bcb902 DynASM/x86: Allow [&expr] operand.
Thanks to Dmitry Stogov. #1138
2023-12-23 20:06:17 +01:00
Mike Pall
dbd363ca25 Merge branch 'master' into v2.1 2023-12-23 19:49:43 +01:00
Mike Pall
658530562c Check for IR_HREF vs. IR_HREFK aliasing in non-nil store check.
Thanks to Peter Cawley. #1133
2023-12-23 19:43:03 +01:00
Mike Pall
293199c5eb Merge branch 'master' into v2.1 2023-12-23 19:23:12 +01:00
Mike Pall
7dbe545933 Respect jit.off() on pending trace exit.
Thanks to Sergey Kaplun. #1134
2023-12-23 19:22:34 +01:00
Mike Pall
e02a207909 Merge branch 'master' into v2.1 2023-12-23 19:15:57 +01:00
Mike Pall
c42c62e71a Simplify handling of instable types in TNEW/TDUP load forwarding.
Thanks to Peter Cawley. #994
2023-12-23 19:14:32 +01:00
Mike Pall
29b0b282f5 Merge branch 'master' into v2.1 2023-12-11 13:04:43 +01:00
Mike Pall
9bdfd34dcc Only emit proper parent references in snapshot replay.
Thanks to Peter Cawley. #1132
2023-12-11 13:01:36 +01:00
Mike Pall
ff204d0350 Fix anchoring for string buffer set() method (again).
Thanks to Peter Cawley. #1125
2023-12-10 19:42:22 +01:00
Mike Pall
8d5ea4ceb9 Merge branch 'master' into v2.1 2023-12-10 16:13:34 +01:00
Mike Pall
10cc759f25 ARM: Fix stack restore for FP slots.
Thanks to Peter Cawley. #1131
2023-12-10 16:10:48 +01:00
Mike Pall
420a9afa93 Merge branch 'master' into v2.1 2023-12-10 15:50:14 +01:00
Mike Pall
1b38c73655 Document workaround for multilib vs. cross-compiler conflict.
Reported by igorpupkinable. #1126
2023-12-10 15:45:10 +01:00
Mike Pall
e02cb19b57 Fix anchoring for string buffer set() method.
Thanks to Peter Cawley. #1125
2023-12-10 15:33:47 +01:00
Mike Pall
e4168fae5b Merge branch 'master' into v2.1 2023-12-10 15:02:26 +01:00
Mike Pall
856423f5da Fix runtime library flags for MSVC debug builds.
Reported by igor725. #1127
2023-12-10 15:00:52 +01:00
Mike Pall
487eaaf040 Merge branch 'master' into v2.1 2023-12-10 14:50:29 +01:00
Mike Pall
dcf3627d79 Fix .debug_abbrev section in GDB JIT API.
Thanks to Dmitry Stogov. #1129
2023-12-10 14:48:34 +01:00
Mike Pall
d1236a4caa Optimize table.new() with constant args to (sinkable) IR_TNEW.
Thanks to Peter Cawley. #1128
2023-12-10 14:41:56 +01:00
Mike Pall
7ad68a1fd3 Merge branch 'master' into v2.1 2023-12-10 14:33:48 +01:00
Mike Pall
1761fd2ef7 Emit sunk IR_NEWREF only once per key on snapshot replay.
Thanks to Sergey Kaplun and Peter Cawley. #1128
2023-12-10 14:29:45 +01:00
Mike Pall
43d0a19158 Fix last commit. 2023-11-15 01:41:31 +01:00
Mike Pall
536cf8a271 Merge branch 'master' into v2.1 2023-11-14 22:56:09 +01:00
Mike Pall
644723649e x86/x64: Don't fuse loads across IR_NEWREF.
Reported by Peter Cawley. #1117
2023-11-14 22:50:21 +01:00
Mike Pall
113a168b79 Improve last commit. 2023-11-12 16:11:11 +01:00
Mike Pall
45c88b7963 x86/x64: Don't fuse loads across table.clear.
Reported by Peter Cawley. #1117
2023-11-12 15:41:52 +01:00
Mike Pall
6807e60af1 Merge branch 'master' into v2.1 2023-11-12 15:25:14 +01:00
Mike Pall
d854d00ce9 x86/x64: Add more red zone checks to assembler backend.
Thanks to Peter Cawley. #1116
2023-11-12 15:18:44 +01:00
Mike Pall
7c9671a043 Merge branch 'master' into v2.1 2023-11-12 15:11:29 +01:00
Mike Pall
a4c1640432 Add stack check to pcall/xpcall.
Analyzed by Peter Cawley. #1048
2023-11-12 14:42:24 +01:00
Mike Pall
69bbbf7736 Merge branch 'master' into v2.1 2023-11-09 11:05:27 +01:00
Mike Pall
65c8493907 Invalidate SCEV entry when returning to lower frame.
Thanks to Zhongwei Yao. #1115
2023-11-09 11:02:36 +01:00
Mike Pall
b94fbfbee9 Merge branch 'master' into v2.1 2023-11-07 22:28:53 +01:00
Mike Pall
433d7e8d8d FFI: Fix pragma push stack limit check and throw on overflow.
Reported by Sergey Kaplun. #1114
2023-11-07 22:25:42 +01:00
Mike Pall
ce2cd61739 ARM64: Fix disassembly of ldp/stp offsets.
Thanks to Peter Cawley. #1113
2023-11-06 23:14:22 +01:00
Mike Pall
07b3cd3cf9 Check for upvalue state transition in IR_UREFO.
Thanks to Peter Cawley. #1085
2023-11-05 16:34:46 +01:00
Mike Pall
0afa1676b2 Merge branch 'master' into v2.1 2023-11-05 11:37:57 +01:00
Mike Pall
d133d67c88 x64: Properly fix __call metamethod return dispatch.
Reported by Sergey Kaplun. #1110
2023-11-05 11:31:08 +01:00
Mike Pall
f2e955dae8 Windows/x86: _BitScan*64 are only available on 64 bit archs.
Reported by memcorrupt. #1109
2023-11-05 11:27:35 +01:00
Mike Pall
e826d0c101 Add 'cc' file type for saving bytecode.
Contributed by Sergey Bronnikov. #1105
2023-10-21 13:31:45 +02:00
Mike Pall
4eb47df605 FFI/Windows: Fix type declaration for int64_t and uint64_t.
Thanks to Peter Cawley. #1106
2023-10-21 13:18:51 +02:00
Mike Pall
7269b02130 Merge branch 'master' into v2.1 2023-10-21 13:13:34 +02:00
Mike Pall
db944b2b56 FFI: Fix dangling reference to CType in carith_checkarg().
Reported by Sergey Kaplun. #1108
2023-10-21 13:11:50 +02:00
Mike Pall
656ecbcf8f DynASM/ARM64: Support ldp/stp of q registers.
Thanks to Peter Cawley. #1096
2023-10-08 22:12:01 +02:00
Mike Pall
d2a5487fd7 ARM64: Use ADR and ADRP to form constants.
Thanks to Peter Cawley. #1100
2023-10-08 22:10:02 +02:00
Mike Pall
14866a6828 ARM64: Fix disassembly of U12 loads.
Thanks to Peter Cawley. #1100
2023-10-08 21:57:04 +02:00
Mike Pall
c5b075eb31 ARM64: Unify constant register handling in interpreter.
Plus minor optimizations. Simplifications for out-of-tree ARM64EC.
Thanks to Peter Cawley. #1096
2023-10-08 21:39:40 +02:00
Mike Pall
9cc8bbb7ae ARM: Fix register hint for FFI calls with FP results. 2023-10-08 21:22:50 +02:00
Mike Pall
1e93951b25 ARM64: Fix register hint for FFI calls with FP results.
Thanks to Peter Cawley. #1096
2023-10-08 21:20:10 +02:00
Mike Pall
007e4dce13 ARM64: Restore fp before sp in C stack unwinders.
Thanks to Peter Cawley. #1096
2023-10-08 21:17:43 +02:00
Mike Pall
becf5cc65d FFI: Fix ffi.abi("pauth").
Thanks to Peter Cawley. #1098
2023-09-25 16:56:17 +02:00
Mike Pall
97c75843c6 Merge branch 'master' into v2.1 2023-09-22 21:07:20 +02:00
Mike Pall
f72c19e482 Maintain chain invariant in DCE.
Thanks to Peter Cawley. #1094
2023-09-22 21:04:22 +02:00
Mike Pall
d1a2fef8a8 LJ_FR2: Fix stack checks in vararg calls.
Thanks to Peter Cawley. #1048
2023-09-21 05:19:55 +02:00
Mike Pall
234dbc481e Merge branch 'master' into v2.1 2023-09-21 04:44:37 +02:00
Mike Pall
aa6b15c1a8 Follow-up fix for stack overflow handling cleanup. 2023-09-21 04:43:40 +02:00
Mike Pall
a5d2f70c73 Handle OOM error on stack resize in coroutine.resume and lua_checkstack.
Thanks to Peter Cawley. #1066
2023-09-21 04:40:48 +02:00
Mike Pall
e86990f7f2 Restore cur_L for specific Lua/C API use case.
Thanks to Peter Cawley. #1066
2023-09-21 03:54:08 +02:00
Mike Pall
b8919781d4 Consistently use 64 bit constants for 64 bit IR instructions.
Thanks to Peter Cawley. #1084
2023-09-21 03:46:33 +02:00
Mike Pall
9159289927 ARM64: Fix IR_HREF code generation for constant FP keys.
Reported by swarn. Fix for 435d8c63 by Peter Cawley. #1090
2023-09-21 02:48:12 +02:00
Mike Pall
fca1f51bf8 ARM64: Fuse negative 32 bit constants into arithmetic ops again.
Thanks to Peter Cawley. #1065
2023-09-21 02:38:29 +02:00
Mike Pall
4b605a7da8 Merge branch 'master' into v2.1 2023-09-21 02:23:25 +02:00
Mike Pall
b138ccfa91 Handle all stack layouts in (delayed) TRACE vmevent.
Thanks to Sergey Bronnikov and Peter Cawley. #1087
2023-09-21 02:15:16 +02:00
Mike Pall
92b89d005a Add missing coercion when recording select(string, ...)
Thanks to Peter Cawley. #1083
2023-09-21 02:10:18 +02:00
Mike Pall
d2f6c55b05 Cleanup stack overflow handling.
Reported by Peter Cawley. #962
2023-09-21 01:58:43 +02:00
Mike Pall
e897c5743f Windows/ARM64: Add MSVC cross-build support for x64 to ARM64.
Thanks to invertego. #1081
2023-09-17 10:44:04 +02:00
Mike Pall
7a2b83a0c5 IR_MIN/IR_MAX is non-commutative due to underlying FPU ops.
Thanks to Peter Cawley. #1082
2023-09-17 10:31:00 +02:00
Mike Pall
42ca6e120f ARM64: Set fixed interpreter registers before rethrow.
Thanks to Peter Cawley. #593
2023-09-17 10:09:58 +02:00
Mike Pall
7a77a3cd85 Windows/ARM64: Update install docs. 2023-09-15 06:10:58 +02:00
Mike Pall
bd2d107151 Windows: Call C++ destructors without compiling with /EHa.
Thanks to Peter Cawley. #593
2023-09-15 05:47:29 +02:00
Mike Pall
7a1c139569 Windows: Pass scratch CONTEXT record to RtlUnwindEx.
Thanks to Peter Cawley. #593
2023-09-15 05:31:26 +02:00
Mike Pall
18b8fd8de7 ARM64: External unwinder already restores non-volatile registers.
Thanks to Peter Cawley. #593
2023-09-15 05:27:29 +02:00
Mike Pall
b36f9fad63 Windows/ARM64: Fix exception unwinding (again).
Thanks to Peter Cawley. #593
2023-09-15 05:23:29 +02:00
Mike Pall
8af63f9920 Windows/ARM64: Fix typo in exception unwinding.
Thanks to Peter Cawley. #593
2023-09-11 23:00:36 +02:00
Mike Pall
9e0437240f FFI: Fix 64 bit shift fold rules.
Thanks to Peter Cawley. #1079
2023-09-11 21:06:25 +02:00
Mike Pall
1c33f46314 Windows/ARM64: Support Windows calling conventions.
Dear Microsoft: your butchering of the (perfectly fine) ARM64 ABI is a disgrace.
Thanks to Peter Cawley. #593
2023-09-11 16:35:28 +02:00
Mike Pall
f63bc569fa Windows/ARM64: Fix exception unwinding.
Thanks to Peter Cawley. #593
2023-09-11 13:33:27 +02:00
Mike Pall
836ab4227a ARM64: Remove unneeded IRCALL_* defs for math intrinsics.
Workaround for MSVC issue.
Thanks to Peter Cawley. #593
2023-09-11 13:14:09 +02:00
Mike Pall
b174d5e66d Fix Cygwin build.
Thanks to Christopher Ng. #1077 #1078
2023-09-11 13:10:17 +02:00
Mike Pall
5a18d4582f Merge branch 'master' into v2.1 2023-09-10 05:26:27 +02:00
Mike Pall
9760984638 Allow path overrides in genversion.lua with minilua, too.
Thanks to arch1t3cht. #1067
2023-09-10 05:23:10 +02:00
Mike Pall
cb413bf8f4 Windows/ARM64: Add initial support.
Only builds with native ARM64 Visual Studio for now.
Thanks to vanc and Stephen Just. #593 #964
2023-09-10 05:20:22 +02:00
Mike Pall
566532b807 Merge branch 'master' into v2.1 2023-09-09 23:20:57 +02:00
Mike Pall
4fe2002292 Improve architecture detection error messages. 2023-09-09 23:01:26 +02:00
Mike Pall
4611e25c0f ARM64: Fuse rotates into logical operands.
Thanks to Peter Cawley. #1076
2023-09-09 20:59:18 +02:00
Mike Pall
90742d91c2 ARM64: Don't fuse sign extensions into logical operands.
Thanks to Peter Cawley. #1076
2023-09-09 20:57:46 +02:00
Mike Pall
ba2b34f5e8 ARM64: Disassemble rotates on logical operands.
Thanks to Peter Cawley. #1076
2023-09-09 20:52:02 +02:00
Mike Pall
f442432ecb Merge branch 'master' into v2.1 2023-09-09 18:18:48 +02:00
Mike Pall
44da356e97 ARM: Fix stack check code generation.
Thanks to Peter Cawley. #1068
2023-09-09 18:16:31 +02:00
Mike Pall
b8c6ccd50c ARM64: Fix LDP/STP fusion (again).
Reported and analyzed by Zhongwei Yao. Fix by Peter Cawley. #1075
2023-09-09 18:01:37 +02:00
Mike Pall
0705ef6ce4 ARM64: Ensure branch is in range before emitting TBZ/TBNZ.
Thanks to Peter Cawley. #1074
2023-09-09 17:52:43 +02:00
Mike Pall
59be97edb6 Merge branch 'master' into v2.1 2023-09-09 17:46:10 +02:00
Mike Pall
43eff4aad4 Fix mcode limit check for non-x86 archs.
Thanks to Peter Cawley.
2023-09-09 17:44:54 +02:00
Mike Pall
de2e09f54c ARM64: Improve BC_JLOOP.
Thanks to Peter Cawley. #1070
2023-09-09 17:38:44 +02:00
Mike Pall
6c599960d1 ARM64: Improve integer IR_MUL code generation.
Thanks to Peter Cawley. #1070
2023-09-09 17:36:40 +02:00
Mike Pall
4ed83bd990 ARM64: Simplify code generation for IR_STRTO.
Thanks to Peter Cawley. #1070
2023-09-09 17:34:28 +02:00
Mike Pall
a5ee35867c ARM64: Use RID_TMP instead of scratch register in more places.
Thanks to Peter Cawley. #1070
2023-09-09 17:31:06 +02:00
Mike Pall
c1877e648a ARM64: Improve IR_OBAR code generation.
Thanks to Peter Cawley. #1070
2023-09-09 17:21:32 +02:00
Mike Pall
c2bdce399e ARM64: Improve IR_UREF code generation.
Thanks to Peter Cawley. #1070
2023-09-09 17:19:02 +02:00
Mike Pall
435d8c6301 ARM64: Improve IR_HREF code generation.
Thanks to Peter Cawley. #1070
2023-09-09 17:15:26 +02:00
Mike Pall
315dc3e776 ARM64: Reload BASE via GL instead of spilling it.
Thanks to Peter Cawley. #1068.
2023-09-09 16:56:16 +02:00
Mike Pall
5149b0a3a2 ARM64: Consolidate 32/64-bit constant handling in assembler.
Thanks to Peter Cawley. #1065
2023-09-09 16:30:14 +02:00
Mike Pall
dfc122e45c ARM64: Tune emit_lsptr. Avoid wrong load for asm_prof.
Thanks to Peter Cawley. #1065
2023-09-09 14:20:39 +02:00
Mike Pall
4651ff2fbc ARM64: Inline only use of emit_loada.
Thanks to Peter Cawley. #1065
2023-09-09 14:15:18 +02:00
Mike Pall
9daf9f9003 ARM64: Improve K13 constant rematerialization.
Algorithm by Dougall Johnson: https://dougallj.wordpress.com/2021/10/30/
Thanks to Peter Cawley. #1065
2023-09-09 14:11:25 +02:00
Mike Pall
9bd2404137 Merge branch 'master' into v2.1 2023-09-09 13:42:12 +02:00
Mike Pall
7f9907b4ed Add NaN check to IR_NEWREF.
Thanks to Peter Cawley. #1069
2023-09-09 13:37:31 +02:00
Mike Pall
cc8d88aafc Merge branch 'master' into v2.1 2023-09-09 12:50:13 +02:00
Mike Pall
4d05806ae0 Allow override of paths for genversion.lua.
Thanks to arch1t3cht. #1067
2023-09-09 12:47:27 +02:00
Mike Pall
19707009bf Fix native MinGW build.
Thanks to Victor Bombi. #1071
2023-09-09 12:41:47 +02:00
Mike Pall
41fb94defa Add randomized register allocation for fuzz testing.
This must be explicitly enabled with: -DLUAJIT_RANDOM_RA
Thanks to Peter Cawley. #1062
2023-08-30 01:10:52 +02:00
Mike Pall
2f6c451ce8 ARM64: Improve register allocation for integer IR_MUL/IR_MULOV.
Thanks to Peter Cawley. #1062
2023-08-29 22:38:20 +02:00
Mike Pall
7ff8f26eb8 ARM64: Fix register allocation for IR_*LOAD.
Thanks to Peter Cawley. #1062
2023-08-29 22:35:10 +02:00
Mike Pall
356231edaf Merge branch 'master' into v2.1 2023-08-29 22:30:57 +02:00
Mike Pall
c6ee7e19d1 Update external MSDN URL in code.
Thanks to Kyle Marshall. #1060
2023-08-29 22:27:38 +02:00
Mike Pall
83954100db FFI/ARM64/OSX: Handle non-standard OSX C calling conventions.
Contributed by Peter Cawley. #205
2023-08-29 02:21:51 +02:00
Mike Pall
cf903edb30 FFI: Unify stack setup for C calls in interpreter. 2023-08-29 02:12:13 +02:00
Mike Pall
7cc53f0b85 ARM64: Prevent STP fusion for conditional code emitted by TBAR.
Thanks to Peter Cawley. #1057
2023-08-28 22:39:35 +02:00
Mike Pall
0fa2f1cbcf ARM64: Fix LDP/STP fusing for unaligned accesses.
Thanks to Peter Cawley. #1056
2023-08-28 22:33:54 +02:00
Mike Pall
c0d5240a25 Merge branch 'master' into v2.1 2023-08-28 22:24:36 +02:00
Mike Pall
0ef51b495f Handle table unsinking in the presence of IRFL_TAB_NOMM.
Reported by Sergey Kaplun. #1052
2023-08-28 22:15:42 +02:00
Mike Pall
238a2a80bb Merge branch 'master' into v2.1 2023-08-28 22:02:06 +02:00
Mike Pall
6a3111a57f Use fallback name for install files without valid .git or .relver. 2023-08-28 21:25:51 +02:00
Mike Pall
a0b52aae33 Handle non-.git checkout with .relver in .bat-file builds.
Thanks to Simon Cooke.
2023-08-28 21:59:01 +02:00
Mike Pall
631a45f73b Merge branch 'master' into v2.1 2023-08-28 21:08:00 +02:00
Mike Pall
14e2917e7a Fix external C call stack check when using LUAJIT_MODE_WRAPCFUNC.
Thanks to Peter Cawley. #1047
2023-08-28 21:04:01 +02:00
Mike Pall
309fb42b87 Fix predict_next() in parser (again).
Reported by Sergey Bronnikov. #1054
2023-08-28 21:00:37 +02:00
Mike Pall
03c31124cc Fix typo.
Thanks to Simon Cooke.
2023-08-22 17:06:34 +02:00
Mike Pall
ff192d134d Merge branch 'master' into v2.1 2023-08-22 17:06:14 +02:00
Mike Pall
d0ce82ecdc Handle the case when .git is not a directory.
Thanks to Alexander Shpilkin.
2023-08-22 17:04:22 +02:00
Mike Pall
0b5bf71e37 Merge branch 'master' into v2.1 2023-08-22 15:37:21 +02:00
Mike Pall
6a2163a6b4 Add .gitattributes to dynamically resolve .relver.
Thanks to Alexander Shpilkin.
2023-08-22 15:36:55 +02:00
Mike Pall
33e2a49dbf Add .gitattributes to dynamically resolve .relver.
Thanks to Alexander Shpilkin.
2023-08-22 15:30:27 +02:00
Mike Pall
093759d528 Fix for last commit: also remove symlink on uninstall. 2023-08-22 11:46:12 +02:00
Mike Pall
748ab9d90a Switch to rolling releases: mark v2.1 as production. 2023-08-22 11:13:45 +02:00
Mike Pall
54ef81f864 Merge branch 'master' into v2.1 2023-08-21 13:09:52 +02:00
Mike Pall
ed21acd863 Fix Windows build scripts for rolling releases.
Reported by Miku AuahDark.
2023-08-21 13:08:00 +02:00
Mike Pall
3c290f817f Merge branch 'master' into v2.1 2023-08-21 04:03:25 +02:00
Mike Pall
6351abc78f Switch MSVC and console build scripts to rolling releases. 2023-08-21 03:59:03 +02:00
217 changed files with 2489 additions and 1509 deletions

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
/.relver export-subst

1
.relver Normal file
View File

@ -0,0 +1 @@
$Format:%ct$

View File

@ -1,7 +1,7 @@
=============================================================================== ===============================================================================
LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/ LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/
Copyright (C) 2005-2023 Mike Pall. All rights reserved. Copyright (C) 2005-2025 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View File

@ -10,7 +10,7 @@
# For MSVC, please follow the instructions given in src/msvcbuild.bat. # For MSVC, please follow the instructions given in src/msvcbuild.bat.
# For MinGW and Cygwin, cd to src and run make with the Makefile there. # For MinGW and Cygwin, cd to src and run make with the Makefile there.
# #
# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h # Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
############################################################################## ##############################################################################
MAJVER= 2 MAJVER= 2
@ -37,12 +37,13 @@ export MULTILIB= lib
DPREFIX= $(DESTDIR)$(PREFIX) DPREFIX= $(DESTDIR)$(PREFIX)
INSTALL_BIN= $(DPREFIX)/bin INSTALL_BIN= $(DPREFIX)/bin
INSTALL_LIB= $(DPREFIX)/$(MULTILIB) INSTALL_LIB= $(DPREFIX)/$(MULTILIB)
INSTALL_SHARE= $(DPREFIX)/share INSTALL_SHARE_= $(PREFIX)/share
INSTALL_SHARE= $(DESTDIR)$(INSTALL_SHARE_)
INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION)
INSTALL_INC= $(INSTALL_DEFINC) INSTALL_INC= $(INSTALL_DEFINC)
INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION) export INSTALL_LJLIBD= $(INSTALL_SHARE_)/luajit-$(MMVERSION)
INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit INSTALL_JITLIB= $(DESTDIR)$(INSTALL_LJLIBD)/jit
INSTALL_LMODD= $(INSTALL_SHARE)/lua INSTALL_LMODD= $(INSTALL_SHARE)/lua
INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER)
INSTALL_CMODD= $(INSTALL_LIB)/lua INSTALL_CMODD= $(INSTALL_LIB)/lua
@ -71,7 +72,7 @@ INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME)
INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \
$(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD)
UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \ UNINSTALL_DIRS= $(INSTALL_JITLIB) $(DESTDIR)$(INSTALL_LJLIBD) $(INSTALL_INC) \
$(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD)
RM= rm -f RM= rm -f
@ -109,11 +110,12 @@ else
endif endif
TARGET_SYS?= $(HOST_SYS) TARGET_SYS?= $(HOST_SYS)
ifeq (Darwin,$(TARGET_SYS)) ifneq (,$(filter $(TARGET_SYS),Darwin iOS))
INSTALL_SONAME= $(INSTALL_DYLIBNAME) INSTALL_SONAME= $(INSTALL_DYLIBNAME)
INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1) INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1)
INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2) INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2)
LDCONFIG= : LDCONFIG= :
SED_PC+= -e "s| -Wl,-E||"
endif endif
############################################################################## ##############################################################################
@ -142,18 +144,12 @@ install: $(INSTALL_DEP)
$(RM) $(FILE_PC).tmp $(RM) $(FILE_PC).tmp
cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
@echo ""
@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
@echo "You can do this now by running this command (with sudo):"
@echo ""
@echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
@echo ""
uninstall: uninstall:
@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
for file in $(FILES_JITLIB); do \ for file in $(FILES_JITLIB); do \
$(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
done done

2
README
View File

@ -5,7 +5,7 @@ LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
Project Homepage: https://luajit.org/ Project Homepage: https://luajit.org/
LuaJIT is Copyright (C) 2005-2023 Mike Pall. LuaJIT is Copyright (C) 2005-2025 Mike Pall.
LuaJIT is free software, released under the MIT license. LuaJIT is free software, released under the MIT license.
See full Copyright Notice in the COPYRIGHT file or in luajit.h. See full Copyright Notice in the COPYRIGHT file or in luajit.h.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2023 Mike Pall. /* Copyright (C) 2004-2025 Mike Pall.
* *
* You are welcome to use the general ideas of this design for your own sites. * You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme. * But please do not steal the stylesheet, the layout or the color scheme.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2023 Mike Pall. /* Copyright (C) 2004-2025 Mike Pall.
* *
* You are welcome to use the general ideas of this design for your own sites. * You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme. * But please do not steal the stylesheet, the layout or the color scheme.

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Contact</title> <title>Contact</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -94,7 +94,7 @@ don't like that, please complain to Google or Microsoft, not me.
<h2>Copyright</h2> <h2>Copyright</h2>
<p> <p>
All documentation is All documentation is
Copyright &copy; 2005-2023 Mike Pall. Copyright &copy; 2005-2025 Mike Pall.
</p> </p>
@ -102,7 +102,7 @@ Copyright &copy; 2005-2023 Mike Pall.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>String Buffer Library</title> <title>String Buffer Library</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -85,7 +85,7 @@ operations.
</p> </p>
<p> <p>
The string buffer library also includes a high-performance The string buffer library also includes a high-performance
<a href="serialize">serializer</a> for Lua objects. <a href="#serialize">serializer</a> for Lua objects.
</p> </p>
<h2 id="use">Using the String Buffer Library</h2> <h2 id="use">Using the String Buffer Library</h2>
@ -588,9 +588,9 @@ num → 0x07 double.L
tab → 0x08 // Empty table tab → 0x08 // Empty table
| 0x09 h.U h*{object object} // Key/value hash | 0x09 h.U h*{object object} // Key/value hash
| 0x0a a.U a*object // 0-based array | 0x0a a.U a*object // 0-based array
| 0x0b a.U a*object h.U h*{object object} // Mixed | 0x0b a.U h.U a*object h*{object object} // Mixed
| 0x0c a.U (a-1)*object // 1-based array | 0x0c a.U (a-1)*object // 1-based array
| 0x0d a.U (a-1)*object h.U h*{object object} // Mixed | 0x0d a.U h.U (a-1)*object h*{object object} // Mixed
tab_mt → 0x0e (index-1).U tab // Metatable dict entry tab_mt → 0x0e (index-1).U tab // Metatable dict entry
int64 → 0x10 int.L // FFI int64_t int64 → 0x10 int.L // FFI int64_t
@ -679,7 +679,7 @@ mappings of files are OK, but only if the file does not change.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Lua/C API Extensions</title> <title>Lua/C API Extensions</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -173,7 +173,7 @@ Also note that this mechanism is not without overhead.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>FFI Library</title> <title>FFI Library</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -316,7 +316,7 @@ without undue conversion penalties.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>ffi.* API Functions</title> <title>ffi.* API Functions</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -558,7 +558,7 @@ named <tt>i</tt>.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>FFI Semantics</title> <title>FFI Semantics</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -440,6 +440,19 @@ If you don't do this, the default Lua number &rarr; <tt>double</tt>
conversion rule applies. A vararg C&nbsp;function expecting an integer conversion rule applies. A vararg C&nbsp;function expecting an integer
will see a garbled or uninitialized value. will see a garbled or uninitialized value.
</p> </p>
<p>
Note: this is the only place where creating a boxed scalar number type is
actually useful. <b>Never use <tt>ffi.new("int")</tt>, <tt>ffi.new("float")</tt>
etc. anywhere else!</b>
</p>
<p style="font-size: 8pt;">
Ditto for <tt>ffi.cast()</tt>. Explicitly boxing scalars <b>does not</b>
improve performance or force <tt>int</tt> or <tt>float</tt> arithmetic! It
just adds costly boxing, unboxing and conversion steps. And it may lead
to surprising results, because
<a href="#cdata_arith">cdata arithmetic on scalar numbers</a>
is always performed on 64 bit integers.
</p>
<h2 id="init">Initializers</h2> <h2 id="init">Initializers</h2>
<p> <p>
@ -1246,7 +1259,7 @@ compiled.</li>
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>FFI Tutorial</title> <title>FFI Tutorial</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -587,7 +587,7 @@ it to a local variable in the function scope is unnecessary.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>jit.* Library</title> <title>jit.* Library</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -187,7 +187,7 @@ if you want to know more.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Profiler</title> <title>Profiler</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -349,7 +349,7 @@ use.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Extensions</title> <title>Extensions</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -160,13 +160,33 @@ passes any arguments after the error function to the function
which is called in a protected context. which is called in a protected context.
</p> </p>
<h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3> <h3 id="load"><tt>load*()</tt> handle UTF-8 source code</h3>
<p> <p>
Non-ASCII characters are handled transparently by the Lua source code parser. Non-ASCII characters are handled transparently by the Lua source code parser.
This allows the use of UTF-8 characters in identifiers and strings. This allows the use of UTF-8 characters in identifiers and strings.
A UTF-8 BOM is skipped at the start of the source code. A UTF-8 BOM is skipped at the start of the source code.
</p> </p>
<h3 id="load_mode"><tt>load*()</tt> add a mode parameter</h3>
<p>
As an extension from Lua 5.2, the functions <tt>loadstring()</tt>,
<tt>loadfile()</tt> and (new) <tt>load()</tt> add an optional
<tt>mode</tt> parameter.
</p>
<p>
The default mode string is <tt>"bt"</tt>, which allows loading of both
source code and bytecode. Use <tt>"t"</tt> to allow only source code
or <tt>"b"</tt> to allow only bytecode to be loaded.
</p>
<p>
By default, the <tt>load*</tt> functions generate the native bytecode format.
For cross-compilation purposes, add <tt>W</tt> to the mode string to
force the 32 bit format and <tt>X</tt> to force the 64 bit format.
Add both to force the opposite format. Note that non-native bytecode
generated by <tt>load*</tt> cannot be run, but can still be passed
to <tt>string.dump</tt>.
</p>
<h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3> <h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3>
<p> <p>
All number-to-string conversions consistently convert non-finite numbers All number-to-string conversions consistently convert non-finite numbers
@ -186,26 +206,33 @@ works independently of the current locale and it supports hex floating-point
numbers (e.g. <tt>0x1.5p-3</tt>). numbers (e.g. <tt>0x1.5p-3</tt>).
</p> </p>
<h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3> <h3 id="string_dump"><tt>string.dump(f [,mode])</tt> generates portable bytecode</h3>
<p> <p>
An extra argument has been added to <tt>string.dump()</tt>. If set to An extra argument has been added to <tt>string.dump()</tt>. If set to
<tt>true</tt>, 'stripped' bytecode without debug information is <tt>true</tt> or to a string which contains the character <tt>s</tt>,
generated. This speeds up later bytecode loading and reduces memory 'stripped' bytecode without debug information is generated. This speeds
usage. See also the up later bytecode loading and reduces memory usage. See also the
<a href="running.html#opt_b"><tt>-b</tt> command line option</a>. <a href="running.html#opt_b"><tt>-b</tt> command line option</a>.
</p> </p>
<p> <p>
The generated bytecode is portable and can be loaded on any architecture The generated bytecode is portable and can be loaded on any architecture
that LuaJIT supports, independent of word size or endianess. However, the that LuaJIT supports. However, the bytecode compatibility versions must
bytecode compatibility versions must match. Bytecode stays compatible match. Bytecode only stays compatible within a major+minor version
for dot releases (x.y.0 &rarr; x.y.1), but may change with major or (x.y.aaa &rarr; x.y.bbb), except for development branches. Foreign bytecode
minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign (e.g. from Lua 5.1) is incompatible and cannot be loaded.
bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
</p> </p>
<p> <p>
Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
a different, incompatible bytecode format for all 64 bit ports. This may be a different, incompatible bytecode format between 32 bit and 64 bit ports.
rectified in the future. This may be rectified in the future. In the meantime, use the <tt>W</tt>
and <tt>X</tt> <a href="#load_mode">modes of the <tt>load*</tt> functions</a>
for cross-compilation purposes.
</p>
<p>
Due to VM hardening, bytecode is not deterministic. Add <tt>d</tt> to the
mode string to dump it in a deterministic manner: identical source code
always gives a byte-for-byte identical bytecode dump. This feature is
mainly useful for reproducible builds.
</p> </p>
<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3> <h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
@ -238,7 +265,7 @@ and let the GC do its work.
LuaJIT uses a Tausworthe PRNG with period 2^223 to implement LuaJIT uses a Tausworthe PRNG with period 2^223 to implement
<tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of <tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of
the PRNG results is much superior compared to the standard Lua the PRNG results is much superior compared to the standard Lua
implementation, which uses the platform-specific ANSI rand(). implementation, which uses the platform-specific ANSI <tt>rand()</tt>.
</p> </p>
<p> <p>
The PRNG generates the same sequences from the same seeds on all The PRNG generates the same sequences from the same seeds on all
@ -249,6 +276,10 @@ It's correctly scaled up and rounded for <tt>math.random(n&nbsp;[,m])</tt> to
preserve uniformity. preserve uniformity.
</p> </p>
<p> <p>
Call <tt>math.randomseed()</tt> without any arguments to seed it from
system entropy.
</p>
<p>
Important: Neither this nor any other PRNG based on the simplistic Important: Neither this nor any other PRNG based on the simplistic
<tt>math.random()</tt> API is suitable for cryptographic use. <tt>math.random()</tt> API is suitable for cryptographic use.
</p> </p>
@ -286,7 +317,7 @@ enabled:
</p> </p>
<ul> <ul>
<li><tt>goto</tt> and <tt>::labels::</tt>.</li> <li><tt>goto</tt> and <tt>::labels::</tt>.</li>
<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li> <li>Hex escapes <tt>'\x3F'</tt> and <tt>'\z'</tt> escape in strings.</li>
<li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li> <li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li>
<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li> <li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
<li><tt>loadfile(filename [,mode [,env]])</tt>.</li> <li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
@ -426,9 +457,7 @@ the toolchain used to compile LuaJIT:
on the C&nbsp;stack. The contents of the C++&nbsp;exception object on the C&nbsp;stack. The contents of the C++&nbsp;exception object
pass through unmodified.</li> pass through unmodified.</li>
<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>. <li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
The corresponding Lua error message can be retrieved from the Lua stack.<br> The corresponding Lua error message can be retrieved from the Lua stack.</li>
For MSVC for Windows 64 bit this requires compilation of your C++ code
with <tt>/EHa</tt>.</li>
<li>Throwing Lua errors across C++ frames is safe. C++ destructors <li>Throwing Lua errors across C++ frames is safe. C++ destructors
will be called.</li> will be called.</li>
</ul> </ul>
@ -463,7 +492,7 @@ C++ destructors.</li>
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Installation</title> <title>Installation</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -117,7 +117,7 @@ hold all user-configurable settings:
<li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX <li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX
only).</li> only).</li>
<li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT <li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT
under POSIX, MinGW or Cygwin.</li> under POSIX or MinGW.</li>
<li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with <li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
MSVC (Visual Studio).</li> MSVC (Visual Studio).</li>
</ul> </ul>
@ -195,15 +195,13 @@ Obviously the prefixes given during build and installation need to be the same.
<h2 id="windows">Windows Systems</h2> <h2 id="windows">Windows Systems</h2>
<h3>Prerequisites</h3> <h3>Prerequisites</h3>
<p> <p>
Either install one of the open source SDKs Either install the open source SDK <a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a>,
(<a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a> or which comes with a modified GCC plus the required development headers.
<a href="https://www.cygwin.com/"><span class="ext">&raquo;</span>&nbsp;Cygwin</a>), which come with a modified
GCC plus the required development headers.
Or install Microsoft's Visual Studio (MSVC). Or install Microsoft's Visual Studio (MSVC).
</p> </p>
<h3>Building with MSVC</h3> <h3>Building with MSVC</h3>
<p> <p>
Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the
directory with the source code and run these commands: directory with the source code and run these commands:
</p> </p>
<pre class="code"> <pre class="code">
@ -214,9 +212,12 @@ msvcbuild
Check the <tt>msvcbuild.bat</tt> file for more options. Check the <tt>msvcbuild.bat</tt> file for more options.
Then follow the installation instructions below. Then follow the installation instructions below.
</p> </p>
<h3>Building with MinGW or Cygwin</h3>
<p> <p>
Open a command prompt window and make sure the MinGW or Cygwin programs For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt>
</p>
<h3>Building with MinGW</h3>
<p>
Open a command prompt window and make sure the MinGW programs
are in your path. Then <tt>cd</tt> to the directory of the git repository. are in your path. Then <tt>cd</tt> to the directory of the git repository.
Then run this command for MinGW: Then run this command for MinGW:
</p> </p>
@ -224,12 +225,6 @@ Then run this command for MinGW:
mingw32-make mingw32-make
</pre> </pre>
<p> <p>
Or this command for Cygwin:
</p>
<pre class="code">
make
</pre>
<p>
Then follow the installation instructions below. Then follow the installation instructions below.
</p> </p>
<h3>Installing LuaJIT</h3> <h3>Installing LuaJIT</h3>
@ -246,6 +241,19 @@ absolute path names &mdash; all modules are loaded relative to the
directory where <tt>luajit.exe</tt> is installed directory where <tt>luajit.exe</tt> is installed
(see <tt>src/luaconf.h</tt>). (see <tt>src/luaconf.h</tt>).
</p> </p>
<p>
The final directory layout should look like this:
</p>
<pre class="code">
├── luajit.exe
├── lua51.dll
├── <- put your own classic Lua/C API modules (*.dll) here
└── lua
├── <- put your own Lua modules (*.lua) here
└── jit
├── bc.lua
└── (etc …)
</pre>
<h2 id="cross">Cross-compiling LuaJIT</h2> <h2 id="cross">Cross-compiling LuaJIT</h2>
<p> <p>
@ -266,6 +274,7 @@ for any supported target:
<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li> <li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
<li>Both host and target architectures must have the same pointer size.</li> <li>Both host and target architectures must have the same pointer size.</li>
<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li> <li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
<li>On some distro versions, multilib conflicts with cross-compilers. The workaround is to install the x86 cross-compiler package <tt>gcc-i686-linux-gnu</tt> and use it to build the host part (<tt>HOST_CC=i686-linux-gnu-gcc</tt>).</li>
<li>64 bit targets always require compilation on a 64 bit host.</li> <li>64 bit targets always require compilation on a 64 bit host.</li>
</ul> </ul>
<p> <p>
@ -568,7 +577,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>LuaJIT</title> <title>LuaJIT</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -122,7 +122,7 @@ Lua is a powerful, dynamic and light-weight programming language.
It may be embedded or used as a general-purpose, stand-alone language. It may be embedded or used as a general-purpose, stand-alone language.
</p> </p>
<p> <p>
LuaJIT is Copyright &copy; 2005-2023 Mike Pall, released under the LuaJIT is Copyright &copy; 2005-2025 Mike Pall, released under the
<a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>. <a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
</p> </p>
<p> <p>
@ -193,7 +193,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head> <head>
<title>Running LuaJIT</title> <title>Running LuaJIT</title>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023"> <meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en"> <meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -106,6 +106,9 @@ are accepted:
<li><tt>-l</tt> &mdash; Only list bytecode.</li> <li><tt>-l</tt> &mdash; Only list bytecode.</li>
<li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li> <li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li>
<li><tt>-g</tt> &mdash; Keep debug info.</li> <li><tt>-g</tt> &mdash; Keep debug info.</li>
<li><tt>-W</tt> &mdash; Generate 32 bit (non-GC64) bytecode.</li>
<li><tt>-X</tt> &mdash; Generate 64 bit (GC64) bytecode.</li>
<li><tt>-d</tt> &mdash; Generate bytecode in deterministic manner.</li>
<li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li> <li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li>
<li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li> <li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li>
<li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li> <li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li>
@ -120,7 +123,8 @@ file name:
</p> </p>
<ul> <ul>
<li><tt>c</tt> &mdash; C source file, exported bytecode data.</li> <li><tt>c</tt> &mdash; C source file, exported bytecode data.</li>
<li><tt>h</tt> &mdash; C header file, static bytecode data.</li> <li><tt>cc</tt> &mdash; C++ source file, exported bytecode data.</li>
<li><tt>h</tt> &mdash; C/C++ header file, static bytecode data.</li>
<li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data <li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data
(OS- and architecture-specific).</li> (OS- and architecture-specific).</li>
<li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable). <li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable).
@ -303,7 +307,7 @@ Here are the parameters and their default settings:
</div> </div>
<div id="foot"> <div id="foot">
<hr class="hide"> <hr class="hide">
Copyright &copy; 2005-2023 Copyright &copy; 2005-2025
<span class="noprint"> <span class="noprint">
&middot; &middot;
<a href="contact.html">Contact</a> <a href="contact.html">Contact</a>

View File

@ -1,6 +1,6 @@
/* /*
** DynASM ARM encoding engine. ** DynASM ARM encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved. ** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM ARM module. -- DynASM ARM module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------

View File

@ -1,6 +1,6 @@
/* /*
** DynASM ARM64 encoding engine. ** DynASM ARM64 encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved. ** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM ARM64 module. -- DynASM ARM64 module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -549,7 +549,7 @@ end
local function parse_load_pair(params, nparams, n, op) local function parse_load_pair(params, nparams, n, op)
if params[n+2] then werror("too many operands") end if params[n+2] then werror("too many operands") end
local pn, p2 = params[n], params[n+1] local pn, p2 = params[n], params[n+1]
local scale = shr(op, 30) == 0 and 2 or 3 local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
if not p1 then if not p1 then
if not p2 then if not p2 then
@ -806,8 +806,8 @@ map_op = {
["ldrsw_*"] = "98000000DxB|b8800000DxL", ["ldrsw_*"] = "98000000DxB|b8800000DxL",
-- NOTE: ldur etc. are handled by ldr et al. -- NOTE: ldur etc. are handled by ldr et al.
["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
["ldpsw_*"] = "68400000DAxP", ["ldpsw_*"] = "68400000DAxP",
-- Branches. -- Branches.
@ -942,7 +942,7 @@ local function parse_template(params, template, nparams, pos)
werror("bad register type") werror("bad register type")
end end
parse_reg_type = false parse_reg_type = false
elseif p == "x" or p == "w" or p == "d" or p == "s" then elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
if parse_reg_type ~= p then if parse_reg_type ~= p then
werror("register size mismatch") werror("register size mismatch")
end end

View File

@ -1,6 +1,6 @@
/* /*
** DynASM MIPS encoding engine. ** DynASM MIPS encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved. ** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM MIPS32/MIPS64 module. -- DynASM MIPS32/MIPS64 module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM MIPS64 module. -- DynASM MIPS64 module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.

View File

@ -1,6 +1,6 @@
/* /*
** DynASM PPC/PPC64 encoding engine. ** DynASM PPC/PPC64 encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved. ** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM PPC/PPC64 module. -- DynASM PPC/PPC64 module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
-- --
-- Support for various extensions contributed by Caio Souza Oliveira. -- Support for various extensions contributed by Caio Souza Oliveira.

View File

@ -1,6 +1,6 @@
/* /*
** DynASM encoding engine prototypes. ** DynASM encoding engine prototypes.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved. ** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM x64 module. -- DynASM x64 module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined x86/x64 module. -- This module just sets 64 bit mode for the combined x86/x64 module.

View File

@ -1,6 +1,6 @@
/* /*
** DynASM x86 encoding engine. ** DynASM x86 encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved. ** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice. ** Released under the MIT license. See dynasm.lua for full copyright notice.
*/ */

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
-- DynASM x86/x64 module. -- DynASM x86/x64 module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice. -- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -627,7 +627,11 @@ local function wputmrmsib(t, imark, s, vsreg, psz, sk)
werror("NYI: rip-relative displacement followed by immediate") werror("NYI: rip-relative displacement followed by immediate")
end end
-- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
wputlabel("REL_", disp[1], 2) if disp[2] == "iPJ" then
waction("REL_A", disp[1])
else
wputlabel("REL_", disp[1], 2)
end
else else
wputdarg(disp) wputdarg(disp)
end end
@ -744,9 +748,9 @@ local function dispexpr(expr)
return imm*map_opsizenum[ops] return imm*map_opsizenum[ops]
end end
local mode, iexpr = immexpr(dispt) local mode, iexpr = immexpr(dispt)
if mode == "iJ" then if mode == "iJ" or mode == "iPJ" then
if c == "-" then werror("cannot invert label reference") end if c == "-" then werror("cannot invert label reference") end
return { iexpr } return { iexpr, mode }
end end
return expr -- Need to return original signed expression. return expr -- Need to return original signed expression.
end end
@ -1147,6 +1151,8 @@ local map_op = {
rep_0 = "F3", rep_0 = "F3",
repe_0 = "F3", repe_0 = "F3",
repz_0 = "F3", repz_0 = "F3",
endbr32_0 = "F30F1EFB",
endbr64_0 = "F30F1EFA",
-- F4: *hlt -- F4: *hlt
cmc_0 = "F5", cmc_0 = "F5",
-- F6: test... mb,i; div... mb -- F6: test... mb,i; div... mb

View File

@ -2,7 +2,7 @@
-- DynASM. A dynamic assembler for code generation engines. -- DynASM. A dynamic assembler for code generation engines.
-- Originally designed and implemented for LuaJIT. -- Originally designed and implemented for LuaJIT.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See below for full copyright notice. -- See below for full copyright notice.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -17,7 +17,7 @@ local _info = {
url = "https://luajit.org/dynasm.html", url = "https://luajit.org/dynasm.html",
license = "MIT", license = "MIT",
copyright = [[ copyright = [[
Copyright (C) 2005-2023 Mike Pall. All rights reserved. Copyright (C) 2005-2025 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -75,7 +75,7 @@ local function wline(line, needindent)
g_synclineno = g_synclineno + 1 g_synclineno = g_synclineno + 1
end end
-- Write assembler line as a comment, if requestd. -- Write assembler line as a comment, if requested.
local function wcomment(aline) local function wcomment(aline)
if g_opt.comment then if g_opt.comment then
wline(g_opt.comment..aline..g_opt.endcomment, true) wline(g_opt.comment..aline..g_opt.endcomment, true)

View File

@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
Runs some nested loops and shows the resulting traces. Runs some nested loops and shows the resulting traces.
.SH COPYRIGHT .SH COPYRIGHT
.PP .PP
\fBLuaJIT\fR is Copyright \(co 2005-2023 Mike Pall. \fBLuaJIT\fR is Copyright \(co 2005-2025 Mike Pall.
.br .br
\fBLuaJIT\fR is open source software, released under the MIT license. \fBLuaJIT\fR is open source software, released under the MIT license.
.SH SEE ALSO .SH SEE ALSO

View File

@ -7,7 +7,7 @@
# Also works with MinGW and Cygwin on Windows. # Also works with MinGW and Cygwin on Windows.
# Please check msvcbuild.bat for building with MSVC on Windows. # Please check msvcbuild.bat for building with MSVC on Windows.
# #
# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h # Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
############################################################################## ##############################################################################
MAJVER= 2 MAJVER= 2
@ -233,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG
TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= x64 TARGET_LJARCH= x64
else else
@ -299,6 +299,12 @@ endif
ifneq (,$(LMULTILIB)) ifneq (,$(LMULTILIB))
TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\" TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
endif endif
ifneq (,$(INSTALL_LJLIBD))
TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\"
endif
ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1))
TARGET_XCFLAGS+= -fno-strict-float-cast-overflow
endif
############################################################################## ##############################################################################
# Target system detection. # Target system detection.
@ -320,13 +326,13 @@ ifeq (Darwin,$(TARGET_SYS))
endif endif
TARGET_STRIP+= -x TARGET_STRIP+= -x
TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS= TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
else else
ifeq (iOS,$(TARGET_SYS)) ifeq (iOS,$(TARGET_SYS))
TARGET_STRIP+= -x TARGET_STRIP+= -x
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS= TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
ifeq (arm64,$(TARGET_LJARCH)) ifeq (arm64,$(TARGET_LJARCH))
@ -475,7 +481,11 @@ DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
DASM_DASC= vm_$(DASM_ARCH).dasc DASM_DASC= vm_$(DASM_ARCH).dasc
GIT= git GIT= git
GIT_RELVER= [ -d ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || : ifeq (Windows,$(HOST_SYS)$(HOST_MSYS))
GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
else
GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
endif
GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*) GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*)
BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \

View File

@ -25,14 +25,15 @@ lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_strscan.h lj_libdef.h
lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_vm.h lj_prng.h \
lj_libdef.h
lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
lj_libdef.h lj_libdef.h
@ -55,7 +56,7 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \ lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \ lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
lj_emit_*.h lj_asm_*.h lj_prng.h lj_emit_*.h lj_asm_*.h
lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
lj_bcdef.h lj_bcdef.h
@ -97,7 +98,7 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \ lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
lj_crecord.h lj_strfmt.h lj_crecord.h lj_strfmt.h lj_strscan.h
lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
lj_ccallback.h lj_buf.h lj_ccallback.h lj_buf.h

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder. ** LuaJIT VM builder.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** This is a tool to build the hand-tuned assembler code required for ** This is a tool to build the hand-tuned assembler code required for
** LuaJIT's bytecode interpreter. It supports a variety of output formats ** LuaJIT's bytecode interpreter. It supports a variety of output formats

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder. ** LuaJIT VM builder.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _BUILDVM_H #ifndef _BUILDVM_H

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder: Assembler source code emitter. ** LuaJIT VM builder: Assembler source code emitter.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "buildvm.h" #include "buildvm.h"
@ -339,6 +339,10 @@ void emit_asm(BuildCtx *ctx)
fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident); fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident);
break; break;
case BUILD_machasm: case BUILD_machasm:
#if defined(__apple_build_version__) && __apple_build_version__ >= 15000000 && __apple_build_version__ < 15000300
/* Workaround for XCode 15.0 - 15.2. */
fprintf(ctx->fp, "\t.subsections_via_symbols\n");
#endif
fprintf(ctx->fp, fprintf(ctx->fp,
"\t.cstring\n" "\t.cstring\n"
"\t.ascii \"%s\\0\"\n", ctx->dasm_ident); "\t.ascii \"%s\\0\"\n", ctx->dasm_ident);

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder: IR folding hash table generator. ** LuaJIT VM builder: IR folding hash table generator.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "buildvm.h" #include "buildvm.h"

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder: library definition compiler. ** LuaJIT VM builder: library definition compiler.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "buildvm.h" #include "buildvm.h"

View File

@ -1,6 +1,6 @@
/* /*
** LuaJIT VM builder: PE object emitter. ** LuaJIT VM builder: PE object emitter.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Only used for building on Windows, since we cannot assume the presence ** Only used for building on Windows, since we cannot assume the presence
** of a suitable assembler. The host and target byte order must match. ** of a suitable assembler. The host and target byte order must match.
@ -9,7 +9,7 @@
#include "buildvm.h" #include "buildvm.h"
#include "lj_bc.h" #include "lj_bc.h"
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
/* Context for PE object emitter. */ /* Context for PE object emitter. */
static char *strtab; static char *strtab;
@ -93,6 +93,17 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_ADDR32NB 0x03 #define PEOBJ_RELOC_ADDR32NB 0x03
#define PEOBJ_RELOC_OFS 0 #define PEOBJ_RELOC_OFS 0
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 6
#define PEOBJ_XDATA_SIZE (8*2+4+6*2)
#elif LJ_TARGET_ARM64
#define PEOBJ_ARCH_TARGET 0xaa64
#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */
#define PEOBJ_RELOC_DIR32 0x01
#define PEOBJ_RELOC_ADDR32NB 0x02
#define PEOBJ_RELOC_OFS (-4)
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 4
#define PEOBJ_XDATA_SIZE (4+24+4 +4+8)
#endif #endif
/* Section numbers (0-based). */ /* Section numbers (0-based). */
@ -100,7 +111,7 @@ enum {
PEOBJ_SECT_ABS = -2, PEOBJ_SECT_ABS = -2,
PEOBJ_SECT_UNDEF = -1, PEOBJ_SECT_UNDEF = -1,
PEOBJ_SECT_TEXT, PEOBJ_SECT_TEXT,
#if LJ_TARGET_X64 #ifdef PEOBJ_PDATA_NRELOC
PEOBJ_SECT_PDATA, PEOBJ_SECT_PDATA,
PEOBJ_SECT_XDATA, PEOBJ_SECT_XDATA,
#elif LJ_TARGET_X86 #elif LJ_TARGET_X86
@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx)
uint32_t sofs; uint32_t sofs;
int i, nrsym; int i, nrsym;
union { uint8_t b; uint32_t u; } host_endian; union { uint8_t b; uint32_t u; } host_endian;
#ifdef PEOBJ_PDATA_NRELOC
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
#endif
sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx)
/* Flags: 60 = read+execute, 50 = align16, 20 = code. */ /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
#if LJ_TARGET_X64 #ifdef PEOBJ_PDATA_NRELOC
memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
pesect[PEOBJ_SECT_PDATA].ofs = sofs; pesect[PEOBJ_SECT_PDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4); sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4);
pesect[PEOBJ_SECT_PDATA].relocofs = sofs; pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE; sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */ /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
pesect[PEOBJ_SECT_XDATA].ofs = sofs; pesect[PEOBJ_SECT_XDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */ sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */
pesect[PEOBJ_SECT_XDATA].relocofs = sofs; pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */ /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx)
*/ */
nrsym = ctx->nrelocsym; nrsym = ctx->nrelocsym;
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
#if LJ_TARGET_X64 #ifdef PEOBJ_PDATA_NRELOC
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
#endif #endif
@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx)
#if LJ_TARGET_X64 #if LJ_TARGET_X64
{ /* Write .pdata section. */ { /* Write .pdata section. */
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */
PEreloc reloc; PEreloc reloc;
pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
@ -308,6 +321,87 @@ void emit_peobj(BuildCtx *ctx)
reloc.type = PEOBJ_RELOC_ADDR32NB; reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
} }
#elif LJ_TARGET_ARM64
/* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
{ /* Write .pdata section. */
/*
** The section holds four 32 bit words, written as two pairs: the first
** word of a pair is an offset into .text, the second word is an offset
** into .xdata. One relocation per word follows the data (vaddr 0, 4, 8
** and 12), so the stored values are presumably used as addends by the
** linker -- TODO confirm against the PE/COFF relocation rules.
*/
uint32_t pdata[4];
PEreloc reloc;
/* First pair: code at offset 0 of .text, record at offset 0 of .xdata. */
pdata[0] = 0;
pdata[1] = 0;
/* Second pair: code starting at fcofs (offset of the last symbol). */
pdata[2] = fcofs;
/*
** Offset of the second .xdata record. The first record is written as a
** 4 byte header word, 24 bytes of unwind codes and a 4 byte handler RVA.
*/
pdata[3] = 4+24+4;
owrite(ctx, &pdata, sizeof(pdata));
/*
** NOTE(review): the symidx expressions below presumably select the .text
** and .xdata section symbols, as the comments say. The symbol table
** layout is not visible in this block, so verify against the symbol
** emission order at the end of emit_peobj.
*/
/* Start of .text and start of .xdata. */
reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
/* Start of vm_ffi_call and start of second part of .xdata. */
reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
{ /* Write .xdata section. */
/*
** Two unwind records are written back to back:
**   1. header word + 24 bytes of unwind codes + handler RVA (32 bytes),
**   2. header word + 8 bytes of unwind codes (12 bytes).
** The unwind codes are assembled in uwc[] through the write pointer p,
** using the helper macros below. The macro names follow the unwind code
** names of the Microsoft ARM64 exception handling document.
*/
uint32_t u32;
uint8_t *p, uwc[24];
PEreloc reloc;
/* Store a 16 bit value at p, high byte first, and advance p by 2. */
#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
/* One byte code: stack allocation of s bytes, encoded in units of 16. */
#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */
/* One byte code: FP/LR pair saved at offset o, encoded in units of 8. */
#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */
/* Two byte code: register pair starting at x<r> (r >= 19) saved at offset o. */
#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
/* Emit CSAVE_REGP for pairs r1..r2, lowering the offset by 16 per pair. */
#define CSAVE_REGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
} while (0)
/*
** Two byte code: register pair saved with a negative (pre-indexed) offset o.
** The offset field holds ~o >> 3, e.g. o = -32 is encoded as 3.
*/
#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
/* Two byte code: FP register pair starting at d<r> (r >= 8) saved at offset o. */
#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
/* Emit CSAVE_FREGP for pairs r1..r2, lowering the offset by 16 per pair. */
#define CSAVE_FREGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
} while (0)
/* Two byte code: frame pointer set up at offset s, encoded in units of 8. */
#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */
#define CODE_NOP 0xe3
#define CODE_END 0xe4
/* Terminate the code list and pad with nops to a multiple of 4 bytes. */
#define CEND_ALIGN do { \
*p++ = CODE_END; \
while ((p - uwc) & 3) *p++ = CODE_NOP; \
} while (0)
/* Unwind codes for .text section with handler. */
/* The trailing +n comments count the bytes emitted by each line. */
p = uwc;
CADD_FP(192); /* +2 */
CSAVE_REGS(19, 28, 176); /* +5*2 */
CSAVE_FREGS(8, 15, 96); /* +4*2 */
CSAVE_FPLR(192); /* +1 */
CALLOC_S(208); /* +1 */
CEND_ALIGN; /* +1 +1 -> 24 */
/*
** Header word: number of 32 bit unwind code words at bit 27, bit 20 set
** (this record is followed by a handler RVA), low bits hold the covered
** length fcofs in units of 4 bytes.
*/
u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 24);
u32 = 0; /* Handler RVA to be relocated at 4 + 24. */
owrite(ctx, &u32, 4);
/* Unwind codes for vm_ffi_call without handler. */
p = uwc;
CADD_FP(16); /* +2 */
CSAVE_FPLR(16); /* +1 */
CSAVE_REGPX(19, -32); /* +2 */
CEND_ALIGN; /* +1 +2 -> 8 */
/*
** Header word without bit 20, so no handler RVA follows. The covered
** length is the rest of the code after fcofs, in units of 4 bytes.
*/
u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 8);
/*
** Single relocation for the handler RVA of the first record.
** NOTE(review): symidx presumably refers to the lj_err_unwind_win symbol;
** the symbol table layout is not visible in this block -- confirm.
*/
reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_X86 #elif LJ_TARGET_X86
/* Write .sxdata section. */ /* Write .sxdata section. */
for (i = 0; i < nrsym; i++) { for (i = 0; i < nrsym; i++) {
@ -339,7 +433,7 @@ void emit_peobj(BuildCtx *ctx)
emit_peobj_sym(ctx, ctx->relocsym[i], 0, emit_peobj_sym(ctx, ctx->relocsym[i], 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
#if LJ_TARGET_X64 #ifdef PEOBJ_PDATA_NRELOC
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
emit_peobj_sym(ctx, "lj_err_unwind_win", 0, emit_peobj_sym(ctx, "lj_err_unwind_win", 0,

View File

@ -2,7 +2,7 @@
-- Lua script to dump the bytecode of the library functions written in Lua. -- Lua script to dump the bytecode of the library functions written in Lua.
-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@ -138,65 +138,73 @@ local function fixup_dump(dump, fixup)
return { dump = ndump, startbc = startbc, sizebc = sizebc } return { dump = ndump, startbc = startbc, sizebc = sizebc }
end end
local function find_defs(src) local function find_defs(src, mode)
local defs = {} local defs = {}
for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
local env = {}
local tcode, fixup = transform_lua(code) local tcode, fixup = transform_lua(code)
local func = assert(load(tcode, "", nil, env))() local func = assert(load(tcode, "", mode))
defs[name] = fixup_dump(string.dump(func, true), fixup) defs[name] = fixup_dump(string.dump(func, mode), fixup)
defs[#defs+1] = name defs[#defs+1] = name
end end
return defs return defs
end end
local function gen_header(defs) local function gen_header(defs32, defs64)
local t = {} local t = {}
local function w(x) t[#t+1] = x end local function w(x) t[#t+1] = x end
w("/* This is a generated file. DO NOT EDIT! */\n\n") w("/* This is a generated file. DO NOT EDIT! */\n\n")
w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n") w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
local s, sb = "", "" for j,defs in ipairs{defs64, defs32} do
for i,name in ipairs(defs) do local s, sb = "", ""
local d = defs[name] for i,name in ipairs(defs) do
s = s .. d.dump local d = defs[name]
sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1) s = s .. d.dump
.. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc) sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1)
.. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4) .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
end .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
w("static const uint8_t libbc_code[] = {\n") end
local n = 0 if j == 1 then
for i=1,#s do w("static const uint8_t libbc_code[] = {\n#if LJ_FR2\n")
local x = string.byte(s, i) else
local xb = string.byte(sb, i) w("\n#else\n")
if xb == 255 then end
local name = BCN[x] local n = 0
local m = #name + 4 for i=1,#s do
if n + m > 78 then n = 0; w("\n") end local x = string.byte(s, i)
n = n + m local xb = string.byte(sb, i)
w("BC_"); w(name) if xb == 255 then
else local name = BCN[x]
local m = x < 10 and 2 or (x < 100 and 3 or 4) local m = #name + 4
if xb == 0 then if n + m > 78 then n = 0; w("\n") end
if n + m > 78 then n = 0; w("\n") end n = n + m
else w("BC_"); w(name)
local name = defs[xb]:gsub("_", ".") else
if n ~= 0 then w("\n") end local m = x < 10 and 2 or (x < 100 and 3 or 4)
w("/* "); w(name); w(" */ ") if xb == 0 then
n = #name + 7 if n + m > 78 then n = 0; w("\n") end
end else
n = n + m local name = defs[xb]:gsub("_", ".")
w(x) if n ~= 0 then w("\n") end
w("/* "); w(name); w(" */ ")
n = #name + 7
end
n = n + m
w(x)
end
w(",")
end end
w(",")
end end
w("\n0\n};\n\n") w("\n#endif\n0\n};\n\n")
w("static const struct { const char *name; int ofs; } libbc_map[] = {\n") w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
local m = 0 local m32, m64 = 0, 0
for _,name in ipairs(defs) do for i,name in ipairs(defs32) do
w('{"'); w(name); w('",'); w(m) w('},\n') assert(name == defs64[i])
m = m + #defs[name].dump w('{"'); w(name); w('",'); w(m32) w('},\n')
m32 = m32 + #defs32[name].dump
m64 = m64 + #defs64[name].dump
assert(m32 == m64)
end end
w("{NULL,"); w(m); w("}\n};\n\n") w("{NULL,"); w(m32); w("}\n};\n\n")
return table.concat(t) return table.concat(t)
end end
@ -219,7 +227,8 @@ end
local outfile = parse_arg(arg) local outfile = parse_arg(arg)
local src = read_files(arg) local src = read_files(arg)
local defs = find_defs(src) local defs32 = find_defs(src, "Wdts")
local hdr = gen_header(defs) local defs64 = find_defs(src, "Xdts")
local hdr = gen_header(defs32, defs64)
write_file(outfile, hdr) write_file(outfile, hdr)

View File

@ -2,7 +2,7 @@
-- Lua script to generate a customized, minified version of Lua. -- Lua script to generate a customized, minified version of Lua.
-- The resulting 'minilua' is used for the build process of LuaJIT. -- The resulting 'minilua' is used for the build process of LuaJIT.
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------

View File

@ -1,13 +1,14 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Lua script to embed the rolling release version in luajit.h. -- Lua script to embed the rolling release version in luajit.h.
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
local FILE_INPUT_H = "luajit_rolling.h" local arg = {...}
local FILE_INPUT_R = "luajit_relver.txt" local FILE_ROLLING_H = arg[1] or "luajit_rolling.h"
local FILE_OUTPUT_H = "luajit.h" local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt"
local FILE_LUAJIT_H = arg[3] or "luajit.h"
local function file_read(file) local function file_read(file)
local fp = assert(io.open(file, "rb"), "run from the wrong directory") local fp = assert(io.open(file, "rb"), "run from the wrong directory")
@ -28,8 +29,8 @@ local function file_write_mod(file, data)
assert(fp:close()) assert(fp:close())
end end
local text = file_read(FILE_INPUT_H) local text = file_read(FILE_ROLLING_H):gsub("#error.-\n", "")
local relver = file_read(FILE_INPUT_R):match("(%d+)") local relver = file_read(FILE_RELVER_TXT):match("(%d+)")
if relver then if relver then
text = text:gsub("ROLLING", relver) text = text:gsub("ROLLING", relver)
@ -38,6 +39,7 @@ else
**** WARNING Cannot determine rolling release version from git log. **** WARNING Cannot determine rolling release version from git log.
**** WARNING The 'git' command must be available during the build. **** WARNING The 'git' command must be available during the build.
]]) ]])
file_write_mod(FILE_RELVER_TXT, "ROLLING\n") -- Fallback for install target.
end end
file_write_mod(FILE_OUTPUT_H, text) file_write_mod(FILE_LUAJIT_H, text)

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT bytecode listing module. -- LuaJIT bytecode listing module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT module to save/list bytecode. -- LuaJIT module to save/list bytecode.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --
@ -29,6 +29,9 @@ Save LuaJIT bytecode: luajit -b[options] input output
-l Only list bytecode. -l Only list bytecode.
-s Strip debug info (default). -s Strip debug info (default).
-g Keep debug info. -g Keep debug info.
-W Generate 32 bit (non-GC64) bytecode.
-X Generate 64 bit (GC64) bytecode.
-d Generate bytecode in deterministic manner.
-n name Set module name (default: auto-detect from input name). -n name Set module name (default: auto-detect from input name).
-t type Set output file type (default: auto-detect from output name). -t type Set output file type (default: auto-detect from output name).
-a arch Override architecture for object files (default: native). -a arch Override architecture for object files (default: native).
@ -38,7 +41,7 @@ Save LuaJIT bytecode: luajit -b[options] input output
-- Stop handling options. -- Stop handling options.
- Use stdin as input and/or stdout as output. - Use stdin as input and/or stdout as output.
File types: c h obj o raw (default) File types: c cc h obj o raw (default)
]] ]]
os.exit(1) os.exit(1)
end end
@ -51,8 +54,9 @@ local function check(ok, ...)
end end
local function readfile(ctx, input) local function readfile(ctx, input)
if type(input) == "function" then return input end if ctx.string then
if ctx.filename then return check(loadstring(input, nil, ctx.mode))
elseif ctx.filename then
local data local data
if input == "-" then if input == "-" then
data = io.stdin:read("*a") data = io.stdin:read("*a")
@ -61,10 +65,10 @@ local function readfile(ctx, input)
data = assert(fp:read("*a")) data = assert(fp:read("*a"))
assert(fp:close()) assert(fp:close())
end end
return check(load(data, ctx.filename)) return check(load(data, ctx.filename, ctx.mode))
else else
if input == "-" then input = nil end if input == "-" then input = nil end
return check(loadfile(input)) return check(loadfile(input, ctx.mode))
end end
end end
@ -81,7 +85,7 @@ end
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
local map_type = { local map_type = {
raw = "raw", c = "c", h = "h", o = "obj", obj = "obj", raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
} }
local map_arch = { local map_arch = {
@ -435,24 +439,12 @@ typedef struct
{ {
mach_header; uint32_t reserved; mach_header; uint32_t reserved;
} mach_header_64; } mach_header_64;
typedef struct {
uint32_t cmd, cmdsize;
char segname[16];
uint32_t vmaddr, vmsize, fileoff, filesize;
uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command;
typedef struct { typedef struct {
uint32_t cmd, cmdsize; uint32_t cmd, cmdsize;
char segname[16]; char segname[16];
uint64_t vmaddr, vmsize, fileoff, filesize; uint64_t vmaddr, vmsize, fileoff, filesize;
uint32_t maxprot, initprot, nsects, flags; uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command_64; } mach_segment_command_64;
typedef struct {
char sectname[16], segname[16];
uint32_t addr, size;
uint32_t offset, align, reloff, nreloc, flags;
uint32_t reserved1, reserved2;
} mach_section;
typedef struct { typedef struct {
char sectname[16], segname[16]; char sectname[16], segname[16];
uint64_t addr, size; uint64_t addr, size;
@ -462,139 +454,64 @@ typedef struct {
typedef struct { typedef struct {
uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize; uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize;
} mach_symtab_command; } mach_symtab_command;
typedef struct {
int32_t strx;
uint8_t type, sect;
int16_t desc;
uint32_t value;
} mach_nlist;
typedef struct { typedef struct {
int32_t strx; int32_t strx;
uint8_t type, sect; uint8_t type, sect;
uint16_t desc; uint16_t desc;
uint64_t value; uint64_t value;
} mach_nlist_64; } mach_nlist_64;
typedef struct
{
int32_t magic, nfat_arch;
} mach_fat_header;
typedef struct
{
int32_t cputype, cpusubtype, offset, size, align;
} mach_fat_arch;
typedef struct { typedef struct {
struct { mach_header_64 hdr;
mach_header hdr; mach_segment_command_64 seg;
mach_segment_command seg; mach_section_64 sec;
mach_section sec; mach_symtab_command sym;
mach_symtab_command sym;
} arch[1];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_obj;
typedef struct {
struct {
mach_header_64 hdr;
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
} arch[1];
mach_nlist_64 sym_entry; mach_nlist_64 sym_entry;
uint8_t space[4096]; uint8_t space[4096];
} mach_obj_64; } mach_obj_64;
typedef struct {
mach_fat_header fat;
mach_fat_arch fat_arch[2];
struct {
mach_header hdr;
mach_segment_command seg;
mach_section sec;
mach_symtab_command sym;
} arch[2];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_fat_obj;
typedef struct {
mach_fat_header fat;
mach_fat_arch fat_arch[2];
struct {
mach_header_64 hdr;
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
} arch[2];
mach_nlist_64 sym_entry;
uint8_t space[4096];
} mach_fat_obj_64;
]] ]]
local symname = '_'..LJBC_PREFIX..ctx.modname local symname = '_'..LJBC_PREFIX..ctx.modname
local isfat, is64, align, mobj = false, false, 4, "mach_obj" local cputype, cpusubtype = 0x01000007, 3
if ctx.arch == "x64" then if ctx.arch ~= "x64" then
is64, align, mobj = true, 8, "mach_obj_64" check(ctx.arch == "arm64", "unsupported architecture for OSX")
elseif ctx.arch == "arm" then cputype, cpusubtype = 0x0100000c, 0
isfat, mobj = true, "mach_fat_obj"
elseif ctx.arch == "arm64" then
is64, align, isfat, mobj = true, 8, true, "mach_fat_obj_64"
else
check(ctx.arch == "x86", "unsupported architecture for OSX")
end end
local function aligned(v, a) return bit.band(v+a-1, -a) end local function aligned(v, a) return bit.band(v+a-1, -a) end
local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE.
-- Create Mach-O object and fill in header. -- Create Mach-O object and fill in header.
local o = ffi.new(mobj) local o = ffi.new("mach_obj_64")
local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8)
local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
if isfat then
o.fat.magic = be32(0xcafebabe)
o.fat.nfat_arch = be32(#cpusubtype)
end
-- Fill in sections and symbols. -- Fill in sections and symbols.
for i=0,#cpusubtype-1 do o.hdr.magic = 0xfeedfacf
local ofs = 0 o.hdr.cputype = cputype
if isfat then o.hdr.cpusubtype = cpusubtype
local a = o.fat_arch[i] o.hdr.filetype = 1
a.cputype = be32(cputype[i+1]) o.hdr.ncmds = 2
a.cpusubtype = be32(cpusubtype[i+1]) o.hdr.sizeofcmds = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)+ffi.sizeof(o.sym)
-- Subsequent slices overlap each other to share data. o.seg.cmd = 0x19
ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0]) o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)
a.offset = be32(ofs) o.seg.vmsize = #s
a.size = be32(mach_size-ofs+#s) o.seg.fileoff = mach_size
end o.seg.filesize = #s
local a = o.arch[i] o.seg.maxprot = 1
a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface o.seg.initprot = 1
a.hdr.cputype = cputype[i+1] o.seg.nsects = 1
a.hdr.cpusubtype = cpusubtype[i+1] ffi.copy(o.sec.sectname, "__data")
a.hdr.filetype = 1 ffi.copy(o.sec.segname, "__DATA")
a.hdr.ncmds = 2 o.sec.size = #s
a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym) o.sec.offset = mach_size
a.seg.cmd = is64 and 0x19 or 0x1 o.sym.cmd = 2
a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec) o.sym.cmdsize = ffi.sizeof(o.sym)
a.seg.vmsize = #s o.sym.symoff = ffi.offsetof(o, "sym_entry")
a.seg.fileoff = mach_size-ofs o.sym.nsyms = 1
a.seg.filesize = #s o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)
a.seg.maxprot = 1 o.sym.strsize = aligned(#symname+2, 8)
a.seg.initprot = 1
a.seg.nsects = 1
ffi.copy(a.sec.sectname, "__data")
ffi.copy(a.sec.segname, "__DATA")
a.sec.size = #s
a.sec.offset = mach_size-ofs
a.sym.cmd = 2
a.sym.cmdsize = ffi.sizeof(a.sym)
a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs
a.sym.nsyms = 1
a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs
a.sym.strsize = aligned(#symname+2, align)
end
o.sym_entry.type = 0xf o.sym_entry.type = 0xf
o.sym_entry.sect = 1 o.sym_entry.sect = 1
o.sym_entry.strx = 1 o.sym_entry.strx = 1
ffi.copy(o.space+1, symname) ffi.copy(o.space+1, symname)
-- Write Macho-O object file. -- Write Mach-O object file.
local fp = savefile(output, "wb") local fp = savefile(output, "wb")
fp:write(ffi.string(o, mach_size)) fp:write(ffi.string(o, mach_size))
bcsave_tail(fp, output, s) bcsave_tail(fp, output, s)
@ -624,7 +541,7 @@ end
local function bcsave(ctx, input, output) local function bcsave(ctx, input, output)
local f = readfile(ctx, input) local f = readfile(ctx, input)
local s = string.dump(f, ctx.strip) local s = string.dump(f, ctx.mode)
local t = ctx.type local t = ctx.type
if not t then if not t then
t = detecttype(output) t = detecttype(output)
@ -647,9 +564,11 @@ local function docmd(...)
local n = 1 local n = 1
local list = false local list = false
local ctx = { local ctx = {
strip = true, arch = jit.arch, os = jit.os:lower(), mode = "bt", arch = jit.arch, os = jit.os:lower(),
type = false, modname = false, type = false, modname = false, string = false,
} }
local strip = "s"
local gc64 = ""
while n <= #arg do while n <= #arg do
local a = arg[n] local a = arg[n]
if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
@ -660,14 +579,18 @@ local function docmd(...)
if opt == "l" then if opt == "l" then
list = true list = true
elseif opt == "s" then elseif opt == "s" then
ctx.strip = true strip = "s"
elseif opt == "g" then elseif opt == "g" then
ctx.strip = false strip = ""
elseif opt == "W" or opt == "X" then
gc64 = opt
elseif opt == "d" then
ctx.mode = ctx.mode .. opt
else else
if arg[n] == nil or m ~= #a then usage() end if arg[n] == nil or m ~= #a then usage() end
if opt == "e" then if opt == "e" then
if n ~= 1 then usage() end if n ~= 1 then usage() end
arg[1] = check(loadstring(arg[1])) ctx.string = true
elseif opt == "n" then elseif opt == "n" then
ctx.modname = checkmodname(tremove(arg, n)) ctx.modname = checkmodname(tremove(arg, n))
elseif opt == "t" then elseif opt == "t" then
@ -687,6 +610,7 @@ local function docmd(...)
n = n + 1 n = n + 1
end end
end end
ctx.mode = ctx.mode .. strip .. gc64
if list then if list then
if #arg == 0 or #arg > 2 then usage() end if #arg == 0 or #arg > 2 then usage() end
bclist(ctx, arg[1], arg[2] or "-") bclist(ctx, arg[1], arg[2] or "-")

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT ARM disassembler module. -- LuaJIT ARM disassembler module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module. -- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT ARM64 disassembler module. -- LuaJIT ARM64 disassembler module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
-- --
-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
@ -107,24 +107,20 @@ local map_logsr = { -- Logical, shifted register.
[0] = { [0] = {
shift = 29, mask = 3, shift = 29, mask = 3,
[0] = { [0] = {
shift = 21, mask = 7, shift = 21, mask = 1,
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", [0] = "andDNMSg", "bicDNMSg"
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
}, },
{ {
shift = 21, mask = 7, shift = 21, mask = 1,
[0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", [0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
}, },
{ {
shift = 21, mask = 7, shift = 21, mask = 1,
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", [0] = "eorDNMSg", "eonDNMSg"
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
}, },
{ {
shift = 21, mask = 7, shift = 21, mask = 1,
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", [0] = "ands|tstD0NMSg", "bicsDNMSg"
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
} }
}, },
false -- unallocated false -- unallocated
@ -132,24 +128,20 @@ local map_logsr = { -- Logical, shifted register.
{ {
shift = 29, mask = 3, shift = 29, mask = 3,
[0] = { [0] = {
shift = 21, mask = 7, shift = 21, mask = 1,
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", [0] = "andDNMSg", "bicDNMSg"
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
}, },
{ {
shift = 21, mask = 7, shift = 21, mask = 1,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", [0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
}, },
{ {
shift = 21, mask = 7, shift = 21, mask = 1,
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", [0] = "eorDNMSg", "eonDNMSg"
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
}, },
{ {
shift = 21, mask = 7, shift = 21, mask = 1,
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", [0] = "ands|tstD0NMSg", "bicsDNMSg"
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
} }
} }
} }
@ -666,6 +658,10 @@ local map_datafp = { -- Data processing, SIMD and FP.
} }
} }
} }
},
{ -- 010
shift = 0, mask = 0x81f8fc00,
[0x100e400] = "moviDdG"
} }
} }
@ -735,7 +731,7 @@ local map_cond = {
"hi", "ls", "ge", "lt", "gt", "le", "al", "hi", "ls", "ge", "lt", "gt", "le", "al",
} }
local map_shift = { [0] = "lsl", "lsr", "asr", } local map_shift = { [0] = "lsl", "lsr", "asr", "ror"}
local map_extend = { local map_extend = {
[0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
@ -840,6 +836,20 @@ local function parse_fpimm8(op)
return sign * frac * 2^exp return sign * frac * 2^exp
end end
-- Decode the byte-mask immediate of an instruction word for the "G" operand.
-- Eight single bits of `op` are each replicated into one full byte (0x00 or
-- 0xff) of a 64 bit value, which is built as two 32 bit halves.
-- Returns the value as a hex string without any prefix; the upper half is
-- only included when it is non-zero.
-- NOTE(review): presumably this is the 64 bit MOVI Dd, #imm encoding
-- (a:b:c in op bits 18..16, d:e:f:g:h in op bits 9..5) -- confirm against
-- the architecture manual.
local function decode_fpmovi(op)
-- lo starts at op bit 5, hi starts at op bit 9.
local lo = rshift(op, 5)
local hi = rshift(op, 9)
-- Lower half: op bits 5, 6, 7, 8 select bytes 0, 1, 2, 3. Each multiplier
-- scales the isolated bit up to an all-ones byte at its position, e.g.
-- 2 * 0x7f80 = 0xff00 and 8 * 0x1fe00000 = 0xff000000.
lo = bor(band(lo, 1) * 0xff, band(lo, 2) * 0x7f80, band(lo, 4) * 0x3fc000,
band(lo, 8) * 0x1fe00000)
-- Upper half: op bits 9, 16, 17, 18 select bytes 4, 5, 6, 7. After the shift
-- by 9 these are bits 0, 7, 8, 9 of hi, e.g. 0x80 * 0x1fe = 0xff00.
hi = bor(band(hi, 1) * 0xff, band(hi, 0x80) * 0x1fe,
band(hi, 0x100) * 0xff00, band(hi, 0x200) * 0x7f8000)
-- Omit the upper half when it is zero.
-- NOTE(review): tohex(lo) is assumed to yield exactly 8 zero-padded hex
-- digits, so that the concatenation forms a valid 64 bit literal -- confirm.
if hi ~= 0 then
return fmt_hex32(hi)..tohex(lo)
else
return fmt_hex32(lo)
end
end
local function prefer_bfx(sf, uns, imms, immr) local function prefer_bfx(sf, uns, imms, immr)
if imms < immr or imms == 31 or imms == 63 then if imms < immr or imms == 31 or imms == 63 then
return false return false
@ -956,7 +966,7 @@ local function disass_ins(ctx)
elseif p == "U" then elseif p == "U" then
local rn = map_regs.x[band(rshift(op, 5), 31)] local rn = map_regs.x[band(rshift(op, 5), 31)]
local sz = band(rshift(op, 30), 3) local sz = band(rshift(op, 30), 3)
local imm12 = lshift(arshift(lshift(op, 10), 20), sz) local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
if imm12 ~= 0 then if imm12 ~= 0 then
x = "["..rn..", #"..imm12.."]" x = "["..rn..", #"..imm12.."]"
else else
@ -993,8 +1003,7 @@ local function disass_ins(ctx)
x = x.."]" x = x.."]"
end end
elseif p == "P" then elseif p == "P" then
local opcv, sh = rshift(op, 26), 2 local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
local imm7 = lshift(arshift(lshift(op, 10), 25), sh) local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
local rn = map_regs.x[band(rshift(op, 5), 31)] local rn = map_regs.x[band(rshift(op, 5), 31)]
local ind = band(rshift(op, 23), 3) local ind = band(rshift(op, 23), 3)
@ -1140,6 +1149,8 @@ local function disass_ins(ctx)
x = 0 x = 0
elseif p == "F" then elseif p == "F" then
x = parse_fpimm8(op) x = parse_fpimm8(op)
elseif p == "G" then
x = "#0x"..decode_fpmovi(op)
elseif p == "g" or p == "f" or p == "x" or p == "w" or elseif p == "g" or p == "f" or p == "x" or p == "w" or
p == "d" or p == "s" then p == "d" or p == "s" then
-- These are handled in D/N/M/A. -- These are handled in D/N/M/A.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT ARM64BE disassembler wrapper module. -- LuaJIT ARM64BE disassembler wrapper module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- ARM64 instructions are always little-endian. So just forward to the -- ARM64 instructions are always little-endian. So just forward to the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS disassembler module. -- LuaJIT MIPS disassembler module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h -- Released under the MIT/X license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module. -- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS64 disassembler wrapper module. -- LuaJIT MIPS64 disassembler wrapper module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the big-endian functions from the -- This module just exports the big-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS64EL disassembler wrapper module. -- LuaJIT MIPS64EL disassembler wrapper module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the -- This module just exports the little-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS64R6 disassembler wrapper module. -- LuaJIT MIPS64R6 disassembler wrapper module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the r6 big-endian functions from the -- This module just exports the r6 big-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPS64R6EL disassembler wrapper module. -- LuaJIT MIPS64R6EL disassembler wrapper module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the r6 little-endian functions from the -- This module just exports the r6 little-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT MIPSEL disassembler wrapper module. -- LuaJIT MIPSEL disassembler wrapper module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the -- This module just exports the little-endian functions from the

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT PPC disassembler module. -- LuaJIT PPC disassembler module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h -- Released under the MIT/X license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module. -- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT x64 disassembler wrapper module. -- LuaJIT x64 disassembler wrapper module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This module just exports the 64 bit functions from the combined -- This module just exports the 64 bit functions from the combined

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT x86/x64 disassembler module. -- LuaJIT x86/x64 disassembler module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module. -- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT compiler dump module. -- LuaJIT compiler dump module.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --
@ -552,7 +552,12 @@ local recdepth = 0
local function fmterr(err, info) local function fmterr(err, info)
if type(err) == "number" then if type(err) == "number" then
if type(info) == "function" then info = fmtfunc(info) end if type(info) == "function" then info = fmtfunc(info) end
err = format(vmdef.traceerr[err], info) local fmt = vmdef.traceerr[err]
if fmt == "NYI: bytecode %s" then
local oidx = 6 * info
info = sub(vmdef.bcnames, oidx+1, oidx+6)
end
err = format(fmt, info)
end end
return err return err
end end

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT profiler. -- LuaJIT profiler.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --
@ -227,9 +227,7 @@ local function prof_finish()
local samples = prof_samples local samples = prof_samples
if samples == 0 then if samples == 0 then
if prof_raw ~= true then out:write("[No samples collected]\n") end if prof_raw ~= true then out:write("[No samples collected]\n") end
return elseif prof_ann then
end
if prof_ann then
prof_annotate(prof_count1, samples) prof_annotate(prof_count1, samples)
else else
prof_top(prof_count1, prof_count2, samples, "") prof_top(prof_count1, prof_count2, samples, "")

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- Verbose mode of the LuaJIT compiler. -- Verbose mode of the LuaJIT compiler.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --
@ -62,7 +62,7 @@ local jit = require("jit")
local jutil = require("jit.util") local jutil = require("jit.util")
local vmdef = require("jit.vmdef") local vmdef = require("jit.vmdef")
local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
local type, format = type, string.format local type, sub, format = type, string.sub, string.format
local stdout, stderr = io.stdout, io.stderr local stdout, stderr = io.stdout, io.stderr
-- Active flag and output file handle. -- Active flag and output file handle.
@ -89,7 +89,12 @@ end
local function fmterr(err, info) local function fmterr(err, info)
if type(err) == "number" then if type(err) == "number" then
if type(info) == "function" then info = fmtfunc(info) end if type(info) == "function" then info = fmtfunc(info) end
err = format(vmdef.traceerr[err], info) local fmt = vmdef.traceerr[err]
if fmt == "NYI: bytecode %s" then
local oidx = 6 * info
info = sub(vmdef.bcnames, oidx+1, oidx+6)
end
err = format(fmt, info)
end end
return err return err
end end

View File

@ -1,7 +1,7 @@
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- LuaJIT profiler zones. -- LuaJIT profiler zones.
-- --
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. -- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h -- Released under the MIT license. See Copyright Notice in luajit.h
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
-- --

View File

@ -1,6 +1,6 @@
/* /*
** Auxiliary library for the Lua/C API. ** Auxiliary library for the Lua/C API.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major parts taken verbatim or adapted from the Lua interpreter. ** Major parts taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** Base and coroutine library. ** Base and coroutine library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -146,6 +146,8 @@ LJLIB_CF(getfenv) LJLIB_REC(.)
cTValue *o = L->base; cTValue *o = L->base;
if (!(o < L->top && tvisfunc(o))) { if (!(o < L->top && tvisfunc(o))) {
int level = lj_lib_optint(L, 1, 1); int level = lj_lib_optint(L, 1, 1);
if (level < 0)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
o = lj_debug_frame(L, level, &level); o = lj_debug_frame(L, level, &level);
if (o == NULL) if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL); lj_err_arg(L, 1, LJ_ERR_INVLVL);
@ -168,6 +170,8 @@ LJLIB_CF(setfenv)
setgcref(L->env, obj2gco(t)); setgcref(L->env, obj2gco(t));
return 0; return 0;
} }
if (level < 0)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
o = lj_debug_frame(L, level, &level); o = lj_debug_frame(L, level, &level);
if (o == NULL) if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL); lj_err_arg(L, 1, LJ_ERR_INVLVL);
@ -360,7 +364,11 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
static int load_aux(lua_State *L, int status, int envarg) static int load_aux(lua_State *L, int status, int envarg)
{ {
if (status == LUA_OK) { if (status == LUA_OK) {
if (tvistab(L->base+envarg-1)) { /*
** Set environment table for top-level function.
** Don't do this for non-native bytecode, which returns a prototype.
*/
if (tvistab(L->base+envarg-1) && tvisfunc(L->top-1)) {
GCfunc *fn = funcV(L->top-1); GCfunc *fn = funcV(L->top-1);
GCtab *t = tabV(L->base+envarg-1); GCtab *t = tabV(L->base+envarg-1);
setgcref(fn->c.env, obj2gco(t)); setgcref(fn->c.env, obj2gco(t));
@ -616,7 +624,10 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
setstrV(L, L->base-LJ_FR2, lj_err_str(L, em)); setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
return FFH_RES(2); return FFH_RES(2);
} }
lj_state_growstack(co, (MSize)(L->top - L->base)); if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) {
cTValue *msg = --co->top;
lj_err_callermsg(L, strVdata(msg));
}
return FFH_RETRY; return FFH_RETRY;
} }

View File

@ -1,6 +1,6 @@
/* /*
** Bit manipulation library. ** Bit manipulation library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lib_bit_c #define lib_bit_c
@ -98,7 +98,7 @@ LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift); x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
return bit_result64(L, id, x); return bit_result64(L, id, x);
} }
if (id2) setintV(L->base+1, sh); setintV(L->base+1, sh);
return FFH_RETRY; return FFH_RETRY;
#else #else
lj_lib_checknumber(L, 1); lj_lib_checknumber(L, 1);

View File

@ -1,6 +1,6 @@
/* /*
** Buffer library. ** Buffer library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lib_buffer_c #define lib_buffer_c

View File

@ -1,6 +1,6 @@
/* /*
** Debug library. ** Debug library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** FFI library. ** FFI library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lib_ffi_c #define lib_ffi_c
@ -305,7 +305,7 @@ LJLIB_CF(ffi_meta___tostring)
p = *(void **)p; p = *(void **)p;
} else if (ctype_isenum(ct->info)) { } else if (ctype_isenum(ct->info)) {
msg = "cdata<%s>: %d"; msg = "cdata<%s>: %d";
p = (void *)(uintptr_t)*(uint32_t **)p; p = (void *)(uintptr_t)*(uint32_t *)p;
} else { } else {
if (ctype_isptr(ct->info)) { if (ctype_isptr(ct->info)) {
p = cdata_getptr(p, ct->size); p = cdata_getptr(p, ct->size);
@ -513,7 +513,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
/* Handle ctype __gc metamethod. Use the fast lookup here. */ /* Handle ctype __gc metamethod. Use the fast lookup here. */
cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id); cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) { if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) {
GCtab *t = cts->finalizer; GCtab *t = tabref(G(L)->gcroot[GCROOT_FFI_FIN]);
if (gcref(t->metatable)) { if (gcref(t->metatable)) {
/* Add to finalizer table, if still enabled. */ /* Add to finalizer table, if still enabled. */
copyTV(L, lj_tab_set(L, t, o-1), tv); copyTV(L, lj_tab_set(L, t, o-1), tv);
@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
"\003win" "\003win"
#endif #endif
#if LJ_ABI_PAUTH #if LJ_ABI_PAUTH
"\007pauth" "\005pauth"
#endif #endif
#if LJ_TARGET_UWP #if LJ_TARGET_UWP
"\003uwp" "\003uwp"
@ -765,7 +765,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
return 1; return 1;
} }
LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to miscmap table. */
LJLIB_CF(ffi_metatype) LJLIB_CF(ffi_metatype)
{ {
@ -791,8 +791,6 @@ LJLIB_CF(ffi_metatype)
return 1; return 1;
} }
LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to finalizer table. */
LJLIB_CF(ffi_gc) LJLIB_REC(.) LJLIB_CF(ffi_gc) LJLIB_REC(.)
{ {
GCcdata *cd = ffi_checkcdata(L, 1); GCcdata *cd = ffi_checkcdata(L, 1);
@ -825,19 +823,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(arch)
/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */
/* Create special weak-keyed finalizer table. */
static GCtab *ffi_finalizer(lua_State *L)
{
/* NOBARRIER: The table is new (marked white). */
GCtab *t = lj_tab_new(L, 0, 1);
settabV(L, L->top++, t);
setgcref(t->metatable, obj2gco(t));
setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
lj_str_newlit(L, "k"));
t->nomm = (uint8_t)(~(1u<<MM_mode));
return t;
}
/* Register FFI module as loaded. */ /* Register FFI module as loaded. */
static void ffi_register_module(lua_State *L) static void ffi_register_module(lua_State *L)
{ {
@ -853,7 +838,6 @@ LUALIB_API int luaopen_ffi(lua_State *L)
{ {
CTState *cts = lj_ctype_init(L); CTState *cts = lj_ctype_init(L);
settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1))); settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1)));
cts->finalizer = ffi_finalizer(L);
LJ_LIB_REG(L, NULL, ffi_meta); LJ_LIB_REG(L, NULL, ffi_meta);
/* NOBARRIER: basemt is a GC root. */ /* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1))); setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1)));

View File

@ -1,6 +1,6 @@
/* /*
** Library initialization. ** Library initialization.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major parts taken verbatim from the Lua interpreter. ** Major parts taken verbatim from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** I/O library. ** I/O library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -25,6 +25,7 @@
#include "lj_strfmt.h" #include "lj_strfmt.h"
#include "lj_ff.h" #include "lj_ff.h"
#include "lj_lib.h" #include "lj_lib.h"
#include "lj_strscan.h"
/* Userdata payload for I/O file. */ /* Userdata payload for I/O file. */
typedef struct IOFileUD { typedef struct IOFileUD {
@ -323,13 +324,14 @@ LJLIB_CF(io_method_seek)
FILE *fp = io_tofile(L)->fp; FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
int64_t ofs = 0; int64_t ofs = 0;
cTValue *o; TValue *o;
int res; int res;
if (opt == 0) opt = SEEK_SET; if (opt == 0) opt = SEEK_SET;
else if (opt == 1) opt = SEEK_CUR; else if (opt == 1) opt = SEEK_CUR;
else if (opt == 2) opt = SEEK_END; else if (opt == 2) opt = SEEK_END;
o = L->base+2; o = L->base+2;
if (o < L->top) { if (o < L->top) {
if (tvisstr(o)) lj_strscan_num(strV(o), o);
if (tvisint(o)) if (tvisint(o))
ofs = (int64_t)intV(o); ofs = (int64_t)intV(o);
else if (tvisnum(o)) else if (tvisnum(o))

View File

@ -1,6 +1,6 @@
/* /*
** JIT library. ** JIT library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lib_jit_c #define lib_jit_c
@ -161,24 +161,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(version)
/* -- Reflection API for Lua functions ------------------------------------ */ /* -- Reflection API for Lua functions ------------------------------------ */
/* Return prototype of first argument (Lua function or prototype object) */
static GCproto *check_Lproto(lua_State *L, int nolua)
{
TValue *o = L->base;
if (L->top > o) {
if (tvisproto(o)) {
return protoV(o);
} else if (tvisfunc(o)) {
if (isluafunc(funcV(o)))
return funcproto(funcV(o));
else if (nolua)
return NULL;
}
}
lj_err_argt(L, 1, LUA_TFUNCTION);
return NULL; /* unreachable */
}
static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
{ {
setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val); setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
@ -187,7 +169,7 @@ static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
/* local info = jit.util.funcinfo(func [,pc]) */ /* local info = jit.util.funcinfo(func [,pc]) */
LJLIB_CF(jit_util_funcinfo) LJLIB_CF(jit_util_funcinfo)
{ {
GCproto *pt = check_Lproto(L, 1); GCproto *pt = lj_lib_checkLproto(L, 1, 1);
if (pt) { if (pt) {
BCPos pc = (BCPos)lj_lib_optint(L, 2, 0); BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
GCtab *t; GCtab *t;
@ -229,7 +211,7 @@ LJLIB_CF(jit_util_funcinfo)
/* local ins, m = jit.util.funcbc(func, pc) */ /* local ins, m = jit.util.funcbc(func, pc) */
LJLIB_CF(jit_util_funcbc) LJLIB_CF(jit_util_funcbc)
{ {
GCproto *pt = check_Lproto(L, 0); GCproto *pt = lj_lib_checkLproto(L, 1, 0);
BCPos pc = (BCPos)lj_lib_checkint(L, 2); BCPos pc = (BCPos)lj_lib_checkint(L, 2);
if (pc < pt->sizebc) { if (pc < pt->sizebc) {
BCIns ins = proto_bc(pt)[pc]; BCIns ins = proto_bc(pt)[pc];
@ -246,7 +228,7 @@ LJLIB_CF(jit_util_funcbc)
/* local k = jit.util.funck(func, idx) */ /* local k = jit.util.funck(func, idx) */
LJLIB_CF(jit_util_funck) LJLIB_CF(jit_util_funck)
{ {
GCproto *pt = check_Lproto(L, 0); GCproto *pt = lj_lib_checkLproto(L, 1, 0);
ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2); ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2);
if (idx >= 0) { if (idx >= 0) {
if (idx < (ptrdiff_t)pt->sizekn) { if (idx < (ptrdiff_t)pt->sizekn) {
@ -266,7 +248,7 @@ LJLIB_CF(jit_util_funck)
/* local name = jit.util.funcuvname(func, idx) */ /* local name = jit.util.funcuvname(func, idx) */
LJLIB_CF(jit_util_funcuvname) LJLIB_CF(jit_util_funcuvname)
{ {
GCproto *pt = check_Lproto(L, 0); GCproto *pt = lj_lib_checkLproto(L, 1, 0);
uint32_t idx = (uint32_t)lj_lib_checkint(L, 2); uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
if (idx < pt->sizeuv) { if (idx < pt->sizeuv) {
setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx))); setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx)));

View File

@ -1,6 +1,6 @@
/* /*
** Math library. ** Math library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include <math.h> #include <math.h>
@ -13,6 +13,7 @@
#include "lualib.h" #include "lualib.h"
#include "lj_obj.h" #include "lj_obj.h"
#include "lj_err.h"
#include "lj_lib.h" #include "lj_lib.h"
#include "lj_vm.h" #include "lj_vm.h"
#include "lj_prng.h" #include "lj_prng.h"
@ -183,7 +184,10 @@ LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
LJLIB_CF(math_randomseed) LJLIB_CF(math_randomseed)
{ {
PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
random_seed(rs, lj_lib_checknum(L, 1)); if (L->base != L->top)
random_seed(rs, lj_lib_checknum(L, 1));
else if (!lj_prng_seed_secure(rs))
lj_err_caller(L, LJ_ERR_PRNGSD);
return 0; return 0;
} }

View File

@ -1,6 +1,6 @@
/* /*
** OS library. ** OS library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** Package library. ** Package library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/* /*
** String library. ** String library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -122,11 +122,25 @@ static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
LJLIB_CF(string_dump) LJLIB_CF(string_dump)
{ {
GCfunc *fn = lj_lib_checkfunc(L, 1); GCproto *pt = lj_lib_checkLproto(L, 1, 1);
int strip = L->base+1 < L->top && tvistruecond(L->base+1); uint32_t flags = 0;
SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */ SBuf *sb;
TValue *o = L->base+1;
if (o < L->top) {
if (tvisstr(o)) {
const char *mode = strVdata(o);
char c;
while ((c = *mode++)) {
if (c == 's') flags |= BCDUMP_F_STRIP;
if (c == 'd') flags |= BCDUMP_F_DETERMINISTIC;
}
} else if (tvistruecond(o)) {
flags |= BCDUMP_F_STRIP;
}
}
sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
L->top = L->base+1; L->top = L->base+1;
if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip)) if (!pt || lj_bcwrite(L, pt, writer_buf, sb, flags))
lj_err_caller(L, LJ_ERR_STRDUMP); lj_err_caller(L, LJ_ERR_STRDUMP);
setstrV(L, L->top-1, lj_buf_str(L, sb)); setstrV(L, L->top-1, lj_buf_str(L, sb));
lj_gc_check(L); lj_gc_check(L);

View File

@ -1,6 +1,6 @@
/* /*
** Table library. ** Table library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1057,7 +1057,7 @@ static size_t release_unused_segments(mstate m)
mchunkptr p = align_as_chunk(base); mchunkptr p = align_as_chunk(base);
size_t psize = chunksize(p); size_t psize = chunksize(p);
/* Can unmap if first chunk holds entire segment and not pinned */ /* Can unmap if first chunk holds entire segment and not pinned */
if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) { if (!cinuse(p) && (char *)p + psize == (char *)mem2chunk(sp)) {
tchunkptr tp = (tchunkptr)p; tchunkptr tp = (tchunkptr)p;
if (p == m->dv) { if (p == m->dv) {
m->dv = 0; m->dv = 0;

View File

@ -1,6 +1,6 @@
/* /*
** Public Lua/C API. ** Public Lua/C API.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Major portions taken verbatim or adapted from the Lua interpreter. ** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -104,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size)
if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) { if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
return 0; /* Stack overflow. */ return 0; /* Stack overflow. */
} else if (size > 0) { } else if (size > 0) {
lj_state_checkstack(L, (MSize)size); int avail = (int)(mref(L->maxstack, TValue) - L->top);
if (size > avail &&
lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
L->top--;
return 0; /* Out of memory. */
}
} }
return 1; return 1;
} }
@ -1047,6 +1052,7 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
/* Flush cache, since traces specialize to basemt. But not during __gc. */ /* Flush cache, since traces specialize to basemt. But not during __gc. */
if (lj_trace_flushall(L)) if (lj_trace_flushall(L))
lj_err_caller(L, LJ_ERR_NOGCMM); lj_err_caller(L, LJ_ERR_NOGCMM);
o = index2adr(L, idx); /* Stack may have been reallocated. */
if (tvisbool(o)) { if (tvisbool(o)) {
/* NOBARRIER: basemt is a GC root. */ /* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt)); setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));

View File

@ -1,6 +1,6 @@
/* /*
** Target architecture selection. ** Target architecture selection.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_ARCH_H #ifndef _LJ_ARCH_H
@ -57,7 +57,7 @@
#define LUAJIT_TARGET LUAJIT_ARCH_X64 #define LUAJIT_TARGET LUAJIT_ARCH_X64
#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM #define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__) #elif defined(__aarch64__) || defined(_M_ARM64)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC #define LUAJIT_TARGET LUAJIT_ARCH_PPC
@ -66,7 +66,7 @@
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#else #else
#error "No support for this architecture (yet)" #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
#endif #endif
#endif #endif
@ -124,7 +124,7 @@
#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
#if TARGET_OS_IPHONE #if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
#define LJ_TARGET_IOS 1 #define LJ_TARGET_IOS 1
#else #else
#define LJ_TARGET_IOS 0 #define LJ_TARGET_IOS 0
@ -237,7 +237,7 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#define LJ_ARCH_VERSION 80 #define LJ_ARCH_VERSION 80
#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
#define LJ_ARCH_VERSION 70 #define LJ_ARCH_VERSION 70
@ -331,6 +331,7 @@
#define LJ_ARCH_NOFFI 1 #define LJ_ARCH_NOFFI 1
#elif LJ_ARCH_BITS == 64 #elif LJ_ARCH_BITS == 64
#error "No support for PPC64" #error "No support for PPC64"
#undef LJ_TARGET_PPC
#endif #endif
#if _ARCH_PWR7 #if _ARCH_PWR7
@ -490,36 +491,45 @@
#elif LJ_TARGET_ARM #elif LJ_TARGET_ARM
#if defined(__ARMEB__) #if defined(__ARMEB__)
#error "No support for big-endian ARM" #error "No support for big-endian ARM"
#undef LJ_TARGET_ARM
#endif #endif
#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ #if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
#error "No support for Cortex-M CPUs" #error "No support for Cortex-M CPUs"
#undef LJ_TARGET_ARM
#endif #endif
#if !(__ARM_EABI__ || LJ_TARGET_IOS) #if !(__ARM_EABI__ || LJ_TARGET_IOS)
#error "Only ARM EABI or iOS 3.0+ ABI is supported" #error "Only ARM EABI or iOS 3.0+ ABI is supported"
#undef LJ_TARGET_ARM
#endif #endif
#elif LJ_TARGET_ARM64 #elif LJ_TARGET_ARM64
#if defined(_ILP32) #if defined(_ILP32)
#error "No support for ILP32 model on ARM64" #error "No support for ILP32 model on ARM64"
#undef LJ_TARGET_ARM64
#endif #endif
#elif LJ_TARGET_PPC #elif LJ_TARGET_PPC
#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
#error "No support for little-endian PPC32" #error "No support for little-endian PPC32"
#undef LJ_TARGET_PPC
#endif #endif
#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" #error "No support for PPC/e500, use LuaJIT 2.0"
#undef LJ_TARGET_PPC
#endif #endif
#elif LJ_TARGET_MIPS32 #elif LJ_TARGET_MIPS32
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
#error "Only o32 ABI supported for MIPS32" #error "Only o32 ABI supported for MIPS32"
#undef LJ_TARGET_MIPS
#endif #endif
#if LJ_TARGET_MIPSR6 #if LJ_TARGET_MIPSR6
/* Not that useful, since most available r6 CPUs are 64 bit. */ /* Not that useful, since most available r6 CPUs are 64 bit. */
#error "No support for MIPS32R6" #error "No support for MIPS32R6"
#undef LJ_TARGET_MIPS
#endif #endif
#elif LJ_TARGET_MIPS64 #elif LJ_TARGET_MIPS64
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
#error "Only n64 ABI supported for MIPS64" #error "Only n64 ABI supported for MIPS64"
#undef LJ_TARGET_MIPS
#endif #endif
#endif #endif
#endif #endif

View File

@ -1,6 +1,6 @@
/* /*
** IR assembler (SSA IR -> machine code). ** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_asm_c #define lj_asm_c
@ -29,6 +29,7 @@
#include "lj_dispatch.h" #include "lj_dispatch.h"
#include "lj_vm.h" #include "lj_vm.h"
#include "lj_target.h" #include "lj_target.h"
#include "lj_prng.h"
#ifdef LUA_USE_ASSERT #ifdef LUA_USE_ASSERT
#include <stdio.h> #include <stdio.h>
@ -93,6 +94,12 @@ typedef struct ASMState {
MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
MCode *realign; /* Realign loop if not NULL. */ MCode *realign; /* Realign loop if not NULL. */
#ifdef LUAJIT_RANDOM_RA
/* Randomize register allocation. OK for fuzz testing, not for production. */
uint64_t prngbits;
PRNGState prngstate;
#endif
#ifdef RID_NUM_KREF #ifdef RID_NUM_KREF
intptr_t krefk[RID_NUM_KREF]; intptr_t krefk[RID_NUM_KREF];
#endif #endif
@ -173,6 +180,41 @@ IRFLDEF(FLOFS)
0 0
}; };
#ifdef LUAJIT_RANDOM_RA
/* Draw nbits random bits from the assembler-local PRNG.
** as->prngbits holds the not yet consumed bits of the last 64 bit PRNG
** output. It is refilled from as->prngstate (with bit 63 forced on) once
** its value no longer exceeds the nbits-wide mask.
*/
static uint32_t ra_random_bits(ASMState *as, uint32_t nbits)
{
  uint32_t mask = (1u << nbits) - 1u;
  uint64_t pool = as->prngbits;
  uint32_t bits;
  if (pool <= mask)  /* Pool value fits inside the mask: fetch new bits. */
    pool = lj_prng_u64(&as->prngstate) | (1ull << 63);
  bits = (uint32_t)pool & mask;
  as->prngbits = pool >> nbits;  /* Drop the bits handed out. */
  return bits;
}
/* Choose a random member of a register set.
** The lowest set register is the base. If other registers are set above
** it, random offsets are drawn until one hits a set bit.
*/
static Reg rset_pickrandom(ASMState *as, RegSet rs)
{
  Reg base = rset_pickbot_(rs);
  RegSet rest = rs >> base;  /* Bit 0 now corresponds to the base register. */
  if (rest <= 1)  /* Only the base register is set: nothing to randomize. */
    return base;
  for (;;) {
    /* We need to sample max. the GPR or FPR half of the set. */
    uint32_t ofs = ra_random_bits(as, RSET_BITS-1);
    if (((rest >> ofs) & 1))
      return base + ofs;
  }
}
/* LUAJIT_RANDOM_RA build: both pick macros go through rset_pickrandom().
** The expansion references a variable named 'as' at the point of use.
*/
#define rset_picktop(rs) rset_pickrandom(as, rs)
#define rset_pickbot(rs) rset_pickrandom(as, rs)
#else
/* Otherwise the pick macros forward to the underscore-suffixed pickers. */
#define rset_picktop(rs) rset_picktop_(rs)
#define rset_pickbot(rs) rset_pickbot_(rs)
#endif
/* -- Target-specific instruction emitter --------------------------------- */ /* -- Target-specific instruction emitter --------------------------------- */
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_X86ORX64
@ -564,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
#if LJ_GC64 #if LJ_GC64
#if LJ_TARGET_ARM64
(ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
#else
(ir->o == IR_KINT && k == ir->i) || (ir->o == IR_KINT && k == ir->i) ||
#endif
(ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
((ir->o == IR_KPTR || ir->o == IR_KKPTR) && ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
k == (intptr_t)ir_kptr(ir)) k == (intptr_t)ir_kptr(ir))
@ -903,11 +949,11 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
static void asm_snap_alloc1(ASMState *as, IRRef ref) static void asm_snap_alloc1(ASMState *as, IRRef ref)
{ {
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
if (!irref_isk(ref) && ir->r != RID_SUNK) { if (!irref_isk(ref)) {
bloomset(as->snapfilt1, ref); bloomset(as->snapfilt1, ref);
bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS)); bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
if (ra_used(ir)) return; if (ra_used(ir)) return;
if (ir->r == RID_SINK) { if (ir->r == RID_SINK || ir->r == RID_SUNK) {
ir->r = RID_SUNK; ir->r = RID_SUNK;
#if LJ_HASFFI #if LJ_HASFFI
if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */ if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */
@ -2442,6 +2488,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->realign = NULL; as->realign = NULL;
as->loopinv = 0; as->loopinv = 0;
as->parent = J->parent ? traceref(J, J->parent) : NULL; as->parent = J->parent ? traceref(J, J->parent) : NULL;
#ifdef LUAJIT_RANDOM_RA
(void)lj_prng_u64(&J2G(J)->prng); /* Ensure PRNG step between traces. */
#endif
/* Reserve MCode memory. */ /* Reserve MCode memory. */
as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot); as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
@ -2483,6 +2532,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
#endif #endif
as->ir = J->curfinal->ir; /* Use the copied IR. */ as->ir = J->curfinal->ir; /* Use the copied IR. */
as->curins = J->cur.nins = as->orignins; as->curins = J->cur.nins = as->orignins;
#ifdef LUAJIT_RANDOM_RA
as->prngstate = J2G(J)->prng; /* Must (re)start from identical state. */
as->prngbits = 0;
#endif
RA_DBG_START(); RA_DBG_START();
RA_DBGX((as, "===== STOP =====")); RA_DBGX((as, "===== STOP ====="));

View File

@ -1,6 +1,6 @@
/* /*
** IR assembler (SSA IR -> machine code). ** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_ASM_H #ifndef _LJ_ASM_H

View File

@ -1,6 +1,6 @@
/* /*
** ARM IR assembler (SSA IR -> machine code). ** ARM IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
/* -- Register allocator extensions --------------------------------------- */ /* -- Register allocator extensions --------------------------------------- */
@ -969,24 +969,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) { int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, ARMI_LDR, dest, v); emit_lsptr(as, ARMI_LDR, dest, v);
} else { } else {
Reg uv = ra_scratch(as, RSET_GPR); if (guarded) {
Reg func = ra_alloc1(as, ir->op1, RSET_GPR); asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
emit_opk(as, ARMI_ADD, dest, uv,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
} }
emit_lso(as, ARMI_LDR, uv, func, if (ir->o == IR_UREFC)
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); emit_opk(as, ARMI_ADD, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, ARMI_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
} }
} }
@ -1919,7 +1927,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
} else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
as->curins--; /* Always skip the loword min/max. */ as->curins--; /* Always skip the loword min/max. */
if (uselo || usehi) if (uselo || usehi)
asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS);
return; return;
#elif LJ_HASFFI #elif LJ_HASFFI
} else if ((ir-1)->o == IR_CONV) { } else if ((ir-1)->o == IR_CONV) {
@ -1990,6 +1998,7 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno) IRIns *irp, RegSet allow, ExitNo exitno)
{ {
int savereg = 0;
Reg pbase; Reg pbase;
uint32_t k; uint32_t k;
if (irp) { if (irp) {
@ -2000,12 +2009,14 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
pbase = rset_pickbot(allow); pbase = rset_pickbot(allow);
} else { } else {
pbase = RID_RET; pbase = RID_RET;
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ savereg = 1;
} }
} else { } else {
pbase = RID_BASE; pbase = RID_BASE;
} }
emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
if (savereg)
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
k = emit_isk12(0, (int32_t)(8*topslot)); k = emit_isk12(0, (int32_t)(8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, ARMI_CMP^k, RID_TMP); emit_n(as, ARMI_CMP^k, RID_TMP);
@ -2017,7 +2028,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
if (ra_hasspill(irp->s)) if (ra_hasspill(irp->s))
emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
if (ra_hasspill(irp->s) && !allow) if (savereg)
emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
emit_loadi(as, RID_TMP, (i & ~4095)); emit_loadi(as, RID_TMP, (i & ~4095));
} else { } else {
@ -2031,11 +2042,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
SnapEntry *map = &as->T->snapmap[snap->mapofs]; SnapEntry *map = &as->T->snapmap[snap->mapofs];
SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
MSize n, nent = snap->nent; MSize n, nent = snap->nent;
int32_t bias = 0;
/* Store the value of all modified slots to the Lua stack. */ /* Store the value of all modified slots to the Lua stack. */
for (n = 0; n < nent; n++) { for (n = 0; n < nent; n++) {
SnapEntry sn = map[n]; SnapEntry sn = map[n];
BCReg s = snap_slot(sn); BCReg s = snap_slot(sn);
int32_t ofs = 8*((int32_t)s-1); int32_t ofs = 8*((int32_t)s-1) - bias;
IRRef ref = snap_ref(sn); IRRef ref = snap_ref(sn);
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
if ((sn & SNAP_NORESTORE)) if ((sn & SNAP_NORESTORE))
@ -2054,6 +2066,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);
#else #else
Reg src = ra_alloc1(as, ref, RSET_FPR); Reg src = ra_alloc1(as, ref, RSET_FPR);
if (LJ_UNLIKELY(ofs < -1020 || ofs > 1020)) {
int32_t adj = ofs & 0xffffff00; /* K12-friendly. */
bias += adj;
ofs -= adj;
emit_addptr(as, RID_BASE, -adj);
}
emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
#endif #endif
} else { } else {
@ -2082,6 +2100,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
} }
checkmclim(as); checkmclim(as);
} }
emit_addptr(as, RID_BASE, bias);
lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
} }
@ -2252,7 +2271,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
} }
if (nslots > as->evenspill) /* Leave room for args in stack slots. */ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots; as->evenspill = nslots;
return REGSP_HINT(RID_RET); return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
} }
static void asm_setup_target(ASMState *as) static void asm_setup_target(ASMState *as)

View File

@ -1,6 +1,6 @@
/* /*
** ARM64 IR assembler (SSA IR -> machine code). ** ARM64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
** **
** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
** Sponsored by Cisco Systems, Inc. ** Sponsored by Cisco Systems, Inc.
@ -84,18 +84,23 @@ static void asm_guardcc(ASMState *as, A64CC cc)
emit_cond_branch(as, cc, target); emit_cond_branch(as, cc, target);
} }
/* Emit test and branch instruction to exit for guard. */ /* Emit test and branch instruction to exit for guard, if in range. */
static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
{ {
MCode *target = asm_exitstub_addr(as, as->snapno); MCode *target = asm_exitstub_addr(as, as->snapno);
MCode *p = as->mcp; MCode *p = as->mcp;
ptrdiff_t delta = target - p;
if (LJ_UNLIKELY(p == as->invmcp)) { if (LJ_UNLIKELY(p == as->invmcp)) {
if (as->orignins > 1023) return 0; /* Delta might end up too large. */
as->loopinv = 1; as->loopinv = 1;
*p = A64I_B | A64F_S26(target-p); *p = A64I_B | A64F_S26(delta);
emit_tnb(as, ai^0x01000000u, r, bit, p-1); ai ^= 0x01000000u;
return; target = p-1;
} else if (LJ_UNLIKELY(delta >= 0x1fff)) {
return 0;
} }
emit_tnb(as, ai, r, bit, target); emit_tnb(as, ai, r, bit, target);
return 1;
} }
/* Emit compare and branch instruction to exit for guard. */ /* Emit compare and branch instruction to exit for guard. */
@ -211,16 +216,14 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{ {
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
int logical = (ai & 0x1f000000) == 0x0a000000;
if (ra_hasreg(ir->r)) { if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r); ra_noweak(as, ir->r);
return A64F_M(ir->r); return A64F_M(ir->r);
} else if (irref_isk(ref)) { } else if (irref_isk(ref)) {
uint32_t m;
int64_t k = get_k64val(as, ref); int64_t k = get_k64val(as, ref);
if ((ai & 0x1f000000) == 0x0a000000) uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
m = emit_isk13(k, irt_is64(ir->t)); emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
else
m = emit_isk12(k);
if (m) if (m)
return m; return m;
} else if (mayfuse(as, ref)) { } else if (mayfuse(as, ref)) {
@ -232,7 +235,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
(IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
IRIns *irl = IR(ir->op1); IRIns *irl = IR(ir->op1);
if (sh == A64SH_LSL && if (sh == A64SH_LSL &&
irl->o == IR_CONV && irl->o == IR_CONV && !logical &&
irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
shift <= 4 && shift <= 4 &&
canfuse(as, irl)) { canfuse(as, irl)) {
@ -242,7 +245,11 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
Reg m = ra_alloc1(as, ir->op1, allow); Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_SH(sh, shift); return A64F_M(m) | A64F_SH(sh, shift);
} }
} else if (ir->o == IR_CONV && } else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) {
Reg m = ra_alloc1(as, ir->op1, allow);
int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
} else if (ir->o == IR_CONV && !logical &&
ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) { ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
Reg m = ra_alloc1(as, ir->op1, allow); Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_EX(A64EX_SXTW); return A64F_M(m) | A64F_EX(A64EX_SXTW);
@ -419,13 +426,18 @@ static int asm_fuseorshift(ASMState *as, IRIns *ir)
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{ {
uint32_t n, nargs = CCI_XNARGS(ci); uint32_t n, nargs = CCI_XNARGS(ci);
int32_t ofs = 0; int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7;
Reg gpr, fpr = REGARG_FIRSTFPR; Reg gpr, fpr = REGARG_FIRSTFPR;
if (ci->func) if (ci->func)
emit_call(as, ci->func); emit_call(as, ci->func);
for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
as->cost[gpr] = REGCOST(~0u, ASMREF_L); as->cost[gpr] = REGCOST(~0u, ASMREF_L);
gpr = REGARG_FIRSTGPR; gpr = REGARG_FIRSTGPR;
#if LJ_HASFFI && LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) {
fpr = REGARG_LASTFPR+1;
}
#endif
for (n = 0; n < nargs; n++) { /* Setup args. */ for (n = 0; n < nargs; n++) { /* Setup args. */
IRRef ref = args[n]; IRRef ref = args[n];
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
@ -436,10 +448,21 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
"reg %d not free", fpr); /* Must have been evicted. */ "reg %d not free", fpr); /* Must have been evicted. */
ra_leftov(as, fpr, ref); ra_leftov(as, fpr, ref);
fpr++; fpr++;
#if LJ_HASFFI && LJ_ABI_WIN
} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
Reg rf = ra_alloc1(as, ref, RSET_FPR);
emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
#endif
} else { } else {
Reg r = ra_alloc1(as, ref, RSET_FPR); Reg r = ra_alloc1(as, ref, RSET_FPR);
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); int32_t al = spalign;
ofs += 8; #if LJ_HASFFI && LJ_TARGET_OSX
al |= irt_isnum(ir->t) ? 7 : 3;
#endif
spofs = (spofs + al) & ~al;
if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4;
emit_spstore(as, ir, r, spofs);
spofs += al + 1;
} }
} else { } else {
if (gpr <= REGARG_LASTGPR) { if (gpr <= REGARG_LASTGPR) {
@ -449,10 +472,27 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
gpr++; gpr++;
} else { } else {
Reg r = ra_alloc1(as, ref, RSET_GPR); Reg r = ra_alloc1(as, ref, RSET_GPR);
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); int32_t al = spalign;
ofs += 8; #if LJ_HASFFI && LJ_TARGET_OSX
al |= irt_size(ir->t) - 1;
#endif
spofs = (spofs + al) & ~al;
if (al >= 3) {
if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4;
emit_spstore(as, ir, r, spofs);
} else {
lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1);
emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs);
}
spofs += al + 1;
} }
} }
#if LJ_HASFFI && LJ_TARGET_OSX
} else { /* Marker for start of varargs. */
gpr = REGARG_LASTGPR+1;
fpr = REGARG_LASTFPR+1;
spalign = 7;
#endif
} }
} }
} }
@ -518,8 +558,6 @@ static void asm_retf(ASMState *as, IRIns *ir)
as->topslot -= (BCReg)delta; as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0; if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
/* Need to force a spill on REF_BASE now to update the stack slot. */
emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
emit_setgl(as, base, jit_base); emit_setgl(as, base, jit_base);
emit_addptr(as, base, -8*delta); emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE); asm_guardcc(as, CC_NE);
@ -643,25 +681,22 @@ static void asm_strto(ASMState *as, IRIns *ir)
{ {
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2]; IRRef args[2];
Reg dest = 0, tmp; Reg tmp;
int destused = ra_used(ir);
int32_t ofs = 0; int32_t ofs = 0;
ra_evictset(as, RSET_SCRATCH); ra_evictset(as, RSET_SCRATCH);
if (destused) { if (ra_used(ir)) {
if (ra_hasspill(ir->s)) { if (ra_hasspill(ir->s)) {
ofs = sps_scale(ir->s); ofs = sps_scale(ir->s);
destused = 0;
if (ra_hasreg(ir->r)) { if (ra_hasreg(ir->r)) {
ra_free(as, ir->r); ra_free(as, ir->r);
ra_modified(as, ir->r); ra_modified(as, ir->r);
emit_spload(as, ir, ir->r, ofs); emit_spload(as, ir, ir->r, ofs);
} }
} else { } else {
dest = ra_dest(as, ir, RSET_FPR); Reg dest = ra_dest(as, ir, RSET_FPR);
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
} }
} }
if (destused)
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
asm_guardcnb(as, A64I_CBZ, RID_RET); asm_guardcnb(as, A64I_CBZ, RID_RET);
args[0] = ir->op1; /* GCstr *str */ args[0] = ir->op1; /* GCstr *str */
args[1] = ASMREF_TMP1; /* TValue *n */ args[1] = ASMREF_TMP1; /* TValue *n */
@ -752,113 +787,75 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
int destused = ra_used(ir); int destused = ra_used(ir);
Reg dest = ra_dest(as, ir, allow); Reg dest = ra_dest(as, ir, allow);
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
Reg key = 0, tmp = RID_TMP; Reg tmp = RID_TMP, type = RID_NONE, key = RID_NONE, tkey;
Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
IRRef refkey = ir->op2; IRRef refkey = ir->op2;
IRIns *irkey = IR(refkey); IRIns *irkey = IR(refkey);
int isk = irref_isk(ir->op2); int isk = irref_isk(refkey);
IRType1 kt = irkey->t; IRType1 kt = irkey->t;
uint32_t k = 0; uint32_t k = 0;
uint32_t khash; uint32_t khash;
MCLabel l_end, l_loop, l_next; MCLabel l_end, l_loop;
rset_clear(allow, tab); rset_clear(allow, tab);
if (!isk) { /* Allocate register for tkey outside of the loop. */
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); if (isk) {
rset_clear(allow, key); int64_t kk;
if (!irt_isstr(kt)) { if (irt_isaddr(kt)) {
tmp = ra_scratch(as, allow); kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
rset_clear(allow, tmp); } else if (irt_isnum(kt)) {
} kk = (int64_t)ir_knum(irkey)->u64;
} else if (irt_isnum(kt)) { /* Assumes -0.0 is already canonicalized to +0.0. */
int64_t val = (int64_t)ir_knum(irkey)->u64;
if (!(k = emit_isk12(val))) {
key = ra_allock(as, val, allow);
rset_clear(allow, key);
}
} else if (!irt_ispri(kt)) {
if (!(k = emit_isk12(irkey->i))) {
key = ra_alloc1(as, refkey, allow);
rset_clear(allow, key);
}
}
/* Allocate constants early. */
if (irt_isnum(kt)) {
if (!isk) {
tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
rset_clear(allow, tisnum);
}
} else if (irt_isaddr(kt)) {
if (isk) {
int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
scr = ra_allock(as, kk, allow);
} else { } else {
scr = ra_scratch(as, allow); lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
kk = ~((int64_t)~irt_toitype(kt) << 47);
} }
rset_clear(allow, scr); k = emit_isk12(kk);
tkey = k ? 0 : ra_allock(as, kk, allow);
} else { } else {
lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); tkey = ra_scratch(as, allow);
type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
scr = ra_scratch(as, rset_clear(allow, type));
rset_clear(allow, scr);
} }
/* Key not found in chain: jump to exit (if merged) or load niltv. */ /* Key not found in chain: jump to exit (if merged) or load niltv. */
l_end = emit_label(as); l_end = emit_label(as);
as->invmcp = NULL; as->invmcp = NULL;
if (merge == IR_NE) if (merge == IR_NE) {
asm_guardcc(as, CC_AL); asm_guardcc(as, CC_AL);
else if (destused) } else if (destused) {
emit_loada(as, dest, niltvg(J2G(as->J))); uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val));
lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
}
/* Follow hash chain until the end. */ /* Follow hash chain until the end. */
l_loop = --as->mcp; l_loop = --as->mcp;
emit_n(as, A64I_CMPx^A64I_K12^0, dest); if (destused)
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
l_next = emit_label(as);
/* Type and value comparison. */ /* Type and value comparison. */
if (merge == IR_EQ) if (merge == IR_EQ)
asm_guardcc(as, CC_EQ); asm_guardcc(as, CC_EQ);
else else
emit_cond_branch(as, CC_EQ, l_end); emit_cond_branch(as, CC_EQ, l_end);
emit_nm(as, A64I_CMPx^k, tmp, tkey);
if (!destused)
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
*l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
if (irt_isnum(kt)) { /* Construct tkey as canonicalized or tagged key. */
if (isk) { if (!isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */ if (irt_isnum(kt)) {
if (k) key = ra_alloc1(as, refkey, RSET_FPR);
emit_n(as, A64I_CMPx^k, tmp); emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
else /* A64I_FMOV_R_D from key to tkey done below. */
emit_nm(as, A64I_CMPx, key, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
} else { } else {
emit_nm(as, A64I_FCMPd, key, ftmp); lj_assertA(irt_isaddr(kt), "bad HREF key type");
emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); key = ra_alloc1(as, refkey, allow);
emit_cond_branch(as, CC_LO, l_next); type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
} }
} else if (irt_isaddr(kt)) {
if (isk) {
emit_nm(as, A64I_CMPx, scr, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
} else {
emit_nm(as, A64I_CMPx, tmp, scr);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
}
} else {
emit_nm(as, A64I_CMPx, scr, type);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
} }
*l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
if (!isk && irt_isaddr(kt)) {
type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
rset_clear(allow, type);
}
/* Load main position relative to tab->node into dest. */ /* Load main position relative to tab->node into dest. */
khash = isk ? ir_khash(as, irkey) : 1; khash = isk ? ir_khash(as, irkey) : 1;
if (khash == 0) { if (khash == 0) {
@ -872,7 +869,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_dnm(as, A64I_ANDw, dest, dest, tmphash); emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
} else if (irt_isstr(kt)) { } else if (irt_isstr(kt)) {
/* Fetch of str->sid is cheaper than ra_allock. */
emit_dnm(as, A64I_ANDw, dest, dest, tmp); emit_dnm(as, A64I_ANDw, dest, dest, tmp);
emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
@ -881,23 +877,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
emit_dnm(as, A64I_SUBw, dest, dest, tmp); emit_dnm(as, A64I_SUBw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
emit_dnm(as, A64I_EORw, dest, dest, tmp); emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
emit_dnm(as, A64I_SUBw, tmp, tmp, dest); emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
emit_dnm(as, A64I_EORw, tmp, tmp, dest);
if (irt_isnum(kt)) { if (irt_isnum(kt)) {
emit_dnm(as, A64I_EORw, tmp, tkey, dest);
emit_dnm(as, A64I_ADDw, dest, dest, dest); emit_dnm(as, A64I_ADDw, dest, dest, dest);
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
emit_dm(as, A64I_MOVw, tmp, dest); emit_nm(as, A64I_FCMPZd, (key & 31), 0);
emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
} else { } else {
checkmclim(as); emit_dnm(as, A64I_EORw, tmp, key, dest);
emit_dm(as, A64I_MOVw, tmp, key); emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
emit_dnm(as, A64I_EORw, dest, dest,
ra_allock(as, irt_toitype(kt) << 15, allow));
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_dm(as, A64I_MOVx, dest, key);
} }
} }
} }
@ -912,7 +903,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
int bigofs = !emit_checkofs(A64I_LDRx, kofs); int bigofs = !emit_checkofs(A64I_LDRx, kofs);
Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
Reg key, idx = node; Reg idx = node;
RegSet allow = rset_exclude(RSET_GPR, node); RegSet allow = rset_exclude(RSET_GPR, node);
uint64_t k; uint64_t k;
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@ -931,9 +922,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
} else { } else {
k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
} }
key = ra_scratch(as, allow); emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
emit_lso(as, A64I_LDRx, key, idx, kofs);
if (bigofs) if (bigofs)
emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node)); emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
} }
@ -941,24 +931,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) { int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v); emit_lsptr(as, A64I_LDRx, dest, v);
} else { } else {
Reg uv = ra_scratch(as, RSET_GPR); if (guarded)
Reg func = ra_alloc1(as, ir->op1, RSET_GPR); asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
if (ir->o == IR_UREFC) { if (ir->o == IR_UREFC)
asm_guardcc(as, CC_NE); emit_opk(as, A64I_ADDx, dest, dest,
emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
emit_opk(as, A64I_ADDx, dest, uv,
(int32_t)offsetof(GCupval, tv), RSET_GPR); (int32_t)offsetof(GCupval, tv), RSET_GPR);
emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); else
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadu64(as, dest, k);
} else { } else {
emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
} }
emit_lso(as, A64I_LDRx, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
} }
} }
@ -1063,7 +1059,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
static void asm_ahuvload(ASMState *as, IRIns *ir) static void asm_ahuvload(ASMState *as, IRIns *ir)
{ {
Reg idx, tmp, type; Reg idx, tmp;
int32_t ofs = 0; int32_t ofs = 0;
RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
@ -1082,8 +1078,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
} else { } else {
tmp = ra_scratch(as, gpr); tmp = ra_scratch(as, gpr);
} }
type = ra_scratch(as, rset_clear(gpr, tmp)); idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); rset_clear(gpr, idx);
if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
/* Always do the type check, even if the load result is unused. */ /* Always do the type check, even if the load result is unused. */
asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
@ -1091,10 +1088,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t)); "bad load type %d", irt_type(ir->t));
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
} else if (irt_isaddr(ir->t)) { } else if (irt_isaddr(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
} else if (irt_isnil(ir->t)) { } else if (irt_isnil(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
} else { } else {
@ -1217,9 +1214,8 @@ dotypecheck:
emit_nm(as, A64I_CMPx, emit_nm(as, A64I_CMPx,
ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
} else { } else {
Reg type = ra_scratch(as, allow); emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
} }
emit_lso(as, A64I_LDRx, tmp, base, ofs); emit_lso(as, A64I_LDRx, tmp, base, ofs);
return; return;
@ -1289,8 +1285,9 @@ static void asm_tbar(ASMState *as, IRIns *ir)
Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
Reg mark = RID_TMP; Reg mark = RID_TMP;
MCLabel l_end = emit_label(as); MCLabel l_end = emit_label(as);
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
/* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_setgl(as, tab, gc.grayagain); emit_setgl(as, tab, gc.grayagain);
emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
emit_getgl(as, link, gc.grayagain); emit_getgl(as, link, gc.grayagain);
@ -1304,7 +1301,6 @@ static void asm_obar(ASMState *as, IRIns *ir)
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
IRRef args[2]; IRRef args[2];
MCLabel l_end; MCLabel l_end;
RegSet allow = RSET_GPR;
Reg obj, val, tmp; Reg obj, val, tmp;
/* No need for other object barriers (yet). */ /* No need for other object barriers (yet). */
lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
@ -1315,14 +1311,13 @@ static void asm_obar(ASMState *as, IRIns *ir)
asm_gencall(as, ci, args); asm_gencall(as, ci, args);
emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
obj = IR(ir->op1)->r; obj = IR(ir->op1)->r;
tmp = ra_scratch(as, rset_exclude(allow, obj)); tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
emit_cond_branch(as, CC_EQ, l_end); emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
emit_cond_branch(as, CC_EQ, l_end); emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
emit_lso(as, A64I_LDRB, tmp, obj, emit_lso(as, A64I_LDRB, tmp, obj,
(int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
} }
@ -1364,12 +1359,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
if (irref_isk(lref)) if (irref_isk(lref))
return 1; /* But swap constants to the right. */ return 1; /* But swap constants to the right. */
ir = IR(rref); ir = IR(rref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 0; /* Don't swap fusable operands to the left. */ return 0; /* Don't swap fusable operands to the left. */
ir = IR(lref); ir = IR(lref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 1; /* But swap fusable operands to the right. */ return 1; /* But swap fusable operands to the right. */
@ -1415,13 +1410,12 @@ static void asm_intneg(ASMState *as, IRIns *ir)
static void asm_intmul(ASMState *as, IRIns *ir) static void asm_intmul(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
if (irt_isguard(ir->t)) { /* IR_MULOV */ if (irt_isguard(ir->t)) { /* IR_MULOV */
asm_guardcc(as, CC_NE); asm_guardcc(as, CC_NE);
emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest);
emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
emit_dnm(as, A64I_SMULL, dest, right, left); emit_dnm(as, A64I_SMULL, dest, right, left);
} else { } else {
emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
@ -1681,16 +1675,15 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
if (asm_swapops(as, blref, brref)) { if (asm_swapops(as, blref, brref)) {
Reg tmp = blref; blref = brref; brref = tmp; Reg tmp = blref; blref = brref; brref = tmp;
} }
bleft = ra_alloc1(as, blref, RSET_GPR);
if (irref_isk(brref)) { if (irref_isk(brref)) {
uint64_t k = get_k64val(as, brref); uint64_t k = get_k64val(as, brref);
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) &&
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft,
ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); emit_ctz64(k)))
return; return;
}
m2 = emit_isk13(k, irt_is64(irl->t)); m2 = emit_isk13(k, irt_is64(irl->t));
} }
bleft = ra_alloc1(as, blref, RSET_GPR);
ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
if (!m2) if (!m2)
m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
@ -1765,37 +1758,28 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno) IRIns *irp, RegSet allow, ExitNo exitno)
{ {
Reg pbase;
uint32_t k; uint32_t k;
Reg pbase = RID_BASE;
if (irp) { if (irp) {
if (!ra_hasspill(irp->s)) { pbase = irp->r;
pbase = irp->r; if (!ra_hasreg(pbase))
lj_assertA(ra_hasreg(pbase), "base reg lost"); pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET);
} else if (allow) {
pbase = rset_pickbot(allow);
} else {
pbase = RID_RET;
emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
}
} else {
pbase = RID_BASE;
} }
emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
if (pbase & 0x80) /* Restore temp. register. */
emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
k = emit_isk12((8*topslot)); k = emit_isk12((8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, A64I_CMPx^k, RID_TMP); emit_n(as, A64I_CMPx^k, RID_TMP);
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31));
emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
(int32_t)offsetof(lua_State, maxstack)); (int32_t)offsetof(lua_State, maxstack));
if (irp) { /* Must not spill arbitrary registers in head of side trace. */ if (pbase & 0x40) {
if (ra_hasspill(irp->s)) emit_getgl(as, (pbase & 31), jit_base);
emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); if (pbase & 0x80) /* Save temp register. */
emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0);
if (ra_hasspill(irp->s) && !allow)
emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
} else {
emit_getgl(as, RID_TMP, cur_L);
} }
emit_getgl(as, RID_TMP, cur_L);
} }
/* Restore Lua stack from on-trace state. */ /* Restore Lua stack from on-trace state. */
@ -1837,7 +1821,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
/* Marker to prevent patching the GC check exit. */ /* Marker to prevent patching the GC check exit. */
#define ARM64_NOPATCH_GC_CHECK \ #define ARM64_NOPATCH_GC_CHECK \
(A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP)) (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
/* Check GC threshold and do one or more GC steps. */ /* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as) static void asm_gc_check(ASMState *as)
@ -1892,46 +1876,40 @@ static void asm_loop_tail_fixup(ASMState *as)
/* -- Head of trace ------------------------------------------------------- */ /* -- Head of trace ------------------------------------------------------- */
/* Reload L register from g->cur_L. */
static void asm_head_lreg(ASMState *as)
{
IRIns *ir = IR(ASMREF_L);
if (ra_used(ir)) {
Reg r = ra_dest(as, ir, RSET_GPR);
emit_getgl(as, r, cur_L);
ra_evictk(as);
}
}
/* Coalesce BASE register for a root trace. */ /* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as) static void asm_head_root_base(ASMState *as)
{ {
IRIns *ir; IRIns *ir = IR(REF_BASE);
asm_head_lreg(as); Reg r = ir->r;
ir = IR(REF_BASE); if (ra_hasreg(r)) {
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) ra_free(as, r);
ra_spill(as, ir); if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ra_destreg(as, ir, RID_BASE); ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (r != RID_BASE)
emit_movrr(as, ir, r, RID_BASE);
}
} }
/* Coalesce BASE register for a side trace. */ /* Coalesce BASE register for a side trace. */
static Reg asm_head_side_base(ASMState *as, IRIns *irp) static Reg asm_head_side_base(ASMState *as, IRIns *irp)
{ {
IRIns *ir; IRIns *ir = IR(REF_BASE);
asm_head_lreg(as); Reg r = ir->r;
ir = IR(REF_BASE); if (ra_hasreg(r)) {
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) ra_free(as, r);
ra_spill(as, ir); if (rset_test(as->modset, r) || irt_ismarked(ir->t))
if (ra_hasspill(irp->s)) { ir->r = RID_INIT; /* No inheritance for modified BASE register. */
return ra_dest(as, ir, RSET_GPR); if (irp->r == r) {
} else { return r; /* Same BASE register already coalesced. */
Reg r = irp->r; } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
lj_assertA(ra_hasreg(r), "base reg lost"); /* Move from coalesced parent reg. */
if (r != ir->r && !rset_test(as->freeset, r)) emit_movrr(as, ir, r, irp->r);
ra_restore(as, regcost_ref(as->cost[r])); return irp->r;
ra_destreg(as, ir, r); } else {
return r; emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
}
} }
return RID_NONE;
} }
/* -- Tail of trace ------------------------------------------------------- */ /* -- Tail of trace ------------------------------------------------------- */
@ -1975,20 +1953,47 @@ static void asm_tail_prep(ASMState *as)
/* Ensure there are enough stack slots for call arguments. */ /* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{ {
IRRef args[CCI_NARGS_MAX*2]; #if LJ_HASFFI
uint32_t i, nargs = CCI_XNARGS(ci); uint32_t i, nargs = CCI_XNARGS(ci);
int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) ||
asm_collectargs(as, ir, ci, args); (LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) {
for (i = 0; i < nargs; i++) { IRRef args[CCI_NARGS_MAX*2];
if (args[i] && irt_isfp(IR(args[i])->t)) { int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
if (nfpr > 0) nfpr--; else nslots += 2; int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
} else { asm_collectargs(as, ir, ci, args);
if (ngpr > 0) ngpr--; else nslots += 2; #if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) nfpr = 0;
#endif
for (i = 0; i < nargs; i++) {
int al = spalign;
if (!args[i]) {
#if LJ_TARGET_OSX
/* Marker for start of varaargs. */
nfpr = 0;
ngpr = 0;
spalign = 7;
#endif
} else if (irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) { nfpr--; continue; }
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
#elif LJ_TARGET_OSX
al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
#endif
} else {
if (ngpr > 0) { ngpr--; continue; }
#if LJ_TARGET_OSX
al |= irt_size(IR(args[i])->t) - 1;
#endif
}
spofs = (spofs + 2*al+1) & ~al; /* Align and bump stack pointer. */
} }
nslots = (spofs + 3) >> 2;
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
} }
if (nslots > as->evenspill) /* Leave room for args in stack slots. */ #endif
as->evenspill = nslots; return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
return REGSP_HINT(RID_RET);
} }
static void asm_setup_target(ASMState *as) static void asm_setup_target(ASMState *as)

View File

@ -1,6 +1,6 @@
/* /*
** MIPS IR assembler (SSA IR -> machine code). ** MIPS IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
/* -- Register allocator extensions --------------------------------------- */ /* -- Register allocator extensions --------------------------------------- */
@ -456,7 +456,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_addptr(as, base, -8*delta); emit_addptr(as, base, -8*delta);
asm_guard(as, MIPSI_BNE, RID_TMP, asm_guard(as, MIPSI_BNE, RID_TMP,
ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); emit_tsi(as, MIPSI_AL, RID_TMP, base, (LJ_BE || LJ_FR2) ? -8 : -4);
} }
/* -- Buffer operations --------------------------------------------------- */ /* -- Buffer operations --------------------------------------------------- */
@ -653,11 +653,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
rset_exclude(RSET_GPR, dest)); rset_exclude(RSET_GPR, dest));
emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
#if !LJ_TARGET_MIPSR6 #if !LJ_TARGET_MIPSR6
emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
#else #else
emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp); emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
#endif #endif
emit_lsptr(as, MIPSI_LDC1, (tmp & 31), emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)&as->J->k64[LJ_K64_2P63], (void *)&as->J->k64[LJ_K64_2P63],
@ -670,11 +670,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
rset_exclude(RSET_GPR, dest)); rset_exclude(RSET_GPR, dest));
emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
#if !LJ_TARGET_MIPSR6 #if !LJ_TARGET_MIPSR6
emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
#else #else
emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp); emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
#endif #endif
emit_lsptr(as, MIPSI_LWC1, (tmp & 31), emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)&as->J->k32[LJ_K32_2P63], (void *)&as->J->k32[LJ_K32_2P63],
@ -690,8 +690,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
MIPSIns mi = irt_is64(ir->t) ? MIPSIns mi = irt_is64(ir->t) ?
(st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
(st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, tmp);
emit_fg(as, mi, left, left); emit_fg(as, mi, tmp, left);
#endif #endif
} }
} }
@ -1207,22 +1207,29 @@ nolo:
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) { int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else { } else {
Reg uv = ra_scratch(as, RSET_GPR); if (guarded)
Reg func = ra_alloc1(as, ir->op1, RSET_GPR); asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
if (ir->o == IR_UREFC) { if (ir->o == IR_UREFC)
asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); else
emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, dest, o);
} else { } else {
emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
} }
emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
} }
} }

View File

@ -1,6 +1,6 @@
/* /*
** PPC IR assembler (SSA IR -> machine code). ** PPC IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
/* -- Register allocator extensions --------------------------------------- */ /* -- Register allocator extensions --------------------------------------- */
@ -840,23 +840,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) { int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
} else { } else {
Reg uv = ra_scratch(as, RSET_GPR); if (guarded) {
Reg func = ra_alloc1(as, ir->op1, RSET_GPR); asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_ai(as, PPCI_CMPWI, RID_TMP, 1); emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
} }
emit_tai(as, PPCI_LWZ, uv, func, if (ir->o == IR_UREFC)
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
} }
} }

View File

@ -1,6 +1,6 @@
/* /*
** x86/x64 IR assembler (SSA IR -> machine code). ** x86/x64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
/* -- Guard handling ------------------------------------------------------ */ /* -- Guard handling ------------------------------------------------------ */
@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
/* Check if there's no conflicting instruction between curins and ref. /* Check if there's no conflicting instruction between curins and ref.
** Also avoid fusing loads if there are multiple references. ** Also avoid fusing loads if there are multiple references.
*/ */
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
{ {
IRIns *ir = as->ir; IRIns *ir = as->ir;
IRRef i = as->curins; IRRef i = as->curins;
@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
while (--i > ref) { while (--i > ref) {
if (ir[i].o == conflict) if (ir[i].o == conflict)
return 0; /* Conflict found. */ return 0; /* Conflict found. */
else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref)) else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
return 0;
else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
return 0; return 0;
} }
return 1; /* Ok, no conflict. */ return 1; /* Ok, no conflict. */
@ -134,13 +136,14 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY"); lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
/* We can avoid the FLOAD of t->array for colocated arrays. */ /* We can avoid the FLOAD of t->array for colocated arrays. */
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
return irb->op1; /* Table obj. */ return irb->op1; /* Table obj. */
} }
} else if (irb->o == IR_ADD && irref_isk(irb->op2)) { } else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
/* Fuse base offset (vararg load). */ /* Fuse base offset (vararg load). */
as->mrm.ofs = IR(irb->op2)->i; IRIns *irk = IR(irb->op2);
as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
return irb->op1; return irb->op1;
} }
return ref; /* Otherwise use the given array base. */ return ref; /* Otherwise use the given array base. */
@ -455,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) { if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 0) && noconflict(as, ref, IR_RETF, 2) &&
!(LJ_GC64 && irt_isaddr(ir->t))) { !(LJ_GC64 && irt_isaddr(ir->t))) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@ -466,12 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
} else if (ir->o == IR_FLOAD) { } else if (ir->o == IR_FLOAD) {
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) && if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
noconflict(as, ref, IR_FSTORE, 0)) { noconflict(as, ref, IR_FSTORE, 2)) {
asm_fusefref(as, ir, xallow); asm_fusefref(as, ir, xallow);
return RID_MRM; return RID_MRM;
} }
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
!(LJ_GC64 && irt_isaddr(ir->t))) { !(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow); asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM; return RID_MRM;
@ -481,7 +484,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
** Fusing unaligned memory operands is ok on x86 (except for SIMD types). ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/ */
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
noconflict(as, ref, IR_XSTORE, 0)) { noconflict(as, ref, IR_XSTORE, 2)) {
asm_fusexref(as, ir->op1, xallow); asm_fusexref(as, ir->op1, xallow);
return RID_MRM; return RID_MRM;
} }
@ -814,6 +817,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_rr(as, XO_UCOMISD, left, tmp); emit_rr(as, XO_UCOMISD, left, tmp);
emit_rr(as, XO_CVTSI2SD, tmp, dest); emit_rr(as, XO_CVTSI2SD, tmp, dest);
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
checkmclim(as);
emit_rr(as, XO_CVTTSD2SI, dest, left); emit_rr(as, XO_CVTTSD2SI, dest, left);
/* Can't fuse since left is needed twice. */ /* Can't fuse since left is needed twice. */
} }
@ -856,6 +860,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
emit_rma(as, XO_MOVSD, bias, k); emit_rma(as, XO_MOVSD, bias, k);
checkmclim(as);
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
return; return;
} else { /* Integer to FP conversion. */ } else { /* Integer to FP conversion. */
@ -1172,6 +1177,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
asm_guardcc(as, CC_E); asm_guardcc(as, CC_E);
else else
emit_sjcc(as, CC_E, l_end); emit_sjcc(as, CC_E, l_end);
checkmclim(as);
if (irt_isnum(kt)) { if (irt_isnum(kt)) {
if (isk) { if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */ /* Assumes -0.0 is already canonicalized to +0.0. */
@ -1231,7 +1237,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
#endif #endif
} }
emit_sfixup(as, l_loop); emit_sfixup(as, l_loop);
checkmclim(as);
#if LJ_GC64 #if LJ_GC64
if (!isk && irt_isaddr(kt)) { if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key); emit_rr(as, XO_OR, tmp|REX_64, key);
@ -1258,6 +1263,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp); emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3); emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp); emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
checkmclim(as);
emit_shifti(as, XOg_ROL, dest, HASH_ROT2); emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest); emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
emit_shifti(as, XOg_ROL, dest, HASH_ROT1); emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
@ -1275,7 +1281,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
} else { } else {
emit_rr(as, XO_MOV, tmp, key); emit_rr(as, XO_MOV, tmp, key);
#if LJ_GC64 #if LJ_GC64
checkmclim(as);
emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
if ((as->flags & JIT_F_BMI2)) { if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 32); emit_i8(as, 32);
@ -1372,24 +1377,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
{ {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) { int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1)); GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest|REX_GC64, v); emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else { } else {
Reg uv = ra_scratch(as, RSET_GPR); Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC)
if (ir->o == IR_UREFC) {
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
asm_guardcc(as, CC_NE); else
emit_i8(as, 1);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
} else {
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
emit_i8(as, 0);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
}
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, uv, o);
} else {
emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
} }
emit_rmro(as, XO_MOV, uv|REX_GC64, func,
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
} }
} }
@ -1546,6 +1558,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (irt_islightud(ir->t)) { if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1); Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) { if (ra_hasreg(dest)) {
checkmclim(as);
asm_fuseahuref(as, ir->op1, RSET_GPR); asm_fuseahuref(as, ir->op1, RSET_GPR);
if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
@ -1593,6 +1606,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t)); "bad load type %d", irt_type(ir->t));
checkmclim(as);
#if LJ_GC64 #if LJ_GC64
emit_u32(as, LJ_TISNUM << 15); emit_u32(as, LJ_TISNUM << 15);
#else #else

View File

@ -1,6 +1,6 @@
/* /*
** Internal assertions. ** Internal assertions.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_assert_c #define lj_assert_c

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode instruction modes. ** Bytecode instruction modes.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_bc_c #define lj_bc_c

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode instruction format. ** Bytecode instruction format.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_BC_H #ifndef _LJ_BC_H

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode dump definitions. ** Bytecode dump definitions.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_BCDUMP_H #ifndef _LJ_BCDUMP_H
@ -46,6 +46,8 @@
#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1) #define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
#define BCDUMP_F_DETERMINISTIC 0x80000000
/* Type codes for the GC constants of a prototype. Plus length for strings. */ /* Type codes for the GC constants of a prototype. Plus length for strings. */
enum { enum {
BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64, BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64,
@ -61,7 +63,7 @@ enum {
/* -- Bytecode reader/writer ---------------------------------------------- */ /* -- Bytecode reader/writer ---------------------------------------------- */
LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
void *data, int strip); void *data, uint32_t flags);
LJ_FUNC GCproto *lj_bcread_proto(LexState *ls); LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
LJ_FUNC GCproto *lj_bcread(LexState *ls); LJ_FUNC GCproto *lj_bcread(LexState *ls);

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode reader. ** Bytecode reader.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_bcread_c #define lj_bcread_c
@ -179,7 +179,7 @@ static const void *bcread_varinfo(GCproto *pt)
} }
/* Read a single constant key/value of a template table. */ /* Read a single constant key/value of a template table. */
static void bcread_ktabk(LexState *ls, TValue *o) static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t)
{ {
MSize tp = bcread_uleb128(ls); MSize tp = bcread_uleb128(ls);
if (tp >= BCDUMP_KTAB_STR) { if (tp >= BCDUMP_KTAB_STR) {
@ -191,6 +191,8 @@ static void bcread_ktabk(LexState *ls, TValue *o)
} else if (tp == BCDUMP_KTAB_NUM) { } else if (tp == BCDUMP_KTAB_NUM) {
o->u32.lo = bcread_uleb128(ls); o->u32.lo = bcread_uleb128(ls);
o->u32.hi = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls);
} else if (t && tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */
settabV(ls->L, o, t);
} else { } else {
lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
setpriV(o, ~tp); setpriV(o, ~tp);
@ -207,15 +209,15 @@ static GCtab *bcread_ktab(LexState *ls)
MSize i; MSize i;
TValue *o = tvref(t->array); TValue *o = tvref(t->array);
for (i = 0; i < narray; i++, o++) for (i = 0; i < narray; i++, o++)
bcread_ktabk(ls, o); bcread_ktabk(ls, o, NULL);
} }
if (nhash) { /* Read hash entries. */ if (nhash) { /* Read hash entries. */
MSize i; MSize i;
for (i = 0; i < nhash; i++) { for (i = 0; i < nhash; i++) {
TValue key; TValue key;
bcread_ktabk(ls, &key); bcread_ktabk(ls, &key, NULL);
lj_assertLS(!tvisnil(&key), "nil key"); lj_assertLS(!tvisnil(&key), "nil key");
bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t);
} }
} }
return t; return t;
@ -281,8 +283,11 @@ static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc) static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
{ {
BCIns *bc = proto_bc(pt); BCIns *bc = proto_bc(pt);
bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, BCIns op;
pt->framesize, 0); if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */
else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV;
else op = BC_FUNCF;
bc[0] = BCINS_AD(op, pt->framesize, 0);
bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns)); bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
/* Swap bytecode instructions if the endianess differs. */ /* Swap bytecode instructions if the endianess differs. */
if (bcread_swap(ls)) { if (bcread_swap(ls)) {
@ -395,7 +400,7 @@ static int bcread_header(LexState *ls)
bcread_byte(ls) != BCDUMP_VERSION) return 0; bcread_byte(ls) != BCDUMP_VERSION) return 0;
bcread_flags(ls) = flags = bcread_uleb128(ls); bcread_flags(ls) = flags = bcread_uleb128(ls);
if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0;
if ((flags & BCDUMP_F_FFI)) { if ((flags & BCDUMP_F_FFI)) {
#if LJ_HASFFI #if LJ_HASFFI
lua_State *L = ls->L; lua_State *L = ls->L;
@ -405,7 +410,7 @@ static int bcread_header(LexState *ls)
#endif #endif
} }
if ((flags & BCDUMP_F_STRIP)) { if ((flags & BCDUMP_F_STRIP)) {
ls->chunkname = lj_str_newz(ls->L, ls->chunkarg); ls->chunkname = lj_str_newz(ls->L, *ls->chunkarg == BCDUMP_HEAD1 ? "=?" : ls->chunkarg);
} else { } else {
MSize len = bcread_uleb128(ls); MSize len = bcread_uleb128(ls);
bcread_need(ls, len); bcread_need(ls, len);

View File

@ -1,6 +1,6 @@
/* /*
** Bytecode writer. ** Bytecode writer.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_bcwrite_c #define lj_bcwrite_c
@ -27,7 +27,9 @@ typedef struct BCWriteCtx {
GCproto *pt; /* Root prototype. */ GCproto *pt; /* Root prototype. */
lua_Writer wfunc; /* Writer callback. */ lua_Writer wfunc; /* Writer callback. */
void *wdata; /* Writer callback data. */ void *wdata; /* Writer callback data. */
int strip; /* Strip debug info. */ TValue **heap; /* Heap used for deterministic sorting. */
uint32_t heapsz; /* Size of heap. */
uint32_t flags; /* BCDUMP_F_* flags. */
int status; /* Status from writer callback. */ int status; /* Status from writer callback. */
#ifdef LUA_USE_ASSERT #ifdef LUA_USE_ASSERT
global_State *g; global_State *g;
@ -69,6 +71,8 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
*p++ = BCDUMP_KTAB_NUM; *p++ = BCDUMP_KTAB_NUM;
p = lj_strfmt_wuleb128(p, o->u32.lo); p = lj_strfmt_wuleb128(p, o->u32.lo);
p = lj_strfmt_wuleb128(p, o->u32.hi); p = lj_strfmt_wuleb128(p, o->u32.hi);
} else if (tvistab(o)) { /* Write the nil value marker as a nil. */
*p++ = BCDUMP_KTAB_NIL;
} else { } else {
lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
*p++ = BCDUMP_KTAB_NIL+~itype(o); *p++ = BCDUMP_KTAB_NIL+~itype(o);
@ -76,6 +80,75 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
ctx->sb.w = p; ctx->sb.w = p;
} }
/* Compare two template table keys. */
static LJ_AINLINE int bcwrite_ktabk_lt(TValue *a, TValue *b)
{
uint32_t at = itype(a), bt = itype(b);
if (at != bt) { /* This also handles false and true keys. */
return at < bt;
} else if (at == LJ_TSTR) {
return lj_str_cmp(strV(a), strV(b)) < 0;
} else {
return a->u64 < b->u64; /* This works for numbers and integers. */
}
}
/* Insert key into a sorted heap. */
static void bcwrite_ktabk_heap_insert(TValue **heap, MSize idx, MSize end,
TValue *key)
{
MSize child;
while ((child = idx * 2 + 1) < end) {
/* Find lower of the two children. */
TValue *c0 = heap[child];
if (child + 1 < end) {
TValue *c1 = heap[child + 1];
if (bcwrite_ktabk_lt(c1, c0)) {
c0 = c1;
child++;
}
}
if (bcwrite_ktabk_lt(key, c0)) break; /* Key lower? Found our position. */
heap[idx] = c0; /* Move lower child up. */
idx = child; /* Descend. */
}
heap[idx] = key; /* Insert key here. */
}
/* Resize heap, dropping content. */
static void bcwrite_heap_resize(BCWriteCtx *ctx, uint32_t nsz)
{
lua_State *L = sbufL(&ctx->sb);
if (ctx->heapsz) {
lj_mem_freevec(G(L), ctx->heap, ctx->heapsz, TValue *);
ctx->heapsz = 0;
}
if (nsz) {
ctx->heap = lj_mem_newvec(L, nsz, TValue *);
ctx->heapsz = nsz;
}
}
/* Write hash part of template table in sorted order. */
static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash)
{
TValue **heap = ctx->heap;
MSize i = nhash;
for (;; node--) { /* Build heap. */
if (!tvisnil(&node->val)) {
bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key);
if (i == 0) break;
}
}
do { /* Drain heap. */
TValue *key = heap[0]; /* Output lowest key from top. */
bcwrite_ktabk(ctx, key, 0);
bcwrite_ktabk(ctx, (TValue *)((char *)key - offsetof(Node, key)), 1);
key = heap[--nhash]; /* Remove last key. */
bcwrite_ktabk_heap_insert(heap, 0, nhash, key); /* Re-insert. */
} while (nhash);
}
/* Write a template table. */ /* Write a template table. */
static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
{ {
@ -105,14 +178,20 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
bcwrite_ktabk(ctx, o, 1); bcwrite_ktabk(ctx, o, 1);
} }
if (nhash) { /* Write hash entries. */ if (nhash) { /* Write hash entries. */
MSize i = nhash;
Node *node = noderef(t->node) + t->hmask; Node *node = noderef(t->node) + t->hmask;
for (;; node--) if ((ctx->flags & BCDUMP_F_DETERMINISTIC) && nhash > 1) {
if (!tvisnil(&node->val)) { if (ctx->heapsz < nhash)
bcwrite_ktabk(ctx, &node->key, 0); bcwrite_heap_resize(ctx, t->hmask + 1);
bcwrite_ktabk(ctx, &node->val, 1); bcwrite_ktab_sorted_hash(ctx, node, nhash);
if (--i == 0) break; } else {
} MSize i = nhash;
for (;; node--)
if (!tvisnil(&node->val)) {
bcwrite_ktabk(ctx, &node->key, 0);
bcwrite_ktabk(ctx, &node->val, 1);
if (--i == 0) break;
}
}
} }
} }
@ -269,7 +348,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
p = lj_strfmt_wuleb128(p, pt->sizekgc); p = lj_strfmt_wuleb128(p, pt->sizekgc);
p = lj_strfmt_wuleb128(p, pt->sizekn); p = lj_strfmt_wuleb128(p, pt->sizekn);
p = lj_strfmt_wuleb128(p, pt->sizebc-1); p = lj_strfmt_wuleb128(p, pt->sizebc-1);
if (!ctx->strip) { if (!(ctx->flags & BCDUMP_F_STRIP)) {
if (proto_lineinfo(pt)) if (proto_lineinfo(pt))
sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
p = lj_strfmt_wuleb128(p, sizedbg); p = lj_strfmt_wuleb128(p, sizedbg);
@ -317,11 +396,10 @@ static void bcwrite_header(BCWriteCtx *ctx)
*p++ = BCDUMP_HEAD2; *p++ = BCDUMP_HEAD2;
*p++ = BCDUMP_HEAD3; *p++ = BCDUMP_HEAD3;
*p++ = BCDUMP_VERSION; *p++ = BCDUMP_VERSION;
*p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) + *p++ = (ctx->flags & (BCDUMP_F_STRIP | BCDUMP_F_FR2)) +
LJ_BE*BCDUMP_F_BE + LJ_BE*BCDUMP_F_BE +
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) + ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
LJ_FR2*BCDUMP_F_FR2; if (!(ctx->flags & BCDUMP_F_STRIP)) {
if (!ctx->strip) {
p = lj_strfmt_wuleb128(p, len); p = lj_strfmt_wuleb128(p, len);
p = lj_buf_wmem(p, name, len); p = lj_buf_wmem(p, name, len);
} }
@ -352,14 +430,16 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
/* Write bytecode for a prototype. */ /* Write bytecode for a prototype. */
int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
int strip) uint32_t flags)
{ {
BCWriteCtx ctx; BCWriteCtx ctx;
int status; int status;
ctx.pt = pt; ctx.pt = pt;
ctx.wfunc = writer; ctx.wfunc = writer;
ctx.wdata = data; ctx.wdata = data;
ctx.strip = strip; ctx.heapsz = 0;
if ((bc_op(proto_bc(pt)[0]) != BC_NOT) == LJ_FR2) flags |= BCDUMP_F_FR2;
ctx.flags = flags;
ctx.status = 0; ctx.status = 0;
#ifdef LUA_USE_ASSERT #ifdef LUA_USE_ASSERT
ctx.g = G(L); ctx.g = G(L);
@ -368,6 +448,7 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
if (status == 0) status = ctx.status; if (status == 0) status = ctx.status;
lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb); lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
bcwrite_heap_resize(&ctx, 0);
return status; return status;
} }

View File

@ -1,6 +1,6 @@
/* /*
** Buffer handling. ** Buffer handling.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#define lj_buf_c #define lj_buf_c
@ -92,10 +92,8 @@ void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
char *b = sb->b; char *b = sb->b;
MSize osz = (MSize)(sb->e - b); MSize osz = (MSize)(sb->e - b);
if (osz > 2*LJ_MIN_SBUF) { if (osz > 2*LJ_MIN_SBUF) {
MSize n = (MSize)(sb->w - b);
b = lj_mem_realloc(L, b, osz, (osz >> 1)); b = lj_mem_realloc(L, b, osz, (osz >> 1));
sb->b = b; sb->w = sb->b = b; /* Not supposed to keep data across shrinks. */
sb->w = b + n;
sb->e = b + (osz >> 1); sb->e = b + (osz >> 1);
} }
lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt"); lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");

View File

@ -1,6 +1,6 @@
/* /*
** Buffer handling. ** Buffer handling.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_BUF_H #ifndef _LJ_BUF_H

View File

@ -1,6 +1,6 @@
/* /*
** C data arithmetic. ** C data arithmetic.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "lj_obj.h" #include "lj_obj.h"
@ -44,9 +44,13 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
p = (uint8_t *)cdata_getptr(p, ct->size); p = (uint8_t *)cdata_getptr(p, ct->size);
if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct); if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
} else if (ctype_isfunc(ct->info)) { } else if (ctype_isfunc(ct->info)) {
CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0;
p = (uint8_t *)*(void **)p; p = (uint8_t *)*(void **)p;
ct = ctype_get(cts, ct = ctype_get(cts,
lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR)); lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
if (i) { /* cts->tab may have been reallocated. */
ca->ct[0] = ctype_get(cts, id0);
}
} }
if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
ca->ct[i] = ct; ca->ct[i] = ct;
@ -345,9 +349,7 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
if (LJ_LIKELY(tvisint(o))) { if (LJ_LIKELY(tvisint(o))) {
return (uint32_t)intV(o); return (uint32_t)intV(o);
} else { } else {
int32_t i = lj_num2bit(numV(o)); return (uint32_t)lj_num2bit(numV(o));
if (LJ_DUALNUM) setintV(o, i);
return (uint32_t)i;
} }
} }

View File

@ -1,6 +1,6 @@
/* /*
** C data arithmetic. ** C data arithmetic.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_CARITH_H #ifndef _LJ_CARITH_H

View File

@ -1,6 +1,6 @@
/* /*
** FFI C call handling. ** FFI C call handling.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#include "lj_obj.h" #include "lj_obj.h"
@ -20,12 +20,15 @@
#if LJ_TARGET_X86 #if LJ_TARGET_X86
/* -- x86 calling conventions --------------------------------------------- */ /* -- x86 calling conventions --------------------------------------------- */
#define CCALL_PUSH(arg) \
*(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
#if LJ_ABI_WIN #if LJ_ABI_WIN
#define CCALL_HANDLE_STRUCTRET \ #define CCALL_HANDLE_STRUCTRET \
/* Return structs bigger than 8 by reference (on stack only). */ \ /* Return structs bigger than 8 by reference (on stack only). */ \
cc->retref = (sz > 8); \ cc->retref = (sz > 8); \
if (cc->retref) cc->stack[nsp++] = (GPRArg)dp; if (cc->retref) CCALL_PUSH(dp);
#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
@ -40,7 +43,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
cc->stack[nsp++] = (GPRArg)dp; \ CCALL_PUSH(dp); \
} else { /* Struct with single FP field ends up in FPR. */ \ } else { /* Struct with single FP field ends up in FPR. */ \
cc->resx87 = ccall_classify_struct(cts, ctr); \ cc->resx87 = ccall_classify_struct(cts, ctr); \
} }
@ -56,7 +59,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
cc->stack[nsp++] = (GPRArg)dp; CCALL_PUSH(dp);
#endif #endif
@ -67,7 +70,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
cc->stack[nsp++] = (GPRArg)dp; \ CCALL_PUSH(dp); \
} }
#endif #endif
@ -278,8 +281,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -345,7 +348,6 @@
goto done; \ goto done; \
} else { \ } else { \
nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \ } \
} else { /* Try to pass argument in GPRs. */ \ } else { /* Try to pass argument in GPRs. */ \
if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
@ -356,7 +358,6 @@
goto done; \ goto done; \
} else { \ } else { \
ngpr = maxgpr; /* Prevent reordering. */ \ ngpr = maxgpr; /* Prevent reordering. */ \
if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \ } \
} }
@ -471,8 +472,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -565,8 +566,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -698,10 +699,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
if (ccall_struct_reg(cc, cts, dp, rcl)) { if (ccall_struct_reg(cc, cts, dp, rcl)) {
/* Register overflow? Pass on stack. */ /* Register overflow? Pass on stack. */
MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ if (nsp + sz > CCALL_SIZE_STACK)
cc->nsp = nsp + n; return 1; /* Too many arguments. */
memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR); cc->nsp = nsp + sz;
memcpy((uint8_t *)cc->stack + nsp, dp, sz);
} }
return 0; /* Ok. */ return 0; /* Ok. */
} }
@ -779,17 +781,24 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{ {
CTSize sz = ct->size; CTSize sz = ct->size;
unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
while (ct->sib) { while (ct->sib && n <= 4) {
unsigned int m = 1;
CType *sct; CType *sct;
ct = ctype_get(cts, ct->sib); ct = ctype_get(cts, ct->sib);
if (ctype_isfield(ct->info)) { if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct); sct = ctype_rawchild(cts, ct);
if (ctype_isarray(sct->info)) {
CType *cct = ctype_rawchild(cts, sct);
if (!cct->size) continue;
m = sct->size / cct->size;
sct = cct;
}
if (ctype_isfp(sct->info)) { if (ctype_isfp(sct->info)) {
r |= sct->size; r |= sct->size;
if (!isu) n++; else if (n == 0) n = 1; if (!isu) n += m; else if (n < m) n = m;
} else if (ctype_iscomplex(sct->info)) { } else if (ctype_iscomplex(sct->info)) {
r |= (sct->size >> 1); r |= (sct->size >> 1);
if (!isu) n += 2; else if (n < 2) n = 2; if (!isu) n += 2*m; else if (n < 2*m) n = 2*m;
} else if (ctype_isstruct(sct->info)) { } else if (ctype_isstruct(sct->info)) {
goto substruct; goto substruct;
} else { } else {
@ -801,10 +810,11 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
sct = ctype_rawchild(cts, ct); sct = ctype_rawchild(cts, ct);
substruct: substruct:
if (sct->size > 0) { if (sct->size > 0) {
unsigned int s = ccall_classify_struct(cts, sct); unsigned int s = ccall_classify_struct(cts, sct), sn;
if (s <= 1) goto noth; if (s <= 1) goto noth;
r |= (s & 255); r |= (s & 255);
if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); sn = (s >> 8) * m;
if (!isu) n += sn; else if (n < sn) n = sn;
} }
} }
} }
@ -983,6 +993,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
fid = ctf->sib; fid = ctf->sib;
} }
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((ct->info & CTF_VARARG)) {
nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
ngpr = maxgpr;
nfpr = CCALL_NARG_FPR;
}
#endif
/* Walk through all passed arguments. */ /* Walk through all passed arguments. */
for (o = L->base+1, narg = 1; o < top; o++, narg++) { for (o = L->base+1, narg = 1; o < top; o++, narg++) {
CTypeID did; CTypeID did;
@ -1019,25 +1037,31 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_STRUCTARG CCALL_HANDLE_STRUCTARG
} else if (ctype_iscomplex(d->info)) { } else if (ctype_iscomplex(d->info)) {
CCALL_HANDLE_COMPLEXARG CCALL_HANDLE_COMPLEXARG
} else { } else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) {
sz = CTSIZE_PTR; sz = CTSIZE_PTR;
} }
sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
CCALL_HANDLE_REGARG /* Handle register arguments. */ CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */ /* Otherwise pass argument on stack. */
if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) { if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1; MSize align = (1u << ctype_align(d->info)) - 1;
nsp = (nsp + align) & ~align; /* Align argument on stack. */ if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1))
align = CTSIZE_PTR-1;
nsp = (nsp + align) & ~align;
} }
if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */ #if LJ_TARGET_ARM64 && LJ_ABI_WIN
/* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
#else
dp = ((uint8_t *)cc->stack) + nsp;
#endif
nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
err_nyi: err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL); lj_err_caller(L, LJ_ERR_FFI_NYICALL);
} }
dp = &cc->stack[nsp];
nsp += n;
isva = 0; isva = 0;
done: done:
@ -1048,7 +1072,8 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
} }
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
/* Extend passed integers to 32 bits at least. */ /* Extend passed integers to 32 bits at least. */
if (ctype_isinteger_or_bool(d->info) && d->size < 4) { if (ctype_isinteger_or_bool(d->info) && d->size < 4 &&
(!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) { /* Assumes LJ_LE. */
if (d->info & CTF_UNSIGNED) if (d->info & CTF_UNSIGNED)
*(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp : *(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp :
(uint32_t)*(uint16_t *)dp; (uint32_t)*(uint16_t *)dp;
@ -1095,14 +1120,17 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#endif #endif
} }
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((int32_t)nsp < 0) nsp = 0;
#endif
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */ cc->nfpr = nfpr; /* Required for vararg functions. */
#endif #endif
cc->nsp = nsp; cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR; cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
if (nsp > CCALL_SPS_FREE) if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u); cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
return gcsteps; return gcsteps;
} }

View File

@ -1,6 +1,6 @@
/* /*
** FFI C call handling. ** FFI C call handling.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_CCALL_H #ifndef _LJ_CCALL_H
@ -75,6 +75,9 @@ typedef union FPRArg {
#define CCALL_NARG_FPR 8 #define CCALL_NARG_FPR 8
#define CCALL_NRET_FPR 4 #define CCALL_NRET_FPR 4
#define CCALL_SPS_FREE 0 #define CCALL_SPS_FREE 0
#if LJ_TARGET_OSX
#define CCALL_PACK_STACKARG 1
#endif
typedef intptr_t GPRArg; typedef intptr_t GPRArg;
typedef union FPRArg { typedef union FPRArg {
@ -139,6 +142,9 @@ typedef union FPRArg {
#ifndef CCALL_ALIGN_STACKARG #ifndef CCALL_ALIGN_STACKARG
#define CCALL_ALIGN_STACKARG 1 #define CCALL_ALIGN_STACKARG 1
#endif #endif
#ifndef CCALL_PACK_STACKARG
#define CCALL_PACK_STACKARG 0
#endif
#ifndef CCALL_ALIGN_CALLSTATE #ifndef CCALL_ALIGN_CALLSTATE
#define CCALL_ALIGN_CALLSTATE 8 #define CCALL_ALIGN_CALLSTATE 8
#endif #endif
@ -152,14 +158,15 @@ typedef union FPRArg {
LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR); LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR); LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
#define CCALL_MAXSTACK 32 #define CCALL_NUM_STACK 31
#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
/* -- C call state -------------------------------------------------------- */ /* -- C call state -------------------------------------------------------- */
typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
void (*func)(void); /* Pointer to called function. */ void (*func)(void); /* Pointer to called function. */
uint32_t spadj; /* Stack pointer adjustment. */ uint32_t spadj; /* Stack pointer adjustment. */
uint8_t nsp; /* Number of stack slots. */ uint8_t nsp; /* Number of bytes on stack. */
uint8_t retref; /* Return value by reference. */ uint8_t retref; /* Return value by reference. */
#if LJ_TARGET_X64 #if LJ_TARGET_X64
uint8_t ngpr; /* Number of arguments in GPRs. */ uint8_t ngpr; /* Number of arguments in GPRs. */
@ -178,7 +185,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
#endif #endif
GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */ GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
} CCallState; } CCallState;
/* -- C call handling ----------------------------------------------------- */ /* -- C call handling ----------------------------------------------------- */

Some files were not shown because too many files have changed in this diff Show More