Compare commits

...

260 Commits

Author SHA1 Message Date
Mike Pall
eec7a8016c Prevent Clang UB 'optimization' which breaks integerness checks.
Thanks to Kacper Michajłow. #1351 #1355
2025-04-10 22:53:50 +02:00
Mike Pall
51d4c26ec7 ARM: Fix soft-float math.min()/math.max().
Reported by Dong Jianqiang. #1356
2025-04-10 22:45:38 +02:00
Mike Pall
c262976486 ARM64: Fix pass-by-value struct calling conventions.
Reported by AnthonyK213. #1357
2025-04-10 22:06:47 +02:00
Mike Pall
e0a7ea8a92 Merge branch 'master' into v2.1 2025-04-07 10:33:15 +02:00
Mike Pall
e76bb50d44 Fix error generation in load*.
Reported by Sergey Kaplun. #1353
2025-04-07 10:27:40 +02:00
Mike Pall
e9e4b6d302 Initialize unused value when specializing to cdata metatable.
Reported by jakitliang. #1354
2025-04-07 09:22:07 +02:00
Mike Pall
538a82133a Change handling of nil value markers in template tables.
Reported by Bernhard M. Wiedemann. #1348 #1155
Fixes from Peter Cawley, Christian Clason, Lewis Russell.
2025-03-11 23:04:30 +01:00
Mike Pall
84cb21ffaf REVERT: Change handling of nil value markers in template tables. 2025-03-10 02:56:07 +01:00
Mike Pall
4f2bb199fe macOS: Fix Apple hardened runtime support and put behind build option.
Reported by vanc. #1334
2025-03-10 02:53:20 +01:00
Mike Pall
e3c70a7d81 macOS: Fix support for Apple hardened runtime.
Reported by Christian Clason. #1334
2025-03-10 00:05:08 +01:00
Mike Pall
7db2d1b12a Fix handling of nil value markers in template tables.
Thanks to Peter Cawley. #1348 #1155
2025-03-09 23:11:05 +01:00
Mike Pall
e0551670c9 Merge branch 'master' into v2.1 2025-03-09 23:09:02 +01:00
Mike Pall
85c3f2fb6f Avoid unpatching bytecode twice after a trace flush.
Reported by Sergey Kaplun. #1345
2025-03-09 23:04:23 +01:00
Mike Pall
eee16efa77 Fix state restore when recording __concat metamethod.
Reported by Sergey Kaplun. #1338 #1298
2025-03-09 21:28:17 +01:00
Mike Pall
4219efae43 Windows: Allow mixed builds with msvcbuild.bat.
Suggested by alex4814. #1341
2025-03-09 21:05:06 +01:00
Mike Pall
0254770582 macOS: Add support for Apple hardened runtime.
Thanks to Peter Cawley. #1334
2025-03-09 20:45:22 +01:00
Mike Pall
f14556234c Merge branch 'master' into v2.1 2025-03-09 16:25:34 +01:00
Mike Pall
d508715ab6 Add compatibility string coercion for fp:seek() argument.
Reported by Magnus Wibeck. #1343
2025-03-09 16:21:29 +01:00
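A minimal sketch of the compatibility behavior added in the entry above, assuming the numeric offset may now also be passed as a string (the file name is purely illustrative):

local f = assert(io.open("seek_demo.txt", "w+"))  -- illustrative scratch file
f:write("hello")
f:seek("set", "1")       -- the string "1" is coerced to the number 1, as in PUC Lua
print(f:read("*a"))      --> ello
f:close()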
Mike Pall
e27ee68817 Windows: Clarify installation directory layout.
Suggested by eabase. #1346
2025-03-09 16:10:22 +01:00
Mike Pall
55a42da36e Remove Cygwin from docs, since it's not a supported target. 2025-03-09 16:09:36 +01:00
Mike Pall
423ac2144b Improve CLI signal handling on POSIX. 2025-03-09 15:50:01 +01:00
Mike Pall
54dc2fa5d7 FFI: Add pre-declared int128_t, uint128_t, __int128 types.
Note: Only declaration and copy (interpreted only) are implemented.
2025-03-09 15:37:35 +01:00
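A sketch of what the pre-declared 128 bit types allow so far, based on the note above (declaration and copying only, interpreter only):

local ffi = require("ffi")
local a = ffi.new("int128_t")      -- declaration: a zero-initialized 16 byte box
local b = ffi.new("uint128_t")     -- unsigned variant, same size
local c = ffi.new("int128_t", a)   -- copying an existing value is supported
print(ffi.sizeof("__int128"))      --> 16
-- Arithmetic on these types is not implemented, and only the interpreter
-- handles them; the JIT compiler does not.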
Mike Pall
b1179ea5f7 Use dylib extension for iOS installs, too.
Reported by Andrey Filipenkov. #1336
2025-03-09 15:00:15 +01:00
Mike Pall
5eb9509468 Change handling of nil value markers in template tables.
Reported by Bernhard M. Wiedemann. #1348 #1155
2025-03-09 14:44:57 +01:00
Mike Pall
a4f56a459a Merge branch 'master' into v2.1 2025-01-13 16:22:22 +01:00
Mike Pall
62e362afbb Fix recording of BC_VARG.
Reported by Bachir Bendrissou.
2025-01-13 16:19:57 +01:00
Mike Pall
9d777346bc Reject negative getfenv()/setfenv() levels to prevent compiler warning.
Thanks to Sergey Kaplun. #1329
2025-01-13 16:16:27 +01:00
Mike Pall
8358eb0cce Merge branch 'master' into v2.1 2025-01-13 16:15:19 +01:00
Mike Pall
e8236561d4 Bump copyright date. 2025-01-13 15:59:10 +01:00
Mike Pall
f73e649a95 Merge branch 'master' into v2.1 2024-12-16 14:32:07 +01:00
Mike Pall
e2e0b1dd2d Force fallback source name for stripped bytecode.
Reported by Lyrth. #1319
2024-12-16 14:30:10 +01:00
Mike Pall
cd8d0a437d Remove dependency on <limits.h>.
Reported by yupengda002. #1318
2024-12-16 14:27:58 +01:00
Mike Pall
19878ec05c Restore state when recording __concat metamethod throws OOM.
Reported by Sergey Kaplun. #1298 #1234
2024-11-28 18:07:58 +01:00
Mike Pall
35a4dd6f79 MIPS64: Fix pcall() error case.
Thanks to Sergey Kaplun. #1308
2024-11-28 16:33:18 +01:00
Mike Pall
4788e6f92a Merge branch 'master' into v2.1 2024-11-28 16:28:51 +01:00
Mike Pall
811e448daa Fix detection of inconsistent renames due to sunk values.
Thanks to Sergey Kaplun. #1295 #584
2024-11-28 16:26:10 +01:00
Mike Pall
fe71d0fb54 Windows: Allow amalgamated static builds with msvcbuild.bat.
Reported by Naman Dixit. #1289
2024-11-14 17:21:00 +01:00
Mike Pall
fca66335d1 Always close profiler output file.
Reported by Guilherme Batalheiro. #1304
2024-11-14 17:13:58 +01:00
Mike Pall
9ce8f1ff8e Fix override of INSTALL_LJLIBD in the presence of DESTDIR.
Reported by faithanalog. #1239 #1303
2024-11-14 17:09:07 +01:00
Mike Pall
69bbf3c1b0 Fix bit op coercion for shifts in DUALNUM builds.
Reported by Junlong Li. Followup to #1273
2024-11-13 09:18:32 +01:00
Mike Pall
97813fb924 macOS: Remove obsolete -single_module flag.
Thanks to dundargoc. #1284
2024-10-02 13:59:42 +02:00
Mike Pall
b2915e9ab5 macOS: Workaround for buggy XCode 15.0 - 15.2 linker.
Thanks to Carlo Cabrera. #1283
2024-10-02 12:12:56 +02:00
Mike Pall
2240d84464 macOS: Fix macOS 15 / Clang 16 build.
Note: The -Wl,-no_deduplicate workaround is NOT needed anymore.
Thanks to fxcoudert, corsix, clason, baconpaul, mvf. #1275 #1266
2024-10-02 02:06:25 +02:00
Mike Pall
f5fd22203e Fix bit op coercion in DUALNUM builds.
Thanks to Sergey Kaplun. #1273
2024-09-29 16:46:29 +02:00
Mike Pall
0ae532c9aa Merge branch 'master' into v2.1 2024-09-29 16:11:15 +02:00
Mike Pall
5141cbc20c Fix compilation of getmetatable() for UDTYPE_IO_FILE.
Reported by Sergey Bronnikov. #1279
2024-09-29 16:03:37 +02:00
Mike Pall
c63a160706 Remove ancient RtlUnwindEx workaround for MinGW64.
Thanks to Kacper Michajłow. #1272
2024-09-29 15:33:32 +02:00
Mike Pall
87ae18af97 Drop unused function wrapper.
Follow-up to #1247.
2024-09-04 14:32:08 +02:00
Mike Pall
f725e44cda Merge branch 'master' into v2.1 2024-08-24 17:14:51 +02:00
Mike Pall
e45fd4cb71 Fix limit check in narrow_conv_backprop().
Thanks to Sergey Kaplun. #1262
2024-08-24 17:11:45 +02:00
Mike Pall
9bb6b35f7f Always use IRT_NIL for IR_TBAR.
Thanks to Peter Cawley. #1258
2024-08-24 17:03:17 +02:00
Mike Pall
c68711cc87 ARM64: Use ldr literal to load FP constants.
Thanks to Peter Cawley. #1255
2024-08-21 11:31:29 +02:00
Mike Pall
304da39cc5 FFI: Add missing coercion when recording 64-bit bit.*().
Thanks to Peter Cawley. #1252
2024-08-20 19:13:59 +02:00
Mike Pall
cdc2db3aea ARM64: Make tobit conversions match JIT backend behavior.
Thanks to Peter Cawley. #1253
2024-08-20 19:01:51 +02:00
Mike Pall
f4fa5646a8 Merge branch 'master' into v2.1 2024-08-20 19:01:38 +02:00
Mike Pall
32a683d226 ARM: Make hard-float tobit conversions match JIT backend behavior.
Reported by Peter Cawley. #1253
2024-08-20 19:00:47 +02:00
Mike Pall
fb22d0f80f FFI: Drop finalizer table rehash after GC cycle.
Reported by Sergey Kaplun. #1247
2024-08-19 20:00:21 +02:00
Mike Pall
fb5e1c9f0d Merge branch 'master' into v2.1 2024-08-19 17:33:23 +02:00
Mike Pall
ab39082fdd Fix another potential file descriptor leak in luaL_loadfile*().
Reported by Peter Cawley. #1249
2024-08-19 17:31:15 +02:00
Mike Pall
fddc9650d8 Merge branch 'master' into v2.1 2024-08-19 16:22:55 +02:00
Mike Pall
bcc6cbb188 MIPS32: Fix little-endian IR_RETF.
Thanks to Peter Cawley. #1250
2024-08-19 16:17:44 +02:00
Mike Pall
5ca25ee83e Correctly close VM state after early OOM during open.
Reported by Assumeru. #1248
2024-08-19 16:14:55 +02:00
Mike Pall
19db4e9b7c Fix potential file descriptor leak in luaL_loadfile*().
Reported by Assumeru. #1249
2024-08-19 16:11:36 +02:00
Mike Pall
ae4735f621 Reflect override of INSTALL_LJLIBD in package.path.
Suggested by GitSparTV. #1239
2024-08-15 00:38:43 +02:00
Mike Pall
6f834087d0 ARM64: Use movi to materialize FP constants.
Thanks to Peter Cawley. #1245
2024-08-15 00:22:47 +02:00
Mike Pall
2d54213e7c Add more FOLD rules for integer conversions.
Thanks to Peter Cawley. #1246
2024-08-15 00:20:54 +02:00
Mike Pall
833600390c Merge branch 'master' into v2.1 2024-08-15 00:19:35 +02:00
Mike Pall
86e7123bb1 Different fix for partial snapshot restore due to stack overflow.
Reported by Junlong Li. Fixed by Peter Cawley. #1196
2024-08-15 00:17:19 +02:00
Mike Pall
7369eff67d Fix IR_ABC hoisting.
Reported by pwnhacker0x18. Fixed by Peter Cawley. #1194
2024-08-15 00:10:01 +02:00
Mike Pall
3bdc6498c4 Limit CSE for IR_CARG to fix loop optimizations.
Thanks to Peter Cawley. #1244
2024-08-15 00:07:34 +02:00
Mike Pall
04dca7911e Call math.randomseed() without arguments to seed from system entropy.
Reminder: the math.random() PRNG is NOT SUITABLE FOR CRYPTOGRAPHIC USE.
2024-07-04 01:26:29 +02:00
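A sketch of the no-argument form described in the entry above:

math.randomseed()       -- no arguments: seed the Tausworthe PRNG from system entropy
print(math.random())    -- uniform double in [0, 1)
-- As the commit stresses, this PRNG is not suitable for cryptographic use.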
Mike Pall
7421a1b33c Restore state when recording __concat metamethod throws an error.
Thanks to Sergey Kaplun. #1234
2024-07-04 00:48:49 +02:00
Mike Pall
510f88d468 Add build flag LUAJIT_DISABLE_TAILCALL to disable tailcall generation.
Only use this for debugging purposes. NEVER set it for regular builds
or distro builds! In Lua, tailcalls are a language guarantee.
Suggested by Steve Vermeulen. #1220
2024-07-04 00:13:58 +02:00
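To illustrate why the flag is debug-only, here is a sketch of code that relies on the tailcall guarantee; without proper tailcalls it would overflow the stack:

local function countdown(n)
  if n == 0 then return "done" end
  return countdown(n - 1)   -- tailcall: reuses the current stack frame
end
print(countdown(1000000))   --> done, in constant stack space on a standard build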
Mike Pall
444c8ff19a Clarify that lj_buf_shrink() does not keep any buffer data.
https://www.freelists.org/post/luajit/lj-buf-shrink-may-truncate-the-data-and-sbw-point-over-the-end-of-the-buffer,1
Thanks to Junlong Li.
2024-07-04 00:03:40 +02:00
Mike Pall
747fc02eb9 OSX: Fix installed luajit.pc.
Reported by leleliu008. #1221
2024-07-03 23:59:59 +02:00
Mike Pall
8038430110 Merge branch 'master' into v2.1 2024-07-03 23:59:03 +02:00
Mike Pall
7a608e4425 FFI: Fix various issues in recff_cdata_arith.
Thanks to Sergey Kaplun. #1224
2024-07-03 23:46:47 +02:00
Mike Pall
f602f0154b Fix predict_next() in parser (for real now).
Reported by Sergey Kaplun. #1226 #1054
2024-07-03 23:45:16 +02:00
Mike Pall
f2a1cd4328 FFI: Fix __tostring metamethod access to enum cdata value.
Thanks to Sergey Kaplun. #1232
2024-07-03 23:43:57 +02:00
Mike Pall
0f8b878e2f Fix typo.
Reported by Sergey Bronnikov. #1223
2024-07-03 23:43:29 +02:00
Mike Pall
6885efb73e Merge branch 'master' into v2.1 2024-07-03 23:42:38 +02:00
Mike Pall
811c5322c8 Handle partial snapshot restore due to stack overflow.
Reported by pwnhacker0x18. Fixed by Peter Cawley. #1196
2024-07-03 21:42:21 +02:00
Mike Pall
93e87998b2 Update Nintendo Switch build script.
Thanks to IoriBranford. #1214
2024-05-25 19:01:18 +02:00
Mike Pall
f5587f5eb3 Merge branch 'master' into v2.1 2024-05-25 16:41:53 +02:00
Mike Pall
4a22050df9 Prevent sanitizer warning in snap_restoredata().
Thanks to Sergey Kaplun. #1193
2024-05-25 16:38:05 +02:00
Mike Pall
80c1c65bce Typo. 2024-05-25 16:25:35 +02:00
Mike Pall
4fc48c50fe Limit number of string format elements to compile.
Reported by pwnhacker0x18. #1203
2024-05-25 16:22:39 +02:00
Mike Pall
a6386bdabe FFI: Clarify scalar boxing behavior.
Prevent misunderstandings like in #1216
2024-05-25 15:48:07 +02:00
Mike Pall
9398123383 Fix internal link in docs.
Thanks to GitSparTV. #1219
2024-05-25 14:56:15 +02:00
Mike Pall
5790d25397 OSX/iOS: Fix SDK incompatibility.
Thanks to Ryan Carsten Schmidt. #1189
2024-04-22 10:06:42 +02:00
Mike Pall
b3e4987389 Windows/MSVC: Cleanup msvcbuild.bat and always generate PDB.
Thanks to Miku AuahDark. #1127
2024-04-19 11:01:13 +02:00
Mike Pall
75e9277798 Merge branch 'master' into v2.1 2024-04-19 01:47:48 +02:00
Mike Pall
9b5e837ac2 Fix segment release check in internal memory allocator.
Thanks to Jinji Zeng. #1179 #1157
2024-04-19 01:44:19 +02:00
Mike Pall
d032c637b1 Fix compiler warning. 2024-04-19 01:41:12 +02:00
Mike Pall
f5affaa6c4 FFI: Turn FFI finalizer table into a proper GC root.
Reported by Sergey Bronnikov. #1168
2024-04-19 01:33:19 +02:00
Mike Pall
7110b93567 OSX/iOS: Always generate 64 bit non-FAT Mach-O object files.
Reported by Sergey Bronnikov. #1181
2024-04-19 00:31:06 +02:00
Mike Pall
d2fe2a6d46 Show name of NYI bytecode in -jv and -jdump.
Suggested by Sergey Kaplun. #1176 #567
2024-04-19 00:12:22 +02:00
Mike Pall
b8b49bf395 Use generic trace error for OOM during trace stitching.
Thanks to Sergey Kaplun. #1166
2024-04-18 23:57:53 +02:00
Mike Pall
243b7682a5 Fix serialization format docs.
Reported by nounwind.
2024-04-18 23:49:43 +02:00
Mike Pall
d06beb0480 Handle all types of errors during trace stitching.
Thanks to Sergey Kaplun and Peter Cawley. #1166 #720
2024-03-10 17:29:48 +01:00
Mike Pall
bcc5125a91 Fix recording of __concat metamethod.
Thanks to Sergey Kaplun. #1164
2024-03-10 17:26:36 +01:00
Mike Pall
913df6a945 Merge branch 'master' into v2.1 2024-03-10 17:26:03 +01:00
Mike Pall
cae361187e Prevent down-recursion for side traces.
Thanks to Sergey Kaplun. #1169
2024-03-10 17:23:21 +01:00
Mike Pall
302366a338 Check frame size limit before returning to a lower frame.
Thanks to Sergey Kaplun. #1173
2024-03-10 17:19:29 +01:00
Mike Pall
dda1ac273a FFI: Treat cdata finalizer table as a GC root.
Thanks to Sergey Bronnikov. #1168
2024-03-10 17:16:41 +01:00
Mike Pall
88ed9fdbbb Handle stack reallocation in debug.setmetatable() and lua_setmetatable().
Thanks to Sergey Kaplun. #1172
2024-03-10 17:13:28 +01:00
Mike Pall
0d313b2431 Merge branch 'master' into v2.1 2024-02-04 16:47:14 +01:00
Mike Pall
defe61a567 Rework stack overflow handling.
Reported by pwnhacker0x18. Fixed by Peter Cawley. #1152
2024-02-04 16:34:30 +01:00
Mike Pall
9cc2e42b17 Merge branch 'master' into v2.1 2024-01-31 14:39:50 +01:00
Mike Pall
9cdd5a9479 Preserve keys with dynamic values in template tables when saving bytecode.
Reported by Lyrthras. Fixed by Peter Cawley. #1155
2024-01-31 14:32:04 +01:00
Mike Pall
5e5d542c99 Merge branch 'master' into v2.1 2024-01-31 14:31:40 +01:00
Mike Pall
14987af80a Prevent include of luajit_rolling.h.
Thanks to Peter Cawley. #1145
2024-01-31 14:29:23 +01:00
Mike Pall
21a46723d8 Merge branch 'master' into v2.1 2024-01-26 23:18:02 +01:00
Mike Pall
e6c0ade97c Fix documentation bug about '\z' string escape. 2024-01-26 23:17:33 +01:00
Mike Pall
343ce0edaf Fix zero stripping in %g number formatting.
Reported by pwnhacker0x18. #1149
2024-01-25 13:23:48 +01:00
Mike Pall
f2336c48fa Merge branch 'master' into v2.1 2024-01-23 19:01:46 +01:00
Mike Pall
85b4fed0b0 Fix unsinking of IR_FSTORE for NULL metatable.
Reported by pwnhacker0x18. #1147
2024-01-23 18:58:52 +01:00
Mike Pall
3ca0a80711 DynASM/x86: Add endbr instruction.
Thanks to Dmitry Stogov. #1143 #1142
2024-01-22 19:17:45 +01:00
Mike Pall
2f35cb45fd MIPS64 R2/R6: Fix FP to integer conversions.
Thanks to Peter Cawley. #1146
2024-01-22 19:12:13 +01:00
Mike Pall
4b90f6c4d7 Add cross-32/64 bit and deterministic bytecode generation.
Contributed by Peter Cawley. #993 #1008
2024-01-22 19:06:36 +01:00
Mike Pall
c525bcb902 DynASM/x86: Allow [&expr] operand.
Thanks to Dmitry Stogov. #1138
2023-12-23 20:06:17 +01:00
Mike Pall
dbd363ca25 Merge branch 'master' into v2.1 2023-12-23 19:49:43 +01:00
Mike Pall
658530562c Check for IR_HREF vs. IR_HREFK aliasing in non-nil store check.
Thanks to Peter Cawley. #1133
2023-12-23 19:43:03 +01:00
Mike Pall
293199c5eb Merge branch 'master' into v2.1 2023-12-23 19:23:12 +01:00
Mike Pall
7dbe545933 Respect jit.off() on pending trace exit.
Thanks to Sergey Kaplun. #1134
2023-12-23 19:22:34 +01:00
Mike Pall
e02a207909 Merge branch 'master' into v2.1 2023-12-23 19:15:57 +01:00
Mike Pall
c42c62e71a Simplify handling of unstable types in TNEW/TDUP load forwarding.
Thanks to Peter Cawley. #994
2023-12-23 19:14:32 +01:00
Mike Pall
29b0b282f5 Merge branch 'master' into v2.1 2023-12-11 13:04:43 +01:00
Mike Pall
9bdfd34dcc Only emit proper parent references in snapshot replay.
Thanks to Peter Cawley. #1132
2023-12-11 13:01:36 +01:00
Mike Pall
ff204d0350 Fix anchoring for string buffer set() method (again).
Thanks to Peter Cawley. #1125
2023-12-10 19:42:22 +01:00
Mike Pall
8d5ea4ceb9 Merge branch 'master' into v2.1 2023-12-10 16:13:34 +01:00
Mike Pall
10cc759f25 ARM: Fix stack restore for FP slots.
Thanks to Peter Cawley. #1131
2023-12-10 16:10:48 +01:00
Mike Pall
420a9afa93 Merge branch 'master' into v2.1 2023-12-10 15:50:14 +01:00
Mike Pall
1b38c73655 Document workaround for multilib vs. cross-compiler conflict.
Reported by igorpupkinable. #1126
2023-12-10 15:45:10 +01:00
Mike Pall
e02cb19b57 Fix anchoring for string buffer set() method.
Thanks to Peter Cawley. #1125
2023-12-10 15:33:47 +01:00
Mike Pall
e4168fae5b Merge branch 'master' into v2.1 2023-12-10 15:02:26 +01:00
Mike Pall
856423f5da Fix runtime library flags for MSVC debug builds.
Reported by igor725. #1127
2023-12-10 15:00:52 +01:00
Mike Pall
487eaaf040 Merge branch 'master' into v2.1 2023-12-10 14:50:29 +01:00
Mike Pall
dcf3627d79 Fix .debug_abbrev section in GDB JIT API.
Thanks to Dmitry Stogov. #1129
2023-12-10 14:48:34 +01:00
Mike Pall
d1236a4caa Optimize table.new() with constant args to (sinkable) IR_TNEW.
Thanks to Peter Cawley. #1128
2023-12-10 14:41:56 +01:00
Mike Pall
7ad68a1fd3 Merge branch 'master' into v2.1 2023-12-10 14:33:48 +01:00
Mike Pall
1761fd2ef7 Emit sunk IR_NEWREF only once per key on snapshot replay.
Thanks to Sergey Kaplun and Peter Cawley. #1128
2023-12-10 14:29:45 +01:00
Mike Pall
43d0a19158 Fix last commit. 2023-11-15 01:41:31 +01:00
Mike Pall
536cf8a271 Merge branch 'master' into v2.1 2023-11-14 22:56:09 +01:00
Mike Pall
644723649e x86/x64: Don't fuse loads across IR_NEWREF.
Reported by Peter Cawley. #1117
2023-11-14 22:50:21 +01:00
Mike Pall
113a168b79 Improve last commit. 2023-11-12 16:11:11 +01:00
Mike Pall
45c88b7963 x86/x64: Don't fuse loads across table.clear.
Reported by Peter Cawley. #1117
2023-11-12 15:41:52 +01:00
Mike Pall
6807e60af1 Merge branch 'master' into v2.1 2023-11-12 15:25:14 +01:00
Mike Pall
d854d00ce9 x86/x64: Add more red zone checks to assembler backend.
Thanks to Peter Cawley. #1116
2023-11-12 15:18:44 +01:00
Mike Pall
7c9671a043 Merge branch 'master' into v2.1 2023-11-12 15:11:29 +01:00
Mike Pall
a4c1640432 Add stack check to pcall/xpcall.
Analyzed by Peter Cawley. #1048
2023-11-12 14:42:24 +01:00
Mike Pall
69bbbf7736 Merge branch 'master' into v2.1 2023-11-09 11:05:27 +01:00
Mike Pall
65c8493907 Invalidate SCEV entry when returning to lower frame.
Thanks to Zhongwei Yao. #1115
2023-11-09 11:02:36 +01:00
Mike Pall
b94fbfbee9 Merge branch 'master' into v2.1 2023-11-07 22:28:53 +01:00
Mike Pall
433d7e8d8d FFI: Fix pragma push stack limit check and throw on overflow.
Reported by Sergey Kaplun. #1114
2023-11-07 22:25:42 +01:00
Mike Pall
ce2cd61739 ARM64: Fix disassembly of ldp/stp offsets.
Thanks to Peter Cawley. #1113
2023-11-06 23:14:22 +01:00
Mike Pall
07b3cd3cf9 Check for upvalue state transition in IR_UREFO.
Thanks to Peter Cawley. #1085
2023-11-05 16:34:46 +01:00
Mike Pall
0afa1676b2 Merge branch 'master' into v2.1 2023-11-05 11:37:57 +01:00
Mike Pall
d133d67c88 x64: Properly fix __call metamethod return dispatch.
Reported by Sergey Kaplun. #1110
2023-11-05 11:31:08 +01:00
Mike Pall
f2e955dae8 Windows/x86: _BitScan*64 are only available on 64 bit archs.
Reported by memcorrupt. #1109
2023-11-05 11:27:35 +01:00
Mike Pall
e826d0c101 Add 'cc' file type for saving bytecode.
Contributed by Sergey Bronnikov. #1105
2023-10-21 13:31:45 +02:00
Mike Pall
4eb47df605 FFI/Windows: Fix type declaration for int64_t and uint64_t.
Thanks to Peter Cawley. #1106
2023-10-21 13:18:51 +02:00
Mike Pall
7269b02130 Merge branch 'master' into v2.1 2023-10-21 13:13:34 +02:00
Mike Pall
db944b2b56 FFI: Fix dangling reference to CType in carith_checkarg().
Reported by Sergey Kaplun. #1108
2023-10-21 13:11:50 +02:00
Mike Pall
656ecbcf8f DynASM/ARM64: Support ldp/stp of q registers.
Thanks to Peter Cawley. #1096
2023-10-08 22:12:01 +02:00
Mike Pall
d2a5487fd7 ARM64: Use ADR and ADRP to form constants.
Thanks to Peter Cawley. #1100
2023-10-08 22:10:02 +02:00
Mike Pall
14866a6828 ARM64: Fix disassembly of U12 loads.
Thanks to Peter Cawley. #1100
2023-10-08 21:57:04 +02:00
Mike Pall
c5b075eb31 ARM64: Unify constant register handling in interpreter.
Plus minor optimizations. Simplifications for out-of-tree ARM64EC.
Thanks to Peter Cawley. #1096
2023-10-08 21:39:40 +02:00
Mike Pall
9cc8bbb7ae ARM: Fix register hint for FFI calls with FP results. 2023-10-08 21:22:50 +02:00
Mike Pall
1e93951b25 ARM64: Fix register hint for FFI calls with FP results.
Thanks to Peter Cawley. #1096
2023-10-08 21:20:10 +02:00
Mike Pall
007e4dce13 ARM64: Restore fp before sp in C stack unwinders.
Thanks to Peter Cawley. #1096
2023-10-08 21:17:43 +02:00
Mike Pall
becf5cc65d FFI: Fix ffi.abi("pauth").
Thanks to Peter Cawley. #1098
2023-09-25 16:56:17 +02:00
Mike Pall
97c75843c6 Merge branch 'master' into v2.1 2023-09-22 21:07:20 +02:00
Mike Pall
f72c19e482 Maintain chain invariant in DCE.
Thanks to Peter Cawley. #1094
2023-09-22 21:04:22 +02:00
Mike Pall
d1a2fef8a8 LJ_FR2: Fix stack checks in vararg calls.
Thanks to Peter Cawley. #1048
2023-09-21 05:19:55 +02:00
Mike Pall
234dbc481e Merge branch 'master' into v2.1 2023-09-21 04:44:37 +02:00
Mike Pall
aa6b15c1a8 Follow-up fix for stack overflow handling cleanup. 2023-09-21 04:43:40 +02:00
Mike Pall
a5d2f70c73 Handle OOM error on stack resize in coroutine.resume and lua_checkstack.
Thanks to Peter Cawley. #1066
2023-09-21 04:40:48 +02:00
Mike Pall
e86990f7f2 Restore cur_L for specific Lua/C API use case.
Thanks to Peter Cawley. #1066
2023-09-21 03:54:08 +02:00
Mike Pall
b8919781d4 Consistently use 64 bit constants for 64 bit IR instructions.
Thanks to Peter Cawley. #1084
2023-09-21 03:46:33 +02:00
Mike Pall
9159289927 ARM64: Fix IR_HREF code generation for constant FP keys.
Reported by swarn. Fix for 435d8c63 by Peter Cawley. #1090
2023-09-21 02:48:12 +02:00
Mike Pall
fca1f51bf8 ARM64: Fuse negative 32 bit constants into arithmetic ops again.
Thanks to Peter Cawley. #1065
2023-09-21 02:38:29 +02:00
Mike Pall
4b605a7da8 Merge branch 'master' into v2.1 2023-09-21 02:23:25 +02:00
Mike Pall
b138ccfa91 Handle all stack layouts in (delayed) TRACE vmevent.
Thanks to Sergey Bronnikov and Peter Cawley. #1087
2023-09-21 02:15:16 +02:00
Mike Pall
92b89d005a Add missing coercion when recording select(string, ...)
Thanks to Peter Cawley. #1083
2023-09-21 02:10:18 +02:00
Mike Pall
d2f6c55b05 Cleanup stack overflow handling.
Reported by Peter Cawley. #962
2023-09-21 01:58:43 +02:00
Mike Pall
e897c5743f Windows/ARM64: Add MSVC cross-build support for x64 to ARM64.
Thanks to invertego. #1081
2023-09-17 10:44:04 +02:00
Mike Pall
7a2b83a0c5 IR_MIN/IR_MAX is non-commutative due to underlying FPU ops.
Thanks to Peter Cawley. #1082
2023-09-17 10:31:00 +02:00
Mike Pall
42ca6e120f ARM64: Set fixed interpreter registers before rethrow.
Thanks to Peter Cawley. #593
2023-09-17 10:09:58 +02:00
Mike Pall
7a77a3cd85 Windows/ARM64: Update install docs. 2023-09-15 06:10:58 +02:00
Mike Pall
bd2d107151 Windows: Call C++ destructors without compiling with /EHa.
Thanks to Peter Cawley. #593
2023-09-15 05:47:29 +02:00
Mike Pall
7a1c139569 Windows: Pass scratch CONTEXT record to RtlUnwindEx.
Thanks to Peter Cawley. #593
2023-09-15 05:31:26 +02:00
Mike Pall
18b8fd8de7 ARM64: External unwinder already restores non-volatile registers.
Thanks to Peter Cawley. #593
2023-09-15 05:27:29 +02:00
Mike Pall
b36f9fad63 Windows/ARM64: Fix exception unwinding (again).
Thanks to Peter Cawley. #593
2023-09-15 05:23:29 +02:00
Mike Pall
8af63f9920 Windows/ARM64: Fix typo in exception unwinding.
Thanks to Peter Cawley. #593
2023-09-11 23:00:36 +02:00
Mike Pall
9e0437240f FFI: Fix 64 bit shift fold rules.
Thanks to Peter Cawley. #1079
2023-09-11 21:06:25 +02:00
Mike Pall
1c33f46314 Windows/ARM64: Support Windows calling conventions.
Dear Microsoft: your butchering of the (perfectly fine) ARM64 ABI is a disgrace.
Thanks to Peter Cawley. #593
2023-09-11 16:35:28 +02:00
Mike Pall
f63bc569fa Windows/ARM64: Fix exception unwinding.
Thanks to Peter Cawley. #593
2023-09-11 13:33:27 +02:00
Mike Pall
836ab4227a ARM64: Remove unneeded IRCALL_* defs for math intrinsics.
Workaround for MSVC issue.
Thanks to Peter Cawley. #593
2023-09-11 13:14:09 +02:00
Mike Pall
b174d5e66d Fix Cygwin build.
Thanks to Christopher Ng. #1077 #1078
2023-09-11 13:10:17 +02:00
Mike Pall
5a18d4582f Merge branch 'master' into v2.1 2023-09-10 05:26:27 +02:00
Mike Pall
9760984638 Allow path overrides in genversion.lua with minilua, too.
Thanks to arch1t3cht. #1067
2023-09-10 05:23:10 +02:00
Mike Pall
cb413bf8f4 Windows/ARM64: Add initial support.
Only builds with native ARM64 Visual Studio for now.
Thanks to vanc and Stephen Just. #593 #964
2023-09-10 05:20:22 +02:00
Mike Pall
566532b807 Merge branch 'master' into v2.1 2023-09-09 23:20:57 +02:00
Mike Pall
4fe2002292 Improve architecture detection error messages. 2023-09-09 23:01:26 +02:00
Mike Pall
4611e25c0f ARM64: Fuse rotates into logical operands.
Thanks to Peter Cawley. #1076
2023-09-09 20:59:18 +02:00
Mike Pall
90742d91c2 ARM64: Don't fuse sign extensions into logical operands.
Thanks to Peter Cawley. #1076
2023-09-09 20:57:46 +02:00
Mike Pall
ba2b34f5e8 ARM64: Disassemble rotates on logical operands.
Thanks to Peter Cawley. #1076
2023-09-09 20:52:02 +02:00
Mike Pall
f442432ecb Merge branch 'master' into v2.1 2023-09-09 18:18:48 +02:00
Mike Pall
44da356e97 ARM: Fix stack check code generation.
Thanks to Peter Cawley. #1068
2023-09-09 18:16:31 +02:00
Mike Pall
b8c6ccd50c ARM64: Fix LDP/STP fusion (again).
Reported and analyzed by Zhongwei Yao. Fix by Peter Cawley. #1075
2023-09-09 18:01:37 +02:00
Mike Pall
0705ef6ce4 ARM64: Ensure branch is in range before emitting TBZ/TBNZ.
Thanks to Peter Cawley. #1074
2023-09-09 17:52:43 +02:00
Mike Pall
59be97edb6 Merge branch 'master' into v2.1 2023-09-09 17:46:10 +02:00
Mike Pall
43eff4aad4 Fix mcode limit check for non-x86 archs.
Thanks to Peter Cawley.
2023-09-09 17:44:54 +02:00
Mike Pall
de2e09f54c ARM64: Improve BC_JLOOP.
Thanks to Peter Cawley. #1070
2023-09-09 17:38:44 +02:00
Mike Pall
6c599960d1 ARM64: Improve integer IR_MUL code generation.
Thanks to Peter Cawley. #1070
2023-09-09 17:36:40 +02:00
Mike Pall
4ed83bd990 ARM64: Simplify code generation for IR_STRTO.
Thanks to Peter Cawley. #1070
2023-09-09 17:34:28 +02:00
Mike Pall
a5ee35867c ARM64: Use RID_TMP instead of scratch register in more places.
Thanks to Peter Cawley. #1070
2023-09-09 17:31:06 +02:00
Mike Pall
c1877e648a ARM64: Improve IR_OBAR code generation.
Thanks to Peter Cawley. #1070
2023-09-09 17:21:32 +02:00
Mike Pall
c2bdce399e ARM64: Improve IR_UREF code generation.
Thanks to Peter Cawley. #1070
2023-09-09 17:19:02 +02:00
Mike Pall
435d8c6301 ARM64: Improve IR_HREF code generation.
Thanks to Peter Cawley. #1070
2023-09-09 17:15:26 +02:00
Mike Pall
315dc3e776 ARM64: Reload BASE via GL instead of spilling it.
Thanks to Peter Cawley. #1068.
2023-09-09 16:56:16 +02:00
Mike Pall
5149b0a3a2 ARM64: Consolidate 32/64-bit constant handling in assembler.
Thanks to Peter Cawley. #1065
2023-09-09 16:30:14 +02:00
Mike Pall
dfc122e45c ARM64: Tune emit_lsptr. Avoid wrong load for asm_prof.
Thanks to Peter Cawley. #1065
2023-09-09 14:20:39 +02:00
Mike Pall
4651ff2fbc ARM64: Inline only use of emit_loada.
Thanks to Peter Cawley. #1065
2023-09-09 14:15:18 +02:00
Mike Pall
9daf9f9003 ARM64: Improve K13 constant rematerialization.
Algorithm by Dougall Johnson: https://dougallj.wordpress.com/2021/10/30/
Thanks to Peter Cawley. #1065
2023-09-09 14:11:25 +02:00
Mike Pall
9bd2404137 Merge branch 'master' into v2.1 2023-09-09 13:42:12 +02:00
Mike Pall
7f9907b4ed Add NaN check to IR_NEWREF.
Thanks to Peter Cawley. #1069
2023-09-09 13:37:31 +02:00
Mike Pall
cc8d88aafc Merge branch 'master' into v2.1 2023-09-09 12:50:13 +02:00
Mike Pall
4d05806ae0 Allow override of paths for genversion.lua.
Thanks to arch1t3cht. #1067
2023-09-09 12:47:27 +02:00
Mike Pall
19707009bf Fix native MinGW build.
Thanks to Victor Bombi. #1071
2023-09-09 12:41:47 +02:00
Mike Pall
41fb94defa Add randomized register allocation for fuzz testing.
This must be explicitly enabled with: -DLUAJIT_RANDOM_RA
Thanks to Peter Cawley. #1062
2023-08-30 01:10:52 +02:00
Mike Pall
2f6c451ce8 ARM64: Improve register allocation for integer IR_MUL/IR_MULOV.
Thanks to Peter Cawley. #1062
2023-08-29 22:38:20 +02:00
Mike Pall
7ff8f26eb8 ARM64: Fix register allocation for IR_*LOAD.
Thanks to Peter Cawley. #1062
2023-08-29 22:35:10 +02:00
Mike Pall
356231edaf Merge branch 'master' into v2.1 2023-08-29 22:30:57 +02:00
Mike Pall
c6ee7e19d1 Update external MSDN URL in code.
Thanks to Kyle Marshall. #1060
2023-08-29 22:27:38 +02:00
Mike Pall
83954100db FFI/ARM64/OSX: Handle non-standard OSX C calling conventions.
Contributed by Peter Cawley. #205
2023-08-29 02:21:51 +02:00
Mike Pall
cf903edb30 FFI: Unify stack setup for C calls in interpreter. 2023-08-29 02:12:13 +02:00
Mike Pall
7cc53f0b85 ARM64: Prevent STP fusion for conditional code emitted by TBAR.
Thanks to Peter Cawley. #1057
2023-08-28 22:39:35 +02:00
Mike Pall
0fa2f1cbcf ARM64: Fix LDP/STP fusing for unaligned accesses.
Thanks to Peter Cawley. #1056
2023-08-28 22:33:54 +02:00
Mike Pall
c0d5240a25 Merge branch 'master' into v2.1 2023-08-28 22:24:36 +02:00
Mike Pall
0ef51b495f Handle table unsinking in the presence of IRFL_TAB_NOMM.
Reported by Sergey Kaplun. #1052
2023-08-28 22:15:42 +02:00
Mike Pall
238a2a80bb Merge branch 'master' into v2.1 2023-08-28 22:02:06 +02:00
Mike Pall
6a3111a57f Use fallback name for install files without valid .git or .relver. 2023-08-28 21:25:51 +02:00
Mike Pall
a0b52aae33 Handle non-.git checkout with .relver in .bat-file builds.
Thanks to Simon Cooke.
2023-08-28 21:59:01 +02:00
Mike Pall
631a45f73b Merge branch 'master' into v2.1 2023-08-28 21:08:00 +02:00
Mike Pall
14e2917e7a Fix external C call stack check when using LUAJIT_MODE_WRAPCFUNC.
Thanks to Peter Cawley. #1047
2023-08-28 21:04:01 +02:00
Mike Pall
309fb42b87 Fix predict_next() in parser (again).
Reported by Sergey Bronnikov. #1054
2023-08-28 21:00:37 +02:00
Mike Pall
03c31124cc Fix typo.
Thanks to Simon Cooke.
2023-08-22 17:06:34 +02:00
Mike Pall
ff192d134d Merge branch 'master' into v2.1 2023-08-22 17:06:14 +02:00
Mike Pall
d0ce82ecdc Handle the case when .git is not a directory.
Thanks to Alexander Shpilkin.
2023-08-22 17:04:22 +02:00
Mike Pall
0b5bf71e37 Merge branch 'master' into v2.1 2023-08-22 15:37:21 +02:00
Mike Pall
6a2163a6b4 Add .gitattributes to dynamically resolve .relver.
Thanks to Alexander Shpilkin.
2023-08-22 15:36:55 +02:00
Mike Pall
33e2a49dbf Add .gitattributes to dynamically resolve .relver.
Thanks to Alexander Shpilkin.
2023-08-22 15:30:27 +02:00
Mike Pall
093759d528 Fix for last commit: also remove symlink on uninstall. 2023-08-22 11:46:12 +02:00
Mike Pall
748ab9d90a Switch to rolling releases: mark v2.1 as production. 2023-08-22 11:13:45 +02:00
Mike Pall
54ef81f864 Merge branch 'master' into v2.1 2023-08-21 13:09:52 +02:00
Mike Pall
ed21acd863 Fix Windows build scripts for rolling releases.
Reported by Miku AuahDark.
2023-08-21 13:08:00 +02:00
Mike Pall
3c290f817f Merge branch 'master' into v2.1 2023-08-21 04:03:25 +02:00
Mike Pall
6351abc78f Switch MSVC and console build scripts to rolling releases. 2023-08-21 03:59:03 +02:00
217 changed files with 2489 additions and 1509 deletions

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
/.relver export-subst

1
.relver Normal file
View File

@ -0,0 +1 @@
$Format:%ct$

View File

@ -1,7 +1,7 @@
===============================================================================
LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/
Copyright (C) 2005-2023 Mike Pall. All rights reserved.
Copyright (C) 2005-2025 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -10,7 +10,7 @@
# For MSVC, please follow the instructions given in src/msvcbuild.bat.
# For MinGW and Cygwin, cd to src and run make with the Makefile there.
#
# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
##############################################################################
MAJVER= 2
@ -37,12 +37,13 @@ export MULTILIB= lib
DPREFIX= $(DESTDIR)$(PREFIX)
INSTALL_BIN= $(DPREFIX)/bin
INSTALL_LIB= $(DPREFIX)/$(MULTILIB)
INSTALL_SHARE= $(DPREFIX)/share
INSTALL_SHARE_= $(PREFIX)/share
INSTALL_SHARE= $(DESTDIR)$(INSTALL_SHARE_)
INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION)
INSTALL_INC= $(INSTALL_DEFINC)
INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION)
INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit
export INSTALL_LJLIBD= $(INSTALL_SHARE_)/luajit-$(MMVERSION)
INSTALL_JITLIB= $(DESTDIR)$(INSTALL_LJLIBD)/jit
INSTALL_LMODD= $(INSTALL_SHARE)/lua
INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER)
INSTALL_CMODD= $(INSTALL_LIB)/lua
@ -71,7 +72,7 @@ INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME)
INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \
$(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD)
UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \
UNINSTALL_DIRS= $(INSTALL_JITLIB) $(DESTDIR)$(INSTALL_LJLIBD) $(INSTALL_INC) \
$(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD)
RM= rm -f
@ -109,11 +110,12 @@ else
endif
TARGET_SYS?= $(HOST_SYS)
ifeq (Darwin,$(TARGET_SYS))
ifneq (,$(filter $(TARGET_SYS),Darwin iOS))
INSTALL_SONAME= $(INSTALL_DYLIBNAME)
INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1)
INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2)
LDCONFIG= :
SED_PC+= -e "s| -Wl,-E||"
endif
##############################################################################
@ -142,18 +144,12 @@ install: $(INSTALL_DEP)
$(RM) $(FILE_PC).tmp
cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
@echo ""
@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
@echo "You can do this now by running this command (with sudo):"
@echo ""
@echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
@echo ""
uninstall:
@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
for file in $(FILES_JITLIB); do \
$(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
done

2
README
View File

@ -5,7 +5,7 @@ LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
Project Homepage: https://luajit.org/
LuaJIT is Copyright (C) 2005-2023 Mike Pall.
LuaJIT is Copyright (C) 2005-2025 Mike Pall.
LuaJIT is free software, released under the MIT license.
See full Copyright Notice in the COPYRIGHT file or in luajit.h.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2023 Mike Pall.
/* Copyright (C) 2004-2025 Mike Pall.
*
* You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2023 Mike Pall.
/* Copyright (C) 2004-2025 Mike Pall.
*
* You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme.

View File

@ -3,7 +3,7 @@
<head>
<title>Contact</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -94,7 +94,7 @@ don't like that, please complain to Google or Microsoft, not me.
<h2>Copyright</h2>
<p>
All documentation is
Copyright &copy; 2005-2023 Mike Pall.
Copyright &copy; 2005-2025 Mike Pall.
</p>
@ -102,7 +102,7 @@ Copyright &copy; 2005-2023 Mike Pall.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>String Buffer Library</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -85,7 +85,7 @@ operations.
</p>
<p>
The string buffer library also includes a high-performance
<a href="serialize">serializer</a> for Lua objects.
<a href="#serialize">serializer</a> for Lua objects.
</p>
<h2 id="use">Using the String Buffer Library</h2>
@ -588,9 +588,9 @@ num → 0x07 double.L
tab → 0x08 // Empty table
| 0x09 h.U h*{object object} // Key/value hash
| 0x0a a.U a*object // 0-based array
| 0x0b a.U a*object h.U h*{object object} // Mixed
| 0x0b a.U h.U a*object h*{object object} // Mixed
| 0x0c a.U (a-1)*object // 1-based array
| 0x0d a.U (a-1)*object h.U h*{object object} // Mixed
| 0x0d a.U h.U (a-1)*object h*{object object} // Mixed
tab_mt → 0x0e (index-1).U tab // Metatable dict entry
int64 → 0x10 int.L // FFI int64_t
@ -679,7 +679,7 @@ mappings of files are OK, but only if the file does not change.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
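The serialization format adjusted in the diff above is produced by the string buffer library's encoder; a minimal sketch of its use, assuming a mixed table exercises the 0x0b/0x0d tags whose layout the diff corrects:

local buffer = require("string.buffer")
local blob = buffer.encode({ 1, 2, 3, key = "value" })  -- mixed table: array + hash part
local copy = buffer.decode(blob)                        -- round-trip back to a Lua table
assert(copy[3] == 3 and copy.key == "value")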

View File

@ -3,7 +3,7 @@
<head>
<title>Lua/C API Extensions</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -173,7 +173,7 @@ Also note that this mechanism is not without overhead.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>FFI Library</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -316,7 +316,7 @@ without undue conversion penalties.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>ffi.* API Functions</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -558,7 +558,7 @@ named <tt>i</tt>.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>FFI Semantics</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -440,6 +440,19 @@ If you don't do this, the default Lua number &rarr; <tt>double</tt>
conversion rule applies. A vararg C&nbsp;function expecting an integer
will see a garbled or uninitialized value.
</p>
<p>
Note: this is the only place where creating a boxed scalar number type is
actually useful. <b>Never use <tt>ffi.new("int")</tt>, <tt>ffi.new("float")</tt>
etc. anywhere else!</b>
</p>
<p style="font-size: 8pt;">
Ditto for <tt>ffi.cast()</tt>. Explicitly boxing scalars <b>does not</b>
improve performance or force <tt>int</tt> or <tt>float</tt> arithmetic! It
just adds costly boxing, unboxing and conversions steps. And it may lead
to surprise results, because
<a href="#cdata_arith">cdata arithmetic on scalar numbers</a>
is always performed on 64 bit integers.
</p>
<h2 id="init">Initializers</h2>
<p>
@ -1246,7 +1259,7 @@ compiled.</li>
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
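A sketch of the vararg rule the added paragraphs describe: box the scalar only for vararg slots, nowhere else (printf is just a convenient libc example):

local ffi = require("ffi")
ffi.cdef[[ int printf(const char *fmt, ...); ]]
-- The vararg slot needs a boxed integer; otherwise the default
-- Lua number -> double conversion applies and %d sees garbage.
ffi.C.printf("%d\n", ffi.new("int", 42))
-- Everywhere else, pass plain Lua numbers; explicit boxing only adds
-- conversion overhead and 64 bit cdata arithmetic surprises.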

View File

@ -3,7 +3,7 @@
<head>
<title>FFI Tutorial</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -587,7 +587,7 @@ it to a local variable in the function scope is unnecessary.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>jit.* Library</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -187,7 +187,7 @@ if you want to know more.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>Profiler</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -349,7 +349,7 @@ use.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>Extensions</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -160,13 +160,33 @@ passes any arguments after the error function to the function
which is called in a protected context.
</p>
<h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3>
<h3 id="load"><tt>load*()</tt> handle UTF-8 source code</h3>
<p>
Non-ASCII characters are handled transparently by the Lua source code parser.
This allows the use of UTF-8 characters in identifiers and strings.
A UTF-8 BOM is skipped at the start of the source code.
</p>
<h3 id="load_mode"><tt>load*()</tt> add a mode parameter</h3>
<p>
As an extension from Lua 5.2, the functions <tt>loadstring()</tt>,
<tt>loadfile()</tt> and (new) <tt>load()</tt> add an optional
<tt>mode</tt> parameter.
</p>
<p>
The default mode string is <tt>"bt"</tt>, which allows loading of both
source code and bytecode. Use <tt>"t"</tt> to allow only source code
or <tt>"b"</tt> to allow only bytecode to be loaded.
</p>
<p>
By default, the <tt>load*</tt> functions generate the native bytecode format.
For cross-compilation purposes, add <tt>W</tt> to the mode string to
force the 32 bit format and <tt>X</tt> to force the 64 bit format.
Add both to force the opposite format. Note that non-native bytecode
generated by <tt>load*</tt> cannot be run, but can still be passed
to <tt>string.dump</tt>.
</p>
<h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3>
<p>
All number-to-string conversions consistently convert non-finite numbers
@ -186,26 +206,33 @@ works independently of the current locale and it supports hex floating-point
numbers (e.g. <tt>0x1.5p-3</tt>).
</p>
<h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3>
<h3 id="string_dump"><tt>string.dump(f [,mode])</tt> generates portable bytecode</h3>
<p>
An extra argument has been added to <tt>string.dump()</tt>. If set to
<tt>true</tt>, 'stripped' bytecode without debug information is
generated. This speeds up later bytecode loading and reduces memory
usage. See also the
<tt>true</tt> or to a string which contains the character <tt>s</tt>,
'stripped' bytecode without debug information is generated. This speeds
up later bytecode loading and reduces memory usage. See also the
<a href="running.html#opt_b"><tt>-b</tt> command line option</a>.
</p>
<p>
The generated bytecode is portable and can be loaded on any architecture
that LuaJIT supports, independent of word size or endianess. However, the
bytecode compatibility versions must match. Bytecode stays compatible
for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
that LuaJIT supports. However, the bytecode compatibility versions must
match. Bytecode only stays compatible within a major+minor version
(x.y.aaa &rarr; x.y.bbb), except for development branches. Foreign bytecode
(e.g. from Lua 5.1) is incompatible and cannot be loaded.
</p>
<p>
Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
a different, incompatible bytecode format for all 64 bit ports. This may be
rectified in the future.
a different, incompatible bytecode format between 32 bit and 64 bit ports.
This may be rectified in the future. In the meantime, use the <tt>W</tt>
and <tt>X</tt> <a href="#load_mode">modes of the <tt>load*</tt> functions</a>
for cross-compilation purposes.
</p>
<p>
Due to VM hardening, bytecode is not deterministic. Add <tt>d</tt> to the
mode string to dump it in a deterministic manner: identical source code
always gives a byte-for-byte identical bytecode dump. This feature is
mainly useful for reproducible builds.
</p>
<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
@ -238,7 +265,7 @@ and let the GC do its work.
LuaJIT uses a Tausworthe PRNG with period 2^223 to implement
<tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of
the PRNG results is much superior compared to the standard Lua
implementation, which uses the platform-specific ANSI rand().
implementation, which uses the platform-specific ANSI <tt>rand()</tt>.
</p>
<p>
The PRNG generates the same sequences from the same seeds on all
@ -249,6 +276,10 @@ It's correctly scaled up and rounded for <tt>math.random(n&nbsp;[,m])</tt> to
preserve uniformity.
</p>
<p>
Call <tt>math.randomseed()</tt> without any arguments to seed it from
system entropy.
</p>
<p>
Important: Neither this nor any other PRNG based on the simplistic
<tt>math.random()</tt> API is suitable for cryptographic use.
</p>
@ -286,7 +317,7 @@ enabled:
</p>
<ul>
<li><tt>goto</tt> and <tt>::labels::</tt>.</li>
<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li>
<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\z'</tt> escape in strings.</li>
<li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li>
<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li>
<li><tt>loadfile(filename [,mode [,env]])</tt>.</li>
@ -426,9 +457,7 @@ the toolchain used to compile LuaJIT:
on the C&nbsp;stack. The contents of the C++&nbsp;exception object
pass through unmodified.</li>
<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
The corresponding Lua error message can be retrieved from the Lua stack.<br>
For MSVC for Windows 64 bit this requires compilation of your C++ code
with <tt>/EHa</tt>.</li>
The corresponding Lua error message can be retrieved from the Lua stack.</li>
<li>Throwing Lua errors across C++ frames is safe. C++ destructors
will be called.</li>
</ul>
@ -463,7 +492,7 @@ C++ destructors.</li>
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
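A sketch tying together the mode letters documented in the diff above (the chunk source and name are illustrative):

local src = "return 1 + 2"
local f_native = assert(load(src, "=example", "t"))   -- source only, native bytecode format
local f_32 = assert(load(src, "=example", "tW"))      -- force the 32 bit (non-GC64) format
local f_64 = assert(load(src, "=example", "tX"))      -- force the 64 bit (GC64) format
local bc = string.dump(f_native, "sd")                -- stripped and deterministic dump
-- A non-native function cannot be run, but it can still be passed to string.dump:
local bc32 = string.dump(f_32, "sd")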

View File

@ -3,7 +3,7 @@
<head>
<title>Installation</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -117,7 +117,7 @@ hold all user-configurable settings:
<li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX
only).</li>
<li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT
under POSIX, MinGW or Cygwin.</li>
under POSIX or MinGW.</li>
<li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
MSVC (Visual Studio).</li>
</ul>
@ -195,15 +195,13 @@ Obviously the prefixes given during build and installation need to be the same.
<h2 id="windows">Windows Systems</h2>
<h3>Prerequisites</h3>
<p>
Either install one of the open source SDKs
(<a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a> or
<a href="https://www.cygwin.com/"><span class="ext">&raquo;</span>&nbsp;Cygwin</a>), which come with a modified
GCC plus the required development headers.
Either install the open source SDK <a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a>,
which comes with a modified GCC plus the required development headers.
Or install Microsoft's Visual Studio (MSVC).
</p>
<h3>Building with MSVC</h3>
<p>
Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the
Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the
directory with the source code and run these commands:
</p>
<pre class="code">
@ -214,9 +212,12 @@ msvcbuild
Check the <tt>msvcbuild.bat</tt> file for more options.
Then follow the installation instructions below.
</p>
<h3>Building with MinGW or Cygwin</h3>
<p>
Open a command prompt window and make sure the MinGW or Cygwin programs
For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt>
</p>
<h3>Building with MinGW</h3>
<p>
Open a command prompt window and make sure the MinGW programs
are in your path. Then <tt>cd</tt> to the directory of the git repository.
Then run this command for MinGW:
</p>
@ -224,12 +225,6 @@ Then run this command for MinGW:
mingw32-make
</pre>
<p>
Or this command for Cygwin:
</p>
<pre class="code">
make
</pre>
<p>
Then follow the installation instructions below.
</p>
<h3>Installing LuaJIT</h3>
@ -246,6 +241,19 @@ absolute path names &mdash; all modules are loaded relative to the
directory where <tt>luajit.exe</tt> is installed
(see <tt>src/luaconf.h</tt>).
</p>
<p>
The final directory layout should look like this:
</p>
<pre class="code">
├── luajit.exe
├── lua51.dll
├── <- put your own classic Lua/C API modules (*.dll) here
└── lua
├── <- put your own Lua modules (*.lua) here
└── jit
├── bc.lua
└── (etc …)
</pre>
<h2 id="cross">Cross-compiling LuaJIT</h2>
<p>
@ -266,6 +274,7 @@ for any supported target:
<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
<li>Both host and target architectures must have the same pointer size.</li>
<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
<li>On some distro versions, multilib conflicts with cross-compilers. The workaround is to install the x86 cross-compiler package <tt>gcc-i686-linux-gnu</tt> and use it to build the host part (<tt>HOST_CC=i686-linux-gnu-gcc</tt>).</li>
<li>64 bit targets always require compilation on a 64 bit host.</li>
</ul>
<p>
@ -568,7 +577,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>LuaJIT</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -122,7 +122,7 @@ Lua is a powerful, dynamic and light-weight programming language.
It may be embedded or used as a general-purpose, stand-alone language.
</p>
<p>
LuaJIT is Copyright &copy; 2005-2023 Mike Pall, released under the
LuaJIT is Copyright &copy; 2005-2025 Mike Pall, released under the
<a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
</p>
<p>
@ -193,7 +193,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -3,7 +3,7 @@
<head>
<title>Running LuaJIT</title>
<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2023">
<meta name="Copyright" content="Copyright (C) 2005-2025">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@ -106,6 +106,9 @@ are accepted:
<li><tt>-l</tt> &mdash; Only list bytecode.</li>
<li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li>
<li><tt>-g</tt> &mdash; Keep debug info.</li>
<li><tt>-W</tt> &mdash; Generate 32 bit (non-GC64) bytecode.</li>
<li><tt>-X</tt> &mdash; Generate 64 bit (GC64) bytecode.</li>
<li><tt>-d</tt> &mdash; Generate bytecode in deterministic manner.</li>
<li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li>
<li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li>
<li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li>
@ -120,7 +123,8 @@ file name:
</p>
<ul>
<li><tt>c</tt> &mdash; C source file, exported bytecode data.</li>
<li><tt>h</tt> &mdash; C header file, static bytecode data.</li>
<li><tt>cc</tt> &mdash; C++ source file, exported bytecode data.</li>
<li><tt>h</tt> &mdash; C/C++ header file, static bytecode data.</li>
<li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data
(OS- and architecture-specific).</li>
<li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable).
@ -303,7 +307,7 @@ Here are the parameters and their default settings:
</div>
<div id="foot">
<hr class="hide">
Copyright &copy; 2005-2023
Copyright &copy; 2005-2025
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>

View File

@ -1,6 +1,6 @@
/*
** DynASM ARM encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM ARM module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

View File

@ -1,6 +1,6 @@
/*
** DynASM ARM64 encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM ARM64 module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
@ -549,7 +549,7 @@ end
local function parse_load_pair(params, nparams, n, op)
if params[n+2] then werror("too many operands") end
local pn, p2 = params[n], params[n+1]
local scale = shr(op, 30) == 0 and 2 or 3
local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
if not p1 then
if not p2 then
@ -806,8 +806,8 @@ map_op = {
["ldrsw_*"] = "98000000DxB|b8800000DxL",
-- NOTE: ldur etc. are handled by ldr et al.
["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
["ldpsw_*"] = "68400000DAxP",
-- Branches.
@ -942,7 +942,7 @@ local function parse_template(params, template, nparams, pos)
werror("bad register type")
end
parse_reg_type = false
elseif p == "x" or p == "w" or p == "d" or p == "s" then
elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
if parse_reg_type ~= p then
werror("register size mismatch")
end

View File

@ -1,6 +1,6 @@
/*
** DynASM MIPS encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM MIPS32/MIPS64 module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM MIPS64 module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.

View File

@ -1,6 +1,6 @@
/*
** DynASM PPC/PPC64 encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM PPC/PPC64 module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
--
-- Support for various extensions contributed by Caio Souza Oliveira.

View File

@ -1,6 +1,6 @@
/*
** DynASM encoding engine prototypes.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM x64 module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined x86/x64 module.

View File

@ -1,6 +1,6 @@
/*
** DynASM x86 encoding engine.
** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

View File

@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM x86/x64 module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
@ -627,7 +627,11 @@ local function wputmrmsib(t, imark, s, vsreg, psz, sk)
werror("NYI: rip-relative displacement followed by immediate")
end
-- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
wputlabel("REL_", disp[1], 2)
if disp[2] == "iPJ" then
waction("REL_A", disp[1])
else
wputlabel("REL_", disp[1], 2)
end
else
wputdarg(disp)
end
@ -744,9 +748,9 @@ local function dispexpr(expr)
return imm*map_opsizenum[ops]
end
local mode, iexpr = immexpr(dispt)
if mode == "iJ" then
if mode == "iJ" or mode == "iPJ" then
if c == "-" then werror("cannot invert label reference") end
return { iexpr }
return { iexpr, mode }
end
return expr -- Need to return original signed expression.
end
@ -1147,6 +1151,8 @@ local map_op = {
rep_0 = "F3",
repe_0 = "F3",
repz_0 = "F3",
endbr32_0 = "F30F1EFB",
endbr64_0 = "F30F1EFA",
-- F4: *hlt
cmc_0 = "F5",
-- F6: test... mb,i; div... mb

View File

@ -2,7 +2,7 @@
-- DynASM. A dynamic assembler for code generation engines.
-- Originally designed and implemented for LuaJIT.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See below for full copyright notice.
------------------------------------------------------------------------------
@ -17,7 +17,7 @@ local _info = {
url = "https://luajit.org/dynasm.html",
license = "MIT",
copyright = [[
Copyright (C) 2005-2023 Mike Pall. All rights reserved.
Copyright (C) 2005-2025 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -75,7 +75,7 @@ local function wline(line, needindent)
g_synclineno = g_synclineno + 1
end
-- Write assembler line as a comment, if requestd.
-- Write assembler line as a comment, if requested.
local function wcomment(aline)
if g_opt.comment then
wline(g_opt.comment..aline..g_opt.endcomment, true)

View File

@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
Runs some nested loops and shows the resulting traces.
.SH COPYRIGHT
.PP
\fBLuaJIT\fR is Copyright \(co 2005-2023 Mike Pall.
\fBLuaJIT\fR is Copyright \(co 2005-2025 Mike Pall.
.br
\fBLuaJIT\fR is open source software, released under the MIT license.
.SH SEE ALSO

View File

@ -7,7 +7,7 @@
# Also works with MinGW and Cygwin on Windows.
# Please check msvcbuild.bat for building with MSVC on Windows.
#
# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
##############################################################################
MAJVER= 2
@ -233,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG
TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= x64
else
@ -299,6 +299,12 @@ endif
ifneq (,$(LMULTILIB))
TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
endif
ifneq (,$(INSTALL_LJLIBD))
TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\"
endif
ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1))
TARGET_XCFLAGS+= -fno-strict-float-cast-overflow
endif
##############################################################################
# Target system detection.
@ -320,13 +326,13 @@ ifeq (Darwin,$(TARGET_SYS))
endif
TARGET_STRIP+= -x
TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
else
ifeq (iOS,$(TARGET_SYS))
TARGET_STRIP+= -x
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
ifeq (arm64,$(TARGET_LJARCH))
@ -475,7 +481,11 @@ DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
DASM_DASC= vm_$(DASM_ARCH).dasc
GIT= git
GIT_RELVER= [ -d ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
ifeq (Windows,$(HOST_SYS)$(HOST_MSYS))
GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
else
GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
endif
GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*)
BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \

View File

@ -25,14 +25,15 @@ lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_strscan.h lj_libdef.h
lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h
lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_vm.h lj_prng.h \
lj_libdef.h
lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
lj_libdef.h
@ -55,7 +56,7 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
lj_emit_*.h lj_asm_*.h
lj_prng.h lj_emit_*.h lj_asm_*.h
lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
lj_bcdef.h
@ -97,7 +98,7 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
lj_crecord.h lj_strfmt.h
lj_crecord.h lj_strfmt.h lj_strscan.h
lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
lj_ccallback.h lj_buf.h

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** This is a tool to build the hand-tuned assembler code required for
** LuaJIT's bytecode interpreter. It supports a variety of output formats

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _BUILDVM_H

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: Assembler source code emitter.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"
@ -339,6 +339,10 @@ void emit_asm(BuildCtx *ctx)
fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident);
break;
case BUILD_machasm:
#if defined(__apple_build_version__) && __apple_build_version__ >= 15000000 && __apple_build_version__ < 15000300
/* Workaround for XCode 15.0 - 15.2. */
fprintf(ctx->fp, "\t.subsections_via_symbols\n");
#endif
fprintf(ctx->fp,
"\t.cstring\n"
"\t.ascii \"%s\\0\"\n", ctx->dasm_ident);

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: IR folding hash table generator.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: library definition compiler.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"

View File

@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: PE object emitter.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Only used for building on Windows, since we cannot assume the presence
** of a suitable assembler. The host and target byte order must match.
@ -9,7 +9,7 @@
#include "buildvm.h"
#include "lj_bc.h"
#if LJ_TARGET_X86ORX64
#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
/* Context for PE object emitter. */
static char *strtab;
@ -93,6 +93,17 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_ADDR32NB 0x03
#define PEOBJ_RELOC_OFS 0
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 6
#define PEOBJ_XDATA_SIZE (8*2+4+6*2)
#elif LJ_TARGET_ARM64
#define PEOBJ_ARCH_TARGET 0xaa64
#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */
#define PEOBJ_RELOC_DIR32 0x01
#define PEOBJ_RELOC_ADDR32NB 0x02
#define PEOBJ_RELOC_OFS (-4)
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 4
#define PEOBJ_XDATA_SIZE (4+24+4 +4+8)
#endif
/* Section numbers (0-based). */
@ -100,7 +111,7 @@ enum {
PEOBJ_SECT_ABS = -2,
PEOBJ_SECT_UNDEF = -1,
PEOBJ_SECT_TEXT,
#if LJ_TARGET_X64
#ifdef PEOBJ_PDATA_NRELOC
PEOBJ_SECT_PDATA,
PEOBJ_SECT_XDATA,
#elif LJ_TARGET_X86
@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx)
uint32_t sofs;
int i, nrsym;
union { uint8_t b; uint32_t u; } host_endian;
#ifdef PEOBJ_PDATA_NRELOC
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
#endif
sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx)
/* Flags: 60 = read+execute, 50 = align16, 20 = code. */
pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
#if LJ_TARGET_X64
#ifdef PEOBJ_PDATA_NRELOC
memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
pesect[PEOBJ_SECT_PDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4);
sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4);
pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE;
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
pesect[PEOBJ_SECT_XDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */
sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */
pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx)
*/
nrsym = ctx->nrelocsym;
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
#if LJ_TARGET_X64
#ifdef PEOBJ_PDATA_NRELOC
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
#endif
@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx)
#if LJ_TARGET_X64
{ /* Write .pdata section. */
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */
PEreloc reloc;
pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
@ -308,6 +321,87 @@ void emit_peobj(BuildCtx *ctx)
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_ARM64
/* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
{ /* Write .pdata section. */
uint32_t pdata[4];
PEreloc reloc;
pdata[0] = 0;
pdata[1] = 0;
pdata[2] = fcofs;
pdata[3] = 4+24+4;
owrite(ctx, &pdata, sizeof(pdata));
/* Start of .text and start of .xdata. */
reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
/* Start of vm_ffi_call and start of second part of .xdata. */
reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
{ /* Write .xdata section. */
uint32_t u32;
uint8_t *p, uwc[24];
PEreloc reloc;
#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */
#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */
#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
#define CSAVE_REGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
} while (0)
#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
#define CSAVE_FREGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
} while (0)
#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */
#define CODE_NOP 0xe3
#define CODE_END 0xe4
#define CEND_ALIGN do { \
*p++ = CODE_END; \
while ((p - uwc) & 3) *p++ = CODE_NOP; \
} while (0)
/* Unwind codes for .text section with handler. */
p = uwc;
CADD_FP(192); /* +2 */
CSAVE_REGS(19, 28, 176); /* +5*2 */
CSAVE_FREGS(8, 15, 96); /* +4*2 */
CSAVE_FPLR(192); /* +1 */
CALLOC_S(208); /* +1 */
CEND_ALIGN; /* +1 +1 -> 24 */
u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 24);
u32 = 0; /* Handler RVA to be relocated at 4 + 24. */
owrite(ctx, &u32, 4);
/* Unwind codes for vm_ffi_call without handler. */
p = uwc;
CADD_FP(16); /* +2 */
CSAVE_FPLR(16); /* +1 */
CSAVE_REGPX(19, -32); /* +2 */
CEND_ALIGN; /* +1 +2 -> 8 */
u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 8);
reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_X86
/* Write .sxdata section. */
for (i = 0; i < nrsym; i++) {
@ -339,7 +433,7 @@ void emit_peobj(BuildCtx *ctx)
emit_peobj_sym(ctx, ctx->relocsym[i], 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
#if LJ_TARGET_X64
#ifdef PEOBJ_PDATA_NRELOC
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
emit_peobj_sym(ctx, "lj_err_unwind_win", 0,

View File

@ -2,7 +2,7 @@
-- Lua script to dump the bytecode of the library functions written in Lua.
-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
----------------------------------------------------------------------------
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
@ -138,65 +138,73 @@ local function fixup_dump(dump, fixup)
return { dump = ndump, startbc = startbc, sizebc = sizebc }
end
local function find_defs(src)
local function find_defs(src, mode)
local defs = {}
for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
local env = {}
local tcode, fixup = transform_lua(code)
local func = assert(load(tcode, "", nil, env))()
defs[name] = fixup_dump(string.dump(func, true), fixup)
local func = assert(load(tcode, "", mode))
defs[name] = fixup_dump(string.dump(func, mode), fixup)
defs[#defs+1] = name
end
return defs
end
local function gen_header(defs)
local function gen_header(defs32, defs64)
local t = {}
local function w(x) t[#t+1] = x end
w("/* This is a generated file. DO NOT EDIT! */\n\n")
w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
local s, sb = "", ""
for i,name in ipairs(defs) do
local d = defs[name]
s = s .. d.dump
sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1)
.. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
.. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
end
w("static const uint8_t libbc_code[] = {\n")
local n = 0
for i=1,#s do
local x = string.byte(s, i)
local xb = string.byte(sb, i)
if xb == 255 then
local name = BCN[x]
local m = #name + 4
if n + m > 78 then n = 0; w("\n") end
n = n + m
w("BC_"); w(name)
else
local m = x < 10 and 2 or (x < 100 and 3 or 4)
if xb == 0 then
if n + m > 78 then n = 0; w("\n") end
else
local name = defs[xb]:gsub("_", ".")
if n ~= 0 then w("\n") end
w("/* "); w(name); w(" */ ")
n = #name + 7
end
n = n + m
w(x)
for j,defs in ipairs{defs64, defs32} do
local s, sb = "", ""
for i,name in ipairs(defs) do
local d = defs[name]
s = s .. d.dump
sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1)
.. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
.. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
end
if j == 1 then
w("static const uint8_t libbc_code[] = {\n#if LJ_FR2\n")
else
w("\n#else\n")
end
local n = 0
for i=1,#s do
local x = string.byte(s, i)
local xb = string.byte(sb, i)
if xb == 255 then
local name = BCN[x]
local m = #name + 4
if n + m > 78 then n = 0; w("\n") end
n = n + m
w("BC_"); w(name)
else
local m = x < 10 and 2 or (x < 100 and 3 or 4)
if xb == 0 then
if n + m > 78 then n = 0; w("\n") end
else
local name = defs[xb]:gsub("_", ".")
if n ~= 0 then w("\n") end
w("/* "); w(name); w(" */ ")
n = #name + 7
end
n = n + m
w(x)
end
w(",")
end
w(",")
end
w("\n0\n};\n\n")
w("\n#endif\n0\n};\n\n")
w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
local m = 0
for _,name in ipairs(defs) do
w('{"'); w(name); w('",'); w(m) w('},\n')
m = m + #defs[name].dump
local m32, m64 = 0, 0
for i,name in ipairs(defs32) do
assert(name == defs64[i])
w('{"'); w(name); w('",'); w(m32) w('},\n')
m32 = m32 + #defs32[name].dump
m64 = m64 + #defs64[name].dump
assert(m32 == m64)
end
w("{NULL,"); w(m); w("}\n};\n\n")
w("{NULL,"); w(m32); w("}\n};\n\n")
return table.concat(t)
end
@ -219,7 +227,8 @@ end
local outfile = parse_arg(arg)
local src = read_files(arg)
local defs = find_defs(src)
local hdr = gen_header(defs)
local defs32 = find_defs(src, "Wdts")
local defs64 = find_defs(src, "Xdts")
local hdr = gen_header(defs32, defs64)
write_file(outfile, hdr)

View File

@ -2,7 +2,7 @@
-- Lua script to generate a customized, minified version of Lua.
-- The resulting 'minilua' is used for the build process of LuaJIT.
----------------------------------------------------------------------------
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------

View File

@ -1,13 +1,14 @@
----------------------------------------------------------------------------
-- Lua script to embed the rolling release version in luajit.h.
----------------------------------------------------------------------------
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
local FILE_INPUT_H = "luajit_rolling.h"
local FILE_INPUT_R = "luajit_relver.txt"
local FILE_OUTPUT_H = "luajit.h"
local arg = {...}
local FILE_ROLLING_H = arg[1] or "luajit_rolling.h"
local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt"
local FILE_LUAJIT_H = arg[3] or "luajit.h"
local function file_read(file)
local fp = assert(io.open(file, "rb"), "run from the wrong directory")
@ -28,8 +29,8 @@ local function file_write_mod(file, data)
assert(fp:close())
end
local text = file_read(FILE_INPUT_H)
local relver = file_read(FILE_INPUT_R):match("(%d+)")
local text = file_read(FILE_ROLLING_H):gsub("#error.-\n", "")
local relver = file_read(FILE_RELVER_TXT):match("(%d+)")
if relver then
text = text:gsub("ROLLING", relver)
@ -38,6 +39,7 @@ else
**** WARNING Cannot determine rolling release version from git log.
**** WARNING The 'git' command must be available during the build.
]])
file_write_mod(FILE_RELVER_TXT, "ROLLING\n") -- Fallback for install target.
end
file_write_mod(FILE_OUTPUT_H, text)
file_write_mod(FILE_LUAJIT_H, text)

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT bytecode listing module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT module to save/list bytecode.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@ -29,6 +29,9 @@ Save LuaJIT bytecode: luajit -b[options] input output
-l Only list bytecode.
-s Strip debug info (default).
-g Keep debug info.
-W Generate 32 bit (non-GC64) bytecode.
-X Generate 64 bit (GC64) bytecode.
-d Generate bytecode in deterministic manner.
-n name Set module name (default: auto-detect from input name).
-t type Set output file type (default: auto-detect from output name).
-a arch Override architecture for object files (default: native).
@ -38,7 +41,7 @@ Save LuaJIT bytecode: luajit -b[options] input output
-- Stop handling options.
- Use stdin as input and/or stdout as output.
File types: c h obj o raw (default)
File types: c cc h obj o raw (default)
]]
os.exit(1)
end
@ -51,8 +54,9 @@ local function check(ok, ...)
end
local function readfile(ctx, input)
if type(input) == "function" then return input end
if ctx.filename then
if ctx.string then
return check(loadstring(input, nil, ctx.mode))
elseif ctx.filename then
local data
if input == "-" then
data = io.stdin:read("*a")
@ -61,10 +65,10 @@ local function readfile(ctx, input)
data = assert(fp:read("*a"))
assert(fp:close())
end
return check(load(data, ctx.filename))
return check(load(data, ctx.filename, ctx.mode))
else
if input == "-" then input = nil end
return check(loadfile(input))
return check(loadfile(input, ctx.mode))
end
end
@ -81,7 +85,7 @@ end
------------------------------------------------------------------------------
local map_type = {
raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
}
local map_arch = {
@ -435,24 +439,12 @@ typedef struct
{
mach_header; uint32_t reserved;
} mach_header_64;
typedef struct {
uint32_t cmd, cmdsize;
char segname[16];
uint32_t vmaddr, vmsize, fileoff, filesize;
uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command;
typedef struct {
uint32_t cmd, cmdsize;
char segname[16];
uint64_t vmaddr, vmsize, fileoff, filesize;
uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command_64;
typedef struct {
char sectname[16], segname[16];
uint32_t addr, size;
uint32_t offset, align, reloff, nreloc, flags;
uint32_t reserved1, reserved2;
} mach_section;
typedef struct {
char sectname[16], segname[16];
uint64_t addr, size;
@ -462,139 +454,64 @@ typedef struct {
typedef struct {
uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize;
} mach_symtab_command;
typedef struct {
int32_t strx;
uint8_t type, sect;
int16_t desc;
uint32_t value;
} mach_nlist;
typedef struct {
int32_t strx;
uint8_t type, sect;
uint16_t desc;
uint64_t value;
} mach_nlist_64;
typedef struct
{
int32_t magic, nfat_arch;
} mach_fat_header;
typedef struct
{
int32_t cputype, cpusubtype, offset, size, align;
} mach_fat_arch;
typedef struct {
struct {
mach_header hdr;
mach_segment_command seg;
mach_section sec;
mach_symtab_command sym;
} arch[1];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_obj;
typedef struct {
struct {
mach_header_64 hdr;
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
} arch[1];
mach_header_64 hdr;
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
mach_nlist_64 sym_entry;
uint8_t space[4096];
} mach_obj_64;
typedef struct {
mach_fat_header fat;
mach_fat_arch fat_arch[2];
struct {
mach_header hdr;
mach_segment_command seg;
mach_section sec;
mach_symtab_command sym;
} arch[2];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_fat_obj;
typedef struct {
mach_fat_header fat;
mach_fat_arch fat_arch[2];
struct {
mach_header_64 hdr;
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
} arch[2];
mach_nlist_64 sym_entry;
uint8_t space[4096];
} mach_fat_obj_64;
]]
local symname = '_'..LJBC_PREFIX..ctx.modname
local isfat, is64, align, mobj = false, false, 4, "mach_obj"
if ctx.arch == "x64" then
is64, align, mobj = true, 8, "mach_obj_64"
elseif ctx.arch == "arm" then
isfat, mobj = true, "mach_fat_obj"
elseif ctx.arch == "arm64" then
is64, align, isfat, mobj = true, 8, true, "mach_fat_obj_64"
else
check(ctx.arch == "x86", "unsupported architecture for OSX")
local cputype, cpusubtype = 0x01000007, 3
if ctx.arch ~= "x64" then
check(ctx.arch == "arm64", "unsupported architecture for OSX")
cputype, cpusubtype = 0x0100000c, 0
end
local function aligned(v, a) return bit.band(v+a-1, -a) end
local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE.
-- Create Mach-O object and fill in header.
local o = ffi.new(mobj)
local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
if isfat then
o.fat.magic = be32(0xcafebabe)
o.fat.nfat_arch = be32(#cpusubtype)
end
local o = ffi.new("mach_obj_64")
local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8)
-- Fill in sections and symbols.
for i=0,#cpusubtype-1 do
local ofs = 0
if isfat then
local a = o.fat_arch[i]
a.cputype = be32(cputype[i+1])
a.cpusubtype = be32(cpusubtype[i+1])
-- Subsequent slices overlap each other to share data.
ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0])
a.offset = be32(ofs)
a.size = be32(mach_size-ofs+#s)
end
local a = o.arch[i]
a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface
a.hdr.cputype = cputype[i+1]
a.hdr.cpusubtype = cpusubtype[i+1]
a.hdr.filetype = 1
a.hdr.ncmds = 2
a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym)
a.seg.cmd = is64 and 0x19 or 0x1
a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)
a.seg.vmsize = #s
a.seg.fileoff = mach_size-ofs
a.seg.filesize = #s
a.seg.maxprot = 1
a.seg.initprot = 1
a.seg.nsects = 1
ffi.copy(a.sec.sectname, "__data")
ffi.copy(a.sec.segname, "__DATA")
a.sec.size = #s
a.sec.offset = mach_size-ofs
a.sym.cmd = 2
a.sym.cmdsize = ffi.sizeof(a.sym)
a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs
a.sym.nsyms = 1
a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs
a.sym.strsize = aligned(#symname+2, align)
end
o.hdr.magic = 0xfeedfacf
o.hdr.cputype = cputype
o.hdr.cpusubtype = cpusubtype
o.hdr.filetype = 1
o.hdr.ncmds = 2
o.hdr.sizeofcmds = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)+ffi.sizeof(o.sym)
o.seg.cmd = 0x19
o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)
o.seg.vmsize = #s
o.seg.fileoff = mach_size
o.seg.filesize = #s
o.seg.maxprot = 1
o.seg.initprot = 1
o.seg.nsects = 1
ffi.copy(o.sec.sectname, "__data")
ffi.copy(o.sec.segname, "__DATA")
o.sec.size = #s
o.sec.offset = mach_size
o.sym.cmd = 2
o.sym.cmdsize = ffi.sizeof(o.sym)
o.sym.symoff = ffi.offsetof(o, "sym_entry")
o.sym.nsyms = 1
o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)
o.sym.strsize = aligned(#symname+2, 8)
o.sym_entry.type = 0xf
o.sym_entry.sect = 1
o.sym_entry.strx = 1
ffi.copy(o.space+1, symname)
-- Write Macho-O object file.
-- Write Mach-O object file.
local fp = savefile(output, "wb")
fp:write(ffi.string(o, mach_size))
bcsave_tail(fp, output, s)
@ -624,7 +541,7 @@ end
local function bcsave(ctx, input, output)
local f = readfile(ctx, input)
local s = string.dump(f, ctx.strip)
local s = string.dump(f, ctx.mode)
local t = ctx.type
if not t then
t = detecttype(output)
@ -647,9 +564,11 @@ local function docmd(...)
local n = 1
local list = false
local ctx = {
strip = true, arch = jit.arch, os = jit.os:lower(),
type = false, modname = false,
mode = "bt", arch = jit.arch, os = jit.os:lower(),
type = false, modname = false, string = false,
}
local strip = "s"
local gc64 = ""
while n <= #arg do
local a = arg[n]
if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
@ -660,14 +579,18 @@ local function docmd(...)
if opt == "l" then
list = true
elseif opt == "s" then
ctx.strip = true
strip = "s"
elseif opt == "g" then
ctx.strip = false
strip = ""
elseif opt == "W" or opt == "X" then
gc64 = opt
elseif opt == "d" then
ctx.mode = ctx.mode .. opt
else
if arg[n] == nil or m ~= #a then usage() end
if opt == "e" then
if n ~= 1 then usage() end
arg[1] = check(loadstring(arg[1]))
ctx.string = true
elseif opt == "n" then
ctx.modname = checkmodname(tremove(arg, n))
elseif opt == "t" then
@ -687,6 +610,7 @@ local function docmd(...)
n = n + 1
end
end
ctx.mode = ctx.mode .. strip .. gc64
if list then
if #arg == 0 or #arg > 2 then usage() end
bclist(ctx, arg[1], arg[2] or "-")
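For reference, the option handling above folds the flags into a single mode string that is later handed to load()/loadfile()/loadstring() and string.dump(). A minimal sketch of that composition, using the defaults shown in docmd():

-- how `-d -X` (with default stripping) combines, per the code above
local mode, strip, gc64 = "bt", "s", ""   -- defaults from docmd()
mode = mode .. "d"                        -- -d is appended directly
gc64 = "X"                                -- -X selects GC64 bytecode ("W" for -W)
mode = mode .. strip .. gc64
assert(mode == "btdsX")                   -- passed on to load*() and string.dump()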

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT ARM disassembler module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT ARM64 disassembler module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
--
-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
@ -107,24 +107,20 @@ local map_logsr = { -- Logical, shifted register.
[0] = {
shift = 29, mask = 3,
[0] = {
shift = 21, mask = 7,
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
shift = 21, mask = 1,
[0] = "andDNMSg", "bicDNMSg"
},
{
shift = 21, mask = 7,
[0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
shift = 21, mask = 1,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
},
{
shift = 21, mask = 7,
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
shift = 21, mask = 1,
[0] = "eorDNMSg", "eonDNMSg"
},
{
shift = 21, mask = 7,
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
shift = 21, mask = 1,
[0] = "ands|tstD0NMSg", "bicsDNMSg"
}
},
false -- unallocated
@ -132,24 +128,20 @@ local map_logsr = { -- Logical, shifted register.
{
shift = 29, mask = 3,
[0] = {
shift = 21, mask = 7,
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
shift = 21, mask = 1,
[0] = "andDNMSg", "bicDNMSg"
},
{
shift = 21, mask = 7,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
shift = 21, mask = 1,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
},
{
shift = 21, mask = 7,
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
shift = 21, mask = 1,
[0] = "eorDNMSg", "eonDNMSg"
},
{
shift = 21, mask = 7,
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
shift = 21, mask = 1,
[0] = "ands|tstD0NMSg", "bicsDNMSg"
}
}
}
@ -666,6 +658,10 @@ local map_datafp = { -- Data processing, SIMD and FP.
}
}
}
},
{ -- 010
shift = 0, mask = 0x81f8fc00,
[0x100e400] = "moviDdG"
}
}
@ -735,7 +731,7 @@ local map_cond = {
"hi", "ls", "ge", "lt", "gt", "le", "al",
}
local map_shift = { [0] = "lsl", "lsr", "asr", }
local map_shift = { [0] = "lsl", "lsr", "asr", "ror"}
local map_extend = {
[0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
@ -840,6 +836,20 @@ local function parse_fpimm8(op)
return sign * frac * 2^exp
end
local function decode_fpmovi(op)
local lo = rshift(op, 5)
local hi = rshift(op, 9)
lo = bor(band(lo, 1) * 0xff, band(lo, 2) * 0x7f80, band(lo, 4) * 0x3fc000,
band(lo, 8) * 0x1fe00000)
hi = bor(band(hi, 1) * 0xff, band(hi, 0x80) * 0x1fe,
band(hi, 0x100) * 0xff00, band(hi, 0x200) * 0x7f8000)
if hi ~= 0 then
return fmt_hex32(hi)..tohex(lo)
else
return fmt_hex32(lo)
end
end
local function prefer_bfx(sf, uns, imms, immr)
if imms < immr or imms == 31 or imms == 63 then
return false
@ -956,7 +966,7 @@ local function disass_ins(ctx)
elseif p == "U" then
local rn = map_regs.x[band(rshift(op, 5), 31)]
local sz = band(rshift(op, 30), 3)
local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
if imm12 ~= 0 then
x = "["..rn..", #"..imm12.."]"
else
@ -993,8 +1003,7 @@ local function disass_ins(ctx)
x = x.."]"
end
elseif p == "P" then
local opcv, sh = rshift(op, 26), 2
if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
local rn = map_regs.x[band(rshift(op, 5), 31)]
local ind = band(rshift(op, 23), 3)
@ -1140,6 +1149,8 @@ local function disass_ins(ctx)
x = 0
elseif p == "F" then
x = parse_fpimm8(op)
elseif p == "G" then
x = "#0x"..decode_fpmovi(op)
elseif p == "g" or p == "f" or p == "x" or p == "w" or
p == "d" or p == "s" then
-- These are handled in D/N/M/A.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT ARM64BE disassembler wrapper module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- ARM64 instructions are always little-endian. So just forward to the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS disassembler module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64 disassembler wrapper module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the big-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64EL disassembler wrapper module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64R6 disassembler wrapper module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the r6 big-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64R6EL disassembler wrapper module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the r6 little-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPSEL disassembler wrapper module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT PPC disassembler module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT x64 disassembler wrapper module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the 64 bit functions from the combined

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT x86/x64 disassembler module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT compiler dump module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@ -552,7 +552,12 @@ local recdepth = 0
local function fmterr(err, info)
if type(err) == "number" then
if type(info) == "function" then info = fmtfunc(info) end
err = format(vmdef.traceerr[err], info)
local fmt = vmdef.traceerr[err]
if fmt == "NYI: bytecode %s" then
local oidx = 6 * info
info = sub(vmdef.bcnames, oidx+1, oidx+6)
end
err = format(fmt, info)
end
return err
end
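The fmterr() fix above resolves the "NYI: bytecode %s" trace error against vmdef.bcnames, which packs one fixed-width 6-character name per opcode. A small standalone illustration of that indexing (not part of the diff):

-- vmdef.bcnames stores opcode names as consecutive 6-character fields
local vmdef = require("jit.vmdef")
local function bcname(op)   -- op is a 0-based bytecode opcode number
  return (vmdef.bcnames:sub(6*op + 1, 6*op + 6):gsub("%s+$", ""))
end
print(bcname(0))            -- first opcode name (e.g. "ISLT")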

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT profiler.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@ -227,9 +227,7 @@ local function prof_finish()
local samples = prof_samples
if samples == 0 then
if prof_raw ~= true then out:write("[No samples collected]\n") end
return
end
if prof_ann then
elseif prof_ann then
prof_annotate(prof_count1, samples)
else
prof_top(prof_count1, prof_count2, samples, "")

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- Verbose mode of the LuaJIT compiler.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@ -62,7 +62,7 @@ local jit = require("jit")
local jutil = require("jit.util")
local vmdef = require("jit.vmdef")
local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
local type, format = type, string.format
local type, sub, format = type, string.sub, string.format
local stdout, stderr = io.stdout, io.stderr
-- Active flag and output file handle.
@ -89,7 +89,12 @@ end
local function fmterr(err, info)
if type(err) == "number" then
if type(info) == "function" then info = fmtfunc(info) end
err = format(vmdef.traceerr[err], info)
local fmt = vmdef.traceerr[err]
if fmt == "NYI: bytecode %s" then
local oidx = 6 * info
info = sub(vmdef.bcnames, oidx+1, oidx+6)
end
err = format(fmt, info)
end
return err
end

View File

@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT profiler zones.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--

View File

@ -1,6 +1,6 @@
/*
** Auxiliary library for the Lua/C API.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major parts taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** Base and coroutine library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -146,6 +146,8 @@ LJLIB_CF(getfenv) LJLIB_REC(.)
cTValue *o = L->base;
if (!(o < L->top && tvisfunc(o))) {
int level = lj_lib_optint(L, 1, 1);
if (level < 0)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
o = lj_debug_frame(L, level, &level);
if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
@ -168,6 +170,8 @@ LJLIB_CF(setfenv)
setgcref(L->env, obj2gco(t));
return 0;
}
if (level < 0)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
o = lj_debug_frame(L, level, &level);
if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
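Both added checks reject negative levels up front instead of passing them to lj_debug_frame(). The Lua-visible effect is an argument error (a sketch; the exact error message is not shown in this diff):

-- negative getfenv()/setfenv() levels now raise an argument error
assert(not pcall(getfenv, -1))
assert(not pcall(setfenv, -1, {}))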
@ -360,7 +364,11 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
static int load_aux(lua_State *L, int status, int envarg)
{
if (status == LUA_OK) {
if (tvistab(L->base+envarg-1)) {
/*
** Set environment table for top-level function.
** Don't do this for non-native bytecode, which returns a prototype.
*/
if (tvistab(L->base+envarg-1) && tvisfunc(L->top-1)) {
GCfunc *fn = funcV(L->top-1);
GCtab *t = tabV(L->base+envarg-1);
setgcref(fn->c.env, obj2gco(t));
@ -616,7 +624,10 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
return FFH_RES(2);
}
lj_state_growstack(co, (MSize)(L->top - L->base));
if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) {
cTValue *msg = --co->top;
lj_err_callermsg(L, strVdata(msg));
}
return FFH_RETRY;
}

View File

@ -1,6 +1,6 @@
/*
** Bit manipulation library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_bit_c
@ -98,7 +98,7 @@ LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
return bit_result64(L, id, x);
}
if (id2) setintV(L->base+1, sh);
setintV(L->base+1, sh);
return FFH_RETRY;
#else
lj_lib_checknumber(L, 1);

View File

@ -1,6 +1,6 @@
/*
** Buffer library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_buffer_c

View File

@ -1,6 +1,6 @@
/*
** Debug library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** FFI library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_ffi_c
@ -305,7 +305,7 @@ LJLIB_CF(ffi_meta___tostring)
p = *(void **)p;
} else if (ctype_isenum(ct->info)) {
msg = "cdata<%s>: %d";
p = (void *)(uintptr_t)*(uint32_t **)p;
p = (void *)(uintptr_t)*(uint32_t *)p;
} else {
if (ctype_isptr(ct->info)) {
p = cdata_getptr(p, ct->size);
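The cast fix above affects __tostring on enum cdata, which now reads the 32-bit enum value directly instead of misinterpreting it as a pointer. A quick way to exercise that path (the enum name is illustrative and the exact output format may differ):

local ffi = require("ffi")
ffi.cdef("typedef enum { EXAMPLE_VAL = 42 } example_e;")  -- hypothetical enum for illustration
print(tostring(ffi.new("example_e", 42)))                 -- prints something like "cdata<enum ...>: 42"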
@ -513,7 +513,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
/* Handle ctype __gc metamethod. Use the fast lookup here. */
cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) {
GCtab *t = cts->finalizer;
GCtab *t = tabref(G(L)->gcroot[GCROOT_FFI_FIN]);
if (gcref(t->metatable)) {
/* Add to finalizer table, if still enabled. */
copyTV(L, lj_tab_set(L, t, o-1), tv);
@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
"\003win"
#endif
#if LJ_ABI_PAUTH
"\007pauth"
"\005pauth"
#endif
#if LJ_TARGET_UWP
"\003uwp"
@ -765,7 +765,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
return 1;
}
LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to miscmap table. */
LJLIB_CF(ffi_metatype)
{
@ -791,8 +791,6 @@ LJLIB_CF(ffi_metatype)
return 1;
}
LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to finalizer table. */
LJLIB_CF(ffi_gc) LJLIB_REC(.)
{
GCcdata *cd = ffi_checkcdata(L, 1);
@ -825,19 +823,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(arch)
/* ------------------------------------------------------------------------ */
/* Create special weak-keyed finalizer table. */
static GCtab *ffi_finalizer(lua_State *L)
{
/* NOBARRIER: The table is new (marked white). */
GCtab *t = lj_tab_new(L, 0, 1);
settabV(L, L->top++, t);
setgcref(t->metatable, obj2gco(t));
setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
lj_str_newlit(L, "k"));
t->nomm = (uint8_t)(~(1u<<MM_mode));
return t;
}
/* Register FFI module as loaded. */
static void ffi_register_module(lua_State *L)
{
@ -853,7 +838,6 @@ LUALIB_API int luaopen_ffi(lua_State *L)
{
CTState *cts = lj_ctype_init(L);
settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1)));
cts->finalizer = ffi_finalizer(L);
LJ_LIB_REG(L, NULL, ffi_meta);
/* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1)));
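Earlier in this file the length prefix of the "pauth" ABI keyword was corrected from \007 to \005 (the string has five characters), which makes the entry matchable by the ABI query. A sketch of the Lua-visible check (true only on builds with pointer authentication enabled):

local ffi = require("ffi")
print(ffi.abi("pauth"))   -- false on most targets, true on arm64e/PAuth builds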

View File

@ -1,6 +1,6 @@
/*
** Library initialization.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major parts taken verbatim from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** I/O library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -25,6 +25,7 @@
#include "lj_strfmt.h"
#include "lj_ff.h"
#include "lj_lib.h"
#include "lj_strscan.h"
/* Userdata payload for I/O file. */
typedef struct IOFileUD {
@ -323,13 +324,14 @@ LJLIB_CF(io_method_seek)
FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
int64_t ofs = 0;
cTValue *o;
TValue *o;
int res;
if (opt == 0) opt = SEEK_SET;
else if (opt == 1) opt = SEEK_CUR;
else if (opt == 2) opt = SEEK_END;
o = L->base+2;
if (o < L->top) {
if (tvisstr(o)) lj_strscan_num(strV(o), o);
if (tvisint(o))
ofs = (int64_t)intV(o);
else if (tvisnum(o))
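The added lj_strscan_num() call restores string-to-number coercion for the offset argument of fp:seek(), matching the other numeric arguments. A minimal sketch (the file name is illustrative):

local fp = assert(io.open("scratch.bin", "w+b"))  -- hypothetical scratch file
fp:write(("x"):rep(100))
assert(fp:seek("set", "10") == 10)                -- string offset is now coerced
fp:close()
os.remove("scratch.bin")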

View File

@ -1,6 +1,6 @@
/*
** JIT library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_jit_c
@ -161,24 +161,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(version)
/* -- Reflection API for Lua functions ------------------------------------ */
/* Return prototype of first argument (Lua function or prototype object) */
static GCproto *check_Lproto(lua_State *L, int nolua)
{
TValue *o = L->base;
if (L->top > o) {
if (tvisproto(o)) {
return protoV(o);
} else if (tvisfunc(o)) {
if (isluafunc(funcV(o)))
return funcproto(funcV(o));
else if (nolua)
return NULL;
}
}
lj_err_argt(L, 1, LUA_TFUNCTION);
return NULL; /* unreachable */
}
static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
{
setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
@ -187,7 +169,7 @@ static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
/* local info = jit.util.funcinfo(func [,pc]) */
LJLIB_CF(jit_util_funcinfo)
{
GCproto *pt = check_Lproto(L, 1);
GCproto *pt = lj_lib_checkLproto(L, 1, 1);
if (pt) {
BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
GCtab *t;
@ -229,7 +211,7 @@ LJLIB_CF(jit_util_funcinfo)
/* local ins, m = jit.util.funcbc(func, pc) */
LJLIB_CF(jit_util_funcbc)
{
GCproto *pt = check_Lproto(L, 0);
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
BCPos pc = (BCPos)lj_lib_checkint(L, 2);
if (pc < pt->sizebc) {
BCIns ins = proto_bc(pt)[pc];
@ -246,7 +228,7 @@ LJLIB_CF(jit_util_funcbc)
/* local k = jit.util.funck(func, idx) */
LJLIB_CF(jit_util_funck)
{
GCproto *pt = check_Lproto(L, 0);
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2);
if (idx >= 0) {
if (idx < (ptrdiff_t)pt->sizekn) {
@ -266,7 +248,7 @@ LJLIB_CF(jit_util_funck)
/* local name = jit.util.funcuvname(func, idx) */
LJLIB_CF(jit_util_funcuvname)
{
GCproto *pt = check_Lproto(L, 0);
GCproto *pt = lj_lib_checkLproto(L, 1, 0);
uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
if (idx < pt->sizeuv) {
setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx)));
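This refactor replaces the local check_Lproto() helper with the shared lj_lib_checkLproto() (also used by string.dump below); the Lua-visible jit.util API stays the same. A quick sanity sketch:

local jutil = require("jit.util")
local function f() return 1 end
local info = jutil.funcinfo(f)      -- still accepts a Lua function
print(info.source, info.linedefined)
local ins, m = jutil.funcbc(f, 0)   -- instruction 0 and its operand mode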

View File

@ -1,6 +1,6 @@
/*
** Math library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#include <math.h>
@ -13,6 +13,7 @@
#include "lualib.h"
#include "lj_obj.h"
#include "lj_err.h"
#include "lj_lib.h"
#include "lj_vm.h"
#include "lj_prng.h"
@ -183,7 +184,10 @@ LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
LJLIB_CF(math_randomseed)
{
PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
random_seed(rs, lj_lib_checknum(L, 1));
if (L->base != L->top)
random_seed(rs, lj_lib_checknum(L, 1));
else if (!lj_prng_seed_secure(rs))
lj_err_caller(L, LJ_ERR_PRNGSD);
return 0;
}
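With this change, math.randomseed() called without an argument re-seeds the PRNG from a secure entropy source (and raises an error if none is available), while the numeric form keeps its previous meaning. A short sketch:

math.randomseed()     -- no argument: seed from a secure source
math.randomseed(42)   -- explicit numeric seed, as before
print(math.random())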

View File

@ -1,6 +1,6 @@
/*
** OS library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h

View File

@ -1,6 +1,6 @@
/*
** Package library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h


@ -1,6 +1,6 @@
/*
** String library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -122,11 +122,25 @@ static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
LJLIB_CF(string_dump)
{
GCfunc *fn = lj_lib_checkfunc(L, 1);
int strip = L->base+1 < L->top && tvistruecond(L->base+1);
SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
GCproto *pt = lj_lib_checkLproto(L, 1, 1);
uint32_t flags = 0;
SBuf *sb;
TValue *o = L->base+1;
if (o < L->top) {
if (tvisstr(o)) {
const char *mode = strVdata(o);
char c;
while ((c = *mode++)) {
if (c == 's') flags |= BCDUMP_F_STRIP;
if (c == 'd') flags |= BCDUMP_F_DETERMINISTIC;
}
} else if (tvistruecond(o)) {
flags |= BCDUMP_F_STRIP;
}
}
sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
L->top = L->base+1;
if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
if (!pt || lj_bcwrite(L, pt, writer_buf, sb, flags))
lj_err_caller(L, LJ_ERR_STRDUMP);
setstrV(L, L->top-1, lj_buf_str(L, sb));
lj_gc_check(L);
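
A sketch of the Lua-visible behaviour implied by the mode-string parsing above (the function f is just a placeholder): the second argument to string.dump() may now be a string of flag characters, where 's' strips debug info and 'd' requests a deterministic dump; any other truthy value keeps the old meaning and merely strips.

local function f(x) return x + 1 end
local full  = string.dump(f)        -- full dump, debug info kept
local strip = string.dump(f, true)  -- old-style flag: strip debug info
local repro = string.dump(f, "sd")  -- mode string: strip + deterministic dump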


@ -1,6 +1,6 @@
/*
** Table library.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h


@ -1057,7 +1057,7 @@ static size_t release_unused_segments(mstate m)
mchunkptr p = align_as_chunk(base);
size_t psize = chunksize(p);
/* Can unmap if first chunk holds entire segment and not pinned */
if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
if (!cinuse(p) && (char *)p + psize == (char *)mem2chunk(sp)) {
tchunkptr tp = (tchunkptr)p;
if (p == m->dv) {
m->dv = 0;


@ -1,6 +1,6 @@
/*
** Public Lua/C API.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@ -104,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size)
if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
return 0; /* Stack overflow. */
} else if (size > 0) {
lj_state_checkstack(L, (MSize)size);
int avail = (int)(mref(L->maxstack, TValue) - L->top);
if (size > avail &&
lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
L->top--;
return 0; /* Out of memory. */
}
}
return 1;
}
@ -1047,6 +1052,7 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
/* Flush cache, since traces specialize to basemt. But not during __gc. */
if (lj_trace_flushall(L))
lj_err_caller(L, LJ_ERR_NOGCMM);
o = index2adr(L, idx); /* Stack may have been reallocated. */
if (tvisbool(o)) {
/* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));


@ -1,6 +1,6 @@
/*
** Target architecture selection.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_ARCH_H
@ -57,7 +57,7 @@
#define LUAJIT_TARGET LUAJIT_ARCH_X64
#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
@ -66,7 +66,7 @@
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#else
#error "No support for this architecture (yet)"
#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
#endif
#endif
@ -124,7 +124,7 @@
#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
#if TARGET_OS_IPHONE
#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
#define LJ_TARGET_IOS 1
#else
#define LJ_TARGET_IOS 0
@ -237,7 +237,7 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#define LJ_ARCH_VERSION 80
#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
#define LJ_ARCH_VERSION 70
@ -331,6 +331,7 @@
#define LJ_ARCH_NOFFI 1
#elif LJ_ARCH_BITS == 64
#error "No support for PPC64"
#undef LJ_TARGET_PPC
#endif
#if _ARCH_PWR7
@ -490,36 +491,45 @@
#elif LJ_TARGET_ARM
#if defined(__ARMEB__)
#error "No support for big-endian ARM"
#undef LJ_TARGET_ARM
#endif
#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
#error "No support for Cortex-M CPUs"
#undef LJ_TARGET_ARM
#endif
#if !(__ARM_EABI__ || LJ_TARGET_IOS)
#error "Only ARM EABI or iOS 3.0+ ABI is supported"
#undef LJ_TARGET_ARM
#endif
#elif LJ_TARGET_ARM64
#if defined(_ILP32)
#error "No support for ILP32 model on ARM64"
#undef LJ_TARGET_ARM64
#endif
#elif LJ_TARGET_PPC
#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
#error "No support for little-endian PPC32"
#undef LJ_TARGET_PPC
#endif
#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#error "No support for PPC/e500, use LuaJIT 2.0"
#undef LJ_TARGET_PPC
#endif
#elif LJ_TARGET_MIPS32
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
#error "Only o32 ABI supported for MIPS32"
#undef LJ_TARGET_MIPS
#endif
#if LJ_TARGET_MIPSR6
/* Not that useful, since most available r6 CPUs are 64 bit. */
#error "No support for MIPS32R6"
#undef LJ_TARGET_MIPS
#endif
#elif LJ_TARGET_MIPS64
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
#error "Only n64 ABI supported for MIPS64"
#undef LJ_TARGET_MIPS
#endif
#endif
#endif


@ -1,6 +1,6 @@
/*
** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_asm_c
@ -29,6 +29,7 @@
#include "lj_dispatch.h"
#include "lj_vm.h"
#include "lj_target.h"
#include "lj_prng.h"
#ifdef LUA_USE_ASSERT
#include <stdio.h>
@ -93,6 +94,12 @@ typedef struct ASMState {
MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
MCode *realign; /* Realign loop if not NULL. */
#ifdef LUAJIT_RANDOM_RA
/* Randomize register allocation. OK for fuzz testing, not for production. */
uint64_t prngbits;
PRNGState prngstate;
#endif
#ifdef RID_NUM_KREF
intptr_t krefk[RID_NUM_KREF];
#endif
@ -173,6 +180,41 @@ IRFLDEF(FLOFS)
0
};
#ifdef LUAJIT_RANDOM_RA
/* Return a fixed number of random bits from the local PRNG state. */
static uint32_t ra_random_bits(ASMState *as, uint32_t nbits) {
uint64_t b = as->prngbits;
uint32_t res = (1u << nbits) - 1u;
if (b <= res) b = lj_prng_u64(&as->prngstate) | (1ull << 63);
res &= (uint32_t)b;
as->prngbits = b >> nbits;
return res;
}
/* Pick a random register from a register set. */
static Reg rset_pickrandom(ASMState *as, RegSet rs)
{
Reg r = rset_pickbot_(rs);
rs >>= r;
if (rs > 1) { /* More than one bit set? */
while (1) {
/* We need to sample max. the GPR or FPR half of the set. */
uint32_t d = ra_random_bits(as, RSET_BITS-1);
if ((rs >> d) & 1) {
r += d;
break;
}
}
}
return r;
}
#define rset_picktop(rs) rset_pickrandom(as, rs)
#define rset_pickbot(rs) rset_pickrandom(as, rs)
#else
#define rset_picktop(rs) rset_picktop_(rs)
#define rset_pickbot(rs) rset_pickbot_(rs)
#endif
/* -- Target-specific instruction emitter --------------------------------- */
#if LJ_TARGET_X86ORX64
@ -564,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
IRIns *ir = IR(ref);
if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
#if LJ_GC64
#if LJ_TARGET_ARM64
(ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
#else
(ir->o == IR_KINT && k == ir->i) ||
#endif
(ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
k == (intptr_t)ir_kptr(ir))
@ -903,11 +949,11 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
static void asm_snap_alloc1(ASMState *as, IRRef ref)
{
IRIns *ir = IR(ref);
if (!irref_isk(ref) && ir->r != RID_SUNK) {
if (!irref_isk(ref)) {
bloomset(as->snapfilt1, ref);
bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
if (ra_used(ir)) return;
if (ir->r == RID_SINK) {
if (ir->r == RID_SINK || ir->r == RID_SUNK) {
ir->r = RID_SUNK;
#if LJ_HASFFI
if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */
@ -2442,6 +2488,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->realign = NULL;
as->loopinv = 0;
as->parent = J->parent ? traceref(J, J->parent) : NULL;
#ifdef LUAJIT_RANDOM_RA
(void)lj_prng_u64(&J2G(J)->prng); /* Ensure PRNG step between traces. */
#endif
/* Reserve MCode memory. */
as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
@ -2483,6 +2532,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
#endif
as->ir = J->curfinal->ir; /* Use the copied IR. */
as->curins = J->cur.nins = as->orignins;
#ifdef LUAJIT_RANDOM_RA
as->prngstate = J2G(J)->prng; /* Must (re)start from identical state. */
as->prngbits = 0;
#endif
RA_DBG_START();
RA_DBGX((as, "===== STOP ====="));


@ -1,6 +1,6 @@
/*
** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_ASM_H


@ -1,6 +1,6 @@
/*
** ARM IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@ -969,24 +969,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, ARMI_LDR, dest, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
emit_opk(as, ARMI_ADD, dest, uv,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_lso(as, ARMI_LDR, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
if (ir->o == IR_UREFC)
emit_opk(as, ARMI_ADD, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, ARMI_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
}
@ -1919,7 +1927,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
} else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
as->curins--; /* Always skip the loword min/max. */
if (uselo || usehi)
asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS);
return;
#elif LJ_HASFFI
} else if ((ir-1)->o == IR_CONV) {
@ -1990,6 +1998,7 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno)
{
int savereg = 0;
Reg pbase;
uint32_t k;
if (irp) {
@ -2000,12 +2009,14 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
pbase = rset_pickbot(allow);
} else {
pbase = RID_RET;
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
savereg = 1;
}
} else {
pbase = RID_BASE;
}
emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
if (savereg)
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
k = emit_isk12(0, (int32_t)(8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, ARMI_CMP^k, RID_TMP);
@ -2017,7 +2028,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
if (ra_hasspill(irp->s))
emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
if (ra_hasspill(irp->s) && !allow)
if (savereg)
emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
emit_loadi(as, RID_TMP, (i & ~4095));
} else {
@ -2031,11 +2042,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
SnapEntry *map = &as->T->snapmap[snap->mapofs];
SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
MSize n, nent = snap->nent;
int32_t bias = 0;
/* Store the value of all modified slots to the Lua stack. */
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
BCReg s = snap_slot(sn);
int32_t ofs = 8*((int32_t)s-1);
int32_t ofs = 8*((int32_t)s-1) - bias;
IRRef ref = snap_ref(sn);
IRIns *ir = IR(ref);
if ((sn & SNAP_NORESTORE))
@ -2054,6 +2066,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);
#else
Reg src = ra_alloc1(as, ref, RSET_FPR);
if (LJ_UNLIKELY(ofs < -1020 || ofs > 1020)) {
int32_t adj = ofs & 0xffffff00; /* K12-friendly. */
bias += adj;
ofs -= adj;
emit_addptr(as, RID_BASE, -adj);
}
emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
#endif
} else {
@ -2082,6 +2100,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
}
checkmclim(as);
}
emit_addptr(as, RID_BASE, bias);
lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}
@ -2252,7 +2271,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
}
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
return REGSP_HINT(RID_RET);
return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
}
static void asm_setup_target(ASMState *as)


@ -1,6 +1,6 @@
/*
** ARM64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
** Sponsored by Cisco Systems, Inc.
@ -84,18 +84,23 @@ static void asm_guardcc(ASMState *as, A64CC cc)
emit_cond_branch(as, cc, target);
}
/* Emit test and branch instruction to exit for guard. */
static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
/* Emit test and branch instruction to exit for guard, if in range. */
static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
{
MCode *target = asm_exitstub_addr(as, as->snapno);
MCode *p = as->mcp;
ptrdiff_t delta = target - p;
if (LJ_UNLIKELY(p == as->invmcp)) {
if (as->orignins > 1023) return 0; /* Delta might end up too large. */
as->loopinv = 1;
*p = A64I_B | A64F_S26(target-p);
emit_tnb(as, ai^0x01000000u, r, bit, p-1);
return;
*p = A64I_B | A64F_S26(delta);
ai ^= 0x01000000u;
target = p-1;
} else if (LJ_UNLIKELY(delta >= 0x1fff)) {
return 0;
}
emit_tnb(as, ai, r, bit, target);
return 1;
}
/* Emit compare and branch instruction to exit for guard. */
@ -211,16 +216,14 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{
IRIns *ir = IR(ref);
int logical = (ai & 0x1f000000) == 0x0a000000;
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
return A64F_M(ir->r);
} else if (irref_isk(ref)) {
uint32_t m;
int64_t k = get_k64val(as, ref);
if ((ai & 0x1f000000) == 0x0a000000)
m = emit_isk13(k, irt_is64(ir->t));
else
m = emit_isk12(k);
uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
if (m)
return m;
} else if (mayfuse(as, ref)) {
@ -232,7 +235,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
(IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
IRIns *irl = IR(ir->op1);
if (sh == A64SH_LSL &&
irl->o == IR_CONV &&
irl->o == IR_CONV && !logical &&
irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
shift <= 4 &&
canfuse(as, irl)) {
@ -242,7 +245,11 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_SH(sh, shift);
}
} else if (ir->o == IR_CONV &&
} else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) {
Reg m = ra_alloc1(as, ir->op1, allow);
int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
} else if (ir->o == IR_CONV && !logical &&
ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_EX(A64EX_SXTW);
@ -419,13 +426,18 @@ static int asm_fuseorshift(ASMState *as, IRIns *ir)
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
uint32_t n, nargs = CCI_XNARGS(ci);
int32_t ofs = 0;
int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7;
Reg gpr, fpr = REGARG_FIRSTFPR;
if (ci->func)
emit_call(as, ci->func);
for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
as->cost[gpr] = REGCOST(~0u, ASMREF_L);
gpr = REGARG_FIRSTGPR;
#if LJ_HASFFI && LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) {
fpr = REGARG_LASTFPR+1;
}
#endif
for (n = 0; n < nargs; n++) { /* Setup args. */
IRRef ref = args[n];
IRIns *ir = IR(ref);
@ -436,10 +448,21 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
"reg %d not free", fpr); /* Must have been evicted. */
ra_leftov(as, fpr, ref);
fpr++;
#if LJ_HASFFI && LJ_ABI_WIN
} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
Reg rf = ra_alloc1(as, ref, RSET_FPR);
emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
#endif
} else {
Reg r = ra_alloc1(as, ref, RSET_FPR);
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
ofs += 8;
int32_t al = spalign;
#if LJ_HASFFI && LJ_TARGET_OSX
al |= irt_isnum(ir->t) ? 7 : 3;
#endif
spofs = (spofs + al) & ~al;
if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4;
emit_spstore(as, ir, r, spofs);
spofs += al + 1;
}
} else {
if (gpr <= REGARG_LASTGPR) {
@ -449,10 +472,27 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
gpr++;
} else {
Reg r = ra_alloc1(as, ref, RSET_GPR);
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
ofs += 8;
int32_t al = spalign;
#if LJ_HASFFI && LJ_TARGET_OSX
al |= irt_size(ir->t) - 1;
#endif
spofs = (spofs + al) & ~al;
if (al >= 3) {
if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4;
emit_spstore(as, ir, r, spofs);
} else {
lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1);
emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs);
}
spofs += al + 1;
}
}
#if LJ_HASFFI && LJ_TARGET_OSX
} else { /* Marker for start of varargs. */
gpr = REGARG_LASTGPR+1;
fpr = REGARG_LASTFPR+1;
spalign = 7;
#endif
}
}
}
@ -518,8 +558,6 @@ static void asm_retf(ASMState *as, IRIns *ir)
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
/* Need to force a spill on REF_BASE now to update the stack slot. */
emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
emit_setgl(as, base, jit_base);
emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE);
@ -643,25 +681,22 @@ static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
Reg dest = 0, tmp;
int destused = ra_used(ir);
Reg tmp;
int32_t ofs = 0;
ra_evictset(as, RSET_SCRATCH);
if (destused) {
if (ra_used(ir)) {
if (ra_hasspill(ir->s)) {
ofs = sps_scale(ir->s);
destused = 0;
if (ra_hasreg(ir->r)) {
ra_free(as, ir->r);
ra_modified(as, ir->r);
emit_spload(as, ir, ir->r, ofs);
}
} else {
dest = ra_dest(as, ir, RSET_FPR);
Reg dest = ra_dest(as, ir, RSET_FPR);
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
}
}
if (destused)
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
asm_guardcnb(as, A64I_CBZ, RID_RET);
args[0] = ir->op1; /* GCstr *str */
args[1] = ASMREF_TMP1; /* TValue *n */
@ -752,113 +787,75 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
int destused = ra_used(ir);
Reg dest = ra_dest(as, ir, allow);
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
Reg key = 0, tmp = RID_TMP;
Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
Reg tmp = RID_TMP, type = RID_NONE, key = RID_NONE, tkey;
IRRef refkey = ir->op2;
IRIns *irkey = IR(refkey);
int isk = irref_isk(ir->op2);
int isk = irref_isk(refkey);
IRType1 kt = irkey->t;
uint32_t k = 0;
uint32_t khash;
MCLabel l_end, l_loop, l_next;
MCLabel l_end, l_loop;
rset_clear(allow, tab);
if (!isk) {
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
rset_clear(allow, key);
if (!irt_isstr(kt)) {
tmp = ra_scratch(as, allow);
rset_clear(allow, tmp);
}
} else if (irt_isnum(kt)) {
int64_t val = (int64_t)ir_knum(irkey)->u64;
if (!(k = emit_isk12(val))) {
key = ra_allock(as, val, allow);
rset_clear(allow, key);
}
} else if (!irt_ispri(kt)) {
if (!(k = emit_isk12(irkey->i))) {
key = ra_alloc1(as, refkey, allow);
rset_clear(allow, key);
}
}
/* Allocate constants early. */
if (irt_isnum(kt)) {
if (!isk) {
tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
rset_clear(allow, tisnum);
}
} else if (irt_isaddr(kt)) {
if (isk) {
int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
scr = ra_allock(as, kk, allow);
/* Allocate register for tkey outside of the loop. */
if (isk) {
int64_t kk;
if (irt_isaddr(kt)) {
kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
} else if (irt_isnum(kt)) {
kk = (int64_t)ir_knum(irkey)->u64;
/* Assumes -0.0 is already canonicalized to +0.0. */
} else {
scr = ra_scratch(as, allow);
lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
kk = ~((int64_t)~irt_toitype(kt) << 47);
}
rset_clear(allow, scr);
k = emit_isk12(kk);
tkey = k ? 0 : ra_allock(as, kk, allow);
} else {
lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
scr = ra_scratch(as, rset_clear(allow, type));
rset_clear(allow, scr);
tkey = ra_scratch(as, allow);
}
/* Key not found in chain: jump to exit (if merged) or load niltv. */
l_end = emit_label(as);
as->invmcp = NULL;
if (merge == IR_NE)
if (merge == IR_NE) {
asm_guardcc(as, CC_AL);
else if (destused)
emit_loada(as, dest, niltvg(J2G(as->J)));
} else if (destused) {
uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val));
lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
}
/* Follow hash chain until the end. */
l_loop = --as->mcp;
emit_n(as, A64I_CMPx^A64I_K12^0, dest);
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
l_next = emit_label(as);
if (destused)
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
/* Type and value comparison. */
if (merge == IR_EQ)
asm_guardcc(as, CC_EQ);
else
emit_cond_branch(as, CC_EQ, l_end);
emit_nm(as, A64I_CMPx^k, tmp, tkey);
if (!destused)
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
*l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
if (irt_isnum(kt)) {
if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */
if (k)
emit_n(as, A64I_CMPx^k, tmp);
else
emit_nm(as, A64I_CMPx, key, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
/* Construct tkey as canonicalized or tagged key. */
if (!isk) {
if (irt_isnum(kt)) {
key = ra_alloc1(as, refkey, RSET_FPR);
emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
/* A64I_FMOV_R_D from key to tkey done below. */
} else {
emit_nm(as, A64I_FCMPd, key, ftmp);
emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
emit_cond_branch(as, CC_LO, l_next);
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
lj_assertA(irt_isaddr(kt), "bad HREF key type");
key = ra_alloc1(as, refkey, allow);
type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
}
} else if (irt_isaddr(kt)) {
if (isk) {
emit_nm(as, A64I_CMPx, scr, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
} else {
emit_nm(as, A64I_CMPx, tmp, scr);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
}
} else {
emit_nm(as, A64I_CMPx, scr, type);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
}
*l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
if (!isk && irt_isaddr(kt)) {
type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
rset_clear(allow, type);
}
/* Load main position relative to tab->node into dest. */
khash = isk ? ir_khash(as, irkey) : 1;
if (khash == 0) {
@ -872,7 +869,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
} else if (irt_isstr(kt)) {
/* Fetch of str->sid is cheaper than ra_allock. */
emit_dnm(as, A64I_ANDw, dest, dest, tmp);
emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
@ -881,23 +877,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
emit_dnm(as, A64I_SUBw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
emit_dnm(as, A64I_EORw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
emit_dnm(as, A64I_EORw, tmp, tmp, dest);
if (irt_isnum(kt)) {
emit_dnm(as, A64I_EORw, tmp, tkey, dest);
emit_dnm(as, A64I_ADDw, dest, dest, dest);
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_dm(as, A64I_MOVw, tmp, dest);
emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
emit_nm(as, A64I_FCMPZd, (key & 31), 0);
emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
} else {
checkmclim(as);
emit_dm(as, A64I_MOVw, tmp, key);
emit_dnm(as, A64I_EORw, dest, dest,
ra_allock(as, irt_toitype(kt) << 15, allow));
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_dm(as, A64I_MOVx, dest, key);
emit_dnm(as, A64I_EORw, tmp, key, dest);
emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
}
}
}
@ -912,7 +903,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
int bigofs = !emit_checkofs(A64I_LDRx, kofs);
Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
Reg key, idx = node;
Reg idx = node;
RegSet allow = rset_exclude(RSET_GPR, node);
uint64_t k;
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@ -931,9 +922,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
} else {
k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
}
key = ra_scratch(as, allow);
emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
emit_lso(as, A64I_LDRx, key, idx, kofs);
emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
if (bigofs)
emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
}
@ -941,24 +931,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
emit_opk(as, A64I_ADDx, dest, uv,
if (guarded)
asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
if (ir->o == IR_UREFC)
emit_opk(as, A64I_ADDx, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
else
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadu64(as, dest, k);
} else {
emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
emit_lso(as, A64I_LDRx, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
}
@ -1063,7 +1059,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
Reg idx, tmp, type;
Reg idx, tmp;
int32_t ofs = 0;
RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
@ -1082,8 +1078,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
} else {
tmp = ra_scratch(as, gpr);
}
type = ra_scratch(as, rset_clear(gpr, tmp));
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
rset_clear(gpr, idx);
if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
/* Always do the type check, even if the load result is unused. */
asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
@ -1091,10 +1088,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t));
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
} else if (irt_isaddr(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
} else if (irt_isnil(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
} else {
@ -1217,9 +1214,8 @@ dotypecheck:
emit_nm(as, A64I_CMPx,
ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
} else {
Reg type = ra_scratch(as, allow);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
}
emit_lso(as, A64I_LDRx, tmp, base, ofs);
return;
@ -1289,8 +1285,9 @@ static void asm_tbar(ASMState *as, IRIns *ir)
Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
Reg mark = RID_TMP;
MCLabel l_end = emit_label(as);
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
/* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_setgl(as, tab, gc.grayagain);
emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
emit_getgl(as, link, gc.grayagain);
@ -1304,7 +1301,6 @@ static void asm_obar(ASMState *as, IRIns *ir)
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
IRRef args[2];
MCLabel l_end;
RegSet allow = RSET_GPR;
Reg obj, val, tmp;
/* No need for other object barriers (yet). */
lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
@ -1315,14 +1311,13 @@ static void asm_obar(ASMState *as, IRIns *ir)
asm_gencall(as, ci, args);
emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
obj = IR(ir->op1)->r;
tmp = ra_scratch(as, rset_exclude(allow, obj));
emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end);
emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
emit_lso(as, A64I_LDRB, tmp, obj,
(int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
(int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}
@ -1364,12 +1359,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
if (irref_isk(lref))
return 1; /* But swap constants to the right. */
ir = IR(rref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 0; /* Don't swap fusable operands to the left. */
ir = IR(lref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 1; /* But swap fusable operands to the right. */
@ -1415,13 +1410,12 @@ static void asm_intneg(ASMState *as, IRIns *ir)
static void asm_intmul(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
if (irt_isguard(ir->t)) { /* IR_MULOV */
asm_guardcc(as, CC_NE);
emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest);
emit_dnm(as, A64I_SMULL, dest, right, left);
} else {
emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
@ -1681,16 +1675,15 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
if (asm_swapops(as, blref, brref)) {
Reg tmp = blref; blref = brref; brref = tmp;
}
bleft = ra_alloc1(as, blref, RSET_GPR);
if (irref_isk(brref)) {
uint64_t k = get_k64val(as, brref);
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) &&
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft,
emit_ctz64(k)))
return;
}
m2 = emit_isk13(k, irt_is64(irl->t));
}
bleft = ra_alloc1(as, blref, RSET_GPR);
ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
if (!m2)
m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
@ -1765,37 +1758,28 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno)
{
Reg pbase;
uint32_t k;
Reg pbase = RID_BASE;
if (irp) {
if (!ra_hasspill(irp->s)) {
pbase = irp->r;
lj_assertA(ra_hasreg(pbase), "base reg lost");
} else if (allow) {
pbase = rset_pickbot(allow);
} else {
pbase = RID_RET;
emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
}
} else {
pbase = RID_BASE;
pbase = irp->r;
if (!ra_hasreg(pbase))
pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET);
}
emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
if (pbase & 0x80) /* Restore temp. register. */
emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
k = emit_isk12((8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, A64I_CMPx^k, RID_TMP);
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31));
emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
(int32_t)offsetof(lua_State, maxstack));
if (irp) { /* Must not spill arbitrary registers in head of side trace. */
if (ra_hasspill(irp->s))
emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
if (ra_hasspill(irp->s) && !allow)
emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
} else {
emit_getgl(as, RID_TMP, cur_L);
if (pbase & 0x40) {
emit_getgl(as, (pbase & 31), jit_base);
if (pbase & 0x80) /* Save temp register. */
emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0);
}
emit_getgl(as, RID_TMP, cur_L);
}
/* Restore Lua stack from on-trace state. */
@ -1837,7 +1821,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
/* Marker to prevent patching the GC check exit. */
#define ARM64_NOPATCH_GC_CHECK \
(A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
(A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
@ -1892,46 +1876,40 @@ static void asm_loop_tail_fixup(ASMState *as)
/* -- Head of trace ------------------------------------------------------- */
/* Reload L register from g->cur_L. */
static void asm_head_lreg(ASMState *as)
{
IRIns *ir = IR(ASMREF_L);
if (ra_used(ir)) {
Reg r = ra_dest(as, ir, RSET_GPR);
emit_getgl(as, r, cur_L);
ra_evictk(as);
}
}
/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
IRIns *ir;
asm_head_lreg(as);
ir = IR(REF_BASE);
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
ra_spill(as, ir);
ra_destreg(as, ir, RID_BASE);
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
if (ra_hasreg(r)) {
ra_free(as, r);
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (r != RID_BASE)
emit_movrr(as, ir, r, RID_BASE);
}
}
/* Coalesce BASE register for a side trace. */
static Reg asm_head_side_base(ASMState *as, IRIns *irp)
{
IRIns *ir;
asm_head_lreg(as);
ir = IR(REF_BASE);
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
ra_spill(as, ir);
if (ra_hasspill(irp->s)) {
return ra_dest(as, ir, RSET_GPR);
} else {
Reg r = irp->r;
lj_assertA(ra_hasreg(r), "base reg lost");
if (r != ir->r && !rset_test(as->freeset, r))
ra_restore(as, regcost_ref(as->cost[r]));
ra_destreg(as, ir, r);
return r;
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
if (ra_hasreg(r)) {
ra_free(as, r);
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (irp->r == r) {
return r; /* Same BASE register already coalesced. */
} else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
/* Move from coalesced parent reg. */
emit_movrr(as, ir, r, irp->r);
return irp->r;
} else {
emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
}
}
return RID_NONE;
}
/* -- Tail of trace ------------------------------------------------------- */
@ -1975,20 +1953,47 @@ static void asm_tail_prep(ASMState *as)
/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
IRRef args[CCI_NARGS_MAX*2];
#if LJ_HASFFI
uint32_t i, nargs = CCI_XNARGS(ci);
int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
asm_collectargs(as, ir, ci, args);
for (i = 0; i < nargs; i++) {
if (args[i] && irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) nfpr--; else nslots += 2;
} else {
if (ngpr > 0) ngpr--; else nslots += 2;
if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) ||
(LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) {
IRRef args[CCI_NARGS_MAX*2];
int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
asm_collectargs(as, ir, ci, args);
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) nfpr = 0;
#endif
for (i = 0; i < nargs; i++) {
int al = spalign;
if (!args[i]) {
#if LJ_TARGET_OSX
/* Marker for start of varargs. */
nfpr = 0;
ngpr = 0;
spalign = 7;
#endif
} else if (irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) { nfpr--; continue; }
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
#elif LJ_TARGET_OSX
al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
#endif
} else {
if (ngpr > 0) { ngpr--; continue; }
#if LJ_TARGET_OSX
al |= irt_size(IR(args[i])->t) - 1;
#endif
}
spofs = (spofs + 2*al+1) & ~al; /* Align and bump stack pointer. */
}
nslots = (spofs + 3) >> 2;
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
}
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
return REGSP_HINT(RID_RET);
#endif
return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
}
static void asm_setup_target(ASMState *as)
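
The CCI_VARARG paths added above concern how compiled FFI calls to vararg C functions lay out their arguments on Apple and Windows ARM64. A hedged illustration of the kind of call that takes this path once it lands in a trace (snprintf is only an example declaration):

local ffi = require("ffi")
ffi.cdef[[int snprintf(char *buf, size_t n, const char *fmt, ...);]]
local buf = ffi.new("char[?]", 64)
ffi.C.snprintf(buf, 64, "pi=%g", 3.14)  -- vararg number is passed as a double
print(ffi.string(buf))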


@ -1,6 +1,6 @@
/*
** MIPS IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@ -456,7 +456,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_addptr(as, base, -8*delta);
asm_guard(as, MIPSI_BNE, RID_TMP,
ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
emit_tsi(as, MIPSI_AL, RID_TMP, base, (LJ_BE || LJ_FR2) ? -8 : -4);
}
/* -- Buffer operations --------------------------------------------------- */
@ -653,11 +653,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
rset_exclude(RSET_GPR, dest));
emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
#if !LJ_TARGET_MIPSR6
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
#else
emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
#endif
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)&as->J->k64[LJ_K64_2P63],
@ -670,11 +670,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
rset_exclude(RSET_GPR, dest));
emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
#if !LJ_TARGET_MIPSR6
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
#else
emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
#endif
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)&as->J->k32[LJ_K32_2P63],
@ -690,8 +690,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
MIPSIns mi = irt_is64(ir->t) ?
(st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
(st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left);
emit_fg(as, mi, left, left);
emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, tmp);
emit_fg(as, mi, tmp, left);
#endif
}
}
@ -1207,22 +1207,29 @@ nolo:
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
if (guarded)
asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
if (ir->o == IR_UREFC)
emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, dest, o);
} else {
emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}


@ -1,6 +1,6 @@
/*
** PPC IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@ -840,23 +840,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_tai(as, PPCI_LWZ, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
if (ir->o == IR_UREFC)
emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
}


@ -1,6 +1,6 @@
/*
** x86/x64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Guard handling ------------------------------------------------------ */
@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
/* Check if there's no conflicting instruction between curins and ref.
** Also avoid fusing loads if there are multiple references.
*/
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
{
IRIns *ir = as->ir;
IRRef i = as->curins;
@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
while (--i > ref) {
if (ir[i].o == conflict)
return 0; /* Conflict found. */
else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
return 0;
else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
return 0;
}
return 1; /* Ok, no conflict. */
@ -134,13 +136,14 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
/* We can avoid the FLOAD of t->array for colocated arrays. */
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
return irb->op1; /* Table obj. */
}
} else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
/* Fuse base offset (vararg load). */
as->mrm.ofs = IR(irb->op2)->i;
IRIns *irk = IR(irb->op2);
as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
return irb->op1;
}
return ref; /* Otherwise use the given array base. */
@ -455,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 0) &&
noconflict(as, ref, IR_RETF, 2) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@ -466,12 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
} else if (ir->o == IR_FLOAD) {
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
noconflict(as, ref, IR_FSTORE, 0)) {
noconflict(as, ref, IR_FSTORE, 2)) {
asm_fusefref(as, ir, xallow);
return RID_MRM;
}
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
@ -481,7 +484,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
noconflict(as, ref, IR_XSTORE, 0)) {
noconflict(as, ref, IR_XSTORE, 2)) {
asm_fusexref(as, ir->op1, xallow);
return RID_MRM;
}
@ -814,6 +817,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_rr(as, XO_UCOMISD, left, tmp);
emit_rr(as, XO_CVTSI2SD, tmp, dest);
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
checkmclim(as);
emit_rr(as, XO_CVTTSD2SI, dest, left);
/* Can't fuse since left is needed twice. */
}
@ -856,6 +860,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
emit_rma(as, XO_MOVSD, bias, k);
checkmclim(as);
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
return;
} else { /* Integer to FP conversion. */
@ -1172,6 +1177,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
asm_guardcc(as, CC_E);
else
emit_sjcc(as, CC_E, l_end);
checkmclim(as);
if (irt_isnum(kt)) {
if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */
@ -1231,7 +1237,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
#endif
}
emit_sfixup(as, l_loop);
checkmclim(as);
#if LJ_GC64
if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key);
@ -1258,6 +1263,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
checkmclim(as);
emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
@ -1275,7 +1281,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
} else {
emit_rr(as, XO_MOV, tmp, key);
#if LJ_GC64
checkmclim(as);
emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 32);
@ -1372,24 +1377,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
if (ir->o == IR_UREFC)
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
asm_guardcc(as, CC_NE);
emit_i8(as, 1);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
} else {
else
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
emit_i8(as, 0);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
}
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, uv, o);
} else {
emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
emit_rmro(as, XO_MOV, uv|REX_GC64, func,
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
@ -1546,6 +1558,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {
checkmclim(as);
asm_fuseahuref(as, ir->op1, RSET_GPR);
if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
@ -1593,6 +1606,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t));
checkmclim(as);
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else


@ -1,6 +1,6 @@
/*
** Internal assertions.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_assert_c


@ -1,6 +1,6 @@
/*
** Bytecode instruction modes.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bc_c


@ -1,6 +1,6 @@
/*
** Bytecode instruction format.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BC_H


@ -1,6 +1,6 @@
/*
** Bytecode dump definitions.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BCDUMP_H
@ -46,6 +46,8 @@
#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
#define BCDUMP_F_DETERMINISTIC 0x80000000
/* Type codes for the GC constants of a prototype. Plus length for strings. */
enum {
BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64,
@ -61,7 +63,7 @@ enum {
/* -- Bytecode reader/writer ---------------------------------------------- */
LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
void *data, int strip);
void *data, uint32_t flags);
LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
LJ_FUNC GCproto *lj_bcread(LexState *ls);


@ -1,6 +1,6 @@
/*
** Bytecode reader.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bcread_c
@ -179,7 +179,7 @@ static const void *bcread_varinfo(GCproto *pt)
}
/* Read a single constant key/value of a template table. */
static void bcread_ktabk(LexState *ls, TValue *o)
static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t)
{
MSize tp = bcread_uleb128(ls);
if (tp >= BCDUMP_KTAB_STR) {
@ -191,6 +191,8 @@ static void bcread_ktabk(LexState *ls, TValue *o)
} else if (tp == BCDUMP_KTAB_NUM) {
o->u32.lo = bcread_uleb128(ls);
o->u32.hi = bcread_uleb128(ls);
} else if (t && tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */
settabV(ls->L, o, t);
} else {
lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
setpriV(o, ~tp);
@ -207,15 +209,15 @@ static GCtab *bcread_ktab(LexState *ls)
MSize i;
TValue *o = tvref(t->array);
for (i = 0; i < narray; i++, o++)
bcread_ktabk(ls, o);
bcread_ktabk(ls, o, NULL);
}
if (nhash) { /* Read hash entries. */
MSize i;
for (i = 0; i < nhash; i++) {
TValue key;
bcread_ktabk(ls, &key);
bcread_ktabk(ls, &key, NULL);
lj_assertLS(!tvisnil(&key), "nil key");
bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t);
}
}
return t;
@ -281,8 +283,11 @@ static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
{
BCIns *bc = proto_bc(pt);
bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
pt->framesize, 0);
BCIns op;
if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */
else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV;
else op = BC_FUNCF;
bc[0] = BCINS_AD(op, pt->framesize, 0);
bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
/* Swap bytecode instructions if the endianess differs. */
if (bcread_swap(ls)) {
@ -395,7 +400,7 @@ static int bcread_header(LexState *ls)
bcread_byte(ls) != BCDUMP_VERSION) return 0;
bcread_flags(ls) = flags = bcread_uleb128(ls);
if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0;
if ((flags & BCDUMP_F_FFI)) {
#if LJ_HASFFI
lua_State *L = ls->L;
@ -405,7 +410,7 @@ static int bcread_header(LexState *ls)
#endif
}
if ((flags & BCDUMP_F_STRIP)) {
ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
ls->chunkname = lj_str_newz(ls->L, *ls->chunkarg == BCDUMP_HEAD1 ? "=?" : ls->chunkarg);
} else {
MSize len = bcread_uleb128(ls);
bcread_need(ls, len);
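As an aside, the reader above leans on bcread_uleb128() for lengths, flags and constants. A standalone approximation of that ULEB128 decoding (demo only, not the LuaJIT implementation) looks like this:

/* Standalone approximation of ULEB128 decoding (demo, not lj_bcread.c). */
#include <stdint.h>
#include <stdio.h>

static uint32_t uleb128_read(const uint8_t **pp)
{
  const uint8_t *p = *pp;
  uint32_t v = *p++;
  if (v >= 0x80) {        /* Multi-byte: 7 payload bits per byte, LSB first. */
    int sh = 0;
    v &= 0x7f;
    do {
      v |= (uint32_t)(*p & 0x7f) << (sh += 7);
    } while (*p++ >= 0x80);
  }
  *pp = p;
  return v;
}

int main(void)
{
  const uint8_t buf[] = { 0xe5, 0x8e, 0x26 };  /* Encodes 624485. */
  const uint8_t *p = buf;
  printf("%u\n", uleb128_read(&p));            /* Prints 624485, consumes 3 bytes. */
  return 0;
}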

src/lj_bcwrite.c

@ -1,6 +1,6 @@
/*
** Bytecode writer.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bcwrite_c
@ -27,7 +27,9 @@ typedef struct BCWriteCtx {
GCproto *pt; /* Root prototype. */
lua_Writer wfunc; /* Writer callback. */
void *wdata; /* Writer callback data. */
int strip; /* Strip debug info. */
TValue **heap; /* Heap used for deterministic sorting. */
uint32_t heapsz; /* Size of heap. */
uint32_t flags; /* BCDUMP_F_* flags. */
int status; /* Status from writer callback. */
#ifdef LUA_USE_ASSERT
global_State *g;
@ -69,6 +71,8 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
*p++ = BCDUMP_KTAB_NUM;
p = lj_strfmt_wuleb128(p, o->u32.lo);
p = lj_strfmt_wuleb128(p, o->u32.hi);
} else if (tvistab(o)) { /* Write the nil value marker as a nil. */
*p++ = BCDUMP_KTAB_NIL;
} else {
lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
*p++ = BCDUMP_KTAB_NIL+~itype(o);
@ -76,6 +80,75 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
ctx->sb.w = p;
}
/* Compare two template table keys. */
static LJ_AINLINE int bcwrite_ktabk_lt(TValue *a, TValue *b)
{
uint32_t at = itype(a), bt = itype(b);
if (at != bt) { /* This also handles false and true keys. */
return at < bt;
} else if (at == LJ_TSTR) {
return lj_str_cmp(strV(a), strV(b)) < 0;
} else {
return a->u64 < b->u64; /* This works for numbers and integers. */
}
}
/* Insert key into a sorted heap. */
static void bcwrite_ktabk_heap_insert(TValue **heap, MSize idx, MSize end,
TValue *key)
{
MSize child;
while ((child = idx * 2 + 1) < end) {
/* Find lower of the two children. */
TValue *c0 = heap[child];
if (child + 1 < end) {
TValue *c1 = heap[child + 1];
if (bcwrite_ktabk_lt(c1, c0)) {
c0 = c1;
child++;
}
}
if (bcwrite_ktabk_lt(key, c0)) break; /* Key lower? Found our position. */
heap[idx] = c0; /* Move lower child up. */
idx = child; /* Descend. */
}
heap[idx] = key; /* Insert key here. */
}
/* Resize heap, dropping content. */
static void bcwrite_heap_resize(BCWriteCtx *ctx, uint32_t nsz)
{
lua_State *L = sbufL(&ctx->sb);
if (ctx->heapsz) {
lj_mem_freevec(G(L), ctx->heap, ctx->heapsz, TValue *);
ctx->heapsz = 0;
}
if (nsz) {
ctx->heap = lj_mem_newvec(L, nsz, TValue *);
ctx->heapsz = nsz;
}
}
/* Write hash part of template table in sorted order. */
static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash)
{
TValue **heap = ctx->heap;
MSize i = nhash;
for (;; node--) { /* Build heap. */
if (!tvisnil(&node->val)) {
bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key);
if (i == 0) break;
}
}
do { /* Drain heap. */
TValue *key = heap[0]; /* Output lowest key from top. */
bcwrite_ktabk(ctx, key, 0);
bcwrite_ktabk(ctx, (TValue *)((char *)key - offsetof(Node, key)), 1);
key = heap[--nhash]; /* Remove last key. */
bcwrite_ktabk_heap_insert(heap, 0, nhash, key); /* Re-insert. */
} while (nhash);
}
/* Write a template table. */
static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
{
@ -105,14 +178,20 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
bcwrite_ktabk(ctx, o, 1);
}
if (nhash) { /* Write hash entries. */
MSize i = nhash;
Node *node = noderef(t->node) + t->hmask;
for (;; node--)
if (!tvisnil(&node->val)) {
bcwrite_ktabk(ctx, &node->key, 0);
bcwrite_ktabk(ctx, &node->val, 1);
if (--i == 0) break;
}
if ((ctx->flags & BCDUMP_F_DETERMINISTIC) && nhash > 1) {
if (ctx->heapsz < nhash)
bcwrite_heap_resize(ctx, t->hmask + 1);
bcwrite_ktab_sorted_hash(ctx, node, nhash);
} else {
MSize i = nhash;
for (;; node--)
if (!tvisnil(&node->val)) {
bcwrite_ktabk(ctx, &node->key, 0);
bcwrite_ktabk(ctx, &node->val, 1);
if (--i == 0) break;
}
}
}
}
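To make the deterministic hash ordering concrete: table hash order depends on memory layout, so with BCDUMP_F_DETERMINISTIC the writer sorts the keys before emitting them. Below is a standalone sketch (plain ints, demo only, not part of this diff) of the same sift-down min-heap scheme used by bcwrite_ktabk_heap_insert() and bcwrite_ktab_sorted_hash(): heapify bottom-up, then repeatedly emit the root and re-insert the last element.

/* Standalone sketch of the sift-down min-heap sort, on plain ints. */
#include <stdio.h>

static void heap_insert(int *heap, unsigned idx, unsigned end, int key)
{
  unsigned child;
  while ((child = idx * 2 + 1) < end) {
    if (child + 1 < end && heap[child + 1] < heap[child])
      child++;                      /* Pick the smaller child. */
    if (key < heap[child]) break;   /* Key is lower: this is its slot. */
    heap[idx] = heap[child];        /* Move the smaller child up. */
    idx = child;                    /* Descend. */
  }
  heap[idx] = key;
}

int main(void)
{
  int heap[] = { 42, 7, 19, 3, 26 };
  unsigned n = 5, i;
  for (i = n; i-- > 0; )            /* Build heap bottom-up. */
    heap_insert(heap, i, n, heap[i]);
  while (n) {                       /* Drain: prints 3 7 19 26 42. */
    printf("%d ", heap[0]);
    heap_insert(heap, 0, --n, heap[n]);
  }
  printf("\n");
  return 0;
}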
@ -269,7 +348,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
p = lj_strfmt_wuleb128(p, pt->sizekgc);
p = lj_strfmt_wuleb128(p, pt->sizekn);
p = lj_strfmt_wuleb128(p, pt->sizebc-1);
if (!ctx->strip) {
if (!(ctx->flags & BCDUMP_F_STRIP)) {
if (proto_lineinfo(pt))
sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
p = lj_strfmt_wuleb128(p, sizedbg);
@ -317,11 +396,10 @@ static void bcwrite_header(BCWriteCtx *ctx)
*p++ = BCDUMP_HEAD2;
*p++ = BCDUMP_HEAD3;
*p++ = BCDUMP_VERSION;
*p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
*p++ = (ctx->flags & (BCDUMP_F_STRIP | BCDUMP_F_FR2)) +
LJ_BE*BCDUMP_F_BE +
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
LJ_FR2*BCDUMP_F_FR2;
if (!ctx->strip) {
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
if (!(ctx->flags & BCDUMP_F_STRIP)) {
p = lj_strfmt_wuleb128(p, len);
p = lj_buf_wmem(p, name, len);
}
@ -352,14 +430,16 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
/* Write bytecode for a prototype. */
int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
int strip)
uint32_t flags)
{
BCWriteCtx ctx;
int status;
ctx.pt = pt;
ctx.wfunc = writer;
ctx.wdata = data;
ctx.strip = strip;
ctx.heapsz = 0;
if ((bc_op(proto_bc(pt)[0]) != BC_NOT) == LJ_FR2) flags |= BCDUMP_F_FR2;
ctx.flags = flags;
ctx.status = 0;
#ifdef LUA_USE_ASSERT
ctx.g = G(L);
@ -368,6 +448,7 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
if (status == 0) status = ctx.status;
lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
bcwrite_heap_resize(&ctx, 0);
return status;
}

src/lj_buf.c

@ -1,6 +1,6 @@
/*
** Buffer handling.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_buf_c
@ -92,10 +92,8 @@ void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
char *b = sb->b;
MSize osz = (MSize)(sb->e - b);
if (osz > 2*LJ_MIN_SBUF) {
MSize n = (MSize)(sb->w - b);
b = lj_mem_realloc(L, b, osz, (osz >> 1));
sb->b = b;
sb->w = b + n;
sb->w = sb->b = b; /* Not supposed to keep data across shrinks. */
sb->e = b + (osz >> 1);
}
lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");

src/lj_buf.h

@ -1,6 +1,6 @@
/*
** Buffer handling.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BUF_H

src/lj_carith.c

@ -1,6 +1,6 @@
/*
** C data arithmetic.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
@ -44,9 +44,13 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
p = (uint8_t *)cdata_getptr(p, ct->size);
if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
} else if (ctype_isfunc(ct->info)) {
CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0;
p = (uint8_t *)*(void **)p;
ct = ctype_get(cts,
lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
if (i) { /* cts->tab may have been reallocated. */
ca->ct[0] = ctype_get(cts, id0);
}
}
if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
ca->ct[i] = ct;
@ -345,9 +349,7 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
if (LJ_LIKELY(tvisint(o))) {
return (uint32_t)intV(o);
} else {
int32_t i = lj_num2bit(numV(o));
if (LJ_DUALNUM) setintV(o, i);
return (uint32_t)i;
return (uint32_t)lj_num2bit(numV(o));
}
}
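The carith_checkarg() change above guards against a classic invalidation bug: interning a pointer type can grow cts->tab, so a CType pointer captured earlier may dangle. A hypothetical standalone sketch of the pattern and the index-based fix:

/* Hypothetical sketch, not LuaJIT source: growing an interned-type vector
** invalidates raw element pointers, so keep the index and re-derive. */
#include <stdio.h>
#include <stdlib.h>

typedef struct { int info; } Type;
typedef struct { Type *tab; size_t n, cap; } TypeTable;

static size_t intern_type(TypeTable *tt, int info)
{
  if (tt->n == tt->cap) {                 /* Growth may move the array. */
    tt->cap = tt->cap ? 2 * tt->cap : 4;
    tt->tab = realloc(tt->tab, tt->cap * sizeof(Type));
  }
  tt->tab[tt->n].info = info;
  return tt->n++;                         /* Stable handle: the index. */
}

int main(void)
{
  TypeTable tt = { NULL, 0, 0 };
  size_t id0 = intern_type(&tt, 111);
  /* Wrong: Type *t0 = &tt.tab[id0]; ...intern more types... use t0. */
  intern_type(&tt, 222);                  /* tt.tab may have been realloc'd. */
  Type *t0 = &tt.tab[id0];                /* Right: re-derive from id0. */
  printf("%d\n", t0->info);               /* Prints 111. */
  free(tt.tab);
  return 0;
}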

src/lj_carith.h

@ -1,6 +1,6 @@
/*
** C data arithmetic.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CARITH_H

src/lj_ccall.c

@ -1,6 +1,6 @@
/*
** FFI C call handling.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
@ -20,12 +20,15 @@
#if LJ_TARGET_X86
/* -- x86 calling conventions --------------------------------------------- */
#define CCALL_PUSH(arg) \
*(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
#if LJ_ABI_WIN
#define CCALL_HANDLE_STRUCTRET \
/* Return structs bigger than 8 by reference (on stack only). */ \
cc->retref = (sz > 8); \
if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
if (cc->retref) CCALL_PUSH(dp);
#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
@ -40,7 +43,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
cc->stack[nsp++] = (GPRArg)dp; \
CCALL_PUSH(dp); \
} else { /* Struct with single FP field ends up in FPR. */ \
cc->resx87 = ccall_classify_struct(cts, ctr); \
}
@ -56,7 +59,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
cc->stack[nsp++] = (GPRArg)dp;
CCALL_PUSH(dp);
#endif
@ -67,7 +70,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
cc->stack[nsp++] = (GPRArg)dp; \
CCALL_PUSH(dp); \
}
#endif
@ -278,8 +281,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@ -345,7 +348,6 @@
goto done; \
} else { \
nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \
} else { /* Try to pass argument in GPRs. */ \
if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
@ -356,7 +358,6 @@
goto done; \
} else { \
ngpr = maxgpr; /* Prevent reordering. */ \
if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \
}
@ -471,8 +472,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@ -565,8 +566,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@ -698,10 +699,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
if (ccall_struct_reg(cc, cts, dp, rcl)) {
/* Register overflow? Pass on stack. */
MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
cc->nsp = nsp + n;
memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
if (nsp + sz > CCALL_SIZE_STACK)
return 1; /* Too many arguments. */
cc->nsp = nsp + sz;
memcpy((uint8_t *)cc->stack + nsp, dp, sz);
}
return 0; /* Ok. */
}
@ -779,17 +781,24 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{
CTSize sz = ct->size;
unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
while (ct->sib) {
while (ct->sib && n <= 4) {
unsigned int m = 1;
CType *sct;
ct = ctype_get(cts, ct->sib);
if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct);
if (ctype_isarray(sct->info)) {
CType *cct = ctype_rawchild(cts, sct);
if (!cct->size) continue;
m = sct->size / cct->size;
sct = cct;
}
if (ctype_isfp(sct->info)) {
r |= sct->size;
if (!isu) n++; else if (n == 0) n = 1;
if (!isu) n += m; else if (n < m) n = m;
} else if (ctype_iscomplex(sct->info)) {
r |= (sct->size >> 1);
if (!isu) n += 2; else if (n < 2) n = 2;
if (!isu) n += 2*m; else if (n < 2*m) n = 2*m;
} else if (ctype_isstruct(sct->info)) {
goto substruct;
} else {
@ -801,10 +810,11 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
sct = ctype_rawchild(cts, ct);
substruct:
if (sct->size > 0) {
unsigned int s = ccall_classify_struct(cts, sct);
unsigned int s = ccall_classify_struct(cts, sct), sn;
if (s <= 1) goto noth;
r |= (s & 255);
if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
sn = (s >> 8) * m;
if (!isu) n += sn; else if (n < sn) n = sn;
}
}
}
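The added array handling means FP fields reached through an array now contribute their element count (m = sct->size / cct->size) instead of counting as one field, so e.g. struct { float v[3]; } classifies as a homogeneous FP aggregate with three members. A toy field counter (hypothetical, standalone) mirroring that idea:

/* Toy counter: an array of FP elements contributes its element count. */
#include <stdio.h>

typedef struct { int is_fp; unsigned size, elemsize; } Field; /* elemsize==size for scalars. */

static unsigned count_fp_fields(const Field *f, unsigned nf)
{
  unsigned i, n = 0;
  for (i = 0; i < nf; i++) {
    if (!f[i].is_fp) return 0;            /* Not a homogeneous FP aggregate. */
    n += f[i].size / f[i].elemsize;       /* Arrays count per element. */
  }
  return n;
}

int main(void)
{
  /* struct { float v[3]; }  and  struct { float x, y; float z[2]; } */
  const Field vec3[] = { { 1, 12, 4 } };
  const Field vec4[] = { { 1, 4, 4 }, { 1, 4, 4 }, { 1, 8, 4 } };
  printf("%u %u\n", count_fp_fields(vec3, 1), count_fp_fields(vec4, 3)); /* 3 4 */
  return 0;
}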
@ -983,6 +993,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
fid = ctf->sib;
}
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((ct->info & CTF_VARARG)) {
nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
ngpr = maxgpr;
nfpr = CCALL_NARG_FPR;
}
#endif
/* Walk through all passed arguments. */
for (o = L->base+1, narg = 1; o < top; o++, narg++) {
CTypeID did;
@ -1019,25 +1037,31 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_STRUCTARG
} else if (ctype_iscomplex(d->info)) {
CCALL_HANDLE_COMPLEXARG
} else {
} else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) {
sz = CTSIZE_PTR;
}
sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */
if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
nsp = (nsp + align) & ~align; /* Align argument on stack. */
if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
MSize align = (1u << ctype_align(d->info)) - 1;
if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1))
align = CTSIZE_PTR-1;
nsp = (nsp + align) & ~align;
}
if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
/* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
#else
dp = ((uint8_t *)cc->stack) + nsp;
#endif
nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL);
}
dp = &cc->stack[nsp];
nsp += n;
isva = 0;
done:
@ -1048,7 +1072,8 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
}
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
/* Extend passed integers to 32 bits at least. */
if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
if (ctype_isinteger_or_bool(d->info) && d->size < 4 &&
(!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) { /* Assumes LJ_LE. */
if (d->info & CTF_UNSIGNED)
*(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp :
(uint32_t)*(uint16_t *)dp;
@ -1095,14 +1120,17 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#endif
}
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((int32_t)nsp < 0) nsp = 0;
#endif
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = nsp;
cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
if (nsp > CCALL_SPS_FREE)
cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
return gcsteps;
}
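Since nsp is now a byte offset rather than a slot count, stack placement reduces to the usual round-up-to-alignment idiom, and CCALL_PACK_STACKARG (Apple arm64) lets small arguments share a slot instead of each taking CTSIZE_PTR bytes. A small standalone sketch with demo values (not LuaJIT code):

/* Standalone sketch: byte-based stack layout with packed small arguments. */
#include <stdint.h>
#include <stdio.h>

/* Round 'off' up to a power-of-two alignment 'al'. */
static uint32_t align_up(uint32_t off, uint32_t al)
{
  return (off + (al - 1)) & ~(al - 1);
}

int main(void)
{
  uint32_t nsp = 0;
  /* Packed layout: a char and a short share the first 8-byte slot. */
  uint32_t a = align_up(nsp, 1); nsp = a + 1;   /* char   at offset 0 */
  uint32_t b = align_up(nsp, 2); nsp = b + 2;   /* short  at offset 2 */
  uint32_t c = align_up(nsp, 8); nsp = c + 8;   /* double at offset 8 */
  printf("offsets: %u %u %u, total %u bytes\n", a, b, c, align_up(nsp, 8));
  return 0;                                     /* offsets: 0 2 8, total 16 bytes */
}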

src/lj_ccall.h

@ -1,6 +1,6 @@
/*
** FFI C call handling.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CCALL_H
@ -75,6 +75,9 @@ typedef union FPRArg {
#define CCALL_NARG_FPR 8
#define CCALL_NRET_FPR 4
#define CCALL_SPS_FREE 0
#if LJ_TARGET_OSX
#define CCALL_PACK_STACKARG 1
#endif
typedef intptr_t GPRArg;
typedef union FPRArg {
@ -139,6 +142,9 @@ typedef union FPRArg {
#ifndef CCALL_ALIGN_STACKARG
#define CCALL_ALIGN_STACKARG 1
#endif
#ifndef CCALL_PACK_STACKARG
#define CCALL_PACK_STACKARG 0
#endif
#ifndef CCALL_ALIGN_CALLSTATE
#define CCALL_ALIGN_CALLSTATE 8
#endif
@ -152,14 +158,15 @@ typedef union FPRArg {
LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
#define CCALL_MAXSTACK 32
#define CCALL_NUM_STACK 31
#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
/* -- C call state -------------------------------------------------------- */
typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
void (*func)(void); /* Pointer to called function. */
uint32_t spadj; /* Stack pointer adjustment. */
uint8_t nsp; /* Number of stack slots. */
uint8_t nsp; /* Number of bytes on stack. */
uint8_t retref; /* Return value by reference. */
#if LJ_TARGET_X64
uint8_t ngpr; /* Number of arguments in GPRs. */
@ -178,7 +185,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
#endif
GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */
GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
} CCallState;
/* -- C call handling ----------------------------------------------------- */

Some files were not shown because too many files have changed in this diff.