From 3f1f9e11f4f699ae94182d4cba158092f434a7f6 Mon Sep 17 00:00:00 2001
From: Mike Pall
Date: Tue, 8 Dec 2009 20:35:29 +0100
Subject: [PATCH] Fast forward to sync public repo.
Compile math.sinh(), math.cosh(), math.tanh() and math.random().
Compile various io.*() functions.
Drive the GC forward on string allocations in the parser.
Improve KNUM fuse vs. load heuristics.
Add abstract C call handling to IR.
---
doc/api.html | 2 +-
doc/changes.html | 25 +-
doc/contact.html | 8 +-
doc/luajit.html | 1 +
lib/bc.lua | 2 +-
lib/dump.lua | 40 ++-
src/Makefile.dep | 15 +-
src/buildvm.c | 14 +-
src/buildvm.h | 1 +
src/buildvm_asm.c | 8 +
src/buildvm_fold.c | 4 +
src/buildvm_peobj.c | 14 +-
src/buildvm_x86.dasc | 117 +++++----
src/lib_base.c | 13 +-
src/lib_io.c | 413 ++++++++++++++----------------
src/lib_math.c | 52 ++--
src/lib_string.c | 10 +-
src/lj_alloc.c | 6 +-
src/lj_api.c | 26 +-
src/lj_asm.c | 598 +++++++++++++++++++++++++------------------
src/lj_def.h | 1 +
src/lj_gc.c | 36 ++-
src/lj_gc.h | 6 +-
src/lj_ir.c | 36 +++
src/lj_ir.h | 114 +++++++--
src/lj_iropt.h | 6 +-
src/lj_lib.c | 2 +-
src/lj_lib.h | 5 +
src/lj_meta.c | 6 +-
src/lj_obj.h | 23 +-
src/lj_opt_fold.c | 129 +++++++---
src/lj_opt_loop.c | 6 +
src/lj_opt_mem.c | 31 +--
src/lj_opt_narrow.c | 2 +-
src/lj_parse.c | 1 +
src/lj_record.c | 125 ++++++++-
src/lj_snap.c | 28 +-
src/lj_state.h | 2 +-
src/lj_str.c | 14 +-
src/lj_str.h | 9 +-
src/lj_tab.c | 16 +-
src/lj_tab.h | 5 +-
src/lj_target_x86.h | 11 +-
src/lj_udata.c | 1 +
44 files changed, 1218 insertions(+), 766 deletions(-)
diff --git a/doc/api.html b/doc/api.html
index 3bb10967..874f7ae9 100644
--- a/doc/api.html
+++ b/doc/api.html
@@ -319,7 +319,7 @@ enable it after running luaL_openlibs .
LuaJIT already intercepts exception handling for systems using
-ELF/DWARF2 stack unwinding (e.g. Linux). This is a zero-cost mechanism
+DWARF2 stack unwinding (e.g. Linux, OSX). This is a zero-cost mechanism
and always enabled. You don't need to use any wrapper functions,
except when you want to get a more specific error message than
"C++ exception" .
diff --git a/doc/changes.html b/doc/changes.html
index 641f1e28..d9a3aadd 100644
--- a/doc/changes.html
+++ b/doc/changes.html
@@ -48,10 +48,27 @@ The current stable version is LuaJI
LuaJIT 2.0.0-beta2 — 2009-11-09
@@ -59,14 +76,14 @@ to see whether newer versions are available.
Allow C++ exception conversion on all platforms
using a wrapper function.
Automatically catch C++ exceptions and rethrow Lua error
-(ELF/DWARF2 only).
+(DWARF2 only).
Check for the correct x87 FPU precision at strategic points.
Always use wrappers for libm functions.
Resurrect metamethod name strings before copying them.
Mark current trace, even if compiler is idle.
Ensure FILE metatable is created only once.
Fix type comparisons when different integer types are involved.
-Fix getmetatable() recording.
+Fix getmetatable() recording.
Fix TDUP with dead keys in template table.
jit.flush(tr) returns status.
Prevent manual flush of a trace that's still linked.
@@ -234,7 +251,7 @@ on a separate line.
Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.
Miscellaneous doc changes. Added a section about
-embedding LuaJIT .
+embedding LuaJIT .
This release is in sync with Coco 1.1.0 (see the
diff --git a/doc/contact.html b/doc/contact.html
index 36d5a825..66d52410 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -46,17 +46,15 @@ You can also send any questions you have directly to me:
diff --git a/doc/luajit.html b/doc/luajit.html
index 9b16ea37..5a2b3689 100644
--- a/doc/luajit.html
+++ b/doc/luajit.html
@@ -8,6 +8,7 @@
+
diff --git a/lib/bc.lua b/lib/bc.lua
index 532f2493..6296174e 100644
--- a/lib/bc.lua
+++ b/lib/bc.lua
@@ -30,7 +30,7 @@
-- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello"
--
-- local out = {
--- -- Do something wich each line:
+-- -- Do something with each line:
-- write = function(t, ...) io.write(...) end,
-- close = function(t) end,
-- flush = function(t) end,
diff --git a/lib/dump.lua b/lib/dump.lua
index 9fde87c1..021fc1c9 100644
--- a/lib/dump.lua
+++ b/lib/dump.lua
@@ -144,7 +144,7 @@ local colortype_ansi = {
[0] = "%s",
"%s",
"%s",
- "%s",
+ "\027[36m%s\027[m",
"\027[32m%s\027[m",
"%s",
"\027[1m%s\027[m",
@@ -199,9 +199,9 @@ margin-right: 2em;
span.irt_str { color: #00a000; }
span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; }
span.irt_tab { color: #c00000; }
-span.irt_udt { color: #00c0c0; }
-span.irt_num { color: #0000c0; }
-span.irt_int { color: #c000c0; }
+span.irt_udt, span.irt_lud { color: #00c0c0; }
+span.irt_num { color: #4040c0; }
+span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
]]
@@ -210,7 +210,7 @@ local colorize, irtype
-- Lookup table to convert some literals into names.
local litname = {
["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", },
- ["XLOAD "] = { [0] = "", "unaligned", },
+ ["XLOAD "] = { [0] = "", "R", "U", "RU", },
["TOINT "] = { [0] = "check", "index", "", },
["FLOAD "] = vmdef.irfield,
["FREF "] = vmdef.irfield,
@@ -313,6 +313,27 @@ local function ridsp_name(ridsp)
return ""
end
+-- Recursively gather CALL* args and dump them.
+local function dumpcallargs(tr, ins)
+ if ins < 0 then
+ out:write(formatk(tr, ins))
+ else
+ local m, ot, op1, op2 = traceir(tr, ins)
+ local oidx = 6*shr(ot, 8)
+ local op = sub(vmdef.irnames, oidx+1, oidx+6)
+ if op == "CARG " then
+ dumpcallargs(tr, op1)
+ if op2 < 0 then
+ out:write(" ", formatk(tr, op2))
+ else
+ out:write(" ", format("%04d", op2))
+ end
+ else
+ out:write(format("%04d", ins))
+ end
+ end
+end
+
-- Dump IR and interleaved snapshots.
local function dump_ir(tr, dumpsnap, dumpreg)
local info = traceinfo(tr)
@@ -348,7 +369,8 @@ local function dump_ir(tr, dumpsnap, dumpreg)
else
out:write(format("%04d ------ LOOP ------------\n", ins))
end
- elseif op ~= "NOP " and (dumpreg or op ~= "RENAME") then
+ elseif op ~= "NOP " and op ~= "CARG " and
+ (dumpreg or op ~= "RENAME") then
if dumpreg then
out:write(format("%04d %-5s ", ins, ridsp_name(ridsp)))
else
@@ -359,7 +381,11 @@ local function dump_ir(tr, dumpsnap, dumpreg)
band(ot, 128) == 0 and " " or "+",
irtype[t], op))
local m1 = band(m, 3)
- if m1 ~= 3 then -- op1 != IRMnone
+ if sub(op, 1, 4) == "CALL" then
+ out:write(format("%-10s (", vmdef.ircall[op2]))
+ if op1 ~= -1 then dumpcallargs(tr, op1) end
+ out:write(")")
+ elseif m1 ~= 3 then -- op1 != IRMnone
if op1 < 0 then
out:write(formatk(tr, op1))
else
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 1fb81e27..779ee545 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -21,8 +21,9 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h
lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \
- lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \
+ lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h lj_traceerr.h \
+ lj_lib.h lj_libdef.h
lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \
lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \
@@ -45,9 +46,9 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h
lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \
- lj_target.h lj_target_x86.h
+ lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
+ lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_asm.h \
+ lj_vm.h lj_target.h lj_target_x86.h
lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h
lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h
lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
@@ -67,8 +68,8 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \
lj_ir.h lj_dispatch.h
lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
- lj_traceerr.h
+ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
+ lj_bc.h lj_traceerr.h lj_lib.h
lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h
lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
diff --git a/src/buildvm.c b/src/buildvm.c
index b3738db4..4aba39d4 100644
--- a/src/buildvm.c
+++ b/src/buildvm.c
@@ -215,12 +215,19 @@ IRFPMDEF(FPMNAME)
};
const char *const irfield_names[] = {
-#define FLNAME(name, type, field) #name,
+#define FLNAME(name, ofs) #name,
IRFLDEF(FLNAME)
#undef FLNAME
NULL
};
+const char *const ircall_names[] = {
+#define IRCALLNAME(name, nargs, kind, type, flags) #name,
+IRCALLDEF(IRCALLNAME)
+#undef IRCALLNAME
+ NULL
+};
+
static const char *const trace_errors[] = {
#define TREDEF(name, msg) msg,
#include "lj_traceerr.h"
@@ -269,6 +276,11 @@ static void emit_vmdef(BuildCtx *ctx)
}
fprintf(ctx->fp, "}\n\n");
+ fprintf(ctx->fp, "ircall = {\n[0]=");
+ for (i = 0; ircall_names[i]; i++)
+ fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
+ fprintf(ctx->fp, "}\n\n");
+
fprintf(ctx->fp, "traceerr = {\n[0]=");
for (i = 0; trace_errors[i]; i++)
fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
diff --git a/src/buildvm.h b/src/buildvm.h
index e55527fd..53c820ad 100644
--- a/src/buildvm.h
+++ b/src/buildvm.h
@@ -102,5 +102,6 @@ extern const char *const bc_names[];
extern const char *const ir_names[];
extern const char *const irfpm_names[];
extern const char *const irfield_names[];
+extern const char *const ircall_names[];
#endif
diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c
index 5daab13b..31b6f61e 100644
--- a/src/buildvm_asm.c
+++ b/src/buildvm_asm.c
@@ -26,6 +26,14 @@ static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n)
static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r)
{
const char *sym = ctx->extnames[r->sym];
+ const char *p = strchr(sym, '@');
+ char buf[80];
+ if (p) {
+ /* Always strip fastcall suffix. Wrong for (unused) COFF on Win32. */
+ strncpy(buf, sym, p-sym);
+ buf[p-sym] = '\0';
+ sym = buf;
+ }
switch (ctx->mode) {
case BUILD_elfasm:
if (r->type)
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c
index 271118e0..77af3dc5 100644
--- a/src/buildvm_fold.c
+++ b/src/buildvm_fold.c
@@ -107,6 +107,10 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
for (i = 0; irfield_names[i]; i++)
if (!strcmp(irfield_names[i], p+5))
return i;
+ } else if (allowlit && !strncmp(p, "IRCALL_", 7)) {
+ for (i = 0; ircall_names[i]; i++)
+ if (!strcmp(ircall_names[i], p+7))
+ return i;
} else if (allowany && !strcmp("any", p)) {
return 0xff;
} else {
diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c
index 1a8661bf..a24ae727 100644
--- a/src/buildvm_peobj.c
+++ b/src/buildvm_peobj.c
@@ -85,6 +85,7 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */
#define PEOBJ_RELOC_DIR32 0x06
#define PEOBJ_SYM_PREFIX "_"
+#define PEOBJ_SYMF_PREFIX "@"
#elif LJ_TARGET_X64
#define PEOBJ_ARCH_TARGET 0x8664
#define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */
@@ -260,7 +261,18 @@ void emit_peobj(BuildCtx *ctx)
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT);
for (i = 0; ctx->extnames[i]; i++) {
- sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]);
+ const char *sym = ctx->extnames[i];
+ const char *p = strchr(sym, '@');
+ if (p) {
+#ifdef PEOBJ_SYMF_PREFIX
+ sprintf(name, PEOBJ_SYMF_PREFIX "%s", sym);
+#else
+ strncpy(name, sym, p-sym);
+ name[p-sym] = '\0';
+#endif
+ } else {
+ sprintf(name, PEOBJ_SYM_PREFIX "%s", sym);
+ }
emit_peobj_sym(ctx, name, 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
}
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 09cfa6dc..e857a6be 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -30,6 +30,9 @@
|.define RD, RC
|.define RDL, RCL
|
+|.define FCARG1, ecx // Fastcall arguments.
+|.define FCARG2, edx
+|
|// Type definitions. Some of these are only used for documentation.
|.type L, lua_State
|.type GL, global_State
@@ -1066,7 +1069,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
| mov RB, LJ_TNUMX
|7:
| not RB
- | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)]
+ | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
| jmp <2
|
|.ffunc_2 setmetatable
@@ -1126,17 +1129,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
| jmp ->fff_res1
|3: // Handle numbers inline, unless a number base metatable is present.
| cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
- | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0
+ | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
| jne ->fff_fallback
| ffgccheck // Caveat: uses label 1.
| mov L:RB, SAVE_L
- | mov ARG1, L:RB
- | mov ARG2, RA
| mov L:RB->base, RA // Add frame since C call can throw.
| mov [RA-4], PC
| mov SAVE_PC, PC // Redundant (but a defined value).
| mov ARG3, BASE // Save BASE.
- | call extern lj_str_fromnum // (lua_State *L, lua_Number *np)
+ | mov FCARG2, RA // Caveat: FCARG2 == BASE
+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
+ | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
| // GCstr returned in eax (RC).
| mov RA, L:RB->base
| mov BASE, ARG3
@@ -1762,11 +1765,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
|
|.ffunc_1 table_getn
| cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
- | mov TAB:RB, [RA]
- | mov ARG1, TAB:RB
- | mov RB, RA // Save RA and BASE.
- | mov ARG2, BASE
- | call extern lj_tab_len // (GCtab *t)
+ | mov ARG2, BASE // Save RA and BASE.
+ | mov RB, RA
+ | mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA
+ | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
| // Length of table returned in eax (RC).
| mov ARG1, RC
| mov RA, RB // Restore RA and BASE.
@@ -2512,10 +2514,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| ins_next
|2:
| checktab RD, ->vmeta_len
- | mov TAB:RD, [BASE+RD*8]
- | mov ARG1, TAB:RD
+ | mov TAB:FCARG1, [BASE+RD*8]
| mov RB, BASE // Save BASE.
- | call extern lj_tab_len // (GCtab *t)
+ | call extern lj_tab_len@4 // (GCtab *t)
| // Length of table returned in eax (RC).
| mov ARG1, RC
| mov BASE, RB // Restore BASE.
@@ -2665,66 +2666,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| ins_next
break;
case BC_USETV:
+#define TV2MARKOFS \
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
| ins_AD // RA = upvalue #, RD = src
- | // Really ugly code due to the lack of a 4th free register.
| mov LFUNC:RB, [BASE-8]
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
- | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
- | jnz >4
- |1:
- | mov RA, [BASE+RD*8]
- |2:
+ | cmp byte UPVAL:RB->closed, 0
| mov RB, UPVAL:RB->v
+ | mov RA, [BASE+RD*8]
| mov RD, [BASE+RD*8+4]
| mov [RB], RA
| mov [RB+4], RD
- |3:
+ | jz >1
+ | // Check barrier for closed upvalue.
+ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
+ | jnz >2
+ |1:
| ins_next
|
- |4: // Upvalue is black. Check if new value is collectable and white.
- | mov RA, [BASE+RD*8+4]
- | sub RA, LJ_TISGCV
- | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
+ |2: // Upvalue is black. Check if new value is collectable and white.
+ | sub RD, LJ_TISGCV
+ | cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
| jbe <1
- | mov GCOBJ:RA, [BASE+RD*8]
| test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
- | jz <2
- | // Crossed a write barrier. So move the barrier forward.
- | mov ARG2, UPVAL:RB
- | mov ARG3, GCOBJ:RA
- | mov RB, UPVAL:RB->v
- | mov RD, [BASE+RD*8+4]
- | mov [RB], GCOBJ:RA
- | mov [RB+4], RD
- |->BC_USETV_Z:
- | mov L:RB, SAVE_L
- | lea GL:RA, [DISPATCH+GG_DISP2G]
- | mov L:RB->base, BASE
- | mov ARG1, GL:RA
- | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v)
- | mov BASE, L:RB->base
- | jmp <3
+ | jz <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
+ | mov BASE, RB // Restore BASE.
+ | jmp <1
break;
+#undef TV2MARKOFS
case BC_USETS:
| ins_AND // RA = upvalue #, RD = str const (~)
| mov LFUNC:RB, [BASE-8]
- | mov GCOBJ:RD, [KBASE+RD*4]
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
- | mov RA, UPVAL:RB->v
- | mov dword [RA+4], LJ_TSTR
- | mov [RA], GCOBJ:RD
+ | mov GCOBJ:RA, [KBASE+RD*4]
+ | mov RD, UPVAL:RB->v
+ | mov [RD], GCOBJ:RA
+ | mov dword [RD+4], LJ_TSTR
| test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
| jnz >2
|1:
| ins_next
|
- |2: // Upvalue is black. Check if string is white.
- | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str)
+ |2: // Check if string is white and ensure upvalue is closed.
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
| jz <1
- | // Crossed a write barrier. So move the barrier forward.
- | mov ARG3, GCOBJ:RD
- | mov ARG2, UPVAL:RB
- | jmp ->BC_USETV_Z
+ | cmp byte UPVAL:RB->closed, 0
+ | jz <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov RB, BASE // Save BASE (FCARG2 == BASE).
+ | mov FCARG2, RD
+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
+ | mov BASE, RB // Restore BASE.
+ | jmp <1
break;
case BC_USETN:
| ins_AD // RA = upvalue #, RD = num const
@@ -2808,23 +2806,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| mov dword [BASE+RA*8+4], LJ_TTAB
| ins_next
|2:
- | call extern lj_gc_step_fixtop // (lua_State *L)
- | mov ARG1, L:RB // Args owned by callee. Set it again.
+ | mov L:FCARG1, L:RB
+ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
| jmp <1
break;
case BC_TDUP:
| ins_AND // RA = dst, RD = table const (~) (holding template table)
- | mov TAB:RD, [KBASE+RD*4]
| mov L:RB, SAVE_L
- | mov ARG2, TAB:RD
- | mov ARG1, L:RB
| mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
| mov SAVE_PC, PC
| cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
| mov L:RB->base, BASE
| jae >3
|2:
- | call extern lj_tab_dup // (lua_State *L, Table *kt)
+ | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
+ | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
| // Table * returned in eax (RC).
| mov BASE, L:RB->base
| movzx RA, PC_RA
@@ -2832,8 +2829,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| mov dword [BASE+RA*8+4], LJ_TTAB
| ins_next
|3:
- | call extern lj_gc_step_fixtop // (lua_State *L)
- | mov ARG1, L:RB // Args owned by callee. Set it again.
+ | mov L:FCARG1, L:RB
+ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
+ | movzx RD, PC_RD // Need to reload RD.
+ | not RD
| jmp <2
break;
diff --git a/src/lib_base.c b/src/lib_base.c
index 6b9e8eef..821c81b4 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -183,7 +183,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
int32_t base = lj_lib_optint(L, 2, 10);
if (base == 10) {
TValue *o = lj_lib_checkany(L, 1);
- if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) {
+ if (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))) {
setnumV(L->base-1, numV(o));
return FFH_RES(1);
}
@@ -206,6 +206,9 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
return FFH_RES(1);
}
+LJLIB_PUSH("nil")
+LJLIB_PUSH("false")
+LJLIB_PUSH("true")
LJLIB_ASM(tostring) LJLIB_REC(.)
{
TValue *o = lj_lib_checkany(L, 1);
@@ -218,12 +221,8 @@ LJLIB_ASM(tostring) LJLIB_REC(.)
GCstr *s;
if (tvisnum(o)) {
s = lj_str_fromnum(L, &o->n);
- } else if (tvisnil(o)) {
- s = lj_str_newlit(L, "nil");
- } else if (tvisfalse(o)) {
- s = lj_str_newlit(L, "false");
- } else if (tvistrue(o)) {
- s = lj_str_newlit(L, "true");
+ } else if (tvispri(o)) {
+ s = strV(lj_lib_upvalue(L, -itype(o)));
} else {
if (tvisfunc(o) && isffunc(funcV(o)))
lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid);
diff --git a/src/lib_io.c b/src/lib_io.c
index aefe4213..d69b99a4 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -17,14 +17,28 @@
#include "lualib.h"
#include "lj_obj.h"
-#include "lj_err.h"
#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
#include "lj_ff.h"
+#include "lj_trace.h"
#include "lj_lib.h"
-/* Index of standard handles in function environment. */
-#define IO_INPUT 1
-#define IO_OUTPUT 2
+/* Userdata payload for I/O file. */
+typedef struct IOFileUD {
+ FILE *fp; /* File handle. */
+ uint32_t type; /* File type. */
+} IOFileUD;
+
+#define IOFILE_TYPE_FILE 0 /* Regular file. */
+#define IOFILE_TYPE_PIPE 1 /* Pipe. */
+#define IOFILE_TYPE_STDF 2 /* Standard file handle. */
+#define IOFILE_TYPE_MASK 3
+
+#define IOFILE_FLAG_CLOSE 4 /* Close after io.lines() iterator. */
+
+#define IOSTDF_UD(L, id) (&gcref(G(L)->gcroot[(id)])->ud)
+#define IOSTDF_IOF(L, id) ((IOFileUD *)uddata(IOSTDF_UD(L, (id))))
/* -- Error handling ------------------------------------------------------ */
@@ -35,95 +49,102 @@ static int io_pushresult(lua_State *L, int ok, const char *fname)
return 1;
} else {
int en = errno; /* Lua API calls may change this value. */
- lua_pushnil(L);
+ setnilV(L->top++);
if (fname)
lua_pushfstring(L, "%s: %s", fname, strerror(en));
else
lua_pushfstring(L, "%s", strerror(en));
- lua_pushinteger(L, en);
+ setintV(L->top++, en);
+ lj_trace_abort(G(L));
return 3;
}
}
-static void io_file_error(lua_State *L, int arg, const char *fname)
+/* -- Open/close helpers -------------------------------------------------- */
+
+static IOFileUD *io_tofilep(lua_State *L)
{
- lua_pushfstring(L, "%s: %s", fname, strerror(errno));
- luaL_argerror(L, arg, lua_tostring(L, -1));
+ if (!(L->base < L->top && tvisudata(L->base) &&
+ udataV(L->base)->udtype == UDTYPE_IO_FILE))
+ lj_err_argtype(L, 1, "FILE*");
+ return (IOFileUD *)uddata(udataV(L->base));
}
-/* -- Open helpers -------------------------------------------------------- */
-
-#define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE))
-
-static FILE *io_tofile(lua_State *L)
+static IOFileUD *io_tofile(lua_State *L)
{
- FILE **f = io_tofilep(L);
- if (*f == NULL)
+ IOFileUD *iof = io_tofilep(L);
+ if (iof->fp == NULL)
lj_err_caller(L, LJ_ERR_IOCLFL);
- return *f;
+ return iof;
}
-static FILE **io_file_new(lua_State *L)
+static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
{
- FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *));
- *pf = NULL;
- luaL_getmetatable(L, LUA_FILEHANDLE);
- lua_setmetatable(L, -2);
- return pf;
+ IOFileUD *iof = IOSTDF_IOF(L, id);
+ if (iof->fp == NULL)
+ lj_err_caller(L, LJ_ERR_IOSTDCL);
+ return iof->fp;
}
-/* -- Close helpers ------------------------------------------------------- */
-
-static int lj_cf_io_std_close(lua_State *L)
+static IOFileUD *io_file_new(lua_State *L)
{
- lua_pushnil(L);
- lua_pushliteral(L, "cannot close standard file");
- return 2;
+ IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
+ GCudata *ud = udataV(L->top-1);
+ ud->udtype = UDTYPE_IO_FILE;
+ /* NOBARRIER: The GCudata is new (marked white). */
+ setgcrefr(ud->metatable, curr_func(L)->c.env);
+ iof->fp = NULL;
+ iof->type = IOFILE_TYPE_FILE;
+ return iof;
}
-static int lj_cf_io_pipe_close(lua_State *L)
+static IOFileUD *io_file_open(lua_State *L, const char *mode)
{
- FILE **p = io_tofilep(L);
+ const char *fname = strdata(lj_lib_checkstr(L, 1));
+ IOFileUD *iof = io_file_new(L);
+ iof->fp = fopen(fname, mode);
+ if (iof->fp == NULL)
+ luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+ return iof;
+}
+
+static int io_file_close(lua_State *L, IOFileUD *iof)
+{
+ int ok;
+ if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_FILE) {
+ ok = (fclose(iof->fp) == 0);
+ } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) {
#if defined(LUA_USE_POSIX)
- int ok = (pclose(*p) != -1);
+ ok = (pclose(iof->fp) != -1);
#elif defined(LUA_USE_WIN)
- int ok = (_pclose(*p) != -1);
+ ok = (_pclose(iof->fp) != -1);
#else
- int ok = 0;
+ ok = 0;
#endif
- *p = NULL;
+ } else {
+ lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF);
+ setnilV(L->top++);
+ lua_pushliteral(L, "cannot close standard file");
+ return 2;
+ }
+ iof->fp = NULL;
return io_pushresult(L, ok, NULL);
}
-static int lj_cf_io_file_close(lua_State *L)
-{
- FILE **p = io_tofilep(L);
- int ok = (fclose(*p) == 0);
- *p = NULL;
- return io_pushresult(L, ok, NULL);
-}
-
-static int io_file_close(lua_State *L)
-{
- lua_getfenv(L, 1);
- lua_getfield(L, -1, "__close");
- return (lua_tocfunction(L, -1))(L);
-}
-
/* -- Read/write helpers -------------------------------------------------- */
static int io_file_readnum(lua_State *L, FILE *fp)
{
lua_Number d;
if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
- lua_pushnumber(L, d);
+ setnumV(L->top++, d);
return 1;
} else {
- return 0; /* read fails */
+ return 0;
}
}
-static int test_eof(lua_State *L, FILE *fp)
+static int io_file_testeof(lua_State *L, FILE *fp)
{
int c = getc(fp);
ungetc(c, fp);
@@ -168,7 +189,7 @@ static int io_file_readchars(lua_State *L, FILE *fp, size_t n)
n -= nr; /* still have to read `n' chars */
} while (n > 0 && nr == rlen); /* until end of count or eof */
luaL_pushresult(&b); /* close buffer */
- return (n == 0 || lua_objlen(L, -1) > 0);
+ return (n == 0 || strV(L->top-1)->len > 0);
}
static int io_file_read(lua_State *L, FILE *fp, int start)
@@ -197,7 +218,7 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
lj_err_arg(L, n+1, LJ_ERR_INVFMT);
} else if (tvisnum(L->base+n)) {
size_t len = (size_t)lj_lib_checkint(L, n+1);
- ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp);
+ ok = len ? io_file_readchars(L, fp, len) : io_file_testeof(L, fp);
} else {
lj_err_arg(L, n+1, LJ_ERR_INVOPT);
}
@@ -233,30 +254,29 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
LJLIB_CF(io_method_close)
{
- if (lua_isnone(L, 1))
- lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT);
- io_tofile(L);
- return io_file_close(L);
+ IOFileUD *iof = L->base < L->top ? io_tofile(L) :
+ IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
+ return io_file_close(L, iof);
}
LJLIB_CF(io_method_read)
{
- return io_file_read(L, io_tofile(L), 1);
+ return io_file_read(L, io_tofile(L)->fp, 1);
}
-LJLIB_CF(io_method_write)
+LJLIB_CF(io_method_write) LJLIB_REC(io_write 0)
{
- return io_file_write(L, io_tofile(L), 1);
+ return io_file_write(L, io_tofile(L)->fp, 1);
}
-LJLIB_CF(io_method_flush)
+LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
{
- return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL);
+ return io_pushresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
}
LJLIB_CF(io_method_seek)
{
- FILE *fp = io_tofile(L);
+ FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
lua_Number ofs;
int res;
@@ -294,39 +314,40 @@ LJLIB_CF(io_method_seek)
LJLIB_CF(io_method_setvbuf)
{
- FILE *fp = io_tofile(L);
+ FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no");
size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE);
if (opt == 0) opt = _IOFBF;
else if (opt == 1) opt = _IOLBF;
else if (opt == 2) opt = _IONBF;
- return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL);
+ return io_pushresult(L, setvbuf(fp, NULL, opt, sz) == 0, NULL);
}
-/* Forward declaration. */
-static void io_file_lines(lua_State *L, int idx, int toclose);
-
+LJLIB_PUSH(top-2) /* io_lines_iter */
LJLIB_CF(io_method_lines)
{
io_tofile(L);
- io_file_lines(L, 1, 0);
- return 1;
+ setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1)));
+ setudataV(L, L->top+1, udataV(L->base));
+ L->top += 2;
+ return 2;
}
LJLIB_CF(io_method___gc)
{
- FILE *fp = *io_tofilep(L);
- if (fp != NULL) io_file_close(L);
+ IOFileUD *iof = io_tofilep(L);
+ if (iof->fp != NULL)
+ io_file_close(L, iof);
return 0;
}
LJLIB_CF(io_method___tostring)
{
- FILE *fp = *io_tofilep(L);
- if (fp == NULL)
- lua_pushliteral(L, "file (closed)");
+ IOFileUD *iof = io_tofilep(L);
+ if (iof->fp != NULL)
+ lua_pushfstring(L, "file (%p)", iof->fp);
else
- lua_pushfstring(L, "file (%p)", fp);
+ lua_pushliteral(L, "file (closed)");
return 1;
}
@@ -340,30 +361,41 @@ LJLIB_PUSH(top-1) LJLIB_SET(__index)
LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
-static FILE *io_file_get(lua_State *L, int findex)
-{
- GCtab *fenv = tabref(curr_func(L)->c.env);
- GCudata *ud = udataV(&tvref(fenv->array)[findex]);
- FILE *fp = *(FILE **)uddata(ud);
- if (fp == NULL)
- lj_err_caller(L, LJ_ERR_IOSTDCL);
- return fp;
-}
-
LJLIB_CF(io_open)
{
- const char *fname = luaL_checkstring(L, 1);
- const char *mode = luaL_optstring(L, 2, "r");
- FILE **pf = io_file_new(L);
- *pf = fopen(fname, mode);
- return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
+ const char *fname = strdata(lj_lib_checkstr(L, 1));
+ GCstr *s = lj_lib_optstr(L, 2);
+ const char *mode = s ? strdata(s) : "r";
+ IOFileUD *iof = io_file_new(L);
+ iof->fp = fopen(fname, mode);
+ return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname);
+}
+
+LJLIB_CF(io_popen)
+{
+#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
+ const char *fname = strdata(lj_lib_checkstr(L, 1));
+ GCstr *s = lj_lib_optstr(L, 2);
+ const char *mode = s ? strdata(s) : "r";
+ IOFileUD *iof = io_file_new(L);
+ iof->type = IOFILE_TYPE_PIPE;
+#ifdef LUA_USE_POSIX
+ fflush(NULL);
+ iof->fp = popen(fname, mode);
+#else
+ iof->fp = _popen(fname, mode);
+#endif
+ return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname);
+#else
+ luaL_error(L, LUA_QL("popen") " not supported");
+#endif
}
LJLIB_CF(io_tmpfile)
{
- FILE **pf = io_file_new(L);
- *pf = tmpfile();
- return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1;
+ IOFileUD *iof = io_file_new(L);
+ iof->fp = tmpfile();
+ return iof->fp != NULL ? 1 : io_pushresult(L, 0, NULL);
}
LJLIB_CF(io_close)
@@ -373,169 +405,112 @@ LJLIB_CF(io_close)
LJLIB_CF(io_read)
{
- return io_file_read(L, io_file_get(L, IO_INPUT), 0);
+ return io_file_read(L, io_stdfile(L, GCROOT_IO_INPUT), 0);
}
-LJLIB_CF(io_write)
+LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT)
{
- return io_file_write(L, io_file_get(L, IO_OUTPUT), 0);
+ return io_file_write(L, io_stdfile(L, GCROOT_IO_OUTPUT), 0);
}
-LJLIB_CF(io_flush)
+LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
{
- return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL);
+ return io_pushresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
}
-LJLIB_NOREG LJLIB_CF(io_lines_iter)
+static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)
{
- FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1)));
- int ok;
- if (fp == NULL)
- lj_err_caller(L, LJ_ERR_IOCLFL);
- ok = io_file_readline(L, fp);
- if (ferror(fp))
- return luaL_error(L, "%s", strerror(errno));
- if (ok)
- return 1;
- if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */
- L->top = L->base+1;
- setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1)));
- io_file_close(L);
- }
- return 0;
-}
-
-static void io_file_lines(lua_State *L, int idx, int toclose)
-{
- lua_pushvalue(L, idx);
- lua_pushboolean(L, toclose);
- lua_pushcclosure(L, lj_cf_io_lines_iter, 2);
- funcV(L->top-1)->c.ffid = FF_io_lines_iter;
-}
-
-LJLIB_CF(io_lines)
-{
- if (lua_isnoneornil(L, 1)) { /* no arguments? */
- /* will iterate over default input */
- lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT);
- return lj_cf_io_method_lines(L);
- } else {
- const char *fname = luaL_checkstring(L, 1);
- FILE **pf = io_file_new(L);
- *pf = fopen(fname, "r");
- if (*pf == NULL)
- io_file_error(L, 1, fname);
- io_file_lines(L, lua_gettop(L), 1);
- return 1;
- }
-}
-
-static int io_std_get(lua_State *L, int fp, const char *mode)
-{
- if (!lua_isnoneornil(L, 1)) {
- const char *fname = lua_tostring(L, 1);
- if (fname) {
- FILE **pf = io_file_new(L);
- *pf = fopen(fname, mode);
- if (*pf == NULL)
- io_file_error(L, 1, fname);
+ if (L->base < L->top && !tvisnil(L->base)) {
+ if (tvisudata(L->base)) {
+ io_tofile(L);
+ L->top = L->base+1;
} else {
- io_tofile(L); /* check that it's a valid file handle */
- lua_pushvalue(L, 1);
+ io_file_open(L, mode);
}
- lua_rawseti(L, LUA_ENVIRONINDEX, fp);
+ /* NOBARRIER: The standard I/O handles are GC roots. */
+ setgcref(G(L)->gcroot[id], gcV(L->top-1));
+ } else {
+ setudataV(L, L->top++, IOSTDF_UD(L, id));
}
- /* return current value */
- lua_rawgeti(L, LUA_ENVIRONINDEX, fp);
return 1;
}
LJLIB_CF(io_input)
{
- return io_std_get(L, IO_INPUT, "r");
+ return io_std_getset(L, GCROOT_IO_INPUT, "r");
}
LJLIB_CF(io_output)
{
- return io_std_get(L, IO_OUTPUT, "w");
+ return io_std_getset(L, GCROOT_IO_OUTPUT, "w");
+}
+
+LJLIB_NOREG LJLIB_CF(io_lines_iter)
+{
+ IOFileUD *iof = io_tofile(L);
+ int ok = io_file_readline(L, iof->fp);
+ if (ferror(iof->fp))
+ lj_err_callermsg(L, strerror(errno));
+ if (!ok && (iof->type & IOFILE_FLAG_CLOSE))
+ io_file_close(L, iof); /* Return values are ignored (ok is 0). */
+ return ok;
+}
+
+LJLIB_PUSH(top-3) /* io_lines_iter */
+LJLIB_CF(io_lines)
+{
+ if (L->base < L->top && !tvisnil(L->base)) { /* io.lines(fname) */
+ IOFileUD *iof = io_file_open(L, "r");
+ iof->type = IOFILE_TYPE_FILE|IOFILE_FLAG_CLOSE;
+ setfuncV(L, L->top-2, funcV(lj_lib_upvalue(L, 1)));
+ } else { /* io.lines() iterates over stdin. */
+ setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1)));
+ setudataV(L, L->top+1, IOSTDF_UD(L, GCROOT_IO_INPUT));
+ L->top += 2;
+ }
+ return 2;
}
LJLIB_CF(io_type)
{
- void *ud;
- luaL_checkany(L, 1);
- ud = lua_touserdata(L, 1);
- lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
- if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1))
- lua_pushnil(L); /* not a file */
- else if (*((FILE **)ud) == NULL)
- lua_pushliteral(L, "closed file");
- else
+ cTValue *o = lj_lib_checkany(L, 1);
+ if (!(tvisudata(o) && udataV(o)->udtype == UDTYPE_IO_FILE))
+ setnilV(L->top++);
+ else if (((IOFileUD *)uddata(udataV(o)))->fp != NULL)
lua_pushliteral(L, "file");
+ else
+ lua_pushliteral(L, "closed file");
return 1;
}
-LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */
-
-LJLIB_CF(io_popen)
-{
-#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
- const char *fname = luaL_checkstring(L, 1);
- const char *mode = luaL_optstring(L, 2, "r");
- FILE **pf = io_file_new(L);
-#ifdef LUA_USE_POSIX
- fflush(NULL);
- *pf = popen(fname, mode);
-#else
- *pf = _popen(fname, mode);
-#endif
- return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
-#else
- luaL_error(L, LUA_QL("popen") " not supported");
-#endif
-}
-
#include "lj_libdef.h"
/* ------------------------------------------------------------------------ */
-static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname)
+static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name)
{
- FILE **pf = io_file_new(L);
+ IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
GCudata *ud = udataV(L->top-1);
- GCtab *envt = tabV(L->top-2);
- *pf = fp;
- setgcref(ud->env, obj2gco(envt));
- lj_gc_objbarrier(L, obj2gco(ud), envt);
- if (k > 0) {
- lua_pushvalue(L, -1);
- lua_rawseti(L, -5, k);
- }
- lua_setfield(L, -3, fname);
-}
-
-static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls)
-{
- lua_createtable(L, narr, 1);
- lua_pushcfunction(L, cls);
- lua_setfield(L, -2, "__close");
+ ud->udtype = UDTYPE_IO_FILE;
+ /* NOBARRIER: The GCudata is new (marked white). */
+ setgcref(ud->metatable, gcV(L->top-3));
+ iof->fp = fp;
+ iof->type = IOFILE_TYPE_STDF;
+ lua_setfield(L, -2, name);
+ return obj2gco(ud);
}
LUALIB_API int luaopen_io(lua_State *L)
{
- lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
- if (tvisnil(L->top-1)) {
- LJ_LIB_REG_(L, NULL, io_method);
- lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
- }
- io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */
- io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */
+ lua_pushcfunction(L, lj_cf_io_lines_iter);
+ funcV(L->top-1)->c.ffid = FF_io_lines_iter;
+ LJ_LIB_REG_(L, NULL, io_method);
+ copyTV(L, L->top, L->top-1); L->top++;
+ lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
LJ_LIB_REG(L, io);
- io_fenv_new(L, 0, lj_cf_io_std_close);
- io_std_new(L, stdin, IO_INPUT, "stdin");
- io_std_new(L, stdout, IO_OUTPUT, "stdout");
- io_std_new(L, stderr, 0, "stderr");
- L->top--;
+ setgcref(G(L)->gcroot[GCROOT_IO_INPUT], io_std_new(L, stdin, "stdin"));
+ setgcref(G(L)->gcroot[GCROOT_IO_OUTPUT], io_std_new(L, stdout, "stdout"));
+ io_std_new(L, stderr, "stderr");
return 1;
}
diff --git a/src/lib_math.c b/src/lib_math.c
index adc77c9d..f3803e8f 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -36,9 +36,9 @@ LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
-LJLIB_ASM_(math_sinh)
-LJLIB_ASM_(math_cosh)
-LJLIB_ASM_(math_tanh)
+LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh)
+LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh)
+LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
LJLIB_ASM_(math_frexp)
LJLIB_ASM_(math_modf) LJLIB_REC(.)
@@ -82,35 +82,33 @@ LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); }
*/
/* PRNG state. */
-typedef struct TW223State {
+struct RandomState {
uint64_t gen[4]; /* State of the 4 LFSR generators. */
int valid; /* State is valid. */
-} TW223State;
+};
/* Union needed for bit-pattern conversion between uint64_t and double. */
typedef union { uint64_t u64; double d; } U64double;
/* Update generator i and compute a running xor of all states. */
#define TW223_GEN(i, k, q, s) \
- z = tw->gen[i]; \
+ z = rs->gen[i]; \
z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
- r ^= z; tw->gen[i] = z;
+ r ^= z; rs->gen[i] = z;
/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
-static LJ_NOINLINE double tw223_step(TW223State *tw)
+LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
{
uint64_t z, r = 0;
- U64double u;
TW223_GEN(0, 63, 31, 18)
TW223_GEN(1, 58, 19, 28)
TW223_GEN(2, 55, 24, 7)
TW223_GEN(3, 47, 21, 8)
- u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52);
- return u.d;
+ return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
}
/* PRNG initialization function. */
-static void tw223_init(TW223State *tw, double d)
+static void random_init(RandomState *rs, double d)
{
uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
int i;
@@ -120,22 +118,24 @@ static void tw223_init(TW223State *tw, double d)
r >>= 8;
u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */
- tw->gen[i] = u.u64;
+ rs->gen[i] = u.u64;
}
- tw->valid = 1;
+ rs->valid = 1;
for (i = 0; i < 10; i++)
- tw223_step(tw);
+ lj_math_random_step(rs);
}
/* PRNG extract function. */
-LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */
-LJLIB_CF(math_random)
+LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
+LJLIB_CF(math_random) LJLIB_REC(.)
{
int n = cast_int(L->top - L->base);
- TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ U64double u;
double d;
- if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0);
- d = tw223_step(tw) - 1.0;
+ if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0);
+ u.u64 = lj_math_random_step(rs);
+ d = u.d - 1.0;
if (n > 0) {
double r1 = lj_lib_checknum(L, 1);
if (n == 1) {
@@ -150,11 +150,11 @@ LJLIB_CF(math_random)
}
/* PRNG seed function. */
-LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */
+LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
LJLIB_CF(math_randomseed)
{
- TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
- tw223_init(tw, lj_lib_checknum(L, 1));
+ RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ random_init(rs, lj_lib_checknum(L, 1));
return 0;
}
@@ -164,9 +164,9 @@ LJLIB_CF(math_randomseed)
LUALIB_API int luaopen_math(lua_State *L)
{
- TW223State *tw;
- tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State));
- tw->valid = 0; /* Use lazy initialization to save some time on startup. */
+ RandomState *rs;
+ rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
+ rs->valid = 0; /* Use lazy initialization to save some time on startup. */
LJ_LIB_REG(L, math);
#if defined(LUA_COMPAT_MOD)
lua_getfield(L, -1, "fmod");
diff --git a/src/lib_string.c b/src/lib_string.c
index 6c857328..e7ad12df 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -776,16 +776,18 @@ LUALIB_API int luaopen_string(lua_State *L)
{
GCtab *mt;
GCstr *mmstr;
+ global_State *g;
LJ_LIB_REG(L, string);
#if defined(LUA_COMPAT_GFIND)
lua_getfield(L, -1, "gmatch");
lua_setfield(L, -2, "gfind");
#endif
mt = lj_tab_new(L, 0, 1);
- /* NOBARRIER: G(L)->mmname[] is a GC root. */
- setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt));
- mmstr = strref(G(L)->mmname[MM_index]);
- if (isdead(G(L), obj2gco(mmstr))) flipwhite(obj2gco(mmstr));
+ /* NOBARRIER: basemt is a GC root. */
+ g = G(L);
+ setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
+ mmstr = strref(g->mmname[MM_index]);
+ if (isdead(g, obj2gco(mmstr))) flipwhite(obj2gco(mmstr));
settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1));
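mt->nomm = cast_byte(~(1u<<MM_index));
return 1;
}
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
if (rsize >= MIN_CHUNK_SIZE) {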
- mchunkptr remainder = chunk_plus_offset(newp, nb);
+ mchunkptr rem = chunk_plus_offset(newp, nb);
set_inuse(m, newp, nb);
- set_inuse(m, remainder, rsize);
- lj_alloc_free(m, chunk2mem(remainder));
+ set_inuse(m, rem, rsize);
+ lj_alloc_free(m, chunk2mem(rem));
}
} else if (next == m->top && oldsize + m->topsize > nb) {
/* Expand into top */
diff --git a/src/lj_api.c b/src/lj_api.c
index 7a759e5f..4bac5024 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -227,7 +227,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx)
{
cTValue *o = index2adr(L, idx);
TValue tmp;
- return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)));
+ return (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), &tmp)));
}
LUA_API int lua_isstring(lua_State *L, int idx)
@@ -307,7 +307,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
TValue tmp;
if (LJ_LIKELY(tvisnum(o)))
return numV(o);
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
return numV(&tmp);
else
return 0;
@@ -319,7 +319,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
TValue tmp;
if (tvisnum(o))
return numV(o);
- else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)))
+ else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
return numV(&tmp);
}
@@ -332,7 +332,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def)
return numV(o);
else if (tvisnil(o))
return def;
- else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)))
+ else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
return numV(&tmp);
}
@@ -344,7 +344,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
lua_Number n;
if (LJ_LIKELY(tvisnum(o)))
n = numV(o);
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
return 0;
@@ -362,7 +362,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
lua_Number n;
if (LJ_LIKELY(tvisnum(o)))
n = numV(o);
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
lj_err_argt(L, idx, LUA_TNUMBER);
@@ -382,7 +382,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
n = numV(o);
else if (tvisnil(o))
return def;
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
lj_err_argt(L, idx, LUA_TNUMBER);
@@ -753,7 +753,7 @@ LUA_API int lua_getmetatable(lua_State *L, int idx)
else if (tvisudata(o))
mt = tabref(udataV(o)->metatable);
else
- mt = tabref(G(L)->basemt[itypemap(o)]);
+ mt = tabref(basemt_obj(G(L), o));
if (mt == NULL)
return 0;
settabV(L, L->top, mt);
@@ -941,12 +941,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
if (lj_trace_flushall(L))
lj_err_caller(L, LJ_ERR_NOGCMM);
if (tvisbool(o)) {
- /* NOBARRIER: g->basemt[] is a GC root. */
- setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt));
- setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt));
+ /* NOBARRIER: basemt is a GC root. */
+ setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));
+ setgcref(basemt_it(g, LJ_TFALSE), obj2gco(mt));
} else {
- /* NOBARRIER: g->basemt[] is a GC root. */
- setgcref(g->basemt[itypemap(o)], obj2gco(mt));
+ /* NOBARRIER: basemt is a GC root. */
+ setgcref(basemt_obj(g, o), obj2gco(mt));
}
}
L->top--;
diff --git a/src/lj_asm.c b/src/lj_asm.c
index a4d0c606..f26a40a5 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -13,6 +13,7 @@
#include "lj_gc.h"
#include "lj_str.h"
#include "lj_tab.h"
+#include "lj_frame.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
@@ -81,6 +82,10 @@ typedef struct ASMState {
#define IR(ref) (&as->ir[(ref)])
+#define ASMREF_TMP1 REF_TRUE /* Temp. register. */
+#define ASMREF_TMP2 REF_FALSE /* Temp. register. */
+#define ASMREF_L REF_NIL /* Stores register for L. */
+
/* Check for variant to invariant references. */
#define iscrossref(as, ref) ((ref) < as->sectref)
@@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
{ MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX 0x200
+#define REX_64 (FORCE_REX|0x080000)
#else
#define REXRB(p, rr, rb) ((void)0)
#define FORCE_REX 0
+#define REX_64 0
#endif
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
{
uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
if (rex != 0x40) {
+ rex |= (rr >> 16);
if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
*--p = (MCode)rex;
}
@@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target)
#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))
-/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
-#define emit_setargr(as, narg, r) \
- emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
-#define emit_setargi(as, narg, imm) \
- emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
-#define emit_setargp(as, narg, ptr) \
- emit_setargi(as, (narg), ptr2addr((ptr)))
-
/* -- Register allocator debugging ---------------------------------------- */
/* #define LUAJIT_DEBUG_RA */
@@ -578,10 +578,6 @@ static void ra_setup(ASMState *as)
memset(as->phireg, 0, sizeof(as->phireg));
memset(as->cost, 0, sizeof(as->cost));
as->cost[RID_ESP] = REGCOST(~0u, 0u);
-
- /* Start slots for spill slot allocation. */
- as->evenspill = (SPS_FIRST+1)&~1;
- as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
}
/* Rematerialize constants. */
@@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
} else if (ir->o == IR_BASE) {
ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
emit_getgl(as, r, jit_base);
+ } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */
+ lua_assert(irt_isnil(ir->t));
+ emit_getgl(as, r, jit_L);
} else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
ir->o == IR_KPTR || ir->o == IR_KNULL);
@@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
return sps_scale(slot);
}
+/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
+static Reg ra_releasetmp(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ Reg r = ir->r;
+ lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+ ra_free(as, r);
+ ra_modified(as, r);
+ ir->r = RID_INIT;
+ return r;
+}
+
/* Restore a register (marked as free). Rematerialize or force a spill. */
static Reg ra_restore(ASMState *as, IRRef ref)
{
@@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc)
/* Arch-specific field offsets. */
static const uint8_t field_ofs[IRFL__MAX+1] = {
-#define FLOFS(name, type, field) (uint8_t)offsetof(type, field),
+#define FLOFS(name, ofs) (uint8_t)(ofs),
IRFLDEF(FLOFS)
#undef FLOFS
0
@@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
{
IRIns *irr;
lua_assert(ir->o == IR_STRREF);
- as->mrm.idx = as->mrm.base = RID_NONE;
+ as->mrm.base = as->mrm.idx = RID_NONE;
as->mrm.scale = XM_SCALE1;
as->mrm.ofs = sizeof(GCstr);
if (irref_isk(ir->op1)) {
@@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
}
}
+static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow)
+{
+ if (ir->o == IR_KPTR) {
+ as->mrm.ofs = ir->i;
+ as->mrm.base = as->mrm.idx = RID_NONE;
+ } else {
+ lua_assert(ir->o == IR_STRREF);
+ asm_fusestrref(as, ir, allow);
+ }
+}
+
/* Fuse load into memory operand. */
static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
{
@@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
if (ir->o == IR_KNUM) {
+ RegSet avail = as->freeset & ~as->modset & RSET_FPR;
lua_assert(allow != RSET_EMPTY);
- if (!(as->freeset & ~as->modset & RSET_FPR)) {
+ if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
as->mrm.ofs = ptr2addr(ir_knum(ir));
as->mrm.base = as->mrm.idx = RID_NONE;
return RID_MRM;
@@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_FLOAD) {
- /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */
- if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) {
+ /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
+ if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
+ noconflict(as, ref, IR_FSTORE)) {
asm_fusefref(as, ir, xallow);
return RID_MRM;
}
@@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_XLOAD) {
- /* Generic fusion is only ok for IRT_INT operand (but see asm_comp).
+ /* Generic fusion is only ok for 32 bit operand (but see asm_comp).
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
- if (irt_isint(ir->t)) {
- asm_fusestrref(as, IR(ir->op1), xallow);
+ if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
+ asm_fusexref(as, IR(ir->op1), xallow);
return RID_MRM;
}
}
@@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return ra_allocref(as, ref, allow);
}
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+ RegSet allow = RSET_ALL;
+ uint32_t n, nargs = CCI_NARGS(ci);
+ int32_t ofs = 0;
+ lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
+ emit_call(as, ci->func);
+ for (n = 0; n < nargs; n++) { /* Setup args. */
+#if LJ_64
+#error "NYI: 64 bit mode call argument setup"
+#endif
+ IRIns *ir = IR(args[n]);
+ if (irt_isnum(ir->t)) {
+ if ((ofs & 4) && irref_isk(args[n])) {
+ /* Split stores for unaligned FP consts. */
+ emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
+ emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
+ } else {
+ Reg r;
+ if ((allow & RSET_FPR) == RSET_EMPTY)
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+ r = ra_alloc1(as, args[n], allow & RSET_FPR);
+ allow &= ~RID2RSET(r);
+ emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
+ }
+ ofs += 8;
+ } else {
+ if ((ci->flags & CCI_FASTCALL) && n < 2) {
+ Reg r = n == 0 ? RID_ECX : RID_EDX;
+ if (args[n] < ASMREF_TMP1) {
+ emit_loadi(as, r, ir->i);
+ } else {
+ lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
+ allow &= ~RID2RSET(r);
+ if (ra_hasreg(ir->r))
+ emit_movrr(as, r, ir->r);
+ else
+ ra_allocref(as, args[n], RID2RSET(r));
+ }
+ } else {
+ if (args[n] < ASMREF_TMP1) {
+ emit_movmroi(as, RID_ESP, ofs, ir->i);
+ } else {
+ Reg r;
+ if ((allow & RSET_GPR) == RSET_EMPTY)
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+ r = ra_alloc1(as, args[n], allow & RSET_GPR);
+ allow &= ~RID2RSET(r);
+ emit_movtomro(as, r, RID_ESP, ofs);
+ }
+ ofs += 4;
+ }
+ }
+ }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+ RegSet drop = RSET_SCRATCH;
+ if ((ci->flags & CCI_NOFPRCLOBBER))
+ drop &= ~RSET_FPR;
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+ if (irt_isnum(ir->t)) {
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
+#if LJ_64
+ if ((ci->flags & CCI_CASTU64)) {
+ Reg dest = ir->r;
+ if (ra_hasreg(dest)) {
+ ra_free(as, dest);
+ ra_modified(as, dest);
+ emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
+ } else {
+ emit_movrmro(as, RID_RET, RID_ESP, ofs);
+ }
+ } else {
+ ra_destreg(as, ir, RID_FPRET);
+ }
+#else
+ /* Number result is in x87 st0 for x86 calling convention. */
+ Reg dest = ir->r;
+ if (ra_hasreg(dest)) {
+ ra_free(as, dest);
+ ra_modified(as, dest);
+ emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+ }
+ if ((ci->flags & CCI_CASTU64)) {
+ emit_movtomro(as, RID_RET, RID_ESP, ofs);
+ emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
+ } else {
+ emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+ }
+#endif
+ } else {
+ lua_assert(!irt_ispri(ir->t));
+ ra_destreg(as, ir, RID_RET);
+ }
+ }
+}
+
+/* Collect arguments from CALL* and ARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+ const CCallInfo *ci, IRRef *args)
+{
+ uint32_t n = CCI_NARGS(ci);
+ lua_assert(n <= CCI_NARGS_MAX);
+ if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+ while (n-- > 1) {
+ ir = IR(ir->op1);
+ lua_assert(ir->o == IR_CARG);
+ args[n] = ir->op2;
+ }
+ args[0] = ir->op1;
+ lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+ IRRef args[CCI_NARGS_MAX];
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+ asm_collectargs(as, ir, ci, args);
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+}
+
/* -- Type conversions ---------------------------------------------------- */
static void asm_tonum(ASMState *as, IRIns *ir)
@@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir)
static void asm_strto(ASMState *as, IRIns *ir)
{
- Reg str;
- int32_t ofs;
- RegSet drop = RSET_SCRATCH;
/* Force a spill slot for the destination register (if any). */
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
+ IRRef args[2];
+ RegSet drop = RSET_SCRATCH;
if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
ra_evictset(as, drop);
asm_guardcc(as, CC_E);
emit_rr(as, XO_TEST, RID_RET, RID_RET);
- /* int lj_str_numconv(const char *s, TValue *n) */
- emit_call(as, lj_str_numconv);
- ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
- if (ofs == 0) {
- emit_setargr(as, 2, RID_ESP);
- } else {
- emit_setargr(as, 2, RID_RET);
- emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
- }
- emit_setargr(as, 1, RID_RET);
- str = ra_alloc1(as, ir->op1, RSET_GPR);
- emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
+ args[0] = ir->op1;
+ args[1] = ASMREF_TMP1;
+ asm_gencall(as, ci, args);
+ /* Store the result to the spill slot or slots SPS_TEMP1/2. */
+ emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+ RID_ESP, sps_scale(ir->s));
}
static void asm_tostr(ASMState *as, IRIns *ir)
{
IRIns *irl = IR(ir->op1);
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+ IRRef args[2];
+ args[0] = ASMREF_L;
as->gcsteps++;
if (irt_isnum(irl->t)) {
- /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */
- emit_call(as, lj_str_fromnum);
- emit_setargr(as, 1, RID_RET);
- emit_getgl(as, RID_RET, jit_L);
- emit_setargr(as, 2, RID_RET);
- emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl));
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
+ args[1] = ASMREF_TMP1;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+ emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+ RID_ESP, ra_spill(as, irl));
} else {
- /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */
- emit_call(as, lj_str_fromint);
- emit_setargr(as, 1, RID_RET);
- emit_getgl(as, RID_RET, jit_L);
- emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
+ args[1] = ir->op1;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
}
}
@@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir)
lua_assert(!irt_isnil(ir->t));
return irt_type(ir->t)-IRT_FALSE;
} else {
- lua_assert(irt_isaddr(ir->t));
+ lua_assert(irt_isgcv(ir->t));
lo = u32ptr(ir_kgc(ir));
hi = lo - 0x04c11db7;
}
@@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_newref(ASMState *as, IRIns *ir)
{
- IRRef keyref = ir->op2;
- IRIns *irkey = IR(keyref);
- RegSet allow = RSET_GPR;
- Reg tab, tmp;
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- tab = ra_alloc1(as, ir->op1, allow);
- tmp = ra_scratch(as, rset_clear(allow, tab));
- /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */
- emit_call(as, lj_tab_newkey);
- emit_setargr(as, 1, tmp);
- emit_setargr(as, 2, tab);
- emit_getgl(as, tmp, jit_L);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+ IRRef args[3];
+ IRIns *irkey;
+ Reg tmp;
+ args[0] = ASMREF_L;
+ args[1] = ir->op1;
+ args[2] = ASMREF_TMP1;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+ tmp = ra_releasetmp(as, ASMREF_TMP1);
+ irkey = IR(ir->op2);
if (irt_isnum(irkey->t)) {
/* For numbers use the constant itself or a spill slot as a TValue. */
- if (irref_isk(keyref)) {
- emit_setargp(as, 3, ir_knum(irkey));
- } else {
- emit_setargr(as, 3, tmp);
+ if (irref_isk(ir->op2))
+ emit_loada(as, tmp, ir_knum(irkey));
+ else
emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
- }
} else {
/* Otherwise use g->tmptv to hold the TValue. */
- lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t));
- emit_setargr(as, 3, tmp);
- if (!irref_isk(keyref)) {
- Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
+ if (!irref_isk(ir->op2)) {
+ Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
emit_movtomro(as, src, tmp, 0);
} else if (!irt_ispri(irkey->t)) {
emit_movmroi(as, tmp, 0, irkey->i);
@@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir)
/* -- Loads and stores ---------------------------------------------------- */
-static void asm_fload(ASMState *as, IRIns *ir)
+static void asm_fxload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
x86Op xo;
- asm_fusefref(as, ir, RSET_GPR);
+ if (ir->o == IR_FLOAD)
+ asm_fusefref(as, ir, RSET_GPR);
+ else
+ asm_fusexref(as, IR(ir->op1), RSET_GPR);
+ /* ir->op2 is ignored -- unaligned loads are ok on x86. */
switch (irt_type(ir->t)) {
case IRT_I8: xo = XO_MOVSXb; break;
case IRT_U8: xo = XO_MOVZXb; break;
@@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
}
}
-static void asm_xload(ASMState *as, IRIns *ir)
-{
- Reg dest = ra_dest(as, ir, RSET_GPR);
- x86Op xo;
- asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */
- /* ir->op2 is ignored -- unaligned loads are ok on x86. */
- switch (irt_type(ir->t)) {
- case IRT_I8: xo = XO_MOVSXb; break;
- case IRT_U8: xo = XO_MOVZXb; break;
- case IRT_I16: xo = XO_MOVSXw; break;
- case IRT_U16: xo = XO_MOVZXw; break;
- default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
- }
- emit_mrm(as, xo, dest, RID_MRM);
-}
-
-/* -- String ops ---------------------------------------------------------- */
+/* -- Allocations --------------------------------------------------------- */
static void asm_snew(ASMState *as, IRIns *ir)
{
- RegSet allow = RSET_GPR;
- Reg left, right;
- IRIns *irl;
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- irl = IR(ir->op1);
- left = irl->r;
- right = IR(ir->op2)->r;
- if (ra_noreg(left)) {
- lua_assert(irl->o == IR_STRREF);
- /* Get register only for non-const STRREF. */
- if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
- if (ra_hasreg(right)) rset_clear(allow, right);
- left = ra_allocref(as, ir->op1, allow);
- }
- }
- if (ra_noreg(right) && !irref_isk(ir->op2)) {
- if (ra_hasreg(left)) rset_clear(allow, left);
- right = ra_allocref(as, ir->op2, allow);
- }
- /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
- emit_call(as, lj_str_new);
- emit_setargr(as, 1, RID_RET);
- emit_getgl(as, RID_RET, jit_L);
- if (ra_noreg(left)) /* Use immediate for const STRREF. */
- emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
- (int32_t)sizeof(GCstr));
- else
- emit_setargr(as, 2, left);
- if (ra_noreg(right))
- emit_setargi(as, 3, IR(ir->op2)->i);
- else
- emit_setargr(as, 3, right);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
+ IRRef args[3];
+ args[0] = ASMREF_L;
+ args[1] = ir->op1;
+ args[2] = ir->op2;
as->gcsteps++;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
}
-/* -- Table ops ----------------------------------------------------------- */
-
static void asm_tnew(ASMState *as, IRIns *ir)
{
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
- emit_call(as, lj_tab_new);
- emit_setargr(as, 1, RID_RET);
- emit_setargi(as, 2, ir->op1);
- emit_setargi(as, 3, ir->op2);
- emit_getgl(as, RID_RET, jit_L);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
+ IRRef args[2];
+ args[0] = ASMREF_L;
+ args[1] = ASMREF_TMP1;
as->gcsteps++;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24));
}
/* Assemble TDUP (table duplication).
** New side of the diff: uses the lj_tab_dup entry of lj_ir_callinfo and passes
** the L pseudo-reference plus op1 as arguments. The removed code passed op1 as
** a constant GC object pointer (ir_kgc) to lj_tab_dup(L, kt).
*/
static void asm_tdup(ASMState *as, IRIns *ir)
{
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */
- emit_call(as, lj_tab_dup);
- emit_setargr(as, 1, RID_RET);
- emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
- emit_getgl(as, RID_RET, jit_L);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
+ IRRef args[2];
+ args[0] = ASMREF_L; /* First argument: the L pseudo-reference. */
+ args[1] = ir->op1; /* Second argument: the template table reference. */
as->gcsteps++; /* Count one more GC step for this allocation. */
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
}
-static void asm_tlen(ASMState *as, IRIns *ir)
-{
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */
- emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
-}
+/* -- Write barriers ------------------------------------------------------ */
static void asm_tbar(ASMState *as, IRIns *ir)
{
@@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir)
/* Assemble OBAR (object write barrier).
** Only the closed-upvalue case is handled: op1 must be a UREFC (asserted
** below) and op2 is the stored value. The new code calls lj_gc_barrieruv
** through the generic C call path with two arguments: a temporary holding
** the global_State pointer and op1 itself.
** NOTE(review): the emit_* calls appear in reverse of the presumable execution
** order (test object colour, test value colour, then call). This assumes the
** assembler emits machine code backwards -- confirm.
*/
static void asm_obar(ASMState *as, IRIns *ir)
{
- RegSet allow = RSET_GPR;
- Reg obj, val;
- GCobj *valp;
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+ IRRef args[2];
MCLabel l_end;
- int32_t ofs;
- ra_evictset(as, RSET_SCRATCH);
- if (irref_isk(ir->op2)) {
- valp = ir_kgc(IR(ir->op2));
- val = RID_NONE;
- } else {
- valp = NULL;
- val = ra_alloc1(as, ir->op2, allow);
- rset_clear(allow, val);
- }
- obj = ra_alloc1(as, ir->op1, allow);
- l_end = emit_label(as);
+ Reg obj;
/* No need for other object barriers (yet). */
lua_assert(IR(ir->op1)->o == IR_UREFC);
- ofs = -(int32_t)offsetof(GCupval, tv);
- /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */
- emit_call(as, lj_gc_barrieruv);
- if (ofs == 0) {
- emit_setargr(as, 2, obj);
- } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) {
- emit_setargr(as, 2, obj);
- emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
- } else {
- emit_setargr(as, 2, RID_RET);
- emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
- }
- emit_setargp(as, 1, J2G(as->J));
- if (valp)
- emit_setargp(as, 3, valp);
- else
- emit_setargr(as, 3, val);
+ l_end = emit_label(as); /* Target of both conditional skips below. */
+ args[0] = ASMREF_TMP1; /* Temporary that receives J2G(as->J) below. */
+ args[1] = ir->op1; /* The UREFC result is passed unmodified. */
+ asm_gencall(as, ci, args);
+ emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
+ obj = IR(ir->op1)->r; /* Register currently assigned to op1. */
emit_sjcc(as, CC_Z, l_end); /* Skip if no LJ_GC_WHITES bit is set in the value. */
emit_i8(as, LJ_GC_WHITES);
- if (valp)
- emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked);
- else
+ if (irref_isk(ir->op2)) {
/* Constant value: test its marked byte at an absolute address. */
+ GCobj *vp = ir_kgc(IR(ir->op2));
+ emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
+ } else {
/* Non-constant value: allocate from scratch GPRs only, excluding obj. */
+ Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
+ }
emit_sjcc(as, CC_Z, l_end); /* Skip if the LJ_GC_BLACK bit is not set in the upvalue. */
emit_i8(as, LJ_GC_BLACK);
/* The displacement maps from the tv field to the marked field of GCupval. */
emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
- ofs + (int32_t)offsetof(GChead, marked));
+ (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
}
/* -- FP/int arithmetic and logic operations ------------------------------ */
@@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
}
}
emit_mrm(as, XO_UCOMISD, left, right);
- } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
+ } else {
IRRef lref = ir->op1, rref = ir->op2;
IROp leftop = (IROp)(IR(lref)->o);
- lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+ lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E));
/* Swap constants (only for ABC) and fusable loads to the right. */
if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
@@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
} else {
Reg left;
if (opisfusableload((IROp)irl->o) &&
- ((irt_isi8(irl->t) && checki8(imm)) ||
- (irt_isu8(irl->t) && checku8(imm)))) {
- /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8
- ** loads are handled here. The IRT_I16/IRT_U16 loads should never be
- ** fused, since cmp word [mem], imm16 has a length-changing prefix.
+ ((irt_isu8(irl->t) && checku8(imm)) ||
+ ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
+ (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
+ /* Only the IRT_INT case is fused by asm_fuseload.
+ ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
+ ** are handled here.
+ ** Note that cmp word [mem], imm16 should not be generated,
+ ** since it has a length-changing prefix. Compares of a word
+ ** against a sign-extended imm8 are ok, however.
*/
IRType1 origt = irl->t; /* Temporarily flip types. */
irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
@@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
if (left == RID_MRM) { /* Fusion succeeded? */
asm_guardcc(as, cc);
emit_i8(as, imm);
- emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM);
+ emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
+ XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
return;
} /* Otherwise handle register case as usual. */
} else {
@@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
asm_guardcc(as, cc);
emit_mrm(as, XO_CMP, left, right);
}
- } else { /* Handle ordered string compares. */
- RegSet allow = RSET_GPR;
- /* This assumes lj_str_cmp never uses any SSE registers. */
- ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
- asm_guardcc(as, cc);
- emit_rr(as, XO_TEST, RID_RET, RID_RET);
- emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
- if (irref_isk(ir->op1)) {
- emit_setargi(as, 1, IR(ir->op1)->i);
- } else {
- Reg left = ra_alloc1(as, ir->op1, allow);
- rset_clear(allow, left);
- emit_setargr(as, 1, left);
- }
- if (irref_isk(ir->op2)) {
- emit_setargi(as, 2, IR(ir->op2)->i);
- } else {
- Reg right = ra_alloc1(as, ir->op2, allow);
- emit_setargr(as, 2, right);
- }
}
}
@@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
/* -- GC handling --------------------------------------------------------- */
/* Sync all live GC values to Lua stack slots. */
-static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
+static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
{
+ /* Some care must be taken when allocating registers here, since this is
+ ** not part of the fast path. All scratch registers are evicted in the
+ ** fast path, so it's easiest to force allocation from scratch registers
+ ** only. This avoids register allocation state unification.
+ */
+ RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
IRRef2 *map = &as->T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
for (s = 0; s < nslots; s++) {
@@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as, SnapShot *snap)
{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+ IRRef args[2];
MCLabel l_end;
- const BCIns *pc;
- Reg tmp, base;
+ Reg base, lstate, tmp;
RegSet drop = RSET_SCRATCH;
- /* Must evict BASE because the stack may be reallocated by the GC. */
- if (ra_hasreg(IR(REF_BASE)->r))
- drop |= RID2RSET(IR(REF_BASE)->r);
+ if (ra_hasreg(IR(REF_BASE)->r)) /* Stack may be reallocated by the GC. */
+ drop |= RID2RSET(IR(REF_BASE)->r); /* Need to evict BASE, too. */
ra_evictset(as, drop);
- base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
l_end = emit_label(as);
- /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */
- emit_call(as, lj_gc_step_jit);
- emit_movtomro(as, base, RID_RET, offsetof(lua_State, base));
- emit_setargr(as, 1, RID_RET);
- emit_setargi(as, 3, (int32_t)as->gcsteps);
- emit_getgl(as, RID_RET, jit_L);
- pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots];
- emit_setargp(as, 2, pc);
- asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base));
- if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */
- ra_restore(as, REF_BASE); /* Better do it inside the slow path. */
+ args[0] = ASMREF_L;
+ args[1] = ASMREF_TMP1;
+ asm_gencall(as, ci, args);
+ tmp = ra_releasetmp(as, ASMREF_TMP1);
+ emit_loadi(as, tmp, (int32_t)as->gcsteps);
+ /* We don't know spadj yet, so get the C frame from L->cframe. */
+ emit_movmroi(as, tmp, CFRAME_OFS_PC,
+ (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
+ emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
+ lstate = IR(ASMREF_L)->r;
+ emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
+ /* It's ok if lstate is already in a non-scratch reg. But all allocations
+ ** in the non-fast path must use a scratch reg. See comment above.
+ */
+ base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
+ emit_movtomro(as, base, lstate, offsetof(lua_State, base));
+ asm_gc_sync(as, snap, base);
+ /* BASE/L get restored anyway, better do it inside the slow path. */
+ if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
+ if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r))
+ ra_restore(as, ASMREF_L);
/* Jump around GC step if GC total < GC threshold. */
tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
emit_sjcc(as, CC_B, l_end);
@@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as)
{
int32_t spadj;
emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
- spadj = sps_adjust(as);
+ spadj = sps_adjust(as->evenspill);
as->T->spadjust = (uint16_t)spadj;
emit_addptr(as, RID_ESP, -spadj);
}
@@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as)
{
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
- ra_free(as, r);
- if (r != RID_BASE) {
- ra_scratch(as, RID2RSET(RID_BASE));
- emit_rr(as, XO_MOV, r, RID_BASE);
+ lua_assert(!ra_hasspill(ir->s));
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ if (r != RID_BASE) {
+ ra_scratch(as, RID2RSET(RID_BASE));
+ emit_rr(as, XO_MOV, r, RID_BASE);
+ }
}
}
@@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as)
}
/* Calculate stack frame adjustment. */
- spadj = sps_adjust(as);
+ spadj = sps_adjust(as->evenspill);
spdelta = spadj - (int32_t)as->parent->spadjust;
if (spdelta < 0) { /* Don't shrink the stack frame. */
spadj = (int32_t)as->parent->spadjust;
@@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as)
GCfunc *fn = ir_kfunc(IR(ir->op2));
if (isluafunc(fn)) {
BCReg fs = s + funcproto(fn)->framesize;
- newbase = s;
- if (secondbase == ~(BCReg)0) secondbase = s;
if (fs > topslot) topslot = fs;
+ if (s != 0) {
+ newbase = s;
+ if (secondbase == ~(BCReg)0) secondbase = s;
+ }
}
}
}
@@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir)
/* Loads and stores. */
case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
- case IR_FLOAD: asm_fload(as, ir); break;
+ case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
case IR_SLOAD: asm_sload(as, ir); break;
- case IR_XLOAD: asm_xload(as, ir); break;
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
- /* String ops. */
+ /* Allocations. */
case IR_SNEW: asm_snew(as, ir); break;
-
- /* Table ops. */
case IR_TNEW: asm_tnew(as, ir); break;
case IR_TDUP: asm_tdup(as, ir); break;
- case IR_TLEN: asm_tlen(as, ir); break;
+
+ /* Write barriers. */
case IR_TBAR: asm_tbar(as, ir); break;
case IR_OBAR: asm_obar(as, ir); break;
@@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
case IR_TOSTR: asm_tostr(as, ir); break;
case IR_STRTO: asm_strto(as, ir); break;
+ /* Calls. */
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+ case IR_CARG: break;
+
default:
setintV(&as->J->errinfo, ir->o);
lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
@@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
IRRef i, nins;
int inloop;
+ ra_setup(as);
+
/* Clear reg/sp for constants. */
for (i = T->nk; i < REF_BIAS; i++)
IR(i)->prev = REGSP_INIT;
@@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
as->curins = nins;
inloop = 0;
+ as->evenspill = SPS_FIRST;
for (i = REF_FIRST; i < nins; i++) {
IRIns *ir = IR(i);
switch (ir->o) {
@@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
if (i == as->stopins+1 && ir->op1 == ir->op2)
as->stopins++;
break;
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+ /* NYI: not fastcall-aware, but doesn't matter (yet). */
+ if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */
+ as->evenspill = (int32_t)CCI_NARGS(ci);
+#if LJ_64
+ ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
+#else
+ ir->prev = REGSP_HINT(RID_RET);
+#endif
+ if (inloop)
+ as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
+ (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
+ continue;
+ }
/* C calls evict all scratch regs and return results in RID_RET. */
- case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
+ case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
case IR_NEWREF:
ir->prev = REGSP_HINT(RID_RET);
if (inloop)
@@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
if (inloop)
as->modset = RSET_SCRATCH;
break;
- /* Ordered string compares evict all integer scratch registers. */
- case IR_LT: case IR_GE: case IR_LE: case IR_GT:
- if (irt_isstr(ir->t) && inloop)
- as->modset |= (RSET_SCRATCH & RSET_GPR);
- break;
/* Non-constant shift counts need to be in RID_ECX. */
case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
@@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
}
ir->prev = REGSP_INIT;
}
+ if ((as->evenspill & 1))
+ as->oddspill = as->evenspill++;
+ else
+ as->oddspill = 0;
}
/* -- Assembler core ------------------------------------------------------ */
@@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T)
as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
/* Setup register allocation. */
- ra_setup(as);
asm_setup_regsp(as, T);
if (!as->loopref) {
diff --git a/src/lj_def.h b/src/lj_def.h
index dbfd5bf5..3d6ba417 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -88,6 +88,7 @@ typedef unsigned __int32 uintptr_t;
#define checki8(x) ((x) == (int32_t)(int8_t)(x))
#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
#define checki16(x) ((x) == (int32_t)(int16_t)(x))
+#define checku16(x) ((x) == (int32_t)(uint16_t)(x))
/* Every half-decent C compiler transforms this into a rotate instruction. */
#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n))))
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 0d8a03ec..5c9d2bcb 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -73,13 +73,13 @@ static void gc_mark(global_State *g, GCobj *o)
}
}
-/* Mark the base metatables. */
-static void gc_mark_basemt(global_State *g)
+/* Mark GC roots: every non-NULL entry of the g->gcroot[] array. */
+static void gc_mark_gcroot(global_State *g)
{
- int i;
- for (i = 0; i < BASEMT_MAX; i++)
- if (tabref(g->basemt[i]) != NULL)
- gc_markobj(g, tabref(g->basemt[i]));
+ ptrdiff_t i;
+ for (i = 0; i < GCROOT__MAX; i++) /* Walk all GCROOT__MAX slots. */
+ if (gcref(g->gcroot[i]) != NULL) /* Unset slots are skipped. */
+ gc_markobj(g, gcref(g->gcroot[i]));
}
/* Start a GC cycle and mark the root set. */
@@ -91,7 +91,7 @@ static void gc_mark_start(global_State *g)
gc_markobj(g, mainthread(g));
gc_markobj(g, tabref(mainthread(g)->env));
gc_marktv(g, &g->registrytv);
- gc_mark_basemt(g);
+ gc_mark_gcroot(g);
g->gc.state = GCSpropagate;
}
@@ -541,7 +541,7 @@ static void atomic(global_State *g, lua_State *L)
lua_assert(!iswhite(obj2gco(mainthread(g))));
gc_markobj(g, L); /* Mark running thread. */
gc_mark_curtrace(g); /* Mark current trace. */
- gc_mark_basemt(g); /* Mark base metatables (again). */
+ gc_mark_gcroot(g); /* Mark GC roots (again). */
gc_propagate_gray(g); /* Propagate all of the above. */
setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */
@@ -643,16 +643,15 @@ int lj_gc_step(lua_State *L)
}
/* Ditto, but fix the stack top first. */
-void lj_gc_step_fixtop(lua_State *L)
+void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
{
if (curr_funcisL(L)) L->top = curr_topL(L);
lj_gc_step(L);
}
/* Perform multiple GC steps. Called from JIT-compiled code. */
-void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps)
+void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps)
{
- cframe_pc(cframe_raw(L->cframe)) = pc;
L->top = curr_topL(L);
while (steps-- > 0 && lj_gc_step(L) == 0)
;
@@ -711,17 +710,16 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
makewhite(g, o); /* Make it white to avoid the following barrier. */
}
-/* The reason for duplicating this is that it needs to be visible from ASM. */
-void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v)
+/* Specialized barrier for closed upvalue. Pass &uv->tv.
+** The upvalue itself is not passed: its marked byte is located relative to
+** the tv pointer and the stored value is read from *tv.
+*/
+void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv)
{
- lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
- lua_assert(o->gch.gct == ~LJ_TUPVAL);
- /* Preserve invariant during propagation. Otherwise it doesn't matter. */
/* TV2MARKED(x): lvalue for the marked byte of the GCupval whose tv field is x. */
+#define TV2MARKED(x) \
+ (*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked)))
if (g->gc.state == GCSpropagate)
- gc_mark(g, v); /* Move frontier forward. */
+ gc_mark(g, gcV(tv)); /* During propagation: mark the stored GC value. */
else
- makewhite(g, o); /* Make it white to avoid the following barrier. */
/* Otherwise: replace the colour bits of the upvalue with the current white. */
+ TV2MARKED(tv) = (TV2MARKED(tv) & cast_byte(~LJ_GC_COLORS)) | curwhite(g);
+#undef TV2MARKED
}
/* Close upvalue. Also needs a write barrier. */
diff --git a/src/lj_gc.h b/src/lj_gc.h
index 192066d3..0dbb9b82 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -43,8 +43,8 @@ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all);
LJ_FUNC void lj_gc_finalizeudata(lua_State *L);
LJ_FUNC void lj_gc_freeall(global_State *g);
LJ_FUNCA int lj_gc_step(lua_State *L);
-LJ_FUNCA void lj_gc_step_fixtop(lua_State *L);
-LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps);
+LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L);
+LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps);
LJ_FUNC void lj_gc_fullgc(lua_State *L);
/* GC check: drive collector forward if the GC threshold has been reached. */
@@ -58,7 +58,7 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L);
/* Write barriers. */
LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t);
LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v);
-LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v);
+LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv);
LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv);
LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T);
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 1efb12f0..cf0b6b55 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -6,16 +6,22 @@
#define lj_ir_c
#define LUA_CORE
+/* For pointers to libc/libm functions. */
+#include <stdio.h>
+#include <math.h>
+
#include "lj_obj.h"
#if LJ_HASJIT
#include "lj_gc.h"
#include "lj_str.h"
+#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
+#include "lj_lib.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@@ -32,6 +38,17 @@ IRDEF(IRMODE)
0
};
+/* C call info for CALL* instructions. */
+LJ_DATADEF const CCallInfo lj_ir_callinfo[] = {
+#define IRCALLCI(name, nargs, kind, type, flags) \
+ { (ASMFunction)name, \
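+ (nargs)|(CCI_CALL_##kind)|(IRT_##type<<CCI_OTSHIFT)|(flags) },
+IRCALLDEF(IRCALLCI)
+#undef IRCALLCI
+ { NULL, 0 }
+};
return TREF(ref, irt_t((ir->t = fins->t)));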
}
+/* Emit call to a C function.
+** id selects the entry in lj_ir_callinfo. The variadic arguments are the IR
+** references of the explicit call arguments; when the entry has CCI_L set,
+** the implicit L argument is counted in CCI_NARGS but is not passed here.
+** A single argument becomes op1 directly, further arguments are chained
+** through CARG instructions. With no arguments op1 is TREF_NIL.
+** Returns the result of emitting the CALL* instruction (op2 = id).
+*/
+TRef lj_ir_call(jit_State *J, IRCallID id, ...)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[id];
+ uint32_t n = CCI_NARGS(ci);
+ TRef tr = TREF_NIL;
+ va_list argp;
+ va_start(argp, id);
+ if ((ci->flags & CCI_L)) n--; /* Implicit L is not part of the varargs. */
+ if (n > 0)
+ tr = va_arg(argp, IRRef); /* First explicit argument. */
+ while (n-- > 1) /* Chain each remaining argument: CARG(previous, next). */
+ tr = emitir(IRT(IR_CARG, IRT_NIL), tr, va_arg(argp, IRRef));
+ va_end(argp);
+ if (CCI_OP(ci) == IR_CALLS)
+ J->needsnap = 1; /* Need snapshot after call with side effect. */
+ return emitir(CCI_OPTYPE(ci), tr, id); /* Opcode and type come from the call info. */
+}
+
/* -- Interning of constants ---------------------------------------------- */
/*
diff --git a/src/lj_ir.h b/src/lj_ir.h
index a6973a81..9a7e711d 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -8,6 +8,8 @@
#include "lj_obj.h"
+/* -- IR instructions ----------------------------------------------------- */
+
/* IR instruction definition. Order matters, see below. */
#define IRDEF(_) \
/* Miscellaneous ops. */ \
@@ -101,13 +103,12 @@
_(USTORE, S , ref, ref) \
_(FSTORE, S , ref, ref) \
\
- /* String ops. */ \
- _(SNEW, N , ref, ref) \
- \
- /* Table ops. */ \
+ /* Allocations. */ \
+ _(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. */ \
_(TNEW, A , lit, lit) \
_(TDUP, A , ref, ___) \
- _(TLEN, L , ref, ___) \
+ \
+ /* Write barriers. */ \
_(TBAR, S , ref, ___) \
_(OBAR, S , ref, ref) \
\
@@ -118,6 +119,12 @@
_(TOSTR, N , ref, ___) \
_(STRTO, G , ref, ___) \
\
+ /* Calls. */ \
+ _(CALLN, N , ref, lit) \
+ _(CALLL, L , ref, lit) \
+ _(CALLS, S , ref, lit) \
+ _(CARG, N , ref, ref) \
+ \
/* End of list. */
/* IR opcodes (max. 256). */
@@ -144,6 +151,8 @@ LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE);
LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE);
LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE);
+/* -- Named IR literals --------------------------------------------------- */
+
/* FPMATH sub-functions. ORDER FPM. */
#define IRFPMDEF(_) \
_(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
@@ -158,20 +167,22 @@ IRFPMDEF(FPMENUM)
IRFPM__MAX
} IRFPMathOp;
-/* FLOAD field IDs. */
+/* FLOAD fields. */
#define IRFLDEF(_) \
- _(STR_LEN, GCstr, len) \
- _(FUNC_ENV, GCfunc, l.env) \
- _(TAB_META, GCtab, metatable) \
- _(TAB_ARRAY, GCtab, array) \
- _(TAB_NODE, GCtab, node) \
- _(TAB_ASIZE, GCtab, asize) \
- _(TAB_HMASK, GCtab, hmask) \
- _(TAB_NOMM, GCtab, nomm) \
- _(UDATA_META, GCudata, metatable)
+ _(STR_LEN, offsetof(GCstr, len)) \
+ _(FUNC_ENV, offsetof(GCfunc, l.env)) \
+ _(TAB_META, offsetof(GCtab, metatable)) \
+ _(TAB_ARRAY, offsetof(GCtab, array)) \
+ _(TAB_NODE, offsetof(GCtab, node)) \
+ _(TAB_ASIZE, offsetof(GCtab, asize)) \
+ _(TAB_HMASK, offsetof(GCtab, hmask)) \
+ _(TAB_NOMM, offsetof(GCtab, nomm)) \
+ _(UDATA_META, offsetof(GCudata, metatable)) \
+ _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
+ _(UDATA_FILE, sizeof(GCudata))
typedef enum {
-#define FLENUM(name, type, field) IRFL_##name,
+#define FLENUM(name, ofs) IRFL_##name,
IRFLDEF(FLENUM)
#undef FLENUM
IRFL__MAX
@@ -183,7 +194,8 @@ IRFLDEF(FLENUM)
#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */
/* XLOAD mode, stored in op2. */
-#define IRXLOAD_UNALIGNED 1
+#define IRXLOAD_READONLY 1 /* Load from read-only data. */
+#define IRXLOAD_UNALIGNED 2 /* Unaligned load. */
/* TOINT mode, stored in op2. Ordered by strength of the checks. */
#define IRTOINT_CHECK 0 /* Number checked for integerness. */
@@ -191,6 +203,67 @@ IRFLDEF(FLENUM)
#define IRTOINT_ANY 2 /* Any FP number is ok. */
#define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */
+/* C call info for CALL* instructions. */
+typedef struct CCallInfo {
+ ASMFunction func; /* Function pointer. */
+ uint32_t flags; /* Number of arguments and flags. */
+} CCallInfo;
+
+#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */
+#define CCI_NARGS_MAX 16 /* Max. # of args. */
+
+#define CCI_OTSHIFT 16
+#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */
+#define CCI_OPSHIFT 24
+#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
+
+#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
+#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
+#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
+#define CCI_CALL_FN (CCI_CALL_N|CCI_FASTCALL)
+#define CCI_CALL_FL (CCI_CALL_L|CCI_FASTCALL)
+#define CCI_CALL_FS (CCI_CALL_S|CCI_FASTCALL)
+
+/* C call info flags. */
+#define CCI_L 0x0100 /* Implicit L arg. */
+#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
+#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
+#define CCI_FASTCALL 0x0800 /* Fastcall convention. */
+
+/* Function definitions for CALL* instructions. */
+#define IRCALLDEF(_) \
+ _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
+ _(lj_str_new, 3, S, STR, CCI_L) \
+ _(lj_str_tonum, 2, FN, INT, 0) \
+ _(lj_str_fromint, 2, FN, STR, CCI_L) \
+ _(lj_str_fromnum, 2, FN, STR, CCI_L) \
+ _(lj_tab_new1, 2, FS, TAB, CCI_L) \
+ _(lj_tab_dup, 2, FS, TAB, CCI_L) \
+ _(lj_tab_newkey, 3, S, PTR, CCI_L) \
+ _(lj_tab_len, 1, FL, INT, 0) \
+ _(lj_gc_step_jit, 2, FS, NIL, CCI_L) \
+ _(lj_gc_barrieruv, 2, FS, NIL, 0) \
+ _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
+ _(sinh, 1, N, NUM, 0) \
+ _(cosh, 1, N, NUM, 0) \
+ _(tanh, 1, N, NUM, 0) \
+ _(fputc, 2, S, INT, 0) \
+ _(fwrite, 4, S, INT, 0) \
+ _(fflush, 1, S, INT, 0) \
+ \
+ /* End of list. */
+
+typedef enum {
+#define IRCALLENUM(name, nargs, kind, type, flags) IRCALL_##name,
+IRCALLDEF(IRCALLENUM)
+#undef IRCALLENUM
+ IRCALL__MAX
+} IRCallID;
+
+LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
+
+/* -- IR operands --------------------------------------------------------- */
+
/* IR operand mode (2 bit). */
typedef enum {
IRMref, /* IR reference. */
@@ -227,6 +300,8 @@ typedef enum {
LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
+/* -- IR instruction types ------------------------------------------------ */
+
/* IR result type and flags (8 bit). */
typedef enum {
/* Map of itypes to non-negative numbers. ORDER LJ_T */
@@ -314,6 +389,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
/* Stored combined IR opcode and type. */
typedef uint16_t IROpT;
+/* -- IR references ------------------------------------------------------- */
+
/* IR references. */
typedef uint16_t IRRef1; /* One stored reference. */
typedef uint32_t IRRef2; /* Two stored references. */
@@ -382,6 +459,8 @@ typedef uint32_t TRef;
#define TREF_FALSE (TREF_PRI(IRT_FALSE))
#define TREF_TRUE (TREF_PRI(IRT_TRUE))
+/* -- IR format ----------------------------------------------------------- */
+
/* IR instruction format (64 bit).
**
** 16 16 8 8 8 8
@@ -425,5 +504,6 @@ typedef union IRIns {
#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
#define ir_knum(ir) (mref((ir)->ptr, cTValue))
+#define ir_kptr(ir) (mref((ir)->ptr, void))
#endif
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 69b0a955..52077ad5 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -6,6 +6,8 @@
#ifndef _LJ_IROPT_H
#define _LJ_IROPT_H
+#include <stdarg.h>
+
#include "lj_obj.h"
#include "lj_jit.h"
@@ -13,6 +15,7 @@
/* IR emitter. */
LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J);
+LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...);
/* Save current IR in J->fold.ins, but do not emit it (yet). */
static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b)
@@ -83,6 +86,7 @@ LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref);
/* Emit IR instructions with on-the-fly optimizations. */
LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim);
/* Special return values for the fold functions. */
enum {
@@ -106,7 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
-LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J);
LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
/* Dead-store elimination. */
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 683c66d6..d8254093 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -152,7 +152,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg)
{
TValue *o = L->base + narg-1;
if (!(o < L->top &&
- (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o)))))
+ (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o)))))
lj_err_argt(L, narg, LUA_TNUMBER);
return numV(o);
}
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 59a0f2be..a7a6317e 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -90,4 +90,9 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
#define LIBINIT_FFID 0xfe
#define LIBINIT_END 0xff
+/* Exported library functions. */
+
+typedef struct RandomState RandomState;
+LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs);
+
#endif
diff --git a/src/lj_meta.c b/src/lj_meta.c
index dff01f85..1182d908 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -60,7 +60,7 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm)
else if (tvisudata(o))
mt = tabref(udataV(o)->metatable);
else
- mt = tabref(G(L)->basemt[itypemap(o)]);
+ mt = tabref(basemt_obj(G(L), o));
if (mt) {
cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm]));
if (mo)
@@ -157,7 +157,7 @@ static cTValue *str2num(cTValue *o, TValue *n)
{
if (tvisnum(o))
return o;
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), n))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), n))
return n;
else
return NULL;
@@ -295,7 +295,7 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
top = curr_top(L);
setcont(top, ne ? lj_cont_condf : lj_cont_condt);
copyTV(L, top+1, mo);
- it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA;
+ it = ~o1->gch.gct;
setgcV(L, top+2, &o1->gch, it);
setgcV(L, top+3, &o2->gch, it);
return top+2; /* Trigger metamethod call. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 9101f053..cebeda9b 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -315,7 +315,7 @@ typedef struct GCstr {
/* Userdata object. Payload follows. */
typedef struct GCudata {
GCHeader;
- uint8_t unused1;
+ uint8_t udtype; /* Userdata type. */
uint8_t unused2;
GCRef env; /* Should be at same offset in GCfunc. */
MSize len; /* Size of payload. */
@@ -323,6 +323,13 @@ typedef struct GCudata {
uint32_t align1; /* To force 8 byte alignment of the payload. */
} GCudata;
+/* Userdata types. */
+enum {
+ UDTYPE_USERDATA, /* Regular userdata. */
+ UDTYPE_IO_FILE, /* I/O library FILE. */
+ UDTYPE__MAX
+};
+
#define uddata(u) ((void *)((u)+1))
#define sizeudata(u) (sizeof(struct GCudata)+(u)->len)
@@ -496,7 +503,17 @@ MMDEF(MMENUM)
MM_FAST = MM_eq
} MMS;
-#define BASEMT_MAX ((~LJ_TNUMX)+1)
+/* GC root IDs. */
+typedef enum {
+ GCROOT_BASEMT, /* Metatables for base types. */
+ GCROOT_BASEMT_NUM = ~LJ_TNUMX, /* Last base metatable. */
+ GCROOT_IO_INPUT, /* Userdata for default I/O input file. */
+ GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */
+ GCROOT__MAX
+} GCRootID;
+
+#define basemt_it(g, it) ((g)->gcroot[GCROOT_BASEMT+~(it)])
+#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
typedef struct GCState {
MSize total; /* Memory currently allocated. */
@@ -544,7 +561,7 @@ typedef struct global_State {
volatile int32_t vmstate; /* VM state or current JIT code trace number. */
GCRef jit_L; /* Current JIT code lua_State or NULL. */
MRef jit_base; /* Current JIT code L->base. */
- GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */
+ GCRef gcroot[GCROOT__MAX]; /* GC roots. */
GCRef mmname[MM_MAX]; /* Array holding metamethod names. */
} global_State;
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 2102561d..98266d21 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -282,21 +282,50 @@ LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
TValue n;
- if (lj_str_numconv(strdata(ir_kstr(fleft)), &n))
+ if (lj_str_tonum(ir_kstr(fleft), &n))
return lj_ir_knum(J, numV(&n));
return FAILFOLD;
}
-LJFOLD(SNEW STRREF KINT)
-LJFOLDF(kfold_snew)
/* Constant-fold SNEW of a constant pointer and a constant length: the string
** is created right away with lj_str_new and returned as a string constant.
*/
+LJFOLD(SNEW KPTR KINT)
+LJFOLDF(kfold_snew_kptr)
+{
+ GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
+ return lj_ir_kstr(J, s);
+}
+
/* SNEW with a constant length of zero always yields the empty string constant,
** whatever the source operand is. Other lengths are left to the next rule.
*/
+LJFOLD(SNEW any KINT)
+LJFOLDF(kfold_snew_empty)
{
if (fright->i == 0)
return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
+ return NEXTFOLD;
+}
+
/* Constant-fold STRREF of a constant string and a constant offset into a
** constant pointer (KPTR) into the string data.
*/
+LJFOLD(STRREF KGC KINT)
+LJFOLDF(kfold_strref)
+{
+ GCstr *str = ir_kstr(fleft);
+ lua_assert((MSize)fright->i < str->len); /* Offset must lie inside the string. */
+ return lj_ir_kptr(J, (char *)strdata(str) + fright->i);
+}
+
+/* Fold a STRREF into the result of an SNEW.
+** - A constant offset of zero just forwards the source pointer of the SNEW.
+** - If the SNEW source is itself a STRREF, both offsets are reassociated
+**   into a single STRREF of the original string and the fold is retried.
+** - Any other SNEW source (e.g. a KPTR) is left alone (NEXTFOLD).
+*/
+LJFOLD(STRREF SNEW any)
+LJFOLDF(kfold_strref_snew)
+{
PHIBARRIER(fleft);
- if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
- const char *s = strdata(ir_kstr(IR(fleft->op1)));
- int32_t ofs = IR(fleft->op2)->i;
- return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i));
+ if (irref_isk(fins->op2) && fright->i == 0) {
+ return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */
+ } else {
+ /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
+ IRIns *ir = IR(fleft->op1);
+ /* The SNEW source is not guaranteed to be a STRREF: kfold_strref may have
+ ** turned it into a KPTR. Check the opcode instead of merely asserting it,
+ ** so op1/op2 of an unrelated instruction are never misread as str/offset.
+ */
+ if (ir->o == IR_STRREF) {
+ IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */
+ PHIBARRIER(ir);
+ fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
+ fins->op1 = str;
+ fins->ot = IRT(IR_STRREF, IRT_PTR);
+ return RETRYFOLD;
+ }
}
return NEXTFOLD;
}
@@ -343,16 +372,13 @@ LJFOLDF(kfold_intcomp)
}
}
-LJFOLD(LT KGC KGC)
-LJFOLD(GE KGC KGC)
-LJFOLD(LE KGC KGC)
-LJFOLD(GT KGC KGC)
-LJFOLDF(kfold_strcomp)
+LJFOLD(CALLN CARG IRCALL_lj_str_cmp)  /* Call to lj_str_cmp; the left operand is a CARG holding both arguments. */
+LJFOLDF(kfold_strcmp)
{
- if (irt_isstr(fins->t)) {
- GCstr *a = ir_kstr(fleft);
- GCstr *b = ir_kstr(fright);
- return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o));
+ if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {  /* Fold only if both call arguments are constants. */
+ GCstr *a = ir_kstr(IR(fleft->op1));
+ GCstr *b = ir_kstr(IR(fleft->op2));
+ return INTFOLD(lj_str_cmp(a, b));  /* Run the comparison now and fold to its integer result. */
}
return NEXTFOLD;
}
@@ -1070,7 +1096,8 @@ LJFOLDF(merge_eqne_snew_kgc)
uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
len == 2 ? IRT(IR_XLOAD, IRT_U16) :
IRTI(IR_XLOAD));
- TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0);
+ TRef tmp = emitir(ot, strref,
+ IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr)));
if (len == 3)
tmp = emitir(IRTI(IR_BAND), tmp,
@@ -1103,8 +1130,8 @@ LJFOLDX(lj_opt_fwd_hload)
LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)
-LJFOLD(TLEN any)
-LJFOLDX(lj_opt_fwd_tlen)
+LJFOLD(CALLL any IRCALL_lj_tab_len)
+LJFOLDX(lj_opt_fwd_tab_len)
/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
@@ -1194,13 +1221,23 @@ LJFOLDF(fload_tab_ah)
/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
-LJFOLDF(fload_str_len)
+LJFOLDF(fload_str_len_kgc)
{
if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
return INTFOLD((int32_t)ir_kstr(fleft)->len);
return NEXTFOLD;
}
+LJFOLD(FLOAD SNEW IRFL_STR_LEN)  /* Length load from a string that was created by SNEW. */
+LJFOLDF(fload_str_len_snew)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {  /* Applied only when the JIT_F_OPT_FOLD flag is set. */
+ PHIBARRIER(fleft);
+ return fleft->op2;  /* Forward the second SNEW operand, i.e. the length it was created with. */
+ }
+ return NEXTFOLD;
+}
+
LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLDX(lj_opt_cse)
@@ -1216,20 +1253,28 @@ LJFOLDF(fwd_sload)
return J->slot[fins->op1];
}
-/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */
-LJFOLD(XLOAD STRREF any)
-LJFOLDF(xload_str)
+LJFOLD(XLOAD KPTR any)  /* XLOAD from a constant pointer. */
+LJFOLDF(xload_kptr)
{
- if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
- GCstr *str = ir_kstr(IR(fleft->op1));
- int32_t ofs = IR(fleft->op2)->i;
- lua_assert((MSize)ofs < str->len);
- lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len);
- return INTFOLD(kfold_xload(fins, strdata(str)+ofs));
- }
- return CSEFOLD;
+ /* Only fold read-only integer loads for now. */
+ if ((fins->op2 & IRXLOAD_READONLY) && irt_isinteger(fins->t))
+ return INTFOLD(kfold_xload(fins, ir_kptr(fleft)));  /* Perform the load at fold time via kfold_xload. */
+ return NEXTFOLD;  /* Loads without IRXLOAD_READONLY or of non-integer type go to the next rule. */
+}
+
+/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
+LJFOLD(XLOAD any any)
+LJFOLDF(fwd_xload)
+{
+ IRRef ref = J->chain[IR_XLOAD];  /* Head of the XLOAD chain, walked via ->prev below. */
+ IRRef op1 = fins->op1;
+ while (ref > op1) {  /* Stop once the chain reaches the address operand. */
+ if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t))
+ return ref;  /* Same address operand and same type: reuse the earlier load. */
+ ref = IR(ref)->prev;
+ }
+ return EMITFOLD;  /* No matching load found. */
}
-/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */
/* -- Write barriers ------------------------------------------------------ */
@@ -1279,12 +1324,11 @@ LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)
LJFOLD(NEWREF any any) /* Treated like a store. */
+LJFOLD(CALLS any any)
+LJFOLD(CALLL any any) /* Safeguard fallback. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
-LJFOLDF(store_raw)
-{
- return EMITFOLD;
-}
+LJFOLDX(lj_ir_emit)
/* ------------------------------------------------------------------------ */
@@ -1402,6 +1446,19 @@ TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
}
}
+/* CSE with explicit search limit. Only chain entries with ref > lim are candidates. */
+TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
+{
+ IRRef ref = J->chain[fins->o];  /* Chain for the opcode of the current instruction. */
+ IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);  /* Pack both operands into one value for comparison. */
+ while (ref > lim) {
+ if (IR(ref)->op12 == op12)  /* Both operands match. */
+ return ref;
+ ref = IR(ref)->prev;
+ }
+ return lj_ir_emit(J);  /* No match above the limit: emit the instruction. */
+}
+
/* ------------------------------------------------------------------------ */
#undef IR
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index f9a2a808..90ab1b6f 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -310,7 +310,13 @@ static void loop_unroll(jit_State *J)
/* Undo any partial changes made by the loop optimization. */
static void loop_undo(jit_State *J, IRRef ins)
{
+ ptrdiff_t i;
lj_ir_rollback(J, ins);
+ for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */
+ BPropEntry *bp = &J->bpropcache[i];
+ if (bp->val >= ins)
+ bp->key = 0;
+ }
for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */
IRIns *ir = IR(ins);
irt_clearphi(ir->t);
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 94fc4ad8..882ba6c5 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -307,14 +307,7 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
conflict:
/* Try to find a matching load. Below the conflicting store, if any. */
- ref = J->chain[IR_ULOAD];
- while (ref > lim) {
- IRIns *load = IR(ref);
- if (load->op1 == uref)
- return ref; /* Load forwarding. */
- ref = load->prev;
- }
- return EMITFOLD; /* Conflict or no match. */
+ return lj_opt_cselim(J, lim);
}
/* USTORE elimination. */
@@ -405,14 +398,7 @@ TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J)
conflict:
/* Try to find a matching load. Below the conflicting store, if any. */
- ref = J->chain[IR_FLOAD];
- while (ref > lim) {
- IRIns *load = IR(ref);
- if (load->op1 == oref && load->op2 == fid)
- return ref; /* Load forwarding. */
- ref = load->prev;
- }
- return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+ return lj_opt_cselim(J, lim);
}
/* FSTORE elimination. */
@@ -458,10 +444,10 @@ doemit:
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
}
-/* -- TLEN forwarding ----------------------------------------------------- */
+/* -- Forwarding of lj_tab_len -------------------------------------------- */
/* This is rather simplistic right now, but better than nothing. */
-TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
+TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
{
IRRef tab = fins->op1; /* Table reference. */
IRRef lim = tab; /* Search limit. */
@@ -484,14 +470,7 @@ TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
}
/* Try to find a matching load. Below the conflicting store, if any. */
- ref = J->chain[IR_TLEN];
- while (ref > lim) {
- IRIns *tlen = IR(ref);
- if (tlen->op1 == tab)
- return ref; /* Load forwarding. */
- ref = tlen->prev;
- }
- return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+ return lj_opt_cselim(J, lim);
}
/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 60a6afb8..b9107c5e 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -370,7 +370,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
{
lua_Number n;
- if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc))
+ if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
lj_trace_err(J, LJ_TRERR_BADTYPE);
n = numV(vc);
/* Limit narrowing for pow to small exponents (or for two constants). */
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 000772fe..1de07e92 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -317,6 +317,7 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len)
GCstr *s = lj_str_new(L, str, len);
TValue *tv = lj_tab_setstr(L, ls->fs->kt, s);
if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */
+ lj_gc_check(L);
return s;
}
diff --git a/src/lj_record.c b/src/lj_record.c
index 68a233b9..9b223ff6 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -441,7 +441,7 @@ static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META);
} else {
/* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
- mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]);
+ mt = tabref(basemt_obj(J2G(J), &ix->tabv));
if (mt == NULL)
return 0; /* No metamethod. */
mix.tab = lj_ir_ktab(J, mt);
@@ -855,7 +855,7 @@ typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd);
/* Get runtime value of int argument. */
static int32_t argv2int(jit_State *J, TValue *o)
{
- if (tvisstr(o) && !lj_str_numconv(strVdata(o), o))
+ if (tvisstr(o) && !lj_str_tonum(strV(o), o))
lj_trace_err(J, LJ_TRERR_BADTYPE);
return lj_num2bit(numV(o));
}
@@ -1017,6 +1017,8 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd)
/* Otherwise res[0] already contains the result. */
} else if (tref_isnumber(tr)) {
res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+ } else if (tref_ispri(tr)) {
+ res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[tref_type(tr)]));
} else {
recff_err_nyi(J, rd);
}
@@ -1165,10 +1167,16 @@ static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd)
res[0] = emitir(IRTN(IR_ATAN2), y, x);
}
+static void recff_math_htrig(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = lj_ir_tonum(J, arg[0]);  /* Convert the first argument to a number. */
+ res[0] = lj_ir_call(J, rd->data, tr);  /* rd->data is passed as the call id (IRCALL_* constants elsewhere in this file). */
+}
+
static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd)
{
TRef tr = arg[0];
- if (tref_isinteger(arg[0])) {
+ if (tref_isinteger(tr)) {
res[0] = tr;
res[1] = lj_ir_kint(J, 0);
} else {
@@ -1187,9 +1195,10 @@ static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd)
static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd)
{
+ TRef tr = lj_ir_tonum(J, arg[0]);
if (!tref_isnumber_str(arg[1]))
lj_trace_err(J, LJ_TRERR_BADTYPE);
- res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]);
+ res[0] = lj_opt_narrow_pow(J, tr, arg[1], &rd->argv[1]);
UNUSED(rd);
}
@@ -1203,6 +1212,32 @@ static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd)
res[0] = tr;
}
+static void recff_math_random(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ GCudata *ud = udataV(&rd->fn->c.upvalue[0]);  /* Userdata stored in the first upvalue of the recorded C function. */
+ TRef tr, one;
+ lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */
+ tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud)));  /* Step call gets a constant pointer to the userdata payload. */
+ one = lj_ir_knum_one(J);
+ tr = emitir(IRTN(IR_SUB), tr, one);  /* d = step result - 1.0; NOTE(review): presumably maps [1,2) to [0,1) -- confirm. */
+ if (arg[0]) {  /* At least one range argument was passed. */
+ TRef tr1 = lj_ir_tonum(J, arg[0]);
+ if (arg[1]) { /* d = floor(d*(r2-r1+1.0)) + r1 */
+ TRef tr2 = lj_ir_tonum(J, arg[1]);
+ tr2 = emitir(IRTN(IR_SUB), tr2, tr1);
+ tr2 = emitir(IRTN(IR_ADD), tr2, one);
+ tr = emitir(IRTN(IR_MUL), tr, tr2);
+ tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR);
+ tr = emitir(IRTN(IR_ADD), tr, tr1);
+ } else { /* d = floor(d*r1) + 1.0 */
+ tr = emitir(IRTN(IR_MUL), tr, tr1);
+ tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR);
+ tr = emitir(IRTN(IR_ADD), tr, one);
+ }
+ }
+ res[0] = tr;  /* Without arguments the result is d itself. */
+}
+
/* -- Bit library fast functions ------------------------------------------ */
/* Record unary bit.tobit, bit.bnot, bit.bswap. */
@@ -1321,7 +1356,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
for (i = 0; i < len; i++) {
TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i));
tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp);
- res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0);
+ res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
}
} else { /* Empty range or range underflow: return no results. */
emitir(IRTGI(IR_LE), trend, trstart);
@@ -1335,7 +1370,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd)
{
if (tref_istab(arg[0])) {
- res[0] = emitir(IRTI(IR_TLEN), arg[0], 0);
+ res[0] = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
} /* else: Interpreter will throw. */
UNUSED(rd);
}
@@ -1344,7 +1379,7 @@ static void recff_table_remove(jit_State *J, TRef *res, RecordFFData *rd)
{
if (tref_istab(arg[0])) {
if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */
- TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0);
+ TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
GCtab *t = tabV(&rd->argv[0]);
MSize len = lj_tab_len(t);
emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
@@ -1376,7 +1411,7 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd)
rd->nres = 0;
if (tref_istab(arg[0]) && arg[1]) {
if (!arg[2]) { /* Simple push: t[#t+1] = v */
- TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0);
+ TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
GCtab *t = tabV(&rd->argv[0]);
RecordIndex ix;
ix.tab = arg[0];
@@ -1392,6 +1427,62 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd)
} /* else: Interpreter will throw. */
}
+/* -- I/O library fast functions ------------------------------------------ */
+
+/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
+** no need to encode the alternate cases for any of the guards.
+*/
+static TRef recff_io_fp(jit_State *J, TRef *res, uint32_t id)
+{
+ TRef tr, ud, fp;
+ if (id) { /* io.func() */
+ tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);  /* Constant address of the gcroot[id] slot. */
+ ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);  /* Load the userdata from that slot. */
+ } else { /* fp:method() */
+ ud = arg[0];
+ if (!tref_isudata(ud))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ tr = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
+ emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE));  /* Guard: the userdata type is UDTYPE_IO_FILE. */
+ }
+ fp = emitir(IRT(IR_FLOAD, IRT_LIGHTUD), ud, IRFL_UDATA_FILE);
+ emitir(IRTG(IR_NE, IRT_LIGHTUD), fp, lj_ir_knull(J, IRT_LIGHTUD));  /* Guard: the loaded file pointer is not NULL. */
+ return fp;
+}
+
+static void recff_io_write(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef fp = recff_io_fp(J, res, rd->data);
+ TRef zero = lj_ir_kint(J, 0);
+ TRef one = lj_ir_kint(J, 1);
+ ptrdiff_t i = rd->data == 0 ? 1 : 0;  /* Method form (rd->data == 0): arg[0] is the file object, so start at arg[1]. */
+ for (; arg[i]; i++) {
+ TRef str = lj_ir_tostr(J, arg[i]);
+ TRef buf = emitir(IRT(IR_STRREF, IRT_PTR), str, zero);  /* Pointer to the string data at offset 0. */
+ TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
+ if (tref_isk(len) && IR(tref_ref(len))->i == 1) {  /* Constant length 1: load the byte and use fputc. */
+ TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
+ tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
+ if (rd->cres != 0) /* Check result only if requested. */
+ emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));  /* Guard: fputc did not return -1. */
+ } else {
+ TRef tr = lj_ir_call(J, IRCALL_fwrite, buf, one, len, fp);  /* fwrite of len items of size 1. */
+ if (rd->cres != 0) /* Check result only if requested. */
+ emitir(IRTGI(IR_EQ), tr, len);  /* Guard: the fwrite result equals len. */
+ }
+ }
+ res[0] = TREF_TRUE;
+}
+
+static void recff_io_flush(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef fp = recff_io_fp(J, res, rd->data);  /* Guarded file pointer; rd->data is forwarded as the id. */
+ TRef tr = lj_ir_call(J, IRCALL_fflush, fp);
+ if (rd->cres != 0) /* Check result only if requested. */
+ emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0));  /* Guard: fflush returned 0. */
+ res[0] = TREF_TRUE;
+}
+
/* -- Record calls and returns -------------------------------------------- */
#undef arg
@@ -1696,6 +1787,9 @@ void lj_record_ins(jit_State *J)
if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1;
} else if (ta == IRT_STR) {
if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
+ ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc);
+ rc = lj_ir_kint(J, 0);
+ ta = IRT_INT;
} else {
rec_mm_comp(J, &ix, (int)op);
break;
@@ -1745,7 +1839,7 @@ void lj_record_ins(jit_State *J)
if (tref_isstr(rc)) {
rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
} else if (tref_istab(rc)) {
- rc = emitir(IRTI(IR_TLEN), rc, 0);
+ rc = lj_ir_call(J, IRCALL_lj_tab_len, rc);
} else {
ix.tab = rc;
copyTV(J->L, &ix.tabv, &ix.keyv);
@@ -1879,8 +1973,6 @@ void lj_record_ins(jit_State *J)
/* fallthrough */
case BC_CALL:
callop:
- if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */
- }
rec_call(J, ra, (int)(rb-1), (int)(rc-1));
break;
@@ -2064,8 +2156,11 @@ static void rec_setup_side(jit_State *J, Trace *T)
BCReg j;
for (j = 0; j < s; j++)
if (snap_ref(map[j]) == ref) {
- if (ir->o == IR_FRAME && irt_isfunc(ir->t))
+ if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
+ lua_assert(s != 0);
J->baseslot = s+1;
+ J->framedepth++;
+ }
tr = J->slot[j];
goto dupslot;
}
@@ -2078,8 +2173,10 @@ static void rec_setup_side(jit_State *J, Trace *T)
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
if (irt_isfunc(ir->t)) {
- J->baseslot = s+1;
- J->framedepth++;
+ if (s != 0) {
+ J->baseslot = s+1;
+ J->framedepth++;
+ }
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
} else {
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 09cd095c..d27404f2 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -251,9 +251,9 @@ void lj_snap_restore(jit_State *J, void *exptr)
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
if (isluafunc(fn)) {
TValue *fs;
- newbase = o+1;
- fs = newbase + funcproto(fn)->framesize;
+ fs = o+1 + funcproto(fn)->framesize;
if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
+ if (s != 0) newbase = o+1;
}
}
}
@@ -262,21 +262,17 @@ void lj_snap_restore(jit_State *J, void *exptr)
setnilV(o); /* Clear unreferenced slots of newly added frames. */
}
}
- if (newbase) { /* Clear remainder of newly added frames. */
- L->base = newbase;
- if (ntop >= L->maxstack) { /* Need to grow the stack again. */
- MSize need = (MSize)(ntop - o);
- L->top = o;
- lj_state_growstack(L, need);
- o = L->top;
- ntop = o + need;
- }
- L->top = curr_topL(L);
- for (; o < ntop; o++)
- setnilV(o);
- } else { /* Must not clear slots of existing frame. */
- L->top = curr_topL(L);
+ if (newbase) L->base = newbase;
+ if (ntop >= L->maxstack) { /* Need to grow the stack again. */
+ MSize need = (MSize)(ntop - o);
+ L->top = o;
+ lj_state_growstack(L, need);
+ o = L->top;
+ ntop = o + need;
}
+ L->top = curr_topL(L);
+ for (; o < ntop; o++) /* Clear remainder of newly added frames. */
+ setnilV(o);
lua_assert(map + nslots == flinks-1);
J->pc = (const BCIns *)(uintptr_t)(*--flinks);
}
diff --git a/src/lj_state.h b/src/lj_state.h
index 54e85405..4e4185c0 100644
--- a/src/lj_state.h
+++ b/src/lj_state.h
@@ -17,7 +17,7 @@
LJ_FUNC void lj_state_relimitstack(lua_State *L);
LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need);
-LJ_FUNCA void lj_state_growstack1(lua_State *L);
+LJ_FUNC void lj_state_growstack1(lua_State *L);
static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
{
diff --git a/src/lj_str.c b/src/lj_str.c
index 26f91cba..62322b59 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -21,7 +21,7 @@
/* -- String interning ---------------------------------------------------- */
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
-int32_t lj_str_cmp(GCstr *a, GCstr *b)
+int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
{
MSize i, n = a->len > b->len ? b->len : a->len;
for (i = 0; i < n; i += 4) {
@@ -119,8 +119,14 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
/* -- Type conversions ---------------------------------------------------- */
+/* Convert string object to number. Passes strdata(str) to lj_str_numconv and returns its result. */
+int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n)
+{
+ return lj_str_numconv(strdata(str), n);
+}
+
/* Convert string to number. */
-int lj_str_numconv(const char *s, TValue *n)
+int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n)
{
lua_Number sign = 1;
const uint8_t *p = (const uint8_t *)s;
@@ -167,7 +173,7 @@ parsedbl:
}
/* Convert number to string. */
-GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
+GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
{
char s[LUAI_MAXNUMBER2STR];
lua_Number n = *np;
@@ -176,7 +182,7 @@ GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
}
/* Convert integer to string. */
-GCstr *lj_str_fromint(lua_State *L, int32_t k)
+GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
{
char s[1+10];
char *p = s+sizeof(s);
diff --git a/src/lj_str.h b/src/lj_str.h
index f7e56d16..e8b242c0 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -11,7 +11,7 @@
#include "lj_obj.h"
/* String interning. */
-LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b);
+LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
@@ -20,9 +20,10 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
/* Type conversions. */
-LJ_FUNCA int lj_str_numconv(const char *s, TValue *n);
-LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np);
-LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k);
+LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n);
+LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n);
+LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
+LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
/* String formatting. */
LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
diff --git a/src/lj_tab.c b/src/lj_tab.c
index 9af51027..ceafb770 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -160,8 +160,16 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
return t;
}
+GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)  /* Create a table from two sizes packed into the single ahsize argument. */
+{
+ GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24);  /* Low 24 bits and high 8 bits; presumably asize and hbits as in lj_tab_new -- confirm against newtab. */
+ clearapart(t);
+ if (t->hmask > 0) clearhpart(t);  /* Clear the hash part only if the table has one. */
+ return t;
+}
+
/* Duplicate a table. */
-GCtab *lj_tab_dup(lua_State *L, const GCtab *kt)
+GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
{
GCtab *t;
uint32_t asize, hmask;
@@ -334,8 +342,8 @@ static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray)
static uint32_t bestasize(uint32_t bins[], uint32_t *narray)
{
uint32_t b, sum, na = 0, sz = 0, nn = *narray;
- for (b = 0, sum = 0; (1u< 0 && (sum += bins[b]) >= (1u< (1u< 0 && 2*(sum += bins[b]) > (1u<asize;
if (j > 1 && tvisnil(arrayslot(t, j-1))) {
diff --git a/src/lj_tab.h b/src/lj_tab.h
index e9e8bcd1..b2a8c3aa 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -11,7 +11,8 @@
#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
-LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt);
+LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
+LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
@@ -36,6 +37,6 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
(inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
-LJ_FUNCA MSize lj_tab_len(GCtab *t);
+LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
#endif
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 3ee4fa00..2fb3c4b8 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -32,6 +32,11 @@ enum {
/* Calling conventions. */
RID_RET = RID_EAX,
+#if LJ_64
+ RID_FPRET = RID_XMM0,
+#else
+ RID_RETHI = RID_EDX,
+#endif
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_EDX, /* Interpreter BASE. */
@@ -98,8 +103,8 @@ enum {
};
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
-#define sps_scale(slot) (4 * (int32_t)(slot))
-#define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3))
+#define sps_scale(slot) (4 * (int32_t)(slot))
+#define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3))
/* -- Exit state ---------------------------------------------------------- */
@@ -185,6 +190,7 @@ typedef enum {
XO_ARITHib = XO_(80),
XO_ARITHi = XO_(81),
XO_ARITHi8 = XO_(83),
+ XO_ARITHiw8 = XO_66(83),
XO_SHIFTi = XO_(c1),
XO_SHIFT1 = XO_(d1),
XO_SHIFTcl = XO_(d3),
@@ -216,6 +222,7 @@ typedef enum {
XO_CVTSI2SD = XO_f20f(2a),
XO_CVTSD2SI = XO_f20f(2d),
XO_CVTTSD2SI= XO_f20f(2c),
+ XO_MOVD = XO_660f(6e),
XO_MOVDto = XO_660f(7e),
XO_FLDq = XO_(dd), XOg_FLDq = 0,
diff --git a/src/lj_udata.c b/src/lj_udata.c
index 863889c9..717d483b 100644
--- a/src/lj_udata.c
+++ b/src/lj_udata.c
@@ -16,6 +16,7 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
global_State *g = G(L);
newwhite(g, ud); /* Not finalized. */
ud->gct = ~LJ_TUDATA;
+ ud->udtype = UDTYPE_USERDATA;
ud->len = sz;
/* NOBARRIER: The GCudata is new (marked white). */
setgcrefnull(ud->metatable);