diff --git a/src/Makefile b/src/Makefile
index 30d64be2..68a9a7cd 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -54,9 +54,9 @@ CCOPT_arm64=
CCOPT_ppc=
CCOPT_mips=
#
-CCDEBUG=
+#CCDEBUG=
# Uncomment the next line to generate debug information:
-#CCDEBUG= -g
+CCDEBUG= -g
#
CCWARN= -Wall
# Uncomment the next line to enable more warnings:
@@ -244,6 +244,9 @@ else
ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm
else
+ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= s390x
+else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__AARCH64EB__=1
@@ -275,6 +278,7 @@ endif
endif
endif
endif
+endif
ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
TARGET_SYS= PS3
@@ -461,7 +465,16 @@ ifeq (ppc,$(TARGET_LJARCH))
DASM_AFLAGS+= -D GPR64
endif
ifeq (PS3,$(TARGET_SYS))
- DASM_AFLAGS+= -D PPE -D TOC
+ DASM_AFLAGS+= -D PPE
+ endif
+ ifneq (,$(findstring LJ_ARCH_PPC_OPD 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D OPD
+ endif
+ ifneq (,$(findstring LJ_ARCH_PPC_OPDENV 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D OPDENV
+ endif
+ ifneq (,$(findstring LJ_ARCH_PPC_ELFV2 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D ELFV2
endif
endif
endif
@@ -501,10 +514,16 @@ LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
lj_carith.o lj_clib.o lj_cparse.o \
lj_lib.o lj_alloc.o lib_aux.o \
- $(LJLIB_O) lib_init.o
+ $(LJLIB_O) lib_init.o lj_str_hash.o
+
+ifeq (x64,$(TARGET_LJARCH))
+ lj_str_hash-CFLAGS = -msse4.2
+endif
+
+F_CFLAGS = $($(patsubst %.c,%-CFLAGS,$<))
LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
-LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
+LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) lj_init_dyn.o
LIB_VMDEF= jit/vmdef.lua
LIB_VMDEFP= $(LIB_VMDEF)
@@ -526,7 +545,7 @@ ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
##############################################################################
# Mixed mode defaults.
-TARGET_O= $(LUAJIT_A)
+TARGET_O= lj_init.o $(LUAJIT_A)
TARGET_T= $(LUAJIT_T) $(LUAJIT_SO)
TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)
@@ -608,7 +627,7 @@ E= @echo
default all: $(TARGET_T)
amalg:
- $(MAKE) all "LJCORE_O=ljamalg.o"
+ $(MAKE) all "LJCORE_O=ljamalg.o lj_str_hash.o"
clean:
$(HOST_RM) $(ALL_RM)
@@ -685,8 +704,8 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
%.o: %.c
$(E) "CC $@"
- $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
- $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+ $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $(@:.o=_dyn.o) $<
+ $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $@ $<
%.o: %.S
$(E) "ASM $@"
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 9ee47ada..4efda1ba 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -18,10 +18,8 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_bc.h"
-#if LJ_HASJIT
#include "lj_ir.h"
#include "lj_ircall.h"
-#endif
#include "lj_frame.h"
#include "lj_dispatch.h"
#if LJ_HASFFI
@@ -67,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#include "../dynasm/dasm_ppc.h"
#elif LJ_TARGET_MIPS
#include "../dynasm/dasm_mips.h"
+#elif LJ_TARGET_S390X
+#include "../dynasm/dasm_s390x.h"
#else
#error "No support for this architecture (yet)"
#endif
@@ -252,7 +252,6 @@ BCDEF(BCNAME)
NULL
};
-#if LJ_HASJIT
const char *const ir_names[] = {
#define IRNAME(name, m, m1, m2) #name,
IRDEF(IRNAME)
@@ -293,9 +292,7 @@ static const char *const trace_errors[] = {
#include "lj_traceerr.h"
NULL
};
-#endif
-#if LJ_HASJIT
static const char *lower(char *buf, const char *s)
{
char *p = buf;
@@ -306,7 +303,6 @@ static const char *lower(char *buf, const char *s)
*p = '\0';
return buf;
}
-#endif
/* Emit C source code for bytecode-related definitions. */
static void emit_bcdef(BuildCtx *ctx)
@@ -324,9 +320,7 @@ static void emit_bcdef(BuildCtx *ctx)
/* Emit VM definitions as Lua code for debug modules. */
static void emit_vmdef(BuildCtx *ctx)
{
-#if LJ_HASJIT
char buf[80];
-#endif
int i;
fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
fprintf(ctx->fp, "return {\n\n");
@@ -335,7 +329,6 @@ static void emit_vmdef(BuildCtx *ctx)
for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
fprintf(ctx->fp, "\",\n\n");
-#if LJ_HASJIT
fprintf(ctx->fp, "irnames = \"");
for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
fprintf(ctx->fp, "\",\n\n");
@@ -364,7 +357,6 @@ static void emit_vmdef(BuildCtx *ctx)
for (i = 0; trace_errors[i]; i++)
fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
fprintf(ctx->fp, "},\n\n");
-#endif
}
/* -- Argument parsing ---------------------------------------------------- */
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 7baa011f..e73f9b17 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -87,6 +87,54 @@ err:
}
fprintf(ctx->fp, "\t%s %s\n", opname, sym);
}
+#elif LJ_TARGET_S390X
+/* Emit halfwords piecewise as assembler text (n = length in bytes). */
+static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n)
+{
+  int i;
+  n /= 2;
+  for (i = 0; i < n; i++) {
+    /* Combine bytes in target (big-endian) order; host-independent. */
+    unsigned int hw = ((unsigned int)p[2*i] << 8) | p[2*i+1];
+    if ((i & 7) == 0) fprintf(ctx->fp, "\t.hword 0x%x", hw);
+    else fprintf(ctx->fp, ",0x%x", hw);
+    if ((i & 7) == 7) putc('\n', ctx->fp);
+  }
+  /* Terminate a partially filled last line. */
+  if ((n & 7) != 0) putc('\n', ctx->fp);
+}
+
+/* Emit s390x text relocations (bras/brasl/brc/brcl against symbol sym). */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
+                                const char *sym)
+{
+  if (n & 1 || n < 2) {
+    fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n);
+    exit(1);
+  }
+  n -= 2;  /* The last halfword is the opcode to re-emit symbolically. */
+  const char *opname = NULL;
+  const char *argt = ""; /* Inserted before argument. */
+  int opcode = (cp[n] << 8) | cp[n+1];  /* Big-endian read on any host. */
+  int arg = (opcode>>4) & 0xf;
+  switch (opcode & 0xff0f) {
+  case 0xa705: opname = "bras"; argt = "%r"; break;
+  case 0xc005: opname = "brasl"; argt = "%r"; break;
+  case 0xa704: opname = "brc"; break;
+  case 0xc004: opname = "brcl"; break;
+  default:
+    fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
+            sym);
+    exit(1);
+  }
+  emit_asm_halfwords(ctx, cp, n);
+  if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+    /* Various fixups for external symbols outside of our binary. */
+    fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym);
+    return;
+  }
+  fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym);
+}
#else
/* Emit words piecewise as assembler text. */
static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
@@ -140,7 +188,11 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
#else
#define TOCPREFIX ""
#endif
- if ((ins >> 26) == 16) {
+ if ((ins >> 26) == 14) {
+ fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
+ } else if ((ins >> 26) == 15) {
+ fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
+ } else if ((ins >> 26) == 16) {
fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
(ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
} else if ((ins >> 26) == 18) {
@@ -242,6 +294,9 @@ void emit_asm(BuildCtx *ctx)
int i, rel;
fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if LJ_ARCH_PPC_ELFV2
+ fprintf(ctx->fp, "\t.abiversion 2\n");
+#endif
fprintf(ctx->fp, "\t.text\n");
emit_asm_align(ctx, 4);
@@ -299,6 +354,9 @@ void emit_asm(BuildCtx *ctx)
emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
}
ofs += n+4;
+#elif LJ_TARGET_S390X
+ emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+ ofs += n+4;
#else
emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
ofs += n;
@@ -307,6 +365,8 @@ void emit_asm(BuildCtx *ctx)
}
#if LJ_TARGET_X86ORX64
emit_asm_bytes(ctx, ctx->code+ofs, next-ofs);
+#elif LJ_TARGET_S390X
+ emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs);
#else
emit_asm_words(ctx, ctx->code+ofs, next-ofs);
#endif
diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c
index edb55768..7f9ac058 100644
--- a/src/host/buildvm_fold.c
+++ b/src/host/buildvm_fold.c
@@ -5,7 +5,6 @@
#include "buildvm.h"
#include "lj_obj.h"
-#if LJ_HASJIT
#include "lj_ir.h"
/* Context for the folding hash table generator. */
@@ -227,10 +226,4 @@ void emit_fold(BuildCtx *ctx)
makehash(ctx);
}
-#else
-void emit_fold(BuildCtx *ctx)
-{
- UNUSED(ctx);
-}
-#endif
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
index 276463b2..8f07f436 100644
--- a/src/host/buildvm_libbc.h
+++ b/src/host/buildvm_libbc.h
@@ -4,67 +4,42 @@ static const int libbc_endian = 0;
static const uint8_t libbc_code[] = {
#if LJ_FR2
-/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
-220,203,178,130,4,
-/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
-198,190,199,252,3,
-/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
-/* table.foreachi */ 0,2,10,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
-BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
-BC_MOV,8,5,0,BC_TGETR,9,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
-BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
-/* table.foreach */ 0,2,11,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
-2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,9,5,0,
-BC_MOV,10,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
-BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
-/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
-/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
-0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
-BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
-BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
-BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
-BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
-BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
-/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
-2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
-4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
-128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
-BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
-BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
-BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
-11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,1,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,42,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
+0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,1,255,255,249,255,15,
+0,1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,
+0,11,1,0,0,88,3,7,128,8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,
+0,88,3,18,128,16,1,14,0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,
+3,1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,
+252,127,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,
+1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,
+5,24,128,33,5,1,3,0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,
+18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,
+127,88,6,8,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,
+0,64,11,10,4,79,6,252,127,76,4,2,0,0
#else
-/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
-220,203,178,130,4,
-/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
-198,190,199,252,3,
-/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
-/* table.foreachi */ 0,2,9,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
-BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
-BC_MOV,7,5,0,BC_TGETR,8,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
-BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
-/* table.foreach */ 0,2,10,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
-2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,8,5,0,
-BC_MOV,9,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
-BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
-/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
-/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
-0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
-BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
-BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
-BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
-BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
-BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
-/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
-2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
-4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
-128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
-BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
-BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
-BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
-11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,1,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,42,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
+0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,1,255,255,249,255,15,0,
+1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,
+11,1,0,0,88,3,7,128,8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,
+88,3,18,128,16,1,14,0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,
+1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,
+127,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,
+14,0,16,2,14,0,16,3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,
+5,24,128,33,5,1,3,0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,
+18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,
+127,88,6,8,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,
+0,64,11,10,4,79,6,252,127,76,4,2,0,0
#endif
-0
};
static const struct { const char *name; int ofs; } libbc_map[] = {
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
index ba18812c..072a7495 100644
--- a/src/host/genlibbc.lua
+++ b/src/host/genlibbc.lua
@@ -79,11 +79,9 @@ local name2itype = {
str = 5, func = 9, tab = 12, int = 14, num = 15
}
-local BC, BCN = {}, {}
+local BC = {}
for i=0,#bcnames/6-1 do
- local name = bcnames:sub(i*6+1, i*6+6):gsub(" ", "")
- BC[name] = i
- BCN[i] = name
+ BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
end
local xop, xra = isbe and 3 or 0, isbe and 2 or 1
local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
@@ -98,7 +96,6 @@ local function fixup_dump(dump, fixup)
p = read_uleb128(p)
p = read_uleb128(p)
p, sizebc = read_uleb128(p)
- local startbc = tonumber(p - start)
local rawtab = {}
for i=0,sizebc-1 do
local op = p[xop]
@@ -135,7 +132,7 @@ local function fixup_dump(dump, fixup)
local ndump = ffi.string(start, n)
-- Fixup hi-part of 0x4dp80 to LJ_KEYINDEX.
ndump = ndump:gsub("\x80\x80\xcd\xaa\x04", "\xff\xff\xf9\xff\x0f")
- return { dump = ndump, startbc = startbc, sizebc = sizebc }
+ return ndump
end
local function find_defs(src)
@@ -155,46 +152,24 @@ local function gen_header(defs)
local function w(x) t[#t+1] = x end
w("/* This is a generated file. DO NOT EDIT! */\n\n")
w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
- local s, sb = "", ""
- for i,name in ipairs(defs) do
- local d = defs[name]
- s = s .. d.dump
- sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1)
- .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
- .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
+ local s = ""
+ for _,name in ipairs(defs) do
+ s = s .. defs[name]
end
w("static const uint8_t libbc_code[] = {\n")
local n = 0
for i=1,#s do
local x = string.byte(s, i)
- local xb = string.byte(sb, i)
- if xb == 255 then
- local name = BCN[x]
- local m = #name + 4
- if n + m > 78 then n = 0; w("\n") end
- n = n + m
- w("BC_"); w(name)
- else
- local m = x < 10 and 2 or (x < 100 and 3 or 4)
- if xb == 0 then
- if n + m > 78 then n = 0; w("\n") end
- else
- local name = defs[xb]:gsub("_", ".")
- if n ~= 0 then w("\n") end
- w("/* "); w(name); w(" */ ")
- n = #name + 7
- end
- n = n + m
- w(x)
- end
- w(",")
+ w(x); w(",")
+ n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
+ if n >= 75 then n = 0; w("\n") end
end
- w("\n0\n};\n\n")
+ w("0\n};\n\n")
w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
local m = 0
for _,name in ipairs(defs) do
w('{"'); w(name); w('",'); w(m) w('},\n')
- m = m + #defs[name].dump
+ m = m + #defs[name]
end
w("{NULL,"); w(m); w("}\n};\n\n")
return table.concat(t)
diff --git a/src/host/genminilua.lua b/src/host/genminilua.lua
index e8e86c53..a72ef2ef 100644
--- a/src/host/genminilua.lua
+++ b/src/host/genminilua.lua
@@ -327,12 +327,6 @@ local function rename_tokens2(src)
return gsub(src, "ZY([%w_]+)", "union %1")
end
-local function fix_bugs_and_warnings(src)
- src = gsub(src, "(luaD_checkstack%(L,p%->maxstacksize)%)", "%1+p->numparams)")
- src = gsub(src, "if%(sep==%-1%)(return'%[';)\nelse (luaX_lexerror%b();)", "if (sep!=-1)%2\n%1")
- return gsub(src, "(default:{\nNode%*n=mainposition)", "/*fallthrough*/\n%1")
-end
-
local function func_gather(src)
local nodes, list = {}, {}
local pos, len = 1, #src
@@ -431,6 +425,5 @@ src = rename_tokens1(src)
src = func_collect(src)
src = rename_tokens2(src)
src = restore_strings(src)
-src = fix_bugs_and_warnings(src)
src = merge_header(src, license)
io.write(src)
diff --git a/src/host/minilua.c b/src/host/minilua.c
index 76f32aed..cfc7491d 100644
--- a/src/host/minilua.c
+++ b/src/host/minilua.c
@@ -1639,7 +1639,6 @@ lua_number2int(k,n);
if(luai_numeq(cast_num(k),nvalue(key)))
return luaH_getnum(t,k);
}
-/*fallthrough*/
default:{
Node*n=mainposition(t,key);
do{
@@ -2906,8 +2905,8 @@ if(sep>=0){
read_long_string(ls,seminfo,sep);
return TK_STRING;
}
-else if (sep!=-1)luaX_lexerror(ls,"invalid long string delimiter",TK_STRING);
-return'[';
+else if(sep==-1)return'[';
+else luaX_lexerror(ls,"invalid long string delimiter",TK_STRING);
}
case'=':{
next(ls);
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 8d0844c0..031b5902 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -63,15 +63,21 @@ local function ctlsub(c)
end
-- Return one bytecode line.
-local function bcline(func, pc, prefix)
- local ins, m = funcbc(func, pc)
+local function bcline(func, pc, prefix, lineinfo)
+ local ins, m, l = funcbc(func, pc, lineinfo and 1 or 0)
if not ins then return end
local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128)
local a = band(shr(ins, 8), 0xff)
local oidx = 6*band(ins, 0xff)
local op = sub(bcnames, oidx+1, oidx+6)
- local s = format("%04d %s %-6s %3s ",
- pc, prefix or " ", op, ma == 0 and "" or a)
+ local s
+ if lineinfo then
+ s = format("%04d %7s %s %-6s %3s ",
+ pc, "["..l.."]", prefix or " ", op, ma == 0 and "" or a)
+ else
+ s = format("%04d %s %-6s %3s ",
+ pc, prefix or " ", op, ma == 0 and "" or a)
+ end
local d = shr(ins, 16)
if mc == 13*128 then -- BCMjump
return format("%s=> %04d\n", s, pc+d-0x7fff)
@@ -124,20 +130,52 @@ local function bctargets(func)
end
-- Dump bytecode instructions of a function.
-local function bcdump(func, out, all)
+local function bcdump(func, out, all, lineinfo)
if not out then out = stdout end
local fi = funcinfo(func)
if all and fi.children then
for n=-1,-1000000000,-1 do
local k = funck(func, n)
if not k then break end
- if type(k) == "proto" then bcdump(k, out, true) end
+ if type(k) == "proto" then bcdump(k, out, true, lineinfo) end
end
end
out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined))
+
+ for n=-1,-1000000000,-1 do
+ local kc = funck(func, n)
+ if not kc then break end
+
+ local typ = type(kc)
+ if typ == "string" then
+ kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub))
+ out:write(format("KGC %d %s\n", -(n + 1), kc))
+ elseif typ == "proto" then
+ local fi = funcinfo(kc)
+ if fi.ffid then
+ kc = vmdef.ffnames[fi.ffid]
+ else
+ kc = fi.loc
+ end
+ out:write(format("KGC %d %s\n", -(n + 1), kc))
+ elseif typ == "table" then
+ out:write(format("KGC %d table\n", -(n + 1)))
+ else
+ -- error("unknown KGC type: " .. typ)
+ end
+ end
+
+  for n=0,1000000000 do -- Number constants are indexed from 0, like KGC above.
+    local kc = funck(func, n)
+    if not kc then break end
+    if type(kc) == "number" then
+      out:write(format("KN %d %s\n", n, kc))
+    end
+  end
+
local target = bctargets(func)
for pc=1,1000000000 do
- local s = bcline(func, pc, target[pc] and "=>")
+ local s = bcline(func, pc, target[pc] and "=>", lineinfo)
if not s then break end
out:write(s)
end
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 6227d136..7cb23f17 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -27,6 +27,7 @@ local function usage()
io.stderr:write[[
Save LuaJIT bytecode: luajit -b[options] input output
-l Only list bytecode.
+ -L Only list bytecode with lineinfo.
-s Strip debug info (default).
-g Keep debug info.
-n name Set module name (default: auto-detect from input name).
@@ -592,9 +593,9 @@ end
------------------------------------------------------------------------------
-local function bclist(input, output)
+local function bclist(input, output, lineinfo)
local f = readfile(input)
- require("jit.bc").dump(f, savefile(output, "w"), true)
+ require("jit.bc").dump(f, savefile(output, "w"), true, lineinfo)
end
local function bcsave(ctx, input, output)
@@ -621,6 +622,7 @@ local function docmd(...)
local arg = {...}
local n = 1
local list = false
+ local lineinfo = false
local ctx = {
strip = true, arch = jit.arch, os = jit.os:lower(),
type = false, modname = false,
@@ -634,6 +636,9 @@ local function docmd(...)
local opt = a:sub(m, m)
if opt == "l" then
list = true
+ elseif opt == "L" then
+ list = true
+ lineinfo = true
elseif opt == "s" then
ctx.strip = true
elseif opt == "g" then
@@ -662,7 +667,7 @@ local function docmd(...)
end
if list then
if #arg == 0 or #arg > 2 then usage() end
- bclist(arg[1], arg[2] or "-")
+ bclist(arg[1], arg[2] or "-", lineinfo)
else
if #arg ~= 2 then usage() end
bcsave(ctx, arg[1], arg[2])
diff --git a/src/jit/dis_s390x.lua b/src/jit/dis_s390x.lua
new file mode 100644
index 00000000..99dc7484
--- /dev/null
+++ b/src/jit/dis_s390x.lua
@@ -0,0 +1,1594 @@
+----------------------------------------------------------------------------
+-- LuaJIT s390x disassembler module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+--
+-- Contributed by Aditya Bisht from Open Mainframe.
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- NYI:
+------------------------------------------------------------------------------
+
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local lower, rep = string.lower, string.rep
+local bit = require("bit")
+local band, lshift, bor, rshift = bit.band, bit.lshift, bit.bor, bit.rshift
+local tohex = bit.tohex
+
+local ONELONG = "%08lx: " -- NOTE(review): C printf-style "l" length modifier; Lua's string.format does not support it -- confirm how this is used.
+
+local OPERAND_GPR = 0x1 -- Operand printed as %rx
+local OPERAND_FPR = 0x2 -- Operand printed as %fx
+local OPERAND_AR = 0x4 -- Operand printed as %ax
+local OPERAND_CR = 0x8 -- Operand printed as %cx
+local OPERAND_DISP = 0x10 -- Operand printed as displacement
+local OPERAND_BASE = 0x20 -- Operand printed as base register
+local OPERAND_INDEX = 0x40 -- Operand printed as index register
+local OPERAND_PCREL = 0x80 -- Operand printed as pc-relative symbol
+local OPERAND_SIGNED = 0x100 -- Operand printed as signed value
+local OPERAND_LENGTH = 0x200 -- Operand printed as length (+1)
+
+-- Operand identifiers: registers, displacements, lengths, immediates and masks.
+
+local UNUSED = 0 -- Indicates the end of the operand list
+local R_8 = 1 -- GPR starting at position 8
+local R_12 = 2 -- GPR starting at position 12
+local R_16 = 3 -- GPR starting at position 16
+local R_20 = 4 -- GPR starting at position 20
+local R_24 = 5 -- GPR starting at position 24
+local R_28 = 6 -- GPR starting at position 28
+local R_32 = 7 -- GPR starting at position 32
+local F_8 = 8 -- FPR starting at position 8
+local F_12 = 9 -- FPR starting at position 12
+local F_16 = 10 -- FPR starting at position 16
+local F_20 = 11 -- FPR starting at position 20
+local F_24 = 12 -- FPR starting at position 24
+local F_28 = 13 -- FPR starting at position 28
+local F_32 = 14 -- FPR starting at position 32
+local A_8 = 15 -- Access reg. starting at position 8
+local A_12 = 16 -- Access reg. starting at position 12
+local A_24 = 17 -- Access reg. starting at position 24
+local A_28 = 18 -- Access reg. starting at position 28
+local C_8 = 19 -- Control reg. starting at position 8
+local C_12 = 20 -- Control reg. starting at position 12
+local B_16 = 21 -- Base register starting at position 16
+local B_32 = 22 -- Base register starting at position 32
+local X_12 = 23 -- Index register starting at position 12
+local D_20 = 24 -- Displacement starting at position 20
+local D_36 = 25 -- Displacement starting at position 36
+local D20_20 = 26 -- 20 bit displacement starting at 20
+local L4_8 = 27 -- 4 bit length starting at position 8
+local L4_12 = 28 -- 4 bit length starting at position 12
+local L8_8 = 29 -- 8 bit length starting at position 8
+local U4_8 = 30 -- 4 bit unsigned value starting at 8
+local U4_12 = 31 -- 4 bit unsigned value starting at 12
+local U4_16 = 32 -- 4 bit unsigned value starting at 16
+local U4_20 = 33 -- 4 bit unsigned value starting at 20
+local U4_32 = 34 -- 4 bit unsigned value starting at 32
+local U8_8 = 35 -- 8 bit unsigned value starting at 8
+local U8_16 = 36 -- 8 bit unsigned value starting at 16
+local U8_24 = 37 -- 8 bit unsigned value starting at 24
+local U8_32 = 38 -- 8 bit unsigned value starting at 32
+local I8_8 = 39 -- 8 bit signed value starting at 8
+local I8_32 = 40 -- 8 bit signed value starting at 32
+local I16_16 = 41 -- 16 bit signed value starting at 16
+local I16_32 = 42 -- 16 bit signed value starting at 32
+local U16_16 = 43 -- 16 bit unsigned value starting at 16
+local U16_32 = 44 -- 16 bit unsigned value starting at 32
+local J16_16 = 45 -- PC relative jump offset at 16
+local J32_16 = 46 -- PC relative long offset at 16
+local I32_16 = 47 -- 32 bit signed value starting at 16
+local U32_16 = 48 -- 32 bit unsigned value starting at 16
+local M_16 = 49 -- 4 bit optional mask starting at 16
+local RO_28 = 50 -- optional GPR starting at position 28
+
+-- Enumeration of the different instruction formats.
+-- For details consult the z/Architecture Principles of Operation.
+
+local INSTR_INVALID = 1 -- instruction format ids, numbered consecutively from 1
+local INSTR_E = 2
+local INSTR_RIE_R0IU = 3
+local INSTR_RIE_R0UU = 4
+local INSTR_RIE_RRP = 5
+local INSTR_RIE_RRPU = 6
+local INSTR_RIE_RRUUU = 7
+local INSTR_RIE_RUPI = 8
+local INSTR_RIE_RUPU = 9
+local INSTR_RIL_RI = 10
+local INSTR_RIL_RP = 11
+local INSTR_RIL_RU = 12
+local INSTR_RIL_UP = 13
+local INSTR_RIS_R0RDU = 14
+local INSTR_RIS_R0UU = 15
+local INSTR_RIS_RURDI = 16
+local INSTR_RIS_RURDU = 17
+local INSTR_RI_RI = 18
+local INSTR_RI_RP = 19
+local INSTR_RI_RU = 20
+local INSTR_RI_UP = 21
+local INSTR_RRE_00 = 22
+local INSTR_RRE_0R = 23
+local INSTR_RRE_AA = 24
+local INSTR_RRE_AR = 25
+local INSTR_RRE_F0 = 26
+local INSTR_RRE_FF = 27
+local INSTR_RRE_FR = 28
+local INSTR_RRE_R0 = 29
+local INSTR_RRE_RA = 30
+local INSTR_RRE_RF = 31
+local INSTR_RRE_RR = 32
+local INSTR_RRE_RR_OPT = 33
+local INSTR_RRF_0UFF = 34
+local INSTR_RRF_F0FF = 35
+local INSTR_RRF_F0FF2 = 36
+local INSTR_RRF_F0FR = 37
+local INSTR_RRF_FFRU = 38
+local INSTR_RRF_FUFF = 39
+local INSTR_RRF_M0RR = 40
+local INSTR_RRF_R0RR = 41
+local INSTR_RRF_RURR = 42
+local INSTR_RRF_U0FF = 43
+local INSTR_RRF_U0RF = 44
+local INSTR_RRF_U0RR = 45
+local INSTR_RRF_UUFF = 46
+local INSTR_RRR_F0FF = 47
+local INSTR_RRS_RRRDU = 48
+local INSTR_RR_FF = 49
+local INSTR_RR_R0 = 50
+local INSTR_RR_RR = 51
+local INSTR_RR_U0 = 52
+local INSTR_RR_UR = 53
+local INSTR_RSE_CCRD = 54
+local INSTR_RSE_RRRD = 55
+local INSTR_RSE_RURD = 56
+local INSTR_RSI_RRP = 57
+local INSTR_RSL_R0RD = 58
+local INSTR_RSY_AARD = 59
+local INSTR_RSY_CCRD = 60
+local INSTR_RSY_RRRD = 61
+local INSTR_RSY_RURD = 62
+local INSTR_RS_AARD = 63
+local INSTR_RS_CCRD = 64
+local INSTR_RS_R0RD = 65
+local INSTR_RS_RRRD = 66
+local INSTR_RS_RURD = 67
+local INSTR_RXE_FRRD = 68
+local INSTR_RXE_RRRD = 69
+local INSTR_RXF_FRRDF = 70
+local INSTR_RXY_FRRD = 71
+local INSTR_RXY_RRRD = 72
+local INSTR_RXY_URRD = 73
+local INSTR_RX_FRRD = 74
+local INSTR_RX_RRRD = 75
+local INSTR_RX_URRD = 76
+local INSTR_SIL_RDI = 77
+local INSTR_SIL_RDU = 78
+local INSTR_SIY_IRD = 79
+local INSTR_SIY_URD = 80
+local INSTR_SI_URD = 81
+local INSTR_SSE_RDRD = 82
+local INSTR_SSF_RRDRD = 83
+local INSTR_SS_L0RDRD = 84
+local INSTR_SS_LIRDRD = 85
+local INSTR_SS_LLRDRD = 86
+local INSTR_SS_RRRDRD = 87
+local INSTR_SS_RRRDRD2 = 88
+local INSTR_SS_RRRDRD3 = 89
+local INSTR_S_00 = 90
+local INSTR_S_RD = 91
+
+local operands = { -- [operand id] = { width in bits, start bit position, OPERAND_* flags }
+ [UNUSED] = { 0, 0, 0 },
+ [R_8] = { 4, 8, OPERAND_GPR },
+ [R_12] = { 4, 12, OPERAND_GPR },
+ [R_16] = { 4, 16, OPERAND_GPR },
+ [R_20] = { 4, 20, OPERAND_GPR },
+ [R_24] = { 4, 24, OPERAND_GPR },
+ [R_28] = { 4, 28, OPERAND_GPR },
+ [R_32] = { 4, 32, OPERAND_GPR },
+ [F_8] = { 4, 8, OPERAND_FPR },
+ [F_12] = { 4, 12, OPERAND_FPR },
+ [F_16] = { 4, 16, OPERAND_FPR },
+ [F_20] = { 4, 20, OPERAND_FPR }, -- was start bit 16, contradicting the operand's name
+ [F_24] = { 4, 24, OPERAND_FPR },
+ [F_28] = { 4, 28, OPERAND_FPR },
+ [F_32] = { 4, 32, OPERAND_FPR },
+ [A_8] = { 4, 8, OPERAND_AR },
+ [A_12] = { 4, 12, OPERAND_AR },
+ [A_24] = { 4, 24, OPERAND_AR },
+ [A_28] = { 4, 28, OPERAND_AR },
+ [C_8] = { 4, 8, OPERAND_CR },
+ [C_12] = { 4, 12, OPERAND_CR },
+ [B_16] = { 4, 16, bor(OPERAND_BASE, OPERAND_GPR) }, -- bor(): LuaJIT has no "|" operator
+ [B_32] = { 4, 32, bor(OPERAND_BASE, OPERAND_GPR) },
+ [X_12] = { 4, 12, bor(OPERAND_INDEX, OPERAND_GPR) },
+ [D_20] = { 12, 20, OPERAND_DISP },
+ [D_36] = { 12, 36, OPERAND_DISP },
+ [D20_20] = { 20, 20, bor(OPERAND_DISP, OPERAND_SIGNED) },
+ [L4_8] = { 4, 8, OPERAND_LENGTH },
+ [L4_12] = { 4, 12, OPERAND_LENGTH },
+ [L8_8] = { 8, 8, OPERAND_LENGTH },
+ [U4_8] = { 4, 8, 0 },
+ [U4_12] = { 4, 12, 0 },
+ [U4_16] = { 4, 16, 0 },
+ [U4_20] = { 4, 20, 0 },
+ [U4_32] = { 4, 32, 0 },
+ [U8_8] = { 8, 8, 0 },
+ [U8_16] = { 8, 16, 0 },
+ [U8_24] = { 8, 24, 0 },
+ [U8_32] = { 8, 32, 0 },
+ [I8_8] = { 8, 8, OPERAND_SIGNED }, [I8_32] = { 8, 32, OPERAND_SIGNED }, -- previously missing; referenced by the formats table
+ [I16_16] = { 16, 16, OPERAND_SIGNED }, [I16_32] = { 16, 32, OPERAND_SIGNED },
+ [U16_16] = { 16, 16, 0 },
+ [U16_32] = { 16, 32, 0 },
+ [J16_16] = { 16, 16, OPERAND_PCREL },
+ [J32_16] = { 32, 16, OPERAND_PCREL },
+ [I32_16] = { 32, 16, OPERAND_SIGNED },
+ [U32_16] = { 32, 16, 0 },
+ [M_16] = { 4, 16, 0 },
+ [RO_28] = { 4, 28, OPERAND_GPR }
+}
+
+local formats = {
+ [INSTR_E] = { 0xff, 0,0,0,0,0,0 },
+ [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 },
+ [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 },
+ [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 },
+ [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 },
+ [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 },
+ [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 },
+ [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 },
+ [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 },
+ [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 },
+ [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 },
+ [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 },
+ [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 },
+ [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 },
+ [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 },
+ [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 },
+ [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 },
+ [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 },
+ [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 },
+ [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 },
+ [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 },
+ [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 },
+ [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 },
+ [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 },
+ [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 },
+ [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 },
+ [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 },
+ [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 },
+ [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 },
+ [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 },
+ [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 },
+ [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 },
+ [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 },
+ [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 },
+ [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 },
+ [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 },
+ [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 },
+ [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 },
+ [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 },
+ [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 },
+ [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 },
+ [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 },
+ [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 },
+ [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 },
+ [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 },
+ [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 },
+ [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 },
+ [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 },
+ [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 },
+ [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 },
+ [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 },
+ [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
+ [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 },
+ [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 },
+ [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
+ [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 },
+ [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 },
+ [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 },
+ [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 },
+ [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
+ [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 },
+ [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 },
+ [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 },
+ [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 },
+ [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 },
+ [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 },
+ [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 },
+ [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 },
+ [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 },
+ [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 },
+ [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 },
+ [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 },
+ [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 },
+ [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 },
+ [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 },
+ [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 },
+ [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 },
+}
+
+local opcode = {
+ { "lmd", 0xef, INSTR_SS_RRRDRD3 },
+ { "spm", 0x04, INSTR_RR_R0 },
+ { "balr", 0x05, INSTR_RR_RR },
+ { "bctr", 0x06, INSTR_RR_RR },
+ { "bcr", 0x07, INSTR_RR_UR },
+ { "svc", 0x0a, INSTR_RR_U0 },
+ { "bsm", 0x0b, INSTR_RR_RR },
+ { "bassm", 0x0c, INSTR_RR_RR },
+ { "basr", 0x0d, INSTR_RR_RR },
+ { "mvcl", 0x0e, INSTR_RR_RR },
+ { "clcl", 0x0f, INSTR_RR_RR },
+ { "lpr", 0x10, INSTR_RR_RR },
+ { "lnr", 0x11, INSTR_RR_RR },
+ { "ltr", 0x12, INSTR_RR_RR },
+ { "lcr", 0x13, INSTR_RR_RR },
+ { "nr", 0x14, INSTR_RR_RR },
+ { "clr", 0x15, INSTR_RR_RR },
+ { "or", 0x16, INSTR_RR_RR },
+ { "xr", 0x17, INSTR_RR_RR },
+ { "lr", 0x18, INSTR_RR_RR },
+ { "cr", 0x19, INSTR_RR_RR },
+ { "ar", 0x1a, INSTR_RR_RR },
+ { "sr", 0x1b, INSTR_RR_RR },
+ { "mr", 0x1c, INSTR_RR_RR },
+ { "dr", 0x1d, INSTR_RR_RR },
+ { "alr", 0x1e, INSTR_RR_RR },
+ { "slr", 0x1f, INSTR_RR_RR },
+ { "lpdr", 0x20, INSTR_RR_FF },
+ { "lndr", 0x21, INSTR_RR_FF },
+ { "ltdr", 0x22, INSTR_RR_FF },
+ { "lcdr", 0x23, INSTR_RR_FF },
+ { "hdr", 0x24, INSTR_RR_FF },
+ { "ldxr", 0x25, INSTR_RR_FF },
+ { "lrdr", 0x25, INSTR_RR_FF },
+ { "mxr", 0x26, INSTR_RR_FF },
+ { "mxdr", 0x27, INSTR_RR_FF },
+ { "ldr", 0x28, INSTR_RR_FF },
+ { "cdr", 0x29, INSTR_RR_FF },
+ { "adr", 0x2a, INSTR_RR_FF },
+ { "sdr", 0x2b, INSTR_RR_FF },
+ { "mdr", 0x2c, INSTR_RR_FF },
+ { "ddr", 0x2d, INSTR_RR_FF },
+ { "awr", 0x2e, INSTR_RR_FF },
+ { "swr", 0x2f, INSTR_RR_FF },
+ { "lper", 0x30, INSTR_RR_FF },
+ { "lner", 0x31, INSTR_RR_FF },
+ { "lter", 0x32, INSTR_RR_FF },
+ { "lcer", 0x33, INSTR_RR_FF },
+ { "her", 0x34, INSTR_RR_FF },
+ { "ledr", 0x35, INSTR_RR_FF },
+ { "lrer", 0x35, INSTR_RR_FF },
+ { "axr", 0x36, INSTR_RR_FF },
+ { "sxr", 0x37, INSTR_RR_FF },
+ { "ler", 0x38, INSTR_RR_FF },
+ { "cer", 0x39, INSTR_RR_FF },
+ { "aer", 0x3a, INSTR_RR_FF },
+ { "ser", 0x3b, INSTR_RR_FF },
+ { "mder", 0x3c, INSTR_RR_FF },
+ { "mer", 0x3c, INSTR_RR_FF },
+ { "der", 0x3d, INSTR_RR_FF },
+ { "aur", 0x3e, INSTR_RR_FF },
+ { "sur", 0x3f, INSTR_RR_FF },
+ { "sth", 0x40, INSTR_RX_RRRD },
+ { "la", 0x41, INSTR_RX_RRRD },
+ { "stc", 0x42, INSTR_RX_RRRD },
+ { "ic", 0x43, INSTR_RX_RRRD },
+ { "ex", 0x44, INSTR_RX_RRRD },
+ { "bal", 0x45, INSTR_RX_RRRD },
+ { "bct", 0x46, INSTR_RX_RRRD },
+ { "bc", 0x47, INSTR_RX_URRD },
+ { "lh", 0x48, INSTR_RX_RRRD },
+ { "ch", 0x49, INSTR_RX_RRRD },
+ { "ah", 0x4a, INSTR_RX_RRRD },
+ { "sh", 0x4b, INSTR_RX_RRRD },
+ { "mh", 0x4c, INSTR_RX_RRRD },
+ { "bas", 0x4d, INSTR_RX_RRRD },
+ { "cvd", 0x4e, INSTR_RX_RRRD },
+ { "cvb", 0x4f, INSTR_RX_RRRD },
+ { "st", 0x50, INSTR_RX_RRRD },
+ { "lae", 0x51, INSTR_RX_RRRD },
+ { "n", 0x54, INSTR_RX_RRRD },
+ { "cl", 0x55, INSTR_RX_RRRD },
+ { "o", 0x56, INSTR_RX_RRRD },
+ { "x", 0x57, INSTR_RX_RRRD },
+ { "l", 0x58, INSTR_RX_RRRD },
+ { "c", 0x59, INSTR_RX_RRRD },
+ { "a", 0x5a, INSTR_RX_RRRD },
+ { "s", 0x5b, INSTR_RX_RRRD },
+ { "m", 0x5c, INSTR_RX_RRRD },
+ { "d", 0x5d, INSTR_RX_RRRD },
+ { "al", 0x5e, INSTR_RX_RRRD },
+ { "sl", 0x5f, INSTR_RX_RRRD },
+ { "std", 0x60, INSTR_RX_FRRD },
+ { "mxd", 0x67, INSTR_RX_FRRD },
+ { "ld", 0x68, INSTR_RX_FRRD },
+ { "cd", 0x69, INSTR_RX_FRRD },
+ { "ad", 0x6a, INSTR_RX_FRRD },
+ { "sd", 0x6b, INSTR_RX_FRRD },
+ { "md", 0x6c, INSTR_RX_FRRD },
+ { "dd", 0x6d, INSTR_RX_FRRD },
+ { "aw", 0x6e, INSTR_RX_FRRD },
+ { "sw", 0x6f, INSTR_RX_FRRD },
+ { "ste", 0x70, INSTR_RX_FRRD },
+ { "ms", 0x71, INSTR_RX_RRRD },
+ { "le", 0x78, INSTR_RX_FRRD },
+ { "ce", 0x79, INSTR_RX_FRRD },
+ { "ae", 0x7a, INSTR_RX_FRRD },
+ { "se", 0x7b, INSTR_RX_FRRD },
+ { "mde", 0x7c, INSTR_RX_FRRD },
+ { "me", 0x7c, INSTR_RX_FRRD },
+ { "de", 0x7d, INSTR_RX_FRRD },
+ { "au", 0x7e, INSTR_RX_FRRD },
+ { "su", 0x7f, INSTR_RX_FRRD },
+ { "ssm", 0x80, INSTR_S_RD },
+ { "lpsw", 0x82, INSTR_S_RD },
+ { "diag", 0x83, INSTR_RS_RRRD },
+ { "brxh", 0x84, INSTR_RSI_RRP },
+ { "brxle", 0x85, INSTR_RSI_RRP },
+ { "bxh", 0x86, INSTR_RS_RRRD },
+ { "bxle", 0x87, INSTR_RS_RRRD },
+ { "srl", 0x88, INSTR_RS_R0RD },
+ { "sll", 0x89, INSTR_RS_R0RD },
+ { "sra", 0x8a, INSTR_RS_R0RD },
+ { "sla", 0x8b, INSTR_RS_R0RD },
+ { "srdl", 0x8c, INSTR_RS_R0RD },
+ { "sldl", 0x8d, INSTR_RS_R0RD },
+ { "srda", 0x8e, INSTR_RS_R0RD },
+ { "slda", 0x8f, INSTR_RS_R0RD },
+ { "stm", 0x90, INSTR_RS_RRRD },
+ { "tm", 0x91, INSTR_SI_URD },
+ { "mvi", 0x92, INSTR_SI_URD },
+ { "ts", 0x93, INSTR_S_RD },
+ { "ni", 0x94, INSTR_SI_URD },
+ { "cli", 0x95, INSTR_SI_URD },
+ { "oi", 0x96, INSTR_SI_URD },
+ { "xi", 0x97, INSTR_SI_URD },
+ { "lm", 0x98, INSTR_RS_RRRD },
+ { "trace", 0x99, INSTR_RS_RRRD },
+ { "lam", 0x9a, INSTR_RS_AARD },
+ { "stam", 0x9b, INSTR_RS_AARD },
+ { "mvcle", 0xa8, INSTR_RS_RRRD },
+ { "clcle", 0xa9, INSTR_RS_RRRD },
+ { "stnsm", 0xac, INSTR_SI_URD },
+ { "stosm", 0xad, INSTR_SI_URD },
+ { "sigp", 0xae, INSTR_RS_RRRD },
+ { "mc", 0xaf, INSTR_SI_URD },
+ { "lra", 0xb1, INSTR_RX_RRRD },
+ { "stctl", 0xb6, INSTR_RS_CCRD },
+ { "lctl", 0xb7, INSTR_RS_CCRD },
+ { "cs", 0xba, INSTR_RS_RRRD },
+ { "cds", 0xbb, INSTR_RS_RRRD },
+ { "clm", 0xbd, INSTR_RS_RURD },
+ { "stcm", 0xbe, INSTR_RS_RURD },
+ { "icm", 0xbf, INSTR_RS_RURD },
+ { "mvn", 0xd1, INSTR_SS_L0RDRD },
+ { "mvc", 0xd2, INSTR_SS_L0RDRD },
+ { "mvz", 0xd3, INSTR_SS_L0RDRD },
+ { "nc", 0xd4, INSTR_SS_L0RDRD },
+ { "clc", 0xd5, INSTR_SS_L0RDRD },
+ { "oc", 0xd6, INSTR_SS_L0RDRD },
+ { "xc", 0xd7, INSTR_SS_L0RDRD },
+ { "mvck", 0xd9, INSTR_SS_RRRDRD },
+ { "mvcp", 0xda, INSTR_SS_RRRDRD },
+ { "mvcs", 0xdb, INSTR_SS_RRRDRD },
+ { "tr", 0xdc, INSTR_SS_L0RDRD },
+ { "trt", 0xdd, INSTR_SS_L0RDRD },
+ { "ed", 0xde, INSTR_SS_L0RDRD },
+ { "edmk", 0xdf, INSTR_SS_L0RDRD },
+ { "pku", 0xe1, INSTR_SS_L0RDRD },
+ { "unpku", 0xe2, INSTR_SS_L0RDRD },
+ { "mvcin", 0xe8, INSTR_SS_L0RDRD },
+ { "pka", 0xe9, INSTR_SS_L0RDRD },
+ { "unpka", 0xea, INSTR_SS_L0RDRD },
+ { "plo", 0xee, INSTR_SS_RRRDRD2 },
+ { "srp", 0xf0, INSTR_SS_LIRDRD },
+ { "mvo", 0xf1, INSTR_SS_LLRDRD },
+ { "pack", 0xf2, INSTR_SS_LLRDRD },
+ { "unpk", 0xf3, INSTR_SS_LLRDRD },
+ { "zap", 0xf8, INSTR_SS_LLRDRD },
+ { "cp", 0xf9, INSTR_SS_LLRDRD },
+ { "ap", 0xfa, INSTR_SS_LLRDRD },
+ { "sp", 0xfb, INSTR_SS_LLRDRD },
+ { "mp", 0xfc, INSTR_SS_LLRDRD },
+ { "dp", 0xfd, INSTR_SS_LLRDRD },
+ { "", 0, INSTR_INVALID }
+}
+
+local opcode_01 = {
+ { "sam64", 0x0e, INSTR_E },
+ { "pfpo", 0x0a, INSTR_E },
+ { "ptff", 0x04, INSTR_E },
+ { "pr", 0x01, INSTR_E },
+ { "upt", 0x02, INSTR_E },
+ { "sckpf", 0x07, INSTR_E },
+ { "tam", 0x0b, INSTR_E },
+ { "sam24", 0x0c, INSTR_E },
+ { "sam31", 0x0d, INSTR_E },
+ { "trap2", 0xff, INSTR_E },
+ { "", 0, INSTR_INVALID }
+}
+
+local opcode_a5 = {
+ { "iihh", 0x00, INSTR_RI_RU },
+ { "iihl", 0x01, INSTR_RI_RU },
+ { "iilh", 0x02, INSTR_RI_RU },
+ { "iill", 0x03, INSTR_RI_RU },
+ { "nihh", 0x04, INSTR_RI_RU },
+ { "nihl", 0x05, INSTR_RI_RU },
+ { "nilh", 0x06, INSTR_RI_RU },
+ { "nill", 0x07, INSTR_RI_RU },
+ { "oihh", 0x08, INSTR_RI_RU },
+ { "oihl", 0x09, INSTR_RI_RU },
+ { "oilh", 0x0a, INSTR_RI_RU },
+ { "oill", 0x0b, INSTR_RI_RU },
+ { "llihh", 0x0c, INSTR_RI_RU },
+ { "llihl", 0x0d, INSTR_RI_RU },
+ { "llilh", 0x0e, INSTR_RI_RU },
+ { "llill", 0x0f, INSTR_RI_RU },
+ { "", 0, INSTR_INVALID }
+}
+
+local opcode_a7 = {
+ { "tmhh", 0x02, INSTR_RI_RU },
+ { "tmhl", 0x03, INSTR_RI_RU },
+ { "brctg", 0x07, INSTR_RI_RP },
+ { "lghi", 0x09, INSTR_RI_RI },
+ { "aghi", 0x0b, INSTR_RI_RI },
+ { "mghi", 0x0d, INSTR_RI_RI },
+ { "cghi", 0x0f, INSTR_RI_RI },
+ { "tmlh", 0x00, INSTR_RI_RU },
+ { "tmll", 0x01, INSTR_RI_RU },
+ { "brc", 0x04, INSTR_RI_UP },
+ { "bras", 0x05, INSTR_RI_RP },
+ { "brct", 0x06, INSTR_RI_RP },
+ { "lhi", 0x08, INSTR_RI_RI },
+ { "ahi", 0x0a, INSTR_RI_RI },
+ { "mhi", 0x0c, INSTR_RI_RI },
+ { "chi", 0x0e, INSTR_RI_RI },
+ { "", 0, INSTR_INVALID }
+}
+
+local opcode_b2 = {
+ { "sske", 0x2b, INSTR_RRF_M0RR },
+ { "stckf", 0x7c, INSTR_S_RD },
+ { "cu21", 0xa6, INSTR_RRF_M0RR },
+ { "cuutf", 0xa6, INSTR_RRF_M0RR },
+ { "cu12", 0xa7, INSTR_RRF_M0RR },
+ { "cutfu", 0xa7, INSTR_RRF_M0RR },
+ { "stfle", 0xb0, INSTR_S_RD },
+ { "lpswe", 0xb2, INSTR_S_RD },
+ { "srnmt", 0xb9, INSTR_S_RD },
+ { "lfas", 0xbd, INSTR_S_RD },
+ { "stidp", 0x02, INSTR_S_RD },
+ { "sck", 0x04, INSTR_S_RD },
+ { "stck", 0x05, INSTR_S_RD },
+ { "sckc", 0x06, INSTR_S_RD },
+ { "stckc", 0x07, INSTR_S_RD },
+ { "spt", 0x08, INSTR_S_RD },
+ { "stpt", 0x09, INSTR_S_RD },
+ { "spka", 0x0a, INSTR_S_RD },
+ { "ipk", 0x0b, INSTR_S_00 },
+ { "ptlb", 0x0d, INSTR_S_00 },
+ { "spx", 0x10, INSTR_S_RD },
+ { "stpx", 0x11, INSTR_S_RD },
+ { "stap", 0x12, INSTR_S_RD },
+ { "sie", 0x14, INSTR_S_RD },
+ { "pc", 0x18, INSTR_S_RD },
+ { "sac", 0x19, INSTR_S_RD },
+ { "cfc", 0x1a, INSTR_S_RD },
+ { "ipte", 0x21, INSTR_RRE_RR },
+ { "ipm", 0x22, INSTR_RRE_R0 },
+ { "ivsk", 0x23, INSTR_RRE_RR },
+ { "iac", 0x24, INSTR_RRE_R0 },
+ { "ssar", 0x25, INSTR_RRE_R0 },
+ { "epar", 0x26, INSTR_RRE_R0 },
+ { "esar", 0x27, INSTR_RRE_R0 },
+ { "pt", 0x28, INSTR_RRE_RR },
+ { "iske", 0x29, INSTR_RRE_RR },
+ { "rrbe", 0x2a, INSTR_RRE_RR },
+ { "sske", 0x2b, INSTR_RRE_RR },
+ { "tb", 0x2c, INSTR_RRE_0R },
+ { "dxr", 0x2d, INSTR_RRE_F0 },
+ { "pgin", 0x2e, INSTR_RRE_RR },
+ { "pgout", 0x2f, INSTR_RRE_RR },
+ { "csch", 0x30, INSTR_S_00 },
+ { "hsch", 0x31, INSTR_S_00 },
+ { "msch", 0x32, INSTR_S_RD },
+ { "ssch", 0x33, INSTR_S_RD },
+ { "stsch", 0x34, INSTR_S_RD },
+ { "tsch", 0x35, INSTR_S_RD },
+ { "tpi", 0x36, INSTR_S_RD },
+ { "sal", 0x37, INSTR_S_00 },
+ { "rsch", 0x38, INSTR_S_00 },
+ { "stcrw", 0x39, INSTR_S_RD },
+ { "stcps", 0x3a, INSTR_S_RD },
+ { "rchp", 0x3b, INSTR_S_00 },
+ { "schm", 0x3c, INSTR_S_00 },
+ { "bakr", 0x40, INSTR_RRE_RR },
+ { "cksm", 0x41, INSTR_RRE_RR },
+ { "sqdr", 0x44, INSTR_RRE_F0 },
+ { "sqer", 0x45, INSTR_RRE_F0 },
+ { "stura", 0x46, INSTR_RRE_RR },
+ { "msta", 0x47, INSTR_RRE_R0 },
+ { "palb", 0x48, INSTR_RRE_00 },
+ { "ereg", 0x49, INSTR_RRE_RR },
+ { "esta", 0x4a, INSTR_RRE_RR },
+ { "lura", 0x4b, INSTR_RRE_RR },
+ { "tar", 0x4c, INSTR_RRE_AR },
+ { "cpya", 0x4d, INSTR_RRE_AA },
+ { "sar", 0x4e, INSTR_RRE_AR },
+ { "ear", 0x4f, INSTR_RRE_RA },
+ { "csp", 0x50, INSTR_RRE_RR },
+ { "msr", 0x52, INSTR_RRE_RR },
+ { "mvpg", 0x54, INSTR_RRE_RR },
+ { "mvst", 0x55, INSTR_RRE_RR },
+ { "cuse", 0x57, INSTR_RRE_RR },
+ { "bsg", 0x58, INSTR_RRE_RR },
+ { "bsa", 0x5a, INSTR_RRE_RR },
+ { "clst", 0x5d, INSTR_RRE_RR },
+ { "srst", 0x5e, INSTR_RRE_RR },
+ { "cmpsc", 0x63, INSTR_RRE_RR },
+ { "siga", 0x74, INSTR_S_RD },
+ { "xsch", 0x76, INSTR_S_00 },
+ { "rp", 0x77, INSTR_S_RD },
+ { "stcke", 0x78, INSTR_S_RD },
+ { "sacf", 0x79, INSTR_S_RD },
+ { "stsi", 0x7d, INSTR_S_RD },
+ { "srnm", 0x99, INSTR_S_RD },
+ { "stfpc", 0x9c, INSTR_S_RD },
+ { "lfpc", 0x9d, INSTR_S_RD },
+ { "tre", 0xa5, INSTR_RRE_RR },
+ { "cuutf", 0xa6, INSTR_RRE_RR },
+ { "cutfu", 0xa7, INSTR_RRE_RR },
+ { "stfl", 0xb1, INSTR_S_RD },
+ { "trap4", 0xff, INSTR_S_RD },
+ { "", 0, INSTR_INVALID }
+}
+
+local opcode_b3 = {
+ { "maylr", 0x38, INSTR_RRF_F0FF },
+ { "mylr", 0x39, INSTR_RRF_F0FF },
+ { "mayr", 0x3a, INSTR_RRF_F0FF },
+ { "myr", 0x3b, INSTR_RRF_F0FF },
+ { "mayhr", 0x3c, INSTR_RRF_F0FF },
+ { "myhr", 0x3d, INSTR_RRF_F0FF },
+ { "cegbr", 0xa4, INSTR_RRE_RR },
+ { "cdgbr", 0xa5, INSTR_RRE_RR },
+ { "cxgbr", 0xa6, INSTR_RRE_RR },
+ { "cgebr", 0xa8, INSTR_RRF_U0RF },
+ { "cgdbr", 0xa9, INSTR_RRF_U0RF },
+ { "cgxbr", 0xaa, INSTR_RRF_U0RF },
+ { "cfer", 0xb8, INSTR_RRF_U0RF },
+ { "cfdr", 0xb9, INSTR_RRF_U0RF },
+ { "cfxr", 0xba, INSTR_RRF_U0RF },
+ { "cegr", 0xc4, INSTR_RRE_RR },
+ { "cdgr", 0xc5, INSTR_RRE_RR },
+ { "cxgr", 0xc6, INSTR_RRE_RR },
+ { "cger", 0xc8, INSTR_RRF_U0RF },
+ { "cgdr", 0xc9, INSTR_RRF_U0RF },
+ { "cgxr", 0xca, INSTR_RRF_U0RF },
+ { "lpdfr", 0x70, INSTR_RRE_FF },
+ { "lndfr", 0x71, INSTR_RRE_FF },
+ { "cpsdr", 0x72, INSTR_RRF_F0FF2 },
+ { "lcdfr", 0x73, INSTR_RRE_FF },
+ { "ldgr", 0xc1, INSTR_RRE_FR },
+ { "lgdr", 0xcd, INSTR_RRE_RF },
+ { "adtr", 0xd2, INSTR_RRR_F0FF },
+ { "axtr", 0xda, INSTR_RRR_F0FF },
+ { "cdtr", 0xe4, INSTR_RRE_FF },
+ { "cxtr", 0xec, INSTR_RRE_FF },
+ { "kdtr", 0xe0, INSTR_RRE_FF },
+ { "kxtr", 0xe8, INSTR_RRE_FF },
+ { "cedtr", 0xf4, INSTR_RRE_FF },
+ { "cextr", 0xfc, INSTR_RRE_FF },
+ { "cdgtr", 0xf1, INSTR_RRE_FR },
+ { "cxgtr", 0xf9, INSTR_RRE_FR },
+ { "cdstr", 0xf3, INSTR_RRE_FR },
+ { "cxstr", 0xfb, INSTR_RRE_FR },
+ { "cdutr", 0xf2, INSTR_RRE_FR },
+ { "cxutr", 0xfa, INSTR_RRE_FR },
+ { "cgdtr", 0xe1, INSTR_RRF_U0RF },
+ { "cgxtr", 0xe9, INSTR_RRF_U0RF },
+ { "csdtr", 0xe3, INSTR_RRE_RF },
+ { "csxtr", 0xeb, INSTR_RRE_RF },
+ { "cudtr", 0xe2, INSTR_RRE_RF },
+ { "cuxtr", 0xea, INSTR_RRE_RF },
+ { "ddtr", 0xd1, INSTR_RRR_F0FF },
+ { "dxtr", 0xd9, INSTR_RRR_F0FF },
+ { "eedtr", 0xe5, INSTR_RRE_RF },
+ { "eextr", 0xed, INSTR_RRE_RF },
+ { "esdtr", 0xe7, INSTR_RRE_RF },
+ { "esxtr", 0xef, INSTR_RRE_RF },
+ { "iedtr", 0xf6, INSTR_RRF_F0FR },
+ { "iextr", 0xfe, INSTR_RRF_F0FR },
+ { "ltdtr", 0xd6, INSTR_RRE_FF },
+ { "ltxtr", 0xde, INSTR_RRE_FF },
+ { "fidtr", 0xd7, INSTR_RRF_UUFF },
+ { "fixtr", 0xdf, INSTR_RRF_UUFF },
+ { "ldetr", 0xd4, INSTR_RRF_0UFF },
+ { "lxdtr", 0xdc, INSTR_RRF_0UFF },
+ { "ledtr", 0xd5, INSTR_RRF_UUFF },
+ { "ldxtr", 0xdd, INSTR_RRF_UUFF },
+ { "mdtr", 0xd0, INSTR_RRR_F0FF },
+ { "mxtr", 0xd8, INSTR_RRR_F0FF },
+ { "qadtr", 0xf5, INSTR_RRF_FUFF },
+ { "qaxtr", 0xfd, INSTR_RRF_FUFF },
+ { "rrdtr", 0xf7, INSTR_RRF_FFRU },
+ { "rrxtr", 0xff, INSTR_RRF_FFRU },
+ { "sfasr", 0x85, INSTR_RRE_R0 },
+ { "sdtr", 0xd3, INSTR_RRR_F0FF },
+ { "sxtr", 0xdb, INSTR_RRR_F0FF },
+ { "lpebr", 0x00, INSTR_RRE_FF },
+ { "lnebr", 0x01, INSTR_RRE_FF },
+ { "ltebr", 0x02, INSTR_RRE_FF },
+ { "lcebr", 0x03, INSTR_RRE_FF },
+ { "ldebr", 0x04, INSTR_RRE_FF },
+ { "lxdbr", 0x05, INSTR_RRE_FF },
+ { "lxebr", 0x06, INSTR_RRE_FF },
+ { "mxdbr", 0x07, INSTR_RRE_FF },
+ { "kebr", 0x08, INSTR_RRE_FF },
+ { "cebr", 0x09, INSTR_RRE_FF },
+ { "aebr", 0x0a, INSTR_RRE_FF },
+ { "sebr", 0x0b, INSTR_RRE_FF },
+ { "mdebr", 0x0c, INSTR_RRE_FF },
+ { "debr", 0x0d, INSTR_RRE_FF },
+ { "maebr", 0x0e, INSTR_RRF_F0FF },
+ { "msebr", 0x0f, INSTR_RRF_F0FF },
+ { "lpdbr", 0x10, INSTR_RRE_FF },
+ { "lndbr", 0x11, INSTR_RRE_FF },
+ { "ltdbr", 0x12, INSTR_RRE_FF },
+ { "lcdbr", 0x13, INSTR_RRE_FF },
+ { "sqebr", 0x14, INSTR_RRE_FF },
+ { "sqdbr", 0x15, INSTR_RRE_FF },
+ { "sqxbr", 0x16, INSTR_RRE_FF },
+ { "meebr", 0x17, INSTR_RRE_FF },
+ { "kdbr", 0x18, INSTR_RRE_FF },
+ { "cdbr", 0x19, INSTR_RRE_FF },
+ { "adbr", 0x1a, INSTR_RRE_FF },
+ { "sdbr", 0x1b, INSTR_RRE_FF },
+ { "mdbr", 0x1c, INSTR_RRE_FF },
+ { "ddbr", 0x1d, INSTR_RRE_FF },
+ { "madbr", 0x1e, INSTR_RRF_F0FF },
+ { "msdbr", 0x1f, INSTR_RRF_F0FF },
+ { "lder", 0x24, INSTR_RRE_FF },
+ { "lxdr", 0x25, INSTR_RRE_FF },
+ { "lxer", 0x26, INSTR_RRE_FF },
+ { "maer", 0x2e, INSTR_RRF_F0FF },
+ { "mser", 0x2f, INSTR_RRF_F0FF },
+ { "sqxr", 0x36, INSTR_RRE_FF },
+ { "meer", 0x37, INSTR_RRE_FF },
+ { "madr", 0x3e, INSTR_RRF_F0FF },
+ { "msdr", 0x3f, INSTR_RRF_F0FF },
+ { "lpxbr", 0x40, INSTR_RRE_FF },
+ { "lnxbr", 0x41, INSTR_RRE_FF },
+ { "ltxbr", 0x42, INSTR_RRE_FF },
+ { "lcxbr", 0x43, INSTR_RRE_FF },
+ { "ledbr", 0x44, INSTR_RRE_FF },
+ { "ldxbr", 0x45, INSTR_RRE_FF },
+ { "lexbr", 0x46, INSTR_RRE_FF },
+ { "fixbr", 0x47, INSTR_RRF_U0FF },
+ { "kxbr", 0x48, INSTR_RRE_FF },
+ { "cxbr", 0x49, INSTR_RRE_FF },
+ { "axbr", 0x4a, INSTR_RRE_FF },
+ { "sxbr", 0x4b, INSTR_RRE_FF },
+ { "mxbr", 0x4c, INSTR_RRE_FF },
+ { "dxbr", 0x4d, INSTR_RRE_FF },
+ { "tbedr", 0x50, INSTR_RRF_U0FF },
+ { "tbdr", 0x51, INSTR_RRF_U0FF },
+ { "diebr", 0x53, INSTR_RRF_FUFF },
+ { "fiebr", 0x57, INSTR_RRF_U0FF },
+ { "thder", 0x58, INSTR_RRE_RR },
+ { "thdr", 0x59, INSTR_RRE_RR },
+ { "didbr", 0x5b, INSTR_RRF_FUFF },
+ { "fidbr", 0x5f, INSTR_RRF_U0FF },
+ { "lpxr", 0x60, INSTR_RRE_FF },
+ { "lnxr", 0x61, INSTR_RRE_FF },
+ { "ltxr", 0x62, INSTR_RRE_FF },
+ { "lcxr", 0x63, INSTR_RRE_FF },
+ { "lxr", 0x65, INSTR_RRE_RR },
+ { "lexr", 0x66, INSTR_RRE_FF },
+ { "fixr", 0x67, INSTR_RRF_U0FF },
+ { "cxr", 0x69, INSTR_RRE_FF },
+ { "lzer", 0x74, INSTR_RRE_R0 },
+ { "lzdr", 0x75, INSTR_RRE_R0 },
+ { "lzxr", 0x76, INSTR_RRE_R0 },
+ { "fier", 0x77, INSTR_RRF_U0FF },
+ { "fidr", 0x7f, INSTR_RRF_U0FF },
+ { "sfpc", 0x84, INSTR_RRE_RR_OPT },
+ { "efpc", 0x8c, INSTR_RRE_RR_OPT },
+ { "cefbr", 0x94, INSTR_RRE_RF },
+ { "cdfbr", 0x95, INSTR_RRE_RF },
+ { "cxfbr", 0x96, INSTR_RRE_RF },
+ { "cfebr", 0x98, INSTR_RRF_U0RF },
+ { "cfdbr", 0x99, INSTR_RRF_U0RF },
+ { "cfxbr", 0x9a, INSTR_RRF_U0RF },
+ { "cefr", 0xb4, INSTR_RRE_RF },
+ { "cdfr", 0xb5, INSTR_RRE_RF },
+ { "cxfr", 0xb6, INSTR_RRE_RF },
+ { "", 0, INSTR_INVALID }
+}
+
+local opcode_b9 = {
+ { "lpgr", 0x00, INSTR_RRE_RR },
+ { "lngr", 0x01, INSTR_RRE_RR },
+ { "ltgr", 0x02, INSTR_RRE_RR },
+ { "lcgr", 0x03, INSTR_RRE_RR },
+ { "lgr", 0x04, INSTR_RRE_RR },
+ { "lurag", 0x05, INSTR_RRE_RR },
+ { "lgbr", 0x06, INSTR_RRE_RR },
+ { "lghr", 0x07, INSTR_RRE_RR },
+ { "agr", 0x08, INSTR_RRE_RR },
+ { "sgr", 0x09, INSTR_RRE_RR },
+ { "algr", 0x0a, INSTR_RRE_RR },
+ { "slgr", 0x0b, INSTR_RRE_RR },
+ { "msgr", 0x0c, INSTR_RRE_RR },
+ { "dsgr", 0x0d, INSTR_RRE_RR },
+ { "eregg", 0x0e, INSTR_RRE_RR },
+ { "lrvgr", 0x0f, INSTR_RRE_RR },
+ { "lpgfr", 0x10, INSTR_RRE_RR },
+ { "lngfr", 0x11, INSTR_RRE_RR },
+ { "ltgfr", 0x12, INSTR_RRE_RR },
+ { "lcgfr", 0x13, INSTR_RRE_RR },
+ { "lgfr", 0x14, INSTR_RRE_RR },
+ { "llgfr", 0x16, INSTR_RRE_RR },
+ { "llgtr", 0x17, INSTR_RRE_RR },
+ { "agfr", 0x18, INSTR_RRE_RR },
+ { "sgfr", 0x19, INSTR_RRE_RR },
+ { "algfr", 0x1a, INSTR_RRE_RR },
+ { "slgfr", 0x1b, INSTR_RRE_RR },
+ { "msgfr", 0x1c, INSTR_RRE_RR },
+ { "dsgfr", 0x1d, INSTR_RRE_RR },
+ { "cgr", 0x20, INSTR_RRE_RR },
+ { "clgr", 0x21, INSTR_RRE_RR },
+ { "sturg", 0x25, INSTR_RRE_RR },
+ { "lbr", 0x26, INSTR_RRE_RR },
+ { "lhr", 0x27, INSTR_RRE_RR },
+ { "cgfr", 0x30, INSTR_RRE_RR },
+ { "clgfr", 0x31, INSTR_RRE_RR },
+ { "bctgr", 0x46, INSTR_RRE_RR },
+ { "ngr", 0x80, INSTR_RRE_RR },
+ { "ogr", 0x81, INSTR_RRE_RR },
+ { "xgr", 0x82, INSTR_RRE_RR },
+ { "flogr", 0x83, INSTR_RRE_RR },
+ { "llgcr", 0x84, INSTR_RRE_RR },
+ { "llghr", 0x85, INSTR_RRE_RR },
+ { "mlgr", 0x86, INSTR_RRE_RR },
+ { "dlgr", 0x87, INSTR_RRE_RR },
+ { "alcgr", 0x88, INSTR_RRE_RR },
+ { "slbgr", 0x89, INSTR_RRE_RR },
+ { "cspg", 0x8a, INSTR_RRE_RR },
+ { "idte", 0x8e, INSTR_RRF_R0RR },
+ { "llcr", 0x94, INSTR_RRE_RR },
+ { "llhr", 0x95, INSTR_RRE_RR },
+ { "esea", 0x9d, INSTR_RRE_R0 },
+ { "lptea", 0xaa, INSTR_RRF_RURR },
+ { "cu14", 0xb0, INSTR_RRF_M0RR },
+ { "cu24", 0xb1, INSTR_RRF_M0RR },
+ { "cu41", 0xb2, INSTR_RRF_M0RR },
+ { "cu42", 0xb3, INSTR_RRF_M0RR },
+ { "crt", 0x72, INSTR_RRF_U0RR },
+ { "cgrt", 0x60, INSTR_RRF_U0RR },
+ { "clrt", 0x73, INSTR_RRF_U0RR },
+ { "clgrt", 0x61, INSTR_RRF_U0RR },
+ { "ptf", 0xa2, INSTR_RRE_R0 },
+ { "pfmf", 0xaf, INSTR_RRE_RR },
+ { "trte", 0xbf, INSTR_RRF_M0RR },
+ { "trtre", 0xbd, INSTR_RRF_M0RR },
+ { "kmac", 0x1e, INSTR_RRE_RR },
+ { "lrvr", 0x1f, INSTR_RRE_RR },
+ { "km", 0x2e, INSTR_RRE_RR },
+ { "kmc", 0x2f, INSTR_RRE_RR },
+ { "kimd", 0x3e, INSTR_RRE_RR },
+ { "klmd", 0x3f, INSTR_RRE_RR },
+ { "epsw", 0x8d, INSTR_RRE_RR },
+ { "trtt", 0x90, INSTR_RRE_RR },
+ { "trtt", 0x90, INSTR_RRF_M0RR },
+ { "trto", 0x91, INSTR_RRE_RR },
+ { "trto", 0x91, INSTR_RRF_M0RR },
+ { "trot", 0x92, INSTR_RRE_RR },
+ { "trot", 0x92, INSTR_RRF_M0RR },
+ { "troo", 0x93, INSTR_RRE_RR },
+ { "troo", 0x93, INSTR_RRF_M0RR },
+ { "mlr", 0x96, INSTR_RRE_RR },
+ { "dlr", 0x97, INSTR_RRE_RR },
+ { "alcr", 0x98, INSTR_RRE_RR },
+ { "slbr", 0x99, INSTR_RRE_RR },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xc0.
+-- Each entry is { mnemonic, opcode fragment, format id }. find_insn() compares
+-- the fragment with code[1] after masking it with the format's opcode mask.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_c0 = {
+ { "lgfi", 0x01, INSTR_RIL_RI },
+ { "xihf", 0x06, INSTR_RIL_RU },
+ { "xilf", 0x07, INSTR_RIL_RU },
+ { "iihf", 0x08, INSTR_RIL_RU },
+ { "iilf", 0x09, INSTR_RIL_RU },
+ { "nihf", 0x0a, INSTR_RIL_RU },
+ { "nilf", 0x0b, INSTR_RIL_RU },
+ { "oihf", 0x0c, INSTR_RIL_RU },
+ { "oilf", 0x0d, INSTR_RIL_RU },
+ { "llihf", 0x0e, INSTR_RIL_RU },
+ { "llilf", 0x0f, INSTR_RIL_RU },
+ { "larl", 0x00, INSTR_RIL_RP },
+ { "brcl", 0x04, INSTR_RIL_UP },
+ { "brasl", 0x05, INSTR_RIL_RP },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xc2.
+-- Each entry is { mnemonic, opcode fragment, format id }; the fragment is
+-- compared with code[1] masked by the format's opcode mask.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_c2 = {
+ { "slgfi", 0x04, INSTR_RIL_RU },
+ { "slfi", 0x05, INSTR_RIL_RU },
+ { "agfi", 0x08, INSTR_RIL_RI },
+ { "afi", 0x09, INSTR_RIL_RI },
+ { "algfi", 0x0a, INSTR_RIL_RU },
+ { "alfi", 0x0b, INSTR_RIL_RU },
+ { "cgfi", 0x0c, INSTR_RIL_RI },
+ { "cfi", 0x0d, INSTR_RIL_RI },
+ { "clgfi", 0x0e, INSTR_RIL_RU },
+ { "clfi", 0x0f, INSTR_RIL_RU },
+ { "msfi", 0x01, INSTR_RIL_RI },
+ { "msgfi", 0x00, INSTR_RIL_RI },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xc4.
+-- Each entry is { mnemonic, opcode fragment, format id }; the fragment is
+-- compared with code[1] masked by the format's opcode mask.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_c4 = {
+ { "lrl", 0x0d, INSTR_RIL_RP },
+ { "lgrl", 0x08, INSTR_RIL_RP },
+ { "lgfrl", 0x0c, INSTR_RIL_RP },
+ { "lhrl", 0x05, INSTR_RIL_RP },
+ { "lghrl", 0x04, INSTR_RIL_RP },
+ { "llgfrl", 0x0e, INSTR_RIL_RP },
+ { "llhrl", 0x02, INSTR_RIL_RP },
+ { "llghrl", 0x06, INSTR_RIL_RP },
+ { "strl", 0x0f, INSTR_RIL_RP },
+ { "stgrl", 0x0b, INSTR_RIL_RP },
+ { "sthrl", 0x07, INSTR_RIL_RP },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xc6.
+-- Each entry is { mnemonic, opcode fragment, format id }; the fragment is
+-- compared with code[1] masked by the format's opcode mask.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_c6 = {
+ { "crl", 0x0d, INSTR_RIL_RP },
+ { "cgrl", 0x08, INSTR_RIL_RP },
+ { "cgfrl", 0x0c, INSTR_RIL_RP },
+ { "chrl", 0x05, INSTR_RIL_RP },
+ { "cghrl", 0x04, INSTR_RIL_RP },
+ { "clrl", 0x0f, INSTR_RIL_RP },
+ { "clgrl", 0x0a, INSTR_RIL_RP },
+ { "clgfrl", 0x0e, INSTR_RIL_RP },
+ { "clhrl", 0x07, INSTR_RIL_RP },
+ { "clghrl", 0x06, INSTR_RIL_RP },
+ { "pfdrl", 0x02, INSTR_RIL_UP },
+ { "exrl", 0x00, INSTR_RIL_RP },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xc8.
+-- Each entry is { mnemonic, opcode fragment, format id }; the fragment is
+-- compared with code[1] masked by the format's opcode mask.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_c8 = {
+ { "mvcos", 0x00, INSTR_SSF_RRDRD },
+ { "ectg", 0x01, INSTR_SSF_RRDRD },
+ { "csst", 0x02, INSTR_SSF_RRDRD },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xe3.
+-- Each entry is { mnemonic, opcode fragment, format id }. For this table
+-- find_insn() takes the fragment from code[5] (not code[1]) and masks it with
+-- the format's opcode mask before comparing.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_e3 = {
+ { "ltg", 0x02, INSTR_RXY_RRRD },
+ { "lrag", 0x03, INSTR_RXY_RRRD },
+ { "lg", 0x04, INSTR_RXY_RRRD },
+ { "cvby", 0x06, INSTR_RXY_RRRD },
+ { "ag", 0x08, INSTR_RXY_RRRD },
+ { "sg", 0x09, INSTR_RXY_RRRD },
+ { "alg", 0x0a, INSTR_RXY_RRRD },
+ { "slg", 0x0b, INSTR_RXY_RRRD },
+ { "msg", 0x0c, INSTR_RXY_RRRD },
+ { "dsg", 0x0d, INSTR_RXY_RRRD },
+ { "cvbg", 0x0e, INSTR_RXY_RRRD },
+ { "lrvg", 0x0f, INSTR_RXY_RRRD },
+ { "lt", 0x12, INSTR_RXY_RRRD },
+ { "lray", 0x13, INSTR_RXY_RRRD },
+ { "lgf", 0x14, INSTR_RXY_RRRD },
+ { "lgh", 0x15, INSTR_RXY_RRRD },
+ { "llgf", 0x16, INSTR_RXY_RRRD },
+ { "llgt", 0x17, INSTR_RXY_RRRD },
+ { "agf", 0x18, INSTR_RXY_RRRD },
+ { "sgf", 0x19, INSTR_RXY_RRRD },
+ { "algf", 0x1a, INSTR_RXY_RRRD },
+ { "slgf", 0x1b, INSTR_RXY_RRRD },
+ { "msgf", 0x1c, INSTR_RXY_RRRD },
+ { "dsgf", 0x1d, INSTR_RXY_RRRD },
+ { "cg", 0x20, INSTR_RXY_RRRD },
+ { "clg", 0x21, INSTR_RXY_RRRD },
+ { "stg", 0x24, INSTR_RXY_RRRD },
+ { "cvdy", 0x26, INSTR_RXY_RRRD },
+ { "cvdg", 0x2e, INSTR_RXY_RRRD },
+ { "strvg", 0x2f, INSTR_RXY_RRRD },
+ { "cgf", 0x30, INSTR_RXY_RRRD },
+ { "clgf", 0x31, INSTR_RXY_RRRD },
+ { "strvh", 0x3f, INSTR_RXY_RRRD },
+ { "bctg", 0x46, INSTR_RXY_RRRD },
+ { "sty", 0x50, INSTR_RXY_RRRD },
+ { "msy", 0x51, INSTR_RXY_RRRD },
+ { "ny", 0x54, INSTR_RXY_RRRD },
+ { "cly", 0x55, INSTR_RXY_RRRD },
+ { "oy", 0x56, INSTR_RXY_RRRD },
+ { "xy", 0x57, INSTR_RXY_RRRD },
+ { "ly", 0x58, INSTR_RXY_RRRD },
+ { "cy", 0x59, INSTR_RXY_RRRD },
+ { "ay", 0x5a, INSTR_RXY_RRRD },
+ { "sy", 0x5b, INSTR_RXY_RRRD },
+ { "aly", 0x5e, INSTR_RXY_RRRD },
+ { "sly", 0x5f, INSTR_RXY_RRRD },
+ { "sthy", 0x70, INSTR_RXY_RRRD },
+ { "lay", 0x71, INSTR_RXY_RRRD },
+ { "stcy", 0x72, INSTR_RXY_RRRD },
+ { "icy", 0x73, INSTR_RXY_RRRD },
+ { "lb", 0x76, INSTR_RXY_RRRD },
+ { "lgb", 0x77, INSTR_RXY_RRRD },
+ { "lhy", 0x78, INSTR_RXY_RRRD },
+ { "chy", 0x79, INSTR_RXY_RRRD },
+ { "ahy", 0x7a, INSTR_RXY_RRRD },
+ { "shy", 0x7b, INSTR_RXY_RRRD },
+ { "ng", 0x80, INSTR_RXY_RRRD },
+ { "og", 0x81, INSTR_RXY_RRRD },
+ { "xg", 0x82, INSTR_RXY_RRRD },
+ { "mlg", 0x86, INSTR_RXY_RRRD },
+ { "dlg", 0x87, INSTR_RXY_RRRD },
+ { "alcg", 0x88, INSTR_RXY_RRRD },
+ { "slbg", 0x89, INSTR_RXY_RRRD },
+ { "stpq", 0x8e, INSTR_RXY_RRRD },
+ { "lpq", 0x8f, INSTR_RXY_RRRD },
+ { "llgc", 0x90, INSTR_RXY_RRRD },
+ { "llgh", 0x91, INSTR_RXY_RRRD },
+ { "llc", 0x94, INSTR_RXY_RRRD },
+ { "llh", 0x95, INSTR_RXY_RRRD },
+ { "cgh", 0x34, INSTR_RXY_RRRD },
+ { "laey", 0x75, INSTR_RXY_RRRD },
+ { "ltgf", 0x32, INSTR_RXY_RRRD },
+ { "mfy", 0x5c, INSTR_RXY_RRRD },
+ { "mhy", 0x7c, INSTR_RXY_RRRD },
+ { "pfd", 0x36, INSTR_RXY_URRD },
+ { "lrv", 0x1e, INSTR_RXY_RRRD },
+ { "lrvh", 0x1f, INSTR_RXY_RRRD },
+ { "strv", 0x3e, INSTR_RXY_RRRD },
+ { "ml", 0x96, INSTR_RXY_RRRD },
+ { "dl", 0x97, INSTR_RXY_RRRD },
+ { "alc", 0x98, INSTR_RXY_RRRD },
+ { "slb", 0x99, INSTR_RXY_RRRD },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xe5.
+-- Each entry is { mnemonic, opcode fragment, format id }; the fragment is
+-- compared with code[1] masked by the format's opcode mask.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_e5 = {
+ { "strag", 0x02, INSTR_SSE_RDRD },
+ { "chhsi", 0x54, INSTR_SIL_RDI },
+ { "chsi", 0x5c, INSTR_SIL_RDI },
+ { "cghsi", 0x58, INSTR_SIL_RDI },
+ { "clhhsi", 0x55, INSTR_SIL_RDU },
+ { "clfhsi", 0x5d, INSTR_SIL_RDU },
+ { "clghsi", 0x59, INSTR_SIL_RDU },
+ { "mvhhi", 0x44, INSTR_SIL_RDI },
+ { "mvhi", 0x4c, INSTR_SIL_RDI },
+ { "mvghi", 0x48, INSTR_SIL_RDI },
+ { "lasp", 0x00, INSTR_SSE_RDRD },
+ { "tprot", 0x01, INSTR_SSE_RDRD },
+ { "mvcsk", 0x0e, INSTR_SSE_RDRD },
+ { "mvcdk", 0x0f, INSTR_SSE_RDRD },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xeb.
+-- Each entry is { mnemonic, opcode fragment, format id }. For this table
+-- find_insn() takes the fragment from code[5] (not code[1]) and masks it with
+-- the format's opcode mask before comparing.
+-- NOTE(review): "icmh" is listed twice with the same fragment (0x80) but
+-- different formats; find_insn() returns whichever it visits first.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_eb = {
+ { "lmg", 0x04, INSTR_RSY_RRRD },
+ { "srag", 0x0a, INSTR_RSY_RRRD },
+ { "slag", 0x0b, INSTR_RSY_RRRD },
+ { "srlg", 0x0c, INSTR_RSY_RRRD },
+ { "sllg", 0x0d, INSTR_RSY_RRRD },
+ { "tracg", 0x0f, INSTR_RSY_RRRD },
+ { "csy", 0x14, INSTR_RSY_RRRD },
+ { "rllg", 0x1c, INSTR_RSY_RRRD },
+ { "clmh", 0x20, INSTR_RSY_RURD },
+ { "clmy", 0x21, INSTR_RSY_RURD },
+ { "stmg", 0x24, INSTR_RSY_RRRD },
+ { "stctg", 0x25, INSTR_RSY_CCRD },
+ { "stmh", 0x26, INSTR_RSY_RRRD },
+ { "stcmh", 0x2c, INSTR_RSY_RURD },
+ { "stcmy", 0x2d, INSTR_RSY_RURD },
+ { "lctlg", 0x2f, INSTR_RSY_CCRD },
+ { "csg", 0x30, INSTR_RSY_RRRD },
+ { "cdsy", 0x31, INSTR_RSY_RRRD },
+ { "cdsg", 0x3e, INSTR_RSY_RRRD },
+ { "bxhg", 0x44, INSTR_RSY_RRRD },
+ { "bxleg", 0x45, INSTR_RSY_RRRD },
+ { "tmy", 0x51, INSTR_SIY_URD },
+ { "mviy", 0x52, INSTR_SIY_URD },
+ { "niy", 0x54, INSTR_SIY_URD },
+ { "cliy", 0x55, INSTR_SIY_URD },
+ { "oiy", 0x56, INSTR_SIY_URD },
+ { "xiy", 0x57, INSTR_SIY_URD },
+ { "icmh", 0x80, INSTR_RSE_RURD },
+ { "icmh", 0x80, INSTR_RSY_RURD },
+ { "icmy", 0x81, INSTR_RSY_RURD },
+ { "clclu", 0x8f, INSTR_RSY_RRRD },
+ { "stmy", 0x90, INSTR_RSY_RRRD },
+ { "lmh", 0x96, INSTR_RSY_RRRD },
+ { "lmy", 0x98, INSTR_RSY_RRRD },
+ { "lamy", 0x9a, INSTR_RSY_AARD },
+ { "stamy", 0x9b, INSTR_RSY_AARD },
+ { "asi", 0x6a, INSTR_SIY_IRD },
+ { "agsi", 0x7a, INSTR_SIY_IRD },
+ { "alsi", 0x6e, INSTR_SIY_IRD },
+ { "algsi", 0x7e, INSTR_SIY_IRD },
+ { "ecag", 0x4c, INSTR_RSY_RRRD },
+ { "rll", 0x1d, INSTR_RSY_RRRD },
+ { "mvclu", 0x8e, INSTR_RSY_RRRD },
+ { "tp", 0xc0, INSTR_RSL_R0RD },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xec.
+-- Each entry is { mnemonic, opcode fragment, format id }. For this table
+-- find_insn() takes the fragment from code[5] (not code[1]) and masks it with
+-- the format's opcode mask before comparing.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_ec = {
+ { "brxhg", 0x44, INSTR_RIE_RRP },
+ { "brxlg", 0x45, INSTR_RIE_RRP },
+ { "crb", 0xf6, INSTR_RRS_RRRDU },
+ { "cgrb", 0xe4, INSTR_RRS_RRRDU },
+ { "crj", 0x76, INSTR_RIE_RRPU },
+ { "cgrj", 0x64, INSTR_RIE_RRPU },
+ { "cib", 0xfe, INSTR_RIS_RURDI },
+ { "cgib", 0xfc, INSTR_RIS_RURDI },
+ { "cij", 0x7e, INSTR_RIE_RUPI },
+ { "cgij", 0x7c, INSTR_RIE_RUPI },
+ { "cit", 0x72, INSTR_RIE_R0IU },
+ { "cgit", 0x70, INSTR_RIE_R0IU },
+ { "clrb", 0xf7, INSTR_RRS_RRRDU },
+ { "clgrb", 0xe5, INSTR_RRS_RRRDU },
+ { "clrj", 0x77, INSTR_RIE_RRPU },
+ { "clgrj", 0x65, INSTR_RIE_RRPU },
+ { "clib", 0xff, INSTR_RIS_RURDU },
+ { "clgib", 0xfd, INSTR_RIS_RURDU },
+ { "clij", 0x7f, INSTR_RIE_RUPU },
+ { "clgij", 0x7d, INSTR_RIE_RUPU },
+ { "clfit", 0x73, INSTR_RIE_R0UU },
+ { "clgit", 0x71, INSTR_RIE_R0UU },
+ { "rnsbg", 0x54, INSTR_RIE_RRUUU },
+ { "rxsbg", 0x57, INSTR_RIE_RRUUU },
+ { "rosbg", 0x56, INSTR_RIE_RRUUU },
+ { "risbg", 0x55, INSTR_RIE_RRUUU },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Opcode table used by find_insn() when code[0] == 0xed.
+-- Each entry is { mnemonic, opcode fragment, format id }. For this table
+-- find_insn() takes the fragment from code[5] (not code[1]) and masks it with
+-- the format's opcode mask before comparing.
+-- The final entry (empty mnemonic, INSTR_INVALID) is presumably an
+-- end-of-table marker.
+local opcode_ed = {
+ { "mayl", 0x38, INSTR_RXF_FRRDF },
+ { "myl", 0x39, INSTR_RXF_FRRDF },
+ { "may", 0x3a, INSTR_RXF_FRRDF },
+ { "my", 0x3b, INSTR_RXF_FRRDF },
+ { "mayh", 0x3c, INSTR_RXF_FRRDF },
+ { "myh", 0x3d, INSTR_RXF_FRRDF },
+ { "ley", 0x64, INSTR_RXY_FRRD },
+ { "ldy", 0x65, INSTR_RXY_FRRD },
+ { "stey", 0x66, INSTR_RXY_FRRD },
+ { "stdy", 0x67, INSTR_RXY_FRRD },
+ { "sldt", 0x40, INSTR_RXF_FRRDF },
+ { "slxt", 0x48, INSTR_RXF_FRRDF },
+ { "srdt", 0x41, INSTR_RXF_FRRDF },
+ { "srxt", 0x49, INSTR_RXF_FRRDF },
+ { "tdcet", 0x50, INSTR_RXE_FRRD },
+ { "tdcdt", 0x54, INSTR_RXE_FRRD },
+ { "tdcxt", 0x58, INSTR_RXE_FRRD },
+ { "tdget", 0x51, INSTR_RXE_FRRD },
+ { "tdgdt", 0x55, INSTR_RXE_FRRD },
+ { "tdgxt", 0x59, INSTR_RXE_FRRD },
+ { "ldeb", 0x04, INSTR_RXE_FRRD },
+ { "lxdb", 0x05, INSTR_RXE_FRRD },
+ { "lxeb", 0x06, INSTR_RXE_FRRD },
+ { "mxdb", 0x07, INSTR_RXE_FRRD },
+ { "keb", 0x08, INSTR_RXE_FRRD },
+ { "ceb", 0x09, INSTR_RXE_FRRD },
+ { "aeb", 0x0a, INSTR_RXE_FRRD },
+ { "seb", 0x0b, INSTR_RXE_FRRD },
+ { "mdeb", 0x0c, INSTR_RXE_FRRD },
+ { "deb", 0x0d, INSTR_RXE_FRRD },
+ { "maeb", 0x0e, INSTR_RXF_FRRDF },
+ { "mseb", 0x0f, INSTR_RXF_FRRDF },
+ { "tceb", 0x10, INSTR_RXE_FRRD },
+ { "tcdb", 0x11, INSTR_RXE_FRRD },
+ { "tcxb", 0x12, INSTR_RXE_FRRD },
+ { "sqeb", 0x14, INSTR_RXE_FRRD },
+ { "sqdb", 0x15, INSTR_RXE_FRRD },
+ { "meeb", 0x17, INSTR_RXE_FRRD },
+ { "kdb", 0x18, INSTR_RXE_FRRD },
+ { "cdb", 0x19, INSTR_RXE_FRRD },
+ { "adb", 0x1a, INSTR_RXE_FRRD },
+ { "sdb", 0x1b, INSTR_RXE_FRRD },
+ { "mdb", 0x1c, INSTR_RXE_FRRD },
+ { "ddb", 0x1d, INSTR_RXE_FRRD },
+ { "madb", 0x1e, INSTR_RXF_FRRDF },
+ { "msdb", 0x1f, INSTR_RXF_FRRDF },
+ { "lde", 0x24, INSTR_RXE_FRRD },
+ { "lxd", 0x25, INSTR_RXE_FRRD },
+ { "lxe", 0x26, INSTR_RXE_FRRD },
+ { "mae", 0x2e, INSTR_RXF_FRRDF },
+ { "mse", 0x2f, INSTR_RXF_FRRDF },
+ { "sqe", 0x34, INSTR_RXE_FRRD },
+ { "sqd", 0x35, INSTR_RXE_FRRD },
+ { "mee", 0x37, INSTR_RXE_FRRD },
+ { "mad", 0x3e, INSTR_RXF_FRRDF },
+ { "msd", 0x3f, INSTR_RXF_FRRDF },
+ { "", 0, INSTR_INVALID }
+}
+
+-- Extracts an operand value from an instruction.
+--
+-- code:    instruction bytes, indexed from 0 (the convention find_insn()
+--          uses for code[0], code[1], code[5]).
+-- operand: operand descriptor; operand[1] is the width in bits and
+--          operand[2] the bit offset from the start of the instruction.
+--          NOTE(review): the flag word is assumed to live in operand[3]
+--          (next to bits/shift) -- confirm against the operand table.
+-- Returns the operand value as a 32-bit integer, after the sign-extension,
+-- pc-relative doubling and length adjustments selected by the flags.
+local function extract_operand(code, operand)
+  local nbits, shift = operand[1], operand[2]
+  local flags = operand[3] or 0
+
+  -- Gather every byte the operand touches, most significant byte first.
+  local idx = rshift(shift, 3)
+  local bits = band(shift, 7) + nbits
+  local val = 0
+  repeat
+    val = bor(lshift(val, 8), code[idx])
+    idx = idx + 1
+    bits = bits - 8
+  until bits <= 0
+
+  -- Drop the bits below the operand, then mask to its width. The two-step
+  -- shift keeps the mask correct for a 32-bit wide operand.
+  val = rshift(val, -bits)
+  val = band(val, lshift(lshift(1, nbits - 1), 1) - 1)
+
+  -- Check for special long displacement case: the field stores the low
+  -- 12 bits first and the high 8 bits after them, so swap them into place.
+  if nbits == 20 and shift == 20 then
+    val = bor(lshift(band(val, 0xff), 12), rshift(band(val, 0xfff00), 8))
+  end
+
+  -- Sign extend value if the operand is signed or pc relative.
+  -- Bit tests must compare against 0 explicitly: 0 is truthy in Lua.
+  if band(flags, bor(OPERAND_SIGNED, OPERAND_PCREL)) ~= 0 and
+     band(val, lshift(1, nbits - 1)) ~= 0 then
+    val = bor(val, lshift(lshift(-1, nbits - 1), 1))
+  end
+
+  -- Double value if the operand is pc relative.
+  if band(flags, OPERAND_PCREL) ~= 0 then
+    val = lshift(val, 1)
+  end
+
+  -- A length x encoded in an instruction has real length x + 1.
+  if band(flags, OPERAND_LENGTH) ~= 0 then
+    val = val + 1
+  end
+  return val
+end
+
+-- Computes an instruction length from the value passed in `code`:
+-- ((code + 64) >> 7 + 1) << 1. For values 0..255 that yields 2 for 0..63,
+-- 4 for 64..191 and 6 for 192..255.
+local function insn_length(code)
+  local halfwords = rshift(tonumber(code) + 64, 7) + 1
+  return lshift(halfwords, 1)
+end
+
+-- Finds the opcode table entry matching an instruction.
+--
+-- code: instruction bytes, indexed from 0.
+-- The first byte selects a secondary table for multi-byte opcodes. The
+-- opcode fragment is then code[1], or code[5] for the 0xe3/0xeb/0xec/0xed
+-- groups. Any other first byte is looked up directly in the primary
+-- `opcode` table.
+-- Returns the matching { mnemonic, fragment, format } entry, or nil.
+local function find_insn(code)
+  local first = code[0]
+  local opfrag = code[1]
+  local optab
+
+  if first == 0x01 then
+    optab = opcode_01
+  elseif first == 0xa5 then
+    optab = opcode_a5
+  elseif first == 0xa7 then
+    optab = opcode_a7
+  elseif first == 0xb2 then
+    optab = opcode_b2
+  elseif first == 0xb3 then
+    optab = opcode_b3
+  elseif first == 0xb9 then
+    optab = opcode_b9
+  elseif first == 0xc0 then
+    optab = opcode_c0
+  elseif first == 0xc2 then
+    optab = opcode_c2
+  elseif first == 0xc4 then
+    optab = opcode_c4
+  elseif first == 0xc6 then
+    optab = opcode_c6
+  elseif first == 0xc8 then
+    optab = opcode_c8
+  elseif first == 0xe3 then
+    optab = opcode_e3
+    opfrag = code[5]
+  elseif first == 0xe5 then
+    optab = opcode_e5
+  elseif first == 0xeb then
+    optab = opcode_eb
+    opfrag = code[5]
+  elseif first == 0xec then
+    optab = opcode_ec
+    opfrag = code[5]
+  elseif first == 0xed then
+    optab = opcode_ed
+    opfrag = code[5]
+  else
+    optab = opcode
+    opfrag = first
+  end
+
+  -- ipairs() walks the entries in declaration order, which matters for
+  -- tables that list the same fragment more than once.
+  for _, insn in ipairs(optab) do
+    -- Stop at the end marker so it is never returned as a match.
+    if insn[3] == INSTR_INVALID then break end
+    local opmask = formats[insn[3]][1]
+    if insn[2] == band(opfrag, opmask) then
+      return insn
+    end
+  end
+  return nil
+end
+
+------------------------------------------------------------------------------
+
+-- Emit one formatted output line: address, optional hex dump of ctx.op,
+-- the opcode text and the comma-separated operands.
+-- If ctx.rel is set, a "->symbol" suffix is appended when ctx.symtab has an
+-- entry for it; otherwise the raw target is appended as a "; 0x..." comment
+-- unless the bits of ctx.op under 0x0e000000 equal 0x0a000000.
+-- NOTE(review): ctx.pos is always advanced by 4 bytes here.
+local function putop(ctx, text, operands)
+  local start = ctx.pos
+  local target = ctx.rel
+  local note = ""
+  if target then
+    local label = ctx.symtab[target]
+    if label then
+      note = "\t->"..label
+    elseif band(ctx.op, 0x0e000000) ~= 0x0a000000 then
+      note = "\t; 0x"..tohex(target)
+    end
+  end
+  local line
+  if ctx.hexdump > 0 then
+    line = format("%08x %s %-5s %s%s\n",
+      ctx.addr+start, tohex(ctx.op), text, concat(operands, ", "), note)
+  else
+    line = format("%08x %-5s %s%s\n",
+      ctx.addr+start, text, concat(operands, ", "), note)
+  end
+  ctx.out(line)
+  ctx.pos = start + 4
+end
+
+ -- Fallback for unknown opcodes: emit the raw word in ctx.op as a ".long".
+ local function unknown(ctx)
+   local raw = "0x"..tohex(ctx.op)
+   return putop(ctx, ".long", { raw })
+ end
+
+ -- Format operand 2 of load/store opcodes.
+ -- Returns the address operand as a string ("[base]", "[base, ofs]" or
+ -- "[base], ofs", optionally followed by "!"). When the base register name is
+ -- "pc" and the offset is an immediate, ctx.rel is set to the target address.
+ -- NOTE(review): the bit layout decoded here and the "pc"/"rrx" names look
+ -- carried over from an ARM disassembler -- confirm whether s390x needs this.
+ local function fmtload(ctx, op, pos)
+ -- Base register name comes from bits 16..19 of the opcode word.
+ local base = map_gpr[band(rshift(op, 16), 15)]
+ local x, ofs
+ local ext = (band(op, 0x04000000) == 0)
+ if not ext and band(op, 0x02000000) == 0 then
+ -- 12-bit immediate offset; bit 23 clear means the offset is negated.
+ ofs = band(op, 4095)
+ if band(op, 0x00800000) == 0 then ofs = -ofs end
+ if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end
+ ofs = "#"..ofs
+ elseif ext and band(op, 0x00400000) ~= 0 then
+ -- 8-bit immediate offset split across bits 0..3 and bits 8..11.
+ ofs = band(op, 15) + band(rshift(op, 4), 0xf0)
+ if band(op, 0x00800000) == 0 then ofs = -ofs end
+ if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end
+ ofs = "#"..ofs
+ else
+ -- Register offset, optionally followed by a shift specifier.
+ ofs = map_gpr[band(op, 15)]
+ if ext or band(op, 0xfe0) == 0 then
+ elseif band(op, 0xfe0) == 0x60 then
+ ofs = format("%s, rrx", ofs)
+ else
+ local sh = band(rshift(op, 7), 31)
+ -- A shift count of 0 is printed as 32.
+ if sh == 0 then sh = 32 end
+ ofs = format("%s, %s #%d", ofs, map_shift[band(rshift(op, 5), 3)], sh)
+ end
+ if band(op, 0x00800000) == 0 then ofs = "-"..ofs end
+ end
+ -- Bit 24 selects between the "[base], ofs" and "[base, ofs]" spellings.
+ if ofs == "#0" then
+ x = format("[%s]", base)
+ elseif band(op, 0x01000000) == 0 then
+ x = format("[%s], %s", base, ofs)
+ else
+ x = format("[%s, %s]", base, ofs)
+ end
+ -- Bits 24 and 21 both set: append "!".
+ if band(op, 0x01200000) == 0x01200000 then x = x.."!" end
+ return x
+ end
+
+ -- Format operand 2 of vector load/store opcodes.
+ -- The offset is the low 8 bits of the opcode times 4, negated when bit 23
+ -- is clear. A base named "pc" also records the target address in ctx.rel.
+ local function fmtvload(ctx, op, pos)
+   local reg = map_gpr[band(rshift(op, 16), 15)]
+   local disp = band(op, 255)*4
+   if band(op, 0x00800000) == 0 then
+     disp = -disp
+   end
+   if reg == "pc" then
+     ctx.rel = ctx.addr + pos + 8 + disp
+   end
+   if disp == 0 then
+     return format("[%s]", reg)
+   end
+   return format("[%s, #%d]", reg, disp)
+ end
+
+ -- Format a vector register name from two opcode bit fields.
+ -- For vr == "s" the 4-bit field at sh0 is doubled and the bit at sh1 added;
+ -- otherwise the bit at sh1 becomes bit 4 of a "d" register number.
+ local function fmtvr(op, vr, sh0, sh1)
+   local field = band(rshift(op, sh0), 15)
+   if vr == "s" then
+     return format("s%d", 2*field+band(rshift(op, sh1), 1))
+   end
+   return format("d%d", field+band(rshift(op, sh1-4), 16))
+ end
+
+ -- Disassemble a single instruction.
+ --
+ -- Stores the word at ctx.pos in ctx.op, clears ctx.rel and emits the word
+ -- through unknown(), which advances ctx.pos (via putop) so that the loop in
+ -- disass_block() terminates.
+ -- TODO: decode real s390x instructions with find_insn()/extract_operand()
+ -- and advance by insn_length() instead of a fixed word.
+ local function disass_ins(ctx)
+   local pos = ctx.pos
+   local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+   -- s390x stores instructions big-endian: the first byte is the most
+   -- significant. Bytes missing at the end of the buffer are read as 0.
+   local op = bor(lshift(b0, 24), lshift(b1 or 0, 16), lshift(b2 or 0, 8),
+                  b3 or 0)
+   ctx.op = op
+   ctx.rel = nil
+   return unknown(ctx)
+ end
+
+ ------------------------------------------------------------------------------
+
+ -- Disassemble a block of code.
+ -- Starts at offset ofs (default 0) and runs for len bytes, or to the end
+ -- of ctx.code when len is not given.
+ local function disass_block(ctx, ofs, len)
+   ofs = ofs or 0
+   local limit
+   if len then
+     limit = ofs+len
+   else
+     limit = #ctx.code
+   end
+   ctx.pos = ofs
+   ctx.rel = nil
+   while ctx.pos < limit do
+     disass_ins(ctx)
+   end
+ end
+
+ -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+ -- addr defaults to 0 and out defaults to io.write; hexdump starts at 8 and
+ -- the symbol table starts empty.
+ local function create(code, addr, out)
+   return {
+     code = code,
+     addr = addr or 0,
+     out = out or io.write,
+     symtab = {},
+     disass = disass_block,
+     hexdump = 8,
+   }
+ end
+
+ -- Simple API: disassemble code (a string) at address and output via out.
+ -- Builds a fresh context and disassembles the whole string.
+ local function disass(code, addr, out)
+   local ctx = create(code, addr, out)
+   ctx:disass()
+ end
+
+ -- Return register name for RID.
+ -- RIDs below 16 are looked up in map_gpr; higher ones are printed as
+ -- "d" followed by (r-16).
+ local function regname(r)
+   if not (r < 16) then
+     return "d"..(r-16)
+   end
+   return map_gpr[r]
+ end
+
+ -- Public module functions.
+ -- create:  build a disassembler context (then call ctx:disass(ofs, len)).
+ -- disass:  one-shot helper that disassembles a whole code string.
+ -- regname: map a register id to its printable name.
+ return {
+ create = create,
+ disass = disass,
+ regname = regname
+ }
+
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 18e7a4b7..9ddbfb96 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -144,10 +144,11 @@ local function dump_mcode(tr)
if not mcode then return end
if not disass then disass = require("jit.dis_"..jit.arch) end
if addr < 0 then addr = addr + 2^32 end
- out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
+ out:write("---- TRACE REallly ", tr, " mcode ", #mcode, "\n")
local ctx = disass.create(mcode, addr, dumpwrite)
ctx.hexdump = 0
ctx.symtab = fillsymtab(tr, info.nexit)
+ print(info.nexit)
if loop ~= 0 then
symtab[addr+loop] = "LOOP"
ctx:disass(0, loop)
@@ -387,13 +388,13 @@ end
-- Dump snapshots (not interleaved with IR).
local function dump_snap(tr)
- out:write("---- TRACE ", tr, " snapshots\n")
- for i=0,1000000000 do
- local snap = tracesnap(tr, i)
- if not snap then break end
- out:write(format("#%-3d %04d [ ", i, snap[0]))
- printsnap(tr, snap)
- end
+ -- out:write("---- TRACE ", tr, " snapshots\n")
+ -- for i=0,1000000000 do
+ -- local snap = tracesnap(tr, i)
+ -- if not snap then break end
+ -- out:write(format("#%-3d %04d [ ", i, snap[0]))
+ -- printsnap(tr, snap)
+ -- end
end
-- Return a register name or stack slot for a rid/sp location.
@@ -410,20 +411,20 @@ end
-- Dump CALL* function ref and return optional ctype.
local function dumpcallfunc(tr, ins)
- local ctype
- if ins > 0 then
- local m, ot, op1, op2 = traceir(tr, ins)
- if band(ot, 31) == 0 then -- nil type means CARG(func, ctype).
- ins = op1
- ctype = formatk(tr, op2)
- end
- end
- if ins < 0 then
- out:write(format("[0x%x](", tonumber((tracek(tr, ins)))))
- else
- out:write(format("%04d (", ins))
- end
- return ctype
+ -- local ctype
+ -- if ins > 0 then
+ -- local m, ot, op1, op2 = traceir(tr, ins)
+ -- if band(ot, 31) == 0 then -- nil type means CARG(func, ctype).
+ -- ins = op1
+ -- ctype = formatk(tr, op2)
+ -- end
+ -- end
+ -- if ins < 0 then
+ -- out:write(format("[0x%x](", tonumber((tracek(tr, ins)))))
+ -- else
+ -- out:write(format("%04d (", ins))
+ -- end
+ -- return ctype
end
-- Recursively gather CALL* args and dump them.
@@ -449,99 +450,99 @@ end
-- Dump IR and interleaved snapshots.
local function dump_ir(tr, dumpsnap, dumpreg)
- local info = traceinfo(tr)
- if not info then return end
- local nins = info.nins
- out:write("---- TRACE ", tr, " IR\n")
- local irnames = vmdef.irnames
- local snapref = 65536
- local snap, snapno
- if dumpsnap then
- snap = tracesnap(tr, 0)
- snapref = snap[0]
- snapno = 0
- end
- for ins=1,nins do
- if ins >= snapref then
- if dumpreg then
- out:write(format(".... SNAP #%-3d [ ", snapno))
- else
- out:write(format(".... SNAP #%-3d [ ", snapno))
- end
- printsnap(tr, snap)
- snapno = snapno + 1
- snap = tracesnap(tr, snapno)
- snapref = snap and snap[0] or 65536
- end
- local m, ot, op1, op2, ridsp = traceir(tr, ins)
- local oidx, t = 6*shr(ot, 8), band(ot, 31)
- local op = sub(irnames, oidx+1, oidx+6)
- if op == "LOOP " then
- if dumpreg then
- out:write(format("%04d ------------ LOOP ------------\n", ins))
- else
- out:write(format("%04d ------ LOOP ------------\n", ins))
- end
- elseif op ~= "NOP " and op ~= "CARG " and
- (dumpreg or op ~= "RENAME") then
- local rid = band(ridsp, 255)
- if dumpreg then
- out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
- else
- out:write(format("%04d ", ins))
- end
- out:write(format("%s%s %s %s ",
- (rid == 254 or rid == 253) and "}" or
- (band(ot, 128) == 0 and " " or ">"),
- band(ot, 64) == 0 and " " or "+",
- irtype[t], op))
- local m1, m2 = band(m, 3), band(m, 3*4)
- if sub(op, 1, 4) == "CALL" then
- local ctype
- if m2 == 1*4 then -- op2 == IRMlit
- out:write(format("%-10s (", vmdef.ircall[op2]))
- else
- ctype = dumpcallfunc(tr, op2)
- end
- if op1 ~= -1 then dumpcallargs(tr, op1) end
- out:write(")")
- if ctype then out:write(" ctype ", ctype) end
- elseif op == "CNEW " and op2 == -1 then
- out:write(formatk(tr, op1))
- elseif m1 ~= 3 then -- op1 != IRMnone
- if op1 < 0 then
- out:write(formatk(tr, op1))
- else
- out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
- end
- if m2 ~= 3*4 then -- op2 != IRMnone
- if m2 == 1*4 then -- op2 == IRMlit
- local litn = litname[op]
- if litn and litn[op2] then
- out:write(" ", litn[op2])
- elseif op == "UREFO " or op == "UREFC " then
- out:write(format(" #%-3d", shr(op2, 8)))
- else
- out:write(format(" #%-3d", op2))
- end
- elseif op2 < 0 then
- out:write(" ", formatk(tr, op2))
- else
- out:write(format(" %04d", op2))
- end
- end
- end
- out:write("\n")
- end
- end
- if snap then
- if dumpreg then
- out:write(format(".... SNAP #%-3d [ ", snapno))
- else
- out:write(format(".... SNAP #%-3d [ ", snapno))
- end
- printsnap(tr, snap)
- end
+ -- local info = traceinfo(tr)
+ -- if not info then return end
+ -- local nins = info.nins
+ -- out:write("---- TRACE ", tr, " IR\n")
+ -- local irnames = vmdef.irnames
+ -- local snapref = 65536
+ -- local snap, snapno
+ -- if dumpsnap then
+ -- snap = tracesnap(tr, 0)
+ -- snapref = snap[0]
+ -- snapno = 0
+ -- end
+ -- for ins=1,nins do
+ -- if ins >= snapref then
+ -- if dumpreg then
+ -- out:write(format(".... SNAP #%-3d [ ", snapno))
+ -- else
+ -- out:write(format(".... SNAP #%-3d [ ", snapno))
+ -- end
+ -- printsnap(tr, snap)
+ -- snapno = snapno + 1
+ -- snap = tracesnap(tr, snapno)
+ -- snapref = snap and snap[0] or 65536
+ -- end
+ -- local m, ot, op1, op2, ridsp = traceir(tr, ins)
+ -- local oidx, t = 6*shr(ot, 8), band(ot, 31)
+ -- local op = sub(irnames, oidx+1, oidx+6)
+ -- if op == "LOOP " then
+ -- if dumpreg then
+ -- out:write(format("%04d ------------ LOOP ------------\n", ins))
+ -- else
+ -- out:write(format("%04d ------ LOOP ------------\n", ins))
+ -- end
+ -- elseif op ~= "NOP " and op ~= "CARG " and
+ -- (dumpreg or op ~= "RENAME") then
+ -- local rid = band(ridsp, 255)
+ -- if dumpreg then
+ -- out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
+ -- else
+ -- out:write(format("%04d ", ins))
+ -- end
+ -- out:write(format("%s%s %s %s ",
+ -- (rid == 254 or rid == 253) and "}" or
+ -- (band(ot, 128) == 0 and " " or ">"),
+ -- band(ot, 64) == 0 and " " or "+",
+ -- irtype[t], op))
+ -- local m1, m2 = band(m, 3), band(m, 3*4)
+ -- if sub(op, 1, 4) == "CALL" then
+ -- local ctype
+ -- if m2 == 1*4 then -- op2 == IRMlit
+ -- out:write(format("%-10s (", vmdef.ircall[op2]))
+ -- else
+ -- ctype = dumpcallfunc(tr, op2)
+ -- end
+ -- if op1 ~= -1 then dumpcallargs(tr, op1) end
+ -- out:write(")")
+ -- if ctype then out:write(" ctype ", ctype) end
+ -- elseif op == "CNEW " and op2 == -1 then
+ -- out:write(formatk(tr, op1))
+ -- elseif m1 ~= 3 then -- op1 != IRMnone
+ -- if op1 < 0 then
+ -- out:write(formatk(tr, op1))
+ -- else
+ -- out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
+ -- end
+ -- if m2 ~= 3*4 then -- op2 != IRMnone
+ -- if m2 == 1*4 then -- op2 == IRMlit
+ -- local litn = litname[op]
+ -- if litn and litn[op2] then
+ -- out:write(" ", litn[op2])
+ -- elseif op == "UREFO " or op == "UREFC " then
+ -- out:write(format(" #%-3d", shr(op2, 8)))
+ -- else
+ -- out:write(format(" #%-3d", op2))
+ -- end
+ -- elseif op2 < 0 then
+ -- out:write(" ", formatk(tr, op2))
+ -- else
+ -- out:write(format(" %04d", op2))
+ -- end
+ -- end
+ -- end
+ -- out:write("\n")
+ -- end
+ -- end
+ -- if snap then
+ -- if dumpreg then
+ -- out:write(format(".... SNAP #%-3d [ ", snapno))
+ -- else
+ -- out:write(format(".... SNAP #%-3d [ ", snapno))
+ -- end
+ -- printsnap(tr, snap)
+ -- end
end
------------------------------------------------------------------------------
@@ -560,37 +561,37 @@ end
-- Dump trace states.
local function dump_trace(what, tr, func, pc, otr, oex)
- if what == "stop" or (what == "abort" and dumpmode.a) then
- if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
- elseif dumpmode.s then dump_snap(tr) end
- if dumpmode.m then dump_mcode(tr) end
- end
- if what == "start" then
- if dumpmode.H then out:write('
\n') end
- out:write("---- TRACE ", tr, " ", what)
- if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
- out:write(" ", fmtfunc(func, pc), "\n")
- elseif what == "stop" or what == "abort" then
- out:write("---- TRACE ", tr, " ", what)
- if what == "abort" then
- out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
- else
- local info = traceinfo(tr)
- local link, ltype = info.link, info.linktype
- if link == tr or link == 0 then
- out:write(" -> ", ltype, "\n")
- elseif ltype == "root" then
- out:write(" -> ", link, "\n")
- else
- out:write(" -> ", link, " ", ltype, "\n")
- end
- end
- if dumpmode.H then out:write("
\n\n") else out:write("\n") end
- else
- if what == "flush" then symtab, nexitsym = {}, 0 end
- out:write("---- TRACE ", what, "\n\n")
- end
- out:flush()
+ -- if what == "stop" or (what == "abort" and dumpmode.a) then
+ -- if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
+ -- elseif dumpmode.s then dump_snap(tr) end
+ -- if dumpmode.m then dump_mcode(tr) end
+ -- end
+ -- if what == "start" then
+ -- if dumpmode.H then out:write('\n') end
+ -- out:write("---- TRACE ", tr, " ", what)
+ -- if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
+ -- out:write(" ", fmtfunc(func, pc), "\n")
+ -- elseif what == "stop" or what == "abort" then
+ -- out:write("---- TRACE ", tr, " ", what)
+ -- if what == "abort" then
+ -- out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
+ -- else
+ -- local info = traceinfo(tr)
+ -- local link, ltype = info.link, info.linktype
+ -- if link == tr or link == 0 then
+ -- out:write(" -> ", ltype, "\n")
+ -- elseif ltype == "root" then
+ -- out:write(" -> ", link, "\n")
+ -- else
+ -- out:write(" -> ", link, " ", ltype, "\n")
+ -- end
+ -- end
+ -- if dumpmode.H then out:write("
\n\n") else out:write("\n") end
+ -- else
+ -- if what == "flush" then symtab, nexitsym = {}, 0 end
+ -- out:write("---- TRACE ", what, "\n\n")
+ -- end
+ -- out:flush()
end
-- Dump recorded bytecode.
@@ -603,6 +604,9 @@ local function dump_record(tr, func, pc, depth)
if pc >= 0 then
line = bcline(func, pc, recprefix)
if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
+ if pc > 0 then
+ line = sub(line, 1, -2) .. " (" .. fmtfunc(func, pc) .. ")\n"
+ end
else
line = "0000 "..recprefix.." FUNCC \n"
end
diff --git a/src/lib_base.c b/src/lib_base.c
index 98ec67c7..56addbba 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -36,6 +36,7 @@
#include "lj_strscan.h"
#include "lj_strfmt.h"
#include "lj_lib.h"
+#include "lj_cdata.h"
/* -- Base library: checks ------------------------------------------------ */
@@ -669,6 +670,52 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn)
setmref(fn->c.pc, &L2GG(L)->bcff[lj_lib_init_coroutine[1]+2]);
}
+#if LJ_HASFFI
+LJLIB_NOREG LJLIB_CF(thread_exdata) LJLIB_REC(.)
+{
+  /* No arguments: return L->exdata boxed as a void * cdata. */
+  if (L->top == L->base) {
+    GCcdata *box;
+    CTState *cts = ctype_ctsG(G(L));
+    if (!cts)
+      lj_err_caller(L, LJ_ERR_FFI_NOTLOAD);
+    cts->L = L;  /* Save L for errors and allocations. */
+    box = lj_cdata_new(cts, CTID_P_VOID, CTSIZE_PTR);
+    cdata_setptr(cdataptr(box), CTSIZE_PTR, L->exdata);
+    setcdataV(L, L->top, box);
+    L->top++;
+    return 1;
+  } else {
+    /* Otherwise argument 1 must be a cdata; its pointer payload is stored. */
+    GCcdata *arg = lj_lib_checkcdata(L, 1);
+    L->exdata = cdata_getptr(cdataptr(arg), CTSIZE_PTR);
+    return 0;
+  }
+}
+
+LJLIB_NOREG LJLIB_CF(thread_exdata2) LJLIB_REC(.)
+{
+  /* No arguments: return L->exdata2 boxed as a void * cdata. */
+  if (L->top == L->base) {
+    GCcdata *box;
+    CTState *cts = ctype_ctsG(G(L));
+    if (!cts)
+      lj_err_caller(L, LJ_ERR_FFI_NOTLOAD);
+    cts->L = L;  /* Save L for errors and allocations. */
+    box = lj_cdata_new(cts, CTID_P_VOID, CTSIZE_PTR);
+    cdata_setptr(cdataptr(box), CTSIZE_PTR, L->exdata2);
+    setcdataV(L, L->top, box);
+    L->top++;
+    return 1;
+  } else {
+    /* Otherwise argument 1 must be a cdata; its pointer payload is stored. */
+    GCcdata *arg = lj_lib_checkcdata(L, 1);
+    L->exdata2 = cdata_getptr(cdataptr(arg), CTSIZE_PTR);
+    return 0;
+  }
+}
+#endif
+
/* ------------------------------------------------------------------------ */
static void newproxy_weaktable(lua_State *L)
@@ -682,6 +729,18 @@ static void newproxy_weaktable(lua_State *L)
t->nomm = (uint8_t)(~(1u<fp = NULL; errno = ENOSYS;
#else
iof->fp = tmpfile();
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 2867d420..50c2b135 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -148,6 +148,66 @@ LJLIB_CF(jit_attach)
return 0;
}
+/* Check one 32-bit state word: an integral value in [-2^31, 2^32), so the
+** signed words this function returns round-trip. NULL (a hole) is an error. */
+static uint32_t jit_prngstate_word(lua_State *L, cTValue *o)
+{
+  lua_Number n;
+  if (o != NULL && tvisint(o))
+    return (uint32_t)intV(o);  /* Low 32 bits only, no sign extension. */
+  if (o == NULL || !tvisnum(o))
+    lj_err_arg(L, 1, LJ_ERR_PRNGSTATE);
+  n = numV(o);
+  if (!(n >= -2147483648.0 && n <= 4294967295.0) || (double)(int64_t)n != n)
+    lj_err_arg(L, 1, LJ_ERR_PRNGSTATE);
+  return (uint32_t)(int64_t)n;
+}
+
+/* Return the JIT PRNG state as eight 32-bit words (low word first). A number
+** argument seeds the low word of u[0]; an array of up to eight words replaces
+** the whole state, with missing words treated as zero. */
+LJLIB_CF(jit_prngstate)
+{
+  GCtab *cur = lj_tab_new(L, 8, 0);
+  int i;
+#if LJ_HASJIT
+  jit_State *J = L2J(L);
+  /* The old state. */
+  for (i = 1; i <= 4; i++) {
+    setintV(lj_tab_setint(L, cur, i*2-1), J->prng.u[i-1] & 0xffffffff);
+    setintV(lj_tab_setint(L, cur, i*2), J->prng.u[i-1] >> 32);
+  }
+  /* We need to set new state using the input array. */
+  if (L->base < L->top && !tvisnil(L->base)) {
+    PRNGState prng;
+    if (tvisnumber(L->base)) {
+      prng.u[0] = jit_prngstate_word(L, L->base);
+      for (i = 1; i < 4; i++)
+        prng.u[i] = 0;
+    } else {
+      GCtab *t = lj_lib_checktab(L, 1);
+      int len = lj_tab_len(t);
+      /* The input array must have at most 8 elements. */
+      if (len > 8)
+        lj_err_arg(L, 1, LJ_ERR_PRNGSTATE);
+      for (i = 0; i < 4; i++)
+        prng.u[i] = 0;
+      for (i = 1; i <= len; i++) {
+        uint64_t w = jit_prngstate_word(L, lj_tab_getint(t, i));
+        prng.u[(i-1)/2] |= (i & 1) ? w : (w << 32);
+      }
+    }
+    /* Re-initialize the JIT prng. */
+    J->prng = prng;
+  }
+#else
+  for (i = 1; i <= 8; i++)
+    setintV(lj_tab_setint(L, cur, i), 0);
+#endif
+  settabV(L, L->top++, cur);
+  return 1;
+}
+
LJLIB_PUSH(top-5) LJLIB_SET(os)
LJLIB_PUSH(top-4) LJLIB_SET(arch)
LJLIB_PUSH(top-3) LJLIB_SET(version_num)
@@ -231,6 +291,7 @@ LJLIB_CF(jit_util_funcbc)
{
GCproto *pt = check_Lproto(L, 0);
BCPos pc = (BCPos)lj_lib_checkint(L, 2);
+ int lineinfo = lj_lib_optint(L, 3, 0);
if (pc < pt->sizebc) {
BCIns ins = proto_bc(pt)[pc];
BCOp op = bc_op(ins);
@@ -238,6 +299,11 @@ LJLIB_CF(jit_util_funcbc)
setintV(L->top, ins);
setintV(L->top+1, lj_bc_mode[op]);
L->top += 2;
+ if (lineinfo) {
+ setintV(L->top, lj_debug_line(pt, pc));
+ L->top += 1;
+ return 3;
+ }
return 2;
}
return 0;
@@ -718,7 +784,8 @@ static uint32_t jit_cpudetect(void)
if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
}
#endif
-
+#elif LJ_TARGET_S390X
+ /* No optional CPU features to detect (for now). */
#else
#error "Missing CPU detection for this architecture"
#endif
diff --git a/src/lib_os.c b/src/lib_os.c
index 6bcd0147..ce4b90b6 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -76,7 +76,7 @@ LJLIB_CF(os_rename)
LJLIB_CF(os_tmpname)
{
-#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
lj_err_caller(L, LJ_ERR_OSUNIQF);
return 0;
#else
@@ -185,6 +185,7 @@ LJLIB_CF(os_date)
#endif
} else {
#if LJ_TARGET_POSIX
+ tzset();
stm = localtime_r(&t, &rtm);
#else
stm = localtime(&t);
diff --git a/src/lib_table.c b/src/lib_table.c
index a723326a..ed6aaefd 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -169,6 +169,47 @@ LJLIB_CF(table_concat) LJLIB_REC(.)
return 1;
}
+/* table.clone(t): replace the argument slot with lj_tab_dup() of table t. */
+LJLIB_NOREG LJLIB_CF(table_clone) LJLIB_REC(.)
+{
+  GCtab *copy = lj_tab_dup(L, lj_lib_checktab(L, 1));
+  TValue *res = L->base;
+
+  settabV(L, res, copy);
+  L->top = res+1;
+  return 1;
+}
+
+/* table.isarray(t): store lj_tab_isarray(t) as a boolean in the first slot. */
+LJLIB_NOREG LJLIB_CF(table_isarray) LJLIB_REC(.)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  TValue *res = L->base;
+  setboolV(res, lj_tab_isarray(t));
+  L->top = res+1;
+  return 1;
+}
+
+/* table.nkeys(t): store lj_tab_nkeys(t) as an integer in the first slot. */
+LJLIB_NOREG LJLIB_CF(table_nkeys) LJLIB_REC(.)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  TValue *res = L->base;
+  setintV(res, lj_tab_nkeys(t));
+  L->top = res+1;
+  return 1;
+}
+
+/* table.isempty(t): store lj_tab_isempty(t) as a boolean in the first slot. */
+LJLIB_NOREG LJLIB_CF(table_isempty) LJLIB_REC(.)
+{
+  GCtab *t = lj_lib_checktab(L, 1);
+  TValue *res = L->base;
+  setboolV(res, lj_tab_isempty(t));
+  L->top = res+1;
+  return 1;
+}
+
/* ------------------------------------------------------------------------ */
static void set2(lua_State *L, int i, int j)
@@ -304,6 +345,26 @@ static int luaopen_table_new(lua_State *L)
return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
}
+static int luaopen_table_clone(lua_State *L)
+{ /* Opener registered for LUA_TABLIBNAME ".clone" via lj_lib_prereg(). */
+ return lj_lib_postreg(L, lj_cf_table_clone, FF_table_clone, "clone");
+}
+
+static int luaopen_table_nkeys(lua_State *L)
+{ /* Opener registered for LUA_TABLIBNAME ".nkeys" via lj_lib_prereg(). */
+ return lj_lib_postreg(L, lj_cf_table_nkeys, FF_table_nkeys, "nkeys");
+}
+
+static int luaopen_table_isarray(lua_State *L)
+{ /* Opener registered for LUA_TABLIBNAME ".isarray" via lj_lib_prereg(). */
+ return lj_lib_postreg(L, lj_cf_table_isarray, FF_table_isarray, "isarray");
+}
+
+static int luaopen_table_isempty(lua_State *L)
+{ /* Opener registered for LUA_TABLIBNAME ".isempty" via lj_lib_prereg(). */
+ return lj_lib_postreg(L, lj_cf_table_isempty, FF_table_isempty, "isempty");
+}
+
static int luaopen_table_clear(lua_State *L)
{
return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
@@ -321,6 +382,10 @@ LUALIB_API int luaopen_table(lua_State *L)
lua_setfield(L, -2, "unpack");
#endif
lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
+ lj_lib_prereg(L, LUA_TABLIBNAME ".clone", luaopen_table_clone, tabV(L->top-1));
+ lj_lib_prereg(L, LUA_TABLIBNAME ".isarray", luaopen_table_isarray, tabV(L->top-1));
+ lj_lib_prereg(L, LUA_TABLIBNAME ".nkeys", luaopen_table_nkeys, tabV(L->top-1));
+ lj_lib_prereg(L, LUA_TABLIBNAME ".isempty", luaopen_table_isempty, tabV(L->top-1));
lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
return 1;
}
diff --git a/src/lj_api.c b/src/lj_api.c
index d869ebf8..021670fd 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -1143,6 +1143,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
ef = savestack(L, o);
}
status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
+  /* Non-zero status: the protected call failed, so the hook is restored. */
if (status) hook_restore(g, oldh);
return status;
}
@@ -1195,6 +1196,36 @@ LUA_API int lua_isyieldable(lua_State *L)
return cframe_canyield(L->cframe);
}
+/* Reset coroutine th to a fresh state: clear its status and C frame, adopt
+** the global state, environment and exdata pointers of L, and empty its
+** stack (closing th's open upvalues and nil-filling every slot). */
+LUA_API void lua_resetthread(lua_State *L, lua_State *th)
+{
+  th->dummy_ffid = FF_C;
+  th->status = LUA_OK;
+
+  setmrefr(th->glref, L->glref);
+  setgcrefr(th->env, L->env);
+  th->cframe = NULL;
+
+  if (tvref(th->stack) != NULL) {
+    TValue *st, *stend;
+    lj_state_relimitstack(th);
+    st = tvref(th->stack);  /* Read after relimiting, in case the stack moved. */
+    stend = st + th->stacksize;
+    st++;  /* Needed for curr_funcisL() on empty stack. */
+    if (LJ_FR2) st++;
+    th->base = th->top = st;
+    /* Close upvalues of th (not L): st points into th's stack. */
+    lj_func_closeuv(th, st);
+    while (st < stend)  /* Clear new slots. */
+      setnilV(st++);
+  }
+
+  th->exdata = L->exdata;
+  th->exdata2 = L->exdata2;
+}
+
LUA_API int lua_yield(lua_State *L, int nresults)
{
void *cf = L->cframe;
@@ -1311,3 +1342,22 @@ LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud)
g->allocf = f;
}
+LUA_API void lua_setexdata(lua_State *L, void *exdata)
+{
+ L->exdata = exdata; /* Store the caller's pointer in this thread's exdata. */
+}
+
+LUA_API void *lua_getexdata(lua_State *L)
+{
+ return L->exdata; /* Pointer last stored in this thread's exdata field. */
+}
+
+LUA_API void lua_setexdata2(lua_State *L, void *exdata2)
+{
+ L->exdata2 = exdata2; /* Store the caller's pointer in this thread's exdata2. */
+}
+
+LUA_API void *lua_getexdata2(lua_State *L)
+{
+ return L->exdata2; /* Pointer last stored in this thread's exdata2 field. */
+}
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 882c99cb..e66dfa8f 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -31,6 +31,8 @@
#define LUAJIT_ARCH_mips32 6
#define LUAJIT_ARCH_MIPS64 7
#define LUAJIT_ARCH_mips64 7
+#define LUAJIT_ARCH_S390X 8
+#define LUAJIT_ARCH_s390x 8
/* Target OS. */
#define LUAJIT_OS_OTHER 0
@@ -59,6 +61,8 @@
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
+#elif defined(__s390x__) || defined(__s390x)
+#define LUAJIT_TARGET LUAJIT_ARCH_S390X
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
@@ -162,13 +166,6 @@
#define LJ_TARGET_GC64 1
#endif
-#ifdef __NX__
-#define LJ_TARGET_NX 1
-#define LJ_TARGET_CONSOLE 1
-#undef NULL
-#define NULL ((void*)0)
-#endif
-
#ifdef _UWP
#define LJ_TARGET_UWP 1
#if LUAJIT_TARGET == LUAJIT_ARCH_X64
@@ -213,6 +210,10 @@
#error "macOS requires GC64 -- don't disable it"
#endif
+#ifdef __GNUC__
+#define LJ_HAS_OPTIMISED_HASH 1
+#endif
+
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
#define LJ_ARCH_NAME "arm"
@@ -323,8 +324,18 @@
#if LJ_TARGET_CONSOLE
#define LJ_ARCH_PPC32ON64 1
#define LJ_ARCH_NOFFI 1
+#if LJ_TARGET_PS3
+#define LJ_ARCH_PPC_OPD 1
+#endif
#elif LJ_ARCH_BITS == 64
-#error "No support for PPC64"
+#define LJ_ARCH_PPC32ON64 1
+#define LJ_ARCH_NOJIT 1 /* NYI */
+#if _CALL_ELF == 2
+#define LJ_ARCH_PPC_ELFV2 1
+#else
+#define LJ_ARCH_PPC_OPD 1
+#define LJ_ARCH_PPC_OPDENV 1
+#endif
#endif
#if _ARCH_PWR7
@@ -432,6 +443,20 @@
#define LJ_ARCH_VERSION 10
#endif
+#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X
+
+#define LJ_ARCH_NAME "s390x"
+#define LJ_ARCH_BITS 64
+#define LJ_ARCH_ENDIAN LUAJIT_BE
+#define LJ_TARGET_S390X 1
+#define LJ_TARGET_EHRETREG 0xe
+#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
+#define LJ_TARGET_MASKSHIFT 1
+#define LJ_TARGET_MASKROT 1
+#define LJ_TARGET_UNALIGNED 1
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+#define LJ_TARGET_GC64 1
+
#else
#error "No target architecture defined"
#endif
@@ -445,7 +470,7 @@
#error "Need at least GCC 3.4 or newer"
#endif
#elif LJ_TARGET_X64
-#if __GNUC__ < 4
+#if 0 && __GNUC__ < 4
#error "Need at least GCC 4.0 or newer"
#endif
#elif LJ_TARGET_ARM
@@ -490,9 +515,6 @@
#error "No support for ILP32 model on ARM64"
#endif
#elif LJ_TARGET_PPC
-#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
-#error "No support for little-endian PPC32"
-#endif
#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 6f5e0c45..d8118088 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1662,6 +1662,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_asm_mips.h"
+#elif LJ_TARGET_S390X
+#include "lj_asm_s390x.h"
#else
#error "Missing assembler for target CPU"
#endif
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 1f44d023..8c943475 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -353,6 +353,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
return 0;
}
+/* Fuse FP neg-multiply-add/sub (NEG of ADD/SUB with a MUL operand). 1 if fused. */
+static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+{
+ IRRef ref = ir->op1;
+ IRIns *irn = IR(ref);
+ if (irn->o != IR_ADD && irn->o != IR_SUB)
+ return 0;
+ /* The ADD/SUB operand itself must pass mayfuse(). */
+ if (!mayfuse(as, ref))
+ return 0;
+ /* NOTE(review): opcode ignores which SUB operand is the MUL -- confirm sign. */
+ IRRef lref = irn->op1, rref = irn->op2;
+ IRIns *irm;
+ if (lref != rref &&
+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+ ra_noreg(irm->r)) ||
+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+ (rref = lref, ra_noreg(irm->r))))) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+ Reg left = ra_alloc2(as, irm,
+ rset_exclude(rset_exclude(RSET_FPR, dest), add));
+ Reg right = (left >> 8); left &= 255;
+ emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31));
+ return 1;
+ }
+ return 0;
+}
+
/* Fuse BAND + BSHL/BSHR into UBFM. */
static int asm_fuseandshift(ASMState *as, IRIns *ir)
{
@@ -1051,10 +1080,30 @@ static void asm_xload(ASMState *as, IRIns *ir)
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}
+/* Return 1 if ref is an IR_KNULL, or an IR_KINT/IR_KINT64 constant equal to
+** zero; return 0 for every other instruction. */
+static int maybe_zero_val(ASMState *as, IRRef ref)
+{
+  IRIns *k = IR(ref);
+
+  if (k->o == IR_KNULL)
+    return 1;
+  if (k->o == IR_KINT)
+    return k->i == 0;
+  if (k->o == IR_KINT64)
+    return ir_kint64(k)->u64 == 0;
+
+  return 0;
+}
+
static void asm_xstore(ASMState *as, IRIns *ir)
{
if (ir->r != RID_SINK) {
- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+ Reg src;
+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
+ src = RID_ZERO;
+ else
+ src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
rset_exclude(RSET_GPR, src));
}
@@ -1250,7 +1299,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize immutable cdata object. */
if (ir->o == IR_CNEWI) {
int32_t ofs = sizeof(GCcdata);
- Reg r = ra_alloc1(as, ir->op2, allow);
+ Reg r;
+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
+ r = RID_ZERO;
+ else
+ r = ra_alloc1(as, ir->op2, allow);
+
lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
} else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
@@ -1266,7 +1320,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
{
- Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
+ Reg r = id == 0 ? RID_ZERO : (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
@@ -1466,7 +1520,8 @@ static void asm_mul(ASMState *as, IRIns *ir)
static void asm_neg(ASMState *as, IRIns *ir)
{
if (irt_isnum(ir->t)) {
- asm_fpunary(as, ir, A64I_FNEGd);
+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd))
+ asm_fpunary(as, ir, A64I_FNEGd);
return;
}
asm_intneg(as, ir);
@@ -1919,6 +1974,17 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
IRIns *ir;
asm_head_lreg(as);
ir = IR(REF_BASE);
+
+ /* IRRefs that get into the side trace from the parent trace may restore
+ * REF_BASE under severe register pressure and thus reach here holding on to
+ * the register. Restore such references so that REF_BASE gets RID_BASE back
+ * when it tries to allocate below. */
+ if (!ra_hasreg(ir->r)) {
+ Reg r = ra_gethint(ir->r);
+ if (!rset_test(as->freeset, r))
+ ra_restore(as, regcost_ref(as->cost[r]));
+ }
+
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
ra_spill(as, ir);
if (ra_hasspill(irp->s)) {
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 4465efa2..710cbb95 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1298,7 +1298,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
-#if !LJ_64
+#if !LJ_64 || (defined(LUAJIT_USE_VALGRIND) && !LJ_GC64)
MCLabel l_exit;
#endif
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@@ -1313,7 +1313,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
}
}
asm_guardcc(as, CC_NE);
-#if LJ_64
+#if LJ_64 && (!defined(LUAJIT_USE_VALGRIND) || LJ_GC64)
if (!irt_ispri(irkey->t)) {
Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
emit_rmro(as, XO_CMP, key|REX_64, node,
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 25f54dee..8162b950 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -370,6 +370,82 @@
#elif LJ_TARGET_PPC
/* -- PPC calling conventions --------------------------------------------- */
+#if LJ_ARCH_BITS == 64
+
+#if LJ_ARCH_PPC_ELFV2
+
+#define CCALL_HANDLE_STRUCTRET \
+ if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \
+ cc->retref = 1; /* Return by reference. */ \
+ cc->gpr[ngpr++] = (GPRArg)dp; \
+ }
+
+#define CCALL_HANDLE_STRUCTRET2 \
+ int isfp = ccall_classify_fp(cts, ctr); \
+ int i; \
+ if (isfp == FTYPE_FLOAT) { \
+ for (i = 0; i < ctr->size / 4; i++) \
+ ((float *)dp)[i] = cc->fpr[i]; \
+ } else if (isfp == FTYPE_DOUBLE) { \
+ for (i = 0; i < ctr->size / 8; i++) \
+ ((double *)dp)[i] = cc->fpr[i]; \
+ } else { \
+ if (ctr->size < 8 && LJ_BE) { \
+ sp += 8 - ctr->size; \
+ } \
+ memcpy(dp, sp, ctr->size); \
+ }
+
+#else
+
+#define CCALL_HANDLE_STRUCTRET \
+ cc->retref = 1; /* Return all structs by reference. */ \
+ cc->gpr[ngpr++] = (GPRArg)dp;
+
+#endif
+
+#define CCALL_HANDLE_COMPLEXRET \
+ /* Complex values are returned in 2 or 4 GPRs. */ \
+ cc->retref = 0;
+
+#define CCALL_HANDLE_STRUCTARG
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
+ ((float *)dp)[0] = cc->fpr[0]; \
+ ((float *)dp)[1] = cc->fpr[1]; \
+ } else { /* Copy complex double from FPRs. */ \
+ ((double *)dp)[0] = cc->fpr[0]; \
+ ((double *)dp)[1] = cc->fpr[1]; \
+ }
+
+#define CCALL_HANDLE_COMPLEXARG \
+ isfp = 1; \
+ if (d->size == sizeof(float) * 2) { \
+ d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \
+ isf32 = 1; \
+ }
+
+#define CCALL_HANDLE_REGARG \
+ if (isfp && d->size == sizeof(float)) { \
+ d = ctype_get(cts, CTID_DOUBLE); \
+ isf32 = 1; \
+ } \
+ if (ngpr < maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ ngpr += n; \
+ if (ngpr > maxgpr) { \
+ nsp += ngpr - 8; \
+ ngpr = 8; \
+ if (nsp > CCALL_MAXSTACK) { \
+ goto err_nyi; \
+ } \
+ } \
+ goto done; \
+ }
+
+#else
+
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
@@ -378,13 +454,13 @@
/* Complex values are returned in 2 or 4 GPRs. */ \
cc->retref = 0;
-#define CCALL_HANDLE_COMPLEXRET2 \
- memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
-
#define CCALL_HANDLE_STRUCTARG \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; /* Pass all structs by reference. */
+#define CCALL_HANDLE_COMPLEXRET2 \
+ memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
+
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
@@ -420,6 +496,8 @@
}
#endif
+#endif
+
#if !LJ_ABI_SOFTFP
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
@@ -574,6 +652,40 @@
goto done; \
}
+#elif LJ_TARGET_S390X
+/* -- POSIX/s390x calling conventions --------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+ cc->retref = 1; /* Return all structs by reference. */ \
+ cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET \
+ cc->retref = 1; /* Return all complex values by reference. */ \
+ cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+ UNUSED(dp); /* Nothing to do. */
+
+#define CCALL_HANDLE_STRUCTARG \
+ /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
+ if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+ sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \
+ }
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex numbers by reference. */ \
+  /* TODO: not sure why this is different to structs. */ \
+  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+  sz = CTSIZE_PTR;
+
+#define CCALL_HANDLE_REGARG \
+ if (isfp) { \
+ if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
+ } else { \
+ if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
+ }
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
@@ -816,6 +928,50 @@ noth: /* Not a homogeneous float/double aggregate. */
#endif
+/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */
+
+#if LJ_ARCH_PPC_ELFV2
+
+#define FTYPE_FLOAT 1
+#define FTYPE_DOUBLE 2
+
+/* Classify ct for FP register passing: FTYPE_FLOAT or FTYPE_DOUBLE for FP and
+** complex types, and for structs whose fields all classify alike; 0 if mixed,
+** non-FP or too large; -1 (converted to unsigned) if no field was classified. */
+static unsigned int ccall_classify_fp(CTState *cts, CType *ct) {
+  int res = -1;
+  int sz = ct->size;
+
+  if (ctype_isfp(ct->info))
+    return ct->size == sizeof(float) ? FTYPE_FLOAT : FTYPE_DOUBLE;
+  if (ctype_iscomplex(ct->info))
+    return ct->size == sizeof(float) * 2 ? FTYPE_FLOAT : FTYPE_DOUBLE;
+  if (!ctype_isstruct(ct->info))
+    return 0;
+
+  /* Walk the sibling chain and merge the class of every field. */
+  while (ct->sib) {
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct));
+      if (res == -1)
+        res = sub;
+      if (sub != -1 && sub != res)
+        return 0;
+    } else if (ctype_isbitfield(ct->info) ||
+               ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      return 0;
+    }
+  }
+
+  /* Reject aggregates above 8 elements (res*4 is the element size). */
+  if (res > 0 && sz > res * 4 * 8)
+    return 0;
+  return res;
+}
+
+#endif
+
/* -- MIPS64 ABI struct classification ---------------------------- */
#if LJ_TARGET_MIPS64
@@ -990,6 +1146,13 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CTSize sz;
MSize n, isfp = 0, isva = 0;
void *dp, *rp = NULL;
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
+ int isf32 = 0;
+#endif
+
+#if LJ_TARGET_S390X
+ uint32_t onstack = 0;
+#endif
if (fid) { /* Get argument type from field. */
CType *ctf = ctype_get(cts, fid);
@@ -1028,6 +1191,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */
+#if LJ_TARGET_S390X
+ onstack = 1;
+#endif
if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
nsp = (nsp + align) & ~align; /* Align argument on stack. */
@@ -1046,7 +1212,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(void **)dp = rp;
dp = rp;
}
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE
+ if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) {
+ dp = (char *)dp + (CTSIZE_PTR - sz);
+ }
+#endif
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
+ if (isfp) {
+ int i;
+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
+ cc->fpr[nfpr++] = ((double *)dp)[i];
+ }
+ if (isf32) {
+ int i;
+ for (i = 0; i < d->size / 8; i++)
+ ((float *)dp)[i*2] = ((double *)dp)[i];
+ }
+#endif
+#if LJ_ARCH_PPC_ELFV2
+ if (ctype_isstruct(d->info)) {
+ isfp = ccall_classify_fp(cts, d);
+ int i;
+ if (isfp == FTYPE_FLOAT) {
+ for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++)
+ cc->fpr[nfpr++] = ((float *)dp)[i];
+ } else if (isfp == FTYPE_DOUBLE) {
+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
+ cc->fpr[nfpr++] = ((double *)dp)[i];
+ }
+ }
+#endif
/* Extend passed integers to 32 bits at least. */
if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
if (d->info & CTF_UNSIGNED)
@@ -1060,6 +1256,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info))
+ && d->size <= 4) {
+ if (d->info & CTF_UNSIGNED)
+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
+ else
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64
@@ -1069,6 +1274,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
}
#endif
+#if LJ_TARGET_S390X
+ /* Arguments need to be sign-/zero-extended to 64-bits. */
+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
+ (isfp && onstack)) && d->size <= 4) {
+ if (d->info & CTF_UNSIGNED || isfp)
+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
+ else
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+#endif
#if LJ_TARGET_X64 && LJ_ABI_WIN
if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
if (nfpr == ngpr)
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 0b3c5244..52455539 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -86,10 +86,23 @@ typedef union FPRArg {
#elif LJ_TARGET_PPC
#define CCALL_NARG_GPR 8
+#if LJ_ARCH_BITS == 64
+#define CCALL_NARG_FPR 13
+#if LJ_ARCH_PPC_ELFV2
+#define CCALL_NRET_GPR 2
+#define CCALL_NRET_FPR 8
+#define CCALL_SPS_EXTRA 14
+#else
+#define CCALL_NRET_GPR 1
+#define CCALL_NRET_FPR 2
+#define CCALL_SPS_EXTRA 16
+#endif
+#else
#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
#define CCALL_NRET_GPR 4 /* For complex double. */
#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
#define CCALL_SPS_EXTRA 4
+#endif
#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
@@ -126,6 +139,21 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
+#elif LJ_TARGET_S390X
+
+#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */
+#define CCALL_NARG_FPR 4 /* FPR 0,2,4,8 */
+#define CCALL_NRET_GPR 1 /* GPR 2 */
+#define CCALL_NRET_FPR 1 /* FPR 0 */
+#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */
+#define CCALL_SPS_FREE 0
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+ double d;
+ float f;
+} FPRArg;
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 43e44305..c1e67abd 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -21,6 +21,10 @@
#include "lj_trace.h"
#include "lj_vm.h"
+#if LJ_ARCH_PPC_ELFV2
+#include "lualib.h"
+#endif
+
/* -- Target-specific handling of callback slots -------------------------- */
#define CALLBACK_MCODE_SIZE (LJ_PAGESIZE * LJ_NUM_CBPAGE)
@@ -61,8 +65,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#elif LJ_TARGET_PPC
+#if LJ_ARCH_PPC_OPD
+
+#define CALLBACK_SLOT2OFS(slot) (24*(slot))
+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24)
+#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+
+#elif LJ_ARCH_PPC_ELFV2
+
+#define CALLBACK_SLOT2OFS(slot) (4*(slot))
+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4)
+#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10)
+
+#else
+
#define CALLBACK_MCODE_HEAD 24
+#endif
+
#elif LJ_TARGET_MIPS32
#define CALLBACK_MCODE_HEAD 20
@@ -188,24 +208,59 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
return p;
}
#elif LJ_TARGET_PPC
+#if LJ_ARCH_PPC_OPD
+register void *vm_toc __asm__("r2");
+static void *callback_mcode_init(global_State *g, uint64_t *page)
+{ /* Writes one three-word entry per callback slot, starting at page. */
+ uint64_t *p = page;
+ void *target = (void *)lj_vm_ffi_callback;
+ MSize slot;
+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+ *p++ = (uint64_t)target; /* Word 0: address of lj_vm_ffi_callback. */
+ *p++ = (uint64_t)vm_toc; /* Word 1: value of the r2-bound vm_toc variable. */
+ *p++ = (uint64_t)g | ((uint64_t)slot << 47); /* Word 2: g, slot from bit 47. */
+ }
+ return p; /* One past the last word written. */
+}
+#else
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
MSize slot;
+#if LJ_ARCH_PPC_ELFV2
+ // Needs to be in sync with lj_vm_ffi_callback.
+ lua_assert(CALLBACK_MCODE_SIZE == 4096);
+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+ *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2);
+ p++;
+ }
+ *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff);
+ *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff);
+ *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
+ *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
+ *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff);
+ *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff);
+ *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff);
+ *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff);
+ *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1);
+ *p++ = PPCI_BCTR;
+#else
*p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16);
- *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16);
+ *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16);
*p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff);
- *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff);
+ *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff);
*p++ = PPCI_MTCTR | PPCF_T(RID_TMP);
*p++ = PPCI_BCTR;
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
- *p++ = PPCI_LI | PPCF_T(RID_R11) | slot;
+ *p++ = PPCI_LI | PPCF_T(RID_R12) | slot;
*p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
p++;
}
+#endif
return p;
}
+#endif
#elif LJ_TARGET_MIPS
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
@@ -516,6 +571,15 @@ void lj_ccallback_mcode_free(CTState *cts)
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
+#elif LJ_TARGET_S390X
+
+#define CALLBACK_HANDLE_REGARG \
+ if (isfp) { \
+ if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
+ } else { \
+ if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
+ }
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
@@ -662,6 +726,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
+ if (ctr->size <= 4 &&
+ (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) {
+ if (ctr->info & CTF_UNSIGNED)
+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
+ else
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+ }
+#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 2473b57e..569bfe86 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -153,7 +153,7 @@ typedef struct CType {
/* Simplify target-specific configuration. Checked in lj_ccall.h. */
#define CCALL_MAX_GPR 8
-#define CCALL_MAX_FPR 8
+#define CCALL_MAX_FPR 14
typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg;
diff --git a/src/lj_debug.c b/src/lj_debug.c
index 112f5358..65dc4ff0 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -109,6 +109,11 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
return pos;
}
+LJ_FUNC BCPos lj_debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
+{
+ return debug_framepc(L, fn, nextframe); /* Exported wrapper for the static helper. */
+}
+
/* -- Line numbers -------------------------------------------------------- */
/* Get line number for a bytecode position. */
@@ -703,3 +708,128 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
lua_concat(L, (int)(L->top - L->base) - top);
}
+#ifdef LUA_USE_TRACE_LOGS
+
+#include "lj_dispatch.h"
+
+#define MAX_TRACE_EVENTS 64 /* Capacity of the trace event ring buffer. */
+
+/* Kinds of logged trace events (stored in lj_trace_event_record_t.event). */
+enum {
+ LJ_TRACE_EVENT_ENTER,
+ LJ_TRACE_EVENT_EXIT,
+ LJ_TRACE_EVENT_START
+};
+
+/* One logged trace event. */
+typedef struct {
+ int event; /* One of the LJ_TRACE_EVENT_* values. */
+ unsigned traceno; /* Trace number (for exit events: the vmstate value). */
+ unsigned exitno; /* Copied from J->exitno by exit events. */
+ int directexit; /* Exit events: 1 for a direct exit, 0 for a normal one. */
+ const BCIns *ins; /* Bytecode pointer passed in by the caller. */
+ lua_State *thread; /* Lua state the event was logged for. */
+ GCfunc *fn; /* Function associated with the event; may be NULL. */
+} lj_trace_event_record_t;
+
+/* Ring buffer storage for the logged events. */
+static lj_trace_event_record_t lj_trace_events[MAX_TRACE_EVENTS];
+
+static int rb_start = 0; /* Stays 0 until the buffer is full, then tracks rb_end. */
+static int rb_end = 0; /* Index of the slot the next event is written to. */
+static int rb_full = 0; /* Non-zero once the buffer has wrapped around. */
+
+/* Append a copy of *rec to the trace event ring buffer. Once the buffer has
+** filled up, every further event overwrites the oldest record and rb_start
+** follows rb_end.
+*/
+static void
+lj_trace_log_event(lj_trace_event_record_t *rec)
+{
+  const int was_full = rb_full;
+
+  lj_trace_events[rb_end] = *rec;
+
+  /* Advance the write index with wrap-around. */
+  if (++rb_end == MAX_TRACE_EVENTS) {
+    rb_end = 0;
+    if (!was_full)
+      rb_full = MAX_TRACE_EVENTS;  /* First wrap: buffer is full from now on. */
+  }
+
+  /* When full, the slot about to be overwritten is also the oldest one. */
+  if (was_full)
+    rb_start = rb_end;
+}
+
+/* Return the function of the frame at level 0 of L, or NULL when
+** lj_debug_frame() finds no such frame. The pc argument is not used.
+*/
+static GCfunc*
+lj_debug_top_frame_fn(lua_State *L, const BCIns *pc)
+{
+  int size;
+  cTValue *top = lj_debug_frame(L, 0, &size);
+
+  return top != NULL ? frame_func(top) : NULL;
+}
+
+/* Append an LJ_TRACE_EVENT_START record for trace 'traceno' to the trace
+** event ring buffer, storing L, pc and fn as given by the caller.
+*/
+LJ_FUNC void LJ_FASTCALL
+lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc,
+  GCfunc *fn)
+{
+  lj_trace_event_record_t r;
+
+  r.event = LJ_TRACE_EVENT_START;
+  r.thread = L;
+  r.ins = pc;
+  r.traceno = traceno;
+  /* Only exit events carry these; clear them so that no indeterminate stack
+  ** contents get copied into the ring buffer.
+  */
+  r.exitno = 0;
+  r.directexit = 0;
+  r.fn = fn;
+
+  lj_trace_log_event(&r);
+}
+
+/* Append an LJ_TRACE_EVENT_ENTER record for trace 'traceno' to the trace
+** event ring buffer. The recorded function is the one of the level 0 frame
+** of L (NULL if there is none).
+*/
+LJ_FUNC void LJ_FASTCALL
+lj_log_trace_entry(lua_State *L, unsigned traceno, const BCIns *pc)
+{
+  lj_trace_event_record_t r;
+
+  r.event = LJ_TRACE_EVENT_ENTER;
+  r.thread = L;
+  r.ins = pc;
+  r.traceno = traceno;
+  /* Only exit events carry these; clear them so that no indeterminate stack
+  ** contents get copied into the ring buffer.
+  */
+  r.exitno = 0;
+  r.directexit = 0;
+  r.fn = lj_debug_top_frame_fn(L, pc);
+
+  lj_trace_log_event(&r);
+}
+
+/* Shared body of the trace exit loggers. Nothing is logged for a negative
+** vmstate; otherwise an LJ_TRACE_EVENT_EXIT record is appended with vmstate
+** as the trace number, the current J->exitno and the 'direct' flag.
+*/
+static void
+lj_log_trace_exit_helper(lua_State *L, int vmstate, const BCIns *pc, int direct)
+{
+  lj_trace_event_record_t r;
+  jit_State *J;
+
+  if (vmstate < 0)
+    return;
+
+  J = L2J(L);
+
+  r.event = LJ_TRACE_EVENT_EXIT;
+  r.traceno = vmstate;
+  r.exitno = J->exitno;
+  r.directexit = direct;
+  r.ins = pc;
+  r.thread = L;
+  r.fn = lj_debug_top_frame_fn(L, pc);
+
+  lj_trace_log_event(&r);
+}
+
+/* Log a trace exit with the 'direct' flag cleared (0). */
+LJ_FUNC void LJ_FASTCALL
+lj_log_trace_normal_exit(lua_State *L, int vmstate, const BCIns *pc)
+{
+ lj_log_trace_exit_helper(L, vmstate, pc, 0);
+}
+
+/* Log a trace exit with the 'direct' flag set (1). */
+LJ_FUNC void LJ_FASTCALL
+lj_log_trace_direct_exit(lua_State *L, int vmstate, const BCIns *pc)
+{
+ lj_log_trace_exit_helper(L, vmstate, pc, 1);
+}
+
+#endif /* LUA_USE_TRACE_LOGS */
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 28127ae9..8e145d6a 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -26,6 +26,7 @@ typedef struct lj_Debug {
int isvararg;
} lj_Debug;
+LJ_FUNC BCPos lj_debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe);
LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size);
LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc);
LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
@@ -63,4 +64,15 @@ enum {
VARNAME__MAX
};
+#ifdef LUA_USE_TRACE_LOGS
+LJ_FUNC void LJ_FASTCALL lj_log_trace_direct_exit(lua_State *L,
+ int vmstate, const BCIns *pc);
+LJ_FUNC void LJ_FASTCALL lj_log_trace_normal_exit(lua_State *L,
+ int vmstate, const BCIns *pc);
+LJ_FUNC void LJ_FASTCALL lj_log_trace_entry(lua_State *L,
+ unsigned traceno, const BCIns *pc);
+LJ_FUNC void LJ_FASTCALL lj_log_trace_start_record(lua_State *L, unsigned traceno,
+ const BCIns *pc, GCfunc *fn);
+#endif
+
#endif
diff --git a/src/lj_def.h b/src/lj_def.h
index b61297aa..8541a867 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -66,12 +66,16 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
#define LJ_MAX_LOCVAR 200 /* Max. # of local variables. */
-#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
+#define LJ_MAX_UPVAL 120 /* Max. # of upvalues. */
#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
+#if defined(__powerpc64__) && _CALL_ELF != 2
+#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */
+#else
#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
+#endif
/* Minimum table/buffer sizes. */
#define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */
@@ -107,7 +111,11 @@ typedef unsigned int uintptr_t;
#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0)
#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0)
+#if defined(__powerpc64__) && _CALL_ELF == 2
+#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1)
+#else
#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1)
+#endif
/* Every half-decent C compiler transforms this into a rotate instruction. */
#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 52762eea..0594af51 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -89,7 +89,7 @@ typedef uint16_t HotCount;
typedef struct GG_State {
lua_State L; /* Main thread. */
global_State g; /* Global state. */
-#if LJ_TARGET_ARM && !LJ_TARGET_NX
+#if LJ_TARGET_ARM
/* Make g reachable via K12 encoded DISPATCH-relative addressing. */
uint8_t align1[(16-sizeof(global_State))&15];
#endif
@@ -99,7 +99,7 @@ typedef struct GG_State {
#if LJ_HASJIT
jit_State J; /* JIT state. */
HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
-#if LJ_TARGET_ARM && !LJ_TARGET_NX
+#if LJ_TARGET_ARM
/* Ditto for J. */
uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
#endif
diff --git a/src/lj_err.c b/src/lj_err.c
index 563c7706..56c5ef7e 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -419,6 +419,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
if (version != 1)
return _URC_FATAL_PHASE1_ERROR;
cf = (void *)_Unwind_GetCFA(ctx);
+#ifdef LJ_TARGET_S390X
+ cf -= 160; /* CFA points 160 bytes above r15. */
+#endif
L = cframe_L(cf);
if ((actions & _UA_SEARCH_PHASE)) {
#if LJ_UNWIND_EXT
@@ -753,6 +756,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
G(L)->panic(L);
#else
#if LJ_HASJIT
+ g->saved_jit_base = g->jit_base;
setmref(g->jit_base, NULL);
#endif
{
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index 2e5c776a..da4121fb 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -109,6 +109,8 @@ ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)")
ERRDEF(NOJIT, "JIT compiler permanently disabled by build option")
#endif
ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
+ERRDEF(PRNGSTATE, "PRNG state must be an array with up to 8 integers "
+ "or an integer")
/* Lexer/parser errors. */
ERRDEF(XMODE, "attempt to load chunk with wrong mode")
@@ -178,6 +180,7 @@ ERRDEF(FFI_CBACKOV, "too many callbacks")
#endif
ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields")
ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)")
+ERRDEF(FFI_NOTLOAD, "ffi module not loaded (yet)")
#endif
#if LJ_HASBUFFER
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 60c1d84f..528ebc34 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -29,6 +29,7 @@
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"
+#include "lj_cdata.h"
#include "lj_serialize.h"
/* Some local macros to save typing. Undef'd at the end. */
@@ -1459,6 +1460,77 @@ static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
} /* else: Interpreter will throw. */
}
+/* Record a table clone: replaces the first argument slot with the result of
+** an lj_tab_clone call. Nothing is recorded unless the argument is known to
+** be a table, matching the other table recorders in this file.
+*/
+static void LJ_FASTCALL recff_table_clone(jit_State *J, RecordFFData *rd)
+{
+  TRef src = J->base[0];
+  if (LJ_LIKELY(tref_istab(src))) {
+    J->base[0] = lj_ir_call(J, IRCALL_lj_tab_clone, src);
+  }  /* else: Interpreter will throw. */
+  UNUSED(rd);
+}
+
+/* Record an lj_tab_isarray call: emits the call plus a guard on its result
+** that matches the outcome for the actual argument table, and puts the
+** corresponding boolean constant into the result slot.
+*/
+static void LJ_FASTCALL recff_table_isarray(jit_State *J, RecordFFData *rd)
+{
+  TRef tab = J->base[0];
+  if (LJ_LIKELY(tref_istab(tab))) {
+    /* Outcome for the table passed at recording time. */
+    int isarr = lj_tab_isarray(tabV(&rd->argv[0]));
+    TRef res = lj_ir_call(J, IRCALL_lj_tab_isarray, tab);
+    TRef zero = lj_ir_kint(J, 0);
+    if (isarr) {
+      emitir(IRTGI(IR_NE), res, zero);
+      J->base[0] = TREF_TRUE;
+    } else {
+      emitir(IRTGI(IR_EQ), res, zero);
+      J->base[0] = TREF_FALSE;
+    }
+  }  /* else: Interpreter will throw. */
+}
+
+/* Record an lj_tab_nkeys call on the first argument; its result replaces the
+** argument slot. Nothing is recorded for a non-table argument.
+*/
+static void LJ_FASTCALL recff_table_nkeys(jit_State *J, RecordFFData *rd)
+{
+  TRef tab = J->base[0];
+  if (!LJ_LIKELY(tref_istab(tab)))
+    return;  /* Interpreter will throw. */
+  J->base[0] = lj_ir_call(J, IRCALL_lj_tab_nkeys, tab);
+}
+
+/* Record an lj_tab_isempty call: emits the call plus a guard on its result
+** that matches the outcome for the actual argument table, and puts the
+** corresponding boolean constant into the result slot.
+*/
+static void LJ_FASTCALL recff_table_isempty(jit_State *J, RecordFFData *rd)
+{
+  TRef tab = J->base[0];
+  if (LJ_LIKELY(tref_istab(tab))) {
+    /* Outcome for the table passed at recording time. */
+    int isempty = lj_tab_isempty(tabV(&rd->argv[0]));
+    TRef res = lj_ir_call(J, IRCALL_lj_tab_isempty, tab);
+    TRef zero = lj_ir_kint(J, 0);
+    if (isempty) {
+      emitir(IRTGI(IR_NE), res, zero);
+      J->base[0] = TREF_TRUE;
+    } else {
+      emitir(IRTGI(IR_EQ), res, zero);
+      J->base[0] = TREF_FALSE;
+    }
+  }  /* else: Interpreter will throw. */
+}
+
+/* -- thread library fast functions ------------------------------------------ */
+
+#if LJ_HASFFI
+/* Record the argument-less form: load the exdata field of the current
+** thread and box it as a new void * cdata. The form with an argument is
+** left unrecorded (NYI).
+*/
+void LJ_FASTCALL recff_thread_exdata(jit_State *J, RecordFFData *rd)
+{
+  TRef trl, trp, trid;
+
+  if (J->base[0]) {
+    recff_nyiu(J, rd);  /* this case is too rare to be interesting */
+    return;
+  }
+
+  trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
+  trp = emitir(IRT(IR_FLOAD, IRT_PTR), trl, IRFL_THREAD_EXDATA);
+  trid = lj_ir_kint(J, CTID_P_VOID);
+  J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, trp);
+}
+
+/* Record the argument-less form: load the exdata2 field of the current
+** thread and box it as a new void * cdata. The form with an argument is
+** left unrecorded (NYI).
+*/
+void LJ_FASTCALL recff_thread_exdata2(jit_State *J, RecordFFData *rd)
+{
+  TRef trl, trp, trid;
+
+  if (J->base[0]) {
+    recff_nyiu(J, rd);  /* this case is too rare to be interesting */
+    return;
+  }
+
+  trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
+  trp = emitir(IRT(IR_FLOAD, IRT_PTR), trl, IRFL_THREAD_EXDATA2);
+  trid = lj_ir_kint(J, CTID_P_VOID);
+  J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, trp);
+}
+#endif
+
/* -- I/O library fast functions ------------------------------------------ */
/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
diff --git a/src/lj_frame.h b/src/lj_frame.h
index aa1dc11a..40583119 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -210,6 +210,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_OFS_MULTRES 408
#define CFRAME_SIZE 384
#define CFRAME_SHIFT_MULTRES 3
+#elif LJ_ARCH_PPC_ELFV2
+#define CFRAME_OFS_ERRF 360
+#define CFRAME_OFS_NRES 356
+#define CFRAME_OFS_PREV 336
+#define CFRAME_OFS_L 352
+#define CFRAME_OFS_PC 348
+#define CFRAME_OFS_MULTRES 344
+#define CFRAME_SIZE 368
+#define CFRAME_SHIFT_MULTRES 3
#elif LJ_ARCH_PPC32ON64
#define CFRAME_OFS_ERRF 472
#define CFRAME_OFS_NRES 468
@@ -264,6 +273,20 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#endif
#define CFRAME_OFS_MULTRES 0
#define CFRAME_SHIFT_MULTRES 3
+#elif LJ_TARGET_S390X
+#define CFRAME_OFS_ERRF 280
+#define CFRAME_OFS_NRES 272
+#define CFRAME_OFS_PREV 264
+#define CFRAME_OFS_L 256
+#define CFRAME_OFS_PC 168
+#define CFRAME_OFS_MULTRES 160
+#define CFRAME_SIZE 240
+/*
+** TODO: it would be good if we always decoded param*8 like
+** the RISC architectures do. If so then SHIFT_MULTRES will
+** need to change to 3.
+*/
+#define CFRAME_SHIFT_MULTRES 0
#else
#error "Missing CFRAME_* definitions for this architecture"
#endif
diff --git a/src/lj_init.c b/src/lj_init.c
new file mode 100644
index 00000000..a6816e1e
--- /dev/null
+++ b/src/lj_init.c
@@ -0,0 +1,69 @@
+#include
+#include "lj_arch.h"
+#include "lj_jit.h"
+#include "lj_vm.h"
+#include "lj_str.h"
+
+#if LJ_TARGET_ARM && LJ_TARGET_LINUX
+#include
+#endif
+
+#ifdef _MSC_VER
+/*
+** Append a function pointer to the static constructor table executed by
+** the C runtime.
+** Based on https://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc
+** see also https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization.
+*/
+#pragma section(".CRT$XCU",read)
+#define LJ_INITIALIZER2_(f,p) \
+ static void f(void); \
+ __declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \
+ __pragma(comment(linker,"/include:" p #f "_")) \
+ static void f(void)
+/* The symbol prefix p given to the linker is empty on _WIN64, "_" otherwise. */
+#ifdef _WIN64
+#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"")
+#else
+#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"_")
+#endif
+
+#else
+/* Other compilers: declare f as a static function with the constructor
+** attribute. The function body follows the macro invocation.
+*/
+#define LJ_INITIALIZER(f) static void __attribute__((constructor)) f(void)
+#endif
+
+
+#ifdef LJ_HAS_OPTIMISED_HASH
+/* Switch to the SSE4.2 string hash functions when flags has JIT_F_SSE4_2
+** set; otherwise nothing is changed.
+*/
+static void str_hash_init(uint32_t flags)
+{
+ if (flags & JIT_F_SSE4_2)
+ str_hash_init_sse42 ();
+}
+
+/* CPU detection for interpreter features such as string hash function
+ selection. We choose to cherry-pick from lj_cpudetect and not have a single
+ initializer to make sure that merges with LuaJIT/LuaJIT remain
+ convenient. */
+LJ_INITIALIZER(lj_init_cpuflags)
+{
+ uint32_t flags = 0; /* Stays 0 on targets other than x86/x64. */
+#if LJ_TARGET_X86ORX64
+
+ uint32_t vendor[4];
+ uint32_t features[4];
+ /* Flags are only derived when both cpuid queries (0 and 1) succeed. */
+ if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
+ /* Bits 0, 19 and 20 of features[2] map to SSE3, SSE4.1 and SSE4.2. */
+ flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
+ flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
+ flags |= ((features[2] >> 20)&1) * JIT_F_SSE4_2;
+ /* Query 7 is only issued when vendor[0] (presumably the highest
+ ** supported cpuid function -- TODO confirm) is at least 7.
+ */
+ if (vendor[0] >= 7) {
+ uint32_t xfeatures[4];
+ lj_vm_cpuid(7, xfeatures); /* NOTE(review): return value is not checked. */
+ flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
+ }
+ }
+
+#endif
+
+ /* The reason why we initialized early: select our string hash functions. */
+ str_hash_init (flags);
+}
+#endif
diff --git a/src/lj_ir.h b/src/lj_ir.h
index ed492e93..8aabbec6 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -196,6 +196,8 @@ IRFPMDEF(FPMENUM)
_(FUNC_PC, offsetof(GCfunc, l.pc)) \
_(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
_(THREAD_ENV, offsetof(lua_State, env)) \
+ _(THREAD_EXDATA, offsetof(lua_State, exdata)) \
+ _(THREAD_EXDATA2, offsetof(lua_State, exdata2)) \
_(TAB_META, offsetof(GCtab, metatable)) \
_(TAB_ARRAY, offsetof(GCtab, array)) \
_(TAB_NODE, offsetof(GCtab, node)) \
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 67fb58ae..8db18f08 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -190,6 +190,10 @@ typedef struct CCallInfo {
_(ANY, lj_tab_keyindex, 2, FL, INT, 0) \
_(ANY, lj_vm_next, 2, FL, PTR, 0) \
_(ANY, lj_tab_len, 1, FL, INT, 0) \
+ _(ANY, lj_tab_clone, 2, FS, TAB, CCI_L) \
+ _(ANY, lj_tab_isarray, 1, FL, INT, 0) \
+ _(ANY, lj_tab_nkeys, 1, FL, INT, 0) \
+ _(ANY, lj_tab_isempty, 1, FL, INT, 0) \
_(ANY, lj_tab_len_hint, 2, FL, INT, 0) \
_(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
_(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 32b3861a..74b40fd9 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -7,7 +7,6 @@
#define _LJ_JIT_H
#include "lj_obj.h"
-#if LJ_HASJIT
#include "lj_ir.h"
/* -- JIT engine flags ---------------------------------------------------- */
@@ -23,6 +22,7 @@
#define JIT_F_SSE3 (JIT_F_CPU << 0)
#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
#define JIT_F_BMI2 (JIT_F_CPU << 2)
+#define JIT_F_SSE4_2 (JIT_F_CPU << 3)
#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
@@ -112,12 +112,12 @@
/* Optimization parameters and their defaults. Length is a char in octal! */
#define JIT_PARAMDEF(_) \
- _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
- _(\011, maxrecord, 4000) /* Max. # of recorded IR instructions. */ \
+ _(\010, maxtrace, 8000) /* Max. # of traces in cache. */ \
+ _(\011, maxrecord, 16000) /* Max. # of recorded IR instructions. */ \
_(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
_(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
_(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
- _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
+ _(\011, minstitch, 3) /* Min. # of IR ins for a stitched trace. */ \
\
_(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
_(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -131,7 +131,7 @@
/* Size of each machine code area (in KBytes). */ \
_(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \
/* Max. total size of all machine code areas (in KBytes). */ \
- _(\010, maxmcode, 512) \
+ _(\010, maxmcode, 40960) \
/* End of list. */
enum {
@@ -372,7 +372,6 @@ enum {
#endif
LJ_K64__MAX,
};
-#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
enum {
#if LJ_TARGET_X86ORX64
@@ -391,7 +390,6 @@ enum {
#endif
LJ_K32__MAX
};
-#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS)
/* Get 16 byte aligned pointer to SIMD constant. */
#define LJ_KSIMD(J, n) \
@@ -446,13 +444,9 @@ typedef struct jit_State {
int32_t framedepth; /* Current frame depth. */
int32_t retdepth; /* Return frame depth (count of RETF). */
-#if LJ_K32__USED
uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */
-#endif
TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
-#if LJ_K64__USED
TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */
-#endif
IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
IRRef irtoplim; /* Upper limit of instuction buffer (biased). */
@@ -516,6 +510,8 @@ typedef struct jit_State {
BCLine prev_line; /* Previous line. */
int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
#endif
+ PRNGState prng; /* PRNG state for the JIT compiler, defaults to prng in
+ global_State. */
} jit_State;
#ifdef LUA_USE_ASSERT
@@ -523,6 +519,5 @@ typedef struct jit_State {
#else
#define lj_assertJ(c, ...) ((void)J)
#endif
-#endif
#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 82a9e256..10cd254f 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -304,6 +304,14 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
return def;
}
+/* Return argument number narg as a cdata object. A missing argument or one
+** that is not cdata is reported through lj_err_argt() with LUA_TCDATA.
+*/
+GCcdata *lj_lib_checkcdata(lua_State *L, int narg)
+{
+  TValue *arg = L->base + narg-1;
+  if (arg >= L->top || !tviscdata(arg))
+    lj_err_argt(L, narg, LUA_TCDATA);
+  return cdataV(arg);
+}
+
/* -- Strict type checks -------------------------------------------------- */
/* The following type checks do not coerce between strings and numbers.
@@ -356,4 +364,3 @@ badtype:
return 0; /* unreachable */
}
#endif
-
diff --git a/src/lj_lib.h b/src/lj_lib.h
index a18f52bf..c6053435 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -45,6 +45,7 @@ LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
+LJ_FUNC GCcdata *lj_lib_checkcdata(lua_State *L, int narg);
#if LJ_HASBUFFER
LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg);
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 163aada4..537c8333 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -231,7 +231,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
}
/* Next try probing 64K-aligned pseudo-random addresses. */
do {
- hint = lj_prng_u64(&J2G(J)->prng) & ((1u<prng) & ((1u<mainthref)->th)
@@ -697,6 +698,12 @@ struct lua_State {
GCRef env; /* Thread environment (table of globals). */
void *cframe; /* End of C stack frame chain. */
MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */
+ void *exdata; /* user extra data pointer. added by OpenResty */
+ void *exdata2; /* the 2nd user extra data pointer. added by OpenResty */
+#if LJ_TARGET_ARM
+ uint32_t unused1;
+ uint32_t unused2;
+#endif
};
#define G(L) (mref(L->glref, global_State))
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 09de2f05..a716ddeb 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -370,7 +370,9 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
** since they are followed by at least one guarded VLOAD.
*/
for (ir = IR(J->cur.nins-1); ir > store; ir--)
- if (irt_isguard(ir->t) || ir->o == IR_ALEN)
+ if (irt_isguard(ir->t) || ir->o == IR_ALEN ||
+ (ir->o == IR_CALLL && ir->op2 == IRCALL_lj_tab_nkeys) ||
+ (ir->o == IR_CALLS && ir->op2 == IRCALL_lj_tab_clone))
goto doemit; /* No elimination possible. */
/* Remove redundant store from chain and replace with NOP. */
*refp = store->prev;
diff --git a/src/lj_prng.c b/src/lj_prng.c
index 9e57505e..fd8219de 100644
--- a/src/lj_prng.c
+++ b/src/lj_prng.c
@@ -87,10 +87,6 @@ extern int sys_get_random_number(void *buf, uint64_t len);
extern int sceRandomGetRandomNumber(void *buf, size_t len);
-#elif LJ_TARGET_NX
-
-#include
-
#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE
#define WIN32_LEAN_AND_MEAN
@@ -180,11 +176,6 @@ int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0)
goto ok;
-#elif LJ_TARGET_NX
-
- if (getentropy(rs->u, sizeof(rs->u)) == 0)
- goto ok;
-
#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE
if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u),
diff --git a/src/lj_record.c b/src/lj_record.c
index faa9a508..5cca2425 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -624,7 +624,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
if (bc_j(*pc) != -1 && !innerloopleft(J, pc))
lj_trace_err(J, LJ_TRERR_LINNER); /* Root trace hit an inner loop. */
if ((ev != LOOPEV_ENTERLO &&
- J->loopref && J->cur.nins - J->loopref > 24) || --J->loopunroll < 0)
+ J->loopref && J->cur.nins - J->loopref > 100) || --J->loopunroll < 0)
lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */
J->loopref = J->cur.nins;
}
@@ -664,17 +664,12 @@ static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb)
RecordIndex ix;
/* Since ITERN is recorded at the start, we need our own loop detection. */
if (J->pc == J->startpc &&
+ (J->cur.nins > REF_FIRST+1 ||
+ (J->cur.nins == REF_FIRST+1 && J->cur.ir[REF_FIRST].o != IR_PROF)) &&
J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) {
- IRRef ref = REF_FIRST + LJ_HASPROFILE;
-#ifdef LUAJIT_ENABLE_CHECKHOOK
- ref += 3;
-#endif
- if (J->cur.nins > ref ||
- (LJ_HASPROFILE && J->cur.nins == ref && J->cur.ir[ref-1].o != IR_PROF)) {
- J->instunroll = 0; /* Cannot continue unrolling across an ITERN. */
- lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
- return LOOPEV_ENTER;
- }
+ J->instunroll = 0; /* Cannot continue unrolling across an ITERN. */
+ lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
+ return LOOPEV_ENTER;
}
J->maxslot = ra;
lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */
@@ -1836,7 +1831,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
if (lnk) { /* Possible tail- or up-recursion. */
lj_trace_flush(J, lnk); /* Flush trace that only returns. */
/* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */
- hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u);
+ hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J->prng) & 15u);
}
lj_trace_err(J, LJ_TRERR_CUNROLL);
}
diff --git a/src/lj_state.c b/src/lj_state.c
index 0b9c46ba..e28cfed4 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -260,6 +260,8 @@ LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd)
return NULL;
}
L->status = LUA_OK;
+ L->exdata = NULL;
+ L->exdata2 = NULL;
return L;
}
@@ -319,6 +321,8 @@ lua_State *lj_state_new(lua_State *L)
setgcrefr(L1->env, L->env);
stack_init(L1, L); /* init stack */
lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white");
+ L1->exdata = L->exdata;
+ L1->exdata2 = L->exdata2;
return L1;
}
diff --git a/src/lj_str.c b/src/lj_str.c
index a5282da6..723bfa63 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -19,6 +19,15 @@
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
{
MSize i, n = a->len > b->len ? b->len : a->len;
+#ifdef LUAJIT_USE_VALGRIND
+ for (i = 0; i < n; i++) {
+ uint8_t va = *(const uint8_t *)(strdata(a)+i);
+ uint8_t vb = *(const uint8_t *)(strdata(b)+i);
+ if (va != vb) {
+ return va < vb ? -1 : 1;
+ }
+ }
+#else
for (i = 0; i < n; i += 4) {
/* Note: innocuous access up to end of string + 3. */
uint32_t va = *(const uint32_t *)(strdata(a)+i);
@@ -35,6 +44,7 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
return va < vb ? -1 : 1;
}
}
+#endif
return (int32_t)(a->len - b->len);
}
@@ -72,8 +82,22 @@ int lj_str_haspattern(GCstr *s)
/* -- String hashing ------------------------------------------------------ */
+#ifdef LJ_HAS_OPTIMISED_HASH
+static StrHash hash_sparse_def (uint64_t, const char *, MSize);
+str_sparse_hashfn hash_sparse = hash_sparse_def;
+#if LUAJIT_SECURITY_STRHASH
+static StrHash hash_dense_def(uint64_t, StrHash, const char *, MSize);
+str_dense_hashfn hash_dense = hash_dense_def;
+#endif
+#else
+#define hash_sparse hash_sparse_def
+#if LUAJIT_SECURITY_STRHASH
+#define hash_dense hash_dense_def
+#endif
+#endif
+
/* Keyed sparse ARX string hash. Constant time. */
-static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
+static StrHash hash_sparse_def(uint64_t seed, const char *str, MSize len)
{
/* Constants taken from lookup3 hash by Bob Jenkins. */
StrHash a, b, h = len ^ (StrHash)seed;
@@ -97,8 +121,8 @@ static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
#if LUAJIT_SECURITY_STRHASH
/* Keyed dense ARX string hash. Linear time. */
-static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
- const char *str, MSize len)
+static LJ_NOINLINE StrHash hash_dense_def(uint64_t seed, StrHash h,
+ const char *str, MSize len)
{
StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
if (len > 12) {
@@ -282,8 +306,21 @@ static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
s->gct = ~LJ_TSTR;
s->len = len;
s->hash = hash;
+
+#ifdef LUAJIT_TEST_FIXED_ORDER
+ /* If you need predictable key iteration order in lua tables (eg: in data driven test),
+ * build with
+ * "XCFLAGS=-DLUAJIT_TEST_FIXED_ORDER=1 -DLUAJIT_SECURITY_STRID=0
+ * -DLUAJIT_SECURITY_STRHASH=0 -DLUAJIT_SECURITY_PRNG=0 -DLUAJIT_SECURITY_MCODE=0"
+ *
+ * This is for testing only. Please don't use it in production builds.
+ */
+ s->sid = hash;
+#else
#ifndef STRID_RESEED_INTERVAL
- s->sid = g->str.id++;
+ /* s->sid = g->str.id++; */
+ /* If g->str.id++ were used as the sid, the order of the table would be indeterminate. */
+ s->sid = hash;
#elif STRID_RESEED_INTERVAL
if (!g->str.idreseed--) {
uint64_t r = lj_prng_u64(&g->prng);
@@ -293,6 +330,7 @@ static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
s->sid = g->str.id++;
#else
s->sid = (StrID)lj_prng_u64(&g->prng);
+#endif
#endif
s->reserved = 0;
s->hashalg = (uint8_t)hashalg;
diff --git a/src/lj_str.h b/src/lj_str.h
index 28edb5a5..f7b9234b 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -28,4 +28,16 @@ LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))
+#ifdef LJ_HAS_OPTIMISED_HASH
+typedef StrHash (*str_sparse_hashfn) (uint64_t, const char *, MSize);
+extern str_sparse_hashfn hash_sparse;
+
+#if LUAJIT_SECURITY_STRHASH
+typedef StrHash (*str_dense_hashfn) (uint64_t, StrHash, const char *, MSize);
+extern str_dense_hashfn hash_dense;
+#endif
+
+extern void str_hash_init_sse42 (void);
+#endif
+
#endif
diff --git a/src/lj_str_hash.c b/src/lj_str_hash.c
new file mode 100644
index 00000000..0ee4b5f6
--- /dev/null
+++ b/src/lj_str_hash.c
@@ -0,0 +1,309 @@
+/*
+ * This file defines string hash functions using CRC32. It takes advantage of
+ * Intel hardware support (the crc32 instruction, SSE 4.2) to speed up the
+ * CRC32 computation. The hash functions compute the CRC32 of the length and
+ * of up to 128 bytes of the given string.
+ */
+
+#include "lj_arch.h"
+
+#if LJ_HAS_OPTIMISED_HASH == 1 || defined(SMOKETEST)
+#include
+#include
+#include
+#include
+
+#if defined(_MSC_VER)
+#include
+/* Silence deprecated name warning */
+#define getpid _getpid
+#else
+#include
+#endif
+
+#include "lj_def.h"
+#include "lj_str.h"
+#include "lj_jit.h"
+
+
+#if defined(_MSC_VER)
+/*
+ * MSVC doesn't seem to restrict intrinsics used based on /arch: value set
+ * while clang-cl will error on it.
+ */
+#if defined(__clang__) && !defined(__SSE4_2__)
+#error "This file must be built with /arch:AVX1 or higher"
+#endif
+#else
+#if !defined(__SSE4_2__)
+#error "This file must be built with -msse4.2"
+#endif
+#endif
+
+#define lj_crc32_u32 _mm_crc32_u32
+#define lj_crc32_u64 _mm_crc32_u64
+
+#undef LJ_AINLINE
+#define LJ_AINLINE
+
+#if defined(__MINGW32__) || defined(_MSC_VER)
+#define random() ((long) rand())
+#define srandom(seed) srand(seed)
+#endif
+
+/* Reinterpret str as a pointer to uint64_t (going through void *). No
+** alignment adjustment is made.
+*/
+static const uint64_t* cast_uint64p(const char* str)
+{
+ return (const uint64_t*)(void*)str;
+}
+
+/* Reinterpret str as a pointer to uint32_t (going through void *). No
+** alignment adjustment is made.
+*/
+static const uint32_t* cast_uint32p(const char* str)
+{
+ return (const uint32_t*)(void*)str;
+}
+
+/* Hash a string from three sampled bytes -- the first, the one at len>>1 and
+ * the last -- mixed with len ^ seed. Written for len in [1, 4);
+ * hash_sparse_sse42() also routes strings with len >= 128 here. len must not
+ * be 0, since str[len-1] is read.
+ */
+static LJ_AINLINE uint32_t hash_sparse_1_4(uint64_t seed, const char* str,
+ uint32_t len)
+{
+#if 0
+ /* TODO: The if-1 part (i.e the original algorithm) is working better when
+ * the load-factor is high, as revealed by conflict benchmark (via
+ * 'make benchmark' command); need to understand why it's so.
+ */
+ uint32_t v = str[0];
+ v = (v << 8) | str[len >> 1];
+ v = (v << 8) | str[len - 1];
+ v = (v << 8) | len;
+ return lj_crc32_u32(0, v);
+#else
+ /* Active variant: add/rotate/xor mixing, no crc32 instruction involved. */
+ uint32_t a, b, h = len ^ seed;
+
+ a = *(const uint8_t *)str;
+ h ^= *(const uint8_t *)(str+len-1);
+ b = *(const uint8_t *)(str+(len>>1));
+ h ^= b; h -= lj_rol(b, 14);
+
+ a ^= h; a -= lj_rol(h, 11);
+ b ^= a; b -= lj_rol(a, 25);
+ h ^= b; h -= lj_rol(b, 16);
+
+ return h;
+#endif
+}
+
+/* Hash a string with len in [4, 16): CRC32 over len ^ seed, the leading word
+ * and the trailing word of the string. Words are 8 bytes wide for len >= 8
+ * and 4 bytes wide otherwise; the two words may overlap.
+ */
+static LJ_AINLINE uint32_t hash_sparse_4_16(uint64_t seed, const char* str,
+  uint32_t len)
+{
+  uint64_t head, tail, h;
+
+  if (len < 8) {
+    head = *cast_uint32p(str);
+    tail = *cast_uint32p(str + len - 4);
+  } else {
+    head = *cast_uint64p(str);
+    tail = *cast_uint64p(str + len - 8);
+  }
+
+  h = lj_crc32_u32(0, len ^ seed);
+  h = lj_crc32_u64(h, head);
+  h = lj_crc32_u64(h, tail);
+  return h;
+}
+
+/* Hash a string with length in [16, 128). Two accumulators consume the
+ * string in 16 byte steps (8 bytes each); the final 16 bytes of the string
+ * are always mixed in last, and both accumulators are then combined.
+ */
+static uint32_t hash_16_128(uint64_t seed, const char* str,
+  uint32_t len)
+{
+  uint64_t h1 = lj_crc32_u32(0, len ^ seed);
+  uint64_t h2 = 0;
+  uint32_t ofs = 0;
+
+  while (ofs < len - 16) {
+    h1 += lj_crc32_u64(h1, *cast_uint64p(str + ofs));
+    h2 += lj_crc32_u64(h2, *cast_uint64p(str + ofs + 8));
+    ofs += 16;
+  }
+
+  /* Tail: the last 16 bytes, possibly overlapping what the loop covered. */
+  h1 = lj_crc32_u64(h1, *cast_uint64p(str + len - 16));
+  h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8));
+
+  return lj_crc32_u32(h1, h2);
+}
+
+/* **************************************************************************
+ *
+ * Following is code about hashing string with length >= 128
+ *
+ * **************************************************************************
+ */
+/* Pre-computed offsets, two per row; filled by str_hash_init_random() and
+ * read by get_random_pos_unsafe().
+ */
+static uint32_t random_pos[32][2];
+/* log2_tab[n] is floor(log2(n)) for n in 1..127; entry 0 is -1. */
+static const int8_t log2_tab[128] = { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 };
+
+/* Return floor(log2(n)) via log2_tab, looking at n one byte position at a
+ * time. For n == 0 the table entry -1 is returned converted to uint32_t.
+ */
+static LJ_AINLINE uint32_t log2_floor(uint32_t n)
+{
+  uint32_t shift;
+
+  /* Use the lowest byte position at which the remaining value fits the table. */
+  for (shift = 0; shift <= 24; shift += 8) {
+    if ((n >> shift) <= 127) {
+      return log2_tab[n >> shift] + shift;
+    }
+  }
+
+  return 31;
+}
+
+/* Mask selecting the low n bits. The constant is unsigned long so that
+ * n == 31 stays well defined where long is only 32 bits wide.
+ */
+#define POW2_MASK(n) ((1UL << (n)) - 1)
+
+/* This function populates `random_pos`: rows 0..2 are zeroed, and each
+ * random_pos[i][*] with i in 3..30 gets a pseudo-random value masked to its
+ * low i+1 bits, i.e. a value in the range [0, 2**(i+1)).
+ */
+static void str_hash_init_random(void)
+{
+  int i, seed, rml;
+
+  /* Calculate the ceil(log2(RAND_MAX)) */
+  rml = log2_floor(RAND_MAX);
+  if (RAND_MAX & (RAND_MAX - 1)) {
+    rml += 1;
+  }
+
+  /* Init seed */
+  seed = lj_crc32_u32(0, getpid());
+  seed = lj_crc32_u32(seed, time(NULL));
+  srandom(seed);
+
+  /* Now start to populate the random_pos[][]. */
+  for (i = 0; i < 3; i++) {
+    /* No need to provide random value for chunk smaller than 8 bytes */
+    random_pos[i][0] = random_pos[i][1] = 0;
+  }
+
+  /* Rows below rml: a masked random() result is used directly. */
+  for (; i < rml; i++) {
+    random_pos[i][0] = random() & POW2_MASK(i+1);
+    random_pos[i][1] = random() & POW2_MASK(i+1);
+  }
+
+  /* Remaining rows: multiply a random() result by an earlier row's value
+   * (or by 1 if that value is 0) before masking.
+   */
+  for (; i < 31; i++) {
+    int j;
+    for (j = 0; j < 2; j++) {
+      uint32_t v, scale;
+      scale = random_pos[i - rml][0];
+      if (scale == 0) {
+        scale = 1;
+      }
+      v = (random() * scale) & POW2_MASK(i+1);
+      random_pos[i][j] = v;
+    }
+  }
+}
+#undef POW2_MASK
+
+/* Return the pre-computed entry random_pos[chunk_sz_order][idx & 1]; only
+ * the lowest bit of idx selects the column. It is "unsafe" in the sense that
+ * the return value may be greater than chunk-size; it is up to the caller to
+ * make sure "chunk-base + return-value-of-this-func" has valid virtual
+ * address. chunk_sz_order itself is not range checked here.
+ */
+static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order,
+ uint32_t idx)
+{
+ uint32_t pos = random_pos[chunk_sz_order][idx & 1];
+ return pos;
+}
+
+/* Hash a long string by sampling 8 byte words instead of reading it in
+ * full. The string is treated as 16 chunks of len/16 bytes; two pre-computed
+ * offsets (selected by floor(log2(chunk size))) pick the sample position
+ * inside a chunk. Two CRC32 accumulators are fed alternately and combined at
+ * the end. The only caller in this file passes len >= 128.
+ */
+static LJ_NOINLINE uint32_t hash_128_above(uint64_t seed, const char* str,
+ uint32_t len)
+{
+ uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2;
+ uint64_t h1, h2, v;
+ const char* chunk_ptr;
+
+ chunk_num = 16;
+ chunk_sz = len / chunk_num;
+ chunk_sz_log2 = log2_floor(chunk_sz);
+
+ pos1 = get_random_pos_unsafe(chunk_sz_log2, 0);
+ pos2 = get_random_pos_unsafe(chunk_sz_log2, 1);
+
+ h1 = lj_crc32_u32(0, len ^ seed);
+ h2 = 0;
+
+ /* chunk_num/2 - 1 (= 7) iterations; each one advances chunk_ptr by a single
+ * chunk and samples the current chunk at pos1 and the following one at pos2.
+ */
+ for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1);
+ chunk_ptr += chunk_sz, i++) {
+
+ v = *cast_uint64p(chunk_ptr + pos1);
+ h1 = lj_crc32_u64(h1, v);
+
+ v = *cast_uint64p(chunk_ptr + chunk_sz + pos2);
+ h2 = lj_crc32_u64(h2, v);
+ }
+
+ /* One more pair, relative to where the loop left chunk_ptr; the second
+ * sample is taken pos2 + 8 bytes before the end of that chunk.
+ */
+ v = *cast_uint64p(chunk_ptr + pos1);
+ h1 = lj_crc32_u64(h1, v);
+
+ v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2);
+ h2 = lj_crc32_u64(h2, v);
+
+ /* Finally mix in the first and the last 8 bytes of the string. */
+ h1 = lj_crc32_u64(h1, *cast_uint64p(str));
+ h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8));
+
+ h1 = lj_crc32_u32(h1, h2);
+ return h1;
+}
+
+/* Sparse hash dispatcher, selected by string length:
+ *   [4, 16)   -> hash_sparse_4_16
+ *   [16, 128) -> hash_16_128
+ *   otherwise -> hash_sparse_1_4 (len < 4 or len >= 128)
+ * NOTE: the "len" should not be zero.
+ */
+static StrHash hash_sparse_sse42(uint64_t seed, const char* str, MSize len)
+{
+  if (len >= 4 && len < 16)
+    return hash_sparse_4_16(seed, str, len);
+
+  if (len >= 16 && len < 128)
+    return hash_16_128(seed, str, len);
+
+  return hash_sparse_1_4(seed, str, len);
+}
+
+#if LUAJIT_SECURITY_STRHASH
+/* Dense hash: derive a new seed value from h and the upper half of seed.
+ * Strings of up to 16 bytes just return that value; longer ones are hashed
+ * again with it as the seed, using hash_16_128 below 128 bytes and
+ * hash_128_above from 128 bytes on.
+ */
+static StrHash hash_dense_sse42(uint64_t seed, uint32_t h, const char* str,
+  MSize len)
+{
+  uint32_t mixed = lj_bswap(lj_rol(h ^ (uint32_t)(seed >> 32), 4));
+
+  /* Long strings: the slow crc32 randomization. */
+  if (len >= 128)
+    return hash_128_above(mixed, str, len);
+
+  /* (16, 128): try with a different seed. */
+  if (len > 16)
+    return hash_16_128(mixed, str, len);
+
+  return mixed;
+}
+#endif
+
+/* Install the SSE4.2 hash functions into the hash_sparse (and, when
+ * LUAJIT_SECURITY_STRHASH is enabled, hash_dense) function pointers, then
+ * fill the random offset table used for long strings.
+ */
+void str_hash_init_sse42(void)
+{
+ hash_sparse = hash_sparse_sse42;
+#if LUAJIT_SECURITY_STRHASH
+ hash_dense = hash_dense_sse42;
+#endif
+ str_hash_init_random();
+}
+#endif
diff --git a/src/lj_tab.c b/src/lj_tab.c
index c3609b38..9b93ffe1 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -14,6 +14,8 @@
#include "lj_err.h"
#include "lj_tab.h"
+#include <math.h>
+
/* -- Object hashing ------------------------------------------------------ */
/* Hash an arbitrary key and return its anchor position in the hash table. */
@@ -691,3 +693,85 @@ MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint)
}
#endif
+
+GCtab * LJ_FASTCALL lj_tab_clone(lua_State *L, const GCtab *src)
+{
+ return lj_tab_dup(L, src); /* Thin wrapper: the copy is made by lj_tab_dup(). */
+}
+
+int LJ_FASTCALL lj_tab_isarray(const GCtab *src)
+{
+ Node *node;
+ cTValue *o;
+ ptrdiff_t i;
+
+ node = noderef(src->node); /* Only the hash part is examined here. */
+ for (i = (ptrdiff_t)src->hmask; i >= 0; i--) /* Visit every hash slot, last to first. */
+ if (!tvisnil(&node[i].val)) { /* Slots with a nil value are ignored. */
+ o = &node[i].key;
+ if (LJ_UNLIKELY(tvisint(o))) { /* Integer key: acceptable. */
+ continue;
+ }
+ if (LJ_UNLIKELY(tvisnum(o))) {
+ lua_Number n = numberVnum(o);
+ if (LJ_LIKELY(rint((double) n) == n)) { /* Number key with an integral value: acceptable. */
+ continue;
+ }
+ }
+ return 0; /* Any other key type or a fractional number: not array-like. */
+ }
+
+ return 1; /* No used hash slot has a non-integral key. */
+}
+
+MSize LJ_FASTCALL lj_tab_nkeys(const GCtab *t)
+{
+ MSize narr = (MSize)t->asize;
+ cTValue *e;
+ Node *node;
+ MSize i, cnt = 0; /* cnt: non-nil values counted so far (the return value). */
+
+ e = tvref(t->array);
+ for (i = 0; i < narr; i++) /* Count non-nil slots of the array part. */
+ if (LJ_LIKELY(!tvisnil(&e[i])))
+ cnt++;
+
+ if (t->hmask <= 0) /* hmask not positive: skip the hash scan. */
+ return cnt;
+
+ node = noderef(t->node);
+ for (i = 0; i <= (MSize)t->hmask; i++) { /* Count non-nil values of the hash part. */
+ Node *n = &node[i];
+ if (LJ_LIKELY(!tvisnil(&n->val))) {
+ cnt++;
+ }
+ }
+
+ return cnt;
+}
+
+int LJ_FASTCALL lj_tab_isempty(const GCtab *t)
+{
+ MSize narr = (MSize)t->asize;
+ cTValue *e;
+ Node *node;
+ MSize i;
+
+ e = tvref(t->array);
+ for (i = 0; i < narr; i++) /* Any non-nil array slot means "not empty". */
+ if (LJ_LIKELY(!tvisnil(&e[i])))
+ return 0;
+
+ if (t->hmask <= 0) /* hmask not positive: skip the hash scan, table is empty. */
+ return 1;
+
+ node = noderef(t->node);
+ for (i = 0; i <= (MSize)t->hmask; i++) { /* Any non-nil hash value means "not empty". */
+ Node *n = &node[i];
+ if (LJ_LIKELY(!tvisnil(&n->val))) {
+ return 0;
+ }
+ }
+
+ return 1; /* Returns 1 only when no non-nil value was found in either part. */
+}
diff --git a/src/lj_tab.h b/src/lj_tab.h
index 2a3f76bf..ed0348a9 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -93,4 +93,9 @@ LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
#endif
+LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_clone(lua_State *L, const GCtab *src);
+LJ_FUNCA int LJ_FASTCALL lj_tab_isarray(const GCtab *src);
+LJ_FUNCA MSize LJ_FASTCALL lj_tab_nkeys(const GCtab *src);
+LJ_FUNCA int LJ_FASTCALL lj_tab_isempty(const GCtab *t);
+
#endif
diff --git a/src/lj_target.h b/src/lj_target.h
index 19716928..3831cb60 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -144,6 +144,8 @@ typedef uint32_t RegCost;
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_target_mips.h"
+#elif LJ_TARGET_S390X
+#include "lj_target_s390x.h"
#else
#error "Missing include for target CPU"
#endif
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index bc9802a4..41378c9d 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -30,8 +30,13 @@ enum {
/* Calling conventions. */
RID_RET = RID_R3,
+#if LJ_LE
+ RID_RETHI = RID_R4,
+ RID_RETLO = RID_R3,
+#else
RID_RETHI = RID_R3,
RID_RETLO = RID_R4,
+#endif
RID_FPRET = RID_F1,
/* These definitions must match with the *.dasc file(s): */
@@ -131,6 +136,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
#define PPCF_C(r) ((r) << 6)
#define PPCF_MB(n) ((n) << 6)
#define PPCF_ME(n) ((n) << 1)
+#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1))) /* 6 bit value split: low 5 bits and bit 5. NOTE(review): MD-form puts the low 5 bits of sh at bit 11 -- confirm the shift by 12. */
+#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5))) /* 6 bit value split: low 5 bits and bit 5. NOTE(review): MD-form puts the high bit of mb/me at bit 5 -- confirm bit 11. */
#define PPCF_Y 0x00200000
#define PPCF_DOT 0x00000001
@@ -200,6 +207,13 @@ typedef enum PPCIns {
PPCI_RLWINM = 0x54000000,
PPCI_RLWIMI = 0x50000000,
+ PPCI_RLDICL = 0x78000000,
+ PPCI_RLDICR = 0x78000004,
+ PPCI_RLDIC = 0x78000008,
+ PPCI_RLDIMI = 0x7800000c,
+ PPCI_RLDCL = 0x78000010,
+ PPCI_RLDCR = 0x78000012,
+
PPCI_B = 0x48000000,
PPCI_BL = 0x48000001,
PPCI_BC = 0x40800000,
diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
new file mode 100644
index 00000000..10b4bd58
--- /dev/null
+++ b/src/lj_target_s390x.h
@@ -0,0 +1,81 @@
+/*
+** Definitions for IBM z/Architecture (s390x) CPUs.
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_S390X_H
+#define _LJ_TARGET_S390X_H
+
+/* -- Registers IDs ------------------------------------------------------- */
+
+#define GPRDEF(_) \
+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
+#define FPRDEF(_) \
+ _(F0) _(F1) _(F2) _(F3) \
+ _(F4) _(F5) _(F6) _(F7) \
+ _(F8) _(F9) _(F10) _(F11) \
+ _(F12) _(F13) _(F14) _(F15)
+/* TODO: VREG? */
+
+#define RIDENUM(name) RID_##name,
+
+enum {
+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
+ RID_MAX,
+
+ /* Calling conventions. */
+ RID_SP = RID_R15,
+ RID_RET = RID_R2,
+ RID_FPRET = RID_F0,
+
+ /* These definitions must match with the *.dasc file(s): */
+ RID_BASE = RID_R7, /* Interpreter BASE. */
+ RID_LPC = RID_R9, /* Interpreter PC. */
+ RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */
+
+ /* Register ranges [min, max) and number of registers. */
+ RID_MIN_GPR = RID_R0,
+ RID_MIN_FPR = RID_F0,
+ RID_MAX_GPR = RID_MIN_FPR,
+ RID_MAX_FPR = RID_MAX,
+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
+};
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* -- Spill slots --------------------------------------------------------- */
+
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
+*/
+#define SPS_FIXED 2
+#define SPS_FIRST 2
+
+#define SPOFS_TMP 0
+
+#define sps_scale(slot) (4 * (int32_t)(slot))
+#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
+
+/* -- Exit state ---------------------------------------------------------- */
+
+/* Register and spill-slot snapshot. This definition must match with the *.dasc file(s). */
+typedef struct {
+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
+ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. NOTE(review): 32 bit slots on a 64 bit target -- confirm. */
+ int32_t spill[256]; /* Spill slots. */
+} ExitState;
+
+#define EXITSTUB_SPACING 4
+#define EXITSTUBS_PER_GROUP 32
+
+/* -- Instructions -------------------------------------------------------- */
+
+#endif
+
diff --git a/src/lj_trace.c b/src/lj_trace.c
index c2329394..f816337c 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -312,6 +312,8 @@ void lj_trace_initstate(global_State *g)
jit_State *J = G2J(g);
TValue *tv;
+ J->prng = g->prng;
+
/* Initialize aligned SIMD constants. */
tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
tv[0].u64 = U64x(7fffffff,ffffffff);
@@ -390,7 +392,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e)
if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */
/* First try to bump its hotcount several times. */
val = ((uint32_t)J->penalty[i].val << 1) +
- (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1));
+ (lj_prng_u64(&J->prng) & ((1u<<PENALTY_RNDBITS)-1));
if (val > PENALTY_MAX) {
blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */
return;
@@ -414,6 +416,9 @@ static void trace_start(jit_State *J)
{
lua_State *L;
TraceNo traceno;
+#ifdef LUA_USE_TRACE_LOGS
+ const BCIns *pc = J->pc;
+#endif
if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) {
@@ -474,6 +479,9 @@ static void trace_start(jit_State *J)
}
);
lj_record_setup(J);
+#ifdef LUA_USE_TRACE_LOGS
+ lj_log_trace_start_record(L, (unsigned) J->cur.traceno, pc, J->fn);
+#endif
}
/* Stop tracing. */
@@ -604,21 +612,22 @@ static int trace_abort(jit_State *J)
J->cur.link = 0;
J->cur.linktype = LJ_TRLINK_NONE;
lj_vmevent_send(L, TRACE,
- TValue *frame;
- const BCIns *pc;
+ cTValue *frame;
+ int size;
+ BCIns pc;
GCfunc *fn;
setstrV(L, L->top++, lj_str_newlit(L, "abort"));
setintV(L->top++, traceno);
- /* Find original Lua function call to generate a better error message. */
- frame = J->L->base-1;
- pc = J->pc;
- while (!isluafunc(frame_func(frame))) {
- pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1;
- frame = frame_prev(frame);
- }
+ /* Find original function call to generate a better error message. */
+ frame = lj_debug_frame(L, 0, &size);
+ lj_assertL(frame != NULL, "missing debug frame");
fn = frame_func(frame);
+ if (frame == L->base-1 && isluafunc(fn))
+ pc = proto_bcpos(funcproto(fn), J->pc);
+ else
+ pc = lj_debug_framepc(L, fn, frame);
setfuncV(L, L->top++, fn);
- setintV(L->top++, proto_bcpos(funcproto(fn), pc));
+ setintV(L->top++, pc);
copyTV(L, L->top++, restorestack(L, errobj));
copyTV(L, L->top++, &J->errinfo);
);
@@ -931,6 +940,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
}
}
}
+#ifdef LUA_USE_TRACE_LOGS
+ lj_log_trace_normal_exit(L, (int) T->traceno, pc);
+#endif
/* Return MULTRES or 0. */
ERRNO_RESTORE
switch (bc_op(*pc)) {
diff --git a/src/ljamalg.c b/src/ljamalg.c
index cae8356c..4d85950a 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -88,4 +88,3 @@
#include "lib_ffi.c"
#include "lib_buffer.c"
#include "lib_init.c"
-
diff --git a/src/lua.h b/src/lua.h
index 6d1634d1..3f631aa1 100644
--- a/src/lua.h
+++ b/src/lua.h
@@ -112,6 +112,9 @@ LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud);
LUA_API void (lua_close) (lua_State *L);
LUA_API lua_State *(lua_newthread) (lua_State *L);
+#define HAVE_LUA_RESETTHREAD 1
+LUA_API void (lua_resetthread) (lua_State *L, lua_State *th);
+
LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf);
@@ -245,7 +248,12 @@ LUA_API void (lua_concat) (lua_State *L, int n);
LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud);
LUA_API void lua_setallocf (lua_State *L, lua_Alloc f, void *ud);
+LUA_API void lua_setexdata(lua_State *L, void *exdata);
+LUA_API void *lua_getexdata(lua_State *L);
+#define HAVE_LUA_EXDATA2 1
+LUA_API void lua_setexdata2(lua_State *L, void *exdata2);
+LUA_API void *lua_getexdata2(lua_State *L);
/*
** ===============================================================
diff --git a/src/luajit.c b/src/luajit.c
index 6dd64026..6e309260 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -303,8 +303,9 @@ static int loadjitmodule(lua_State *L)
lua_concat(L, 2);
if (lua_pcall(L, 1, 1, 0)) {
const char *msg = lua_tostring(L, -1);
- if (msg && !strncmp(msg, "module ", 7))
- goto nomodule;
+ if (msg && !strncmp(msg, "module ", 7)) {
+ goto nomodule; /* Error text starts with "module ": take the nomodule path silently. */
+ }
return report(L, 1);
}
lua_getfield(L, -1, "start");
@@ -542,7 +543,6 @@ static int pmain(lua_State *L)
}
if ((flags & FLAGS_VERSION)) print_version();
-
s->status = runargs(L, argv, argn);
if (s->status != LUA_OK) return 0;
diff --git a/src/luajit.h b/src/luajit.h
index 31f1eb1f..a4d33001 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,6 +30,8 @@
#include "lua.h"
+#define OPENRESTY_LUAJIT
+
#define LUAJIT_VERSION "LuaJIT 2.1.0-beta3"
#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3
diff --git a/src/lualib.h b/src/lualib.h
index 87748456..fd84cc54 100644
--- a/src/lualib.h
+++ b/src/lualib.h
@@ -21,6 +21,7 @@
#define LUA_BITLIBNAME "bit"
#define LUA_JITLIBNAME "jit"
#define LUA_FFILIBNAME "ffi"
+#define LUA_THRLIBNAME "thread"
LUALIB_API int luaopen_base(lua_State *L);
LUALIB_API int luaopen_math(lua_State *L);
diff --git a/src/nxbuild.bat b/src/nxbuild.bat
deleted file mode 100644
index c4a21f05..00000000
--- a/src/nxbuild.bat
+++ /dev/null
@@ -1,159 +0,0 @@
-@rem Script to build LuaJIT with NintendoSDK + NX Addon.
-@rem Donated to the public domain by Swyter.
-@rem
-@rem To run this script you must open a "Native Tools Command Prompt for VS".
-@rem
-@rem Either the x86 version for NX32, or x64 for the NX64 target.
-@rem This is because the pointer size of the LuaJIT host tools (buildvm.exe)
-@rem must match the cross-compiled target (32 or 64 bits).
-@rem
-@rem Then cd to this directory and run this script.
-@rem
-@rem Recommended invocation:
-@rem
-@rem nxbuild # release build, amalgamated
-@rem nxbuild debug # debug build, amalgamated
-@rem
-@rem Additional command-line options (not generally recommended):
-@rem
-@rem noamalg # (after debug) non-amalgamated build
-
-@if not defined INCLUDE goto :FAIL
-@if not defined NINTENDO_SDK_ROOT goto :FAIL
-@if not defined PLATFORM goto :FAIL
-
-@if "%platform%" == "x86" goto :DO_NX32
-@if "%platform%" == "x64" goto :DO_NX64
-
-@echo Error: Current host platform is %platform%!
-@echo.
-@goto :FAIL
-
-@setlocal
-
-:DO_NX32
-@set DASC=vm_arm.dasc
-@set DASMFLAGS= -D HFABI -D FPU
-@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM
-@set HOST_PTR_SIZE=4
-goto :BEGIN
-
-:DO_NX64
-@set DASC=vm_arm64.dasc
-@set DASMFLAGS= -D ENDIAN_LE
-@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
-@set HOST_PTR_SIZE=8
-
-:BEGIN
-@rem ---- Host compiler ----
-@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /wo4146 /wo4244 /D_CRT_SECURE_NO_DEPRECATE
-@set LJLINK=link /nologo
-@set LJMT=mt /nologo
-@set DASMDIR=..\dynasm
-@set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
-
-%LJCOMPILE% host\minilua.c
-@if errorlevel 1 goto :BAD
-%LJLINK% /out:minilua.exe minilua.obj
-@if errorlevel 1 goto :BAD
-if exist minilua.exe.manifest^
- %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
-
-@rem Check that we have the right 32/64 bit host compiler to generate the right virtual machine files.
-@minilua
-@if "%ERRORLEVEL%" == "%HOST_PTR_SIZE%" goto :PASSED_PTR_CHECK
-
-@echo The pointer size of the host in bytes (%HOST_PTR_SIZE%) does not match the expected value (%errorlevel%).
-@echo Check that the script is being ran under the correct x86/x64 VS prompt.
-@goto :BAD
-
-:PASSED_PTR_CHECK
-@set DASMFLAGS=%DASMFLAGS% %DASMTARGET% -D LJ_TARGET_NX -D LUAJIT_OS=LUAJIT_OS_OTHER -D LUAJIT_DISABLE_JIT -D LUAJIT_DISABLE_FFI
-minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
-@if errorlevel 1 goto :BAD
-%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% -D LJ_TARGET_NX -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c
-@if errorlevel 1 goto :BAD
-%LJLINK% /out:buildvm.exe buildvm*.obj
-@if errorlevel 1 goto :BAD
-if exist buildvm.exe.manifest^
- %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
-
-buildvm -m elfasm -o lj_vm.s
-@if errorlevel 1 goto :BAD
-buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
-@if errorlevel 1 goto :BAD
-buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
-@if errorlevel 1 goto :BAD
-buildvm -m libdef -o lj_libdef.h %ALL_LIB%
-@if errorlevel 1 goto :BAD
-buildvm -m recdef -o lj_recdef.h %ALL_LIB%
-@if errorlevel 1 goto :BAD
-buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
-@if errorlevel 1 goto :BAD
-buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
-@if errorlevel 1 goto :BAD
-
-@rem ---- Cross compiler ----
-@if "%platform%" neq "x64" goto :NX32_CROSSBUILD
-@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
-@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-ar" rc
-@set TARGETLIB_SUFFIX=nx64
-
-%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-as -o lj_vm.o lj_vm.s
-goto :DEBUGCHECK
-
-:NX32_CROSSBUILD
-@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
-@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-ar" rc
-@set TARGETLIB_SUFFIX=nx32
-
-%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-as -o lj_vm.o lj_vm.s
-:DEBUGCHECK
-
-@if "%1" neq "debug" goto :NODEBUG
-@shift
-@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_DEBUG -g -O0
-@set TARGETLIB=libluajitD_%TARGETLIB_SUFFIX%.a
-goto :BUILD
-:NODEBUG
-@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_RELEASE -O3
-@set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a
-:BUILD
-del %TARGETLIB%
-@if "%1" neq "noamalg" goto :AMALG
-for %%f in (lj_*.c lib_*.c) do (
- %LJCOMPILE% %%f
- @if errorlevel 1 goto :BAD
-)
-
-%LJLIB% %TARGETLIB% lj_*.o lib_*.o
-@if errorlevel 1 goto :BAD
-@goto :NOAMALG
-:AMALG
-%LJCOMPILE% ljamalg.c
-@if errorlevel 1 goto :BAD
-%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
-@if errorlevel 1 goto :BAD
-:NOAMALG
-
-@del *.o *.obj *.manifest minilua.exe buildvm.exe
-@echo.
-@echo === Successfully built LuaJIT for Nintendo Switch (%TARGETLIB_SUFFIX%) ===
-
-@goto :END
-:BAD
-@echo.
-@echo *******************************************************
-@echo *** Build FAILED -- Please check the error messages ***
-@echo *******************************************************
-@goto :END
-:FAIL
-@echo To run this script you must open a "Native Tools Command Prompt for VS".
-@echo.
-@echo Either the x86 version for NX32, or x64 for the NX64 target.
-@echo This is because the pointer size of the LuaJIT host tools (buildvm.exe)
-@echo must match the cross-compiled target (32 or 64 bits).
-@echo.
-@echo Keep in mind that NintendoSDK + NX Addon must be installed, too.
-:END
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 3cad37d2..14a7f821 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -21,35 +21,40 @@
|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
-|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3).
+|// OPD Need function descriptors (64 bit or 32 bit variant, e.g. PS3).
|// Function pointers are really a struct: code, TOC, env (optional).
-|// TOCENV Function pointers have an environment pointer, too (not on PS3).
+|// OPDENV Function pointers have an environment pointer, too (not on PS3).
+|// ELFV2 The 64-bit ELF V2 ABI is in use.
|// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360).
|// Must avoid (slow) micro-coded instructions.
|
|.if P64
-|.define TOC, 1
-|.define TOCENV, 1
|.macro lpx, a, b, c; ldx a, b, c; .endmacro
|.macro lp, a, b; ld a, b; .endmacro
|.macro stp, a, b; std a, b; .endmacro
+|.macro stpx, a, b, c; stdx a, b, c; .endmacro
|.define decode_OPP, decode_OP8
-|.if FFI
-|// Missing: Calling conventions, 64 bit regs, TOC.
-|.error lib_ffi not yet implemented for PPC64
-|.endif
+|.define PSIZE, 8
|.else
|.macro lpx, a, b, c; lwzx a, b, c; .endmacro
|.macro lp, a, b; lwz a, b; .endmacro
|.macro stp, a, b; stw a, b; .endmacro
+|.macro stpx, a, b, c; stwx a, b, c; .endmacro
|.define decode_OPP, decode_OP4
+|.define PSIZE, 4
|.endif
|
|// Convenience macros for TOC handling.
-|.if TOC
+|.if OPD or ELFV2
|// Linker needs a TOC patch area for every external call relocation.
-|.macro blex, target; bl extern target@plt; nop; .endmacro
+|.macro blex, target; bl extern target; nop; .endmacro
|.macro .toc, a, b; a, b; .endmacro
+|.else
+|.macro blex, target; bl extern target@plt; .endmacro
+|.macro .toc, a, b; .endmacro
+|.endif
+|.if OPD
+|.macro .opd, a, b; a, b; .endmacro
|.if P64
|.define TOC_OFS, 8
|.define ENV_OFS, 16
@@ -57,13 +62,13 @@
|.define TOC_OFS, 4
|.define ENV_OFS, 8
|.endif
-|.else // No TOC.
-|.macro blex, target; bl extern target@plt; .endmacro
-|.macro .toc, a, b; .endmacro
+|.else // No OPD.
+|.macro .opd, a, b; .endmacro
|.endif
-|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro
+|.macro .opdenv, a, b; .if OPDENV; a, b; .endif; .endmacro
|
|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro
+|.macro .elfv2, a, b; .if ELFV2; a, b; .endif; .endmacro
|
|.macro andix., y, a, i
|.if PPE
@@ -74,29 +79,6 @@
|.endif
|.endmacro
|
-|.macro clrso, reg
-|.if PPE
-| li reg, 0
-| mtxer reg
-|.else
-| mcrxr cr0
-|.endif
-|.endmacro
-|
-|.macro checkov, reg, noov
-|.if PPE
-| mfxer reg
-| add reg, reg, reg
-| cmpwi reg, 0
-| li reg, 0
-| mtxer reg
-| bgey noov
-|.else
-| mcrxr cr0
-| bley noov
-|.endif
-|.endmacro
-|
|//-----------------------------------------------------------------------
|
|// Fixed register assignments for the interpreter.
@@ -122,6 +104,7 @@
|.define LREG, r18 // Register holding lua_State (also in SAVE_L).
|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
|.define JGL, r31 // On-trace: global_State + 32768.
+|.define BASEP4, r26 // Equal to BASE + 4
|
|// Constants for type-comparisons, stores and conversions. C callee-save.
|.define TISNUM, r22
@@ -158,6 +141,12 @@
|.if FPU
|.define FARG1, f1
|.define FARG2, f2
+|.define FARG3, f3
+|.define FARG4, f4
+|.define FARG5, f5
+|.define FARG6, f6
+|.define FARG7, f7
+|.define FARG8, f8
|.endif
|
|.define CRET1, r3
@@ -165,6 +154,7 @@
|
|.define TOCREG, r2 // TOC register (only used by C code).
|.define ENVREG, r11 // Environment pointer (nested C functions).
+|.define FUNCREG, r12 // ELFv2 function pointer (overlaps RD)
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|.if GPR64
@@ -198,6 +188,49 @@
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
|
+|.elif ELFV2
+|
+|// 392(sp) // \ 32 bit C frame info.
+|.define SAVE_LR, 384(sp)
+|.define SAVE_CR, 376(sp) // 64 bit CR save.
+|.define CFRAME_SPACE, 368 // Delta for sp.
+|// Back chain for sp: 368(sp) <-- sp entering interpreter
+|.define SAVE_ERRF, 360(sp) // |
+|.define SAVE_NRES, 356(sp) // |
+|.define SAVE_L, 352(sp) // > Parameter save area.
+|.define SAVE_PC, 348(sp) // |
+|.define SAVE_MULTRES, 344(sp) // |
+|.define SAVE_CFRAME, 336(sp) // / 64 bit C frame chain.
+|.define SAVE_FPR_, 192 // .. 192+18*8: 64 bit FPR saves.
+|.define SAVE_GPR_, 48 // .. 48+18*8: 64 bit GPR saves.
+|.if ENDIAN_LE
+|.define TMPD_HI, 44(sp)
+|.define TMPD_LO, 40(sp)
+|.define TONUM_HI, 36(sp)
+|.define TONUM_LO, 32(sp)
+|.else
+|.define TMPD_LO, 44(sp)
+|.define TMPD_HI, 40(sp)
+|.define TONUM_LO, 36(sp)
+|.define TONUM_HI, 32(sp)
+|.endif
+|.define SAVE_TOC, 24(sp) // TOC save area.
+|// Next frame lr: 16(sp)
+|// Next frame cr: 8(sp)
+|// Back chain for sp: 0(sp) <-- sp while in interpreter
+|
+|.if ENDIAN_LE
+|.define TMPD_BLO, 32(sp)
+|.define TMPD, TMPD_LO
+|.define TONUM_D, TONUM_LO
+|.else
+|.define TMPD_BLO, 39(sp)
+|.define TMPD, TMPD_HI
+|.define TONUM_D, TONUM_HI
+|.endif
+|
+|.define EXIT_OFFSET, 32
+|
|.else
|
|// 508(sp) // \ 32 bit C frame info.
@@ -208,23 +241,39 @@
|.define SAVE_MULTRES, 456(sp) // |
|.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain.
|.define SAVE_LR, 416(sp)
+|.define SAVE_CR, 408(sp) // 64 bit CR save.
|.define CFRAME_SPACE, 400 // Delta for sp.
|// Back chain for sp: 400(sp) <-- sp entering interpreter
|.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves.
|.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves.
|// 48(sp) // Callee parameter save area (ABI mandated).
|.define SAVE_TOC, 40(sp) // TOC save area.
+|.if ENDIAN_LE
+|.define TMPD_HI, 36(sp) // \ Link editor temp (ABI mandated).
+|.define TMPD_LO, 32(sp) // /
+|.define TONUM_HI, 28(sp) // \ Compiler temp (ABI mandated).
+|.define TONUM_LO, 24(sp) // /
+|.else
|.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated).
|.define TMPD_HI, 32(sp) // /
|.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated).
|.define TONUM_HI, 24(sp) // /
+|.endif
|// Next frame lr: 16(sp)
-|.define SAVE_CR, 8(sp) // 64 bit CR save.
+|// Next frame cr: 8(sp)
|// Back chain for sp: 0(sp) <-- sp while in interpreter
|
+|.if ENDIAN_LE
+|.define TMPD_BLO, 32(sp)
+|.define TMPD, TMPD_LO
+|.define TONUM_D, TONUM_LO
+|.else
|.define TMPD_BLO, 39(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
+|.endif
+|
+|.define EXIT_OFFSET, 112
|
|.endif
|.else
@@ -249,10 +298,17 @@
|.define SAVE_MULTRES, 28(sp)
|.define UNUSED1, 24(sp)
|.if FPU
+|.if ENDIAN_LE
+|.define TMPD_HI, 20(sp)
+|.define TMPD_LO, 16(sp)
+|.define TONUM_HI, 12(sp)
+|.define TONUM_LO, 8(sp)
+|.else
|.define TMPD_LO, 20(sp)
|.define TMPD_HI, 16(sp)
|.define TONUM_LO, 12(sp)
|.define TONUM_HI, 8(sp)
+|.endif
|.else
|.define SFSAVE_4, 20(sp)
|.define SFSAVE_3, 16(sp)
@@ -263,10 +319,22 @@
|// Back chain for sp: 0(sp) <-- sp while in interpreter
|
|.if FPU
+|.if ENDIAN_LE
+|.define TMPD_BLO, 16(sp)
+|.define TMPD, TMPD_LO
+|.define TONUM_D, TONUM_LO
+|.else
|.define TMPD_BLO, 23(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
|.endif
+|.else
+|.define TMPD_BLO, 23(sp)
+|.define TMPD, TMPD_HI
+|.define TONUM_D, TONUM_HI
+|.endif
+|
+|.define EXIT_OFFSET, 16
|
|.endif
|
@@ -383,8 +451,35 @@
|//-----------------------------------------------------------------------
|
|// Access to frame relative to BASE.
+|.if ENDIAN_LE
+|.define FRAME_PC, -4
+|.define FRAME_FUNC, -8
+|.define FRAME_CONTPC, -12
+|.define FRAME_CONTRET, -16
+|.define WORD_LO, 0
+|.define WORD_HI, 4
+|.define WORD_BLO, 0
+|.define BASE_LO, BASE
+|.define BASE_HI, BASEP4
+|.macro lwzux2, hi, lo, base, idx
+| lwzux lo, base, idx
+| lwz hi, 4(base)
+|.endmacro
+|.else
|.define FRAME_PC, -8
|.define FRAME_FUNC, -4
+|.define FRAME_CONTPC, -16
+|.define FRAME_CONTRET, -12
+|.define WORD_LO, 4
+|.define WORD_HI, 0
+|.define WORD_BLO, 7
+|.define BASE_LO, BASEP4
+|.define BASE_HI, BASE
+|.macro lwzux2, hi, lo, base, idx
+| lwzux hi, base, idx
+| lwz lo, 4(base)
+|.endmacro
+|.endif
|
|// Instruction decode.
|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
@@ -445,6 +540,7 @@
|// Call decode and dispatch.
|.macro ins_callt
| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+| addi BASEP4, BASE, 4
| lwz PC, LFUNC:RB->pc
| lwz INS, 0(PC)
| addi PC, PC, 4
@@ -537,7 +633,12 @@ static void build_subroutines(BuildCtx *ctx)
| lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
| mr BASE, TMP2 // Restore caller base.
| // Prepending may overwrite the pcall frame, so do it at the end.
- | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
+ | .if ENDIAN_LE
+ | addi RA, RA, -8
+ | stw TMP1, WORD_HI(RA) // Prepend true to results.
+ | .else
+ | stwu TMP1, -8(RA) // Prepend true to results.
+ | .endif
|
|->vm_returnc:
| addi RD, RD, 8 // RD = (nresults+1)*8.
@@ -603,7 +704,7 @@ static void build_subroutines(BuildCtx *ctx)
| lwz TMP1, L->maxstack
| cmplw BASE, TMP1
| bge >8
- | stw TISNIL, 0(BASE)
+ | stw TISNIL, WORD_HI(BASE)
| addi RD, RD, 8
| addi BASE, BASE, 8
| b <2
@@ -654,7 +755,12 @@ static void build_subroutines(BuildCtx *ctx)
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| lwz L, SAVE_L
| .toc ld TOCREG, SAVE_TOC
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
+ |.else
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ |.endif
| lp BASE, L->base
| .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lwz DISPATCH, L->glref // Setup pointer to dispatch table.
@@ -669,7 +775,7 @@ static void build_subroutines(BuildCtx *ctx)
| la RA, -8(BASE) // Results start at BASE-8.
| .FPU stw TMP3, TMPD
| addi DISPATCH, DISPATCH, GG_G2DISP
- | stw TMP1, 0(RA) // Prepend false to error message.
+ | stw TMP1, WORD_HI(RA) // Prepend false to error message.
| li RD, 16 // 2 results: false + error message.
| st_vmstate
| .FPU lfs TONUM, TMPD
@@ -730,7 +836,12 @@ static void build_subroutines(BuildCtx *ctx)
| stw L, DISPATCH_GL(cur_L)(DISPATCH)
| mr RA, BASE
| lp BASE, L->base
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
+ |.else
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ |.endif
| lp TMP1, L->top
| lwz PC, FRAME_PC(BASE)
| .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
@@ -780,7 +891,12 @@ static void build_subroutines(BuildCtx *ctx)
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
| stw L, DISPATCH_GL(cur_L)(DISPATCH)
| lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
+ |.else
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ |.endif
| lp TMP1, L->top
| .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| add PC, PC, BASE
@@ -800,8 +916,8 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_call_dispatch:
| // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
- | lwz TMP0, FRAME_PC(BASE)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
+ | lwz TMP0, WORD_HI-8(BASE)
+ | lwz LFUNC:RB, WORD_LO-8(BASE)
| checkfunc TMP0; bne ->vmeta_call
|
|->vm_call_dispatch_f:
@@ -820,7 +936,9 @@ static void build_subroutines(BuildCtx *ctx)
| sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
| lp TMP1, L->cframe
| addi DISPATCH, DISPATCH, GG_G2DISP
- | .toc lp CARG4, 0(CARG4)
+ | .opd lp TOCREG, TOC_OFS(CARG4)
+ | .opdenv lp ENVREG, ENV_OFS(CARG4)
+ | .opd lp CARG4, 0(CARG4)
| li TMP2, 0
| stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
| stw TMP2, SAVE_ERRF // No error function.
@@ -828,7 +946,9 @@ static void build_subroutines(BuildCtx *ctx)
| stp sp, L->cframe // Add our C frame to cframe chain.
| stw L, DISPATCH_GL(cur_L)(DISPATCH)
| mtctr CARG4
+ | .elfv2 mr FUNCREG, CARG4
| bctrl // (lua_State *L, lua_CFunction func, void *ud)
+ | .toc lp TOCREG, SAVE_TOC
|.if PPE
| mr BASE, CRET1
| cmpwi CRET1, 0
@@ -850,7 +970,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|->cont_dispatch:
| // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
- | lwz TMP0, -12(BASE) // Continuation.
+ | lwz TMP0, FRAME_CONTRET(BASE) // Continuation.
| mr RB, BASE
| mr BASE, TMP2 // Restore caller BASE.
| lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
@@ -860,6 +980,9 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz PC, -16(RB) // Restore PC from [cont|PC].
+ | lwz PC, FRAME_CONTPC(RB) // Restore PC from [cont|PC]; offset differs by endianness.
| subi TMP2, RD, 8
| stwx TISNIL, RA, TMP2 // Ensure one valid arg.
+ |.if P64
+ | ld TMP3, 0(DISPATCH)
+ |.endif
|.if FFI
| ble >1
|.endif
@@ -914,20 +1037,20 @@ static void build_subroutines(BuildCtx *ctx)
| la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
| li TMP0, LJ_TSTR
| decode_RB8 RB, INS
- | stw STR:RC, 4(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
| add CARG2, BASE, RB
- | stw TMP0, 0(CARG3)
+ | stw TMP0, WORD_HI(CARG3)
| b >1
|
|->vmeta_tgets:
| la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
| li TMP0, LJ_TTAB
- | stw TAB:RB, 4(CARG2)
+ | stw TAB:RB, WORD_LO(CARG2)
| la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
- | stw TMP0, 0(CARG2)
+ | stw TMP0, WORD_HI(CARG2)
| li TMP1, LJ_TSTR
- | stw STR:RC, 4(CARG3)
- | stw TMP1, 0(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
+ | stw TMP1, WORD_HI(CARG3)
| b >1
|
|->vmeta_tgetb: // TMP0 = index
@@ -938,8 +1061,8 @@ static void build_subroutines(BuildCtx *ctx)
| la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
| add CARG2, BASE, RB
|.if DUALNUM
- | stw TISNUM, 0(CARG3)
- | stw TMP0, 4(CARG3)
+ | stw TISNUM, WORD_HI(CARG3)
+ | stw TMP0, WORD_LO(CARG3)
|.else
| stfd f0, 0(CARG3)
|.endif
@@ -977,7 +1100,7 @@ static void build_subroutines(BuildCtx *ctx)
| // BASE = base, L->top = new base, stack = cont/func/t/k
| subfic TMP1, BASE, FRAME_CONT
| lp BASE, L->top
- | stw PC, -16(BASE) // [cont|PC]
+ | stw PC, FRAME_CONTPC(BASE) // [cont|PC]
| add PC, TMP1, BASE
| lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
| li NARGS8:RC, 16 // 2 args for func(t, k).
@@ -996,7 +1119,10 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| b ->BC_TGETR_Z
|1:
- | stwx TISNIL, BASE, RA
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ |.endif
+ | stwx TISNIL, BASE_HI, RA
| b ->cont_nop
|
|//-----------------------------------------------------------------------
@@ -1005,20 +1131,20 @@ static void build_subroutines(BuildCtx *ctx)
| la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
| li TMP0, LJ_TSTR
| decode_RB8 RB, INS
- | stw STR:RC, 4(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
| add CARG2, BASE, RB
- | stw TMP0, 0(CARG3)
+ | stw TMP0, WORD_HI(CARG3)
| b >1
|
|->vmeta_tsets:
| la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
| li TMP0, LJ_TTAB
- | stw TAB:RB, 4(CARG2)
+ | stw TAB:RB, WORD_LO(CARG2)
| la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
- | stw TMP0, 0(CARG2)
+ | stw TMP0, WORD_HI(CARG2)
| li TMP1, LJ_TSTR
- | stw STR:RC, 4(CARG3)
- | stw TMP1, 0(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
+ | stw TMP1, WORD_HI(CARG3)
| b >1
|
|->vmeta_tsetb: // TMP0 = index
@@ -1029,8 +1155,8 @@ static void build_subroutines(BuildCtx *ctx)
| la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
| add CARG2, BASE, RB
|.if DUALNUM
- | stw TISNUM, 0(CARG3)
- | stw TMP0, 4(CARG3)
+ | stw TISNUM, WORD_HI(CARG3)
+ | stw TMP0, WORD_LO(CARG3)
|.else
| stfd f0, 0(CARG3)
|.endif
@@ -1069,7 +1195,7 @@ static void build_subroutines(BuildCtx *ctx)
| // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
| subfic TMP1, BASE, FRAME_CONT
| lp BASE, L->top
- | stw PC, -16(BASE) // [cont|PC]
+ | stw PC, FRAME_CONTPC(BASE) // [cont|PC]
| add PC, TMP1, BASE
| lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
| li NARGS8:RC, 24 // 3 args for func(t, k, v)
@@ -1100,17 +1226,9 @@ static void build_subroutines(BuildCtx *ctx)
|->vmeta_comp:
| mr CARG1, L
| subi PC, PC, 4
- |.if DUALNUM
- | mr CARG2, RA
- |.else
| add CARG2, BASE, RA
- |.endif
| stw PC, SAVE_PC
- |.if DUALNUM
- | mr CARG3, RD
- |.else
| add CARG3, BASE, RD
- |.endif
| stp BASE, L->base
| decode_OP1 CARG4, INS
| bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
@@ -1147,7 +1265,7 @@ static void build_subroutines(BuildCtx *ctx)
| b ->cont_nop
|
|->cont_condt: // RA = resultptr
- | lwz TMP0, 0(RA)
+ | lwz TMP0, WORD_HI(RA)
| .gpr64 extsw TMP0, TMP0
| subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true.
| subfe CRET1, CRET1, CRET1
@@ -1155,7 +1273,7 @@ static void build_subroutines(BuildCtx *ctx)
| b <4
|
|->cont_condf: // RA = resultptr
- | lwz TMP0, 0(RA)
+ | lwz TMP0, WORD_HI(RA)
| .gpr64 extsw TMP0, TMP0
| subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false.
| subfe CRET1, CRET1, CRET1
@@ -1207,8 +1325,8 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->vmeta_unm:
- | mr CARG3, RD
- | mr CARG4, RD
+ | add CARG3, BASE, RD
+ | add CARG4, BASE, RD
| b >1
|
|->vmeta_arith_vn:
@@ -1243,7 +1361,7 @@ static void build_subroutines(BuildCtx *ctx)
|->vmeta_binop:
| // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
| sub TMP1, CRET1, BASE
- | stw PC, -16(CRET1) // [cont|PC]
+ | stw PC, FRAME_CONTPC(CRET1) // [cont|PC]
| mr TMP2, BASE
| addi PC, TMP1, FRAME_CONT
| mr BASE, CRET1
@@ -1254,7 +1372,7 @@ static void build_subroutines(BuildCtx *ctx)
#if LJ_52
| mr SAVE0, CARG1
#endif
- | mr CARG2, RD
+ | add CARG2, BASE, RD
| stp BASE, L->base
| mr CARG1, L
| stw PC, SAVE_PC
@@ -1331,25 +1449,25 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_1, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lwz CARG1, 4(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz CARG1, WORD_LO(BASE)
| blt ->fff_fallback
|.endmacro
|
|.macro .ffunc_2, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
- | lwz CARG4, 8(BASE)
- | lwz CARG1, 4(BASE)
- | lwz CARG2, 12(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz CARG4, WORD_HI+8(BASE)
+ | lwz CARG1, WORD_LO(BASE)
+ | lwz CARG2, WORD_LO+8(BASE)
| blt ->fff_fallback
|.endmacro
|
|.macro .ffunc_n, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
+ | lwz CARG1, WORD_HI(BASE)
|.if FPU
| lfd FARG1, 0(BASE)
|.else
@@ -1362,15 +1480,15 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_nn, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 16
- | lwz CARG1, 0(BASE)
+ | lwz CARG1, WORD_HI(BASE)
|.if FPU
| lfd FARG1, 0(BASE)
- | lwz CARG3, 8(BASE)
+ | lwz CARG3, WORD_HI+8(BASE)
| lfd FARG2, 8(BASE)
|.else
- | lwz CARG2, 4(BASE)
- | lwz CARG3, 8(BASE)
- | lwz CARG4, 12(BASE)
+ | lwz CARG2, WORD_LO(BASE)
+ | lwz CARG3, WORD_HI+8(BASE)
+ | lwz CARG4, WORD_LO+8(BASE)
|.endif
| blt ->fff_fallback
| checknum CARG1; bge ->fff_fallback
@@ -1393,17 +1511,17 @@ static void build_subroutines(BuildCtx *ctx)
| cmplw cr1, CARG3, TMP1
| lwz PC, FRAME_PC(BASE)
| bge cr1, ->fff_fallback
- | stw CARG3, 0(RA)
+ | stw CARG3, WORD_HI(RA)
| addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
| addi TMP1, BASE, 8
| add TMP2, RA, NARGS8:RC
- | stw CARG1, 4(RA)
+ | stw CARG1, WORD_LO(RA)
| beq ->fff_res // Done if exactly 1 argument.
|1:
| cmplw TMP1, TMP2
|.if FPU
| lfd f0, 0(TMP1)
- | stfd f0, 0(TMP1)
+ | stfd f0, -8(TMP1)
|.else
| lwz CARG1, 0(TMP1)
| lwz CARG2, 4(TMP1)
@@ -1416,14 +1534,28 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc type
| cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
+ | lwz CARG1, WORD_HI(BASE)
| blt ->fff_fallback
| .gpr64 extsw CARG1, CARG1
+ |.if P64
+ | li TMP0, LJ_TNUMX
+ | srawi TMP3, CARG1, 15
+ | subfc TMP1, TMP0, CARG1
+ |.else
| subfc TMP0, TISNUM, CARG1
- | subfe TMP2, CARG1, CARG1
+ |.endif
+ | subfe TMP2, CARG1, CARG1
+ |.if P64
+ | cmpwi TMP3, -2
+ | orc TMP1, TMP2, TMP1
+ | subf TMP1, TMP0, TMP1
+ | beq >1
+ |.else
| orc TMP1, TMP2, TMP0
- | addi TMP1, TMP1, ~LJ_TISNUM+1
+ | subf TMP1, TISNUM, TMP1
+ |.endif
| slwi TMP1, TMP1, 3
+ |2:
|.if FPU
| la TMP2, CFUNC:RB->upvalue
| lfdx FARG1, TMP2, TMP1
@@ -1433,6 +1565,11 @@ static void build_subroutines(BuildCtx *ctx)
| lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
|.endif
| b ->fff_resn
+ |.if P64
+ |1:
+ | li TMP1, ~LJ_TLIGHTUD<<3
+ | b <2
+ |.endif
|
|//-- Base library: getters and setters ---------------------------------
|
@@ -1455,10 +1592,10 @@ static void build_subroutines(BuildCtx *ctx)
| sub TMP1, TMP0, TMP1
| add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|3: // Rearranged logic, because we expect _not_ to find the key.
- | lwz CARG4, NODE:TMP2->key
- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
- | lwz CARG2, NODE:TMP2->val
- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
+ | lwz CARG4, WORD_HI+offsetof(Node, key)(NODE:TMP2)
+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
+ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2)
| checkstr CARG4; bne >4
| cmpw TMP0, STR:RC; beq >5
|4:
@@ -1476,14 +1613,33 @@ static void build_subroutines(BuildCtx *ctx)
|6:
| cmpwi CARG3, LJ_TUDATA; beq <1
| .gpr64 extsw CARG3, CARG3
+ |.if P64
+ | li TMP0, LJ_TNUMX
+ | srawi TMP3, CARG3, 15
+ | subfc TMP1, TMP0, CARG3
+ |.else
| subfc TMP0, TISNUM, CARG3
+ |.endif
| subfe TMP2, CARG3, CARG3
+ |.if P64
+ | cmpwi TMP3, -2
+ | orc TMP1, TMP2, TMP1
+ | subf TMP1, TMP0, TMP1
+ | beq >7
+ |.else
| orc TMP1, TMP2, TMP0
- | addi TMP1, TMP1, ~LJ_TISNUM+1
+ | subf TMP1, TISNUM, TMP1
+ |.endif
| slwi TMP1, TMP1, 2
+ |8:
| la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
| lwzx TAB:CARG1, TMP2, TMP1
| b <2
+ |.if P64
+ |7:
+ | li TMP1, ~LJ_TLIGHTUD<<2
+ | b <8
+ |.endif
|
|.ffunc_2 setmetatable
| // Fast path: no mt for table yet and not clearing the mt.
@@ -1501,8 +1657,8 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc rawget
| cmplwi NARGS8:RC, 16
- | lwz CARG4, 0(BASE)
- | lwz TAB:CARG2, 4(BASE)
+ | lwz CARG4, WORD_HI(BASE)
+ | lwz TAB:CARG2, WORD_LO(BASE)
| blt ->fff_fallback
| checktab CARG4; bne ->fff_fallback
| la CARG3, 8(BASE)
@@ -1522,11 +1678,11 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc tonumber
| // Only handles the number case inline (without a base argument).
| cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
+ | lwz CARG1, WORD_HI(BASE)
|.if FPU
| lfd FARG1, 0(BASE)
|.else
- | lwz CARG2, 4(BASE)
+ | lwz CARG2, WORD_LO(BASE)
|.endif
| bne ->fff_fallback // Exactly one argument.
| checknum CARG1; bgt ->fff_fallback
@@ -1602,7 +1758,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| la RA, -8(BASE)
#endif
- | stw TISNIL, 8(BASE)
+ | stw TISNIL, 8+WORD_HI(BASE)
| li RD, (3+1)*8
|.if FPU
| stfd f0, 0(RA)
@@ -1614,11 +1770,11 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc ipairs_aux
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
- | lwz TAB:CARG1, 4(BASE)
- | lwz CARG4, 8(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz TAB:CARG1, WORD_LO(BASE)
+ | lwz CARG4, 8+WORD_HI(BASE)
|.if DUALNUM
- | lwz TMP2, 12(BASE)
+ | lwz TMP2, 8+WORD_LO(BASE)
|.else
| lfd FARG2, 8(BASE)
|.endif
@@ -1647,20 +1803,20 @@ static void build_subroutines(BuildCtx *ctx)
| la RA, -8(BASE)
| cmplw TMP0, TMP2
|.if DUALNUM
- | stw TISNUM, 0(RA)
+ | stw TISNUM, WORD_HI(RA)
| slwi TMP3, TMP2, 3
- | stw TMP2, 4(RA)
+ | stw TMP2, WORD_LO(RA)
|.else
| slwi TMP3, TMP2, 3
| stfd FARG2, 0(RA)
|.endif
| ble >2 // Not in array part?
|.if FPU
- | lwzx TMP2, TMP1, TMP3
- | lfdx f0, TMP1, TMP3
+ | lfdux f0, TMP1, TMP3
+ | lwz TMP2, WORD_HI(TMP1)
|.else
| lwzux TMP2, TMP1, TMP3
- | lwz TMP3, 4(TMP1)
+ | lwz TMP3, WORD_LO(TMP1) // Other word of the slot (was 4(TMP1)); lwzux already put the word at offset 0 in TMP2. NOTE(review): offset 0 is WORD_HI only on big-endian -- confirm soft-float is never built little-endian.
|.endif
|1:
| checknil TMP2
@@ -1685,7 +1841,7 @@ static void build_subroutines(BuildCtx *ctx)
| cmplwi CRET1, 0
| li RD, (0+1)*8
| beq ->fff_res
- | lwz TMP2, 0(CRET1)
+ | lwz TMP2, WORD_HI(CRET1)
|.if FPU
| lfd f0, 0(CRET1)
|.else
@@ -1718,11 +1874,11 @@ static void build_subroutines(BuildCtx *ctx)
| la RA, -8(BASE)
#endif
|.if DUALNUM
- | stw TISNUM, 8(BASE)
+ | stw TISNUM, 8+WORD_HI(BASE)
|.else
- | stw ZERO, 8(BASE)
+ | stw ZERO, 8+WORD_HI(BASE)
|.endif
- | stw ZERO, 12(BASE)
+ | stw ZERO, 8+WORD_LO(BASE)
| li RD, (3+1)*8
|.if FPU
| stfd f0, 0(RA)
@@ -1748,7 +1904,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc xpcall
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 8(BASE)
+ | lwz CARG3, 8+WORD_HI(BASE)
|.if FPU
| lfd FARG2, 8(BASE)
| lfd FARG1, 0(BASE)
@@ -1879,7 +2035,7 @@ static void build_subroutines(BuildCtx *ctx)
|.if resume
| li TMP1, LJ_TTRUE
| la RA, -8(BASE)
- | stw TMP1, -8(BASE) // Prepend true to results.
+ | stw TMP1, WORD_HI-8(BASE) // Prepend true to results.
| addi RD, RD, 16
|.else
| mr RA, BASE
@@ -1904,7 +2060,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
| li RD, (2+1)*8
- | stw TMP1, -8(BASE) // Prepend false to results.
+ | stw TMP1, WORD_HI-8(BASE) // Prepend false to results.
| la RA, -8(BASE)
|.if FPU
| stfd f0, 0(BASE) // Copy error message.
@@ -1962,8 +2118,8 @@ static void build_subroutines(BuildCtx *ctx)
|->fff_resi:
| lwz PC, FRAME_PC(BASE)
| la RA, -8(BASE)
- | stw TISNUM, -8(BASE)
- | stw CRET1, -4(BASE)
+ | stw TISNUM, WORD_HI-8(BASE)
+ | stw CRET1, WORD_LO-8(BASE)
| b ->fff_res1
|1:
| lus CARG3, 0x41e0 // 2^31.
@@ -1978,9 +2134,9 @@ static void build_subroutines(BuildCtx *ctx)
|->fff_restv:
| // CARG3/CARG1 = TValue result.
| lwz PC, FRAME_PC(BASE)
- | stw CARG3, -8(BASE)
+ | stw CARG3, WORD_HI-8(BASE)
| la RA, -8(BASE)
- | stw CARG1, -4(BASE)
+ | stw CARG1, WORD_LO-8(BASE)
|->fff_res1:
| // RA = results, PC = return.
| li RD, (1+1)*8
@@ -1998,10 +2154,11 @@ static void build_subroutines(BuildCtx *ctx)
| ins_next1
| // Adjust BASE. KBASE is assumed to be set for the calling frame.
| sub BASE, RA, TMP0
+ | addi BASEP4, BASE, 4
| ins_next2
|
|6: // Fill up results with nil.
- | subi TMP1, RD, 8
+ | addi TMP1, RD, WORD_HI-8
| addi RD, RD, 8
| stwx TISNIL, RA, TMP1
| b <5
@@ -2119,7 +2276,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc math_log
| cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
+ | lwz CARG1, WORD_HI(BASE)
| bne ->fff_fallback // Need exactly 1 argument.
| checknum CARG1; bge ->fff_fallback
|.if FPU
@@ -2148,20 +2305,20 @@ static void build_subroutines(BuildCtx *ctx)
|.if DUALNUM
|.ffunc math_ldexp
| cmplwi NARGS8:RC, 16
- | lwz TMP0, 0(BASE)
+ | lwz TMP0, WORD_HI(BASE)
|.if FPU
| lfd FARG1, 0(BASE)
|.else
- | lwz CARG1, 0(BASE)
- | lwz CARG2, 4(BASE)
+ | lwz CARG1, WORD_HI(BASE)
+ | lwz CARG2, WORD_LO(BASE)
|.endif
- | lwz TMP1, 8(BASE)
+ | lwz TMP1, WORD_HI+8(BASE)
|.if GPR64
- | lwz CARG2, 12(BASE)
+ | lwz CARG2, WORD_LO+8(BASE)
|.elif FPU
- | lwz CARG1, 12(BASE)
+ | lwz CARG1, WORD_LO+8(BASE)
|.else
- | lwz CARG3, 12(BASE)
+ | lwz CARG3, WORD_LO+8(BASE)
|.endif
| blt ->fff_fallback
| checknum TMP0; bge ->fff_fallback
@@ -2200,8 +2357,8 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| li RD, (2+1)*8
|.if DUALNUM
- | stw TISNUM, 8(RA)
- | stw TMP1, 12(RA)
+ | stw TISNUM, WORD_HI+8(RA)
+ | stw TMP1, WORD_LO+8(RA)
|.else
| stfd FARG2, 8(RA)
|.endif
@@ -2235,9 +2392,9 @@ static void build_subroutines(BuildCtx *ctx)
| add SAVE1, BASE, NARGS8:RC
| bne >4
|1: // Handle integers.
- | lwz CARG4, 0(SAVE0)
+ | lwz CARG4, WORD_HI(SAVE0)
| cmplw cr1, SAVE0, SAVE1
- | lwz CARG2, 4(SAVE0)
+ | lwz CARG2, WORD_LO(SAVE0)
| bge cr1, ->fff_resi
| checknum CARG4
| xoris TMP0, CARG1, 0x8000
@@ -2278,7 +2435,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| bge ->fff_fallback
|5: // Handle numbers.
- | lwz CARG3, 0(SAVE0)
+ | lwz CARG3, WORD_HI(SAVE0)
| cmplw cr1, SAVE0, SAVE1
|.if FPU
| lfd FARG2, 0(SAVE0)
@@ -2317,7 +2474,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| b <5
|7: // Convert integer to number and continue above.
- | lwz CARG3, 4(SAVE0)
+ | lwz CARG3, WORD_LO(SAVE0)
| bne ->fff_fallback
|.if FPU
| tonum_i FARG2, CARG3
@@ -2329,7 +2486,12 @@ static void build_subroutines(BuildCtx *ctx)
| .ffunc_n name
| li TMP1, 8
|1:
+ |.if ENDIAN_LE
+ | add CARG2, BASE, TMP1
+ | lwz CARG2, WORD_HI(CARG2)
+ |.else
| lwzx CARG2, BASE, TMP1
+ |.endif
| lfdx FARG2, BASE, TMP1
| cmplw cr1, TMP1, NARGS8:RC
| checknum CARG2
@@ -2353,8 +2515,8 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc string_byte // Only handle the 1-arg case here.
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lwz STR:CARG1, 4(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz STR:CARG1, WORD_LO(BASE)
| bne ->fff_fallback // Need exactly 1 argument.
| checkstr CARG3
| bne ->fff_fallback
@@ -2385,12 +2547,12 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc string_char // Only handle the 1-arg case here.
| ffgccheck
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
+ | lwz CARG3, WORD_HI(BASE)
|.if DUALNUM
- | lwz TMP0, 4(BASE)
+ | lwz TMP0, WORD_LO(BASE)
| bne ->fff_fallback // Exactly 1 argument.
| checknum CARG3; bne ->fff_fallback
- | la CARG2, 7(BASE)
+ | la CARG2, WORD_BLO(BASE)
|.else
| lfd FARG1, 0(BASE)
| bne ->fff_fallback // Exactly 1 argument.
@@ -2414,16 +2576,16 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc string_sub
| ffgccheck
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 16(BASE)
+ | lwz CARG3, WORD_HI+16(BASE)
|.if not DUALNUM
| lfd f0, 16(BASE)
|.endif
- | lwz TMP0, 0(BASE)
- | lwz STR:CARG1, 4(BASE)
+ | lwz TMP0, WORD_HI(BASE)
+ | lwz STR:CARG1, WORD_LO(BASE)
| blt ->fff_fallback
- | lwz CARG2, 8(BASE)
+ | lwz CARG2, WORD_HI+8(BASE)
|.if DUALNUM
- | lwz TMP1, 12(BASE)
+ | lwz TMP1, WORD_LO+8(BASE)
|.else
| lfd f1, 8(BASE)
|.endif
@@ -2431,7 +2593,7 @@ static void build_subroutines(BuildCtx *ctx)
| beq >1
|.if DUALNUM
| checknum CARG3
- | lwz TMP2, 20(BASE)
+ | lwz TMP2, WORD_LO+16(BASE)
| bne ->fff_fallback
|1:
| checknum CARG2; bne ->fff_fallback
@@ -2487,8 +2649,8 @@ static void build_subroutines(BuildCtx *ctx)
| .ffunc string_ .. name
| ffgccheck
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lwz STR:CARG2, 4(BASE)
+ | lwz CARG3, WORD_HI(BASE)
+ | lwz STR:CARG2, WORD_LO(BASE)
| blt ->fff_fallback
| checkstr CARG3
| la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
@@ -2526,10 +2688,10 @@ static void build_subroutines(BuildCtx *ctx)
| addi SAVE0, BASE, 8
| add SAVE1, BASE, NARGS8:RC
|1:
- | lwz CARG4, 0(SAVE0)
+ | lwz CARG4, WORD_HI(SAVE0)
| cmplw cr1, SAVE0, SAVE1
|.if DUALNUM
- | lwz CARG2, 4(SAVE0)
+ | lwz CARG2, WORD_LO(SAVE0)
|.else
| lfd FARG1, 0(SAVE0)
|.endif
@@ -2696,20 +2858,23 @@ static void build_subroutines(BuildCtx *ctx)
|
|->fff_fallback: // Call fast function fallback handler.
| // BASE = new base, RB = CFUNC, RC = nargs*8
- | lp TMP3, CFUNC:RB->f
+ | lp FUNCREG, CFUNC:RB->f
| add TMP1, BASE, NARGS8:RC
| lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
| addi TMP0, TMP1, 8*LUA_MINSTACK
| lwz TMP2, L->maxstack
| stw PC, SAVE_PC // Redundant (but a defined value).
- | .toc lp TMP3, 0(TMP3)
+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
+ | .opd lp FUNCREG, 0(FUNCREG)
| cmplw TMP0, TMP2
| stp BASE, L->base
| stp TMP1, L->top
| mr CARG1, L
| bgt >5 // Need to grow stack.
- | mtctr TMP3
+ | mtctr FUNCREG
| bctrl // (lua_State *L)
+ | .toc lp TOCREG, SAVE_TOC
| // Either throws an error, or recovers and returns -1, 0 or nresults+1.
| lp BASE, L->base
| cmpwi CRET1, 0
@@ -2811,6 +2976,7 @@ static void build_subroutines(BuildCtx *ctx)
|3:
| lp BASE, L->base
|4: // Re-dispatch to static ins.
+ | addi BASEP4, BASE, 4
| lwz INS, -4(PC)
| decode_OPP TMP1, INS
| decode_RB8 RB, INS
@@ -2824,7 +2990,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|->cont_hook: // Continue from hook yield.
| addi PC, PC, 4
- | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
+ | lwz MULTRES, WORD_LO-24(RB) // Restore MULTRES for *M ins.
| b <4
|
|->vm_hotloop: // Hot loop counter underflow.
@@ -2866,6 +3032,7 @@ static void build_subroutines(BuildCtx *ctx)
| lp BASE, L->base
| lp TMP0, L->top
| stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
+ | addi BASEP4, BASE, 4
| sub NARGS8:RC, TMP0, BASE
| add RA, BASE, RA
| lwz LFUNC:RB, FRAME_FUNC(BASE)
@@ -2877,7 +3044,7 @@ static void build_subroutines(BuildCtx *ctx)
|.if JIT
| // RA = resultptr, RB = meta base
| lwz INS, -4(PC)
- | lwz TRACE:TMP2, -20(RB) // Save previous trace.
+ | lwz TRACE:TMP2, WORD_LO-24(RB) // Save previous trace.
| addic. TMP1, MULTRES, -8
| decode_RA8 RC, INS // Call base.
| beq >2
@@ -2923,10 +3090,16 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG2, PC
| bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
| lp BASE, L->base
+ | addi BASEP4, BASE, 4
| b ->cont_nop
|
|9:
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ | stwx TISNIL, BASEP4, RC
+ |.else
| stwx TISNIL, BASE, RC
+ |.endif
| addi RC, RC, 8
| b <3
|.endif
@@ -2941,6 +3114,7 @@ static void build_subroutines(BuildCtx *ctx)
| // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
| lp BASE, L->base
| subi PC, PC, 4
+ | addi BASEP4, BASE, 4
| b ->cont_nop
#endif
|
@@ -2950,40 +3124,73 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro savex_, a, b, c, d
|.if FPU
- | stfd f..a, 16+a*8(sp)
- | stfd f..b, 16+b*8(sp)
- | stfd f..c, 16+c*8(sp)
- | stfd f..d, 16+d*8(sp)
+ | stfd f..a, EXIT_OFFSET+a*8(sp)
+ | stfd f..b, EXIT_OFFSET+b*8(sp)
+ | stfd f..c, EXIT_OFFSET+c*8(sp)
+ | stfd f..d, EXIT_OFFSET+d*8(sp)
|.endif
|.endmacro
|
+ |.macro saver, a // Save general-purpose register r<a> into the exit-state area.
+ | stp r..a, EXIT_OFFSET+32*8+a*PSIZE(sp) // Slot a of the PSIZE-wide array that starts 32*8 bytes past EXIT_OFFSET(sp).
+ |.endmacro
+ |
|->vm_exit_handler:
|.if JIT
- | addi sp, sp, -(16+32*8+32*4)
- | stmw r2, 16+32*8+2*4(sp)
+ | addi sp, sp, -(EXIT_OFFSET+32*8+32*PSIZE)
+ | saver 3 // CARG1
+ | saver 4 // CARG2
+ | saver 5 // CARG3
+ | saver 17 // DISPATCH
| addi DISPATCH, JGL, -GG_DISP2G-32768
| li CARG2, ~LJ_VMST_EXIT
- | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain.
+ | lp CARG1, EXIT_OFFSET+32*8+32*PSIZE(sp) // Get stack chain.
| stw CARG2, DISPATCH_GL(vmstate)(DISPATCH)
+ | saver 2
+ | saver 6
+ | saver 7
+ | saver 8
+ | saver 9
+ | saver 10
+ | saver 11
+ | saver 12
+ | saver 13
| savex_ 0,1,2,3
- | stw CARG1, 0(sp) // Store extended stack chain.
- | clrso TMP1
+ | stp CARG1, 0(sp) // Store extended stack chain.
+
| savex_ 4,5,6,7
- | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp.
+ | saver 14
+ | saver 15
+ | saver 16
+ | saver 18
+ | addi CARG2, sp, EXIT_OFFSET+32*8+32*PSIZE // Recompute original value of sp.
| savex_ 8,9,10,11
- | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP.
+ | stp CARG2, EXIT_OFFSET+32*8+1*PSIZE(sp) // Store sp in RID_SP.
| savex_ 12,13,14,15
| mflr CARG3
| li TMP1, 0
| savex_ 16,17,18,19
- | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP.
+ | stp TMP1, EXIT_OFFSET+32*8+0*PSIZE(sp) // Clear RID_TMP. Pointer-sized store so the whole PSIZE-wide slot is zeroed, matching the stp used for every other slot.
| savex_ 20,21,22,23
| lhz CARG4, 2(CARG3) // Load trace number.
| savex_ 24,25,26,27
| lwz L, DISPATCH_GL(cur_L)(DISPATCH)
| savex_ 28,29,30,31
+ | saver 19
+ | saver 20
+ | saver 21
+ | saver 22
+ | saver 23
+ | saver 24
+ | saver 25
+ | saver 26
+ | saver 27
+ | saver 28
+ | saver 29
+ | saver 30
+ | saver 31
| sub CARG3, TMP0, CARG3 // Compute exit number.
- | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
+ | lwz BASE, DISPATCH_GL(jit_base)(DISPATCH)
| srwi CARG3, CARG3, 2
| stp L, DISPATCH_J(L)(DISPATCH)
| subi CARG3, CARG3, 2
@@ -2992,11 +3199,11 @@ static void build_subroutines(BuildCtx *ctx)
| stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
| addi CARG1, DISPATCH, GG_DISP2J
| stw CARG3, DISPATCH_J(exitno)(DISPATCH)
- | addi CARG2, sp, 16
+ | addi CARG2, sp, EXIT_OFFSET
| bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
| // Returns MULTRES (unscaled) or negated error code.
| lp TMP1, L->cframe
- | lwz TMP2, 0(sp)
+ | lp TMP2, 0(sp)
| lp BASE, L->base
|.if GPR64
| rldicr sp, TMP1, 0, 61
@@ -3004,7 +3211,7 @@ static void build_subroutines(BuildCtx *ctx)
| rlwinm sp, TMP1, 0, 0, 29
|.endif
| lwz PC, SAVE_PC // Get SAVE_PC.
- | stw TMP2, 0(sp)
+ | stp TMP2, 0(sp)
| stw L, SAVE_L // Set SAVE_L (on-trace resume/yield).
| b >1
|.endif
@@ -3025,7 +3232,12 @@ static void build_subroutines(BuildCtx *ctx)
| stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
| lwz KBASE, PC2PROTO(k)(TMP1)
| // Setup type comparison constants.
+ |.if P64 // P64 builds assemble the constant from two 16-bit halves instead of a single li.
+ | lus TISNUM, LJ_TISNUM >> 16 // Upper 16 bits of LJ_TISNUM.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff // OR in the lower 16 bits.
+ |.else
| li TISNUM, LJ_TISNUM
+ |.endif
| .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| .FPU stw TMP3, TMPD
| li ZERO, 0
@@ -3045,14 +3257,14 @@ static void build_subroutines(BuildCtx *ctx)
| decode_RA8 RA, INS
| lpx TMP0, DISPATCH, TMP1
| mtctr TMP0
- | cmplwi TMP1, BC_FUNCF*4 // Function header?
+ | cmplwi TMP1, BC_FUNCF*PSIZE // Function header?
| bge >2
| decode_RB8 RB, INS
| decode_RD8 RD, INS
| decode_RC8 RC, INS
| bctr
|2:
- | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
+ | cmplwi TMP1, (BC_FUNCC+2)*PSIZE // Fast function?
| blt >3
| // Check frame below fast function.
| lwz TMP1, FRAME_PC(BASE)
@@ -3062,7 +3274,7 @@ static void build_subroutines(BuildCtx *ctx)
| lwz TMP2, -4(TMP1)
| decode_RA8 TMP0, TMP2
| sub TMP1, BASE, TMP0
- | lwz LFUNC:TMP2, -12(TMP1)
+ | lwz LFUNC:TMP2, WORD_LO-16(TMP1)
| lwz TMP1, LFUNC:TMP2->pc
| lwz KBASE, PC2PROTO(k)(TMP1)
|3:
@@ -3111,6 +3323,8 @@ static void build_subroutines(BuildCtx *ctx)
| sfi2d CARG3, CARG4
|
|->vm_modi:
+ | li TMP1, 0 // Zero value for XER.
+ | mtxer TMP1 // Write 0 to XER before the divwo. so the following bso only sees overflow from this division.
| divwo. TMP0, CARG1, CARG2
| bso >1
|.if GPR64
@@ -3129,7 +3343,8 @@ static void build_subroutines(BuildCtx *ctx)
| cmpwi CARG2, 0
| li CARG1, 0
| beqlr
- | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0.
+ | // Clear SO for -2147483648 % -1 and return 0.
+ | crxor 4*cr0+so, 4*cr0+so, 4*cr0+so
| blr
|
|//-----------------------------------------------------------------------
@@ -3142,10 +3357,18 @@ static void build_subroutines(BuildCtx *ctx)
|->vm_cachesync:
|.if JIT or FFI
| // Compute start of first cache line and number of cache lines.
+ | .if GPR64 // 64-bit registers: use the doubleword rotate/mask form.
+ | rldicr CARG1, CARG1, 0, 58 // Clear the low 5 bits of the 64-bit start address (32-byte alignment).
+ | .else
| rlwinm CARG1, CARG1, 0, 0, 26
+ | .endif
| sub CARG2, CARG2, CARG1
| addi CARG2, CARG2, 31
+ | .if GPR64 // 64-bit registers: shift the full doubleword.
+ | srdi. CARG2, CARG2, 5 // Divide the byte span by 32; the record form sets cr0 for the beqlr below.
+ | .else
| rlwinm. CARG2, CARG2, 27, 5, 31
+ | .endif
| beqlr
| mtctr CARG2
| mr CARG3, CARG1
@@ -3172,39 +3395,70 @@ static void build_subroutines(BuildCtx *ctx)
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|
- |// Handler for callback functions. Callback slot number in r11, g in r12.
+ |// Handler for callback functions.
+ |// 32-bit: Callback slot number in r12, g in r11.
+ |// 64-bit v1: Callback slot number in bits 47+ of r11, g in 0-46, TOC in r2.
+ |// 64-bit v2: Callback slot number in bits 2-11 of r12, g in r11,
+ |// vm_ffi_callback in r2.
|->vm_ffi_callback:
|.if FFI
|.type CTSTATE, CTState, PC
+ | .if OPD // r11 carries both the slot number and g packed together.
+ | rldicl r12, r11, 17, 47 // r12 = top 17 bits of r11 (the callback slot number).
+ | rldicl r11, r11, 0, 17 // r11 = low 47 bits of r11 (g).
+ | .endif
+ | .if ELFV2 // r12 carries the slot number in bits 2-11.
+ | rlwinm r12, r12, 30, 22, 31 // r12 = (r12 >> 2) & 0x3ff, i.e. the callback slot number.
+ | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha // Add the high-adjusted part of (.TOC. - lj_vm_ffi_callback) to TOCREG.
+ | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l // Add the low part of the same displacement.
+ | .endif
| saveregs
- | lwz CTSTATE, GL:r12->ctype_state
- | addi DISPATCH, r12, GG_G2DISP
- | stw r11, CTSTATE->cb.slot
- | stw r3, CTSTATE->cb.gpr[0]
+ | lwz CTSTATE, GL:r11->ctype_state
+ | addi DISPATCH, r11, GG_G2DISP
+ | stw r12, CTSTATE->cb.slot
+ | stp r3, CTSTATE->cb.gpr[0]
| .FPU stfd f1, CTSTATE->cb.fpr[0]
- | stw r4, CTSTATE->cb.gpr[1]
+ | stp r4, CTSTATE->cb.gpr[1]
| .FPU stfd f2, CTSTATE->cb.fpr[1]
- | stw r5, CTSTATE->cb.gpr[2]
+ | stp r5, CTSTATE->cb.gpr[2]
| .FPU stfd f3, CTSTATE->cb.fpr[2]
- | stw r6, CTSTATE->cb.gpr[3]
+ | stp r6, CTSTATE->cb.gpr[3]
| .FPU stfd f4, CTSTATE->cb.fpr[3]
- | stw r7, CTSTATE->cb.gpr[4]
+ | stp r7, CTSTATE->cb.gpr[4]
| .FPU stfd f5, CTSTATE->cb.fpr[4]
- | stw r8, CTSTATE->cb.gpr[5]
+ | stp r8, CTSTATE->cb.gpr[5]
| .FPU stfd f6, CTSTATE->cb.fpr[5]
- | stw r9, CTSTATE->cb.gpr[6]
+ | stp r9, CTSTATE->cb.gpr[6]
| .FPU stfd f7, CTSTATE->cb.fpr[6]
- | stw r10, CTSTATE->cb.gpr[7]
+ | stp r10, CTSTATE->cb.gpr[7]
| .FPU stfd f8, CTSTATE->cb.fpr[7]
+ | .if GPR64
+ | stfd f9, CTSTATE->cb.fpr[8]
+ | stfd f10, CTSTATE->cb.fpr[9]
+ | stfd f11, CTSTATE->cb.fpr[10]
+ | stfd f12, CTSTATE->cb.fpr[11]
+ | stfd f13, CTSTATE->cb.fpr[12]
+ | .endif
+ | .if ELFV2
+ | addi TMP0, sp, CFRAME_SPACE+96
+ | .elif GPR64
+ | addi TMP0, sp, CFRAME_SPACE+112
+ | .else
| addi TMP0, sp, CFRAME_SPACE+8
- | stw TMP0, CTSTATE->cb.stack
+ | .endif
+ | stp TMP0, CTSTATE->cb.stack
| mr CARG1, CTSTATE
| stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
| mr CARG2, sp
| bl extern lj_ccallback_enter // (CTState *cts, void *cf)
| // Returns lua_State *.
| lp BASE, L:CRET1->base
+ |.if P64
+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
+ |.else
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ |.endif
| lp RC, L:CRET1->top
| .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| li ZERO, 0
@@ -3233,9 +3487,21 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG1, CTSTATE
| mr CARG2, RA
| bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
- | lwz CRET1, CTSTATE->cb.gpr[0]
+ | lp CRET1, CTSTATE->cb.gpr[0]
| .FPU lfd FARG1, CTSTATE->cb.fpr[0]
- | lwz CRET2, CTSTATE->cb.gpr[1]
+ | lp CRET2, CTSTATE->cb.gpr[1]
+ | .if GPR64
+ | lfd FARG2, CTSTATE->cb.fpr[1]
+ | .else
+ | lp CARG3, CTSTATE->cb.gpr[2]
+ | lp CARG4, CTSTATE->cb.gpr[3]
+ | .endif
+ | .elfv2 lfd f3, CTSTATE->cb.fpr[2]
+ | .elfv2 lfd f4, CTSTATE->cb.fpr[3]
+ | .elfv2 lfd f5, CTSTATE->cb.fpr[4]
+ | .elfv2 lfd f6, CTSTATE->cb.fpr[5]
+ | .elfv2 lfd f7, CTSTATE->cb.fpr[6]
+ | .elfv2 lfd f8, CTSTATE->cb.fpr[7]
| b ->vm_leave_unw
|.endif
|
@@ -3248,23 +3514,46 @@ static void build_subroutines(BuildCtx *ctx)
| lbz CARG2, CCSTATE->nsp
| lbz CARG3, CCSTATE->nfpr
| neg TMP1, TMP1
+ | .if GPR64
+ | std TMP0, 16(sp)
+ | .else
| stw TMP0, 4(sp)
+ | .endif
| cmpwi cr1, CARG3, 0
| mr TMP2, sp
| addic. CARG2, CARG2, -1
+ | .if GPR64
+ | stdux sp, sp, TMP1
+ | .else
| stwux sp, sp, TMP1
+ | .endif
| crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
- | stw r14, -4(TMP2)
- | stw CCSTATE, -8(TMP2)
+ | .if GPR64
+ | std r14, -8(TMP2)
+ | std CCSTATE, -16(TMP2)
+ | .else
+ | stw r14, -4(TMP2)
+ | stw CCSTATE, -8(TMP2)
+ | .endif
| mr r14, TMP2
| la TMP1, CCSTATE->stack
+ | .if GPR64
+ | sldi CARG2, CARG2, 3
+ | .else
| slwi CARG2, CARG2, 2
+ | .endif
| blty >2
- | la TMP2, 8(sp)
+ | .if ELFV2
+ | la TMP2, 96(sp)
+ | .elif GPR64
+ | la TMP2, 112(sp)
+ | .else
+ | la TMP2, 8(sp)
+ | .endif
|1:
- | lwzx TMP0, TMP1, CARG2
- | stwx TMP0, TMP2, CARG2
- | addic. CARG2, CARG2, -4
+ | lpx TMP0, TMP1, CARG2
+ | stpx TMP0, TMP2, CARG2
+ | addic. CARG2, CARG2, -PSIZE
| bge <1
|2:
| bney cr1, >3
@@ -3276,28 +3565,55 @@ static void build_subroutines(BuildCtx *ctx)
| .FPU lfd f6, CCSTATE->fpr[5]
| .FPU lfd f7, CCSTATE->fpr[6]
| .FPU lfd f8, CCSTATE->fpr[7]
+ | .if GPR64
+ | .FPU lfd f9, CCSTATE->fpr[8]
+ | .FPU lfd f10, CCSTATE->fpr[9]
+ | .FPU lfd f11, CCSTATE->fpr[10]
+ | .FPU lfd f12, CCSTATE->fpr[11]
+ | .FPU lfd f13, CCSTATE->fpr[12]
+ | .endif
|3:
- | lp TMP0, CCSTATE->func
- | lwz CARG2, CCSTATE->gpr[1]
- | lwz CARG3, CCSTATE->gpr[2]
- | lwz CARG4, CCSTATE->gpr[3]
- | lwz CARG5, CCSTATE->gpr[4]
- | mtctr TMP0
- | lwz r8, CCSTATE->gpr[5]
- | lwz r9, CCSTATE->gpr[6]
- | lwz r10, CCSTATE->gpr[7]
- | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
+ | .toc std TOCREG, SAVE_TOC
+ | lp FUNCREG, CCSTATE->func
+ | lp CARG2, CCSTATE->gpr[1]
+ | lp CARG3, CCSTATE->gpr[2]
+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
+ | .opd lp FUNCREG, 0(FUNCREG)
+ | lp CARG4, CCSTATE->gpr[3]
+ | lp CARG5, CCSTATE->gpr[4]
+ | mtctr FUNCREG
+ | lp r8, CCSTATE->gpr[5]
+ | lp r9, CCSTATE->gpr[6]
+ | lp r10, CCSTATE->gpr[7]
+ | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
| bctrl
- | lwz CCSTATE:TMP1, -8(r14)
- | lwz TMP2, -4(r14)
+ | .toc lp TOCREG, SAVE_TOC
+ | .if GPR64
+ | ld CCSTATE:TMP1, -16(r14)
+ | ld TMP2, -8(r14)
+ | ld TMP0, 16(r14)
+ | .else
+ | lwz CCSTATE:TMP1, -8(r14)
+ | lwz TMP2, -4(r14)
| lwz TMP0, 4(r14)
- | stw CARG1, CCSTATE:TMP1->gpr[0]
+ | .endif
+ | stp CARG1, CCSTATE:TMP1->gpr[0]
| .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
- | stw CARG2, CCSTATE:TMP1->gpr[1]
+ | stp CARG2, CCSTATE:TMP1->gpr[1]
+ | .if GPR64
+ | stfd FARG2, CCSTATE:TMP1->fpr[1]
+ | .endif
+ | .elfv2 stfd FARG3, CCSTATE:TMP1->fpr[2]
+ | .elfv2 stfd FARG4, CCSTATE:TMP1->fpr[3]
+ | .elfv2 stfd FARG5, CCSTATE:TMP1->fpr[4]
+ | .elfv2 stfd FARG6, CCSTATE:TMP1->fpr[5]
+ | .elfv2 stfd FARG7, CCSTATE:TMP1->fpr[6]
+ | .elfv2 stfd FARG8, CCSTATE:TMP1->fpr[7]
| mtlr TMP0
- | stw CARG3, CCSTATE:TMP1->gpr[2]
+ | stp CARG3, CCSTATE:TMP1->gpr[2]
| mr sp, r14
- | stw CARG4, CCSTATE:TMP1->gpr[3]
+ | stp CARG4, CCSTATE:TMP1->gpr[3]
| mr r14, TMP2
| blr
|.endif
@@ -3320,14 +3636,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1*8, RD = src2*8, JMP with RD = target
+ | addi BASEP4, BASE, 4
|.if DUALNUM
- | lwzux CARG1, RA, BASE
+ | lwzx CARG1, BASE_HI, RA
| addi PC, PC, 4
- | lwz CARG2, 4(RA)
- | lwzux CARG3, RD, BASE
+ | lwzx CARG2, BASE_LO, RA
+ | lwzx CARG3, BASE_HI, RD
| lwz TMP2, -4(PC)
| checknum cr0, CARG1
- | lwz CARG4, 4(RD)
+ | lwzx CARG4, BASE_LO, RD
| decode_RD4 TMP2, TMP2
| checknum cr1, CARG3
| addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
@@ -3351,7 +3668,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|7: // RA is not an integer.
| bgt cr0, ->vmeta_comp
| // RA is a number.
- | .FPU lfd f0, 0(RA)
+ | .FPU lfdx f0, BASE, RA
| bgt cr1, ->vmeta_comp
| blt cr1, >4
| // RA is a number, RD is an integer.
@@ -3371,7 +3688,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bl ->vm_sfi2d_1
|.endif
|4:
- | .FPU lfd f1, 0(RD)
+ | .FPU lfdx f1, BASE, RD
|5:
|.if FPU
| fcmpu cr0, f0, f1
@@ -3392,10 +3709,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| b <1
|.else
- | lwzx TMP0, BASE, RA
+ | lwzx TMP0, BASE_HI, RA
| addi PC, PC, 4
| lfdx f0, BASE, RA
- | lwzx TMP1, BASE, RD
+ | lwzx TMP1, BASE_HI, RD
| checknum cr0, TMP0
| lwz TMP2, -4(PC)
| lfdx f1, BASE, RD
@@ -3425,16 +3742,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISEQV: case BC_ISNEV:
vk = op == BC_ISEQV;
| // RA = src1*8, RD = src2*8, JMP with RD = target
+ | addi BASEP4, BASE, 4
|.if DUALNUM
- | lwzux CARG1, RA, BASE
+ | lwzx CARG1, BASE_HI, RA
| addi PC, PC, 4
- | lwz CARG2, 4(RA)
- | lwzux CARG3, RD, BASE
+ | lwzx CARG2, BASE_LO, RA
+ | .if ENDIAN_LE
+ | lwzx CARG3, BASE_HI, RD
+ | .else
+ | lwzux CARG3, RD, BASE_HI
+ | .endif
| checknum cr0, CARG1
| lwz SAVE0, -4(PC)
| checknum cr1, CARG3
| decode_RD4 SAVE0, SAVE0
- | lwz CARG4, 4(RD)
+ | .if ENDIAN_LE
+ | lwzux CARG4, RD, BASE_LO
+ | .else
+ | lwz CARG4, WORD_LO(RD)
+ | .endif
| cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
| addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
if (vk) {
@@ -3443,11 +3769,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ble cr7, ->BC_ISNEN_Z
}
|.else
- | lwzux CARG1, RA, BASE
+ | lwzx CARG1, BASE_HI, RA
| lwz SAVE0, 0(PC)
- | lfd f0, 0(RA)
+ | lfdx f0, BASE, RA
| addi PC, PC, 4
- | lwzux CARG3, RD, BASE
+ | lwzx CARG3, BASE_HI, RD
| checknum cr0, CARG1
| decode_RD4 SAVE0, SAVE0
- | lfd f1, 0(RD)
+ | lfdx f1, BASE, RD // RD is still a slot offset (lwzx above no longer updates it), so index off BASE like the f0 load.
@@ -3468,8 +3794,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
|5: // Either or both types are not numbers.
|.if not DUALNUM
- | lwz CARG2, 4(RA)
- | lwz CARG4, 4(RD)
+ | lwzx CARG2, BASE_LO, RA
+ | lwzx CARG4, BASE_LO, RD
|.endif
|.if FFI
| cmpwi cr7, CARG1, LJ_TCDATA
@@ -3485,10 +3811,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.if FFI
| beq cr7, ->vmeta_equal_cd
|.endif
+ |.if P64
+ | cmplwi cr7, TMP2, ~LJ_TUDATA // Avoid 64 bit lightuserdata.
+ |.endif
| cmplw cr5, CARG2, CARG4
| crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
| crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
| crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
+ |.if P64
+ | cror 4*cr6+lt, 4*cr6+lt, 4*cr7+gt
+ |.endif
| mr SAVE1, PC
| cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
| cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
@@ -3528,9 +3860,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISEQS: case BC_ISNES:
vk = op == BC_ISEQS;
| // RA = src*8, RD = str_const*8 (~), JMP with RD = target
- | lwzux TMP0, RA, BASE
+ | addi BASEP4, BASE, 4
+ | lwzx TMP0, BASE_HI, RA
| srwi RD, RD, 1
- | lwz STR:TMP3, 4(RA)
+ | lwzx STR:TMP3, BASE_LO, RA
| lwz TMP2, 0(PC)
| subfic RD, RD, -4
| addi PC, PC, 4
@@ -3561,16 +3894,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISEQN: case BC_ISNEN:
vk = op == BC_ISEQN;
| // RA = src*8, RD = num_const*8, JMP with RD = target
+ | addi BASEP4, BASE, 4
|.if DUALNUM
- | lwzux CARG1, RA, BASE
+ | lwzx CARG1, BASE_HI, RA
| addi PC, PC, 4
- | lwz CARG2, 4(RA)
- | lwzux CARG3, RD, KBASE
+ | lwzx CARG2, BASE_LO, RA
+ | lwzux2 CARG3, CARG4, RD, KBASE
| checknum cr0, CARG1
| lwz SAVE0, -4(PC)
| checknum cr1, CARG3
| decode_RD4 SAVE0, SAVE0
- | lwz CARG4, 4(RD)
| addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
if (vk) {
|->BC_ISEQN_Z:
@@ -3587,7 +3920,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
} else {
|->BC_ISNEN_Z: // Dummy label.
}
- | lwzx CARG1, BASE, RA
+ | lwzx CARG1, BASE_HI, RA
| addi PC, PC, 4
| lfdx f0, BASE, RA
| lwz SAVE0, -4(PC)
@@ -3625,7 +3958,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|7: // RA is not an integer.
| bge cr0, <3
| // RA is a number.
- | .FPU lfd f0, 0(RA)
+ | .FPU lfdx f0, BASE, RA
| blt cr1, >1
| // RA is a number, RD is an integer.
|.if FPU
@@ -3657,7 +3990,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISEQP: case BC_ISNEP:
vk = op == BC_ISEQP;
| // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
- | lwzx TMP0, BASE, RA
+ | addi BASEP4, BASE, 4
+ | lwzx TMP0, BASE_HI, RA
| srwi TMP1, RD, 3
| lwz TMP2, 0(PC)
| not TMP1, TMP1
@@ -3687,7 +4021,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
| // RA = dst*8 or unused, RD = src*8, JMP with RD = target
- | lwzx TMP0, BASE, RD
+ | addi BASEP4, BASE, 4
+ | lwzx TMP0, BASE_HI, RD
| lwz INS, 0(PC)
| addi PC, PC, 4
if (op == BC_IST || op == BC_ISF) {
@@ -3732,7 +4067,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISTYPE:
| // RA = src*8, RD = -type*8
- | lwzx TMP0, BASE, RA
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ |.endif
+ | lwzx TMP0, BASE_HI, RA
| srwi TMP1, RD, 3
| ins_next1
|.if not PPE and not GPR64
@@ -3746,7 +4084,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_ISNUM:
| // RA = src*8, RD = -(TISNUM-1)*8
- | lwzx TMP0, BASE, RA
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ |.endif
+ | lwzx TMP0, BASE_HI, RA
| ins_next1
| checknum TMP0
| bge ->vmeta_istype
@@ -3771,18 +4112,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_NOT:
| // RA = dst*8, RD = src*8
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ |.endif
| ins_next1
- | lwzx TMP0, BASE, RD
+ | lwzx TMP0, BASE_HI, RD
| .gpr64 extsw TMP0, TMP0
| subfic TMP1, TMP0, LJ_TTRUE
| adde TMP0, TMP0, TMP1
- | stwx TMP0, BASE, RA
+ | stwx TMP0, BASE_HI, RA
| ins_next2
break;
case BC_UNM:
| // RA = dst*8, RD = src*8
- | lwzux TMP1, RD, BASE
- | lwz TMP0, 4(RD)
+ | addi BASEP4, BASE, 4
+ | lwzx TMP1, BASE_HI, RD
+ | lwzx TMP0, BASE_LO, RD
+ |.if DUALNUM and not GPR64
+ | mtxer ZERO
+ |.endif
| checknum TMP1
|.if DUALNUM
| bne >5
@@ -3794,18 +4142,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.else
| nego. TMP0, TMP0
| bso >4
- |1:
|.endif
| ins_next1
- | stwux TISNUM, RA, BASE
- | stw TMP0, 4(RA)
+ | stwx TISNUM, BASE_HI, RA
+ | stwx TMP0, BASE_LO, RA
|3:
| ins_next2
|4:
- |.if not GPR64
- | // Potential overflow.
- | checkov TMP1, <1 // Ignore unrelated overflow.
- |.endif
| lus TMP1, 0x41e0 // 2^31.
| li TMP0, 0
| b >7
@@ -3815,8 +4158,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| xoris TMP1, TMP1, 0x8000
|7:
| ins_next1
- | stwux TMP1, RA, BASE
- | stw TMP0, 4(RA)
+ | stwx TMP1, BASE_HI, RA
+ | stwx TMP0, BASE_LO, RA
|.if DUALNUM
| b <3
|.else
@@ -3825,15 +4168,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_LEN:
| // RA = dst*8, RD = src*8
- | lwzux TMP0, RD, BASE
- | lwz CARG1, 4(RD)
+ | addi BASEP4, BASE, 4
+ | lwzx TMP0, BASE_HI, RD
+ | lwzx CARG1, BASE_LO, RD
| checkstr TMP0; bne >2
| lwz CRET1, STR:CARG1->len
|1:
|.if DUALNUM
| ins_next1
- | stwux TISNUM, RA, BASE
- | stw CRET1, 4(RA)
+ | stwx TISNUM, BASE_HI, RA
+ | stwx CRET1, BASE_LO, RA
|.else
| tonum_u f0, CRET1 // Result is a non-negative integer.
| ins_next1
@@ -3865,12 +4209,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.macro ins_arithpre
| // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+ | addi BASEP4, BASE, 4
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzx CARG1, BASE, RB
+ | .if ENDIAN_LE and DUALNUM
+ | addi CARG3, RC, 4
+ | .endif
+ | lwzx CARG1, BASE_HI, RB
| .if DUALNUM
- | lwzx CARG3, KBASE, RC
+ | .if ENDIAN_LE
+ | lwzx CARG3, KBASE, CARG3
+ | .else
+ | lwzx CARG3, KBASE, RC
+ | .endif
| .endif
| .if FPU
| lfdx f14, BASE, RB
@@ -3891,9 +4243,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| .endif
|| break;
||case 1:
- | lwzx CARG1, BASE, RB
+ | .if ENDIAN_LE and DUALNUM
+ | addi CARG3, RC, 4
+ | .endif
+ | lwzx CARG1, BASE_HI, RB
| .if DUALNUM
- | lwzx CARG3, KBASE, RC
+ | .if ENDIAN_LE
+ | lwzx CARG3, KBASE, CARG3
+ | .else
+ | lwzx CARG3, KBASE, RC
+ | .endif
| .endif
| .if FPU
| lfdx f15, BASE, RB
@@ -3914,8 +4273,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| .endif
|| break;
||default:
- | lwzx CARG1, BASE, RB
- | lwzx CARG3, BASE, RC
+ | lwzx CARG1, BASE_HI, RB
+ | lwzx CARG3, BASE_HI, RC
| .if FPU
| lfdx f14, BASE, RB
| lfdx f15, BASE, RC
@@ -3998,50 +4357,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|.macro ins_arithdn, intins, fpins, fpcall
| // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+ | addi BASEP4, BASE, 4
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzux CARG1, RB, BASE
- | lwzux CARG3, RC, KBASE
+ | .if ENDIAN_LE
+ | lwzx TMP1, RB, BASE_HI
+ | lwzux CARG2, RC, KBASE
+ | lwz TMP2, 4(RC)
+ | checknum cr0, TMP1
+ | lwzux CARG1, RB, BASE
+ | checknum cr1, TMP2
+ | .else
+ | lwzux CARG1, RB, BASE
+ | lwzux CARG3, RC, KBASE
| lwz CARG2, 4(RB)
- | checknum cr0, CARG1
- | lwz CARG4, 4(RC)
- | checknum cr1, CARG3
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RC)
+ | checknum cr1, CARG3
+ | .endif
|| break;
||case 1:
- | lwzux CARG3, RB, BASE
- | lwzux CARG1, RC, KBASE
- | lwz CARG4, 4(RB)
- | checknum cr0, CARG3
- | lwz CARG2, 4(RC)
- | checknum cr1, CARG1
+ | .if ENDIAN_LE
+ | lwzux CARG1, RC, KBASE
+ | lwzx TMP1, RB, BASE_HI
+ | lwz TMP2, 4(RC)
+ | checknum cr0, TMP1
+ | lwzux CARG2, RB, BASE
+ | checknum cr1, TMP2
+ | .else
+ | lwzux CARG3, RB, BASE
+ | lwzux CARG1, RC, KBASE
+ | lwz CARG4, 4(RB)
+ | checknum cr0, CARG3
+ | lwz CARG2, 4(RC)
+ | checknum cr1, CARG1
+ | .endif
|| break;
||default:
- | lwzux CARG1, RB, BASE
- | lwzux CARG3, RC, BASE
- | lwz CARG2, 4(RB)
- | checknum cr0, CARG1
- | lwz CARG4, 4(RC)
- | checknum cr1, CARG3
+ | .if ENDIAN_LE
+ | lwzx TMP1, RB, BASE_HI
+ | lwzx TMP2, RC, BASE_HI
+ | lwzux CARG1, RB, BASE
+ | checknum cr0, TMP1
+ | lwzux CARG2, RC, BASE
+ | checknum cr1, TMP2
+ | .else
+ | lwzux CARG1, RB, BASE
+ | lwzux CARG3, RC, BASE
+ | lwz CARG2, 4(RB)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RC)
+ | checknum cr1, CARG3
+ | .endif
|| break;
||}
| bne >5
| bne cr1, >5
- |.if "intins" == "intmod"
- | mr CARG1, CARG2
- | mr CARG2, CARG4
- |.endif
+ |.if ENDIAN_LE
+ | intins CARG1, CARG1, CARG2
+ |.else
+ | .if "intins" == "intmod"
+ | mr CARG1, CARG2
+ | mr CARG2, CARG4
+ | .endif
| intins CARG1, CARG2, CARG4
- | bso >4
- |1:
+ |.endif
+ | ins_arithfallback bso
| ins_next1
- | stwux TISNUM, RA, BASE
- | stw CARG1, 4(RA)
+ | stwx TISNUM, BASE_HI, RA
+ | stwx CARG1, BASE_LO, RA
|2:
| ins_next2
- |4: // Overflow.
- | checkov TMP0, <1 // Ignore unrelated overflow.
- | ins_arithfallback b
|5: // FP variant.
|.if FPU
||if (vk == 1) {
@@ -4124,8 +4511,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_POW:
| // NYI: (partial) integer arithmetic.
- | lwzx CARG1, BASE, RB
- | lwzx CARG3, BASE, RC
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ |.endif
+ | lwzx CARG1, BASE_HI, RB
+ | lwzx CARG3, BASE_HI, RC
|.if FPU
| lfdx FARG1, BASE, RB
| lfdx FARG2, BASE, RC
@@ -4164,6 +4554,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // Returns NULL (finished) or TValue * (metamethod).
| cmplwi CRET1, 0
| lp BASE, L->base
+ | addi BASEP4, BASE, 4
| bne ->vmeta_binop
| ins_next1
|.if FPU
@@ -4182,42 +4573,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_KSTR:
| // RA = dst*8, RD = str_const*8 (~)
+ | addi BASEP4, BASE, 4
| srwi TMP1, RD, 1
| subfic TMP1, TMP1, -4
| ins_next1
| lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
| li TMP2, LJ_TSTR
- | stwux TMP2, RA, BASE
- | stw TMP0, 4(RA)
+ | stwx TMP2, BASE_HI, RA
+ | stwx TMP0, BASE_LO, RA
| ins_next2
break;
case BC_KCDATA:
|.if FFI
| // RA = dst*8, RD = cdata_const*8 (~)
+ | addi BASEP4, BASE, 4
| srwi TMP1, RD, 1
| subfic TMP1, TMP1, -4
| ins_next1
| lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
| li TMP2, LJ_TCDATA
- | stwux TMP2, RA, BASE
- | stw TMP0, 4(RA)
+ | stwx TMP2, BASE_HI, RA
+ | stwx TMP0, BASE_LO, RA
| ins_next2
|.endif
break;
case BC_KSHORT:
| // RA = dst*8, RD = int16_literal*8
+ | addi BASEP4, BASE, 4
|.if DUALNUM
| slwi RD, RD, 13
| srawi RD, RD, 16
| ins_next1
- | stwux TISNUM, RA, BASE
- | stw RD, 4(RA)
+ | stwx TISNUM, BASE_HI, RA
+ | stwx RD, BASE_LO, RA
| ins_next2
|.else
| // The soft-float approach is faster.
| slwi RD, RD, 13
| srawi TMP1, RD, 31
| xor TMP2, TMP1, RD
+ | .gpr64 extsw RD, RD
| sub TMP2, TMP2, TMP1 // TMP2 = abs(x)
| cntlzw TMP3, TMP2
| subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
@@ -4229,8 +4624,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add RD, RD, TMP1 // hi = hi + exponent-1
| and RD, RD, TMP0 // hi = x == 0 ? 0 : hi
| ins_next1
- | stwux RD, RA, BASE
- | stw ZERO, 4(RA)
+ | stwx RD, BASE_HI, RA
+ | stwx ZERO, BASE_LO, RA
| ins_next2
|.endif
break;
@@ -4250,18 +4645,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_KPRI:
| // RA = dst*8, RD = primitive_type*8 (~)
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ |.endif
| srwi TMP1, RD, 3
| not TMP0, TMP1
| ins_next1
- | stwx TMP0, BASE, RA
+ | stwx TMP0, BASE_HI, RA
| ins_next2
break;
case BC_KNIL:
| // RA = base*8, RD = end*8
- | stwx TISNIL, BASE, RA
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ |.endif
+ | stwx TISNIL, BASE_HI, RA
| addi RA, RA, 8
|1:
- | stwx TISNIL, BASE, RA
+ | stwx TISNIL, BASE_HI, RA
| cmpw RA, RD
| addi RA, RA, 8
| blt <1
@@ -4305,7 +4706,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz CARG2, UPVAL:RB->v
| andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
| lbz TMP0, UPVAL:RB->closed
- | lwz TMP2, 0(RD)
+ | lwz TMP2, WORD_HI(RD)
|.if FPU
| stfd f0, 0(CARG2)
|.else
@@ -4313,7 +4714,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| stw CARG3, 4(CARG2)
|.endif
| cmplwi cr1, TMP0, 0
- | lwz TMP1, 4(RD)
+ | lwz TMP1, WORD_LO(RD)
| cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
| subi TMP2, TMP2, (LJ_TNUMX+1)
| bne >2 // Upvalue is closed and black?
@@ -4346,8 +4747,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lbz TMP3, STR:TMP1->marked
| lbz TMP2, UPVAL:RB->closed
| li TMP0, LJ_TSTR
- | stw STR:TMP1, 4(CARG2)
- | stw TMP0, 0(CARG2)
+ | stw STR:TMP1, WORD_LO(CARG2)
+ | stw TMP0, WORD_HI(CARG2)
| bne >2
|1:
| ins_next
@@ -4394,7 +4795,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwzx UPVAL:RB, LFUNC:RB, RA
| ins_next1
| lwz TMP1, UPVAL:RB->v
- | stw TMP0, 0(TMP1)
+ | stw TMP0, WORD_HI(TMP1)
| ins_next2
break;
@@ -4409,6 +4810,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add CARG2, BASE, RA
| bl extern lj_func_closeuv // (lua_State *L, TValue *level)
| lp BASE, L->base
+ | addi BASEP4, BASE, 4
|1:
| ins_next
break;
@@ -4427,8 +4829,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // Returns GCfuncL *.
| lp BASE, L->base
| li TMP0, LJ_TFUNC
- | stwux TMP0, RA, BASE
- | stw LFUNC:CRET1, 4(RA)
+ | addi BASEP4, BASE, 4
+ | stwx TMP0, BASE_HI, RA
+ | stwx LFUNC:CRET1, BASE_LO, RA
| ins_next
break;
@@ -4461,8 +4864,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| lp BASE, L->base
| li TMP0, LJ_TTAB
- | stwux TMP0, RA, BASE
- | stw TAB:CRET1, 4(RA)
+ | addi BASEP4, BASE, 4
+ | stwx TMP0, BASE_HI, RA
+ | stwx TAB:CRET1, BASE_LO, RA
| ins_next
if (op == BC_TNEW) {
|3:
@@ -4495,13 +4899,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_TGETV:
| // RA = dst*8, RB = table*8, RC = key*8
- | lwzux CARG1, RB, BASE
- | lwzux CARG2, RC, BASE
- | lwz TAB:RB, 4(RB)
+ | addi BASEP4, BASE, 4
+ | lwzx CARG1, BASE_HI, RB
+ | lwzx CARG2, BASE_HI, RC
+ | lwzx TAB:RB, BASE_LO, RB
|.if DUALNUM
- | lwz RC, 4(RC)
+ | lwzx RC, BASE_LO, RC
|.else
- | lfd f0, 0(RC)
+ | lfdx f0, BASE, RC
|.endif
| checktab CARG1
| checknum cr1, CARG2
@@ -4528,9 +4933,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi TMP2, TMP2, 3
|.endif
| ble ->vmeta_tgetv // Integer key and in array part?
- | lwzx TMP0, TMP1, TMP2
|.if FPU
- | lfdx f14, TMP1, TMP2
+ | .if ENDIAN_LE
+ | lfdux f14, TMP1, TMP2
+ | lwz TMP0, WORD_HI(TMP1)
+ | .else
+ | lwzx TMP0, TMP1, TMP2
+ | lfdx f14, TMP1, TMP2
+ | .endif
|.else
| lwzux SAVE0, TMP1, TMP2
| lwz SAVE1, 4(TMP1)
@@ -4558,21 +4968,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5:
| checkstr CARG2; bne ->vmeta_tgetv
|.if not DUALNUM
- | lwz STR:RC, 4(RC)
+ | lwzx STR:RC, BASE_LO, RC
|.endif
| b ->BC_TGETS_Z // String key?
break;
case BC_TGETS:
| // RA = dst*8, RB = table*8, RC = str_const*8 (~)
- | lwzux CARG1, RB, BASE
+ | addi BASEP4, BASE, 4
+ | lwzx CARG1, BASE_HI, RB
| srwi TMP1, RC, 1
- | lwz TAB:RB, 4(RB)
+ | lwzx TAB:RB, BASE_LO, RB
| subfic TMP1, TMP1, -4
| checktab CARG1
| lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
| bne ->vmeta_tgets1
|->BC_TGETS_Z:
| // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | addi BASEP4, BASE, 4
| lwz TMP0, TAB:RB->hmask
| lwz TMP1, STR:RC->sid
| lwz NODE:TMP2, TAB:RB->node
@@ -4582,16 +4994,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sub TMP1, TMP0, TMP1
| add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|1:
- | lwz CARG1, NODE:TMP2->key
- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
- | lwz CARG2, NODE:TMP2->val
- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
+ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2)
+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
+ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2)
| checkstr CARG1; bne >4
| cmpw TMP0, STR:RC; bne >4
| checknil CARG2; beq >5 // Key found, but nil value?
|3:
- | stwux CARG2, RA, BASE
- | stw TMP1, 4(RA)
+ | stwx CARG2, BASE_HI, RA
+ | stwx TMP1, BASE_LO, RA
| ins_next
|
|4: // Follow hash chain.
@@ -4612,16 +5024,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TGETB:
| // RA = dst*8, RB = table*8, RC = index*8
- | lwzux CARG1, RB, BASE
+ | addi BASEP4, BASE, 4
+ | lwzx CARG1, BASE_HI, RB
| srwi TMP0, RC, 3
- | lwz TAB:RB, 4(RB)
+ | lwzx TAB:RB, BASE_LO, RB
| checktab CARG1; bne ->vmeta_tgetb
| lwz TMP1, TAB:RB->asize
| lwz TMP2, TAB:RB->array
| cmplw TMP0, TMP1; bge ->vmeta_tgetb
|.if FPU
- | lwzx TMP1, TMP2, RC
- | lfdx f0, TMP2, RC
+ | .if ENDIAN_LE
+ | lfdux f0, TMP2, RC
+ | lwz TMP1, WORD_HI(TMP2)
+ | .else
+ | lwzx TMP1, TMP2, RC
+ | lfdx f0, TMP2, RC
+ | .endif
|.else
| lwzux TMP1, TMP2, RC
| lwz TMP3, 4(TMP2)
@@ -4648,12 +5066,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TGETR:
| // RA = dst*8, RB = table*8, RC = key*8
- | add RB, BASE, RB
- | lwz TAB:CARG1, 4(RB)
+ | addi BASEP4, BASE, 4
+ | lwzx TAB:CARG1, BASE_LO, RB
|.if DUALNUM
- | add RC, BASE, RC
| lwz TMP0, TAB:CARG1->asize
- | lwz CARG2, 4(RC)
+ | lwzx CARG2, BASE_LO, RC
| lwz TMP1, TAB:CARG1->array
|.else
| lfdx f0, BASE, RC
@@ -4683,13 +5100,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_TSETV:
| // RA = src*8, RB = table*8, RC = key*8
- | lwzux CARG1, RB, BASE
- | lwzux CARG2, RC, BASE
- | lwz TAB:RB, 4(RB)
+ | addi BASEP4, BASE, 4
+ | lwzx CARG1, BASE_HI, RB
+ | lwzx CARG2, BASE_HI, RC
+ | lwzx TAB:RB, BASE_LO, RB
|.if DUALNUM
- | lwz RC, 4(RC)
+ | lwzx RC, BASE_LO, RC
|.else
- | lfd f0, 0(RC)
+ | lfdx f0, BASE, RC
|.endif
| checktab CARG1
| checknum cr1, CARG2
@@ -4716,7 +5134,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi TMP0, TMP2, 3
|.endif
| ble ->vmeta_tsetv // Integer key and in array part?
+ | .if ENDIAN_LE
+ | addi TMP2, TMP1, 4
+ | lwzx TMP2, TMP2, TMP0
+ | .else
| lwzx TMP2, TMP1, TMP0
+ | .endif
| lbz TMP3, TAB:RB->marked
|.if FPU
| lfdx f14, BASE, RA
@@ -4750,7 +5173,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5:
| checkstr CARG2; bne ->vmeta_tsetv
|.if not DUALNUM
- | lwz STR:RC, 4(RC)
+ | lwzx STR:RC, BASE_LO, RC
|.endif
| b ->BC_TSETS_Z // String key?
|
@@ -4760,9 +5183,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TSETS:
| // RA = src*8, RB = table*8, RC = str_const*8 (~)
- | lwzux CARG1, RB, BASE
+ | addi BASEP4, BASE, 4
+ | lwzx CARG1, BASE_HI, RB
| srwi TMP1, RC, 1
- | lwz TAB:RB, 4(RB)
+ | lwzx TAB:RB, BASE_LO, RB
| subfic TMP1, TMP1, -4
| checktab CARG1
| lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
@@ -4787,9 +5211,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lbz TMP3, TAB:RB->marked
| add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|1:
- | lwz CARG1, NODE:TMP2->key
- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
- | lwz CARG2, NODE:TMP2->val
+ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2)
+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
| lwz NODE:TMP1, NODE:TMP2->next
| checkstr CARG1; bne >5
| cmpw TMP0, STR:RC; bne >5
@@ -4834,9 +5258,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
|6:
| li TMP0, LJ_TSTR
- | stw STR:RC, 4(CARG3)
+ | stw STR:RC, WORD_LO(CARG3)
| mr CARG2, TAB:RB
- | stw TMP0, 0(CARG3)
+ | stw TMP0, WORD_HI(CARG3)
| bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
| // Returns TValue *.
| lp BASE, L->base
@@ -4846,6 +5270,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| stw SAVE0, 0(CRET1)
| stw SAVE1, 4(CRET1)
|.endif
+ | addi BASEP4, BASE, 4
| b <3 // No 2nd write barrier needed.
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4854,9 +5279,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TSETB:
| // RA = src*8, RB = table*8, RC = index*8
- | lwzux CARG1, RB, BASE
+ | addi BASEP4, BASE, 4
+ | lwzx CARG1, BASE_HI, RB
| srwi TMP0, RC, 3
- | lwz TAB:RB, 4(RB)
+ | lwzx TAB:RB, BASE_LO, RB
| checktab CARG1; bne ->vmeta_tsetb
| lwz TMP1, TAB:RB->asize
| lwz TMP2, TAB:RB->array
@@ -4870,7 +5296,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz SAVE1, 4(CARG2)
|.endif
| bge ->vmeta_tsetb
- | lwzx TMP1, TMP2, RC
+ | .if ENDIAN_LE
+ | addi TMP1, TMP2, 4
+ | lwzx TMP1, TMP1, RC
+ | .else
+ | lwzx TMP1, TMP2, RC
+ | .endif
| checknil TMP1; beq >5
|1:
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
@@ -4899,13 +5330,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_TSETR:
| // RA = dst*8, RB = table*8, RC = key*8
- | add RB, BASE, RB
- | lwz TAB:CARG2, 4(RB)
+ | addi BASEP4, BASE, 4
+ | lwzx TAB:CARG2, BASE_LO, RB
|.if DUALNUM
- | add RC, BASE, RC
| lbz TMP3, TAB:CARG2->marked
| lwz TMP0, TAB:CARG2->asize
- | lwz CARG3, 4(RC)
+ | lwzx CARG3, BASE_LO, RC
| lwz TMP1, TAB:CARG2->array
|.else
| lfdx f0, BASE, RC
@@ -4946,9 +5376,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add RA, BASE, RA
|1:
| add TMP3, KBASE, RD
- | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
+ | lwz TAB:CARG2, WORD_LO-8(RA) // Guaranteed to be a table.
| addic. TMP0, MULTRES, -8
- | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
+ | lwz TMP3, WORD_LO(TMP3) // Integer constant is in lo-word.
| srwi CARG3, TMP0, 3
| beq >4 // Nothing to copy?
| add CARG3, CARG3, TMP3
@@ -5007,8 +5437,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_CALL:
| // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
| mr TMP2, BASE
- | lwzux TMP0, BASE, RA
- | lwz LFUNC:RB, 4(BASE)
+ | lwzux2 TMP0, LFUNC:RB, BASE, RA
| subi NARGS8:RC, NARGS8:RC, 8
| addi BASE, BASE, 8
| checkfunc TMP0; bne ->vmeta_call
@@ -5022,8 +5451,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_CALLT:
| // RA = base*8, (RB = 0,) RC = (nargs+1)*8
- | lwzux TMP0, RA, BASE
- | lwz LFUNC:RB, 4(RA)
+ | lwzux2 TMP0, LFUNC:RB, RA, BASE
| subi NARGS8:RC, NARGS8:RC, 8
| lwz TMP1, FRAME_PC(BASE)
| checkfunc TMP0
@@ -5086,8 +5514,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
| mr TMP2, BASE
| add BASE, BASE, RA
- | lwz TMP1, -24(BASE)
- | lwz LFUNC:RB, -20(BASE)
+ | lwz TMP1, WORD_HI-24(BASE)
+ | lwz LFUNC:RB, WORD_LO-24(BASE)
|.if FPU
| lfd f1, -8(BASE)
| lfd f0, -16(BASE)
@@ -5097,8 +5525,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz CARG3, -16(BASE)
| lwz CARG4, -12(BASE)
|.endif
- | stw TMP1, 0(BASE) // Copy callable.
- | stw LFUNC:RB, 4(BASE)
+ | stw TMP1, WORD_HI(BASE) // Copy callable.
+ | stw LFUNC:RB, WORD_LO(BASE)
| checkfunc TMP1
| li NARGS8:RC, 16 // Iterators get 2 arguments.
|.if FPU
@@ -5121,8 +5549,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
|->vm_IITERN:
| add RA, BASE, RA
- | lwz TAB:RB, -12(RA)
- | lwz RC, -4(RA) // Get index from control var.
+ | lwz TAB:RB, WORD_LO-16(RA)
+ | lwz RC, WORD_LO-8(RA) // Get index from control var.
| lwz TMP0, TAB:RB->asize
| lwz TMP1, TAB:RB->array
| addi PC, PC, 4
@@ -5130,10 +5558,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| cmplw RC, TMP0
| slwi TMP3, RC, 3
| bge >5 // Index points after array part?
- | lwzx TMP2, TMP1, TMP3
|.if FPU
- | lfdx f0, TMP1, TMP3
+ | lfdux f0, TMP3, TMP1
+ | lwz TMP2, WORD_HI(TMP3)
|.else
+ | lwzx TMP2, TMP1, TMP3
| lwzux CARG1, TMP3, TMP1
| lwz CARG2, 4(TMP3)
|.endif
@@ -5141,8 +5570,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz INS, -4(PC)
| beq >4
|.if DUALNUM
- | stw RC, 4(RA)
- | stw TISNUM, 0(RA)
+ | stw RC, WORD_LO(RA)
+ | stw TISNUM, WORD_HI(RA)
|.else
| tonum_u f1, RC
|.endif
@@ -5155,7 +5584,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| stw CARG2, 12(RA)
|.endif
| decode_RD4 TMP1, INS
- | stw RC, -4(RA) // Update control var.
+ | stw RC, WORD_LO-8(RA) // Update control var.
| add PC, TMP1, TMP3
|.if not DUALNUM
| stfd f1, 0(RA)
@@ -5177,15 +5606,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bgty <3
| slwi RB, RC, 3
| sub TMP3, TMP3, RB
- | lwzx RB, TMP2, TMP3
|.if FPU
- | lfdx f0, TMP2, TMP3
+ | lfdux f0, TMP3, TMP2
+ | lwz RB, WORD_HI(TMP3)
|.else
| add CARG3, TMP2, TMP3
| lwz CARG1, 0(CARG3)
| lwz CARG2, 4(CARG3)
- |.endif
| add NODE:TMP3, TMP2, TMP3
+ |.endif
| checknil RB
| lwz INS, -4(PC)
| beq >7
@@ -5212,7 +5641,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
| addi RC, RC, 1
| add PC, TMP1, TMP2
- | stw RC, -4(RA) // Update control var.
+ | stw RC, WORD_LO-8(RA) // Update control var.
| b <3
|
|7: // Skip holes in hash part.
@@ -5223,10 +5652,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISNEXT:
| // RA = base*8, RD = target (points to ITERN)
| add RA, BASE, RA
- | lwz TMP0, -24(RA)
- | lwz CFUNC:TMP1, -20(RA)
- | lwz TMP2, -16(RA)
- | lwz TMP3, -8(RA)
+ | lwz TMP0, WORD_HI-24(RA)
+ | lwz CFUNC:TMP1, WORD_LO-24(RA)
+ | lwz TMP2, WORD_HI-16(RA)
+ | lwz TMP3, WORD_HI-8(RA)
| cmpwi cr0, TMP2, LJ_TTAB
| cmpwi cr1, TMP0, LJ_TFUNC
| cmpwi cr6, TMP3, LJ_TNIL
@@ -5248,10 +5677,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Despecialize bytecode if any of the checks fail.
| li TMP0, BC_JMP
| li TMP1, BC_ITERC
+ | .if ENDIAN_LE
+ | stb TMP0, -4(PC)
+ | .else
| stb TMP0, -1(PC)
+ | .endif
| addis PC, TMP3, -(BCBIAS_J*4 >> 16)
+ | .if ENDIAN_LE
+ | stb TMP1, 0(PC)
+ | .else
| // NYI on big-endian: unpatch JLOOP.
| stb TMP1, 3(PC)
+ | .endif
| b <1
break;
@@ -5295,7 +5732,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addi RA, RA, 8
| blt cr1, <1 // More vararg slots?
|2: // Fill up remainder with nil.
- | stw TISNIL, 0(RA)
+ | stw TISNIL, WORD_HI(RA)
| cmplw RA, TMP2
| addi RA, RA, 8
| blt <2
@@ -5342,6 +5779,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add RA, BASE, RA
| add RC, BASE, SAVE0
| subi TMP3, BASE, 8
+ | addi BASEP4, BASE, 4
| b <6
break;
@@ -5414,13 +5852,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bgt >6
| sub BASE, TMP2, RA
| lwz LFUNC:TMP1, FRAME_FUNC(BASE)
+ | addi BASEP4, BASE, 4
| ins_next1
| lwz TMP1, LFUNC:TMP1->pc
| lwz KBASE, PC2PROTO(k)(TMP1)
| ins_next2
|
|6: // Fill up results with nil.
- | subi TMP1, RD, 8
+ | addi TMP1, RD, WORD_HI-8
| addi RD, RD, 8
| stwx TISNIL, TMP2, TMP1
| b <5
@@ -5463,13 +5902,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bgt >6
| sub BASE, TMP2, RA
| lwz LFUNC:TMP1, FRAME_FUNC(BASE)
+ | addi BASEP4, BASE, 4
| ins_next1
| lwz TMP1, LFUNC:TMP1->pc
| lwz KBASE, PC2PROTO(k)(TMP1)
| ins_next2
|
|6: // Fill up results with nil.
- | subi TMP1, RD, 8
+ | addi TMP1, RD, WORD_HI-8
| addi RD, RD, 8
| stwx TISNIL, TMP2, TMP1
| b <5
@@ -5495,11 +5935,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = (op == BC_IFORL || op == BC_JFORL);
|.if DUALNUM
| // Integer loop.
- | lwzux TMP1, RA, BASE
- | lwz CARG1, FORL_IDX*8+4(RA)
+ | lwzux2 TMP1, CARG1, RA, BASE
+ if (vk) {
+ | mtxer ZERO
+ }
| cmplw cr0, TMP1, TISNUM
if (vk) {
- | lwz CARG3, FORL_STEP*8+4(RA)
+ | lwz CARG3, FORL_STEP*8+WORD_LO(RA)
| bne >9
|.if GPR64
| // Need to check overflow for (a<<32) + (b<<32).
@@ -5511,15 +5953,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addo. CARG1, CARG1, CARG3
|.endif
| cmpwi cr6, CARG3, 0
- | lwz CARG2, FORL_STOP*8+4(RA)
- | bso >6
+ | lwz CARG2, FORL_STOP*8+WORD_LO(RA)
+ | bso >2
|4:
- | stw CARG1, FORL_IDX*8+4(RA)
+ | stw CARG1, FORL_IDX*8+WORD_LO(RA)
} else {
- | lwz SAVE0, FORL_STEP*8(RA)
- | lwz CARG3, FORL_STEP*8+4(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | lwz CARG2, FORL_STOP*8+4(RA)
+ | lwz SAVE0, FORL_STEP*8+WORD_HI(RA)
+ | lwz CARG3, FORL_STEP*8+WORD_LO(RA)
+ | lwz TMP2, FORL_STOP*8+WORD_HI(RA)
+ | lwz CARG2, FORL_STOP*8+WORD_LO(RA)
| cmplw cr7, SAVE0, TISNUM
| cmplw cr1, TMP2, TISNUM
| crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
@@ -5530,11 +5972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| blt cr6, >5
| cmpw CARG1, CARG2
|1:
- | stw TISNUM, FORL_EXT*8(RA)
+ | stw TISNUM, FORL_EXT*8+WORD_HI(RA)
if (op != BC_JFORL) {
| srwi RD, RD, 1
}
- | stw CARG1, FORL_EXT*8+4(RA)
+ | stw CARG1, FORL_EXT*8+WORD_LO(RA)
if (op != BC_JFORL) {
| add RD, PC, RD
}
@@ -5554,11 +5996,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Invert check for negative step.
| cmpw CARG2, CARG1
| b <1
- if (vk) {
- |6: // Potential overflow.
- | checkov TMP0, <4 // Ignore unrelated overflow.
- | b <2
- }
|.endif
if (vk) {
|.if DUALNUM
@@ -5588,12 +6025,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz CARG3, FORL_STOP*8(RA)
| lwz CARG4, FORL_STOP*8+4(RA)
|.endif
- | lwz SAVE0, FORL_STEP*8(RA)
+ | lwz SAVE0, FORL_STEP*8+WORD_HI(RA)
} else {
|.if DUALNUM
|9: // FP loop.
|.else
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ | lwzx TMP1, RA, BASE_LO
+ | add RA, RA, BASE
+ |.else
| lwzux TMP1, RA, BASE
+ |.endif
| lwz SAVE0, FORL_STEP*8(RA)
| lwz TMP2, FORL_STOP*8(RA)
| cmplw cr0, TMP1, TISNUM
@@ -5696,17 +6139,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
#endif
case BC_IITERL:
| // RA = base*8, RD = target
- | lwzux TMP1, RA, BASE
- | lwz TMP2, 4(RA)
+ | lwzux2 TMP1, TMP2, RA, BASE
| checknil TMP1; beq >1 // Stop if iterator returned nil.
if (op == BC_JITERL) {
- | stw TMP1, -8(RA)
- | stw TMP2, -4(RA)
+ | stw TMP1, WORD_HI-8(RA)
+ | stw TMP2, WORD_LO-8(RA)
| b =>BC_JLOOP
} else {
| branch_RD // Otherwise save control var + branch.
- | stw TMP1, -8(RA)
- | stw TMP2, -4(RA)
+ | stw TMP1, WORD_HI-8(RA)
+ | stw TMP2, WORD_LO-8(RA)
}
|1:
| ins_next
@@ -5735,7 +6177,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // Traces on PPC don't store the trace number, so use 0.
| stw ZERO, DISPATCH_GL(vmstate)(DISPATCH)
| lwzx TRACE:TMP2, TMP1, RD
- | clrso TMP1
+ | mtxer ZERO
| lp TMP2, TRACE:TMP2->mcode
| stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
| mtctr TMP2
@@ -5787,7 +6229,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
|
|3: // Clear missing parameters.
- | stwx TISNIL, BASE, NARGS8:RC
+ |.if ENDIAN_LE
+ | addi BASEP4, BASE, 4
+ |.endif
+ | stwx TISNIL, BASE_HI, NARGS8:RC
| addi NARGS8:RC, NARGS8:RC, 8
| b <2
break;
@@ -5804,11 +6249,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz TMP2, L->maxstack
| add TMP1, BASE, RC
| add TMP0, RA, RC
- | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
+ | stw LFUNC:RB, WORD_LO(TMP1) // Store copy of LFUNC.
| addi TMP3, RC, 8+FRAME_VARG
| lwz KBASE, -4+PC2PROTO(k)(PC)
| cmplw TMP0, TMP2
- | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
+ | stw TMP3, WORD_HI(TMP1) // Store delta + FRAME_VARG.
| bge ->vm_growstack_l
| lbz TMP2, -4+PC2PROTO(numparams)(PC)
| mr RA, BASE
@@ -5819,18 +6264,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beq >3
|1:
| cmplw RA, RC // Less args than parameters?
- | lwz TMP0, 0(RA)
- | lwz TMP3, 4(RA)
+ | lwz TMP0, WORD_HI(RA)
+ | lwz TMP3, WORD_LO(RA)
| bge >4
- | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
+ | stw TISNIL, WORD_HI(RA) // Clear old fixarg slot (help the GC).
| addi RA, RA, 8
|2:
| addic. TMP2, TMP2, -1
- | stw TMP0, 8(TMP1)
- | stw TMP3, 12(TMP1)
+ | stw TMP0, WORD_HI+8(TMP1)
+ | stw TMP3, WORD_LO+8(TMP1)
| addi TMP1, TMP1, 8
| bne <1
|3:
+ | addi BASEP4, BASE, 4
| ins_next2
|
|4: // Clear missing parameters.
@@ -5842,35 +6288,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_FUNCCW:
| // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
if (op == BC_FUNCC) {
- | lp RD, CFUNC:RB->f
+ | lp FUNCREG, CFUNC:RB->f
} else {
- | lp RD, DISPATCH_GL(wrapf)(DISPATCH)
+ | lp FUNCREG, DISPATCH_GL(wrapf)(DISPATCH)
}
| add TMP1, RA, NARGS8:RC
| lwz TMP2, L->maxstack
- | .toc lp TMP3, 0(RD)
+ | .opd lp TMP3, 0(FUNCREG)
| add RC, BASE, NARGS8:RC
| stp BASE, L->base
| cmplw TMP1, TMP2
| stp RC, L->top
| li_vmstate C
- |.if TOC
+ |.if OPD
| mtctr TMP3
|.else
- | mtctr RD
+ | mtctr FUNCREG
|.endif
if (op == BC_FUNCCW) {
| lp CARG2, CFUNC:RB->f
}
| mr CARG1, L
| bgt ->vm_growstack_c // Need to grow stack.
- | .toc lp TOCREG, TOC_OFS(RD)
- | .tocenv lp ENVREG, ENV_OFS(RD)
+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
| st_vmstate
| bctrl // (lua_State *L [, lua_CFunction f])
+ | .toc lp TOCREG, SAVE_TOC
| // Returns nresults.
| lp BASE, L->base
- | .toc ld TOCREG, SAVE_TOC
| slwi RD, CRET1, 3
| lp TMP1, L->top
| li_vmstate INTERP
@@ -5921,7 +6367,11 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.byte 0x1\n"
"\t.string \"\"\n"
"\t.uleb128 0x1\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.sleb128 -8\n"
+#else
"\t.sleb128 -4\n"
+#endif
"\t.byte 65\n"
"\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
"\t.align 2\n"
@@ -5934,14 +6384,24 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long .Lbegin\n"
"\t.long %d\n"
"\t.byte 0xe\n\t.uleb128 %d\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
+ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n",
+#else
"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
"\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
+#endif
fcofs, CFRAME_SIZE);
for (i = 14; i <= 31; i++)
fprintf(ctx->fp,
"\t.byte %d\n\t.uleb128 %d\n"
"\t.byte %d\n\t.uleb128 %d\n",
- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
+#if LJ_ARCH_PPC32ON64
+ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i)
+#else
+ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)
+#endif
+ );
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE0:\n\n");
@@ -5957,8 +6417,12 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long lj_vm_ffi_call\n"
#endif
"\t.long %d\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
+#else
"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x8e\n\t.uleb128 2\n"
+#endif
+ "\t.byte 0x8e\n\t.uleb128 1\n"
"\t.byte 0xd\n\t.uleb128 0xe\n"
"\t.align 2\n"
".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
@@ -5973,7 +6437,11 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.byte 0x1\n"
"\t.string \"zPR\"\n"
"\t.uleb128 0x1\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.sleb128 -8\n"
+#else
"\t.sleb128 -4\n"
+#endif
"\t.byte 65\n"
"\t.uleb128 6\n" /* augmentation length */
"\t.byte 0x1b\n" /* pcrel|sdata4 */
@@ -5991,14 +6459,24 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long %d\n"
"\t.uleb128 0\n" /* augmentation length */
"\t.byte 0xe\n\t.uleb128 %d\n"
+#if LJ_ARCH_PPC32ON64
+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
+ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n",
+#else
"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
"\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
+#endif
fcofs, CFRAME_SIZE);
for (i = 14; i <= 31; i++)
fprintf(ctx->fp,
"\t.byte %d\n\t.uleb128 %d\n"
"\t.byte %d\n\t.uleb128 %d\n",
- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
+#if LJ_ARCH_PPC32ON64
+ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i)
+#else
+ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)
+#endif
+ );
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE2:\n\n");
@@ -6026,8 +6504,12 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long lj_vm_ffi_call-.\n"
"\t.long %d\n"
"\t.uleb128 0\n" /* augmentation length */
+#if LJ_ARCH_PPC32ON64
+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
+#else
"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x8e\n\t.uleb128 2\n"
+#endif
+ "\t.byte 0x8e\n\t.uleb128 1\n"
"\t.byte 0xd\n\t.uleb128 0xe\n"
"\t.align 2\n"
".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
new file mode 100644
index 00000000..b765f662
--- /dev/null
+++ b/src/vm_s390x.dasc
@@ -0,0 +1,4306 @@
+|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+|
+|// This assembly targets the instruction set available on z10 (and newer)
+|// machines.
+|
+|// ELF ABI registers:
+|// r0,r1 | | volatile |
+|// r2 | parameter and return value | volatile |
+|// r3-r5 | parameter | volatile |
+|// r6 | parameter | saved |
+|// r7-r11 | | saved |
+|// r12 | GOT pointer (needed?) | saved |
+|// r13 | literal pool (not needed) | saved |
+|// r14 | return address | volatile |
+|// r15 | stack pointer | saved |
+|// f0,f2,f4,f6 | parameter and return value | volatile |
+|// f1,f3,f5,f7 | | volatile |
+|// f8-f15 | | saved |
+|// ar0,ar1 | TLS | volatile |
+|// ar2-ar15 | | volatile |
+|
+|.arch s390x
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|//-----------------------------------------------------------------------
+|
+|// Fixed register assignments for the interpreter, callee-saved.
+|.define KBASE, r8 // Constants of current Lua function.
+|.define PC, r9 // Next PC.
+|.define DISPATCH, r10 // Opcode dispatch table.
+|.define ITYPE, r11 // Temporary used for type information.
+|.define BASE, r13 // Base of current Lua stack frame.
+|
+|// The following temporaries are not saved across C calls, except for RB.
+|.define RA, r4 // Overlaps CARG3.
+|.define RB, r7 // Must be callee-save.
+|.define RC, r5 // Overlaps CARG4.
+|.define RD, r6 // Overlaps CARG5.
+|
+|// Calling conventions. Also used as temporaries.
+|.define CARG1, r2
+|.define CARG2, r3
+|.define CARG3, r4
+|.define CARG4, r5
+|.define CARG5, r6
+|
+|.define FARG1, f0
+|.define FARG2, f2
+|.define FARG3, f4
+|.define FARG4, f6
+|
+|.define CRET1, r2
+|
+|.define TMPR0, r0
+|.define TMPR1, r1
+|.define OP, r2
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
+|
+|// Register save area.
+|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
+|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated).
+|
+|// Argument save area.
+|.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
+|.define SAVE_NRES, 272(sp) // Argument 3, in r4. Size is 4-bytes.
+|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3.
+|.define SAVE_L, 256(sp) // Argument 1, in r2.
+|.define RESERVED, 248(sp) // Reserved for compiler use.
+|.define BACKCHAIN, 240(sp) // <- sp entering interpreter.
+|
+|// Interpreter stack frame.
+|.define SAVE_FPR15, 232(sp)
+|.define SAVE_FPR14, 224(sp)
+|.define SAVE_FPR13, 216(sp)
+|.define SAVE_FPR12, 208(sp)
+|.define SAVE_FPR11, 200(sp)
+|.define SAVE_FPR10, 192(sp)
+|.define SAVE_FPR9, 184(sp)
+|.define SAVE_FPR8, 176(sp)
+|.define SAVE_PC, 168(sp)
+|.define SAVE_MULTRES, 160(sp) // Accessed as a 32-bit value (st/llgf) in this file.
+|.define SAVE_TMP, 160(sp) // Overlaps SAVE_MULTRES; a 64-bit store here also overwrites it.
+|.define SAVE_TMP_HI, 164(sp) // Upper-address word of SAVE_TMP (low-order 32 bits on big-endian s390x); does not overlap the 4-byte SAVE_MULTRES.
+|
+|// Callee save area (allocated by interpreter).
+|.define CALLEESAVE, 000(sp) // <- sp in interpreter.
+|
+|.macro saveregs
+| stmg r6, r15, SAVE_GPRS_P
+| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
+| std f9, SAVE_FPR9
+| std f10, SAVE_FPR10
+| std f11, SAVE_FPR11
+| std f12, SAVE_FPR12
+| std f13, SAVE_FPR13
+| std f14, SAVE_FPR14
+| std f15, SAVE_FPR15
+|.endmacro
+|
+|.macro restoreregs
+| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
+| ld f9, SAVE_FPR9
+| ld f10, SAVE_FPR10
+| ld f11, SAVE_FPR11
+| ld f12, SAVE_FPR12
+| ld f13, SAVE_FPR13
+| ld f14, SAVE_FPR14
+| ld f15, SAVE_FPR15
+| lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
+|.endmacro
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State
+|.type GL, global_State
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Instruction headers.
+|.macro ins_A; .endmacro
+|.macro ins_AD; .endmacro
+|.macro ins_AJ; .endmacro
+|.macro ins_ABC; srlg RB, RD, 8; llgcr RC, RD; .endmacro
+|.macro ins_AB_; srlg RB, RD, 8; .endmacro
+|.macro ins_A_C; llgcr RC, RD; .endmacro
+|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
+|
+|// Instruction decode+dispatch.
+|.macro ins_NEXT
+| llgc OP, 3(PC)
+| llgh RD, 0(PC)
+| llgc RA, 2(PC)
+| sllg TMPR1, OP, 3
+| lg TMPR1, 0(TMPR1, DISPATCH)
+| la PC, 4(PC)
+| br TMPR1
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+| .define ins_next, ins_NEXT
+| .define ins_next_, ins_NEXT
+|.else
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+| .macro ins_next
+| j ->ins_next
+| .endmacro
+| .macro ins_next_
+| ->ins_next:
+| ins_NEXT
+| .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
+| lg PC, LFUNC:RB->pc
+| llgc OP, 3(PC)
+| llgc RA, 2(PC)
+| sllg TMPR1, OP, 3
+| la PC, 4(PC)
+| lg TMPR1, 0(TMPR1, DISPATCH)
+| br TMPR1
+|.endmacro
+|
+|.macro ins_call
+| // BASE = new base, RB = LFUNC, RD = nargs+1
+| stg PC, -8(BASE)
+| ins_callt
+|.endmacro
+|
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to clear or set tags.
+|.macro cleartp, reg
+| nihf reg, 0x7fff
+|.endmacro
+|.macro settp, reg, tp
+| oihf reg, tp<<15
+|.endmacro
+|.macro settp, dst, reg, tp
+| llihf dst, tp<<15
+| ogr dst, reg
+|.endmacro
+|.macro setint, reg
+| settp reg, LJ_TISNUM
+|.endmacro
+|.macro setint, dst, reg
+| settp dst, reg, LJ_TISNUM
+|.endmacro
+|
+|// Macros to test operand types.
+|.macro checktp_nc, reg, tp, target
+| srag ITYPE, reg, 47
+| clfi ITYPE, tp
+| jne target
+|.endmacro
+|.macro checktp, reg, tp, target
+| srag ITYPE, reg, 47
+| cleartp reg
+| clfi ITYPE, tp
+| jne target
+|.endmacro
+|.macro checktptp, src, tp, target
+| srag ITYPE, src, 47
+| clfi ITYPE, tp
+| jne target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
+|
+|.macro checknumx, reg, target, jump
+| srag ITYPE, reg, 47
+| clfi ITYPE, LJ_TISNUM
+| jump target
+|.endmacro
+|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
+|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
+|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro
+|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro
+|.macro checknumber, src, target; checknumx src, target, jh; .endmacro
+|
+|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47)
+|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47)
+|
+|.define PC_OP, -1(PC)
+|.define PC_RA, -2(PC)
+|.define PC_RB, -4(PC)
+|.define PC_RC, -3(PC)
+|.define PC_RD, -4(PC)
+|
+|.macro branchPC, reg
+| // Must not clobber condition code.
+| sllg TMPR1, reg, 2
+| lay PC, (-BCBIAS_J*4)(TMPR1, PC)
+|.endmacro
+|
+|// Decrement hashed hotcount and trigger trace recorder when the counter drops below zero.
+|.macro hotloop, reg
+| lgr reg, PC // reg = PC
+| srlg reg, reg, 1 // reg = PC >> 1
+| nill reg, HOTCOUNT_PCMASK // ANDs only the low 16 bits of reg. NOTE(review): upper 48 bits are left as-is — confirm the hash index is fully masked.
+| afi reg, GG_DISP2HOT
+| agr reg, DISPATCH // reg = address of the hashed counter slot, relative to DISPATCH.
+| lg TMPR1, (reg) // 64-bit load. NOTE(review): confirm this matches the hot-count element width.
+| aghi TMPR1, -HOTCOUNT_LOOP
+| stg TMPR1, (reg) // 64-bit store of the decremented counter (same width caveat as the load).
+| jl ->vm_hotloop // Taken when the decremented counter is negative.
+|.endmacro
+|
+|// Set current VM state.
+|.macro set_vmstate, st
+| lghi TMPR1, ~LJ_VMST_..st
+| stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH)
+|.endmacro
+|
+|// Synthesize binary floating-point constants.
+|.macro bfpconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
+| llihh tmp, 0x4338
+| ldgr reg, tmp
+|.endmacro
+|
+|// Move table write barrier back. Overwrites reg.
+|.macro barrierback, tab, reg
+| ni tab->marked, ~LJ_GC_BLACK // black2gray(tab)
+| lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH)
+| stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH)
+| stg reg, tab->gclist
+|.endmacro
+
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for s390x target"
+#endif
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Return handling ----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+ | tmll PC, FRAME_P
+ | je ->cont_dispatch
+ |
+ | // Return from pcall or xpcall fast func.
+ | nill PC, -8
+ | sgr BASE, PC // Restore caller base.
+ | lay RA, -8(RA, PC) // Rebase RA and prepend one result.
+ | lg PC, -8(BASE) // Fetch PC of previous frame.
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | load_true ITYPE
+ | stg ITYPE, 0(RA, BASE) // Prepend true to results.
+ |
+ |->vm_returnc:
+ | aghi RD, 1 // RD = nresults+1
+ | je ->vm_unwind_yield
+ | st RD, SAVE_MULTRES
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
+ | lghi TMPR1, FRAME_C
+ | xgr PC, TMPR1
+ | tmll PC, FRAME_TYPE
+ | jne ->vm_returnp
+ |
+ | // Return to C.
+ | set_vmstate C
+ | nill PC, -8
+ | sgr PC, BASE
+ | lcgr PC, PC // Previous base = BASE - delta.
+ |
+ | aghi RD, -1
+ | je >2
+ |1: // Move results down.
+ | lg RB, 0(BASE, RA)
+ | stg RB, -16(BASE)
+ | la BASE, 8(BASE)
+ | aghi RD, -1
+ | jne <1
+ |2:
+ | lg L:RB, SAVE_L
+ | stg PC, L:RB->base
+ |3:
+ | llgf RD, SAVE_MULTRES
+ | lgf RA, SAVE_NRES // RA = wanted nresults+1
+ |4:
+ | cgr RA, RD
+ | jne >6 // More/less results wanted?
+ |5:
+ | lay BASE, -16(BASE)
+ | stg BASE, L:RB->top
+ |
+ |->vm_leave_cp:
+ | lg RA, SAVE_CFRAME // Restore previous C frame.
+ | stg RA, L:RB->cframe
+ | lghi CRET1, 0 // Ok return status for vm_pcall.
+ |
+ |->vm_leave_unw:
+ | restoreregs
+ | br r14
+ |
+ |6:
+ | jl >7 // Less results wanted?
+ | // More results wanted. Check stack size and fill up results with nil.
+ | cg BASE, L:RB->maxstack
+ | jh >8
+ | lghi TMPR1, LJ_TNIL
+ | stg TMPR1, -16(BASE)
+ | la BASE, 8(BASE)
+ | aghi RD, 1
+ | j <4
+ |
+ |7: // Fewer results wanted.
+ | cghi RA, 0
+ | je <5 // But check for LUA_MULTRET+1.
+ | sgr RA, RD // Negative result!
+ | sllg TMPR1, RA, 3
+ | la BASE, 0(TMPR1, BASE) // Correct top.
+ | j <5
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | stg BASE, L:RB->top // Save current top held in BASE (yes).
+ | st RD, SAVE_MULTRES // Need to fill only remainder with nil.
+ | lgr CARG2, RA
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->top // Need the (realloced) L->top in BASE.
+ | j <3
+ |
+ |->vm_unwind_yield:
+ | lghi CRET1, LUA_YIELD
+ | j ->vm_unwind_c_eh
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | lgr sp, CARG1
+ | lgfr CARG2, CRET1 // Error return status for vm_pcall.
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | lg L:RB, SAVE_L
+ | lg GL:RB, L:RB->glref
+ | lghi TMPR1, ~LJ_VMST_C
+ | stg TMPR1, GL:RB->vmstate
+ | j ->vm_leave_unw
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK.
+ | lgr sp, CARG1
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | lg L:RB, SAVE_L
+ | lghi RD, 1+1 // Really 1+2 results, incr. later.
+ | lg BASE, L:RB->base
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | la DISPATCH, GG_G2DISP(DISPATCH)
+ | lg PC, -8(BASE) // Fetch PC of previous frame.
+ | load_false RA
+ | lg RB, 0(BASE)
+ | stg RA, -16(BASE) // Prepend false to error message.
+ | stg RB, -8(BASE)
+ | lghi RA, -16 // Results start at BASE+RA = BASE-16.
+ | set_vmstate INTERP
+ | j ->vm_returnc // Increments RD/MULTRES and returns.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+ | lghi CARG2, LUA_MINSTACK
+ | j >2
+ |
+ |->vm_growstack_v: // Grow stack for vararg Lua function.
+ | aghi RD, -16 // LJ_FR2
+ | j >1
+ |
+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ |1:
+ | llgc RA, (PC2PROTO(framesize)-4)(PC)
+ | la PC, 4(PC) // Must point after first instruction.
+ | stg BASE, L:RB->base
+ | stg RD, L:RB->top
+ | stg PC, SAVE_PC
+ | lgr CARG2, RA
+ |2:
+ | // RB = L, L->base = new base, L->top = top
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | sgr RD, BASE
+ | srlg RD, RD, 3
+ | aghi NARGS:RD, 1
+ | // BASE = new base, RB = LFUNC, RD = nargs+1
+ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | lgr L:RB, CARG1
+ | stg CARG1, SAVE_L
+ | lgr RA, CARG2
+ | lghi PC, FRAME_CP
+ | lghi RD, 0
+ | la KBASE, CFRAME_RESUME(sp)
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | aghi DISPATCH, GG_G2DISP
+ | stg RD, SAVE_PC // Any value outside of bytecode is ok.
+ | stg RD, SAVE_CFRAME
+ | st RD, SAVE_NRES
+ | stg RD, SAVE_ERRF
+ | stg KBASE, L:RB->cframe
+ | clm RD, 1, L:RB->status
+ | je >2 // Initial resume (like a call).
+ |
+ | // Resume after yield (like a return).
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+ | stc RD, L:RB->status
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | sgr RD, RA
+ | srlg RD, RD, 3
+ | aghi RD, 1 // RD = nresults+1
+ | sgr RA, BASE // RA = resultofs
+ | lg PC, -8(BASE)
+ | st RD, SAVE_MULTRES
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z
+ | j ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | lghi PC, FRAME_CP
+ | llgfr CARG4, CARG4
+ | stg CARG4, SAVE_ERRF
+ | j >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1)
+ | saveregs
+ | lghi PC, FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | st CARG3, SAVE_NRES
+ | lgr L:RB, CARG1
+ | stg CARG1, SAVE_L
+ | lgr RA, CARG2 // Caveat: RA = CARG3.
+ |
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | stg KBASE, SAVE_CFRAME
+ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
+ | aghi DISPATCH, GG_G2DISP
+ | stg sp, L:RB->cframe
+ |
+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+ | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
+ | set_vmstate INTERP
+ | lg BASE, L:RB->base // BASE = old base (used in vmeta_call).
+ | agr PC, RA
+ | sgr PC, BASE // PC = frame delta + frame type
+ |
+ | lg RD, L:RB->top
+ | sgr RD, RA
+ | srlg NARGS:RD, NARGS:RD, 3
+ | aghi NARGS:RD, 1 // RD = nargs+1
+ |
+ |->vm_call_dispatch:
+ | lg LFUNC:RB, -16(RA)
+ | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
+ |
+ |->vm_call_dispatch_f:
+ | lgr BASE, RA
+ | ins_call
+ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+ | lgr L:RB, CARG1
+ | stg L:RB, SAVE_L
+ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
+ |
+ | lg KBASE, L:RB->stack // Compute -savestack(L, L->top).
+ | sg KBASE, L:RB->top
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | lghi TMPR0, 0
+ | stg TMPR0, SAVE_ERRF // No error function.
+ | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | aghi DISPATCH, GG_G2DISP
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+ |
+ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | stg KBASE, SAVE_CFRAME
+ | stg sp, L:RB->cframe
+ | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
+ |
+ | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ | // TValue * (new base) or NULL returned in r2 (CRET1).
+ | cghi CRET1, 0
+ | je ->vm_leave_cp // No base? Just remove C frame.
+ | lgr RA, CRET1
+ | lghi PC, FRAME_CP
+ | j <2 // Else continue with the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
+ | agr RA, BASE
+ | nill PC, -8
+ | lgr RB, BASE
+ | sgr BASE, PC // Restore caller BASE.
+ | sllg TMPR1, RD, 3
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, -8(RA, TMPR1) // Ensure one valid arg.
+ | lgr RC, RA // ... in [RC]
+ | lg PC, -24(RB) // Restore PC from [cont|PC].
+ | lg RA, -32(RB)
+ |.if FFI
+ | clfi RA, 1
+ | jle >1
+ |.endif
+ | lg LFUNC:KBASE, -16(BASE)
+ | cleartp LFUNC:KBASE
+ | lg KBASE, LFUNC:KBASE->pc
+ | lg KBASE, (PC2PROTO(k))(KBASE)
+ | // BASE = base, RC = result, RB = meta base
+ | br RA // Jump to continuation.
+ |
+ |.if FFI
+ |1:
+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: Tail call from C function.
+ | sgr RB, BASE
+ | srl RB, 3
+ | ahi RB, -3
+ | llgfr RD, RB
+ | j ->vm_call_tail
+ |.endif
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
+ | llgc RA, PC_RB
+ | sllg RA, RA, 3
+ | aghi RB, -32
+ | la RA, 0(RA, BASE)
+ | sgr RA, RB
+ | je ->cont_ra
+ | lcgr RA, RA
+ | srlg RA, RA, 3
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgfr CARG3, RA // Caveat: RA == CARG3.
+ | lg TMPR0, 0(RC)
+ | stg TMPR0, 0(RB)
+ | lgr CARG2, RB
+ | j ->BC_CAT_Z
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets:
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+ | stg STR:RC, SAVE_TMP
+ | la RC, SAVE_TMP
+ | llgc TMPR1, PC_OP
+ | cghi TMPR1, BC_GGET
+ | jne >1
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
+ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
+ | stg TAB:RA, 0(RB)
+ | j >2
+ |
+ |->vmeta_tgetb:
+ | llgc RC, PC_RC
+ | setint RC
+ | stg RC, SAVE_TMP
+ | la RC, SAVE_TMP
+ | j >1
+ |
+ |->vmeta_tgetv:
+ | llgc RC, PC_RC // Reload TValue *k from RC.
+ | sllg RC, RC, 3
+ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RB, PC_RB // Reload TValue *t from RB.
+ | sllg RB, RB, 3
+ | la RB, 0(RB, BASE)
+ |2:
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr CARG2, RB
+ | lgr CARG3, RC
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | ltgr RC, CRET1
+ | je >3
+ |->cont_ra: // BASE = base, RC = result
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | lg RB, 0(RC)
+ | stg RB, 0(RA, BASE)
+ | ins_next
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | lg RA, L:RB->top
+ | stg PC, -24(RA) // [cont|PC]
+ | la PC, FRAME_CONT(RA)
+ | sgr PC, BASE
+ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
+ | lghi NARGS:RD, 2+1 // 2 args for func(t, k).
+ | cleartp LFUNC:RB
+ | j ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+ | lgr CARG1, TAB:RB
+ | lgfr CARG2, RC
+ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in r2 (CRET1).
+ | llgc RA, PC_RA
+ | ltgr RC, CRET1
+ | jne ->BC_TGETR_Z
+ | lghi ITYPE, LJ_TNIL
+ | j ->BC_TGETR2_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets:
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+ | stg STR:RC, SAVE_TMP
+ | la RC, SAVE_TMP
+ | llgc TMPR0, PC_OP
+ | cghi TMPR0, BC_GSET
+ | jne >1
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
+ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
+ | stg TAB:RA, 0(RB)
+ | j >2
+ |
+ |->vmeta_tsetb:
+ | llgc RC, PC_RC
+ | setint RC
+ | stg RC, SAVE_TMP
+ | la RC, SAVE_TMP
+ | j >1
+ |
+ |->vmeta_tsetv:
+ | llgc RC, PC_RC // Reload TValue *k from RC.
+ | sllg RC, RC, 3
+ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RB, PC_RB // Reload TValue *t from RB.
+ | sllg RB, RB, 3
+ | la RB, 0(RB, BASE)
+ |2:
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr CARG2, RB
+ | lgr CARG3, RC
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | ltgr RC, CRET1
+ | je >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | stg RB, 0(RC)
+ |->cont_nop: // BASE = base, (RC = result)
+ | ins_next
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | lg RA, L:RB->top
+ | stg PC, -24(RA) // [cont|PC]
+ | llgc RC, PC_RA
+ | // Copy value to third argument.
+ | sllg RB, RC, 3
+ | lg RB, 0(RB, BASE)
+ | stg RB, 16(RA)
+ | la PC, FRAME_CONT(RA)
+ | sgr PC, BASE
+ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
+ | lghi NARGS:RD, 3+1 // 3 args for func(t, k, v).
+ | cleartp LFUNC:RB
+ | j ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+ | lg L:CARG1, SAVE_L
+ | lgr CARG2, TAB:RB
+ | stg BASE, L:CARG1->base
+ | lgfr CARG3, RC
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // TValue * returned in r2 (CRET1).
+ | lgr RC, CRET1
+ | llgc RA, PC_RA
+ | j ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | llgh RD, PC_RD
+ | sllg RD, RD, 3
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | la CARG2, 0(RA, BASE)
+ | la CARG3, 0(RD, BASE) // Caveat: RA == CARG3
+ | lgr CARG1, L:RB
+ | llgc CARG4, PC_OP
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+ |3:
+ | lgr RC, CRET1
+ | lg BASE, L:RB->base
+ | clgfi RC, 1
+ | jh ->vmeta_binop
+ |4:
+ | la PC, 4(PC)
+ | jl >6
+ |5:
+ | llgh RD, PC_RD
+ | branchPC RD
+ |6:
+ | ins_next
+ |
+ |->cont_condt: // BASE = base, RC = result
+ | la PC, 4(PC)
+ | lg ITYPE, 0(RC)
+ | srag ITYPE, ITYPE, 47
+ | lghi TMPR0, LJ_TISTRUECOND
+ | clr ITYPE, TMPR0 // Branch if result is true.
+ | jl <5
+ | j <6
+ |
+ |->cont_condf: // BASE = base, RC = result
+ | lg ITYPE, 0(RC)
+ | srag ITYPE, ITYPE, 47
+ | lghi TMPR0, LJ_TISTRUECOND
+ | clr ITYPE, TMPR0 // Branch if result is false.
+ | j <4
+ |
+ |->vmeta_equal:
+ | cleartp TAB:RD
+ | lay PC, -4(PC)
+ | lgr CARG2, RA
+ | lgfr CARG4, RB
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lgr CARG3, RD
+ | lgr CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+ | j <3
+ |
+ |->vmeta_equal_cd:
+ |.if FFI
+ | lay PC, -4(PC)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lgr CARG1, L:RB
+ | llgf CARG2, -4(PC)
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+ | j <3
+ |.endif
+ |
+ |->vmeta_istype:
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | llgfr CARG2, RA
+ | llgfr CARG3, RD // Caveat: CARG3 == RA.
+ | lgr L:CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | lg BASE, L:RB->base
+ | j <6
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_arith_vno:
+ | llgc RB, PC_RB
+ | llgc RC, PC_RC
+ |->vmeta_arith_vn:
+ | sllg RB, RB, 3
+ | sllg RC, RC, 3
+ | la RB, 0(RB, BASE)
+ | la RC, 0(RC, KBASE)
+ | j >1
+ |
+ |->vmeta_arith_nvo:
+ | llgc RC, PC_RC
+ | llgc RB, PC_RB
+ |->vmeta_arith_nv:
+ | sllg RC, RC, 3
+ | sllg RB, RB, 3
+ | la TMPR1, 0(RC, KBASE)
+ | la RC, 0(RB, BASE)
+ | lgr RB, TMPR1
+ | j >1
+ |
+ |->vmeta_unm:
+ | llgh RD, PC_RD
+ | sllg RD, RD, 3
+ | la RC, 0(RD, BASE)
+ | lgr RB, RC
+ | j >1
+ |
+ |->vmeta_arith_vvo:
+ | llgc RB, PC_RB
+ | llgc RC, PC_RC
+ |->vmeta_arith_vv:
+ | sllg RC, RC, 3
+ | sllg RB, RB, 3
+ | la RB, 0(RB, BASE)
+ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE)
+ | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
+ | lgr CARG2, RA
+ | lgr CARG3, RB // Caveat: CARG3 == RA.
+ | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out).
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | cghi CRET1, 0
+ | lgr RC, CRET1
+ | je ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = base, RC = new base, stack = cont/func/o1/o2
+ | lgr RA, RC
+ | sgr RC, BASE
+ | stg PC, -24(RA) // [cont|PC]
+ | la PC, FRAME_CONT(RC)
+ | lghi NARGS:RD, 2+1 // 2 args for func(o1, o2).
+ | j ->vm_call_dispatch
+ |
+ |->vmeta_len:
+ | llgh RD, PC_RD
+ | sllg RD, RD, 3
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | la CARG2, 0(RD, BASE)
+ | lgr L:CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_len // (lua_State *L, TValue *o)
+ | // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1).
+ | lgr RC, CRET1
+ | lg BASE, L:RB->base
+#if LJ_52
+ | cghi RC, 0
+ | jne ->vmeta_binop // Binop call for compatibility.
+ | llgh RD, PC_RD
+ | sllg RD, RD, 3
+ | lg TAB:CARG1, 0(RD, BASE)
+ | cleartp TAB:CARG1
+ | j ->BC_LEN_Z
+#else
+ | j ->vmeta_binop // Binop call for compatibility.
+#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call_ra:
+ | la RA, 16(RA, BASE) // RA previously set to RA*8.
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // BASE = old base, RA = new base, RD = nargs+1, PC = return
+ | stg NARGS:RD, SAVE_TMP // Save nargs+1 across the C call. NOTE(review): this 8-byte store also overwrites SAVE_MULTRES (same slot); confirm MULTRES is dead here.
+ | lgr RB, RA // Keep the new base in callee-saved RB across the C call.
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lay CARG2, -16(RA) // func = new base - 16.
+ | sllg RD, RD, 3
+ | lay CARG3, -8(RA, RD) // Caveat: CARG3 == RA. top = new base + nargs*8.
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | lgr RA, RB // Restore new base.
+ | lg L:RB, SAVE_L
+ | lg BASE, L:RB->base
+ | lg NARGS:RD, SAVE_TMP // Restore nargs+1.
+ | lg LFUNC:RB, -16(RA)
+ | aghi NARGS:RD, 1 // 32-bit on x64.
+ | // This is fragile. L->base must not move, KBASE must always be defined.
+ | cgr KBASE, BASE // Continue with CALLT if flag set.
+ | je ->BC_CALLT_Z
+ | cleartp LFUNC:RB
+ | lgr BASE, RA
+ | ins_call // Otherwise call resolved metamethod.
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lgr CARG2, RA
+ | lgr CARG1, RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_for // (lua_State *L, TValue *base)
+ | lg BASE, L:RB->base
+ | llgc OP, PC_OP
+ | llgc RA, PC_RA
+ | llgh RD, PC_RD
+ | sllg TMPR1, OP, 3
+ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI.
+ | br TMPR1
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+ | clfi NARGS:RD, 1+1; jl ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+ | clfi NARGS:RD, 2+1; jl ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name, op
+ | .ffunc_1 name
+ | lg TMPR0, 0(BASE)
+ | checknumtp TMPR0, ->fff_fallback
+ | op f0, 0(BASE)
+ |.endmacro
+ |
+ |.macro .ffunc_n, name
+ | .ffunc_n name, ld
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ | .ffunc_2 name
+ | lg TMPR1, 0(BASE)
+ | lg TMPR0, 8(BASE)
+ | ld FARG1, 0(BASE)
+ | ld FARG2, 8(BASE)
+ | checknumtp TMPR1, ->fff_fallback
+ | checknumtp TMPR0, ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses label 1.
+ |.macro ffgccheck
+ | lg RB, (DISPATCH_GL(gc.total))(DISPATCH) // Clobbers RB.
+ | clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH)
+ | jl >1 // gc.total < gc.threshold: no GC step needed.
+ | brasl r14, ->fff_gcstep // fff_gcstep returns here via r14.
+ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+ | lg RB, 0(BASE)
+ | srag ITYPE, RB, 47
+ | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
+ | lg PC, -8(BASE)
+ | st RD, SAVE_MULTRES
+ | lg RB, 0(BASE)
+ | stg RB, -16(BASE)
+ | ahi RD, -2
+ | je >2
+ | lgr RA, BASE
+ |1:
+ | la RA, 8(RA)
+ | lg RB, 0(RA)
+ | stg RB, -16(RA)
+ | brct RD, <1
+ |2:
+ | llgf RD, SAVE_MULTRES
+ | j ->fff_res_
+ |
+ |.ffunc_1 type
+ | lg RC, 0(BASE)
+ | srag RC, RC, 47
+ | lghi RB, LJ_TISNUM
+ | clgr RC, RB
+ | jnl >1
+ | lgr RC, RB
+ |1:
+ | lghi TMPR0, -1
+ | xgr RC, TMPR0
+ |2:
+ | lg CFUNC:RB, -16(BASE)
+ | cleartp CFUNC:RB
+ | sllg RC, RC, 3
+ | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB)
+ | lg PC, -8(BASE)
+ | settp STR:RC, LJ_TSTR
+ | stg STR:RC, -16(BASE)
+ | j ->fff_res1
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | lg TAB:RB, 0(BASE)
+ | lg PC, -8(BASE)
+ | checktab TAB:RB, >6
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | lg TAB:RB, TAB:RB->metatable
+ |2:
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, -16(BASE)
+ | cghi TAB:RB, 0
+ | je ->fff_res1
+ | settp TAB:RC, TAB:RB, LJ_TTAB
+ | stg TAB:RC, -16(BASE) // Store metatable as default result.
+ | lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH)
+ | llgf RA, TAB:RB->hmask
+ | n RA, STR:RC->sid
+ | settp STR:RC, LJ_TSTR
+ | mghi RA, #NODE
+ | ag NODE:RA, TAB:RB->node
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | cg STR:RC, NODE:RA->key
+ | je >5
+ |4:
+ | ltg NODE:RA, NODE:RA->next
+ | jne <3
+ | j ->fff_res1 // Not found, keep default result.
+ |5:
+ | lg RB, NODE:RA->val
+ | cghi RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
+ | stg RB, -16(BASE) // Return value of mt.__metatable.
+ | j ->fff_res1
+ |
+ |6:
+ | clfi ITYPE, LJ_TUDATA; je <1
+ | clfi ITYPE, LJ_TISNUM; jh >7
+ | lhi ITYPE, LJ_TISNUM
+ |7:
+ | lhi TMPR0, -1
+ | xr ITYPE, TMPR0 // not ITYPE
+ | llgfr ITYPE, ITYPE
+ | sllg ITYPE, ITYPE, 3
+ | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
+ | j <2
+ |
+ |.ffunc_2 setmetatable
+ | lg TAB:RB, 0(BASE)
+ | lgr TAB:TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | lghi TMPR0, 0
+ | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+ | lg TAB:RA, 8(BASE)
+ | checktab TAB:RA, ->fff_fallback
+ | stg TAB:RA, TAB:RB->metatable
+ | lg PC, -8(BASE)
+ | stg TAB:TMPR1, -16(BASE) // Return original table.
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | je >1
+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
+ | barrierback TAB:RB, RC
+ |1:
+ | j ->fff_res1
+ |
+ |.ffunc_2 rawget
+ | lg TAB:CARG2, 0(BASE)
+ | checktab TAB:CARG2, ->fff_fallback
+ | la CARG3, 8(BASE)
+ | lg CARG1, SAVE_L
+ | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // cTValue * returned in r2 (CRET1).
+ | // Copy table slot.
+ | lg RB, 0(CRET1)
+ | lg PC, -8(BASE)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | clfi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
+ | lg RB, 0(BASE)
+ | checknumber RB, ->fff_fallback
+ | lg PC, -8(BASE)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | lg PC, -8(BASE)
+ | lg STR:RB, 0(BASE)
+ | checktp_nc STR:RB, LJ_TSTR, >3
+ | // A __tostring method in the string base metatable is ignored.
+ |2:
+ | stg STR:RB, -16(BASE)
+ | j ->fff_res1
+ |3: // Handle numbers inline, unless a number base metatable is present.
+ | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1
+ | lghi TMPR0, 0
+ | cg TMPR0, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
+ | jne ->fff_fallback
+ | ffgccheck // Caveat: uses label 1.
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base // Add frame since C call can throw.
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | lgr CARG2, BASE // Otherwise: CARG2 == BASE
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ | // GCstr returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | settp STR:RB, CRET1, LJ_TSTR
+ | j <2
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | je >2 // Missing 2nd arg?
+ |1:
+ | lg CARG1, 0(BASE)
+ | lg PC, -8(BASE)
+ | checktab CARG1, ->fff_fallback
+ | lgr RB, BASE // Save BASE.
+ | la CARG2, 8(BASE)
+ | lay CARG3, -16(BASE)
+ | brasl r14, extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // 1=found, 0=end, -1=error returned in r2 (CRET1).
+ | lgr BASE, RB // Restore BASE.
+ | ltr RD, CRET1; jh ->fff_res2 // Found key/value.
+ | jl ->fff_fallback_2 // Invalid key.
+ | // End of traversal: return nil.
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, -16(BASE)
+ | j ->fff_res1
+ |2: // Set missing 2nd arg to nil.
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, 8(BASE)
+ | j <1
+ |
+ |.ffunc_1 pairs
+ | lg TAB:RB, 0(BASE)
+ | lgr TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+#if LJ_52
+ | ltg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+#endif
+ | lg CFUNC:RD, -16(BASE)
+ | cleartp CFUNC:RD
+ | lg CFUNC:RD, CFUNC:RD->upvalue[0]
+ | settp CFUNC:RD, LJ_TFUNC
+ | lg PC, -8(BASE)
+ | stg CFUNC:RD, -16(BASE)
+ | stg TMPR1, -8(BASE)
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, 0(BASE)
+ | lghi RD, 1+3
+ | j ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+ | lg TAB:RB, 0(BASE)
+ | checktab TAB:RB, ->fff_fallback
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | lg PC, -8(BASE)
+ | aghi RA, 1
+ | setint ITYPE, RA
+ | stg ITYPE, -16(BASE)
+ | cl RA, TAB:RB->asize; jhe >2 // Not in array part?
+ | lg RD, TAB:RB->array
+ | lgfr TMPR1, RA
+ | sllg TMPR1, TMPR1, 3
+ | la RD, 0(TMPR1, RD)
+ |1:
+ | lg TMPR0, 0(RD)
+ | cghi TMPR0, LJ_TNIL; je ->fff_res0
+ | // Copy array slot.
+ | stg TMPR0, -8(BASE)
+ |->fff_res2:
+ | lghi RD, 1+2
+ | j ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | lt TMPR0, TAB:RB->hmask; je ->fff_res0
+ | lgr CARG1, TAB:RB
+ | lgfr CARG2, RA
+ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in r2 (CRET1).
+ | ltgr RD, CRET1
+ | jne <1
+ |->fff_res0:
+ | lghi RD, 1+0
+ | j ->fff_res
+ |
+ |.ffunc_1 ipairs
+ | lg TAB:RB, 0(BASE)
+ | lgr TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+#if LJ_52
+ | lghi TMPR0, 0
+ | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+#endif
+ | lg CFUNC:RD, -16(BASE)
+ | cleartp CFUNC:RD
+ | lg CFUNC:RD, CFUNC:RD->upvalue[0]
+ | settp CFUNC:RD, LJ_TFUNC
+ | lg PC, -8(BASE)
+ | stg CFUNC:RD, -16(BASE)
+ | stg TMPR1, -8(BASE)
+ | llihf RD, LJ_TISNUM<<15
+ | stg RD, 0(BASE)
+ | lghi RD, 1+3
+ | j ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc_1 pcall
+ | la RA, 16(BASE)
+ | aghi NARGS:RD, -1
+ | lghi PC, 16+FRAME_PCALL
+ |1:
+ | llgc RB, (DISPATCH_GL(hookmask))(DISPATCH)
+ | srlg RB, RB, HOOK_ACTIVE_SHIFT(r0)
+ | nill RB, 1 // High bits already zero (from load).
+ | agr PC, RB // Remember active hook before pcall.
+ | // Note: this does a (harmless) copy of the function to the PC slot, too.
+ | lgr KBASE, RD
+ |2:
+ | sllg TMPR1, KBASE, 3
+ | lg RB, -24(TMPR1, RA)
+ | stg RB, -16(TMPR1, RA)
+ | aghi KBASE, -1
+ | jh <2
+ | j ->vm_call_dispatch
+ |
+ |.ffunc_2 xpcall
+ | lg LFUNC:RA, 8(BASE)
+ | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
+ | lg LFUNC:RB, 0(BASE) // Swap function and traceback.
+ | stg LFUNC:RA, 0(BASE)
+ | stg LFUNC:RB, 8(BASE)
+ | la RA, 24(BASE)
+ | aghi NARGS:RD, -2
+ | lghi PC, 24+FRAME_PCALL
+ | j <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | lg L:RB, 0(BASE)
+ | lgr L:TMPR0, L:RB // Save type for checktptp.
+ | cleartp L:RB
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | lg CFUNC:RB, -16(BASE)
+ | cleartp CFUNC:RB
+ | lg L:RB, CFUNC:RB->upvalue[0].gcr
+ | cleartp L:RB
+ |.endif
+ | lg PC, -8(BASE)
+ | stg PC, SAVE_PC
+ | stg L:RB, SAVE_TMP
+ |.if resume
+ | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback
+ |.endif
+ | ltg TMPR0, L:RB->cframe; jne ->fff_fallback
+ | cli L:RB->status, LUA_YIELD; jh ->fff_fallback
+ | lg RA, L:RB->top
+ | je >1 // Status != LUA_YIELD (i.e. 0)?
+ | cg RA, L:RB->base // Check for presence of initial func.
+ | je ->fff_fallback
+ | lg PC, -8(RA) // Move initial function up.
+ | stg PC, 0(RA)
+ | la RA, 8(RA)
+ |1:
+ | sllg TMPR1, NARGS:RD, 3
+ |.if resume
+ | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread).
+ |.else
+ | lay PC, -8(TMPR1, RA) // Check stack space (-1).
+ |.endif
+ | clg PC, L:RB->maxstack; jh ->fff_fallback
+ | stg PC, L:RB->top
+ |
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ |.if resume
+ | la BASE, 8(BASE) // Keep resumed thread in stack for GC.
+ |.endif
+ | stg BASE, L:RB->top
+ |.if resume
+ | lay RB, -24(TMPR1, BASE) // RB = end of source for stack move.
+ |.else
+ | lay RB, -16(TMPR1, BASE) // RB = end of source for stack move.
+ |.endif
+ | sgr RB, PC // Relative to PC.
+ |
+ | cgr PC, RA
+ | je >3
+ |2: // Move args to coroutine.
+ | lg RC, 0(RB, PC)
+ | stg RC, -8(PC)
+ | lay PC, -8(PC)
+ | cgr PC, RA
+ | jne <2
+ |3:
+ | lgr CARG2, RA
+ | lg L:CARG1, SAVE_TMP
+ | lghi CARG3, 0
+ | lghi CARG4, 0
+ | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ |
+ | lg L:RB, SAVE_L
+ | lg L:PC, SAVE_TMP
+ | lg BASE, L:RB->base
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+ |
+ | clfi CRET1, LUA_YIELD
+ | jh >8
+ |4:
+ | lg RA, L:PC->base
+ | lg KBASE, L:PC->top
+ | stg RA, L:PC->top // Clear coroutine stack.
+ | lgr PC, KBASE
+ | sgr PC, RA
+ | je >6 // No results?
+ | la RD, 0(PC, BASE)
+ | llgfr PC, PC
+ | srlg PC, PC, 3
+ | clg RD, L:RB->maxstack
+ | jh >9 // Need to grow stack?
+ |
+ | lgr RB, BASE
+ | sgr RB, RA
+ |5: // Move results from coroutine.
+ | lg RD, 0(RA)
+ | stg RD, 0(RA, RB)
+ | la RA, 8(RA)
+ | cgr RA, KBASE
+ | jne <5
+ |6:
+ |.if resume
+ | la RD, 2(PC) // nresults+1 = 1 + true + results.
+ | load_true ITYPE // Prepend true to results.
+ | stg ITYPE, -8(BASE)
+ |.else
+ | la RD, 1(PC) // nresults+1 = 1 + results.
+ |.endif
+ |7:
+ | lg PC, SAVE_PC
+ | st RD, SAVE_MULTRES
+ |.if resume
+ | lghi RA, -8
+ |.else
+ | lghi RA, 0
+ |.endif
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z
+ | j ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | load_false ITYPE // Prepend false to results.
+ | stg ITYPE, -8(BASE)
+ | lg RA, L:PC->top
+ | aghi RA, -8
+ | stg RA, L:PC->top // Clear error from coroutine stack.
+ | // Copy error message.
+ | lg RD, 0(RA)
+ | stg RD, 0(BASE)
+ | lghi RD, 1+2 // nresults+1 = 1 + false + error.
+ | j <7
+ |.else
+ | lgr CARG2, L:PC
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ | // Error function does not return.
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | lg L:RA, SAVE_TMP
+ | stg KBASE, L:RA->top // Undo coroutine stack clearing.
+ | lgr CARG2, PC
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg L:PC, SAVE_TMP
+ | lg BASE, L:RB->base
+ | j <4 // Retry the stack move.
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | lg L:RB, SAVE_L
+ | lg TMPR0, L:RB->cframe
+ | tmll TMPR0, CFRAME_RESUME
+ | je ->fff_fallback
+ | stg BASE, L:RB->base
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | stg RD, L:RB->top
+ | lghi RD, 0
+ | stg RD, L:RB->cframe
+ | lghi CRET1, LUA_YIELD
+ | stc CRET1, L:RB->status
+ | j ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |.ffunc_1 math_abs
+ | lg RB, 0(BASE)
+ | checkint RB, >3
+ | lpr RB, RB; jo >2
+ |->fff_resbit:
+ |->fff_resi:
+ | setint RB
+ |->fff_resRB:
+ | lg PC, -8(BASE)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |2:
+ | llihh RB, 0x41e0 // 2^31
+ | j ->fff_resRB
+ |3:
+ | jh ->fff_fallback
+ | nihh RB, 0x7fff // Clear sign bit.
+ | lg PC, -8(BASE)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |
+ |.ffunc_n math_sqrt, sqdb
+ |->fff_resf0:
+ | lg PC, -8(BASE)
+ | stdy f0, -16(BASE)
+ | // fallthrough
+ |
+ |->fff_res1:
+ | lghi RD, 1+1
+ |->fff_res:
+ | st RD, SAVE_MULTRES
+ |->fff_res_:
+ | tmll PC, FRAME_TYPE
+ | jne >7
+ |5:
+ | llgc TMPR1, PC_RB
+ | clgr TMPR1, RD // More results expected?
+ | jh >6
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | llgc RA, PC_RA
+ | lcgr RA, RA
+ | sllg RA, RA, 3
+ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | sllg TMPR1, RD, 3
+ | lghi TMPR0, LJ_TNIL
+ | stg TMPR0, -24(TMPR1, BASE)
+ | la RD, 1(RD)
+ | j <5
+ |
+ |7: // Non-standard return case.
+ | lghi RA, -16 // Results start at BASE+RA = BASE-16.
+ | j ->vm_return
+ |
+ |.macro math_round, func
+ | .ffunc math_ .. func
+ | lg RB, 0(BASE)
+ | ld f0, 0(BASE)
+ | checknumx RB, ->fff_resRB, je
+ | jh ->fff_fallback
+ | brasl r14, ->vm_ .. func
+ | cfdbr RB, 0, f0
+ | jo ->fff_resf0
+ | llgfr RB, RB
+ | j ->fff_resi
+ |.endmacro
+ |
+ | math_round floor
+ | math_round ceil
+ |
+ |.ffunc math_log
+ | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
+ | lg TMPR0, 0(BASE)
+ | ld FARG1, 0(BASE)
+ | checknumtp TMPR0, ->fff_fallback
+ | brasl r14, extern log
+ | j ->fff_resf0
+ |
+ |.macro math_extern, func
+ | .ffunc_n math_ .. func
+ | brasl r14, extern func
+ | j ->fff_resf0
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
+ | brasl r14, extern func
+ | j ->fff_resf0
+ |.endmacro
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |.ffunc_2 math_ldexp
+ | lg TMPR0, 0(BASE)
+ | ld FARG1, 0(BASE)
+ | lg CARG1, 8(BASE)
+ | checknumtp TMPR0, ->fff_fallback
+ | checkinttp CARG1, ->fff_fallback
+ | lgfr CARG1, CARG1
+ | brasl r14, extern ldexp // (double, int)
+ | j ->fff_resf0
+ |
+ |.ffunc_n math_frexp
+ | la CARG1, SAVE_TMP
+ | brasl r14, extern frexp
+ | llgf RB, SAVE_TMP
+ | lg PC, -8(BASE)
+ | stdy f0, -16(BASE)
+ | setint RB
+ | stg RB, -8(BASE)
+ | lghi RD, 1+2
+ | j ->fff_res
+ |
+ |.ffunc_n math_modf
+ | lay CARG1, -16(BASE)
+ | brasl r14, extern modf // (double, double*)
+ | lg PC, -8(BASE)
+ | stdy f0, -8(BASE)
+ | lghi RD, 1+2
+ | j ->fff_res
+ |
+ |.macro math_minmax, name, cjmp
+ | .ffunc name
+ | lghi RA, 2*8
+ | sllg TMPR1, RD, 3
+ | lg RB, 0(BASE)
+ | ld f0, 0(BASE)
+ | checkint RB, >4
+ |1: // Handle integers.
+ | clgr RA, TMPR1; jhe ->fff_resRB
+ | lg TMPR0, -8(RA, BASE)
+ | checkint TMPR0, >3
+ | cr RB, TMPR0
+ | cjmp >2
+ | lgr RB, TMPR0
+ |2:
+ | aghi RA, 8
+ | j <1
+ |3:
+ | jh ->fff_fallback
+ | // Convert intermediate result to number and continue below.
+ | cdfbr f0, RB
+ | ldgr f1, TMPR0
+ | j >6
+ |4:
+ | jh ->fff_fallback
+ |5: // Handle numbers or integers.
+ | clgr RA, TMPR1; jhe ->fff_resf0
+ | lg RB, -8(RA, BASE)
+ | ldy f1, -8(RA, BASE)
+ | checknumx RB, >6, jl
+ | jh ->fff_fallback
+ | cdfbr f1, RB
+ |6:
+ | cdbr f0, f1
+ | cjmp >7
+ | ldr f0, f1
+ |7:
+ | aghi RA, 8
+ | j <5
+ |.endmacro
+ |
+ | math_minmax math_min, jnh
+ | math_minmax math_max, jnl
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | chi NARGS:RD, 1+1; jne ->fff_fallback
+ | lg STR:RB, 0(BASE)
+ | checkstr STR:RB, ->fff_fallback
+ | lg PC, -8(BASE)
+ | lt TMPR0, STR:RB->len // 32-bit test: len is a 32-bit field (cf. llgf in string_sub); a 64-bit ltg would also test the 4 bytes after it.
+ | je ->fff_res0 // Return no results for empty string.
+ | llgc RB, STR:RB[1]
+ | j ->fff_resi
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | chi NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
+ | lg RB, 0(BASE)
+ | checkint RB, ->fff_fallback
+ | clfi RB, 255; jh ->fff_fallback
+ | strvh RB, SAVE_TMP // Byte-reversed halfword store: bytes [c,0].
+ | lghi TMPR1, 1 // String length = 1.
+ | la RD, SAVE_TMP // Points to stack; the char is the first byte stored above.
+ |->fff_newstr: // In: RD = char *str, TMPR1 = length (passed on as CARG2/CARG3).
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | llgfr CARG3, TMPR1 // Zero-extended to size_t.
+ | lgr CARG2, RD
+ | lgr CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
+ | // GCstr * returned in r2 (CRET1).
+ | lgr STR:RD, CRET1
+ | lg BASE, L:RB->base
+ | lg PC, -8(BASE)
+ | settp STR:RD, LJ_TSTR
+ | stg STR:RD, -16(BASE) // Store tagged string as the single result.
+ | j ->fff_res1
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | lghi TMPR1, -1
+ | clfi NARGS:RD, 1+2; jl ->fff_fallback
+ | jnh >1
+ | lg TMPR1, 16(BASE)
+ | checkint TMPR1, ->fff_fallback
+ |1:
+ | lg STR:RB, 0(BASE)
+ | checkstr STR:RB, ->fff_fallback
+ | lg ITYPE, 8(BASE)
+ | lgfr RA, ITYPE
+ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM
+ | jne ->fff_fallback
+ | llgf RC, STR:RB->len
+ | clr RC, TMPR1 // len < end? (unsigned compare)
+ | jl >5
+ |2:
+ | cghi RA, 0 // start <= 0?
+ | jle >7
+ |3:
+ | sr TMPR1, RA // start > end?
+ | jnhe ->fff_emptystr
+ | la RD, (#STR-1)(RA, STR:RB)
+ | ahi TMPR1, 1
+ |4:
+ | j ->fff_newstr
+ |
+ |5: // Negative end or overflow.
+ | chi TMPR1, 0
+ | jnl >6
+ | ahi TMPR1, 1
+ | ar TMPR1, RC // end = end+(len+1)
+ | j <2
+ |6: // Overflow.
+ | lr TMPR1, RC // end = len
+ | j <2
+ |
+ |7: // Negative start or underflow.
+ | je >8
+ | agr RA, RC // start = start+(len+1)
+ | aghi RA, 1
+ | jh <3 // start > 0?
+ |8: // Underflow.
+ | lghi RA, 1 // start = 1
+ | j <3
+ |
+ |->fff_emptystr: // Range underflow.
+ | lghi TMPR1, 0
+ | j <4
+ |
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
+ | ffgccheck
+ | lg STR:CARG2, 0(BASE)
+ | checkstr STR:CARG2, ->fff_fallback
+ | lg L:RB, SAVE_L
+ | lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH)
+ | stg BASE, L:RB->base
+ | lg RC, SBUF:CARG1->b
+ | stg L:RB, SBUF:CARG1->L
+ | stg RC, SBUF:CARG1->w
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_buf_putstr_ .. name
+ | // lgr CARG1, CRET1 (nop, CARG1==CRET1)
+ | brasl r14, extern lj_buf_tostr
+ | j ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |.macro .ffunc_bit, name, kind, fdef
+ | fdef name
+ |.if kind == 2
+ | bfpconst_tobit f1, RB
+ |.endif
+ | lg RB, 0(BASE)
+ | ld f0, 0(BASE)
+ | checkint RB, >1
+ |.if kind > 0
+ | j >2
+ |.else
+ | j ->fff_resbit
+ |.endif
+ |1:
+ | jh ->fff_fallback
+ |.if kind < 2
+ | bfpconst_tobit f1, RB
+ |.endif
+ | adbr f0, f1
+ | lgdr RB, f0
+ | llgfr RB, RB
+ |2:
+ |.endmacro
+ |
+ |.macro .ffunc_bit, name, kind
+ | .ffunc_bit name, kind, .ffunc_1
+ |.endmacro
+ |
+ |.ffunc_bit bit_tobit, 0
+ | j ->fff_resbit
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name, 2
+ | lgr TMPR1, NARGS:RD // Save for fallback.
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -16(RD, BASE)
+ |1:
+ | clgr RD, BASE
+ | jle ->fff_resbit
+ | lg RA, 0(RD)
+ | checkint RA, >2
+ | ins RB, RA
+ | aghi RD, -8
+ | j <1
+ |2:
+ | jh ->fff_fallback_bit_op
+ | ldgr f0, RA
+ | adbr f0, f1
+ | lgdr RA, f0
+ | ins RB, RA
+ | aghi RD, -8
+ | j <1
+ |.endmacro
+ |
+ |.ffunc_bit_op bit_band, nr
+ |.ffunc_bit_op bit_bor, or
+ |.ffunc_bit_op bit_bxor, xr
+ |
+ |.ffunc_bit bit_bswap, 1
+ | lrvr RB, RB
+ | j ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot, 1
+ | xilf RB, -1
+ | j ->fff_resbit
+ |
+ |->fff_fallback_bit_op:
+ | lgr NARGS:RD, TMPR1 // Restore for fallback
+ | j ->fff_fallback
+ |
+ |.macro .ffunc_bit_sh, name, ins
+ | .ffunc_bit name, 1, .ffunc_2
+ | // Note: no inline conversion from number for 2nd argument!
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | nill RA, 0x1f // Limit shift to 5-bits.
+ | ins RB, 0(RA)
+ | j ->fff_resbit
+ |.endmacro
+ |
+ |.ffunc_bit_sh bit_lshift, sll
+ |.ffunc_bit_sh bit_rshift, srl
+ |.ffunc_bit_sh bit_arshift, sra
+ |
+ |.ffunc_bit bit_rol, 1, .ffunc_2
+ | // Note: no inline conversion from number for 2nd argument!
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | rll RB, RB, 0(RA)
+ | j ->fff_resbit
+ |
+ |.ffunc_bit bit_ror, 1, .ffunc_2
+ | // Note: no inline conversion from number for 2nd argument!
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | lcr RA, RA // Right rotate equivalent to negative left rotate.
+ | rll RB, RB, 0(RA)
+ | j ->fff_resbit
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback_2:
+ | lghi NARGS:RD, 1+2 // Other args are ignored, anyway.
+ | j ->fff_fallback
+ |->fff_fallback_1:
+ | lghi NARGS:RD, 1+1 // Other args are ignored, anyway.
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RD = nargs+1
+ | lg L:RB, SAVE_L
+ | lg PC, -8(BASE) // Fallback may overwrite PC.
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | stg BASE, L:RB->base
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler.
+ | stg RD, L:RB->top
+ | lg CFUNC:RD, -16(BASE)
+ | cleartp CFUNC:RD
+ | clg RA, L:RB->maxstack
+ | jh >5 // Need to grow stack.
+ | lgr CARG1, L:RB
+ | lg TMPR1, CFUNC:RD->f
+ | basr r14, TMPR1 // (lua_State *L)
+ | lg BASE, L:RB->base
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | lgr RD, CRET1
+ | cghi RD, 0; jh ->fff_res // Returned nresults+1?
+ |1:
+ | lg RA, L:RB->top
+ | sgr RA, BASE
+ | srlg RA, RA, 3
+ | cghi RD, 0
+ | la NARGS:RD, 1(RA)
+ | lg LFUNC:RB, -16(BASE)
+ | jne ->vm_call_tail // Returned -1?
+ | cleartp LFUNC:RB
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | lgr RA, BASE
+ | tmll PC, FRAME_TYPE
+ | jne >3
+ | llgc RB, PC_RA
+ | lcgr RB, RB
+ | sllg RB, RB, 3
+ | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8
+ | j ->vm_call_dispatch // Resolve again for tailcall.
+ |3:
+ | lgr RB, PC
+ | nill RB, -8
+ | sgr BASE, RB
+ | j ->vm_call_dispatch // Resolve again for tailcall.
+ |
+ |5: // Grow stack for fallback handler.
+ | lghi CARG2, LUA_MINSTACK
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->base
+ | lghi RD, 0 // Simulate a return 0.
+ | j <1 // Dumb retry (goes through ff first).
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
+ | stg r14, SAVE_TMP // Save return address (note: overwrites SAVE_TMP).
+ | lg L:RB, SAVE_L
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | stg BASE, L:RB->base
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE) // RD = BASE + 8*(nargs+1) - 8.
+ | lgr CARG1, L:RB
+ | stg RD, L:RB->top
+ | brasl r14, extern lj_gc_step // (lua_State *L)
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | sgr RD, BASE
+ | srlg RD, RD, 3
+ | aghi NARGS:RD, 1 // RD = (L->top - L->base)/8 + 1, i.e. nargs+1 again.
+ | lg r14, SAVE_TMP // Restore return address.
+ | br r14
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
+ | tmll RD, HOOK_ACTIVE
+ | jne >5
+ | j >1
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
+ | tmll RD, HOOK_ACTIVE // Hook already active?
+ | jne >5
+ |
+ | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT
+ | je >5
+ | ly TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
+ | ahi TMPR0, -1
+ | sty TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
+ | je >1
+ | tmll RD, LUA_MASKLINE
+ | je >5
+ |1:
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lgr CARG2, PC
+ | lgr CARG1, L:RB
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | brasl r14, extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |3:
+ | lg BASE, L:RB->base
+ |4:
+ | llgc RA, PC_RA
+ |5:
+ | llgc OP, PC_OP
+ | sllg TMPR1, OP, 3
+ | llgh RD, PC_RD
+ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH)
+ | br TMPR1
+ |
+ |->cont_hook: // Continue from hook yield.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ |.if JIT
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg RB, LFUNC:RB->pc
+ | llgc RD, (PC2PROTO(framesize))(RB)
+ | sllg RD, RD, 3
+ | la RD, 0(RD, BASE)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | stg RD, L:RB->top
+ | lgr CARG2, PC
+ | la CARG1, GG_DISP2J(DISPATCH)
+ | stg L:RB, (DISPATCH_J(L))(DISPATCH)
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | j <3
+ |.endif
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ | stg PC, SAVE_PC
+ |.if JIT
+ | j >1
+ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | stg PC, SAVE_PC
+ | oill PC, 1 // Marker for hot call.
+ | 1:
+ |.endif
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | stg RD, L:RB->top
+ | lgr CARG2, PC
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // ASMFunction returned in r2 (CRET1).
+ | lghi TMPR0, 0
+ | stg TMPR0, SAVE_PC // Invalidate for subsequent line hook.
+ |.if JIT
+ |// nill PC, -2
+ |.endif
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | sgr RD, BASE
+ | lgr RB, CRET1
+ | llgc RA, PC_RA
+ | srl RD, 3
+ | ahi NARGS:RD, 1
+ | llgfr RD, RD
+ | br RB
+ |
+ |->cont_stitch: // Trace stitching.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Called from an exit stub with the exit number on the stack.
+ |// (x86-specific note: the 16 bit exit number is stored with two sign-extended push imm8.)
+ |->vm_exit_handler:
+ | stg r0, 0
+ | stg r0, 0
+ |->vm_exit_interp:
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// FP value rounding. Called by math.floor/math.ceil fast functions.
+ |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14.
+ |.macro vm_round, name, mask
+ |->name:
+ | ldr f4, f0
+ | lghi r0, 1
+ | cdfbr f1, r0
+ | didbr f0, f2, f1, mask // f0=remainder, f2=quotient.
+ | fidbra f4, mask, f4, 0
+ | ldr f0, f4
+ | jnle >1
+ | br r14
+ |1: // partial remainder (sanity check)
+ | stg r0, 0
+ |.endmacro
+ |
+ | vm_round vm_floor, 7 // Round towards -inf.
+ | vm_round vm_ceil, 6 // Round towards +inf.
+ | vm_round vm_trunc, 5 // Round towards 0.
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |->vm_mod: // NYI.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->assert_bad_for_arg_type:
+ | stg r0, 0
+ | stg r0, 0
+#ifdef LUA_USE_ASSERT
+#endif
+ |
+ |->vm_next:
+ |.if JIT
+ |// stg r0, 0 // NYI On big-endian.
+ |// stg r0, 0
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions. Callback slot number in ah/al (x86 register names; the body below is commented out).
+ |->vm_ffi_callback:
+ |// .if FFI
+ |// .type CTSTATE, CTState, PC
+ |// saveregs
+ |// la DISPATCH, GG_G2DISP(RB)
+ |// lg CTSTATE, GL:RB->ctype_state
+ |// llgcr RC, RC
+ |// stg RC, CTSTATE->cb.slot
+ |//
+ |// la RC, CFRAME_SIZE(sp)
+ |// .endif
+ |
+ |
+ |
+ |
+ |
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ | stg r0, 0
+ | stg r0, 0
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, r8
+ | stmg r6, r15, 48(sp) // Save r6-r15 at 48(sp); restored before returning.
+ | lgr r13, sp // Use r13 as frame pointer.
+ | lgr CCSTATE, CARG1
+ | lg r7, CCSTATE->func // r7 = function to call.
+ |
+ | // Readjust stack.
+ | sgf sp, CCSTATE->spadj
+ |
+ | // Copy stack slots.
+ | llgc r1, CCSTATE->nsp
+ | chi r1, 0
+ | jh >2 // nsp > 0: copy the stack slots first, then come back to 1.
+ |1:
+ | lmg CARG1, CARG5, CCSTATE->gpr[0]
+ | // TODO: conditionally load FPRs?
+ | ld FARG1, CCSTATE->fpr[0]
+ | ld FARG2, CCSTATE->fpr[1]
+ | ld FARG3, CCSTATE->fpr[2]
+ | ld FARG4, CCSTATE->fpr[3]
+ | basr r14, r7 // Call the C function.
+ |
+ | stg CRET1, CCSTATE->gpr[0] // Store integer and FP return registers back.
+ | std f0, CCSTATE->fpr[0]
+ |
+ | lgr sp, r13 // Undo the stack adjustment.
+ | lmg r6, r15, 48(sp)
+ | br r14
+ |
+ |2:
+ | sll r1, 3 // r1 = nsp * 8 = number of bytes to copy.
+ | la r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
+ | la r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
+ |3: // Copy in 256-byte chunks while at least 256 bytes remain.
+ | chi r1, 256
+ | jl >4
+ | mvc 0(256, r11), 0(r10)
+ | la r10, 256(r10)
+ | la r11, 256(r11)
+ | ahi r1, -256
+ | j <3
+ |
+ |4: // Copy the remaining 0..255 bytes.
+ | ahi r1, -1 // r1 = remaining bytes - 1.
+ | jl <1 // Nothing left to copy.
+ | larl r9, >5
+ | ex r1, 0(r9) // Execute the mvc at 5 with its length field taken from r1.
+ | j <1
+ |
+ |5:
+ | // Target of the ex above (mvc template; never reached by fallthrough).
+ | mvc 0(1, r11), 0(r10)
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ (void)vk;
+ |// Note: aligning all instructions does not pay off.
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ |.macro jmp_comp, lt, ge, le, gt, target
+ ||switch (op) {
+ ||case BC_ISLT:
+ | lt target
+ ||break;
+ ||case BC_ISGE:
+ | ge target
+ ||break;
+ ||case BC_ISLE:
+ | le target
+ ||break;
+ ||case BC_ISGT:
+ | gt target
+ ||break;
+ ||default: break; /* Shut up GCC. */
+ ||}
+ |.endmacro
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RD = src2, JMP with RD = target
+ | ins_AD
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | ld f0, 0(RA, BASE)
+ | ld f1, 0(RD, BASE)
+ | lg RA, 0(RA, BASE)
+ | lg RD, 0(RD, BASE)
+ | srag ITYPE, RA, 47
+ | srag RB, RD, 47
+ |
+ | clfi ITYPE, LJ_TISNUM; jne >7
+ | clfi RB, LJ_TISNUM; jne >8
+ | // Both are integers.
+ | la PC, 4(PC)
+ | cr RA, RD
+ | jmp_comp jhe, jl, jh, jle, >9
+ |6:
+ | llgh RD, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | jh ->vmeta_comp
+ | // RA is a number.
+ | clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp
+ | // RA is a number, RD is an integer.
+ | cdfbr f1, RD
+ | j >1
+ |
+ |8: // RA is an integer, RD is not an integer.
+ | jh ->vmeta_comp
+ | // RA is an integer, RD is a number.
+ | cdfbr f0, RA
+ |1:
+ | la PC, 4(PC)
+ | cdbr f0, f1
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ | jmp_comp jnl, jl, jnle, jle, <9
+ | j <6
+ break;
+
+ case BC_ISEQV: case BC_ISNEV: /* Generic (in)equality of two stack slots. Also hosts the iseqne_fp/iseqne_end tails that other cases jump to. */
+ vk = op == BC_ISEQV;
+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
+ | sllg RD, RD, 3
+ | ld f1, 0(RD, BASE)
+ | lg RD, 0(RD, BASE)
+ | sllg RA, RA, 3
+ | ld f0, 0(RA, BASE)
+ | lg RA, 0(RA, BASE)
+ | la PC, 4(PC)
+ | srag RB, RD, 47
+ | srag ITYPE, RA, 47
+ | clfi RB, LJ_TISNUM; jne >7
+ | clfi ITYPE, LJ_TISNUM; jne >8
+ | cr RD, RA
+ if (vk) {
+ | jne >9
+ } else {
+ | je >9
+ }
+ | llgh RD, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RD is not an integer.
+ | jh >5
+ | // RD is a number.
+ | clfi ITYPE, LJ_TISNUM; jl >1; jne >5
+ | // RD is a number, RA is an integer.
+ | cdfbr f0, RA
+ | j >1
+ |
+ |8: // RD is an integer, RA is not an integer.
+ | jh >5
+ | // RD is an integer, RA is a number.
+ | cdfbr f1, RD
+ | j >1
+ |
+ |1:
+ | cdbr f0, f1
+ |4:
+ iseqne_fp:
+ if (vk) {
+ | jne >2 // Unordered means not equal.
+ } else {
+ | je >1 // Unordered means not equal.
+ }
+ iseqne_end:
+ if (vk) {
+ |1: // EQ: Branch to the target.
+ | llgh RD, PC_RD
+ | branchPC RD
+ |2: // NE: Fallthrough to next instruction.
+ |.if not FFI
+ |3:
+ |.endif
+ } else {
+ |.if not FFI
+ |3:
+ |.endif
+ |2: // NE: Branch to the target.
+ | llgh RD, PC_RD
+ | branchPC RD
+ |1: // EQ: Fallthrough to next instruction.
+ }
+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
+ op == BC_ISEQN || op == BC_ISNEN)) {
+ | j <9
+ } else {
+ | ins_next
+ }
+ |
+ if (op == BC_ISEQV || op == BC_ISNEV) {
+ |5: // Either or both types are not numbers.
+ |.if FFI
+ | clfi RB, LJ_TCDATA; je ->vmeta_equal_cd
+ | clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd
+ |.endif
+ | cgr RA, RD
+ | je <1 // Same GCobjs or pvalues?
+ | cr RB, ITYPE
+ | jne <2 // Not the same type?
+ | clfi RB, LJ_TISTABUD
+ | jh <2 // Different objects and not table/ud?
+ |
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | cleartp TAB:RA
+ | lg TAB:RB, TAB:RA->metatable
+ | cghi TAB:RB, 0
+ | je <2 // No metatable?
+ | tm TAB:RB->nomm, 1<<MM_eq
+ | jne <2 // Or 'no __eq' flag set?
+ // NOTE(review): the ne flag in RB mirrors the x64 VM; confirm that ->vmeta_equal reads RB.
+ if (vk) {
+ | lghi RB, 0 // ne = 0
+ } else {
+ | lghi RB, 1 // ne = 1
+ }
+ | j ->vmeta_equal // Handle __eq metamethod.
+ } else {
+ |.if FFI
+ |3:
+ | clfi ITYPE, LJ_TCDATA
+ if (LJ_DUALNUM && vk) {
+ | jne <9
+ } else {
+ | jne <2
+ }
+ | j ->vmeta_equal_cd
+ |.endif
+ }
+ break;
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | ins_AND // RA = src, RD = str const, JMP with RD = target
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | lg RB, 0(RA, BASE)
+ | la PC, 4(PC)
+ | checkstr RB, >3
+ | cg RB, 0(RD, KBASE)
+ iseqne_test:
+ if (vk) {
+ | jne >2
+ } else {
+ | je >1
+ }
+ goto iseqne_end;
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | ins_AD // RA = src, RD = num const, JMP with RD = target
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | ld f0, 0(RA, BASE)
+ | lg RB, 0(RA, BASE)
+ | ld f1, 0(RD, KBASE)
+ | lg RD, 0(RD, KBASE)
+ | la PC, 4(PC)
+ | checkint RB, >7
+ | checkint RD, >8
+ | cr RB, RD
+ if (vk) {
+ | jne >9
+ } else {
+ | je >9
+ }
+ | llgh RD, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | jh >3
+ | // RA is a number.
+ | checkint RD, >1
+ | // RA is a number, RD is an integer.
+ | cdfbr f1, RD
+ | j >1
+ |
+ |8: // RA is an integer, RD is a number.
+ | cdfbr f0, RB
+ | cdbr f0, f1
+ | j >4
+ |1:
+ | cdbr f0, f1
+ |4:
+ goto iseqne_fp;
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | srag RB, RB, 47
+ | la PC, 4(PC)
+ | cr RB, RD
+ if (!LJ_HASFFI) goto iseqne_test;
+ if (vk) {
+ | jne >3
+ | llgh RD, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ |3:
+ | cghi RB, LJ_TCDATA; jne <2
+ | j ->vmeta_equal_cd
+ } else {
+ | je >2
+ | cghi RB, LJ_TCDATA; je ->vmeta_equal_cd
+ | llgh RD, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ }
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+ | sllg RD, RD, 3
+ | sllg RA, RA, 3
+ | lg ITYPE, 0(RD, BASE)
+ | la PC, 4(PC)
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | lgr RB, ITYPE
+ }
+ | srag ITYPE, ITYPE, 47
+ | clfi ITYPE, LJ_TISTRUECOND
+ if (op == BC_IST || op == BC_ISTC) {
+ | jhe >1
+ } else {
+ | jl >1
+ }
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | stg RB, 0(RA, BASE)
+ }
+ | llgh RD, PC_RD
+ | branchPC RD
+ |1: // Fallthrough to the next instruction.
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | ins_AD // RA = src, RD = -type
+ | lghr RD, RD
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | srag RB, RB, 47
+ | agr RB, RD
+ | jne ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | ins_AD // RA = src, RD = -(TISNUM-1)
+ | sllg TMPR1, RA, 3
+ | lg TMPR1, 0(TMPR1, BASE)
+ | checknumtp TMPR1, ->vmeta_istype
+ | ins_next
+ break;
+ case BC_MOV:
+ | ins_AD // RA = dst, RD = src
+ | sllg RD, RD, 3
+ | lg RB, 0(RD, BASE)
+ | sllg RA, RA, 3
+ | stg RB, 0(RA, BASE)
+ | ins_next_
+ break;
+ case BC_NOT:
+ | ins_AD // RA = dst, RD = src
+ | sllg RD, RD, 3
+ | sllg RA, RA, 3
+ | lg RB, 0(RD, BASE)
+ | srag RB, RB, 47
+ | load_false RC
+ | clfi RB, LJ_TISTRUECOND
+ | jl >1
+ | load_true RC
+ |1:
+ | stg RC, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_UNM:
+ | ins_AD // RA = dst, RD = src
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | lg RB, 0(RD, BASE)
+ | checkint RB, >3
+ | lcr RB, RB; jo >2
+ |1:
+ | stg RB, 0(RA, BASE)
+ | ins_next
+ |2:
+ | llihh RB, 0x41e0 // (double)2^31
+ | j <1
+ |3:
+ | jh ->vmeta_unm
+ | // Toggle sign bit.
+ | llihh TMPR0, 0x8000
+ | xgr RB, TMPR0
+ | j <1
+ break;
+ case BC_LEN: /* Length operator: strings inline, tables via lj_tab_len, anything else via vmeta_len. */
+ | ins_AD // RA = dst, RD = src
+ | sllg RD, RD, 3
+ | lg RD, 0(RD, BASE)
+ | checkstr RD, >2
+ | llgf RD, STR:RD->len
+ |1:
+ | sllg RA, RA, 3
+ | setint RD
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ |2:
+ | cghi ITYPE, LJ_TTAB; jne ->vmeta_len
+ | lgr TAB:CARG1, TAB:RD
+#if LJ_52
+ | lg TAB:RB, TAB:RD->metatable
+ | cghi TAB:RB, 0
+ | jne >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | brasl r14, extern lj_tab_len // (GCtab *t)
+ | // Length of table returned in r2 (CRET1).
+ | lgr RD, CRET1
+ | llgc RA, PC_RA
+ | j <1
+#if LJ_52
+ |9: // Check for __len.
+ | tm TAB:RB->nomm, 1<<MM_len
+ | jne <3 // 'no __len' flag set: use lj_tab_len.
+ | j ->vmeta_len // 'no __len' flag NOT set: check.
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro ins_arithpre
+ | ins_ABC
+ | sllg RB, RB, 3
+ | sllg RC, RC, 3
+ | sllg RA, RA, 3
+ |.endmacro
+ |
+ |.macro ins_arithfp, ins
+ | ins_arithpre
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | ld f0, 0(RB, BASE)
+ | ld f1, 0(RC, KBASE)
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checknumtp RB, ->vmeta_arith_vno
+ | checknumtp RC, ->vmeta_arith_vno
+ | ins f0, f1
+ || break;
+ ||case 1:
+ | ld f1, 0(RB, BASE)
+ | ld f0, 0(RC, KBASE)
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checknumtp RB, ->vmeta_arith_nvo
+ | checknumtp RC, ->vmeta_arith_nvo
+ | ins f0, f1
+ || break;
+ ||default:
+ | ld f0, 0(RB, BASE)
+ | ld f1, 0(RC, BASE)
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, BASE)
+ | checknumtp RB, ->vmeta_arith_vvo
+ | checknumtp RC, ->vmeta_arith_vvo
+ | ins f0, f1
+ || break;
+ ||}
+ | std f0, 0(RA, BASE)
+ | ins_next
+ |.endmacro
+ |
+ |.macro ins_arithdn, intins
+ | ins_arithpre
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checkint RB, ->vmeta_arith_vno
+ | checkint RC, ->vmeta_arith_vno
+ | intins RB, RC; jo ->vmeta_arith_vno
+ || break;
+ ||case 1:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checkint RB, ->vmeta_arith_nvo
+ | checkint RC, ->vmeta_arith_nvo
+ | intins RC, RB; jo ->vmeta_arith_nvo
+ || break;
+ ||default:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, BASE)
+ | checkint RB, ->vmeta_arith_vvo
+ | checkint RC, ->vmeta_arith_vvo
+ | intins RB, RC; jo ->vmeta_arith_vvo
+ || break;
+ ||}
+ ||if (vk == 1) {
+ | // setint RC
+ | stg RC, 0(RA, BASE)
+ ||} else {
+ | // setint RB
+ | stg RB, 0(RA, BASE)
+ ||}
+ | ins_next
+ |.endmacro
+
+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arithdn ar
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arithdn sr
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arithpre
+ | // For multiplication we use msgfr and check if the result
+ | // fits in an int32_t.
+ switch(op) {
+ case BC_MULVN:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checkint RB, ->vmeta_arith_vno
+ | checkint RC, ->vmeta_arith_vno
+ | lgfr RB, RB
+ | msgfr RB, RC
+ | lgfr RC, RB
+ | cgr RB, RC; jne ->vmeta_arith_vno
+ break;
+ case BC_MULNV:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, KBASE)
+ | checkint RB, ->vmeta_arith_nvo
+ | checkint RC, ->vmeta_arith_nvo
+ | lgfr RB, RB
+ | msgfr RB, RC
+ | lgfr RC, RB
+ | cgr RB, RC; jne ->vmeta_arith_nvo
+ break;
+ default:
+ | lg RB, 0(RB, BASE)
+ | lg RC, 0(RC, BASE)
+ | checkint RB, ->vmeta_arith_vvo
+ | checkint RC, ->vmeta_arith_vvo
+ | lgfr RB, RB
+ | msgfr RB, RC
+ | lgfr RC, RB
+ | cgr RB, RC; jne ->vmeta_arith_vvo
+ break;
+ }
+ | llgfr RB, RB
+ | setint RB
+ | stg RB, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arithfp ddbr
+ break;
+ // TODO: implement fast mod operation.
+ // x86_64 does floating point mod, however it might be better to use integer mod.
+ case BC_MODVN:
+ | j ->vmeta_arith_vno
+ break;
+ case BC_MODNV:
+ | j ->vmeta_arith_nvo
+ break;
+ case BC_MODVV:
+ | j ->vmeta_arith_vvo
+ break;
+ case BC_POW:
+ | ins_ABC
+ | sllg RB, RB, 3
+ | sllg RC, RC, 3
+ | ld FARG1, 0(RB, BASE)
+ | ld FARG2, 0(RC, BASE)
+ | lg TMPR0, 0(RB, BASE)
+ | checknumtp TMPR0, ->vmeta_arith_vvo
+ | lg TMPR0, 0(RC, BASE)
+ | checknumtp TMPR0, ->vmeta_arith_vvo
+ | brasl r14, extern pow // double pow(double x, double y), result in f0.
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | std f0, 0(RA, BASE)
+ | ins_next
+ break;
+
+ case BC_CAT:
+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr CARG3, RC
+ | sgr CARG3, RB
+ | sllg RC, RC, 3
+ | la CARG2, 0(RC, BASE)
+ |->BC_CAT_Z:
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | ltgr RC, CRET1
+ | jne ->vmeta_binop
+ | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
+ | sllg RB, RB, 3
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | lg RC, 0(RB, BASE)
+ | stg RC, 0(RA, BASE)
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | ins_AND // RA = dst, RD = str const (~)
+ | sllg RD, RD, 3
+ | lg RD, 0(RD, KBASE)
+ | settp RD, LJ_TSTR
+ | sllg RA, RA, 3
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | ins_AND // RA = dst, RD = cdata const (~)
+ | sllg RD, RD, 3
+ | sllg RA, RA, 3
+ | lg RD, 0(RD, KBASE)
+ | settp RD, LJ_TCDATA
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ |.endif
+ break;
+ case BC_KSHORT:
+ | ins_AD // RA = dst, RD = signed int16 literal
+ | // Assumes DUALNUM.
+ | lhr RD, RD // Sign-extend literal to 32-bits.
+ | setint RD
+ | sllg RA, RA, 3
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KNUM:
+ | ins_AD // RA = dst, RD = num const
+ | sllg RD, RD, 3
+ | ld f0, 0(RD, KBASE)
+ | sllg RA, RA, 3
+ | std f0, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KPRI:
+ | ins_AD // RA = dst, RD = primitive type (~)
+ | sllg RA, RA, 3
+ | sllg RD, RD, 47
+ | lghi TMPR0, -1
+ | xgr RD, TMPR0 // not
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KNIL:
+ | ins_AD // RA = dst_start, RD = dst_end
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | la RA, 8(RA, BASE)
+ | la RD, 0(RD, BASE)
+ | lghi RB, LJ_TNIL
+ | stg RB, -8(RA) // Sets minimum 2 slots.
+ |1:
+ | stg RB, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RD
+ | jle <1
+ | ins_next
+ break;
+
+/* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | ins_AD // RA = dst, RD = upvalue #
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB)
+ | lg RB, UPVAL:RB->v
+ | lg RD, 0(RB)
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_USETV:
+#define TV2MARKOFS \
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
+ | ins_AD // RA = upvalue #, RD = src
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | sllg RA, RA, 3
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | tm UPVAL:RB->closed, 0xff
+ | lg RB, UPVAL:RB->v
+ | sllg TMPR1, RD, 3
+ | lg RA, 0(TMPR1, BASE)
+ | stg RA, 0(RB)
+ | je >1
+ | // Check barrier for closed upvalue.
+ | tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
+ | jne >2
+ |1:
+ | ins_next
+ |
+ |2: // Upvalue is black. Check if new value is collectable and white.
+ | srag RD, RA, 47
+ | ahi RD, -LJ_TISGCV
+ | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
+ | jle <1
+ | cleartp GCOBJ:RA
+ | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
+ | je <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | lgr CARG2, RB
+ | lay GL:CARG1, GG_DISP2G(DISPATCH)
+ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | j <1
+ break;
+#undef TV2MARKOFS
+ case BC_USETS:
+ | ins_AND // RA = upvalue #, RD = str const (~)
+ | lg LFUNC:RB, -16(BASE)
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | lg STR:RA, 0(RD, KBASE)
+ | lg RD, UPVAL:RB->v
+ | settp STR:ITYPE, STR:RA, LJ_TSTR
+ | stg STR:ITYPE, 0(RD)
+ | tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+ | jne >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
+ | je <1
+ | tm UPVAL:RB->closed, 0xff
+ | je <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | lgr CARG2, RD
+ | lay GL:CARG1, GG_DISP2G(DISPATCH)
+ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | j <1
+ break;
+ case BC_USETN:
+ | ins_AD // RA = upvalue #, RD = num const
+ | lg LFUNC:RB, -16(BASE)
+ | sllg RA, RA, 3
+ | sllg RD, RD, 3
+ | cleartp LFUNC:RB
+ | ld f0, 0(RD, KBASE)
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | lg RA, UPVAL:RB->v
+ | std f0, 0(RA)
+ | ins_next
+ break;
+ case BC_USETP:
+ | ins_AD // RA = upvalue #, RD = primitive type (~)
+ | lg LFUNC:RB, -16(BASE)
+ | sllg RA, RA, 3
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | sllg RD, RD, 47
+ | lghi TMPR0, -1
+ | xgr RD, TMPR0
+ | lg RA, UPVAL:RB->v
+ | stg RD, 0(RA)
+ | ins_next
+ break;
+ case BC_UCLO:
+ | ins_AD // RA = level, RD = target
+ | branchPC RD // Do this first to free RD.
+ | lg L:RB, SAVE_L
+ | ltg TMPR0, L:RB->openupval
+ | je >1
+ | stg BASE, L:RB->base
+ | sllg RA, RA, 3
+ | la CARG2, 0(RA, BASE)
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | lg BASE, L:RB->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lg CARG3, -16(BASE)
+ | cleartp CARG3
+ | sllg RD, RD, 3
+ | lg CARG2, 0(RD, KBASE) // Fetch GCproto *.
+ | lgr CARG1, L:RB
+ | stg PC, SAVE_PC
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | brasl r14, extern lj_func_newL_gc
+ | // GCfuncL * returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | settp LFUNC:CRET1, LJ_TFUNC
+ | stg LFUNC:CRET1, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_TNEW:
+ | ins_AD // RA = dst, RD = hbits|asize
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
+ | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
+ | stg PC, SAVE_PC
+ | jhe >5
+ |1:
+ | srlg CARG3, RD, 11
+ | llill TMPR0, 0x7ff
+ | nr RD, TMPR0
+ | cr RD, TMPR0
+ | je >3
+ |2:
+ | lgr L:CARG1, L:RB
+ | llgfr CARG2, RD
+ | brasl r14, extern lj_tab_new // (lua_State *L, uint32_t asize, uint32_t hbits)
+ | // Table * returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+ | settp TAB:CRET1, LJ_TTAB
+ | stg TAB:CRET1, 0(RA, BASE)
+ | ins_next
+ |3: // Turn 0x7ff into 0x801.
+ | llill RD, 0x801
+ | j <2
+ |5:
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
+ | llgh RD, PC_RD
+ | j <1
+ break;
+ case BC_TDUP:
+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
+ | lg L:RB, SAVE_L
+ | lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
+ | stg PC, SAVE_PC
+ | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
+ | stg BASE, L:RB->base
+ | jhe >3
+ |2:
+ | sllg RD, RD, 3
+ | lg TAB:CARG2, 0(RD, KBASE)
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Table * returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA
+ | settp TAB:CRET1, LJ_TTAB
+ | sllg RA, RA, 3
+ | stg TAB:CRET1, 0(RA, BASE)
+ | ins_next
+ |3:
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
+ | llgh RD, PC_RD // Need to reload RD.
+ | lghi TMPR0, -1
+ | xgr RD, TMPR0 // not RD
+ | j <2
+ break;
+
+ case BC_GGET:
+ | ins_AND // RA = dst, RD = str const (~)
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg TAB:RB, LFUNC:RB->env
+ | sllg TMPR1, RD, 3
+ | lg STR:RC, 0(TMPR1, KBASE)
+ | j ->BC_TGETS_Z
+ break;
+ case BC_GSET:
+ | ins_AND // RA = src, RD = str const (~)
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg TAB:RB, LFUNC:RB->env
+ | sllg TMPR1, RD, 3
+ | lg STR:RC, 0(TMPR1, KBASE)
+ | j ->BC_TSETS_Z
+ break;
+
+ case BC_TGETV: /* Table load with a variable key: integer keys use the array part, string keys continue at BC_TGETS_Z, the rest goes to vmeta_tgetv. */
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | checktab TAB:RB, ->vmeta_tgetv
+ |
+ | // Integer key?
+ | checkint RC, >5
+ | cl RC, TAB:RB->asize // Takes care of unordered, too.
+ | jhe ->vmeta_tgetv // Not in array part? Use fallback.
+ | llgfr RC, RC
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Get array slot.
+ | lg ITYPE, 0(RC)
+ | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >2
+ |1:
+ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1
+ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | je ->vmeta_tgetv // 'no __index' flag NOT set: check.
+ | j <1
+ |
+ |5: // String key?
+ | cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv
+ | cleartp STR:RC
+ | j ->BC_TGETS_Z
+ break;
+ case BC_TGETS: /* Table load with a constant string key: walks the hash chain; a nil result consults the __index cache bit. */
+ | ins_ABC
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | lghi TMPR1, -1
+ | xgr RC, TMPR1 // ~RC
+ | sllg RC, RC, 3
+ | lg STR:RC, 0(RC, KBASE)
+ | checktab TAB:RB, ->vmeta_tgets
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
+ | l TMPR1, TAB:RB->hmask
+ | n TMPR1, STR:RC->sid
+ | lgfr TMPR1, TMPR1
+ | mghi TMPR1, #NODE
+ | ag NODE:TMPR1, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR
+ |1:
+ | cg ITYPE, NODE:TMPR1->key
+ | jne >4
+ | // Get node value.
+ | lg ITYPE, NODE:TMPR1->val
+ | cghi ITYPE, LJ_TNIL
+ | je >5 // Key found, but nil value?
+ |2:
+ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+ |4: // Follow hash chain.
+ | lg NODE:TMPR1, NODE:TMPR1->next
+ | cghi NODE:TMPR1, 0
+ | jne <1
+ | // End of hash chain: key not found, nil result.
+ | lghi ITYPE, LJ_TNIL
+ |
+ |5: // Check for __index if table value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <2 // No metatable: done.
+ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | jne <2 // 'no __index' flag set: done.
+ | j ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+ case BC_TGETB: /* Table load with a byte-literal key from the array part; out-of-range keys go to vmeta_tgetb. */
+ | ins_ABC // RA = dst, RB = table, RC = byte literal
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | checktab TAB:RB, ->vmeta_tgetb
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tgetb // Not in array part? Use fallback.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Get array slot.
+ | lg ITYPE, 0(RC)
+ | cghi ITYPE, LJ_TNIL
+ | je >2
+ |1:
+ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1
+ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | je ->vmeta_tgetb // 'no __index' flag NOT set: check.
+ | j <1
+ break;
+ case BC_TGETR:
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | cleartp TAB:RB
+ | sllg RC, RC, 3
+ | llgf RC, 4(RC, BASE) // Load low word (big endian).
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tgetr // Not in array part? Use fallback.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Get array slot.
+ |->BC_TGETR_Z:
+ | lg ITYPE, 0(RC)
+ |->BC_TGETR2_Z:
+ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ break;
+
+ case BC_TSETV: /* Table store with a variable key: integer keys use the array part, string keys continue at BC_TSETS_Z, the rest goes to vmeta_tsetv. */
+ | ins_ABC // RA = src, RB = table, RC = key
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | checktab TAB:RB, ->vmeta_tsetv
+ |
+ | // Integer key?
+ | checkint RC, >5
+ | cl RC, TAB:RB->asize // Takes care of unordered, too.
+ | jhe ->vmeta_tsetv
+ | llgfr RC, RC
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, 0(RC)
+ | je >3 // Previous value is nil?
+ |1:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2: // Set array slot.
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | stg RB, 0(RC)
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1
+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
+ | j <1
+ |
+ |5: // String key?
+ | cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv
+ | cleartp STR:RC
+ | j ->BC_TSETS_Z
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR1
+ | j <2
+ break;
+ case BC_TSETS: /* Table store with a constant string key: updates an existing hash node or adds one via lj_tab_newkey, honouring __newindex. */
+ | ins_ABC // RA = src, RB = table, RC = str const (~)
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | lghi TMPR0, -1
+ | xgr RC, TMPR0 // ~RC
+ | sllg RC, RC, 3
+ | lg STR:RC, 0(RC, KBASE)
+ | checktab TAB:RB, ->vmeta_tsets
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
+ | l TMPR1, TAB:RB->hmask
+ | n TMPR1, STR:RC->sid
+ | lgfr TMPR1, TMPR1
+ | mghi TMPR1, #NODE
+ | mvi TAB:RB->nomm, 0 // Clear metamethod cache.
+ | ag NODE:TMPR1, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR
+ |1:
+ | cg ITYPE, NODE:TMPR1->key
+ | jne >5
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, 0(TMPR1)
+ | je >4 // Previous value is nil?
+ |2:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |3: // Set node value.
+ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(TMPR1)
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | lg TAB:ITYPE, TAB:RB->metatable
+ | cghi TAB:ITYPE, 0
+ | je <2
+ | tm TAB:ITYPE->nomm, 1<<MM_newindex
+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ | j <2
+ |
+ |5: // Follow hash chain.
+ | lg NODE:TMPR1, NODE:TMPR1->next
+ | cghi NODE:TMPR1, 0
+ | jne <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je >6 // No metatable: continue.
+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | stg ITYPE, SAVE_TMP
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | la CARG3, SAVE_TMP
+ | lgr CARG2, TAB:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Handles write barrier for the new key. TValue * returned in r2 (CRET1).
+ | lgr TMPR1, CRET1
+ | lg L:CRET1, SAVE_L
+ | lg BASE, L:CRET1->base
+ | llgc RA, PC_RA
+ | j <2 // Must check write barrier for value.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, ITYPE
+ | j <3
+ break;
+ case BC_TSETB: /* Table store with a byte-literal key into the array part; out-of-range keys go to vmeta_tsetb. */
+ | ins_ABC // RA = src, RB = table, RC = byte literal
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | checktab TAB:RB, ->vmeta_tsetb
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tsetb // Not in array part? Use fallback.
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, 0(RC)
+ | je >3 // Previous value is nil?
+ |1:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2: // Set array slot.
+ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(RC)
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1
+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
+ | j <1
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR1
+ | j <2
+ break;
+ case BC_TSETR:
+ | ins_ABC // RA = src, RB = table, RC = key
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | cleartp TAB:RB
+ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2:
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tsetr
+ | llgfr RC, RC
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Set array slot.
+ |->BC_TSETR_Z:
+ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(RC)
+ | ins_next
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR1
+ | j <2
+ break;
+
+ case BC_TSETM:
+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
+ |1:
+ | sllg RA, RA, 3
+ | sllg TMPR1, RD, 3
+ | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word.
+ | la RA, 0(RA, BASE)
+ | lg TAB:RB, -8(RA) // Guaranteed to be a table.
+ | cleartp TAB:RB
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2:
+ | llgf RD, SAVE_MULTRES
+ | aghi RD, -1
+ | je >4 // Nothing to copy?
+ | agr RD, TMPR1 // Compute needed size.
+ | clgf RD, TAB:RB->asize
+ | jh >5 // Doesn't fit into array part?
+ | sgr RD, TMPR1
+ | sllg TMPR1, TMPR1, 3
+ | ag TMPR1, TAB:RB->array
+ |3: // Copy result slots to table.
+ | lg RB, 0(RA)
+ | la RA, 8(RA)
+ | stg RB, 0(TMPR1)
+ | la TMPR1, 8(TMPR1)
+ | brctg RD, <3
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgr CARG2, TAB:RB
+ | lgfr CARG3, RD
+ | lgr L:RB, L:CARG1
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA // Restore RA.
+ | llgh RD, PC_RD // Restore RD.
+ | j <1 // Retry.
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:RB, RD
+ | j <2
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALL: case BC_CALLM:
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+ | sllg RA, RA, 3
+ | lgr RD, RC
+ if (op == BC_CALLM) {
+ | agf NARGS:RD, SAVE_MULTRES
+ }
+ | lg LFUNC:RB, 0(RA, BASE)
+ | checkfunc LFUNC:RB, ->vmeta_call_ra
+ | la BASE, 16(RA, BASE)
+ | ins_call
+ break;
+
+ case BC_CALLMT:
+ | ins_AD // RA = base, RD = extra_nargs
+ | a NARGS:RD, SAVE_MULTRES
+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
+ break;
+ case BC_CALLT:
+ | ins_AD // RA = base, RD = nargs+1
+ | sllg RA, RA, 3
+ | la RA, 16(RA, BASE)
+ | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint.
+ | lg LFUNC:RB, -16(RA)
+ | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
+ |->BC_CALLT_Z:
+ | lg PC, -8(BASE)
+ | tmll PC, FRAME_TYPE
+ | jne >7
+ |1:
+ | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below.
+ | st NARGS:RD, SAVE_MULTRES
+ | aghi NARGS:RD, -1
+ | je >3
+ |2: // Move args down.
+ | lg RB, 0(RA)
+ | la RA, 8(RA)
+ | stg RB, 0(KBASE)
+ | la KBASE, 8(KBASE)
+ | brctg NARGS:RD, <2
+ |
+ | lg LFUNC:RB, -16(BASE)
+ |3:
+ | cleartp LFUNC:RB
+ | llgf NARGS:RD, SAVE_MULTRES
+ | llgc TMPR1, LFUNC:RB->ffid
+ | cghi TMPR1, 1 // (> FF_C) Calling a fast function?
+ | jh >5
+ |4:
+ | ins_callt
+ |
+ |5: // Tailcall to a fast function.
+ | tmll PC, FRAME_TYPE // Lua frame below?
+ | jne <4
+ | llgc RA, PC_RA
+ | lcgr RA, RA
+ | sllg RA, RA, 3
+ | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE.
+ | cleartp LFUNC:KBASE
+ | lg KBASE, LFUNC:KBASE->pc
+ | lg KBASE, (PC2PROTO(k))(KBASE)
+ | j <4
+ |
+ |7: // Tailcall from a vararg function.
+ | aghi PC, -FRAME_VARG
+ | tmll PC, FRAME_TYPEP
+ | jne >8 // Vararg frame below?
+ | sgr BASE, PC // Need to relocate BASE/KBASE down.
+ | lgr KBASE, BASE
+ | lg PC, -8(BASE)
+ | j <1
+ |8:
+ | aghi PC, FRAME_VARG
+ | j <1
+ break;
+
+ case BC_ITERC:
+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+ | sllg RA, RA, 3
+ | la RA, 16(RA, BASE) // fb = base+2
+ | lg RB, -32(RA) // Copy state. fb[0] = fb[-4].
+ | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3].
+ | stg RB, 0(RA)
+ | stg RC, 8(RA)
+ | lg LFUNC:RB, -40(RA) // Copy callable. fb[-2] = fb[-5]
+ | stg LFUNC:RB, -16(RA)
+ | lghi NARGS:RD, 2+1 // Handle like a regular 2-arg call.
+ | checkfunc LFUNC:RB, ->vmeta_call
+ | lgr BASE, RA
+ | ins_call
+ break;
+
+ case BC_ITERN:
+ |.if JIT
+ | hotloop RB // NYI: add hotloop, record BC_ITERN.
+ |.endif
+ |->vm_IITERN:
+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ | sllg RA, RA, 3
+ | lg TAB:RB, -16(RA, BASE)
+ | cleartp TAB:RB
+ | llgf RC, -4(RA, BASE) // Get index from control var.
+ | llgf TMPR1, TAB:RB->asize
+ | la PC, 4(PC)
+ | lg ITYPE, TAB:RB->array
+ |1: // Traverse array part.
+ | clr RC, TMPR1; jhe >5 // Index points after array part?
+ | sllg RD, RC, 3 // Warning: won't work if RD==RC!
+ | lg TMPR0, 0(RD, ITYPE)
+ | cghi TMPR0, LJ_TNIL; je >4
+ | // Copy array slot to returned value.
+ | lgr RB, TMPR0
+ | stg RB, 8(RA, BASE)
+ | // Return array index as a numeric key.
+ | setint ITYPE, RC
+ | stg ITYPE, 0(RA, BASE)
+ | ahi RC, 1
+ | sty RC, -4(RA, BASE) // Update control var.
+ |2:
+ | llgh RD, PC_RD // Get target from ITERL.
+ | branchPC RD
+ |3:
+ | ins_next
+ |
+ |4: // Skip holes in array part.
+ | ahi RC, 1
+ | j <1
+ |
+ |5: // Traverse hash part.
+ | sr RC, TMPR1
+ |6:
+ | cl RC, TAB:RB->hmask; jh <3 // End of iteration? Branch to ITERL+1.
+ | llgfr ITYPE, RC
+ | mghi ITYPE, #NODE
+ | ag NODE:ITYPE, TAB:RB->node
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, NODE:ITYPE->val; je >7
+ | ar TMPR1, RC
+ | ahi TMPR1, 1
+ | // Copy key and value from hash slot.
+ | lg RB, NODE:ITYPE->key
+ | lg RC, NODE:ITYPE->val
+ | stg RB, 0(RA, BASE)
+ | stg RC, 8(RA, BASE)
+ | sty TMPR1, -4(RA, BASE)
+ | j <2
+ |
+ |7: // Skip holes in hash part.
+ | ahi RC, 1
+ | j <6
+ break;
+
+ case BC_ISNEXT:
+ | ins_AD // RA = base, RD = target (points to ITERN)
+ | sllg RA, RA, 3
+ | lg CFUNC:RB, -24(RA, BASE)
+ | checkfunc CFUNC:RB, >5
+ | lg TMPR1, -16(RA, BASE)
+ | checktptp TMPR1, LJ_TTAB, >5
+ | lghi TMPR0, LJ_TNIL
+ | cg TMPR0, -8(RA, BASE); jne >5
+ | llgc TMPR1, CFUNC:RB->ffid
+ | clfi TMPR1, (uint8_t)FF_next_N; jne >5
+ | branchPC RD
+ | llihl TMPR1, 0x7fff
+ | iihh TMPR1, 0xfffe
+ | stg TMPR1, -8(RA, BASE) // Initialize control var.
+ |1:
+ | ins_next
+ |5: // Despecialize bytecode if any of the checks fail.
+ | lghi TMPR0, BC_JMP
+ | stcy TMPR0, PC_OP
+ | branchPC RD
+ | mvi 3(PC), BC_ITERC
+ | j <1
+ break;
+
+ case BC_VARG:
+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
+ | sllg RA, RA, 3
+ | sllg RB, RB, 3
+ | sllg RC, RC, 3
+ | la TMPR1, (16+FRAME_VARG)(RC, BASE)
+ | la RA, 0(RA, BASE)
+ | sg TMPR1, -8(BASE)
+ | // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams.
+ | cghi RB, 0
+ | je >5 // Copy all varargs?
+ | lay RB, -8(RA, RB)
+ | clgr TMPR1, BASE // No vararg slots?
+ | lghi TMPR0, LJ_TNIL
+ | jnl >2
+ |1: // Copy vararg slots to destination slots.
+ | lg RC, -16(TMPR1)
+ | la TMPR1, 8(TMPR1)
+ | stg RC, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RB // All destination slots filled?
+ | jnl >3
+ | clgr TMPR1, BASE // No more vararg slots?
+ | jl <1
+ |2: // Fill up remainder with nil.
+ | stg TMPR0, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RB
+ | jl <2
+ |3:
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | lghi TMPR0, 1
+ | st TMPR0, SAVE_MULTRES // MULTRES = 0+1
+ | lgr RC, BASE
+ | slgr RC, TMPR1
+ | jno <3 // No vararg slots? (borrow or zero)
+ | llgfr RB, RC
+ | srlg RB, RB, 3
+ | ahi RB, 1
+ | st RB, SAVE_MULTRES // MULTRES = #varargs+1
+ | lg L:RB, SAVE_L
+ | agr RC, RA
+ | clg RC, L:RB->maxstack
+ | jh >7 // Need to grow stack?
+ |6: // Copy all vararg slots.
+ | lg RC, -16(TMPR1)
+ | la TMPR1, 8(TMPR1)
+ | stg RC, 0(RA)
+ | la RA, 8(RA)
+ | clgr TMPR1, BASE // No more vararg slots?
+ | jl <6
+ | j <3
+ |
+ |7: // Grow stack for varargs.
+ | stg BASE, L:RB->base
+ | stg RA, L:RB->top
+ | stg PC, SAVE_PC
+ | sgr TMPR1, BASE // Need delta, because BASE may change.
+ | st TMPR1, SAVE_TMP_HI
+ | llgf CARG2, SAVE_MULTRES
+ | aghi CARG2, -1
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->base
+ | lgf TMPR1, SAVE_TMP_HI
+ | lg RA, L:RB->top
+ | agr TMPR1, BASE
+ | j <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | ins_AD // RA = results, RD = extra_nresults
+ | agf RD, SAVE_MULTRES // MULTRES >=1, so RD >=1.
+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+ break;
+
+ case BC_RET: case BC_RET0: case BC_RET1:
+ | ins_AD // RA = results, RD = nresults+1
+ if (op != BC_RET0) {
+ | sllg RA, RA, 3
+ }
+ |1:
+ | lg PC, -8(BASE)
+ | st RD, SAVE_MULTRES // Save nresults+1.
+ | tmll PC, FRAME_TYPE // Check frame type marker.
+ | jne >7 // Not returning to a fixarg Lua func?
+ switch (op) {
+ case BC_RET:
+ |->BC_RET_Z:
+ | lgr KBASE, BASE // Use KBASE for result move.
+ | aghi RD, -1
+ | je >3
+ |2: // Move results down.
+ | lg RB, 0(KBASE, RA)
+ | stg RB, -16(KBASE)
+ | la KBASE, 8(KBASE)
+ | brctg RD, <2
+ |3:
+ | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256.
+ | llgc RB, PC_RB
+ |5:
+ | cgr RB, RD // More results expected?
+ | jh >6
+ break;
+ case BC_RET1:
+ | lg RB, 0(BASE, RA)
+ | stg RB, -16(BASE)
+ /* fallthrough */
+ case BC_RET0:
+ |5:
+ | llgc TMPR1, PC_RB
+ | cgr TMPR1, RD
+ | jh >6
+ default:
+ break;
+ }
+ | llgc RA, PC_RA
+ | lcgr RA, RA
+ | sllg RA, RA, 3
+ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
+ | lg LFUNC:KBASE, -16(BASE)
+ | cleartp LFUNC:KBASE
+ | lg KBASE, LFUNC:KBASE->pc
+ | lg KBASE, PC2PROTO(k)(KBASE)
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | lghi TMPR1, LJ_TNIL
+ if (op == BC_RET) {
+ | stg TMPR1, -16(KBASE) // Note: relies on shifted base.
+ | la KBASE, 8(KBASE)
+ } else {
+ | sllg RC, RD, 3 // RC used as temp.
+ | stg TMPR1, -24(RC, BASE)
+ }
+ | la RD, 1(RD)
+ | j <5
+ |
+ |7: // Non-standard return case.
+ | lay RB, -FRAME_VARG(PC)
+ | tmll RB, FRAME_TYPEP
+ | jne ->vm_return
+ | // Return from vararg function: relocate BASE down and RA up.
+ | sgr BASE, RB
+ if (op != BC_RET0) {
+ | agr RA, RB
+ }
+ | j <1
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ |.define FOR_IDX, 0(RA)
+ |.define FOR_STOP, 8(RA)
+ |.define FOR_STEP, 16(RA)
+ |.define FOR_EXT, 24(RA)
+
+ case BC_FORL:
+ {
+ |.if JIT
+ | hotloop RB
+ |.endif
+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+ break;
+ }
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
+ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE)
+ | lg RB, FOR_IDX
+ | checkint RB, >9
+ | lg TMPR1, FOR_STOP
+ if (!vk) {
+ | checkint TMPR1, ->vmeta_for
+ | lg ITYPE, FOR_STEP
+ | chi ITYPE, 0; jl >5
+ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
+ } else {
+#ifdef LUA_USE_ASSERT
+ | // lg TMPR1, FOR_STOP
+ | checkinttp TMPR1, ->assert_bad_for_arg_type
+ | lg TMPR0, FOR_STEP
+ | checkinttp TMPR0, ->assert_bad_for_arg_type
+#endif
+ | lg ITYPE, FOR_STEP
+ | chi ITYPE, 0; jl >5
+ | ar RB, ITYPE; jo >1
+ | setint RB
+ | stg RB, FOR_IDX
+ }
+ | cr RB, TMPR1
+ | stg RB, FOR_EXT
+ if (op == BC_FORI) {
+ | jle >7
+ |1:
+ |6:
+ | branchPC RD
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | llgh RD, PC_RD
+ | jle =>BC_JLOOP
+ |1:
+ |6:
+ } else if (op == BC_IFORL) {
+ | jh >7
+ |6:
+ | branchPC RD
+ |1:
+ } else {
+ | jle =>BC_JLOOP
+ |1:
+ |6:
+ }
+ |7:
+ | ins_next
+ |
+ |5: // Invert check for negative step.
+ if (!vk) {
+ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
+ } else {
+ | ar RB, ITYPE; jo <1
+ | setint RB
+ | stg RB, FOR_IDX
+ }
+ | cr RB, TMPR1
+ | stg RB, FOR_EXT
+ if (op == BC_FORI) {
+ | jhe <7
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | llgh RD, PC_RD
+ | jhe =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | jl <7
+ } else {
+ | jhe =>BC_JLOOP
+ }
+ | j <6
+ |9: // Fallback to FP variant.
+ if (!vk) {
+ | jhe ->vmeta_for
+ }
+ if (!vk) {
+ | lg TMPR0, FOR_STOP
+ | checknumtp TMPR0, ->vmeta_for
+ } else {
+#ifdef LUA_USE_ASSERT
+ | lg TMPR0, FOR_STOP
+ | checknumtp TMPR0, ->assert_bad_for_arg_type
+ | lg TMPR0, FOR_STEP
+ | checknumtp TMPR0, ->assert_bad_for_arg_type
+#endif
+ }
+ | lg RB, FOR_STEP
+ if (!vk) {
+ | checknum RB, ->vmeta_for
+ }
+ | ld f0, FOR_IDX
+ | ld f1, FOR_STOP
+ if (vk) {
+ | adb f0, FOR_STEP
+ | std f0, FOR_IDX
+ }
+ | cghi RB, 0; jl >3
+ | cdbr f1, f0
+ |1:
+ | std f0, FOR_EXT
+ if (op == BC_FORI) {
+ | jnl <7
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | llgh RD, PC_RD
+ | jnl =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | jl <7
+ } else {
+ | jnl =>BC_JLOOP
+ }
+ | j <6
+ |
+ |3: // Invert comparison if step is negative.
+ | cdbr f0, f1
+ | j <1
+ break;
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop RB
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | ins_AJ // RA = base, RD = target
+ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE)
+ | lg RB, 0(RA)
+ | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | stg RB, -8(RA)
+ | j =>BC_JLOOP
+ } else {
+ | branchPC RD // Otherwise save control var + branch.
+ | stg RB, -8(RA)
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop RB
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+ break;
+
+ case BC_ILOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ | stg r0, 0
+ | stg r0, 0
+ break;
+
+ case BC_JMP:
+ | ins_AJ // RA = unused, RD = target
+ | branchPC RD
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ /*
+ ** Reminder: A function may be called with func/args above L->maxstack,
+ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
+ ** too. This means all FUNC* ops (including fast functions) must check
+ ** for stack overflow _before_ adding more slots!
+ */
+
+ case BC_FUNCF:
+ |.if JIT
+ |// stg r0, 0
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+ | lg KBASE, (PC2PROTO(k)-4)(PC)
+ | lg L:RB, SAVE_L
+ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE) // Top of frame.
+ | clg RA, L:RB->maxstack
+ | jh ->vm_growstack_f
+ | llgc RA, (PC2PROTO(numparams)-4)(PC)
+ | clgr NARGS:RD, RA // Check for missing parameters.
+ | jle >3
+ |2:
+ if (op == BC_JFUNCF) {
+ | llgh RD, PC_RD
+ | j =>BC_JLOOP
+ } else {
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | sllg TMPR1, NARGS:RD, 3
+ | lghi TMPR0, LJ_TNIL
+ |4:
+ | stg TMPR0, -8(TMPR1, BASE)
+ | la TMPR1, 8(TMPR1)
+ | la RD, 1(RD)
+ | clgr RD, RA
+ | jle <4
+ | j <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | stg r0, 0 // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+ | sllg TMPR1, NARGS:RD, 3
+ | la RB, (FRAME_VARG+8)(TMPR1)
+ | la RD, 8(TMPR1, BASE)
+ | lg LFUNC:KBASE, -16(BASE)
+ | stg RB, -8(RD) // Store delta + FRAME_VARG.
+ | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC.
+ | lg L:RB, SAVE_L
+ | sllg RA, RA, 3
+ | la RA, 0(RA, RD)
+ | cg RA, L:RB->maxstack
+ | jh ->vm_growstack_v // Need to grow stack.
+ | lgr RA, BASE
+ | lgr BASE, RD
+ | llgc RB, (PC2PROTO(numparams)-4)(PC)
+ | cghi RB, 0
+ | je >2
+ | aghi RA, 8
+ | lghi TMPR1, LJ_TNIL
+ |1: // Copy fixarg slots up to new frame.
+ | la RA, 8(RA)
+ | cgr RA, BASE
+ | jnl >3 // Less args than parameters?
+ | lg KBASE, -16(RA)
+ | stg KBASE, 0(RD)
+ | la RD, 8(RD)
+ | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC).
+ | brctg RB, <1
+ |2:
+ if (op == BC_JFUNCV) {
+ | llgh RD, PC_RD
+ | j =>BC_JLOOP
+ } else {
+ | lg KBASE, (PC2PROTO(k)-4)(PC)
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here.
+ | la RD, 8(RD)
+ | brctg RB, <3
+ | j <2
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | ins_AD // BASE = new base, RD = nargs+1
+ | lg CFUNC:RB, -16(BASE)
+ | cleartp CFUNC:RB
+ | lg KBASE, CFUNC:RB->f
+ | lg L:RB, SAVE_L
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD,BASE)
+ | stg BASE, L:RB->base
+ | la RA, (8*LUA_MINSTACK)(RD)
+ | clg RA, L:RB->maxstack
+ | stg RD, L:RB->top
+ | lgr CARG1, L:RB
+ if (op != BC_FUNCC) {
+ | lgr CARG2, KBASE
+ }
+ | jh ->vm_growstack_c // Need to grow stack.
+ | set_vmstate C
+ if (op == BC_FUNCC) {
+ | basr r14, KBASE // (lua_State *L)
+ } else {
+ | // (lua_State *L, lua_CFunction f)
+ | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH)
+ | basr r14, TMPR1
+ }
+ | // nresults returned in r2 (CRET1).
+ | lgr RD, CRET1
+ | lg BASE, L:RB->base
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+ | sllg TMPR1, RD, 3
+ | la RA, 0(TMPR1, BASE)
+ | lcgr RA, RA
+ | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+ | lg PC, -8(BASE) // Fetch PC of caller.
+ | j ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx) /* Build the whole VM: subroutines first, then one handler per bytecode op. */
+{
+ int op;
+ dasm_growpc(Dst, BC__MAX); /* One pc label per opcode; handlers are targeted via =>BC_xxx. */
+ build_subroutines(ctx);
+ |.code_op
+ for (op = 0; op < BC__MAX; op++) /* Emit handlers in opcode order; some rely on falling through to the next. */
+ build_ins(ctx, (BCOp)op, op);
+ return BC__MAX; /* Number of opcode handlers emitted. */
+}
+
+/* Emit pseudo frame-info (DWARF CFI) for all assembler functions. Only BUILD_elfasm produces output. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of lj_vm_ffi_call in the code. */
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe0:\n"
+ "\t.long .LECIE0-.LSCIE0\n"
+ ".LSCIE0:\n"
+ "\t.long 0xffffffff\n" /* CIE id */
+ "\t.byte 0x1\n" /* CIE version */
+ "\t.string \"\"\n" /* no augmentation */
+ "\t.uleb128 1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 0xe\n" /* return address register (r14) */
+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, 160 */
+ "\t.align 8\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE0:\n"
+ "\t.long .LEFDE0-.LASFDE0\n"
+ ".LASFDE0:\n"
+ "\t.long .Lframe0\n"
+ "\t.quad .Lbegin\n"
+ "\t.quad %d\n" /* range length: fcofs, i.e. everything before lj_vm_ffi_call */
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".LSFDE1:\n"
+ "\t.long .LEFDE1-.LASFDE1\n"
+ ".LASFDE1:\n"
+ "\t.long .Lframe0\n"
+ "\t.quad lj_vm_ffi_call\n"
+ "\t.quad %d\n" /* range length: codesz - fcofs, i.e. lj_vm_ffi_call to end of code */
+ "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */
+ "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */
+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe1:\n"
+ "\t.long .LECIE1-.LSCIE1\n"
+ ".LSCIE1:\n"
+ "\t.long 0\n" /* CIE id */
+ "\t.byte 0x1\n" /* CIE version */
+ "\t.string \"zPR\"\n" /* augmentation: personality + FDE pointer encoding */
+ "\t.uleb128 0x1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 0xe\n" /* return address register (r14) */
+ "\t.uleb128 6\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.long lj_err_unwind_dwarf-.\n" /* personality routine */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, 160 */
+ "\t.align 8\n"
+ ".LECIE1:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE2:\n"
+ "\t.long .LEFDE2-.LASFDE2\n"
+ ".LASFDE2:\n"
+ "\t.long .LASFDE2-.Lframe1\n"
+ "\t.long .Lbegin-.\n"
+ "\t.long %d\n" /* range length: fcofs */
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".Lframe2:\n"
+ "\t.long .LECIE2-.LSCIE2\n"
+ ".LSCIE2:\n"
+ "\t.long 0\n" /* CIE id */
+ "\t.byte 0x1\n" /* CIE version */
+ "\t.string \"zR\"\n" /* augmentation: FDE pointer encoding only, no personality */
+ "\t.uleb128 0x1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 0xe\n" /* return address register (r14) */
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, 160 */
+ "\t.align 8\n"
+ ".LECIE2:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE3:\n"
+ "\t.long .LEFDE3-.LASFDE3\n"
+ ".LASFDE3:\n"
+ "\t.long .LASFDE3-.Lframe2\n"
+ "\t.long lj_vm_ffi_call-.\n"
+ "\t.long %d\n" /* range length: codesz - fcofs */
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */
+ "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */
+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#endif
+ break;
+ default: /* No other modes. */
+ break;
+ }
+}
+
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 8dd48b84..02054033 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2455,6 +2455,19 @@ static void build_subroutines(BuildCtx *ctx)
| mov r13, [RA-8]
| mov r12, [RA]
| mov rsp, RA // Reposition stack to C frame.
+#ifdef LUA_USE_TRACE_LOGS
+ | mov CARG1, SAVE_L
+ | mov L:CARG1->base, BASE
+ | mov RB, RD // Save RD
+ | mov TMP1, PC // Save PC
+ | mov CARG3, PC // CARG3 == BASE
+ | mov CARG2d, dword [DISPATCH+DISPATCH_GL(vmstate)]
+ | call extern lj_log_trace_direct_exit@8
+ | mov PC, TMP1
+ | mov RD, RB
+ | mov RB, SAVE_L
+ | mov BASE, L:RB->base
+#endif
|.endif
| test RDd, RDd; js >9 // Check for error from exit.
| mov L:RB, SAVE_L
@@ -4512,6 +4525,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_JLOOP:
|.if JIT
| ins_AD // RA = base (ignored), RD = traceno
+#ifdef LUA_USE_TRACE_LOGS
+ |.if not X64WIN
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Save BASE
+ | mov TMP1, RD // Save RD
+ | mov CARG3, PC // CARG3 == BASE
+ | mov CARG2, RD
+ | mov CARG1, RB
+ | call extern lj_log_trace_entry@8
+ | mov RD, TMP1
+ | mov BASE, L:RB->base
+ |.endif
+#endif
| mov RA, [DISPATCH+DISPATCH_J(trace)]
| mov TRACE:RD, [RA+RD*8]
| mov RD, TRACE:RD->mcode
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index de12ac64..8d3c82c4 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -2905,6 +2905,21 @@ static void build_subroutines(BuildCtx *ctx)
| mov r13, TMPa
| mov r12, TMPQ
|.endif
+#ifdef LUA_USE_TRACE_LOGS
+ |.if X64
+ | mov FCARG1, SAVE_L
+ | mov L:FCARG1->base, BASE
+ | mov RB, RD // Save RD
+ | mov TMP1, PC // Save PC
+ | mov CARG3d, PC // CARG3d == BASE
+ | mov FCARG2, dword [DISPATCH+DISPATCH_GL(vmstate)]
+ | call extern lj_log_trace_direct_exit@8
+ | mov PC, TMP1
+ | mov RD, RB
+ | mov RB, SAVE_L
+ | mov BASE, L:RB->base
+ |.endif
+#endif
| test RD, RD; js >9 // Check for error from exit.
| mov L:RB, SAVE_L
| mov MULTRES, RD
@@ -5306,6 +5321,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_JLOOP:
|.if JIT
| ins_AD // RA = base (ignored), RD = traceno
+#ifdef LUA_USE_TRACE_LOGS
+ |.if X64
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Save BASE
+ | mov TMP1, RD // Save RD
+ | mov CARG3d, PC // CARG3d == BASE
+ | mov FCARG2, RD
+ | mov FCARG1, RB
+ | call extern lj_log_trace_entry@8
+ | mov RD, TMP1
+ | mov BASE, L:RB->base
+ |.endif
+#endif
| mov RA, [DISPATCH+DISPATCH_J(trace)]
| mov TRACE:RD, [RA+RD*4]
| mov RDa, TRACE:RD->mcode
diff --git a/src/x64/Makefile b/src/x64/Makefile
new file mode 100644
index 00000000..27277140
--- /dev/null
+++ b/src/x64/Makefile
@@ -0,0 +1,13 @@
+.PHONY: default test benchmark clean # Every target here is a command, not a file.
+
+default: # Print the list of available targets.
+	@echo "make target include: test benchmark clean"
+
+test: # Delegate to the Makefile in test/.
+	$(MAKE) -C test test
+
+benchmark: # Delegate to the Makefile in test/.
+	$(MAKE) -C test benchmark
+
+clean: # Delegate to the Makefile in test/.
+	$(MAKE) -C test clean
diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile
new file mode 100644
index 00000000..4326ab3d
--- /dev/null
+++ b/src/x64/test/Makefile
@@ -0,0 +1,47 @@
+.PHONY: default test benchmark clean # clean added: it never produces a file named "clean".
+
+default: test benchmark
+
+COMMON_OBJ := test_util.o
+
+TEST_PROGRAM := ht_test
+BENCHMARK_PROGRAM := ht_benchmark
+
+TEST_PROGRAM_OBJ := $(COMMON_OBJ) test.o
+BENCHMARK_PROGRAM_OBJ := $(COMMON_OBJ) benchmark.o
+
+ifeq ($(WITH_VALGRIND), 1) # WITH_VALGRIND=1 runs the unit test under valgrind.
+  VALGRIND := valgrind --leak-check=full
+else
+  VALGRIND :=
+endif
+
+CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src
+
+%.o: %.cxx # CXXFLAGS already carries -MD, so it is not repeated in the recipe.
+	$(CXX) $(CXXFLAGS) -c $<
+
+test: $(TEST_PROGRAM) # Unit test binary first, then a Lua smoke test using ../../luajit.
+	@echo "some unit test"
+	$(VALGRIND) ./$(TEST_PROGRAM)
+
+	@echo "smoke test"
+	../../luajit test_str_comp.lua
+
+benchmark: $(BENCHMARK_PROGRAM)
+	# micro benchmark
+	./$(BENCHMARK_PROGRAM)
+
+$(TEST_PROGRAM) : $(TEST_PROGRAM_OBJ) # Also gathers the per-object .d files into dep1.txt.
+	cat $(TEST_PROGRAM_OBJ:.o=.d) > dep1.txt
+	$(CXX) $+ $(CXXFLAGS) -lm -o $@
+
+$(BENCHMARK_PROGRAM): $(BENCHMARK_PROGRAM_OBJ) # Also gathers the per-object .d files into dep2.txt.
+	cat $(BENCHMARK_PROGRAM_OBJ:.o=.d) > dep2.txt
+	$(CXX) $+ $(CXXFLAGS) -o $@
+
+-include dep1.txt
+-include dep2.txt
+
+clean:
+	-rm -f *.o *.d dep*.txt $(BENCHMARK_PROGRAM) $(TEST_PROGRAM)
diff --git a/src/x64/test/benchmark.cxx b/src/x64/test/benchmark.cxx
new file mode 100644
index 00000000..1ea8fb6b
--- /dev/null
+++ b/src/x64/test/benchmark.cxx
@@ -0,0 +1,360 @@
+#include <sys/time.h> // for gettimeofday()
+extern "C" {
+#define LUAJIT_SECURITY_STRHASH 1
+#include "../../lj_str.h"
+str_sparse_hashfn hash_sparse;
+str_dense_hashfn hash_dense;
+#include "../../lj_str_hash.c"
+}
+#include <string>
+#include <vector>
+#include <utility>
+#include <algorithm>
+#include "test_util.hpp"
+#include <stdio.h>
+#include <math.h>
+
+using namespace std;
+
+#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
+#define lj_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
+
+const char* separator = "-------------------------------------------";
+
+static uint32_t LJ_AINLINE // Baseline ("was") sparse hash that the benchmark compares against hash_sparse_sse42().
+original_hash_sparse(uint64_t seed, const char *str, size_t len) // Samples a few positions of str; requires len >= 1.
+{
+ uint32_t a, b, h = len ^ seed; // Initial state is len ^ seed, truncated to 32 bits.
+ if (len >= 4) { // Long path: unaligned 4-byte loads from start, end, middle and quarter.
+ a = lj_getu32(str); h ^= lj_getu32(str+len-4);
+ b = lj_getu32(str+(len>>1)-2);
+ h ^= b; h -= lj_rol(b, 14);
+ b += lj_getu32(str+(len>>2)-1);
+ a ^= h; a -= lj_rol(h, 11); // NOTE(review): this a/b/h round runs again after the if/else - confirm that is intended.
+ b ^= a; b -= lj_rol(a, 25);
+ h ^= b; h -= lj_rol(b, 16);
+ } else { // Short path (len < 4): single bytes at first, last and middle position.
+ a = *(const uint8_t *)str;
+ h ^= *(const uint8_t *)(str+len-1);
+ b = *(const uint8_t *)(str+(len>>1));
+ h ^= b; h -= lj_rol(b, 14);
+ }
+
+ a ^= h; a -= lj_rol(h, 11); // Final mixing round, applied on both paths.
+ b ^= a; b -= lj_rol(a, 25);
+ h ^= b; h -= lj_rol(b, 16);
+
+ return h;
+}
+
+static uint32_t original_hash_dense(uint64_t seed, uint32_t h, // Baseline ("was") dense hash; h is a previously computed hash value.
+ const char *str, size_t len)
+{
+ uint32_t b = lj_bswap(lj_rol(h ^ (uint32_t)(seed >> 32), 4)); // Start from h mixed with the high half of seed.
+ if (len > 12) { // Strings of 12 bytes or fewer are never read; only h and seed contribute.
+ uint32_t a = (uint32_t)seed;
+ const char *pe = str+len-12, *p = pe, *q = str; // First pass reads the final 12 bytes, later passes walk from the start.
+ do {
+ a += lj_getu32(p);
+ b += lj_getu32(p+4);
+ h += lj_getu32(p+8);
+ p = q; q += 12; // Next 12-byte group from the front of the string.
+ h ^= b; h -= lj_rol(b, 14);
+ a ^= h; a -= lj_rol(h, 11);
+ b ^= a; b -= lj_rol(a, 25);
+ } while (p < pe); // Stop once the next group would start at or beyond the tail group.
+ h ^= b; h -= lj_rol(b, 16);
+ a ^= h; a -= lj_rol(h, 4);
+ b ^= a; b -= lj_rol(a, 14);
+ }
+ return b;
+}
+
+
+template<class T> double // T is callable as func(seed, buf, len) and yields a 32-bit hash.
+BenchmarkHashTmpl(T func, uint64_t seed, char* buf, size_t len) // Returns timer.getElapseInSecond() for the whole loop.
+{
+  TestClock timer;
+  uint32_t h = 0;
+
+  timer.start();
+  for(int i = 1; i < 1000000 * 100; i++) {
+    // Change one byte per iteration so the hashed buffer is not loop invariant.
+    buf[i % 4096] = i; // Indexes up to 4095, so buf must hold at least 4096 bytes.
+    h += func(seed, buf, len) ^ i;
+  }
+  timer.stop();
+
+  // Print the accumulated value so the hash calls cannot be optimized away.
+  test_printf("%x", h);
+  return timer.getElapseInSecond();
+}
+
+struct TestFuncWasSparse // Functor passed to the benchmark templates; forwards to original_hash_sparse().
+{
+ uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) {
+ return original_hash_sparse(seed, buf, len);
+ }
+};
+
+struct TestFuncIsSparse // Functor passed to the benchmark templates; forwards to hash_sparse_sse42().
+{
+ uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) {
+ return hash_sparse_sse42(seed, buf, len);
+ }
+};
+
+struct TestFuncWasDense // Functor passed to the benchmark templates; forwards to original_hash_dense().
+{
+ uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) {
+ return original_hash_dense(seed, 42, buf, len); // 42 is a fixed stand-in for the incoming hash h.
+ }
+};
+
+struct TestFuncIsDense // Functor passed to the benchmark templates; forwards to hash_dense_sse42().
+{
+ uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) {
+ return hash_dense_sse42(seed, 42, buf, len); // 42 is a fixed stand-in for the incoming hash h.
+ }
+};
+
+static void // Times the four hash variants at each of a fixed list of lengths and prints one row per length.
+benchmarkIndividual(uint64_t seed, char* buf) // buf is written by the timing loops (see BenchmarkHashTmpl).
+{
+  fprintf(stdout,"\n\nCompare performance of particular len (in second)\n");
+  fprintf(stdout, "%-12s%-8s%-8s%s%-8s%-8s%s\n", "len",
+          "was (s)", "is (s)", "diff (s)",
+          "was (d)", "is (d)", "diff (d)");
+  fprintf(stdout, "-------------------------------------------\n");
+
+  uint32_t lens[] = {3, 4, 7, 10, 15, 16, 20, 32, 36, 63, 80, 100,
+                     120, 127, 280, 290, 400};
+  for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) {
+    uint32_t len = lens[i];
+    double e1 = BenchmarkHashTmpl(TestFuncWasSparse(), seed, buf, len); // was, sparse
+    double e2 = BenchmarkHashTmpl(TestFuncIsSparse(), seed, buf, len);  // is, sparse
+    double e3 = BenchmarkHashTmpl(TestFuncWasDense(), seed, buf, len);  // was, dense
+    double e4 = BenchmarkHashTmpl(TestFuncIsDense(), seed, buf, len);   // is, dense
+    fprintf(stdout, "len = %4u: %-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n", // %u because len is unsigned
+            len, e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3); // diff = time saved as a percentage of "was"
+  }
+}
+
+template<class T> double // T is callable as func(seed, buf, len) and yields a 32-bit hash.
+BenchmarkChangeLenTmpl(T func, uint64_t seed, char* buf, uint32_t* len_vect,
+                       uint32_t len_num) // len_vect holds len_num lengths that are hashed in turn on every iteration.
+{
+  TestClock timer;
+  uint32_t h = 0;
+
+  timer.start();
+  for(int i = 1; i < 1000000 * 100; i++) {
+    for (int j = 0; j < (int)len_num; j++) {
+      // Change one byte per call so the hashed buffer is not loop invariant.
+      buf[(i + j) % 4096] = i; // Indexes up to 4095, so buf must hold at least 4096 bytes.
+      h += func(seed, buf, len_vect[j]) ^ j;
+    }
+  }
+  timer.stop();
+
+  // Print the accumulated value so the hash calls cannot be optimized away.
+  test_printf("%x", h);
+  return timer.getElapseInSecond();
+}
+
+// Measures hashing cost when the string length changes from one call to the next.
+// Each row cycles through a short list of lengths, to show how the
+// length-dependent branches in the hash functions affect performance.
+static void
+benchmarkToggleLens(uint64_t seed, char* buf) // buf is written by the timing loops (see BenchmarkChangeLenTmpl).
+{
+ double e1, e2, e3, e4; // was sparse, is sparse, was dense, is dense
+ fprintf(stdout,"\nChanging length (in second):");
+ fprintf(stdout, "\n%-24s%-8s%-8s%s%-8s%-8s%s\n%s\n", "len",
+ "was (s)", "is (s)", "diff (s)",
+ "was (d)", "is (d)", "diff (d)",
+ separator);
+
+ uint32_t lens1[] = {4, 9};
+ e1 = BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens1, 2);
+ e2 = BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens1, 2);
+ e3 = BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens1, 2);
+ e4 = BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens1, 2);
+ fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n", "4,9",
+ e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3); // diff = time saved as a percentage of "was"
+
+ uint32_t lens2[] = {1, 4, 9};
+ e1 = BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens2, 3);
+ e2 = BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens2, 3);
+ e3 = BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens2, 3);
+ e4 = BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens2, 3);
+ fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n", "1,4,9",
+ e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3);
+
+ uint32_t lens3[] = {1, 33, 4, 9};
+ e1 = BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens3, 4);
+ e2 = BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens3, 4);
+ e3 = BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens3, 4);
+ e4 = BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens3, 4);
+ fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n",
+ "1,33,4,9", e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3);
+
+ uint32_t lens4[] = {16, 33, 64, 89};
+ e1 = BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens4, 4);
+ e2 = BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens4, 4);
+ e3 = BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens4, 4);
+ e4 = BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens4, 4);
+ fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n",
+ "16,33,64,89", e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3);
+}
+
+static void // Replaces the contents of result with num random byte strings of length in [min, max).
+genRandomString(uint32_t min, uint32_t max,
+                uint32_t num, vector<string>& result)
+{
+  double scale = (max - min) / (RAND_MAX + 1.0); // Maps rand() onto [0, max - min).
+  result.clear();
+  result.reserve(num);
+  for (uint32_t i = 0; i < num; i++) {
+    uint32_t len = (rand() * scale) + min; // Truncation keeps len strictly below max.
+
+    char* buf = new char[len];
+    for (uint32_t l = 0; l < len; l++) {
+      buf[l] = rand() % 255; // Byte values 0..254; embedded NULs are possible.
+    }
+    result.push_back(string(buf, len)); // Length-aware constructor, so NUL bytes are kept.
+    delete[] buf;
+  }
+}
+
+// Return the population standard deviation (divides by v.size()) of the given counts; v must be non-empty.
+static double
+standarDeviation(const vector<uint32_t>& v)
+{
+  uint64_t total = 0; // 64-bit sum so that many 32-bit counts cannot overflow it.
+  for (vector<uint32_t>::const_iterator i = v.begin(), e = v.end();
+       i != e; ++i) {
+    total += *i;
+  }
+
+  double avg = total / (double)v.size();
+  double sd = 0; // Running sum of squared deviations from avg.
+
+  for (vector<uint32_t>::const_iterator i = v.begin(), e = v.end();
+       i != e; ++i) {
+    double t = avg - *i;
+    sd = sd + t*t;
+  }
+
+  return sqrt(sd/v.size());
+}
+
+static vector<double> // Returns 4 standard deviations of bucket occupancy: was sparse, is sparse, was dense, is dense.
+benchmarkConflictHelper(uint64_t seed, uint32_t bucketNum,
+                        const vector<string>& strs)
+{
+  if (bucketNum & (bucketNum - 1)) { // Not a power of two: round up so that "& mask" selects a bucket.
+    bucketNum = (1L << (log2_floor(bucketNum) + 1));
+  }
+  uint32_t mask = bucketNum - 1;
+
+  vector<uint32_t> conflictWasSparse(bucketNum); // Per-bucket hit counters, zero-initialized.
+  vector<uint32_t> conflictIsSparse(bucketNum);
+  vector<uint32_t> conflictWasDense(bucketNum);
+  vector<uint32_t> conflictIsDense(bucketNum);
+
+  conflictWasSparse.resize(bucketNum);
+  conflictIsSparse.resize(bucketNum);
+  conflictWasDense.resize(bucketNum);
+  conflictIsDense.resize(bucketNum);
+
+  for (vector<string>::const_iterator i = strs.begin(), e = strs.end();
+       i != e; ++i) {
+    uint32_t h1 = original_hash_sparse(seed, i->c_str(), i->size());
+    uint32_t h2 = hash_sparse_sse42(seed, i->c_str(), i->size());
+    uint32_t h3 = original_hash_dense(seed, h1, i->c_str(), i->size()); // Dense hashes take the sparse hash as input.
+    uint32_t h4 = hash_dense_sse42(seed, h2, i->c_str(), i->size());
+
+    conflictWasSparse[h1 & mask]++;
+    conflictIsSparse[h2 & mask]++;
+    conflictWasDense[h3 & mask]++;
+    conflictIsDense[h4 & mask]++;
+  }
+
+#if 0
+  std::sort(conflictWas.begin(), conflictWas.end(), std::greater<uint32_t>());
+  std::sort(conflictIs.begin(), conflictIs.end(), std::greater<uint32_t>());
+
+  fprintf(stderr, "%d %d %d %d vs %d %d %d %d\n",
+          conflictWas[0], conflictWas[1], conflictWas[2], conflictWas[3],
+          conflictIs[0], conflictIs[1], conflictIs[2], conflictIs[3]);
+#endif
+  vector<double> ret(4);
+  ret[0] = standarDeviation(conflictWasSparse);
+  ret[1] = standarDeviation(conflictIsSparse);
+  ret[2] = standarDeviation(conflictWasDense);
+  ret[3] = standarDeviation(conflictIsDense);
+
+  return ret;
+}
+
+static void // Prints bucket-occupancy spread for every (length range, bucket count, load factor) combination.
+benchmarkConflict(uint64_t seed)
+{
+  float loadFactor[] = { 0.5f, 1.0f, 2.0f, 4.0f, 8.0f };
+  int bucketNum[] = { 512, 1024, 2048, 4096, 8192, 16384};
+  int lenRange[][2] = { {1,3}, {4, 15}, {16, 127}, {128, 1024}, {1, 1024}};
+
+  fprintf(stdout,
+          "\nBechmarking conflict (stand deviation of conflict)\n%s\n",
+          separator);
+
+  for (uint32_t k = 0; k < sizeof(lenRange)/sizeof(lenRange[0]); k++) {
+    fprintf(stdout, "\nlen range from %d - %d\n", lenRange[k][0],
+            lenRange[k][1]);
+    fprintf(stdout, "%-10s %-12s %-10s %-10s diff (s) %-10s %-10s diff (d)\n%s\n",
+            "bucket", "load-factor", "was (s)", "is (s)", "was (d)", "is (d)",
+            separator);
+    for (uint32_t i = 0; i < sizeof(bucketNum)/sizeof(bucketNum[0]); ++i) {
+      for (uint32_t j = 0;
+           j < sizeof(loadFactor)/sizeof(loadFactor[0]);
+           ++j) {
+        int strNum = bucketNum[i] * loadFactor[j]; // Number of strings = buckets * load factor.
+        vector<string> strs(strNum);
+        genRandomString(lenRange[k][0], lenRange[k][1], strNum, strs); // Clears strs and refills it.
+
+        vector<double> p; // [0] was sparse, [1] is sparse, [2] was dense, [3] is dense
+        p = benchmarkConflictHelper(seed, bucketNum[i], strs);
+        fprintf(stdout, "%-10d %-12.2f %-10.2f %-10.2f %-10.2f %-10.2f %-10.2f %.2f\n",
+                bucketNum[i], loadFactor[j],
+                p[0], p[1], p[0] - p[1],
+                p[2], p[3], p[2] - p[3]);
+      }
+    }
+  }
+}
+
+static void // Seeds the RNG, prepares a 4096-byte work buffer and runs the three benchmark groups.
+benchmarkHashFunc()
+{
+ srand(time(0));
+
+ uint64_t seed = (uint32_t) rand(); // Only the low 32 bits of the hash seed are ever non-zero here.
+ char buf[4096]; // Size matches the "% 4096" indexing in the timing loops.
+ char c = getpid() % 'a'; // Start value varies with the process id.
+ for (int i = 0; i < (int)sizeof(buf); i++) {
+ buf[i] = (c + i) % 255;
+ }
+
+ benchmarkConflict(seed);
+ benchmarkIndividual(seed, buf);
+ benchmarkToggleLens(seed, buf);
+}
+
+int
+main(int argc, char** argv) // Arguments are unused; prints a banner, runs all benchmarks and returns 0.
+{
+ fprintf(stdout, "========================\nMicro benchmark...\n");
+ benchmarkHashFunc();
+ return 0;
+}
diff --git a/src/x64/test/test.cpp b/src/x64/test/test.cpp
new file mode 100644
index 00000000..432c7bbb
--- /dev/null
+++ b/src/x64/test/test.cpp
@@ -0,0 +1,81 @@
+#include
+#include
+#include
+#include