diff --git a/src/Makefile b/src/Makefile index 30d64be2..68a9a7cd 100644 --- a/src/Makefile +++ b/src/Makefile @@ -54,9 +54,9 @@ CCOPT_arm64= CCOPT_ppc= CCOPT_mips= # -CCDEBUG= +#CCDEBUG= # Uncomment the next line to generate debug information: -#CCDEBUG= -g +CCDEBUG= -g # CCWARN= -Wall # Uncomment the next line to enable more warnings: @@ -244,6 +244,9 @@ else ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) TARGET_LJARCH= arm else +ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH))) + TARGET_LJARCH= s390x +else ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) TARGET_ARCH= -D__AARCH64EB__=1 @@ -275,6 +278,7 @@ endif endif endif endif +endif ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) TARGET_SYS= PS3 @@ -461,7 +465,16 @@ ifeq (ppc,$(TARGET_LJARCH)) DASM_AFLAGS+= -D GPR64 endif ifeq (PS3,$(TARGET_SYS)) - DASM_AFLAGS+= -D PPE -D TOC + DASM_AFLAGS+= -D PPE + endif + ifneq (,$(findstring LJ_ARCH_PPC_OPD 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D OPD + endif + ifneq (,$(findstring LJ_ARCH_PPC_OPDENV 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D OPDENV + endif + ifneq (,$(findstring LJ_ARCH_PPC_ELFV2 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D ELFV2 endif endif endif @@ -501,10 +514,16 @@ LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ lj_carith.o lj_clib.o lj_cparse.o \ lj_lib.o lj_alloc.o lib_aux.o \ - $(LJLIB_O) lib_init.o + $(LJLIB_O) lib_init.o lj_str_hash.o + +ifeq (x64,$(TARGET_LJARCH)) + lj_str_hash-CFLAGS = -msse4.2 +endif + +F_CFLAGS = $($(patsubst %.c,%-CFLAGS,$<)) LJVMCORE_O= $(LJVM_O) $(LJCORE_O) -LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) +LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) lj_init_dyn.o LIB_VMDEF= jit/vmdef.lua LIB_VMDEFP= $(LIB_VMDEF) @@ -526,7 +545,7 @@ ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) 
############################################################################## # Mixed mode defaults. -TARGET_O= $(LUAJIT_A) +TARGET_O= lj_init.o $(LUAJIT_A) TARGET_T= $(LUAJIT_T) $(LUAJIT_SO) TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO) @@ -608,7 +627,7 @@ E= @echo default all: $(TARGET_T) amalg: - $(MAKE) all "LJCORE_O=ljamalg.o" + $(MAKE) all "LJCORE_O=ljamalg.o lj_str_hash.o" clean: $(HOST_RM) $(ALL_RM) @@ -685,8 +704,8 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c %.o: %.c $(E) "CC $@" - $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< - $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< + $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $(@:.o=_dyn.o) $< + $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $@ $< %.o: %.S $(E) "ASM $@" diff --git a/src/host/buildvm.c b/src/host/buildvm.c index 9ee47ada..4efda1ba 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -18,10 +18,8 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_bc.h" -#if LJ_HASJIT #include "lj_ir.h" #include "lj_ircall.h" -#endif #include "lj_frame.h" #include "lj_dispatch.h" #if LJ_HASFFI @@ -67,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); #include "../dynasm/dasm_ppc.h" #elif LJ_TARGET_MIPS #include "../dynasm/dasm_mips.h" +#elif LJ_TARGET_S390X +#include "../dynasm/dasm_s390x.h" #else #error "No support for this architecture (yet)" #endif @@ -252,7 +252,6 @@ BCDEF(BCNAME) NULL }; -#if LJ_HASJIT const char *const ir_names[] = { #define IRNAME(name, m, m1, m2) #name, IRDEF(IRNAME) @@ -293,9 +292,7 @@ static const char *const trace_errors[] = { #include "lj_traceerr.h" NULL }; -#endif -#if LJ_HASJIT static const char *lower(char *buf, const char *s) { char *p = buf; @@ -306,7 +303,6 @@ static const char *lower(char *buf, const char *s) *p = '\0'; return buf; } -#endif /* Emit C source code for bytecode-related definitions. 
*/ static void emit_bcdef(BuildCtx *ctx) @@ -324,9 +320,7 @@ static void emit_bcdef(BuildCtx *ctx) /* Emit VM definitions as Lua code for debug modules. */ static void emit_vmdef(BuildCtx *ctx) { -#if LJ_HASJIT char buf[80]; -#endif int i; fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); fprintf(ctx->fp, "return {\n\n"); @@ -335,7 +329,6 @@ static void emit_vmdef(BuildCtx *ctx) for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); fprintf(ctx->fp, "\",\n\n"); -#if LJ_HASJIT fprintf(ctx->fp, "irnames = \""); for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); fprintf(ctx->fp, "\",\n\n"); @@ -364,7 +357,6 @@ static void emit_vmdef(BuildCtx *ctx) for (i = 0; trace_errors[i]; i++) fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); fprintf(ctx->fp, "},\n\n"); -#endif } /* -- Argument parsing ---------------------------------------------------- */ diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index 7baa011f..e73f9b17 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -87,6 +87,54 @@ err: } fprintf(ctx->fp, "\t%s %s\n", opname, sym); } +#elif LJ_TARGET_S390X +/* Emit halfwords piecewise as assembler text. */ +static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n) +{ + uint16_t *cp = (uint16_t*)p; + n /= 2; + int i; + for (i = 0; i < n; i++) { + if ((i & 7) == 0) + fprintf(ctx->fp, "\t.hword 0x%hx", cp[i]); + else + fprintf(ctx->fp, ",0x%hx", cp[i]); + if ((i & 7) == 7) putc('\n', ctx->fp); + } + if ((n & 7) != 0) putc('\n', ctx->fp); +} + +/* Emit s390x text relocations. */ +static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n, + const char *sym) +{ + if (n & 1 || n < 2) { + fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n); + exit(1); + } + n -= 2; + const char *opname = NULL; + const char *argt = ""; /* Inserted before argument. 
*/ + int opcode = *(uint16_t*)(&cp[n]); + int arg = (opcode>>4) & 0xf; + switch (opcode & 0xff0f) { + case 0xa705: opname = "bras"; argt = "%r"; break; + case 0xc005: opname = "brasl"; argt = "%r"; break; + case 0xa704: opname = "brc"; break; + case 0xc004: opname = "brcl"; break; + default: + fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n", + sym); + exit(1); + } + emit_asm_halfwords(ctx, cp, n); + if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) { + /* Various fixups for external symbols outside of our binary. */ + fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym); + return; + } + fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym); +} #else /* Emit words piecewise as assembler text. */ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) @@ -140,7 +188,11 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, #else #define TOCPREFIX "" #endif - if ((ins >> 26) == 16) { + if ((ins >> 26) == 14) { + fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); + } else if ((ins >> 26) == 15) { + fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); + } else if ((ins >> 26) == 16) { fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", (ins & 1) ? 
"bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); } else if ((ins >> 26) == 18) { @@ -242,6 +294,9 @@ void emit_asm(BuildCtx *ctx) int i, rel; fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); +#if LJ_ARCH_PPC_ELFV2 + fprintf(ctx->fp, "\t.abiversion 2\n"); +#endif fprintf(ctx->fp, "\t.text\n"); emit_asm_align(ctx, 4); @@ -299,6 +354,9 @@ void emit_asm(BuildCtx *ctx) emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); } ofs += n+4; +#elif LJ_TARGET_S390X + emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); + ofs += n+4; #else emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); ofs += n; @@ -307,6 +365,8 @@ void emit_asm(BuildCtx *ctx) } #if LJ_TARGET_X86ORX64 emit_asm_bytes(ctx, ctx->code+ofs, next-ofs); +#elif LJ_TARGET_S390X + emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs); #else emit_asm_words(ctx, ctx->code+ofs, next-ofs); #endif diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c index edb55768..7f9ac058 100644 --- a/src/host/buildvm_fold.c +++ b/src/host/buildvm_fold.c @@ -5,7 +5,6 @@ #include "buildvm.h" #include "lj_obj.h" -#if LJ_HASJIT #include "lj_ir.h" /* Context for the folding hash table generator. 
*/ @@ -227,10 +226,4 @@ void emit_fold(BuildCtx *ctx) makehash(ctx); } -#else -void emit_fold(BuildCtx *ctx) -{ - UNUSED(ctx); -} -#endif diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h index 276463b2..8f07f436 100644 --- a/src/host/buildvm_libbc.h +++ b/src/host/buildvm_libbc.h @@ -4,67 +4,42 @@ static const int libbc_endian = 0; static const uint8_t libbc_code[] = { #if LJ_FR2 -/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3, -220,203,178,130,4, -/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20, -198,190,199,252,3, -/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0, -/* table.foreachi */ 0,2,10,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0, -BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0, -BC_MOV,8,5,0,BC_TGETR,9,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128, -BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0, -/* table.foreach */ 0,2,11,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI, -2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,9,5,0, -BC_MOV,10,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0, -BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15, -/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0, -/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0, -0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0, -BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0, -BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0, -BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7, -BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0, -BC_RET1,3,2,0,BC_RET0,0,1,0,0,2, -/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE, -2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE, 
-4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4, -128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0, -BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0, -BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0, -BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR, -11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0, +0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, +0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, +16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, +0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1, +128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,1,16,16,0,12,0,16,1,9,0,43,2, +0,0,18,3,0,0,42,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7, +0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,1,255,255,249,255,15, +0,1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0, +0,11,1,0,0,88,3,7,128,8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2, +0,88,3,18,128,16,1,14,0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59, +3,1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4, +252,127,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16, +1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88, +5,24,128,33,5,1,3,0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128, +18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252, +127,88,6,8,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9, +0,64,11,10,4,79,6,252,127,76,4,2,0,0 #else -/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3, -220,203,178,130,4, -/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20, -198,190,199,252,3, -/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0, 
-/* table.foreachi */ 0,2,9,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0, -BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0, -BC_MOV,7,5,0,BC_TGETR,8,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128, -BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0, -/* table.foreach */ 0,2,10,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI, -2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,8,5,0, -BC_MOV,9,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0, -BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15, -/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0, -/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0, -0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0, -BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0, -BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0, -BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7, -BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0, -BC_RET1,3,2,0,BC_RET0,0,1,0,0,2, -/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE, -2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE, -4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4, -128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0, -BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0, -BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0, -BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR, -11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0, +0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, +0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, +16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, 
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1, +128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,1,16,16,0,12,0,16,1,9,0,43,2, +0,0,18,3,0,0,42,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0, +0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,1,255,255,249,255,15,0, +1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0, +11,1,0,0,88,3,7,128,8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0, +88,3,18,128,16,1,14,0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3, +1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252, +127,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1, +14,0,16,2,14,0,16,3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88, +5,24,128,33,5,1,3,0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128, +18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252, +127,88,6,8,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9, +0,64,11,10,4,79,6,252,127,76,4,2,0,0 #endif -0 }; static const struct { const char *name; int ofs; } libbc_map[] = { diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua index ba18812c..072a7495 100644 --- a/src/host/genlibbc.lua +++ b/src/host/genlibbc.lua @@ -79,11 +79,9 @@ local name2itype = { str = 5, func = 9, tab = 12, int = 14, num = 15 } -local BC, BCN = {}, {} +local BC = {} for i=0,#bcnames/6-1 do - local name = bcnames:sub(i*6+1, i*6+6):gsub(" ", "") - BC[name] = i - BCN[i] = name + BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i end local xop, xra = isbe and 3 or 0, isbe and 2 or 1 local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3 @@ -98,7 +96,6 @@ local function fixup_dump(dump, fixup) p = read_uleb128(p) p = read_uleb128(p) p, sizebc = read_uleb128(p) - local startbc = tonumber(p - start) local rawtab = {} for i=0,sizebc-1 do local op = p[xop] @@ -135,7 +132,7 @@ local function fixup_dump(dump, fixup) local ndump = ffi.string(start, n) 
-- Fixup hi-part of 0x4dp80 to LJ_KEYINDEX. ndump = ndump:gsub("\x80\x80\xcd\xaa\x04", "\xff\xff\xf9\xff\x0f") - return { dump = ndump, startbc = startbc, sizebc = sizebc } + return ndump end local function find_defs(src) @@ -155,46 +152,24 @@ local function gen_header(defs) local function w(x) t[#t+1] = x end w("/* This is a generated file. DO NOT EDIT! */\n\n") w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n") - local s, sb = "", "" - for i,name in ipairs(defs) do - local d = defs[name] - s = s .. d.dump - sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1) - .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc) - .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4) + local s = "" + for _,name in ipairs(defs) do + s = s .. defs[name] end w("static const uint8_t libbc_code[] = {\n") local n = 0 for i=1,#s do local x = string.byte(s, i) - local xb = string.byte(sb, i) - if xb == 255 then - local name = BCN[x] - local m = #name + 4 - if n + m > 78 then n = 0; w("\n") end - n = n + m - w("BC_"); w(name) - else - local m = x < 10 and 2 or (x < 100 and 3 or 4) - if xb == 0 then - if n + m > 78 then n = 0; w("\n") end - else - local name = defs[xb]:gsub("_", ".") - if n ~= 0 then w("\n") end - w("/* "); w(name); w(" */ ") - n = #name + 7 - end - n = n + m - w(x) - end - w(",") + w(x); w(",") + n = n + (x < 10 and 2 or (x < 100 and 3 or 4)) + if n >= 75 then n = 0; w("\n") end end - w("\n0\n};\n\n") + w("0\n};\n\n") w("static const struct { const char *name; int ofs; } libbc_map[] = {\n") local m = 0 for _,name in ipairs(defs) do w('{"'); w(name); w('",'); w(m) w('},\n') - m = m + #defs[name].dump + m = m + #defs[name] end w("{NULL,"); w(m); w("}\n};\n\n") return table.concat(t) diff --git a/src/host/genminilua.lua b/src/host/genminilua.lua index e8e86c53..a72ef2ef 100644 --- a/src/host/genminilua.lua +++ b/src/host/genminilua.lua @@ -327,12 +327,6 @@ local function rename_tokens2(src) return gsub(src, "ZY([%w_]+)", "union %1") end -local function 
fix_bugs_and_warnings(src) - src = gsub(src, "(luaD_checkstack%(L,p%->maxstacksize)%)", "%1+p->numparams)") - src = gsub(src, "if%(sep==%-1%)(return'%[';)\nelse (luaX_lexerror%b();)", "if (sep!=-1)%2\n%1") - return gsub(src, "(default:{\nNode%*n=mainposition)", "/*fallthrough*/\n%1") -end - local function func_gather(src) local nodes, list = {}, {} local pos, len = 1, #src @@ -431,6 +425,5 @@ src = rename_tokens1(src) src = func_collect(src) src = rename_tokens2(src) src = restore_strings(src) -src = fix_bugs_and_warnings(src) src = merge_header(src, license) io.write(src) diff --git a/src/host/minilua.c b/src/host/minilua.c index 76f32aed..cfc7491d 100644 --- a/src/host/minilua.c +++ b/src/host/minilua.c @@ -1639,7 +1639,6 @@ lua_number2int(k,n); if(luai_numeq(cast_num(k),nvalue(key))) return luaH_getnum(t,k); } -/*fallthrough*/ default:{ Node*n=mainposition(t,key); do{ @@ -2906,8 +2905,8 @@ if(sep>=0){ read_long_string(ls,seminfo,sep); return TK_STRING; } -else if (sep!=-1)luaX_lexerror(ls,"invalid long string delimiter",TK_STRING); -return'['; +else if(sep==-1)return'['; +else luaX_lexerror(ls,"invalid long string delimiter",TK_STRING); } case'=':{ next(ls); diff --git a/src/jit/bc.lua b/src/jit/bc.lua index 8d0844c0..031b5902 100644 --- a/src/jit/bc.lua +++ b/src/jit/bc.lua @@ -63,15 +63,21 @@ local function ctlsub(c) end -- Return one bytecode line. 
-local function bcline(func, pc, prefix) - local ins, m = funcbc(func, pc) +local function bcline(func, pc, prefix, lineinfo) + local ins, m, l = funcbc(func, pc, lineinfo and 1 or 0) if not ins then return end local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128) local a = band(shr(ins, 8), 0xff) local oidx = 6*band(ins, 0xff) local op = sub(bcnames, oidx+1, oidx+6) - local s = format("%04d %s %-6s %3s ", - pc, prefix or " ", op, ma == 0 and "" or a) + local s + if lineinfo then + s = format("%04d %7s %s %-6s %3s ", + pc, "["..l.."]", prefix or " ", op, ma == 0 and "" or a) + else + s = format("%04d %s %-6s %3s ", + pc, prefix or " ", op, ma == 0 and "" or a) + end local d = shr(ins, 16) if mc == 13*128 then -- BCMjump return format("%s=> %04d\n", s, pc+d-0x7fff) @@ -124,20 +130,52 @@ local function bctargets(func) end -- Dump bytecode instructions of a function. -local function bcdump(func, out, all) +local function bcdump(func, out, all, lineinfo) if not out then out = stdout end local fi = funcinfo(func) if all and fi.children then for n=-1,-1000000000,-1 do local k = funck(func, n) if not k then break end - if type(k) == "proto" then bcdump(k, out, true) end + if type(k) == "proto" then bcdump(k, out, true, lineinfo) end end end out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined)) + + for n=-1,-1000000000,-1 do + local kc = funck(func, n) + if not kc then break end + + local typ = type(kc) + if typ == "string" then + kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub)) + out:write(format("KGC %d %s\n", -(n + 1), kc)) + elseif typ == "proto" then + local fi = funcinfo(kc) + if fi.ffid then + kc = vmdef.ffnames[fi.ffid] + else + kc = fi.loc + end + out:write(format("KGC %d %s\n", -(n + 1), kc)) + elseif typ == "table" then + out:write(format("KGC %d table\n", -(n + 1))) + else + -- error("unknown KGC type: " .. 
typ) + end + end + + for n=1,1000000000 do + local kc = funck(func, n) + if not kc then break end + if type(kc) == "number" then + out:write(format("KN %d %s\n", n, kc)) + end + end + local target = bctargets(func) for pc=1,1000000000 do - local s = bcline(func, pc, target[pc] and "=>") + local s = bcline(func, pc, target[pc] and "=>", lineinfo) if not s then break end out:write(s) end diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 6227d136..7cb23f17 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -27,6 +27,7 @@ local function usage() io.stderr:write[[ Save LuaJIT bytecode: luajit -b[options] input output -l Only list bytecode. + -L Only list bytecode with lineinfo. -s Strip debug info (default). -g Keep debug info. -n name Set module name (default: auto-detect from input name). @@ -592,9 +593,9 @@ end ------------------------------------------------------------------------------ -local function bclist(input, output) +local function bclist(input, output, lineinfo) local f = readfile(input) - require("jit.bc").dump(f, savefile(output, "w"), true) + require("jit.bc").dump(f, savefile(output, "w"), true, lineinfo) end local function bcsave(ctx, input, output) @@ -621,6 +622,7 @@ local function docmd(...) local arg = {...} local n = 1 local list = false + local lineinfo = false local ctx = { strip = true, arch = jit.arch, os = jit.os:lower(), type = false, modname = false, @@ -634,6 +636,9 @@ local function docmd(...) local opt = a:sub(m, m) if opt == "l" then list = true + elseif opt == "L" then + list = true + lineinfo = true elseif opt == "s" then ctx.strip = true elseif opt == "g" then @@ -662,7 +667,7 @@ local function docmd(...) 
end if list then if #arg == 0 or #arg > 2 then usage() end - bclist(arg[1], arg[2] or "-") + bclist(arg[1], arg[2] or "-", lineinfo) else if #arg ~= 2 then usage() end bcsave(ctx, arg[1], arg[2]) diff --git a/src/jit/dis_s390x.lua b/src/jit/dis_s390x.lua new file mode 100644 index 00000000..99dc7484 --- /dev/null +++ b/src/jit/dis_s390x.lua @@ -0,0 +1,1594 @@ +---------------------------------------------------------------------------- +-- LuaJIT s390x disassembler module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +-- +-- Contributed by Aditya Bisht from Open Mainframe. +---------------------------------------------------------------------------- +-- This is a helper module used by the LuaJIT machine code dumper module. +-- +-- NYI: +------------------------------------------------------------------------------ + +local type = type +local sub, byte, format = string.sub, string.byte, string.format +local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local lower, rep = string.lower, string.rep +local bit = require("bit") +local band, lshift, bor, rshift = bit.band, bit.lshift, bit.bor, bit.rshift +local tohex = bit.tohex + +local ONELONG = "%08lx: " + +local OPERAND_GPR = 0x1 /* Operand printed as %rx */ +local OPERAND_FPR = 0x2 /* Operand printed as %fx */ +local OPERAND_AR = 0x4 /* Operand printed as %ax */ +local OPERAND_CR = 0x8 /* Operand printed as %cx */ +local OPERAND_DISP = 0x10 /* Operand printed as displacement */ +local OPERAND_BASE = 0x20 /* Operand printed as base register */ +local OPERAND_INDEX = 0x40 /* Operand printed as index register */ +local OPERAND_PCREL = 0x80 /* Operand printed as pc-relative symbol */ +local OPERAND_SIGNED = 0x100 /* Operand printed as signed value */ +local OPERAND_LENGTH = 0x200 /* Operand printed as length (+1) */ + +-- Registers + +local UNUSED = 0, /* Indicates the end of the operand list */ +local R_8 = 1, /* 
GPR starting at position 8 */ +local R_12 = 2, /* GPR starting at position 12 */ +local R_16 = 3, /* GPR starting at position 16 */ +local R_20 = 4, /* GPR starting at position 20 */ +local R_24 = 5, /* GPR starting at position 24 */ +local R_28 = 6, /* GPR starting at position 28 */ +local R_32 = 7, /* GPR starting at position 32 */ +local F_8 = 8, /* FPR starting at position 8 */ +local F_12 = 9, /* FPR starting at position 12 */ +local F_16 = 10, /* FPR starting at position 16 */ +local F_20 = 11, /* FPR starting at position 16 */ +local F_24 = 12, /* FPR starting at position 24 */ +local F_28 = 13, /* FPR starting at position 28 */ +local F_32 = 14, /* FPR starting at position 32 */ +local A_8 = 15, /* Access reg. starting at position 8 */ +local A_12 = 16, /* Access reg. starting at position 12 */ +local A_24 = 17, /* Access reg. starting at position 24 */ +local A_28 = 18, /* Access reg. starting at position 28 */ +local C_8 = 19, /* Control reg. starting at position 8 */ +local C_12 = 20, /* Control reg. 
starting at position 12 */ +local B_16 = 21, /* Base register starting at position 16 */ +local B_32 = 22, /* Base register starting at position 32 */ +local X_12 = 23, /* Index register starting at position 12 */ +local D_20 = 24, /* Displacement starting at position 20 */ +local D_36 = 25, /* Displacement starting at position 36 */ +local D20_20 = 26, /* 20 bit displacement starting at 20 */ +local L4_8 = 27, /* 4 bit length starting at position 8 */ +local L4_12 = 28, /* 4 bit length starting at position 12 */ +local L8_8 = 29, /* 8 bit length starting at position 8 */ +local U4_8 = 30, /* 4 bit unsigned value starting at 8 */ +local U4_12 = 31, /* 4 bit unsigned value starting at 12 */ +local U4_16 = 32, /* 4 bit unsigned value starting at 16 */ +local U4_20 = 33, /* 4 bit unsigned value starting at 20 */ +local U4_32 = 34, /* 4 bit unsigned value starting at 32 */ +local U8_8 = 35, /* 8 bit unsigned value starting at 8 */ +local U8_16 = 36, /* 8 bit unsigned value starting at 16 */ +local U8_24 = 37, /* 8 bit unsigned value starting at 24 */ +local U8_32 = 38, /* 8 bit unsigned value starting at 32 */ +local I8_8 = 39, /* 8 bit signed value starting at 8 */ +local I8_32 = 40, /* 8 bit signed value starting at 32 */ +local I16_16 = 41, /* 16 bit signed value starting at 16 */ +local I16_32 = 42, /* 32 bit signed value starting at 16 */ +local U16_16 = 43, /* 16 bit unsigned value starting at 16 */ +local U16_32 = 44, /* 32 bit unsigned value starting at 16 */ +local J16_16 = 45, /* PC relative jump offset at 16 */ +local J32_16 = 46, /* PC relative long offset at 16 */ +local I32_16 = 47, /* 32 bit signed value starting at 16 */ +local U32_16 = 48, /* 32 bit unsigned value starting at 16 */ +local M_16 = 49, /* 4 bit optional mask starting at 16 */ +local RO_28 = 50 /* optional GPR starting at position 28 */ + +-- Enumeration of the different instruction formats. +-- For details consult the principles of operation. 
-- Instruction format identifiers. Each opcode table row names one of these
-- in its third slot, and `formats` (below) is keyed by them. INSTR_INVALID
-- marks the terminating row of every opcode table.
local INSTR_INVALID = 1
local INSTR_E = 2
local INSTR_RIE_R0IU = 3
local INSTR_RIE_R0UU = 4
local INSTR_RIE_RRP = 5
local INSTR_RIE_RRPU = 6
local INSTR_RIE_RRUUU = 7
local INSTR_RIE_RUPI = 8
local INSTR_RIE_RUPU = 9
local INSTR_RIL_RI = 10
local INSTR_RIL_RP = 11
local INSTR_RIL_RU = 12
local INSTR_RIL_UP = 13
local INSTR_RIS_R0RDU = 14
local INSTR_RIS_R0UU = 15
local INSTR_RIS_RURDI = 16
local INSTR_RIS_RURDU = 17
local INSTR_RI_RI = 18
local INSTR_RI_RP = 19
local INSTR_RI_RU = 20
local INSTR_RI_UP = 21
local INSTR_RRE_00 = 22
local INSTR_RRE_0R = 23
local INSTR_RRE_AA = 24
local INSTR_RRE_AR = 25
local INSTR_RRE_F0 = 26
local INSTR_RRE_FF = 27
local INSTR_RRE_FR = 28
local INSTR_RRE_R0 = 29
local INSTR_RRE_RA = 30
local INSTR_RRE_RF = 31
local INSTR_RRE_RR = 32
local INSTR_RRE_RR_OPT = 33
local INSTR_RRF_0UFF = 34
local INSTR_RRF_F0FF = 35
local INSTR_RRF_F0FF2 = 36
local INSTR_RRF_F0FR = 37
local INSTR_RRF_FFRU = 38
local INSTR_RRF_FUFF = 39
local INSTR_RRF_M0RR = 40
local INSTR_RRF_R0RR = 41
local INSTR_RRF_RURR = 42
local INSTR_RRF_U0FF = 43
local INSTR_RRF_U0RF = 44
local INSTR_RRF_U0RR = 45
local INSTR_RRF_UUFF = 46
local INSTR_RRR_F0FF = 47
local INSTR_RRS_RRRDU = 48
local INSTR_RR_FF = 49
local INSTR_RR_R0 = 50
local INSTR_RR_RR = 51
local INSTR_RR_U0 = 52
local INSTR_RR_UR = 53
local INSTR_RSE_CCRD = 54
local INSTR_RSE_RRRD = 55
local INSTR_RSE_RURD = 56
local INSTR_RSI_RRP = 57
local INSTR_RSL_R0RD = 58
local INSTR_RSY_AARD = 59
local INSTR_RSY_CCRD = 60
local INSTR_RSY_RRRD = 61
local INSTR_RSY_RURD = 62
local INSTR_RS_AARD = 63
local INSTR_RS_CCRD = 64
local INSTR_RS_R0RD = 65
local INSTR_RS_RRRD = 66
local INSTR_RS_RURD = 67
local INSTR_RXE_FRRD = 68
local INSTR_RXE_RRRD = 69
local INSTR_RXF_FRRDF = 70
local INSTR_RXY_FRRD = 71
local INSTR_RXY_RRRD = 72
local INSTR_RXY_URRD = 73
local INSTR_RX_FRRD = 74
local INSTR_RX_RRRD = 75
local INSTR_RX_URRD = 76
local INSTR_SIL_RDI = 77
local INSTR_SIL_RDU = 78
local INSTR_SIY_IRD = 79
local INSTR_SIY_URD = 80
local INSTR_SI_URD = 81
local INSTR_SSE_RDRD = 82
local INSTR_SSF_RRDRD = 83
local INSTR_SS_L0RDRD = 84
local INSTR_SS_LIRDRD = 85
local INSTR_SS_LLRDRD = 86
local INSTR_SS_RRRDRD = 87
local INSTR_SS_RRRDRD2 = 88
local INSTR_SS_RRRDRD3 = 89
local INSTR_S_00 = 90
local INSTR_S_RD = 91

-- Operand descriptors, keyed by operand id. Each row is
--   { width in bits, bit offset from the start of the instruction, flags }
-- which is how extract_operand() reads slots 1, 2 and 3.
-- Combined flags use bor() because Lua 5.1/LuaJIT has no `|` operator.
-- NOTE(review): assumes `bor` (bit.bor) is already a local at this point,
-- as the functions further down call it unqualified -- confirm.
-- NOTE(review): the format rows below reference I8_8 and I8_32, but this
-- table has no descriptor for either; their definitions are not visible
-- from here -- confirm whether rows are missing.
local operands = {
  [UNUSED] = { 0, 0, 0 },
  [R_8]    = { 4, 8, OPERAND_GPR },
  [R_12]   = { 4, 12, OPERAND_GPR },
  [R_16]   = { 4, 16, OPERAND_GPR },
  [R_20]   = { 4, 20, OPERAND_GPR },
  [R_24]   = { 4, 24, OPERAND_GPR },
  [R_28]   = { 4, 28, OPERAND_GPR },
  [R_32]   = { 4, 32, OPERAND_GPR },
  [F_8]    = { 4, 8, OPERAND_FPR },
  [F_12]   = { 4, 12, OPERAND_FPR },
  [F_16]   = { 4, 16, OPERAND_FPR },
  -- Offset was 16, duplicating F_16; every other id encodes its own offset.
  [F_20]   = { 4, 20, OPERAND_FPR },
  [F_24]   = { 4, 24, OPERAND_FPR },
  [F_28]   = { 4, 28, OPERAND_FPR },
  [F_32]   = { 4, 32, OPERAND_FPR },
  [A_8]    = { 4, 8, OPERAND_AR },
  [A_12]   = { 4, 12, OPERAND_AR },
  [A_24]   = { 4, 24, OPERAND_AR },
  [A_28]   = { 4, 28, OPERAND_AR },
  [C_8]    = { 4, 8, OPERAND_CR },
  [C_12]   = { 4, 12, OPERAND_CR },
  [B_16]   = { 4, 16, bor(OPERAND_BASE, OPERAND_GPR) },
  [B_32]   = { 4, 32, bor(OPERAND_BASE, OPERAND_GPR) },
  [X_12]   = { 4, 12, bor(OPERAND_INDEX, OPERAND_GPR) },
  [D_20]   = { 12, 20, OPERAND_DISP },
  [D_36]   = { 12, 36, OPERAND_DISP },
  [D20_20] = { 20, 20, bor(OPERAND_DISP, OPERAND_SIGNED) },
  [L4_8]   = { 4, 8, OPERAND_LENGTH },
  [L4_12]  = { 4, 12, OPERAND_LENGTH },
  [L8_8]   = { 8, 8, OPERAND_LENGTH },
  [U4_8]   = { 4, 8, 0 },
  [U4_12]  = { 4, 12, 0 },
  [U4_16]  = { 4, 16, 0 },
  [U4_20]  = { 4, 20, 0 },
  [U4_32]  = { 4, 32, 0 },
  [U8_8]   = { 8, 8, 0 },
  [U8_16]  = { 8, 16, 0 },
  [U8_24]  = { 8, 24, 0 },
  [U8_32]  = { 8, 32, 0 },
  [I16_16] = { 16, 16, OPERAND_SIGNED },
  [U16_16] = { 16, 16, 0 },
  [U16_32] = { 16, 32, 0 },
  [J16_16] = { 16, 16, OPERAND_PCREL },
  [I16_32] = { 16, 32, OPERAND_SIGNED },
  [J32_16] = { 32, 16, OPERAND_PCREL },
  [I32_16] = { 32, 16, OPERAND_SIGNED },
  [U32_16] = { 32, 16, 0 },
  [M_16]   = { 4, 16, 0 },
  [RO_28]  = { 4, 28, OPERAND_GPR }
}

-- Instruction layouts, keyed by format id. Slot 1 is the mask find_insn()
-- applies to the opcode fragment before comparing it with a table row; the
-- remaining six slots are operand ids, with 0 for an unused slot.
local formats = {
  [INSTR_E]         = { 0xff, 0,0,0,0,0,0 },
  -- Added: used by "cit"/"cgit" in opcode_ec. Signed twin of INSTR_RIE_R0UU.
  [INSTR_RIE_R0IU]  = { 0xff, R_8,I16_16,U4_32,0,0,0 },
  [INSTR_RIE_R0UU]  = { 0xff, R_8,U16_16,U4_32,0,0,0 },
  [INSTR_RIE_RRPU]  = { 0xff, R_8,R_12,U4_32,J16_16,0,0 },
  [INSTR_RIE_RRP]   = { 0xff, R_8,R_12,J16_16,0,0,0 },
  [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 },
  [INSTR_RIE_RUPI]  = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 },
  -- Added: used by "clij"/"clgij" in opcode_ec. Unsigned twin of
  -- INSTR_RIE_RUPI.
  [INSTR_RIE_RUPU]  = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 },
  [INSTR_RIL_RI]    = { 0x0f, R_8,I32_16,0,0,0,0 },
  [INSTR_RIL_RP]    = { 0x0f, R_8,J32_16,0,0,0,0 },
  [INSTR_RIL_RU]    = { 0x0f, R_8,U32_16,0,0,0,0 },
  [INSTR_RIL_UP]    = { 0x0f, U4_8,J32_16,0,0,0,0 },
  [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 },
  [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 },
  [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 },
  [INSTR_RI_RI]     = { 0x0f, R_8,I16_16,0,0,0,0 },
  [INSTR_RI_RP]     = { 0x0f, R_8,J16_16,0,0,0,0 },
  [INSTR_RI_RU]     = { 0x0f, R_8,U16_16,0,0,0,0 },
  [INSTR_RI_UP]     = { 0x0f, U4_8,J16_16,0,0,0,0 },
  [INSTR_RRE_00]    = { 0xff, 0,0,0,0,0,0 },
  [INSTR_RRE_0R]    = { 0xff, R_28,0,0,0,0,0 },
  [INSTR_RRE_AA]    = { 0xff, A_24,A_28,0,0,0,0 },
  [INSTR_RRE_AR]    = { 0xff, A_24,R_28,0,0,0,0 },
  [INSTR_RRE_F0]    = { 0xff, F_24,0,0,0,0,0 },
  [INSTR_RRE_FF]    = { 0xff, F_24,F_28,0,0,0,0 },
  [INSTR_RRE_FR]    = { 0xff, F_24,R_28,0,0,0,0 },
  [INSTR_RRE_R0]    = { 0xff, R_24,0,0,0,0,0 },
  [INSTR_RRE_RA]    = { 0xff, R_24,A_28,0,0,0,0 },
  [INSTR_RRE_RF]    = { 0xff, R_24,F_28,0,0,0,0 },
  [INSTR_RRE_RR]    = { 0xff, R_24,R_28,0,0,0,0 },
  [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 },
  [INSTR_RRF_0UFF]  = { 0xff, F_24,F_28,U4_20,0,0,0 },
  [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 },
  [INSTR_RRF_F0FF]  = { 0xff, F_16,F_24,F_28,0,0,0 },
  [INSTR_RRF_F0FR]  = { 0xff, F_24,F_16,R_28,0,0,0 },
  [INSTR_RRF_FFRU]  = { 0xff, F_24,F_16,R_28,U4_20,0,0 },
  [INSTR_RRF_FUFF]  = { 0xff, F_24,F_16,F_28,U4_20,0,0 },
[INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, + [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, + [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, + [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, + [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, + [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 }, + [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 }, + [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 }, + [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 }, + [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, + [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 }, + [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 }, + [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 }, + [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 }, + [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, + [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, + [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, + [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, + [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 }, + [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 }, + [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, + [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, + [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, + [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, + [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, + [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, + [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, + [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, + [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 }, + 
[INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 }, + [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 }, + [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 }, + [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, + [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, + [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, + [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 }, + [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, + [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, + [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, + [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 }, + [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 }, + [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, + [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, +} + +local opcode = { + { "lmd", 0xef, INSTR_SS_RRRDRD3 }, + { "spm", 0x04, INSTR_RR_R0 }, + { "balr", 0x05, INSTR_RR_RR }, + { "bctr", 0x06, INSTR_RR_RR }, + { "bcr", 0x07, INSTR_RR_UR }, + { "svc", 0x0a, INSTR_RR_U0 }, + { "bsm", 0x0b, INSTR_RR_RR }, + { "bassm", 0x0c, INSTR_RR_RR }, + { "basr", 0x0d, INSTR_RR_RR }, + { "mvcl", 0x0e, INSTR_RR_RR }, + { "clcl", 0x0f, INSTR_RR_RR }, + { "lpr", 0x10, INSTR_RR_RR }, + { "lnr", 0x11, INSTR_RR_RR }, + { "ltr", 0x12, INSTR_RR_RR }, + { "lcr", 0x13, INSTR_RR_RR }, + { "nr", 0x14, INSTR_RR_RR }, + { "clr", 0x15, INSTR_RR_RR }, + { "or", 0x16, INSTR_RR_RR }, + { "xr", 0x17, INSTR_RR_RR }, + { "lr", 0x18, INSTR_RR_RR }, + { "cr", 0x19, INSTR_RR_RR }, + { "ar", 0x1a, INSTR_RR_RR }, + { "sr", 0x1b, INSTR_RR_RR }, + { "mr", 0x1c, INSTR_RR_RR }, + { "dr", 0x1d, INSTR_RR_RR }, + { "alr", 0x1e, INSTR_RR_RR }, + { "slr", 0x1f, INSTR_RR_RR }, + { "lpdr", 0x20, INSTR_RR_FF }, + { "lndr", 0x21, INSTR_RR_FF }, + { "ltdr", 0x22, INSTR_RR_FF }, + { "lcdr", 0x23, INSTR_RR_FF }, + { "hdr", 0x24, INSTR_RR_FF }, + { "ldxr", 0x25, INSTR_RR_FF }, + { "lrdr", 0x25, INSTR_RR_FF }, + { "mxr", 0x26, INSTR_RR_FF }, + { 
"mxdr", 0x27, INSTR_RR_FF }, + { "ldr", 0x28, INSTR_RR_FF }, + { "cdr", 0x29, INSTR_RR_FF }, + { "adr", 0x2a, INSTR_RR_FF }, + { "sdr", 0x2b, INSTR_RR_FF }, + { "mdr", 0x2c, INSTR_RR_FF }, + { "ddr", 0x2d, INSTR_RR_FF }, + { "awr", 0x2e, INSTR_RR_FF }, + { "swr", 0x2f, INSTR_RR_FF }, + { "lper", 0x30, INSTR_RR_FF }, + { "lner", 0x31, INSTR_RR_FF }, + { "lter", 0x32, INSTR_RR_FF }, + { "lcer", 0x33, INSTR_RR_FF }, + { "her", 0x34, INSTR_RR_FF }, + { "ledr", 0x35, INSTR_RR_FF }, + { "lrer", 0x35, INSTR_RR_FF }, + { "axr", 0x36, INSTR_RR_FF }, + { "sxr", 0x37, INSTR_RR_FF }, + { "ler", 0x38, INSTR_RR_FF }, + { "cer", 0x39, INSTR_RR_FF }, + { "aer", 0x3a, INSTR_RR_FF }, + { "ser", 0x3b, INSTR_RR_FF }, + { "mder", 0x3c, INSTR_RR_FF }, + { "mer", 0x3c, INSTR_RR_FF }, + { "der", 0x3d, INSTR_RR_FF }, + { "aur", 0x3e, INSTR_RR_FF }, + { "sur", 0x3f, INSTR_RR_FF }, + { "sth", 0x40, INSTR_RX_RRRD }, + { "la", 0x41, INSTR_RX_RRRD }, + { "stc", 0x42, INSTR_RX_RRRD }, + { "ic", 0x43, INSTR_RX_RRRD }, + { "ex", 0x44, INSTR_RX_RRRD }, + { "bal", 0x45, INSTR_RX_RRRD }, + { "bct", 0x46, INSTR_RX_RRRD }, + { "bc", 0x47, INSTR_RX_URRD }, + { "lh", 0x48, INSTR_RX_RRRD }, + { "ch", 0x49, INSTR_RX_RRRD }, + { "ah", 0x4a, INSTR_RX_RRRD }, + { "sh", 0x4b, INSTR_RX_RRRD }, + { "mh", 0x4c, INSTR_RX_RRRD }, + { "bas", 0x4d, INSTR_RX_RRRD }, + { "cvd", 0x4e, INSTR_RX_RRRD }, + { "cvb", 0x4f, INSTR_RX_RRRD }, + { "st", 0x50, INSTR_RX_RRRD }, + { "lae", 0x51, INSTR_RX_RRRD }, + { "n", 0x54, INSTR_RX_RRRD }, + { "cl", 0x55, INSTR_RX_RRRD }, + { "o", 0x56, INSTR_RX_RRRD }, + { "x", 0x57, INSTR_RX_RRRD }, + { "l", 0x58, INSTR_RX_RRRD }, + { "c", 0x59, INSTR_RX_RRRD }, + { "a", 0x5a, INSTR_RX_RRRD }, + { "s", 0x5b, INSTR_RX_RRRD }, + { "m", 0x5c, INSTR_RX_RRRD }, + { "d", 0x5d, INSTR_RX_RRRD }, + { "al", 0x5e, INSTR_RX_RRRD }, + { "sl", 0x5f, INSTR_RX_RRRD }, + { "std", 0x60, INSTR_RX_FRRD }, + { "mxd", 0x67, INSTR_RX_FRRD }, + { "ld", 0x68, INSTR_RX_FRRD }, + { "cd", 0x69, INSTR_RX_FRRD }, + { 
"ad", 0x6a, INSTR_RX_FRRD }, + { "sd", 0x6b, INSTR_RX_FRRD }, + { "md", 0x6c, INSTR_RX_FRRD }, + { "dd", 0x6d, INSTR_RX_FRRD }, + { "aw", 0x6e, INSTR_RX_FRRD }, + { "sw", 0x6f, INSTR_RX_FRRD }, + { "ste", 0x70, INSTR_RX_FRRD }, + { "ms", 0x71, INSTR_RX_RRRD }, + { "le", 0x78, INSTR_RX_FRRD }, + { "ce", 0x79, INSTR_RX_FRRD }, + { "ae", 0x7a, INSTR_RX_FRRD }, + { "se", 0x7b, INSTR_RX_FRRD }, + { "mde", 0x7c, INSTR_RX_FRRD }, + { "me", 0x7c, INSTR_RX_FRRD }, + { "de", 0x7d, INSTR_RX_FRRD }, + { "au", 0x7e, INSTR_RX_FRRD }, + { "su", 0x7f, INSTR_RX_FRRD }, + { "ssm", 0x80, INSTR_S_RD }, + { "lpsw", 0x82, INSTR_S_RD }, + { "diag", 0x83, INSTR_RS_RRRD }, + { "brxh", 0x84, INSTR_RSI_RRP }, + { "brxle", 0x85, INSTR_RSI_RRP }, + { "bxh", 0x86, INSTR_RS_RRRD }, + { "bxle", 0x87, INSTR_RS_RRRD }, + { "srl", 0x88, INSTR_RS_R0RD }, + { "sll", 0x89, INSTR_RS_R0RD }, + { "sra", 0x8a, INSTR_RS_R0RD }, + { "sla", 0x8b, INSTR_RS_R0RD }, + { "srdl", 0x8c, INSTR_RS_R0RD }, + { "sldl", 0x8d, INSTR_RS_R0RD }, + { "srda", 0x8e, INSTR_RS_R0RD }, + { "slda", 0x8f, INSTR_RS_R0RD }, + { "stm", 0x90, INSTR_RS_RRRD }, + { "tm", 0x91, INSTR_SI_URD }, + { "mvi", 0x92, INSTR_SI_URD }, + { "ts", 0x93, INSTR_S_RD }, + { "ni", 0x94, INSTR_SI_URD }, + { "cli", 0x95, INSTR_SI_URD }, + { "oi", 0x96, INSTR_SI_URD }, + { "xi", 0x97, INSTR_SI_URD }, + { "lm", 0x98, INSTR_RS_RRRD }, + { "trace", 0x99, INSTR_RS_RRRD }, + { "lam", 0x9a, INSTR_RS_AARD }, + { "stam", 0x9b, INSTR_RS_AARD }, + { "mvcle", 0xa8, INSTR_RS_RRRD }, + { "clcle", 0xa9, INSTR_RS_RRRD }, + { "stnsm", 0xac, INSTR_SI_URD }, + { "stosm", 0xad, INSTR_SI_URD }, + { "sigp", 0xae, INSTR_RS_RRRD }, + { "mc", 0xaf, INSTR_SI_URD }, + { "lra", 0xb1, INSTR_RX_RRRD }, + { "stctl", 0xb6, INSTR_RS_CCRD }, + { "lctl", 0xb7, INSTR_RS_CCRD }, + { "cs", 0xba, INSTR_RS_RRRD }, + { "cds", 0xbb, INSTR_RS_RRRD }, + { "clm", 0xbd, INSTR_RS_RURD }, + { "stcm", 0xbe, INSTR_RS_RURD }, + { "icm", 0xbf, INSTR_RS_RURD }, + { "mvn", 0xd1, INSTR_SS_L0RDRD }, + { "mvc", 
0xd2, INSTR_SS_L0RDRD }, + { "mvz", 0xd3, INSTR_SS_L0RDRD }, + { "nc", 0xd4, INSTR_SS_L0RDRD }, + { "clc", 0xd5, INSTR_SS_L0RDRD }, + { "oc", 0xd6, INSTR_SS_L0RDRD }, + { "xc", 0xd7, INSTR_SS_L0RDRD }, + { "mvck", 0xd9, INSTR_SS_RRRDRD }, + { "mvcp", 0xda, INSTR_SS_RRRDRD }, + { "mvcs", 0xdb, INSTR_SS_RRRDRD }, + { "tr", 0xdc, INSTR_SS_L0RDRD }, + { "trt", 0xdd, INSTR_SS_L0RDRD }, + { "ed", 0xde, INSTR_SS_L0RDRD }, + { "edmk", 0xdf, INSTR_SS_L0RDRD }, + { "pku", 0xe1, INSTR_SS_L0RDRD }, + { "unpku", 0xe2, INSTR_SS_L0RDRD }, + { "mvcin", 0xe8, INSTR_SS_L0RDRD }, + { "pka", 0xe9, INSTR_SS_L0RDRD }, + { "unpka", 0xea, INSTR_SS_L0RDRD }, + { "plo", 0xee, INSTR_SS_RRRDRD2 }, + { "srp", 0xf0, INSTR_SS_LIRDRD }, + { "mvo", 0xf1, INSTR_SS_LLRDRD }, + { "pack", 0xf2, INSTR_SS_LLRDRD }, + { "unpk", 0xf3, INSTR_SS_LLRDRD }, + { "zap", 0xf8, INSTR_SS_LLRDRD }, + { "cp", 0xf9, INSTR_SS_LLRDRD }, + { "ap", 0xfa, INSTR_SS_LLRDRD }, + { "sp", 0xfb, INSTR_SS_LLRDRD }, + { "mp", 0xfc, INSTR_SS_LLRDRD }, + { "dp", 0xfd, INSTR_SS_LLRDRD }, + { "", 0, INSTR_INVALID } +} + +local opcode_01 = { + { "sam64", 0x0e, INSTR_E }, + { "pfpo", 0x0a, INSTR_E }, + { "ptff", 0x04, INSTR_E }, + { "pr", 0x01, INSTR_E }, + { "upt", 0x02, INSTR_E }, + { "sckpf", 0x07, INSTR_E }, + { "tam", 0x0b, INSTR_E }, + { "sam24", 0x0c, INSTR_E }, + { "sam31", 0x0d, INSTR_E }, + { "trap2", 0xff, INSTR_E }, + { "", 0, INSTR_INVALID } +} + +local opcode_a5 = { + { "iihh", 0x00, INSTR_RI_RU }, + { "iihl", 0x01, INSTR_RI_RU }, + { "iilh", 0x02, INSTR_RI_RU }, + { "iill", 0x03, INSTR_RI_RU }, + { "nihh", 0x04, INSTR_RI_RU }, + { "nihl", 0x05, INSTR_RI_RU }, + { "nilh", 0x06, INSTR_RI_RU }, + { "nill", 0x07, INSTR_RI_RU }, + { "oihh", 0x08, INSTR_RI_RU }, + { "oihl", 0x09, INSTR_RI_RU }, + { "oilh", 0x0a, INSTR_RI_RU }, + { "oill", 0x0b, INSTR_RI_RU }, + { "llihh", 0x0c, INSTR_RI_RU }, + { "llihl", 0x0d, INSTR_RI_RU }, + { "llilh", 0x0e, INSTR_RI_RU }, + { "llill", 0x0f, INSTR_RI_RU }, + { "", 0, INSTR_INVALID } +} + 
+local opcode_a7 = { + { "tmhh", 0x02, INSTR_RI_RU }, + { "tmhl", 0x03, INSTR_RI_RU }, + { "brctg", 0x07, INSTR_RI_RP }, + { "lghi", 0x09, INSTR_RI_RI }, + { "aghi", 0x0b, INSTR_RI_RI }, + { "mghi", 0x0d, INSTR_RI_RI }, + { "cghi", 0x0f, INSTR_RI_RI }, + { "tmlh", 0x00, INSTR_RI_RU }, + { "tmll", 0x01, INSTR_RI_RU }, + { "brc", 0x04, INSTR_RI_UP }, + { "bras", 0x05, INSTR_RI_RP }, + { "brct", 0x06, INSTR_RI_RP }, + { "lhi", 0x08, INSTR_RI_RI }, + { "ahi", 0x0a, INSTR_RI_RI }, + { "mhi", 0x0c, INSTR_RI_RI }, + { "chi", 0x0e, INSTR_RI_RI }, + { "", 0, INSTR_INVALID } +} + +local opcode_b2 = { + { "sske", 0x2b, INSTR_RRF_M0RR }, + { "stckf", 0x7c, INSTR_S_RD }, + { "cu21", 0xa6, INSTR_RRF_M0RR }, + { "cuutf", 0xa6, INSTR_RRF_M0RR }, + { "cu12", 0xa7, INSTR_RRF_M0RR }, + { "cutfu", 0xa7, INSTR_RRF_M0RR }, + { "stfle", 0xb0, INSTR_S_RD }, + { "lpswe", 0xb2, INSTR_S_RD }, + { "srnmt", 0xb9, INSTR_S_RD }, + { "lfas", 0xbd, INSTR_S_RD }, + { "stidp", 0x02, INSTR_S_RD }, + { "sck", 0x04, INSTR_S_RD }, + { "stck", 0x05, INSTR_S_RD }, + { "sckc", 0x06, INSTR_S_RD }, + { "stckc", 0x07, INSTR_S_RD }, + { "spt", 0x08, INSTR_S_RD }, + { "stpt", 0x09, INSTR_S_RD }, + { "spka", 0x0a, INSTR_S_RD }, + { "ipk", 0x0b, INSTR_S_00 }, + { "ptlb", 0x0d, INSTR_S_00 }, + { "spx", 0x10, INSTR_S_RD }, + { "stpx", 0x11, INSTR_S_RD }, + { "stap", 0x12, INSTR_S_RD }, + { "sie", 0x14, INSTR_S_RD }, + { "pc", 0x18, INSTR_S_RD }, + { "sac", 0x19, INSTR_S_RD }, + { "cfc", 0x1a, INSTR_S_RD }, + { "ipte", 0x21, INSTR_RRE_RR }, + { "ipm", 0x22, INSTR_RRE_R0 }, + { "ivsk", 0x23, INSTR_RRE_RR }, + { "iac", 0x24, INSTR_RRE_R0 }, + { "ssar", 0x25, INSTR_RRE_R0 }, + { "epar", 0x26, INSTR_RRE_R0 }, + { "esar", 0x27, INSTR_RRE_R0 }, + { "pt", 0x28, INSTR_RRE_RR }, + { "iske", 0x29, INSTR_RRE_RR }, + { "rrbe", 0x2a, INSTR_RRE_RR }, + { "sske", 0x2b, INSTR_RRE_RR }, + { "tb", 0x2c, INSTR_RRE_0R }, + { "dxr", 0x2d, INSTR_RRE_F0 }, + { "pgin", 0x2e, INSTR_RRE_RR }, + { "pgout", 0x2f, INSTR_RRE_RR }, + { "csch", 
0x30, INSTR_S_00 }, + { "hsch", 0x31, INSTR_S_00 }, + { "msch", 0x32, INSTR_S_RD }, + { "ssch", 0x33, INSTR_S_RD }, + { "stsch", 0x34, INSTR_S_RD }, + { "tsch", 0x35, INSTR_S_RD }, + { "tpi", 0x36, INSTR_S_RD }, + { "sal", 0x37, INSTR_S_00 }, + { "rsch", 0x38, INSTR_S_00 }, + { "stcrw", 0x39, INSTR_S_RD }, + { "stcps", 0x3a, INSTR_S_RD }, + { "rchp", 0x3b, INSTR_S_00 }, + { "schm", 0x3c, INSTR_S_00 }, + { "bakr", 0x40, INSTR_RRE_RR }, + { "cksm", 0x41, INSTR_RRE_RR }, + { "sqdr", 0x44, INSTR_RRE_F0 }, + { "sqer", 0x45, INSTR_RRE_F0 }, + { "stura", 0x46, INSTR_RRE_RR }, + { "msta", 0x47, INSTR_RRE_R0 }, + { "palb", 0x48, INSTR_RRE_00 }, + { "ereg", 0x49, INSTR_RRE_RR }, + { "esta", 0x4a, INSTR_RRE_RR }, + { "lura", 0x4b, INSTR_RRE_RR }, + { "tar", 0x4c, INSTR_RRE_AR }, + { "cpya", 0x4d, INSTR_RRE_AA }, + { "sar", 0x4e, INSTR_RRE_AR }, + { "ear", 0x4f, INSTR_RRE_RA }, + { "csp", 0x50, INSTR_RRE_RR }, + { "msr", 0x52, INSTR_RRE_RR }, + { "mvpg", 0x54, INSTR_RRE_RR }, + { "mvst", 0x55, INSTR_RRE_RR }, + { "cuse", 0x57, INSTR_RRE_RR }, + { "bsg", 0x58, INSTR_RRE_RR }, + { "bsa", 0x5a, INSTR_RRE_RR }, + { "clst", 0x5d, INSTR_RRE_RR }, + { "srst", 0x5e, INSTR_RRE_RR }, + { "cmpsc", 0x63, INSTR_RRE_RR }, + { "siga", 0x74, INSTR_S_RD }, + { "xsch", 0x76, INSTR_S_00 }, + { "rp", 0x77, INSTR_S_RD }, + { "stcke", 0x78, INSTR_S_RD }, + { "sacf", 0x79, INSTR_S_RD }, + { "stsi", 0x7d, INSTR_S_RD }, + { "srnm", 0x99, INSTR_S_RD }, + { "stfpc", 0x9c, INSTR_S_RD }, + { "lfpc", 0x9d, INSTR_S_RD }, + { "tre", 0xa5, INSTR_RRE_RR }, + { "cuutf", 0xa6, INSTR_RRE_RR }, + { "cutfu", 0xa7, INSTR_RRE_RR }, + { "stfl", 0xb1, INSTR_S_RD }, + { "trap4", 0xff, INSTR_S_RD }, + { "", 0, INSTR_INVALID } +} + +local opcode_b3 = { + { "maylr", 0x38, INSTR_RRF_F0FF }, + { "mylr", 0x39, INSTR_RRF_F0FF }, + { "mayr", 0x3a, INSTR_RRF_F0FF }, + { "myr", 0x3b, INSTR_RRF_F0FF }, + { "mayhr", 0x3c, INSTR_RRF_F0FF }, + { "myhr", 0x3d, INSTR_RRF_F0FF }, + { "cegbr", 0xa4, INSTR_RRE_RR }, + { "cdgbr", 0xa5, 
INSTR_RRE_RR }, + { "cxgbr", 0xa6, INSTR_RRE_RR }, + { "cgebr", 0xa8, INSTR_RRF_U0RF }, + { "cgdbr", 0xa9, INSTR_RRF_U0RF }, + { "cgxbr", 0xaa, INSTR_RRF_U0RF }, + { "cfer", 0xb8, INSTR_RRF_U0RF }, + { "cfdr", 0xb9, INSTR_RRF_U0RF }, + { "cfxr", 0xba, INSTR_RRF_U0RF }, + { "cegr", 0xc4, INSTR_RRE_RR }, + { "cdgr", 0xc5, INSTR_RRE_RR }, + { "cxgr", 0xc6, INSTR_RRE_RR }, + { "cger", 0xc8, INSTR_RRF_U0RF }, + { "cgdr", 0xc9, INSTR_RRF_U0RF }, + { "cgxr", 0xca, INSTR_RRF_U0RF }, + { "lpdfr", 0x70, INSTR_RRE_FF }, + { "lndfr", 0x71, INSTR_RRE_FF }, + { "cpsdr", 0x72, INSTR_RRF_F0FF2 }, + { "lcdfr", 0x73, INSTR_RRE_FF }, + { "ldgr", 0xc1, INSTR_RRE_FR }, + { "lgdr", 0xcd, INSTR_RRE_RF }, + { "adtr", 0xd2, INSTR_RRR_F0FF }, + { "axtr", 0xda, INSTR_RRR_F0FF }, + { "cdtr", 0xe4, INSTR_RRE_FF }, + { "cxtr", 0xec, INSTR_RRE_FF }, + { "kdtr", 0xe0, INSTR_RRE_FF }, + { "kxtr", 0xe8, INSTR_RRE_FF }, + { "cedtr", 0xf4, INSTR_RRE_FF }, + { "cextr", 0xfc, INSTR_RRE_FF }, + { "cdgtr", 0xf1, INSTR_RRE_FR }, + { "cxgtr", 0xf9, INSTR_RRE_FR }, + { "cdstr", 0xf3, INSTR_RRE_FR }, + { "cxstr", 0xfb, INSTR_RRE_FR }, + { "cdutr", 0xf2, INSTR_RRE_FR }, + { "cxutr", 0xfa, INSTR_RRE_FR }, + { "cgdtr", 0xe1, INSTR_RRF_U0RF }, + { "cgxtr", 0xe9, INSTR_RRF_U0RF }, + { "csdtr", 0xe3, INSTR_RRE_RF }, + { "csxtr", 0xeb, INSTR_RRE_RF }, + { "cudtr", 0xe2, INSTR_RRE_RF }, + { "cuxtr", 0xea, INSTR_RRE_RF }, + { "ddtr", 0xd1, INSTR_RRR_F0FF }, + { "dxtr", 0xd9, INSTR_RRR_F0FF }, + { "eedtr", 0xe5, INSTR_RRE_RF }, + { "eextr", 0xed, INSTR_RRE_RF }, + { "esdtr", 0xe7, INSTR_RRE_RF }, + { "esxtr", 0xef, INSTR_RRE_RF }, + { "iedtr", 0xf6, INSTR_RRF_F0FR }, + { "iextr", 0xfe, INSTR_RRF_F0FR }, + { "ltdtr", 0xd6, INSTR_RRE_FF }, + { "ltxtr", 0xde, INSTR_RRE_FF }, + { "fidtr", 0xd7, INSTR_RRF_UUFF }, + { "fixtr", 0xdf, INSTR_RRF_UUFF }, + { "ldetr", 0xd4, INSTR_RRF_0UFF }, + { "lxdtr", 0xdc, INSTR_RRF_0UFF }, + { "ledtr", 0xd5, INSTR_RRF_UUFF }, + { "ldxtr", 0xdd, INSTR_RRF_UUFF }, + { "mdtr", 0xd0, 
INSTR_RRR_F0FF }, + { "mxtr", 0xd8, INSTR_RRR_F0FF }, + { "qadtr", 0xf5, INSTR_RRF_FUFF }, + { "qaxtr", 0xfd, INSTR_RRF_FUFF }, + { "rrdtr", 0xf7, INSTR_RRF_FFRU }, + { "rrxtr", 0xff, INSTR_RRF_FFRU }, + { "sfasr", 0x85, INSTR_RRE_R0 }, + { "sdtr", 0xd3, INSTR_RRR_F0FF }, + { "sxtr", 0xdb, INSTR_RRR_F0FF }, + { "lpebr", 0x00, INSTR_RRE_FF }, + { "lnebr", 0x01, INSTR_RRE_FF }, + { "ltebr", 0x02, INSTR_RRE_FF }, + { "lcebr", 0x03, INSTR_RRE_FF }, + { "ldebr", 0x04, INSTR_RRE_FF }, + { "lxdbr", 0x05, INSTR_RRE_FF }, + { "lxebr", 0x06, INSTR_RRE_FF }, + { "mxdbr", 0x07, INSTR_RRE_FF }, + { "kebr", 0x08, INSTR_RRE_FF }, + { "cebr", 0x09, INSTR_RRE_FF }, + { "aebr", 0x0a, INSTR_RRE_FF }, + { "sebr", 0x0b, INSTR_RRE_FF }, + { "mdebr", 0x0c, INSTR_RRE_FF }, + { "debr", 0x0d, INSTR_RRE_FF }, + { "maebr", 0x0e, INSTR_RRF_F0FF }, + { "msebr", 0x0f, INSTR_RRF_F0FF }, + { "lpdbr", 0x10, INSTR_RRE_FF }, + { "lndbr", 0x11, INSTR_RRE_FF }, + { "ltdbr", 0x12, INSTR_RRE_FF }, + { "lcdbr", 0x13, INSTR_RRE_FF }, + { "sqebr", 0x14, INSTR_RRE_FF }, + { "sqdbr", 0x15, INSTR_RRE_FF }, + { "sqxbr", 0x16, INSTR_RRE_FF }, + { "meebr", 0x17, INSTR_RRE_FF }, + { "kdbr", 0x18, INSTR_RRE_FF }, + { "cdbr", 0x19, INSTR_RRE_FF }, + { "adbr", 0x1a, INSTR_RRE_FF }, + { "sdbr", 0x1b, INSTR_RRE_FF }, + { "mdbr", 0x1c, INSTR_RRE_FF }, + { "ddbr", 0x1d, INSTR_RRE_FF }, + { "madbr", 0x1e, INSTR_RRF_F0FF }, + { "msdbr", 0x1f, INSTR_RRF_F0FF }, + { "lder", 0x24, INSTR_RRE_FF }, + { "lxdr", 0x25, INSTR_RRE_FF }, + { "lxer", 0x26, INSTR_RRE_FF }, + { "maer", 0x2e, INSTR_RRF_F0FF }, + { "mser", 0x2f, INSTR_RRF_F0FF }, + { "sqxr", 0x36, INSTR_RRE_FF }, + { "meer", 0x37, INSTR_RRE_FF }, + { "madr", 0x3e, INSTR_RRF_F0FF }, + { "msdr", 0x3f, INSTR_RRF_F0FF }, + { "lpxbr", 0x40, INSTR_RRE_FF }, + { "lnxbr", 0x41, INSTR_RRE_FF }, + { "ltxbr", 0x42, INSTR_RRE_FF }, + { "lcxbr", 0x43, INSTR_RRE_FF }, + { "ledbr", 0x44, INSTR_RRE_FF }, + { "ldxbr", 0x45, INSTR_RRE_FF }, + { "lexbr", 0x46, INSTR_RRE_FF }, + { "fixbr", 
0x47, INSTR_RRF_U0FF }, + { "kxbr", 0x48, INSTR_RRE_FF }, + { "cxbr", 0x49, INSTR_RRE_FF }, + { "axbr", 0x4a, INSTR_RRE_FF }, + { "sxbr", 0x4b, INSTR_RRE_FF }, + { "mxbr", 0x4c, INSTR_RRE_FF }, + { "dxbr", 0x4d, INSTR_RRE_FF }, + { "tbedr", 0x50, INSTR_RRF_U0FF }, + { "tbdr", 0x51, INSTR_RRF_U0FF }, + { "diebr", 0x53, INSTR_RRF_FUFF }, + { "fiebr", 0x57, INSTR_RRF_U0FF }, + { "thder", 0x58, INSTR_RRE_RR }, + { "thdr", 0x59, INSTR_RRE_RR }, + { "didbr", 0x5b, INSTR_RRF_FUFF }, + { "fidbr", 0x5f, INSTR_RRF_U0FF }, + { "lpxr", 0x60, INSTR_RRE_FF }, + { "lnxr", 0x61, INSTR_RRE_FF }, + { "ltxr", 0x62, INSTR_RRE_FF }, + { "lcxr", 0x63, INSTR_RRE_FF }, + { "lxr", 0x65, INSTR_RRE_RR }, + { "lexr", 0x66, INSTR_RRE_FF }, + { "fixr", 0x67, INSTR_RRF_U0FF }, + { "cxr", 0x69, INSTR_RRE_FF }, + { "lzer", 0x74, INSTR_RRE_R0 }, + { "lzdr", 0x75, INSTR_RRE_R0 }, + { "lzxr", 0x76, INSTR_RRE_R0 }, + { "fier", 0x77, INSTR_RRF_U0FF }, + { "fidr", 0x7f, INSTR_RRF_U0FF }, + { "sfpc", 0x84, INSTR_RRE_RR_OPT }, + { "efpc", 0x8c, INSTR_RRE_RR_OPT }, + { "cefbr", 0x94, INSTR_RRE_RF }, + { "cdfbr", 0x95, INSTR_RRE_RF }, + { "cxfbr", 0x96, INSTR_RRE_RF }, + { "cfebr", 0x98, INSTR_RRF_U0RF }, + { "cfdbr", 0x99, INSTR_RRF_U0RF }, + { "cfxbr", 0x9a, INSTR_RRF_U0RF }, + { "cefr", 0xb4, INSTR_RRE_RF }, + { "cdfr", 0xb5, INSTR_RRE_RF }, + { "cxfr", 0xb6, INSTR_RRE_RF }, + { "", 0, INSTR_INVALID } +} + +local opcode_b9 = { + { "lpgr", 0x00, INSTR_RRE_RR }, + { "lngr", 0x01, INSTR_RRE_RR }, + { "ltgr", 0x02, INSTR_RRE_RR }, + { "lcgr", 0x03, INSTR_RRE_RR }, + { "lgr", 0x04, INSTR_RRE_RR }, + { "lurag", 0x05, INSTR_RRE_RR }, + { "lgbr", 0x06, INSTR_RRE_RR }, + { "lghr", 0x07, INSTR_RRE_RR }, + { "agr", 0x08, INSTR_RRE_RR }, + { "sgr", 0x09, INSTR_RRE_RR }, + { "algr", 0x0a, INSTR_RRE_RR }, + { "slgr", 0x0b, INSTR_RRE_RR }, + { "msgr", 0x0c, INSTR_RRE_RR }, + { "dsgr", 0x0d, INSTR_RRE_RR }, + { "eregg", 0x0e, INSTR_RRE_RR }, + { "lrvgr", 0x0f, INSTR_RRE_RR }, + { "lpgfr", 0x10, INSTR_RRE_RR }, + { 
"lngfr", 0x11, INSTR_RRE_RR }, + { "ltgfr", 0x12, INSTR_RRE_RR }, + { "lcgfr", 0x13, INSTR_RRE_RR }, + { "lgfr", 0x14, INSTR_RRE_RR }, + { "llgfr", 0x16, INSTR_RRE_RR }, + { "llgtr", 0x17, INSTR_RRE_RR }, + { "agfr", 0x18, INSTR_RRE_RR }, + { "sgfr", 0x19, INSTR_RRE_RR }, + { "algfr", 0x1a, INSTR_RRE_RR }, + { "slgfr", 0x1b, INSTR_RRE_RR }, + { "msgfr", 0x1c, INSTR_RRE_RR }, + { "dsgfr", 0x1d, INSTR_RRE_RR }, + { "cgr", 0x20, INSTR_RRE_RR }, + { "clgr", 0x21, INSTR_RRE_RR }, + { "sturg", 0x25, INSTR_RRE_RR }, + { "lbr", 0x26, INSTR_RRE_RR }, + { "lhr", 0x27, INSTR_RRE_RR }, + { "cgfr", 0x30, INSTR_RRE_RR }, + { "clgfr", 0x31, INSTR_RRE_RR }, + { "bctgr", 0x46, INSTR_RRE_RR }, + { "ngr", 0x80, INSTR_RRE_RR }, + { "ogr", 0x81, INSTR_RRE_RR }, + { "xgr", 0x82, INSTR_RRE_RR }, + { "flogr", 0x83, INSTR_RRE_RR }, + { "llgcr", 0x84, INSTR_RRE_RR }, + { "llghr", 0x85, INSTR_RRE_RR }, + { "mlgr", 0x86, INSTR_RRE_RR }, + { "dlgr", 0x87, INSTR_RRE_RR }, + { "alcgr", 0x88, INSTR_RRE_RR }, + { "slbgr", 0x89, INSTR_RRE_RR }, + { "cspg", 0x8a, INSTR_RRE_RR }, + { "idte", 0x8e, INSTR_RRF_R0RR }, + { "llcr", 0x94, INSTR_RRE_RR }, + { "llhr", 0x95, INSTR_RRE_RR }, + { "esea", 0x9d, INSTR_RRE_R0 }, + { "lptea", 0xaa, INSTR_RRF_RURR }, + { "cu14", 0xb0, INSTR_RRF_M0RR }, + { "cu24", 0xb1, INSTR_RRF_M0RR }, + { "cu41", 0xb2, INSTR_RRF_M0RR }, + { "cu42", 0xb3, INSTR_RRF_M0RR }, + { "crt", 0x72, INSTR_RRF_U0RR }, + { "cgrt", 0x60, INSTR_RRF_U0RR }, + { "clrt", 0x73, INSTR_RRF_U0RR }, + { "clgrt", 0x61, INSTR_RRF_U0RR }, + { "ptf", 0xa2, INSTR_RRE_R0 }, + { "pfmf", 0xaf, INSTR_RRE_RR }, + { "trte", 0xbf, INSTR_RRF_M0RR }, + { "trtre", 0xbd, INSTR_RRF_M0RR }, + { "kmac", 0x1e, INSTR_RRE_RR }, + { "lrvr", 0x1f, INSTR_RRE_RR }, + { "km", 0x2e, INSTR_RRE_RR }, + { "kmc", 0x2f, INSTR_RRE_RR }, + { "kimd", 0x3e, INSTR_RRE_RR }, + { "klmd", 0x3f, INSTR_RRE_RR }, + { "epsw", 0x8d, INSTR_RRE_RR }, + { "trtt", 0x90, INSTR_RRE_RR }, + { "trtt", 0x90, INSTR_RRF_M0RR }, + { "trto", 0x91, INSTR_RRE_RR 
}, + { "trto", 0x91, INSTR_RRF_M0RR }, + { "trot", 0x92, INSTR_RRE_RR }, + { "trot", 0x92, INSTR_RRF_M0RR }, + { "troo", 0x93, INSTR_RRE_RR }, + { "troo", 0x93, INSTR_RRF_M0RR }, + { "mlr", 0x96, INSTR_RRE_RR }, + { "dlr", 0x97, INSTR_RRE_RR }, + { "alcr", 0x98, INSTR_RRE_RR }, + { "slbr", 0x99, INSTR_RRE_RR }, + { "", 0, INSTR_INVALID } +} + +local opcode_c0 = { + { "lgfi", 0x01, INSTR_RIL_RI }, + { "xihf", 0x06, INSTR_RIL_RU }, + { "xilf", 0x07, INSTR_RIL_RU }, + { "iihf", 0x08, INSTR_RIL_RU }, + { "iilf", 0x09, INSTR_RIL_RU }, + { "nihf", 0x0a, INSTR_RIL_RU }, + { "nilf", 0x0b, INSTR_RIL_RU }, + { "oihf", 0x0c, INSTR_RIL_RU }, + { "oilf", 0x0d, INSTR_RIL_RU }, + { "llihf", 0x0e, INSTR_RIL_RU }, + { "llilf", 0x0f, INSTR_RIL_RU }, + { "larl", 0x00, INSTR_RIL_RP }, + { "brcl", 0x04, INSTR_RIL_UP }, + { "brasl", 0x05, INSTR_RIL_RP }, + { "", 0, INSTR_INVALID } +} + +local opcode_c2 = { + { "slgfi", 0x04, INSTR_RIL_RU }, + { "slfi", 0x05, INSTR_RIL_RU }, + { "agfi", 0x08, INSTR_RIL_RI }, + { "afi", 0x09, INSTR_RIL_RI }, + { "algfi", 0x0a, INSTR_RIL_RU }, + { "alfi", 0x0b, INSTR_RIL_RU }, + { "cgfi", 0x0c, INSTR_RIL_RI }, + { "cfi", 0x0d, INSTR_RIL_RI }, + { "clgfi", 0x0e, INSTR_RIL_RU }, + { "clfi", 0x0f, INSTR_RIL_RU }, + { "msfi", 0x01, INSTR_RIL_RI }, + { "msgfi", 0x00, INSTR_RIL_RI }, + { "", 0, INSTR_INVALID } +} + +local opcode_c4 = { + { "lrl", 0x0d, INSTR_RIL_RP }, + { "lgrl", 0x08, INSTR_RIL_RP }, + { "lgfrl", 0x0c, INSTR_RIL_RP }, + { "lhrl", 0x05, INSTR_RIL_RP }, + { "lghrl", 0x04, INSTR_RIL_RP }, + { "llgfrl", 0x0e, INSTR_RIL_RP }, + { "llhrl", 0x02, INSTR_RIL_RP }, + { "llghrl", 0x06, INSTR_RIL_RP }, + { "strl", 0x0f, INSTR_RIL_RP }, + { "stgrl", 0x0b, INSTR_RIL_RP }, + { "sthrl", 0x07, INSTR_RIL_RP }, + { "", 0, INSTR_INVALID } +} + +local opcode_c6 = { + { "crl", 0x0d, INSTR_RIL_RP }, + { "cgrl", 0x08, INSTR_RIL_RP }, + { "cgfrl", 0x0c, INSTR_RIL_RP }, + { "chrl", 0x05, INSTR_RIL_RP }, + { "cghrl", 0x04, INSTR_RIL_RP }, + { "clrl", 0x0f, INSTR_RIL_RP 
}, + { "clgrl", 0x0a, INSTR_RIL_RP }, + { "clgfrl", 0x0e, INSTR_RIL_RP }, + { "clhrl", 0x07, INSTR_RIL_RP }, + { "clghrl", 0x06, INSTR_RIL_RP }, + { "pfdrl", 0x02, INSTR_RIL_UP }, + { "exrl", 0x00, INSTR_RIL_RP }, + { "", 0, INSTR_INVALID } +} + +local opcode_c8 = { + { "mvcos", 0x00, INSTR_SSF_RRDRD }, + { "ectg", 0x01, INSTR_SSF_RRDRD }, + { "csst", 0x02, INSTR_SSF_RRDRD }, + { "", 0, INSTR_INVALID } +} + +local opcode_e3 = { + { "ltg", 0x02, INSTR_RXY_RRRD }, + { "lrag", 0x03, INSTR_RXY_RRRD }, + { "lg", 0x04, INSTR_RXY_RRRD }, + { "cvby", 0x06, INSTR_RXY_RRRD }, + { "ag", 0x08, INSTR_RXY_RRRD }, + { "sg", 0x09, INSTR_RXY_RRRD }, + { "alg", 0x0a, INSTR_RXY_RRRD }, + { "slg", 0x0b, INSTR_RXY_RRRD }, + { "msg", 0x0c, INSTR_RXY_RRRD }, + { "dsg", 0x0d, INSTR_RXY_RRRD }, + { "cvbg", 0x0e, INSTR_RXY_RRRD }, + { "lrvg", 0x0f, INSTR_RXY_RRRD }, + { "lt", 0x12, INSTR_RXY_RRRD }, + { "lray", 0x13, INSTR_RXY_RRRD }, + { "lgf", 0x14, INSTR_RXY_RRRD }, + { "lgh", 0x15, INSTR_RXY_RRRD }, + { "llgf", 0x16, INSTR_RXY_RRRD }, + { "llgt", 0x17, INSTR_RXY_RRRD }, + { "agf", 0x18, INSTR_RXY_RRRD }, + { "sgf", 0x19, INSTR_RXY_RRRD }, + { "algf", 0x1a, INSTR_RXY_RRRD }, + { "slgf", 0x1b, INSTR_RXY_RRRD }, + { "msgf", 0x1c, INSTR_RXY_RRRD }, + { "dsgf", 0x1d, INSTR_RXY_RRRD }, + { "cg", 0x20, INSTR_RXY_RRRD }, + { "clg", 0x21, INSTR_RXY_RRRD }, + { "stg", 0x24, INSTR_RXY_RRRD }, + { "cvdy", 0x26, INSTR_RXY_RRRD }, + { "cvdg", 0x2e, INSTR_RXY_RRRD }, + { "strvg", 0x2f, INSTR_RXY_RRRD }, + { "cgf", 0x30, INSTR_RXY_RRRD }, + { "clgf", 0x31, INSTR_RXY_RRRD }, + { "strvh", 0x3f, INSTR_RXY_RRRD }, + { "bctg", 0x46, INSTR_RXY_RRRD }, + { "sty", 0x50, INSTR_RXY_RRRD }, + { "msy", 0x51, INSTR_RXY_RRRD }, + { "ny", 0x54, INSTR_RXY_RRRD }, + { "cly", 0x55, INSTR_RXY_RRRD }, + { "oy", 0x56, INSTR_RXY_RRRD }, + { "xy", 0x57, INSTR_RXY_RRRD }, + { "ly", 0x58, INSTR_RXY_RRRD }, + { "cy", 0x59, INSTR_RXY_RRRD }, + { "ay", 0x5a, INSTR_RXY_RRRD }, + { "sy", 0x5b, INSTR_RXY_RRRD }, + { "aly", 0x5e, 
INSTR_RXY_RRRD }, + { "sly", 0x5f, INSTR_RXY_RRRD }, + { "sthy", 0x70, INSTR_RXY_RRRD }, + { "lay", 0x71, INSTR_RXY_RRRD }, + { "stcy", 0x72, INSTR_RXY_RRRD }, + { "icy", 0x73, INSTR_RXY_RRRD }, + { "lb", 0x76, INSTR_RXY_RRRD }, + { "lgb", 0x77, INSTR_RXY_RRRD }, + { "lhy", 0x78, INSTR_RXY_RRRD }, + { "chy", 0x79, INSTR_RXY_RRRD }, + { "ahy", 0x7a, INSTR_RXY_RRRD }, + { "shy", 0x7b, INSTR_RXY_RRRD }, + { "ng", 0x80, INSTR_RXY_RRRD }, + { "og", 0x81, INSTR_RXY_RRRD }, + { "xg", 0x82, INSTR_RXY_RRRD }, + { "mlg", 0x86, INSTR_RXY_RRRD }, + { "dlg", 0x87, INSTR_RXY_RRRD }, + { "alcg", 0x88, INSTR_RXY_RRRD }, + { "slbg", 0x89, INSTR_RXY_RRRD }, + { "stpq", 0x8e, INSTR_RXY_RRRD }, + { "lpq", 0x8f, INSTR_RXY_RRRD }, + { "llgc", 0x90, INSTR_RXY_RRRD }, + { "llgh", 0x91, INSTR_RXY_RRRD }, + { "llc", 0x94, INSTR_RXY_RRRD }, + { "llh", 0x95, INSTR_RXY_RRRD }, + { "cgh", 0x34, INSTR_RXY_RRRD }, + { "laey", 0x75, INSTR_RXY_RRRD }, + { "ltgf", 0x32, INSTR_RXY_RRRD }, + { "mfy", 0x5c, INSTR_RXY_RRRD }, + { "mhy", 0x7c, INSTR_RXY_RRRD }, + { "pfd", 0x36, INSTR_RXY_URRD }, + { "lrv", 0x1e, INSTR_RXY_RRRD }, + { "lrvh", 0x1f, INSTR_RXY_RRRD }, + { "strv", 0x3e, INSTR_RXY_RRRD }, + { "ml", 0x96, INSTR_RXY_RRRD }, + { "dl", 0x97, INSTR_RXY_RRRD }, + { "alc", 0x98, INSTR_RXY_RRRD }, + { "slb", 0x99, INSTR_RXY_RRRD }, + { "", 0, INSTR_INVALID } +} + +local opcode_e5 = { + { "strag", 0x02, INSTR_SSE_RDRD }, + { "chhsi", 0x54, INSTR_SIL_RDI }, + { "chsi", 0x5c, INSTR_SIL_RDI }, + { "cghsi", 0x58, INSTR_SIL_RDI }, + { "clhhsi", 0x55, INSTR_SIL_RDU }, + { "clfhsi", 0x5d, INSTR_SIL_RDU }, + { "clghsi", 0x59, INSTR_SIL_RDU }, + { "mvhhi", 0x44, INSTR_SIL_RDI }, + { "mvhi", 0x4c, INSTR_SIL_RDI }, + { "mvghi", 0x48, INSTR_SIL_RDI }, + { "lasp", 0x00, INSTR_SSE_RDRD }, + { "tprot", 0x01, INSTR_SSE_RDRD }, + { "mvcsk", 0x0e, INSTR_SSE_RDRD }, + { "mvcdk", 0x0f, INSTR_SSE_RDRD }, + { "", 0, INSTR_INVALID } +} + +local opcode_eb = { + { "lmg", 0x04, INSTR_RSY_RRRD }, + { "srag", 0x0a, 
INSTR_RSY_RRRD }, + { "slag", 0x0b, INSTR_RSY_RRRD }, + { "srlg", 0x0c, INSTR_RSY_RRRD }, + { "sllg", 0x0d, INSTR_RSY_RRRD }, + { "tracg", 0x0f, INSTR_RSY_RRRD }, + { "csy", 0x14, INSTR_RSY_RRRD }, + { "rllg", 0x1c, INSTR_RSY_RRRD }, + { "clmh", 0x20, INSTR_RSY_RURD }, + { "clmy", 0x21, INSTR_RSY_RURD }, + { "stmg", 0x24, INSTR_RSY_RRRD }, + { "stctg", 0x25, INSTR_RSY_CCRD }, + { "stmh", 0x26, INSTR_RSY_RRRD }, + { "stcmh", 0x2c, INSTR_RSY_RURD }, + { "stcmy", 0x2d, INSTR_RSY_RURD }, + { "lctlg", 0x2f, INSTR_RSY_CCRD }, + { "csg", 0x30, INSTR_RSY_RRRD }, + { "cdsy", 0x31, INSTR_RSY_RRRD }, + { "cdsg", 0x3e, INSTR_RSY_RRRD }, + { "bxhg", 0x44, INSTR_RSY_RRRD }, + { "bxleg", 0x45, INSTR_RSY_RRRD }, + { "tmy", 0x51, INSTR_SIY_URD }, + { "mviy", 0x52, INSTR_SIY_URD }, + { "niy", 0x54, INSTR_SIY_URD }, + { "cliy", 0x55, INSTR_SIY_URD }, + { "oiy", 0x56, INSTR_SIY_URD }, + { "xiy", 0x57, INSTR_SIY_URD }, + { "icmh", 0x80, INSTR_RSE_RURD }, + { "icmh", 0x80, INSTR_RSY_RURD }, + { "icmy", 0x81, INSTR_RSY_RURD }, + { "clclu", 0x8f, INSTR_RSY_RRRD }, + { "stmy", 0x90, INSTR_RSY_RRRD }, + { "lmh", 0x96, INSTR_RSY_RRRD }, + { "lmy", 0x98, INSTR_RSY_RRRD }, + { "lamy", 0x9a, INSTR_RSY_AARD }, + { "stamy", 0x9b, INSTR_RSY_AARD }, + { "asi", 0x6a, INSTR_SIY_IRD }, + { "agsi", 0x7a, INSTR_SIY_IRD }, + { "alsi", 0x6e, INSTR_SIY_IRD }, + { "algsi", 0x7e, INSTR_SIY_IRD }, + { "ecag", 0x4c, INSTR_RSY_RRRD }, + { "rll", 0x1d, INSTR_RSY_RRRD }, + { "mvclu", 0x8e, INSTR_RSY_RRRD }, + { "tp", 0xc0, INSTR_RSL_R0RD }, + { "", 0, INSTR_INVALID } +} + +local opcode_ec = { + { "brxhg", 0x44, INSTR_RIE_RRP }, + { "brxlg", 0x45, INSTR_RIE_RRP }, + { "crb", 0xf6, INSTR_RRS_RRRDU }, + { "cgrb", 0xe4, INSTR_RRS_RRRDU }, + { "crj", 0x76, INSTR_RIE_RRPU }, + { "cgrj", 0x64, INSTR_RIE_RRPU }, + { "cib", 0xfe, INSTR_RIS_RURDI }, + { "cgib", 0xfc, INSTR_RIS_RURDI }, + { "cij", 0x7e, INSTR_RIE_RUPI }, + { "cgij", 0x7c, INSTR_RIE_RUPI }, + { "cit", 0x72, INSTR_RIE_R0IU }, + { "cgit", 0x70, INSTR_RIE_R0IU 
},
  { "clrb", 0xf7, INSTR_RRS_RRRDU },
  { "clgrb", 0xe5, INSTR_RRS_RRRDU },
  { "clrj", 0x77, INSTR_RIE_RRPU },
  { "clgrj", 0x65, INSTR_RIE_RRPU },
  { "clib", 0xff, INSTR_RIS_RURDU },
  { "clgib", 0xfd, INSTR_RIS_RURDU },
  { "clij", 0x7f, INSTR_RIE_RUPU },
  { "clgij", 0x7d, INSTR_RIE_RUPU },
  { "clfit", 0x73, INSTR_RIE_R0UU },
  { "clgit", 0x71, INSTR_RIE_R0UU },
  { "rnsbg", 0x54, INSTR_RIE_RRUUU },
  { "rxsbg", 0x57, INSTR_RIE_RRUUU },
  { "rosbg", 0x56, INSTR_RIE_RRUUU },
  { "risbg", 0x55, INSTR_RIE_RRUUU },
  { "", 0, INSTR_INVALID }
}

-- Opcode table selected by find_insn() when the first instruction byte is
-- 0xed. Rows are { mnemonic, opcode fragment, format id }.
local opcode_ed = {
  { "mayl", 0x38, INSTR_RXF_FRRDF },
  { "myl", 0x39, INSTR_RXF_FRRDF },
  { "may", 0x3a, INSTR_RXF_FRRDF },
  { "my", 0x3b, INSTR_RXF_FRRDF },
  { "mayh", 0x3c, INSTR_RXF_FRRDF },
  { "myh", 0x3d, INSTR_RXF_FRRDF },
  { "ley", 0x64, INSTR_RXY_FRRD },
  { "ldy", 0x65, INSTR_RXY_FRRD },
  { "stey", 0x66, INSTR_RXY_FRRD },
  { "stdy", 0x67, INSTR_RXY_FRRD },
  { "sldt", 0x40, INSTR_RXF_FRRDF },
  { "slxt", 0x48, INSTR_RXF_FRRDF },
  { "srdt", 0x41, INSTR_RXF_FRRDF },
  { "srxt", 0x49, INSTR_RXF_FRRDF },
  { "tdcet", 0x50, INSTR_RXE_FRRD },
  { "tdcdt", 0x54, INSTR_RXE_FRRD },
  { "tdcxt", 0x58, INSTR_RXE_FRRD },
  { "tdget", 0x51, INSTR_RXE_FRRD },
  { "tdgdt", 0x55, INSTR_RXE_FRRD },
  { "tdgxt", 0x59, INSTR_RXE_FRRD },
  { "ldeb", 0x04, INSTR_RXE_FRRD },
  { "lxdb", 0x05, INSTR_RXE_FRRD },
  { "lxeb", 0x06, INSTR_RXE_FRRD },
  { "mxdb", 0x07, INSTR_RXE_FRRD },
  { "keb", 0x08, INSTR_RXE_FRRD },
  { "ceb", 0x09, INSTR_RXE_FRRD },
  { "aeb", 0x0a, INSTR_RXE_FRRD },
  { "seb", 0x0b, INSTR_RXE_FRRD },
  { "mdeb", 0x0c, INSTR_RXE_FRRD },
  { "deb", 0x0d, INSTR_RXE_FRRD },
  { "maeb", 0x0e, INSTR_RXF_FRRDF },
  { "mseb", 0x0f, INSTR_RXF_FRRDF },
  { "tceb", 0x10, INSTR_RXE_FRRD },
  { "tcdb", 0x11, INSTR_RXE_FRRD },
  { "tcxb", 0x12, INSTR_RXE_FRRD },
  { "sqeb", 0x14, INSTR_RXE_FRRD },
  { "sqdb", 0x15, INSTR_RXE_FRRD },
  { "meeb", 0x17, INSTR_RXE_FRRD },
  { "kdb", 0x18, INSTR_RXE_FRRD },
  { "cdb", 0x19, INSTR_RXE_FRRD },
  { "adb", 0x1a, INSTR_RXE_FRRD },
  { "sdb", 0x1b, INSTR_RXE_FRRD },
  { "mdb", 0x1c, INSTR_RXE_FRRD },
  { "ddb", 0x1d, INSTR_RXE_FRRD },
  { "madb", 0x1e, INSTR_RXF_FRRDF },
  { "msdb", 0x1f, INSTR_RXF_FRRDF },
  { "lde", 0x24, INSTR_RXE_FRRD },
  { "lxd", 0x25, INSTR_RXE_FRRD },
  { "lxe", 0x26, INSTR_RXE_FRRD },
  { "mae", 0x2e, INSTR_RXF_FRRDF },
  { "mse", 0x2f, INSTR_RXF_FRRDF },
  { "sqe", 0x34, INSTR_RXE_FRRD },
  { "sqd", 0x35, INSTR_RXE_FRRD },
  { "mee", 0x37, INSTR_RXE_FRRD },
  { "mad", 0x3e, INSTR_RXF_FRRDF },
  { "msd", 0x3f, INSTR_RXF_FRRDF },
  { "", 0, INSTR_INVALID }
}

-- Extracts an operand value from an instruction.
--   code:    instruction bytes, indexed by 0-based byte offset (the same
--            convention find_insn uses: code[0], code[1], code[5]).
--   operand: descriptor { width in bits, bit offset, flags }.
-- Returns the decoded value. All arithmetic goes through the bit library,
-- so the result is a signed 32 bit number.
local function extract_operand(code, operand)
  local width, shift, flags = operand[1], operand[2], operand[3]

  -- Gather the bytes that hold the field, most significant byte first.
  local idx = rshift(shift, 3)
  local bits = band(shift, 7) + width
  local val = 0
  repeat
    val = bor(lshift(val, 8), code[idx])
    idx = idx + 1
    bits = bits - 8
  until bits <= 0

  -- bits is now <= 0: drop the surplus low-order bits, then mask the field.
  -- The mask is built with two shifts so that width == 32 also works.
  val = rshift(val, -bits)
  val = band(val, lshift(lshift(1, width - 1), 1) - 1)

  -- Check for special long displacement case: the low 8 bits of the field
  -- are the high part of the displacement and the upper 12 bits the low part.
  if width == 20 and shift == 20 then
    val = bor(lshift(band(val, 0xff), 12), rshift(band(val, 0xfff00), 8))
  end

  -- Sign extend value if the operand is signed or pc relative.
  -- Compare against 0 explicitly: the number 0 is truthy in Lua.
  if band(flags, bor(OPERAND_SIGNED, OPERAND_PCREL)) ~= 0 and
     band(val, lshift(1, width - 1)) ~= 0 then
    val = bor(val, lshift(lshift(-1, width - 1), 1))
  end

  -- Double value if the operand is pc relative.
  if band(flags, OPERAND_PCREL) ~= 0 then
    val = lshift(val, 1)
  end

  -- Length x in an instruction has real length x + 1.
  if band(flags, OPERAND_LENGTH) ~= 0 then
    val = val + 1
  end
  return val
end

-- Returns the instruction length in bytes (2, 4 or 6), derived from the
-- first opcode byte: 0x00-0x3f -> 2, 0x40-0xbf -> 4, 0xc0-0xff -> 6.
-- `code` may be that first byte as a number, or an object indexed like the
-- argument of find_insn, in which case code[0] is used.
local function insn_length(code)
  local first = tonumber(code) or code[0]
  return lshift(rshift(first + 64, 7) + 1, 1)
end

-- Looks up the opcode table row for the instruction starting at code[0].
-- The first byte selects a table. For the split-opcode groups the second
-- opcode part comes from code[1], or from code[5] for the 0xe3/0xeb/0xec/0xed
-- groups. Otherwise the first byte itself is matched against the base table.
-- Returns the row { mnemonic, opcode fragment, format id }, or nil when no
-- row matches.
local function find_insn(code)
  local op = code[0]
  local tab = opcode
  local opfrag = code[1]

  if op == 0x01 then
    tab = opcode_01
  elseif op == 0xa5 then
    tab = opcode_a5
  elseif op == 0xa7 then
    tab = opcode_a7
  elseif op == 0xb2 then
    tab = opcode_b2
  elseif op == 0xb3 then
    tab = opcode_b3
  elseif op == 0xb9 then
    tab = opcode_b9
  elseif op == 0xc0 then
    tab = opcode_c0
  elseif op == 0xc2 then
    tab = opcode_c2
  elseif op == 0xc4 then
    tab = opcode_c4
  elseif op == 0xc6 then
    tab = opcode_c6
  elseif op == 0xc8 then
    tab = opcode_c8
  elseif op == 0xe3 then
    tab = opcode_e3
    opfrag = code[5]
  elseif op == 0xe5 then
    tab = opcode_e5
  elseif op == 0xeb then
    tab = opcode_eb
    opfrag = code[5]
  elseif op == 0xec then
    tab = opcode_ec
    opfrag = code[5]
  elseif op == 0xed then
    tab = opcode_ed
    opfrag = code[5]
  else
    opfrag = op
  end

  -- ipairs keeps table order, so the first matching row wins when several
  -- rows share an opcode fragment.
  for _, insn in ipairs(tab) do
    local fmt = insn[3]
    -- Every table ends with an INSTR_INVALID row; it must never match.
    if fmt == INSTR_INVALID then break end
    -- Skip rows whose format has no layout instead of indexing nil.
    local layout = formats[fmt]
    if layout and insn[2] == band(opfrag, layout[1]) then
      return insn
    end
  end
  return nil
end

------------------------------------------------------------------------------

-- Output a nicely formatted line with an opcode and operands.
--   ctx:      disassembler context; reads pos, rel, symtab, op, hexdump,
--             addr and out, and advances ctx.pos.
--   text:     mnemonic to print.
--   operands: array of operand strings, joined with ", ".
-- NOTE(review): the 0x0e000000/0x0a000000 test on ctx.op and the fixed
-- advance of 4 bytes do not use insn_length(), which yields 2, 4 or 6.
-- Both look inherited from a fixed-width instruction set -- confirm.
local function putop(ctx, text, operands)
  local pos = ctx.pos
  local extra = ""
  if ctx.rel then
    -- Prefer a symbol name for the target; else show the raw address.
    local sym = ctx.symtab[ctx.rel]
    if sym then
      extra = "\t->"..sym
    elseif band(ctx.op, 0x0e000000) ~= 0x0a000000 then
      extra = "\t; 0x"..tohex(ctx.rel)
    end
  end
  if ctx.hexdump > 0 then
    ctx.out(format("%08x  %s  %-5s %s%s\n",
	    ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra))
  else
    ctx.out(format("%08x  %-5s %s%s\n",
	    ctx.addr+pos, text, concat(operands, ", "), extra))
  end
  ctx.pos = pos + 4
end

-- Fallback for unknown opcodes.
+  local function unknown(ctx)
+    -- Emit the raw instruction word (ctx.op) as a ".long 0x..." pseudo-op.
+    -- Output and position bookkeeping are delegated to putop().
+    return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
+  end
+
+  -- Format operand 2 of load/store opcodes.
+  --   ctx: disassembler context (ctx.addr is read, ctx.rel may be written).
+  --   op:  instruction word.
+  --   pos: offset of the instruction inside the code string.
+  -- Returns the operand text: "[base]", "[base], ofs" or "[base, ofs]",
+  -- optionally followed by "!".
+  -- NOTE(review): the "pc" base, "#" immediates and rrx/shift syntax look
+  -- carried over from the ARM disassembler -- confirm they apply to s390x.
+  local function fmtload(ctx, op, pos)
+    -- Base register name comes from bits 16..19.
+    local base = map_gpr[band(rshift(op, 16), 15)]
+    local x, ofs
+    -- "ext" is true when bit 26 is clear.
+    local ext = (band(op, 0x04000000) == 0)
+    if not ext and band(op, 0x02000000) == 0 then
+      -- Immediate offset taken from the low 12 bits.
+      ofs = band(op, 4095)
+      -- Bit 23 clear means the offset is subtracted.
+      if band(op, 0x00800000) == 0 then ofs = -ofs end
+      -- Remember the absolute target so putop() can annotate it.
+      if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end
+      ofs = "#"..ofs
+    elseif ext and band(op, 0x00400000) ~= 0 then
+      -- Split 8 bit immediate: low nibble in bits 0..3, high nibble in bits 8..11.
+      ofs = band(op, 15) + band(rshift(op, 4), 0xf0)
+      if band(op, 0x00800000) == 0 then ofs = -ofs end
+      if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end
+      ofs = "#"..ofs
+    else
+      -- Register offset, named by bits 0..3.
+      ofs = map_gpr[band(op, 15)]
+      if ext or band(op, 0xfe0) == 0 then
+        -- Intentionally empty: plain register, no shift suffix.
+      elseif band(op, 0xfe0) == 0x60 then
+        ofs = format("%s, rrx", ofs)
+      else
+        -- Shift amount lives in bits 7..11; an encoded 0 is printed as 32.
+        local sh = band(rshift(op, 7), 31)
+        if sh == 0 then sh = 32 end
+        ofs = format("%s, %s #%d", ofs, map_shift[band(rshift(op, 5), 3)], sh)
+      end
+      -- Bit 23 clear means the register offset is subtracted.
+      if band(op, 0x00800000) == 0 then ofs = "-"..ofs end
+    end
+    if ofs == "#0" then
+      -- A zero immediate is omitted from the output.
+      x = format("[%s]", base)
+    elseif band(op, 0x01000000) == 0 then
+      -- Bit 24 clear: offset is printed after the bracket.
+      x = format("[%s], %s", base, ofs)
+    else
+      x = format("[%s, %s]", base, ofs)
+    end
+    -- Bits 24 and 21 both set: append "!".
+    if band(op, 0x01200000) == 0x01200000 then x = x.."!" end
+    return x
+  end
+
+  -- Format operand 2 of vector load/store opcodes.
+  -- Same arguments as fmtload(); returns "[base]" or "[base, #ofs]".
+  local function fmtvload(ctx, op, pos)
+    local base = map_gpr[band(rshift(op, 16), 15)]
+    -- The low 8 bits hold the offset in units of 4.
+    local ofs = band(op, 255)*4
+    -- Bit 23 clear means the offset is subtracted.
+    if band(op, 0x00800000) == 0 then ofs = -ofs end
+    if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end
+    if ofs == 0 then
+      return format("[%s]", base)
+    else
+      return format("[%s, #%d]", base, ofs)
+    end
+  end
+
+  -- Format a vector register name.
+  --   vr == "s": "s<n>" with n = 2*(4 bit field at sh0) + (bit sh1).
+  --   otherwise: "d<n>" with n = (4 bit field at sh0) + 16*(bit sh1).
+  local function fmtvr(op, vr, sh0, sh1)
+    if vr == "s" then
+      return format("s%d", 2*band(rshift(op, sh0), 15)+band(rshift(op, sh1), 1))
+    else
+      -- Shifting by sh1-4 and masking with 16 moves bit sh1 to value 16.
+      return format("d%d", band(rshift(op, sh0), 15)+band(rshift(op, sh1-4), 16))
+    end
+  end
+
+  -- Disassemble a single instruction.
+  local function disass_ins(ctx)
+    -- The s390x instruction decoder is not implemented yet. Until it is,
+    -- every 4 byte word is emitted through the unknown() fallback. This
+    -- matters for more than output: unknown() ends in putop(), which is the
+    -- only place that advances ctx.pos, so without it the loop in
+    -- disass_block() would never terminate.
+    local pos = ctx.pos
+    local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+    -- s390x is big-endian: the first byte in memory is the most significant.
+    -- Bytes missing at the very end of the buffer are treated as zero, since
+    -- the code length need not be a multiple of 4.
+    local op = bor(lshift(b0 or 0, 24), lshift(b1 or 0, 16),
+		   lshift(b2 or 0, 8), b3 or 0)
+    ctx.op = op
+    ctx.rel = nil
+
+    return unknown(ctx)
+
+    -- Disabled pattern-driven decoder, kept as a template for the real
+    -- s390x decoder. The locals it needs are listed first.
+    -- local operands = {}
+    -- local suffix = ""
+    -- local last, name, pat
+    -- local vr
+
+    -- local cond = rshift(op, 28)
+    -- local opat
+    -- if cond == 15 then
+    --   opat = map_uncondins[band(rshift(op, 25), 7)]
+    -- else
+    --   if cond ~= 14 then suffix = map_cond[cond] end
+    --   opat = map_condins[band(rshift(op, 25), 7)]
+    -- end
+    -- while type(opat) ~= "string" do
+    --   if not opat then return unknown(ctx) end
+    --   opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+    -- end
+    -- name, pat = match(opat, "^([a-z0-9]*)(.*)")
+    -- if sub(pat, 1, 1) == "." then
+    --   local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
+    --   suffix = suffix..s2
+    --   pat = p2
+    -- end
+
+    -- for p in gmatch(pat, ".") do
+    --   local x = nil
+    --   if p == "D" then
+    --     x = map_gpr[band(rshift(op, 12), 15)]
+    --   elseif p == "N" then
+    --     x = map_gpr[band(rshift(op, 16), 15)]
+    --   elseif p == "S" then
+    --     x = map_gpr[band(rshift(op, 8), 15)]
+    --   elseif p == "M" then
+    --     x = map_gpr[band(op, 15)]
+    --   elseif p == "d" then
+    --     x = fmtvr(op, vr, 12, 22)
+    --   elseif p == "n" then
+    --     x = fmtvr(op, vr, 16, 7)
+    --   elseif p == "m" then
+    --     x = fmtvr(op, vr, 0, 5)
+    --   elseif p == "P" then
+    --     if band(op, 0x02000000) ~= 0 then
+    --       x = ror(band(op, 255), 2*band(rshift(op, 8), 15))
+    --     else
+    --       x = map_gpr[band(op, 15)]
+    --       if band(op, 0xff0) ~= 0 then
+    --         operands[#operands+1] = x
+    --         local s = map_shift[band(rshift(op, 5), 3)]
+    --         local r = nil
+    --         if band(op, 0xf90) == 0 then
+    --           if s == "ror" then s = "rrx" else r = "#32" end
+    --         elseif band(op, 0x10) == 0 then
+    --           r = "#"..band(rshift(op, 7), 31)
+    --         else
+    --           r = map_gpr[band(rshift(op, 8), 15)]
+    --         end
+    --         if name == "mov" then name = s; x = r
+    --         elseif r then x = format("%s %s", s, r)
+    --         else x = s end
+    --       end
+    --     end
+    --   elseif p == "L" then
+    --     x = fmtload(ctx, op, pos)
+    --   elseif p == "l" then
+    --     x = fmtvload(ctx, op, pos)
+    --   elseif p == "B" then
+    --     local addr = ctx.addr + pos + 8 + arshift(lshift(op, 8), 6)
+    --     if cond == 15 then addr = addr + band(rshift(op, 23), 2) end
+    --     ctx.rel = addr
+    --     x = "0x"..tohex(addr)
+    --   elseif p == "F" then
+    --     vr = "s"
+    --   elseif p == "G" then
+    --     vr = "d"
+    --   elseif p == "." then
+    --     suffix = suffix..(vr == "s" and ".f32" or ".f64")
+    --   elseif p == "R" then
+    --     if band(op, 0x00200000) ~= 0 and #operands == 1 then
+    --       operands[1] = operands[1].."!"
+    --     end
+    --     local t = {}
+    --     for i=0,15 do
+    --       if band(rshift(op, i), 1) == 1 then t[#t+1] = map_gpr[i] end
+    --     end
+    --     x = "{"..concat(t, ", ").."}"
+    --   elseif p == "r" then
+    --     if band(op, 0x00200000) ~= 0 and #operands == 2 then
+    --       operands[1] = operands[1].."!"
+    --     end
+    --     local s = tonumber(sub(last, 2))
+    --     local n = band(op, 255)
+    --     if vr == "d" then n = rshift(n, 1) end
+    --     operands[#operands] = format("{%s-%s%d}", last, vr, s+n-1)
+    --   elseif p == "W" then
+    --     x = band(op, 0x0fff) + band(rshift(op, 4), 0xf000)
+    --   elseif p == "T" then
+    --     x = "#0x"..tohex(band(op, 0x00ffffff), 6)
+    --   elseif p == "U" then
+    --     x = band(rshift(op, 7), 31)
+    --     if x == 0 then x = nil end
+    --   elseif p == "u" then
+    --     x = band(rshift(op, 7), 31)
+    --     if band(op, 0x40) == 0 then
+    --       if x == 0 then x = nil else x = "lsl #"..x end
+    --     else
+    --       if x == 0 then x = "asr #32" else x = "asr #"..x end
+    --     end
+    --   elseif p == "v" then
+    --     x = band(rshift(op, 7), 31)
+    --   elseif p == "w" then
+    --     x = band(rshift(op, 16), 31)
+    --   elseif p == "x" then
+    --     x = band(rshift(op, 16), 31) + 1
+    --   elseif p == "X" then
+    --     x = band(rshift(op, 16), 31) - last + 1
+    --   elseif p == "Y" then
+    --     x = band(rshift(op, 12), 0xf0) + band(op, 0x0f)
+    --   elseif p == "K" then
+    --     x = "#0x"..tohex(band(rshift(op, 4), 0x0000fff0) + band(op, 15), 4)
+    --   elseif p == "s" then
+    --     if band(op, 0x00100000) ~= 0 then suffix = "s"..suffix end
+    --   else
+    --     assert(false)
+    --   end
+    --   if x then
+    --     last = x
+    --     if type(x) == "number" then x = "#"..x end
+    --     operands[#operands+1] = x
+    --   end
+    -- end
+
+    -- return putop(ctx, name..suffix, operands)
+  end
+
+  ------------------------------------------------------------------------------
+
+  -- Disassemble a block of code.
+  -- ofs defaults to 0; without len the block runs to the end of ctx.code.
+  local function disass_block(ctx, ofs, len)
+    if not ofs then ofs = 0 end
+    local stop = len and ofs+len or #ctx.code
+    ctx.pos = ofs
+    ctx.rel = nil
+    -- Relies on disass_ins() advancing ctx.pos on every call.
+    while ctx.pos < stop do disass_ins(ctx) end
+  end
+
+  -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+  -- addr defaults to 0 and out defaults to io.write.
+  local function create(code, addr, out)
+    local ctx = {}
+    ctx.code = code
+    ctx.addr = addr or 0
+    ctx.out = out or io.write
+    ctx.symtab = {}
+    ctx.disass = disass_block
+    ctx.hexdump = 8
+    return ctx
+  end
+
+  -- Simple API: disassemble code (a string) at address and output via out.
+  local function disass(code, addr, out)
+    create(code, addr, out):disass()
+  end
+
+  -- Return register name for RID.
+  -- RIDs below 16 are looked up in map_gpr; the rest are named "d<r-16>".
+  local function regname(r)
+    if r < 16 then return map_gpr[r] end
+    return "d"..(r-16)
+  end
+
+  -- Public module functions.
+  return {
+    create = create,
+    disass = disass,
+    regname = regname
+  }
+
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 18e7a4b7..9ddbfb96 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -144,10 +144,11 @@ local function dump_mcode(tr)
   if not mcode then return end
   if not disass then disass = require("jit.dis_"..jit.arch) end
   if addr < 0 then addr = addr + 2^32 end
-  out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
+  out:write("---- TRACE REallly ", tr, " mcode ", #mcode, "\n")
   local ctx = disass.create(mcode, addr, dumpwrite)
   ctx.hexdump = 0
   ctx.symtab = fillsymtab(tr, info.nexit)
+  print(info.nexit)
   if loop ~= 0 then
     symtab[addr+loop] = "LOOP"
     ctx:disass(0, loop)
@@ -387,13 +388,13 @@ end
 
 -- Dump snapshots (not interleaved with IR).
local function dump_snap(tr) - out:write("---- TRACE ", tr, " snapshots\n") - for i=0,1000000000 do - local snap = tracesnap(tr, i) - if not snap then break end - out:write(format("#%-3d %04d [ ", i, snap[0])) - printsnap(tr, snap) - end + -- out:write("---- TRACE ", tr, " snapshots\n") + -- for i=0,1000000000 do + -- local snap = tracesnap(tr, i) + -- if not snap then break end + -- out:write(format("#%-3d %04d [ ", i, snap[0])) + -- printsnap(tr, snap) + -- end end -- Return a register name or stack slot for a rid/sp location. @@ -410,20 +411,20 @@ end -- Dump CALL* function ref and return optional ctype. local function dumpcallfunc(tr, ins) - local ctype - if ins > 0 then - local m, ot, op1, op2 = traceir(tr, ins) - if band(ot, 31) == 0 then -- nil type means CARG(func, ctype). - ins = op1 - ctype = formatk(tr, op2) - end - end - if ins < 0 then - out:write(format("[0x%x](", tonumber((tracek(tr, ins))))) - else - out:write(format("%04d (", ins)) - end - return ctype + -- local ctype + -- if ins > 0 then + -- local m, ot, op1, op2 = traceir(tr, ins) + -- if band(ot, 31) == 0 then -- nil type means CARG(func, ctype). + -- ins = op1 + -- ctype = formatk(tr, op2) + -- end + -- end + -- if ins < 0 then + -- out:write(format("[0x%x](", tonumber((tracek(tr, ins))))) + -- else + -- out:write(format("%04d (", ins)) + -- end + -- return ctype end -- Recursively gather CALL* args and dump them. @@ -449,99 +450,99 @@ end -- Dump IR and interleaved snapshots. local function dump_ir(tr, dumpsnap, dumpreg) - local info = traceinfo(tr) - if not info then return end - local nins = info.nins - out:write("---- TRACE ", tr, " IR\n") - local irnames = vmdef.irnames - local snapref = 65536 - local snap, snapno - if dumpsnap then - snap = tracesnap(tr, 0) - snapref = snap[0] - snapno = 0 - end - for ins=1,nins do - if ins >= snapref then - if dumpreg then - out:write(format(".... SNAP #%-3d [ ", snapno)) - else - out:write(format(".... 
SNAP #%-3d [ ", snapno)) - end - printsnap(tr, snap) - snapno = snapno + 1 - snap = tracesnap(tr, snapno) - snapref = snap and snap[0] or 65536 - end - local m, ot, op1, op2, ridsp = traceir(tr, ins) - local oidx, t = 6*shr(ot, 8), band(ot, 31) - local op = sub(irnames, oidx+1, oidx+6) - if op == "LOOP " then - if dumpreg then - out:write(format("%04d ------------ LOOP ------------\n", ins)) - else - out:write(format("%04d ------ LOOP ------------\n", ins)) - end - elseif op ~= "NOP " and op ~= "CARG " and - (dumpreg or op ~= "RENAME") then - local rid = band(ridsp, 255) - if dumpreg then - out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins))) - else - out:write(format("%04d ", ins)) - end - out:write(format("%s%s %s %s ", - (rid == 254 or rid == 253) and "}" or - (band(ot, 128) == 0 and " " or ">"), - band(ot, 64) == 0 and " " or "+", - irtype[t], op)) - local m1, m2 = band(m, 3), band(m, 3*4) - if sub(op, 1, 4) == "CALL" then - local ctype - if m2 == 1*4 then -- op2 == IRMlit - out:write(format("%-10s (", vmdef.ircall[op2])) - else - ctype = dumpcallfunc(tr, op2) - end - if op1 ~= -1 then dumpcallargs(tr, op1) end - out:write(")") - if ctype then out:write(" ctype ", ctype) end - elseif op == "CNEW " and op2 == -1 then - out:write(formatk(tr, op1)) - elseif m1 ~= 3 then -- op1 != IRMnone - if op1 < 0 then - out:write(formatk(tr, op1)) - else - out:write(format(m1 == 0 and "%04d" or "#%-3d", op1)) - end - if m2 ~= 3*4 then -- op2 != IRMnone - if m2 == 1*4 then -- op2 == IRMlit - local litn = litname[op] - if litn and litn[op2] then - out:write(" ", litn[op2]) - elseif op == "UREFO " or op == "UREFC " then - out:write(format(" #%-3d", shr(op2, 8))) - else - out:write(format(" #%-3d", op2)) - end - elseif op2 < 0 then - out:write(" ", formatk(tr, op2)) - else - out:write(format(" %04d", op2)) - end - end - end - out:write("\n") - end - end - if snap then - if dumpreg then - out:write(format(".... SNAP #%-3d [ ", snapno)) - else - out:write(format(".... 
SNAP #%-3d [ ", snapno)) - end - printsnap(tr, snap) - end + -- local info = traceinfo(tr) + -- if not info then return end + -- local nins = info.nins + -- out:write("---- TRACE ", tr, " IR\n") + -- local irnames = vmdef.irnames + -- local snapref = 65536 + -- local snap, snapno + -- if dumpsnap then + -- snap = tracesnap(tr, 0) + -- snapref = snap[0] + -- snapno = 0 + -- end + -- for ins=1,nins do + -- if ins >= snapref then + -- if dumpreg then + -- out:write(format(".... SNAP #%-3d [ ", snapno)) + -- else + -- out:write(format(".... SNAP #%-3d [ ", snapno)) + -- end + -- printsnap(tr, snap) + -- snapno = snapno + 1 + -- snap = tracesnap(tr, snapno) + -- snapref = snap and snap[0] or 65536 + -- end + -- local m, ot, op1, op2, ridsp = traceir(tr, ins) + -- local oidx, t = 6*shr(ot, 8), band(ot, 31) + -- local op = sub(irnames, oidx+1, oidx+6) + -- if op == "LOOP " then + -- if dumpreg then + -- out:write(format("%04d ------------ LOOP ------------\n", ins)) + -- else + -- out:write(format("%04d ------ LOOP ------------\n", ins)) + -- end + -- elseif op ~= "NOP " and op ~= "CARG " and + -- (dumpreg or op ~= "RENAME") then + -- local rid = band(ridsp, 255) + -- if dumpreg then + -- out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins))) + -- else + -- out:write(format("%04d ", ins)) + -- end + -- out:write(format("%s%s %s %s ", + -- (rid == 254 or rid == 253) and "}" or + -- (band(ot, 128) == 0 and " " or ">"), + -- band(ot, 64) == 0 and " " or "+", + -- irtype[t], op)) + -- local m1, m2 = band(m, 3), band(m, 3*4) + -- if sub(op, 1, 4) == "CALL" then + -- local ctype + -- if m2 == 1*4 then -- op2 == IRMlit + -- out:write(format("%-10s (", vmdef.ircall[op2])) + -- else + -- ctype = dumpcallfunc(tr, op2) + -- end + -- if op1 ~= -1 then dumpcallargs(tr, op1) end + -- out:write(")") + -- if ctype then out:write(" ctype ", ctype) end + -- elseif op == "CNEW " and op2 == -1 then + -- out:write(formatk(tr, op1)) + -- elseif m1 ~= 3 then -- op1 != IRMnone + -- if op1 < 
0 then + -- out:write(formatk(tr, op1)) + -- else + -- out:write(format(m1 == 0 and "%04d" or "#%-3d", op1)) + -- end + -- if m2 ~= 3*4 then -- op2 != IRMnone + -- if m2 == 1*4 then -- op2 == IRMlit + -- local litn = litname[op] + -- if litn and litn[op2] then + -- out:write(" ", litn[op2]) + -- elseif op == "UREFO " or op == "UREFC " then + -- out:write(format(" #%-3d", shr(op2, 8))) + -- else + -- out:write(format(" #%-3d", op2)) + -- end + -- elseif op2 < 0 then + -- out:write(" ", formatk(tr, op2)) + -- else + -- out:write(format(" %04d", op2)) + -- end + -- end + -- end + -- out:write("\n") + -- end + -- end + -- if snap then + -- if dumpreg then + -- out:write(format(".... SNAP #%-3d [ ", snapno)) + -- else + -- out:write(format(".... SNAP #%-3d [ ", snapno)) + -- end + -- printsnap(tr, snap) + -- end end ------------------------------------------------------------------------------ @@ -560,37 +561,37 @@ end -- Dump trace states. local function dump_trace(what, tr, func, pc, otr, oex) - if what == "stop" or (what == "abort" and dumpmode.a) then - if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop") - elseif dumpmode.s then dump_snap(tr) end - if dumpmode.m then dump_mcode(tr) end - end - if what == "start" then - if dumpmode.H then out:write('
\n') end
-    out:write("---- TRACE ", tr, " ", what)
-    if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
-    out:write(" ", fmtfunc(func, pc), "\n")
-  elseif what == "stop" or what == "abort" then
-    out:write("---- TRACE ", tr, " ", what)
-    if what == "abort" then
-      out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
-    else
-      local info = traceinfo(tr)
-      local link, ltype = info.link, info.linktype
-      if link == tr or link == 0 then
-	out:write(" -> ", ltype, "\n")
-      elseif ltype == "root" then
-	out:write(" -> ", link, "\n")
-      else
-	out:write(" -> ", link, " ", ltype, "\n")
-      end
-    end
-    if dumpmode.H then out:write("
\n\n") else out:write("\n") end - else - if what == "flush" then symtab, nexitsym = {}, 0 end - out:write("---- TRACE ", what, "\n\n") - end - out:flush() + -- if what == "stop" or (what == "abort" and dumpmode.a) then + -- if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop") + -- elseif dumpmode.s then dump_snap(tr) end + -- if dumpmode.m then dump_mcode(tr) end + -- end + -- if what == "start" then + -- if dumpmode.H then out:write('
\n') end
+  --   out:write("---- TRACE ", tr, " ", what)
+  --   if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
+  --   out:write(" ", fmtfunc(func, pc), "\n")
+  -- elseif what == "stop" or what == "abort" then
+  --   out:write("---- TRACE ", tr, " ", what)
+  --   if what == "abort" then
+  --     out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
+  --   else
+  --     local info = traceinfo(tr)
+  --     local link, ltype = info.link, info.linktype
+  --     if link == tr or link == 0 then
+	-- out:write(" -> ", ltype, "\n")
+  --     elseif ltype == "root" then
+	-- out:write(" -> ", link, "\n")
+  --     else
+	-- out:write(" -> ", link, " ", ltype, "\n")
+  --     end
+  --   end
+  --   if dumpmode.H then out:write("
\n\n") else out:write("\n") end + -- else + -- if what == "flush" then symtab, nexitsym = {}, 0 end + -- out:write("---- TRACE ", what, "\n\n") + -- end + -- out:flush() end -- Dump recorded bytecode. @@ -603,6 +604,9 @@ local function dump_record(tr, func, pc, depth) if pc >= 0 then line = bcline(func, pc, recprefix) if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end + if pc > 0 then + line = sub(line, 1, -2) .. " (" .. fmtfunc(func, pc) .. ")\n" + end else line = "0000 "..recprefix.." FUNCC \n" end diff --git a/src/lib_base.c b/src/lib_base.c index 98ec67c7..56addbba 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -36,6 +36,7 @@ #include "lj_strscan.h" #include "lj_strfmt.h" #include "lj_lib.h" +#include "lj_cdata.h" /* -- Base library: checks ------------------------------------------------ */ @@ -669,6 +670,52 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn) setmref(fn->c.pc, &L2GG(L)->bcff[lj_lib_init_coroutine[1]+2]); } +#if LJ_HASFFI +LJLIB_NOREG LJLIB_CF(thread_exdata) LJLIB_REC(.) +{ + ptrdiff_t nargs = L->top - L->base; + GCcdata *cd; + + if (nargs == 0) { + CTState *cts = ctype_ctsG(G(L)); + if (cts == NULL) + lj_err_caller(L, LJ_ERR_FFI_NOTLOAD); + cts->L = L; /* Save L for errors and allocations. */ + + cd = lj_cdata_new(cts, CTID_P_VOID, CTSIZE_PTR); + cdata_setptr(cdataptr(cd), CTSIZE_PTR, L->exdata); + setcdataV(L, L->top++, cd); + return 1; + } + + cd = lj_lib_checkcdata(L, 1); + L->exdata = cdata_getptr(cdataptr(cd), CTSIZE_PTR); + return 0; +} + +LJLIB_NOREG LJLIB_CF(thread_exdata2) LJLIB_REC(.) +{ + ptrdiff_t nargs = L->top - L->base; + GCcdata *cd; + + if (nargs == 0) { + CTState *cts = ctype_ctsG(G(L)); + if (cts == NULL) + lj_err_caller(L, LJ_ERR_FFI_NOTLOAD); + cts->L = L; /* Save L for errors and allocations. 
*/ + + cd = lj_cdata_new(cts, CTID_P_VOID, CTSIZE_PTR); + cdata_setptr(cdataptr(cd), CTSIZE_PTR, L->exdata2); + setcdataV(L, L->top++, cd); + return 1; + } + + cd = lj_lib_checkcdata(L, 1); + L->exdata2 = cdata_getptr(cdataptr(cd), CTSIZE_PTR); + return 0; +} +#endif + /* ------------------------------------------------------------------------ */ static void newproxy_weaktable(lua_State *L) @@ -682,6 +729,18 @@ static void newproxy_weaktable(lua_State *L) t->nomm = (uint8_t)(~(1u<fp = NULL; errno = ENOSYS; #else iof->fp = tmpfile(); diff --git a/src/lib_jit.c b/src/lib_jit.c index 2867d420..50c2b135 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -148,6 +148,66 @@ LJLIB_CF(jit_attach) return 0; } +LJLIB_CF(jit_prngstate) +{ + GCtab *cur = lj_tab_new(L, 8, 0); + +#if LJ_HASJIT + int i; + jit_State *J = L2J(L); + + /* The old state. */ + for (i = 1; i <= 4; i++) { + setintV(lj_tab_setint(L, cur, i*2-1), J->prng.u[i-1] & 0xffffffff); + setintV(lj_tab_setint(L, cur, i*2), J->prng.u[i-1] >> 32); + } + + /* We need to set new state using the input array. */ + if (L->base < L->top && !tvisnil(L->base)) { + PRNGState prng; + if (tvisnumber(L->base)) { + TValue *o = L->base; + + if (!tvisint(o) && ((double)(uint32_t)numV(o) != numV(o))) + lj_err_arg(L, 1, LJ_ERR_PRNGSTATE); + + prng.u[0] = numberVint(o); + for (i = 1; i < 4; i++) + prng.u[i] = 0; + } else { + GCtab *t = lj_lib_checktab(L, 1); + int i = 1, len = lj_tab_len(t); + + /* The input array must have at most 8 elements. */ + if (len > 8) + lj_err_arg(L, 1, LJ_ERR_PRNGSTATE); + + for (i = 1; i <= len; i++) { + cTValue *v = lj_tab_getint(t, i); + + if (!tvisint(v) && (!tvisnum(v) || (double)(uint32_t)numV(v) != numV(v))) + lj_err_arg(L, 1, LJ_ERR_PRNGSTATE); + + if (i & 1) + prng.u[(i-1)/2] = numberVint(v); + else + prng.u[(i-1)/2] = prng.u[(i-1)/2] | ((uint64_t)numberVint(v) << 32); + } + for (i /= 2; i < 4; i++) + prng.u[i] = 0; + } + + /* Re-initialize the JIT prng. 
*/ + J->prng = prng; + } +#else + for (int i = 1; i <= 8; i++) + setintV(lj_tab_setint(L, cur, i), 0); +#endif + settabV(L, L->top++, cur); + return 1; +} + LJLIB_PUSH(top-5) LJLIB_SET(os) LJLIB_PUSH(top-4) LJLIB_SET(arch) LJLIB_PUSH(top-3) LJLIB_SET(version_num) @@ -231,6 +291,7 @@ LJLIB_CF(jit_util_funcbc) { GCproto *pt = check_Lproto(L, 0); BCPos pc = (BCPos)lj_lib_checkint(L, 2); + int lineinfo = lj_lib_optint(L, 3, 0); if (pc < pt->sizebc) { BCIns ins = proto_bc(pt)[pc]; BCOp op = bc_op(ins); @@ -238,6 +299,11 @@ LJLIB_CF(jit_util_funcbc) setintV(L->top, ins); setintV(L->top+1, lj_bc_mode[op]); L->top += 2; + if (lineinfo) { + setintV(L->top, lj_debug_line(pt, pc)); + L->top += 1; + return 3; + } return 2; } return 0; @@ -718,7 +784,8 @@ static uint32_t jit_cpudetect(void) if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif - +#elif LJ_TARGET_S390X + /* No optional CPU features to detect (for now). */ #else #error "Missing CPU detection for this architecture" #endif diff --git a/src/lib_os.c b/src/lib_os.c index 6bcd0147..ce4b90b6 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -76,7 +76,7 @@ LJLIB_CF(os_rename) LJLIB_CF(os_tmpname) { -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX +#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA lj_err_caller(L, LJ_ERR_OSUNIQF); return 0; #else @@ -185,6 +185,7 @@ LJLIB_CF(os_date) #endif } else { #if LJ_TARGET_POSIX + tzset(); stm = localtime_r(&t, &rtm); #else stm = localtime(&t); diff --git a/src/lib_table.c b/src/lib_table.c index a723326a..ed6aaefd 100644 --- a/src/lib_table.c +++ b/src/lib_table.c @@ -169,6 +169,47 @@ LJLIB_CF(table_concat) LJLIB_REC(.) return 1; } +LJLIB_NOREG LJLIB_CF(table_clone) LJLIB_REC(.) +{ + GCtab *src = lj_lib_checktab(L, 1); + GCtab *dup = lj_tab_dup(L, src); + + settabV(L, L->base, dup); + L->top = L->base+1; + + return 1; +} + +LJLIB_NOREG LJLIB_CF(table_isarray) LJLIB_REC(.) 
+{ + GCtab *src = lj_lib_checktab(L, 1); + + setboolV(L->base, lj_tab_isarray(src)); + L->top = L->base+1; + + return 1; +} + +LJLIB_NOREG LJLIB_CF(table_nkeys) LJLIB_REC(.) +{ + GCtab *src = lj_lib_checktab(L, 1); + + setintV(L->base, lj_tab_nkeys(src)); + L->top = L->base+1; + + return 1; +} + +LJLIB_NOREG LJLIB_CF(table_isempty) LJLIB_REC(.) +{ + GCtab *src = lj_lib_checktab(L, 1); + + setboolV(L->base, lj_tab_isempty(src)); + L->top = L->base+1; + + return 1; +} + /* ------------------------------------------------------------------------ */ static void set2(lua_State *L, int i, int j) @@ -304,6 +345,26 @@ static int luaopen_table_new(lua_State *L) return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new"); } +static int luaopen_table_clone(lua_State *L) +{ + return lj_lib_postreg(L, lj_cf_table_clone, FF_table_clone, "clone"); +} + +static int luaopen_table_nkeys(lua_State *L) +{ + return lj_lib_postreg(L, lj_cf_table_nkeys, FF_table_nkeys, "nkeys"); +} + +static int luaopen_table_isarray(lua_State *L) +{ + return lj_lib_postreg(L, lj_cf_table_isarray, FF_table_isarray, "isarray"); +} + +static int luaopen_table_isempty(lua_State *L) +{ + return lj_lib_postreg(L, lj_cf_table_isempty, FF_table_isempty, "isempty"); +} + static int luaopen_table_clear(lua_State *L) { return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear"); @@ -321,6 +382,10 @@ LUALIB_API int luaopen_table(lua_State *L) lua_setfield(L, -2, "unpack"); #endif lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1)); + lj_lib_prereg(L, LUA_TABLIBNAME ".clone", luaopen_table_clone, tabV(L->top-1)); + lj_lib_prereg(L, LUA_TABLIBNAME ".isarray", luaopen_table_isarray, tabV(L->top-1)); + lj_lib_prereg(L, LUA_TABLIBNAME ".nkeys", luaopen_table_nkeys, tabV(L->top-1)); + lj_lib_prereg(L, LUA_TABLIBNAME ".isempty", luaopen_table_isempty, tabV(L->top-1)); lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1)); return 1; } diff --git a/src/lj_api.c 
b/src/lj_api.c index d869ebf8..021670fd 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -1143,6 +1143,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) ef = savestack(L, o); } status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef); + printf("hello, %d", status); if (status) hook_restore(g, oldh); return status; } @@ -1195,6 +1196,36 @@ LUA_API int lua_isyieldable(lua_State *L) return cframe_canyield(L->cframe); } +LUA_API void lua_resetthread(lua_State *L, lua_State *th) +{ + TValue *stend, *st; + + th->dummy_ffid = FF_C; + th->status = LUA_OK; + + setmrefr(th->glref, L->glref); + setgcrefr(th->env, L->env); + + th->cframe = NULL; + + st = tvref(th->stack); + + if (st != NULL) { + lj_state_relimitstack(th); + + stend = st + th->stacksize; + st++; /* Needed for curr_funcisL() on empty stack. */ + if (LJ_FR2) st++; + th->base = th->top = st; + lj_func_closeuv(L, st); + while (st < stend) /* Clear new slots. */ + setnilV(st++); + } + + th->exdata = L->exdata; + th->exdata2 = L->exdata2; +} + LUA_API int lua_yield(lua_State *L, int nresults) { void *cf = L->cframe; @@ -1311,3 +1342,22 @@ LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud) g->allocf = f; } +LUA_API void lua_setexdata(lua_State *L, void *exdata) +{ + L->exdata = exdata; +} + +LUA_API void *lua_getexdata(lua_State *L) +{ + return L->exdata; +} + +LUA_API void lua_setexdata2(lua_State *L, void *exdata2) +{ + L->exdata2 = exdata2; +} + +LUA_API void *lua_getexdata2(lua_State *L) +{ + return L->exdata2; +} diff --git a/src/lj_arch.h b/src/lj_arch.h index 882c99cb..e66dfa8f 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -31,6 +31,8 @@ #define LUAJIT_ARCH_mips32 6 #define LUAJIT_ARCH_MIPS64 7 #define LUAJIT_ARCH_mips64 7 +#define LUAJIT_ARCH_S390X 8 +#define LUAJIT_ARCH_s390x 8 /* Target OS. 
*/ #define LUAJIT_OS_OTHER 0 @@ -59,6 +61,8 @@ #define LUAJIT_TARGET LUAJIT_ARCH_ARM #elif defined(__aarch64__) #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 +#elif defined(__s390x__) || defined(__s390x) +#define LUAJIT_TARGET LUAJIT_ARCH_S390X #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #define LUAJIT_TARGET LUAJIT_ARCH_PPC #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) @@ -162,13 +166,6 @@ #define LJ_TARGET_GC64 1 #endif -#ifdef __NX__ -#define LJ_TARGET_NX 1 -#define LJ_TARGET_CONSOLE 1 -#undef NULL -#define NULL ((void*)0) -#endif - #ifdef _UWP #define LJ_TARGET_UWP 1 #if LUAJIT_TARGET == LUAJIT_ARCH_X64 @@ -213,6 +210,10 @@ #error "macOS requires GC64 -- don't disable it" #endif +#ifdef __GNUC__ +#define LJ_HAS_OPTIMISED_HASH 1 +#endif + #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM #define LJ_ARCH_NAME "arm" @@ -323,8 +324,18 @@ #if LJ_TARGET_CONSOLE #define LJ_ARCH_PPC32ON64 1 #define LJ_ARCH_NOFFI 1 +#if LJ_TARGET_PS3 +#define LJ_ARCH_PPC_OPD 1 +#endif #elif LJ_ARCH_BITS == 64 -#error "No support for PPC64" +#define LJ_ARCH_PPC32ON64 1 +#define LJ_ARCH_NOJIT 1 /* NYI */ +#if _CALL_ELF == 2 +#define LJ_ARCH_PPC_ELFV2 1 +#else +#define LJ_ARCH_PPC_OPD 1 +#define LJ_ARCH_PPC_OPDENV 1 +#endif #endif #if _ARCH_PWR7 @@ -432,6 +443,20 @@ #define LJ_ARCH_VERSION 10 #endif +#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X + +#define LJ_ARCH_NAME "s390x" +#define LJ_ARCH_BITS 64 +#define LJ_ARCH_ENDIAN LUAJIT_BE +#define LJ_TARGET_S390X 1 +#define LJ_TARGET_EHRETREG 0xe +#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */ +#define LJ_TARGET_MASKSHIFT 1 +#define LJ_TARGET_MASKROT 1 +#define LJ_TARGET_UNALIGNED 1 +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#define LJ_TARGET_GC64 1 + #else #error "No target architecture defined" #endif @@ -445,7 +470,7 @@ #error "Need at least 
GCC 3.4 or newer" #endif #elif LJ_TARGET_X64 -#if __GNUC__ < 4 +#if 0 && __GNUC__ < 4 #error "Need at least GCC 4.0 or newer" #endif #elif LJ_TARGET_ARM @@ -490,9 +515,6 @@ #error "No support for ILP32 model on ARM64" #endif #elif LJ_TARGET_PPC -#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) -#error "No support for little-endian PPC32" -#endif #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" #endif diff --git a/src/lj_asm.c b/src/lj_asm.c index 6f5e0c45..d8118088 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1662,6 +1662,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS #include "lj_asm_mips.h" +#elif LJ_TARGET_S390X +#include "lj_asm_s390x.h" #else #error "Missing assembler for target CPU" #endif diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 1f44d023..8c943475 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -353,6 +353,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) return 0; } +/* Fuse FP neg-multiply-add/sub. */ +static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) +{ + IRRef ref = ir->op1; + IRIns *irn = IR(ref); + if (irn->o != IR_ADD && irn->o != IR_SUB) + return 0; + + if (!mayfuse(as, ref)) + return 0; + + IRRef lref = irn->op1, rref = irn->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31)); + return 1; + } + return 0; +} + /* Fuse BAND + BSHL/BSHR into UBFM. 
*/ static int asm_fuseandshift(ASMState *as, IRIns *ir) { @@ -1051,10 +1080,30 @@ static void asm_xload(ASMState *as, IRIns *ir) asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); } +static int maybe_zero_val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + + switch(ir->o) { + case IR_KNULL: + return 1; + case IR_KINT: + return 0 == ir->i; + case IR_KINT64: + return 0 == ir_kint64(ir)->u64; + } + + return 0; +} + static void asm_xstore(ASMState *as, IRIns *ir) { if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + Reg src; + if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2)) + src = RID_ZERO; + else + src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, rset_exclude(RSET_GPR, src)); } @@ -1250,7 +1299,12 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { int32_t ofs = sizeof(GCcdata); - Reg r = ra_alloc1(as, ir->op2, allow); + Reg r; + if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2)) + r = RID_ZERO; + else + r = ra_alloc1(as, ir->op2, allow); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ @@ -1266,7 +1320,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ { - Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); + Reg r = id == 0 ? RID_ZERO : (id < 65536) ? 
RID_X1 : ra_allock(as, id, allow); emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); @@ -1466,7 +1520,8 @@ static void asm_mul(ASMState *as, IRIns *ir) static void asm_neg(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, A64I_FNEGd); + if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd)) + asm_fpunary(as, ir, A64I_FNEGd); return; } asm_intneg(as, ir); @@ -1919,6 +1974,17 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) IRIns *ir; asm_head_lreg(as); ir = IR(REF_BASE); + + /* IRRefs that get into the side trace from the parent trace may restore + * REF_BASE under severe register pressure and thus reach here holding on to + * the register. Restore such references so that REF_BASE gets RID_BASE back + * when it tries to allocate below. */ + if (!ra_hasreg(ir->r)) { + Reg r = ra_gethint(ir->r); + if (!rset_test(as->freeset, r)) + ra_restore(as, regcost_ref(as->cost[r])); + } + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) ra_spill(as, ir); if (ra_hasspill(irp->s)) { diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 4465efa2..710cbb95 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1298,7 +1298,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); Reg dest = ra_used(ir) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg node = ra_alloc1(as, ir->op1, RSET_GPR); -#if !LJ_64 +#if !LJ_64 || (defined(LUAJIT_USE_VALGRIND) && !LJ_GC64) MCLabel l_exit; #endif lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); @@ -1313,7 +1313,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) } } asm_guardcc(as, CC_NE); -#if LJ_64 +#if LJ_64 && (!defined(LUAJIT_USE_VALGRIND) || LJ_GC64) if (!irt_ispri(irkey->t)) { Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); emit_rmro(as, XO_CMP, key|REX_64, node, diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 25f54dee..8162b950 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -370,6 +370,82 @@ #elif LJ_TARGET_PPC /* -- PPC calling conventions --------------------------------------------- */ +#if LJ_ARCH_BITS == 64 + +#if LJ_ARCH_PPC_ELFV2 + +#define CCALL_HANDLE_STRUCTRET \ + if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \ + cc->retref = 1; /* Return by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; \ + } + +#define CCALL_HANDLE_STRUCTRET2 \ + int isfp = ccall_classify_fp(cts, ctr); \ + int i; \ + if (isfp == FTYPE_FLOAT) { \ + for (i = 0; i < ctr->size / 4; i++) \ + ((float *)dp)[i] = cc->fpr[i]; \ + } else if (isfp == FTYPE_DOUBLE) { \ + for (i = 0; i < ctr->size / 8; i++) \ + ((double *)dp)[i] = cc->fpr[i]; \ + } else { \ + if (ctr->size < 8 && LJ_BE) { \ + sp += 8 - ctr->size; \ + } \ + memcpy(dp, sp, ctr->size); \ + } + +#else + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = 1; /* Return all structs by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#endif + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 2 or 4 GPRs. */ \ + cc->retref = 0; + +#define CCALL_HANDLE_STRUCTARG + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0]; \ + ((float *)dp)[1] = cc->fpr[1]; \ + } else { /* Copy complex double from FPRs. 
*/ \ + ((double *)dp)[0] = cc->fpr[0]; \ + ((double *)dp)[1] = cc->fpr[1]; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + isfp = 1; \ + if (d->size == sizeof(float) * 2) { \ + d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \ + isf32 = 1; \ + } + +#define CCALL_HANDLE_REGARG \ + if (isfp && d->size == sizeof(float)) { \ + d = ctype_get(cts, CTID_DOUBLE); \ + isf32 = 1; \ + } \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n; \ + if (ngpr > maxgpr) { \ + nsp += ngpr - 8; \ + ngpr = 8; \ + if (nsp > CCALL_MAXSTACK) { \ + goto err_nyi; \ + } \ + } \ + goto done; \ + } + +#else + #define CCALL_HANDLE_STRUCTRET \ cc->retref = 1; /* Return all structs by reference. */ \ cc->gpr[ngpr++] = (GPRArg)dp; @@ -378,13 +454,13 @@ /* Complex values are returned in 2 or 4 GPRs. */ \ cc->retref = 0; -#define CCALL_HANDLE_COMPLEXRET2 \ - memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ - #define CCALL_HANDLE_STRUCTARG \ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ sz = CTSIZE_PTR; /* Pass all structs by reference. */ +#define CCALL_HANDLE_COMPLEXRET2 \ + memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ + #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in 2 or 4 GPRs. */ @@ -420,6 +496,8 @@ } #endif +#endif + #if !LJ_ABI_SOFTFP #define CCALL_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ @@ -574,6 +652,40 @@ goto done; \ } +#elif LJ_TARGET_S390X +/* -- POSIX/s390x calling conventions --------------------------------------- */ + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = 1; /* Return all structs by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_COMPLEXRET \ + cc->retref = 1; /* Return all complex values by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_COMPLEXRET2 \ + UNUSED(dp); /* Nothing to do. */ + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. 
*/ \ + if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex numbers by reference. */ \ + /* TODO: not sure why this is different to structs. */ \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + +#define CCALL_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \ + } else { \ + if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -816,6 +928,50 @@ noth: /* Not a homogeneous float/double aggregate. */ #endif +/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */ + +#if LJ_ARCH_PPC_ELFV2 + +#define FTYPE_FLOAT 1 +#define FTYPE_DOUBLE 2 + +static unsigned int ccall_classify_fp(CTState *cts, CType *ct) { + if (ctype_isfp(ct->info)) { + if (ct->size == sizeof(float)) + return FTYPE_FLOAT; + else + return FTYPE_DOUBLE; + } else if (ctype_iscomplex(ct->info)) { + if (ct->size == sizeof(float) * 2) + return FTYPE_FLOAT; + else + return FTYPE_DOUBLE; + } else if (ctype_isstruct(ct->info)) { + int res = -1; + int sz = ct->size; + while (ct->sib) { + ct = ctype_get(cts, ct->sib); + if (ctype_isfield(ct->info)) { + int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct)); + if (res == -1) + res = sub; + if (sub != -1 && sub != res) + return 0; + } else if (ctype_isbitfield(ct->info) || + ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + return 0; + } + } + if (res > 0 && sz > res * 4 * 8) + return 0; + return res; + } else { + return 0; + } +} + +#endif + /* -- MIPS64 ABI struct classification ---------------------------- */ #if LJ_TARGET_MIPS64 @@ -990,6 +1146,13 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CTSize sz; MSize n, isfp = 0, isva = 0; void *dp, *rp = NULL; +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + 
int isf32 = 0; +#endif + +#if LJ_TARGET_S390X + uint32_t onstack = 0; +#endif if (fid) { /* Get argument type from field. */ CType *ctf = ctype_get(cts, fid); @@ -1028,6 +1191,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_REGARG /* Handle register arguments. */ /* Otherwise pass argument on stack. */ +#if LJ_TARGET_S390X + onstack = 1; +#endif if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) { MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1; nsp = (nsp + align) & ~align; /* Align argument on stack. */ @@ -1046,7 +1212,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(void **)dp = rp; dp = rp; } +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE + if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) { + dp = (char *)dp + (CTSIZE_PTR - sz); + } +#endif lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if (isfp) { + int i; + for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((double *)dp)[i]; + } + if (isf32) { + int i; + for (i = 0; i < d->size / 8; i++) + ((float *)dp)[i*2] = ((double *)dp)[i]; + } +#endif +#if LJ_ARCH_PPC_ELFV2 + if (ctype_isstruct(d->info)) { + isfp = ccall_classify_fp(cts, d); + int i; + if (isfp == FTYPE_FLOAT) { + for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((float *)dp)[i]; + } else if (isfp == FTYPE_DOUBLE) { + for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((double *)dp)[i]; + } + } +#endif /* Extend passed integers to 32 bits at least. */ if (ctype_isinteger_or_bool(d->info) && d->size < 4) { if (d->info & CTF_UNSIGNED) @@ -1060,6 +1256,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if (isfp && d->size == sizeof(float)) ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. 
*/ #endif +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) + && d->size <= 4) { + if (d->info & CTF_UNSIGNED) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 @@ -1069,6 +1274,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ } #endif +#if LJ_TARGET_S390X + /* Arguments need to be sign-/zero-extended to 64-bits. */ + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || + (isfp && onstack)) && d->size <= 4) { + if (d->info & CTF_UNSIGNED || isfp) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_X64 && LJ_ABI_WIN if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ if (nfpr == ngpr) diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 0b3c5244..52455539 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -86,10 +86,23 @@ typedef union FPRArg { #elif LJ_TARGET_PPC #define CCALL_NARG_GPR 8 +#if LJ_ARCH_BITS == 64 +#define CCALL_NARG_FPR 13 +#if LJ_ARCH_PPC_ELFV2 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR 8 +#define CCALL_SPS_EXTRA 14 +#else +#define CCALL_NRET_GPR 1 +#define CCALL_NRET_FPR 2 +#define CCALL_SPS_EXTRA 16 +#endif +#else #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8) #define CCALL_NRET_GPR 4 /* For complex double. */ #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 
0 : 1) #define CCALL_SPS_EXTRA 4 +#endif #define CCALL_SPS_FREE 0 typedef intptr_t GPRArg; @@ -126,6 +139,21 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; +#elif LJ_TARGET_S390X + +#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */ +#define CCALL_NARG_FPR 4 /* FPR 0,2,4,8 */ +#define CCALL_NRET_GPR 1 /* GPR 2 */ +#define CCALL_NRET_FPR 1 /* FPR 0 */ +#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */ +#define CCALL_SPS_FREE 0 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + float f; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 43e44305..c1e67abd 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -21,6 +21,10 @@ #include "lj_trace.h" #include "lj_vm.h" +#if LJ_ARCH_PPC_ELFV2 +#include "lualib.h" +#endif + /* -- Target-specific handling of callback slots -------------------------- */ #define CALLBACK_MCODE_SIZE (LJ_PAGESIZE * LJ_NUM_CBPAGE) @@ -61,8 +65,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #elif LJ_TARGET_PPC +#if LJ_ARCH_PPC_OPD + +#define CALLBACK_SLOT2OFS(slot) (24*(slot)) +#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24) +#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) + +#elif LJ_ARCH_PPC_ELFV2 + +#define CALLBACK_SLOT2OFS(slot) (4*(slot)) +#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4) +#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10) + +#else + #define CALLBACK_MCODE_HEAD 24 +#endif + #elif LJ_TARGET_MIPS32 #define CALLBACK_MCODE_HEAD 20 @@ -188,24 +208,59 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) return p; } #elif LJ_TARGET_PPC +#if LJ_ARCH_PPC_OPD +register void *vm_toc __asm__("r2"); +static void *callback_mcode_init(global_State *g, uint64_t *page) +{ + uint64_t *p = page; + void *target = (void *)lj_vm_ffi_callback; + MSize slot; + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p++ = 
(uint64_t)target; + *p++ = (uint64_t)vm_toc; + *p++ = (uint64_t)g | ((uint64_t)slot << 47); + } + return p; +} +#else static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; MSize slot; +#if LJ_ARCH_PPC_ELFV2 + // Needs to be in sync with lj_vm_ffi_callback. + lua_assert(CALLBACK_MCODE_SIZE == 4096); + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2); + p++; + } + *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff); + *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff); + *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ + *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ + *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff); + *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff); + *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1); + *p++ = PPCI_BCTR; +#else *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16); - *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16); + *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16); *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff); - *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff); *p++ = PPCI_MTCTR | PPCF_T(RID_TMP); *p++ = PPCI_BCTR; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = PPCI_LI | PPCF_T(RID_R11) | slot; + *p++ = PPCI_LI | PPCF_T(RID_R12) | slot; *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); p++; } +#endif return p; } +#endif #elif LJ_TARGET_MIPS static void 
*callback_mcode_init(global_State *g, uint32_t *page) { @@ -516,6 +571,15 @@ void lj_ccallback_mcode_free(CTState *cts) if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ((float *)dp)[1] = *(float *)dp; +#elif LJ_TARGET_S390X + +#define CALLBACK_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \ + } else { \ + if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -662,6 +726,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if (ctr->size <= 4 && + (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) { + if (ctr->info & CTF_UNSIGNED) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ if (ctr->size <= 4 && diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 2473b57e..569bfe86 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -153,7 +153,7 @@ typedef struct CType { /* Simplify target-specific configuration. Checked in lj_ccall.h. */ #define CCALL_MAX_GPR 8 -#define CCALL_MAX_FPR 8 +#define CCALL_MAX_FPR 14 typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg; diff --git a/src/lj_debug.c b/src/lj_debug.c index 112f5358..65dc4ff0 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -109,6 +109,11 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) return pos; } +LJ_FUNC BCPos lj_debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) +{ + return debug_framepc(L, fn, nextframe); +} + /* -- Line numbers -------------------------------------------------------- */ /* Get line number for a bytecode position. 
*/ @@ -703,3 +708,128 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, lua_concat(L, (int)(L->top - L->base) - top); } +#ifdef LUA_USE_TRACE_LOGS + +#include "lj_dispatch.h" + +#define MAX_TRACE_EVENTS 64 + +enum { + LJ_TRACE_EVENT_ENTER, + LJ_TRACE_EVENT_EXIT, + LJ_TRACE_EVENT_START +}; + +typedef struct { + int event; + unsigned traceno; + unsigned exitno; + int directexit; + const BCIns *ins; + lua_State *thread; + GCfunc *fn; +} lj_trace_event_record_t; + +static lj_trace_event_record_t lj_trace_events[MAX_TRACE_EVENTS]; + +static int rb_start = 0; +static int rb_end = 0; +static int rb_full = 0; + +static void +lj_trace_log_event(lj_trace_event_record_t *rec) +{ + lj_trace_events[rb_end] = *rec; + + if (rb_full) { + rb_end++; + if (rb_end == MAX_TRACE_EVENTS) { + rb_end = 0; + } + rb_start = rb_end; + + } else { + rb_end++; + if (rb_end == MAX_TRACE_EVENTS) { + rb_end = 0; + rb_full = MAX_TRACE_EVENTS; + } + } +} + +static GCfunc* +lj_debug_top_frame_fn(lua_State *L, const BCIns *pc) +{ + int size; + cTValue *frame; + + frame = lj_debug_frame(L, 0, &size); + if (frame == NULL) { + return NULL; + } + + return frame_func(frame); +} + +LJ_FUNC void LJ_FASTCALL +lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc, + GCfunc *fn) +{ + lj_trace_event_record_t r; + + r.event = LJ_TRACE_EVENT_START; + r.thread = L; + r.ins = pc; + r.traceno = traceno; + r.fn = fn; + + lj_trace_log_event(&r); +} + +LJ_FUNC void LJ_FASTCALL +lj_log_trace_entry(lua_State *L, unsigned traceno, const BCIns *pc) +{ + lj_trace_event_record_t r; + + r.event = LJ_TRACE_EVENT_ENTER; + r.thread = L; + r.ins = pc; + r.traceno = traceno; + r.fn = lj_debug_top_frame_fn(L, pc); + + lj_trace_log_event(&r); +} + +static void +lj_log_trace_exit_helper(lua_State *L, int vmstate, const BCIns *pc, int direct) +{ + if (vmstate >= 0) { + lj_trace_event_record_t r; + + jit_State *J = L2J(L); + + r.event = LJ_TRACE_EVENT_EXIT; + r.thread = L; + r.ins = pc; 
+ r.traceno = vmstate; + r.exitno = J->exitno; + r.directexit = direct; + r.fn = lj_debug_top_frame_fn(L, pc); + + lj_trace_log_event(&r); + } +} + +LJ_FUNC void LJ_FASTCALL +lj_log_trace_normal_exit(lua_State *L, int vmstate, const BCIns *pc) +{ + lj_log_trace_exit_helper(L, vmstate, pc, 0); +} + +LJ_FUNC void LJ_FASTCALL +lj_log_trace_direct_exit(lua_State *L, int vmstate, const BCIns *pc) +{ + lj_log_trace_exit_helper(L, vmstate, pc, 1); +} + +#endif /* LUA_USE_TRACE_LOGS */ diff --git a/src/lj_debug.h b/src/lj_debug.h index 28127ae9..8e145d6a 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h @@ -26,6 +26,7 @@ typedef struct lj_Debug { int isvararg; } lj_Debug; +LJ_FUNC BCPos lj_debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe); LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size); LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc); LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); @@ -63,4 +64,15 @@ enum { VARNAME__MAX }; +#ifdef LUA_USE_TRACE_LOGS +LJ_FUNC void LJ_FASTCALL lj_log_trace_direct_exit(lua_State *L, + int vmstate, const BCIns *pc); +LJ_FUNC void LJ_FASTCALL lj_log_trace_normal_exit(lua_State *L, + int vmstate, const BCIns *pc); +LJ_FUNC void LJ_FASTCALL lj_log_trace_entry(lua_State *L, + unsigned traceno, const BCIns *pc); +LJ_FUNC void LJ_FASTCALL lj_log_trace_start_record(lua_State *L, unsigned traceno, + const BCIns *pc, GCfunc *fn); +#endif + #endif diff --git a/src/lj_def.h b/src/lj_def.h index b61297aa..8541a867 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -66,12 +66,16 @@ typedef unsigned int uintptr_t; #define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ #define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ #define LJ_MAX_LOCVAR 200 /* Max. # of local variables. */ -#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ +#define LJ_MAX_UPVAL 120 /* Max. # of upvalues. */ #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. 
*/ #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ +#if defined(__powerpc64__) && _CALL_ELF != 2 +#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */ +#else #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ +#endif /* Minimum table/buffer sizes. */ #define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */ @@ -107,7 +111,11 @@ typedef unsigned int uintptr_t; #define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0) #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) #define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) +#if defined(__powerpc64__) && _CALL_ELF == 2 +#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1) +#else #define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1) +#endif /* Every half-decent C compiler transforms this into a rotate instruction. */ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 52762eea..0594af51 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -89,7 +89,7 @@ typedef uint16_t HotCount; typedef struct GG_State { lua_State L; /* Main thread. */ global_State g; /* Global state. */ -#if LJ_TARGET_ARM && !LJ_TARGET_NX +#if LJ_TARGET_ARM /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ uint8_t align1[(16-sizeof(global_State))&15]; #endif @@ -99,7 +99,7 @@ typedef struct GG_State { #if LJ_HASJIT jit_State J; /* JIT state. */ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ -#if LJ_TARGET_ARM && !LJ_TARGET_NX +#if LJ_TARGET_ARM /* Ditto for J. 
*/ uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; #endif diff --git a/src/lj_err.c b/src/lj_err.c index 563c7706..56c5ef7e 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -419,6 +419,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, if (version != 1) return _URC_FATAL_PHASE1_ERROR; cf = (void *)_Unwind_GetCFA(ctx); +#ifdef LJ_TARGET_S390X + cf -= 160; /* CFA points 160 bytes above r15. */ +#endif L = cframe_L(cf); if ((actions & _UA_SEARCH_PHASE)) { #if LJ_UNWIND_EXT @@ -753,6 +756,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) G(L)->panic(L); #else #if LJ_HASJIT + g->saved_jit_base = g->jit_base; setmref(g->jit_base, NULL); #endif { diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 2e5c776a..da4121fb 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -109,6 +109,8 @@ ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") ERRDEF(NOJIT, "JIT compiler permanently disabled by build option") #endif ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) +ERRDEF(PRNGSTATE, "PRNG state must be an array with up to 8 integers " + "or an integer") /* Lexer/parser errors. */ ERRDEF(XMODE, "attempt to load chunk with wrong mode") @@ -178,6 +180,7 @@ ERRDEF(FFI_CBACKOV, "too many callbacks") #endif ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields") ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") +ERRDEF(FFI_NOTLOAD, "ffi module not loaded (yet)") #endif #if LJ_HASBUFFER diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 60c1d84f..528ebc34 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -29,6 +29,7 @@ #include "lj_vm.h" #include "lj_strscan.h" #include "lj_strfmt.h" +#include "lj_cdata.h" #include "lj_serialize.h" /* Some local macros to save typing. Undef'd at the end. */ @@ -1459,6 +1460,77 @@ static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. 
*/ } +static void LJ_FASTCALL recff_table_clone(jit_State *J, RecordFFData *rd) +{ + TRef src = J->base[0]; + J->base[0] = lj_ir_call(J, IRCALL_lj_tab_clone, src); + UNUSED(rd); +} + +static void LJ_FASTCALL recff_table_isarray(jit_State *J, RecordFFData *rd) +{ + TRef src = J->base[0]; + if (LJ_LIKELY(tref_istab(src))) { + TRef trres = lj_ir_call(J, IRCALL_lj_tab_isarray, src); + GCtab *t = tabV(&rd->argv[0]); + int isarr = lj_tab_isarray(t); + TRef tr0 = lj_ir_kint(J, 0); + emitir(isarr ? IRTGI(IR_NE) : IRTGI(IR_EQ), trres, tr0); + J->base[0] = isarr ? TREF_TRUE : TREF_FALSE; + } /* else: Interpreter will throw. */ +} + +static void LJ_FASTCALL recff_table_nkeys(jit_State *J, RecordFFData *rd) +{ + TRef src = J->base[0]; + if (LJ_LIKELY(tref_istab(src))) { + J->base[0] = lj_ir_call(J, IRCALL_lj_tab_nkeys, src); + } /* else: Interpreter will throw. */ +} + +static void LJ_FASTCALL recff_table_isempty(jit_State *J, RecordFFData *rd) +{ + TRef src = J->base[0]; + if (LJ_LIKELY(tref_istab(src))) { + TRef trres = lj_ir_call(J, IRCALL_lj_tab_isempty, src); + GCtab *t = tabV(&rd->argv[0]); + int isempty = lj_tab_isempty(t); + TRef tr0 = lj_ir_kint(J, 0); + emitir(isempty ? IRTGI(IR_NE) : IRTGI(IR_EQ), trres, tr0); + J->base[0] = isempty ? TREF_TRUE : TREF_FALSE; + } /* else: Interpreter will throw. 
*/ +} + +/* -- thread library fast functions ------------------------------------------ */ + +#if LJ_HASFFI +void LJ_FASTCALL recff_thread_exdata(jit_State *J, RecordFFData *rd) +{ + TRef tr = J->base[0]; + if (!tr) { + TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0); + TRef trp = emitir(IRT(IR_FLOAD, IRT_PTR), trl, IRFL_THREAD_EXDATA); + TRef trid = lj_ir_kint(J, CTID_P_VOID); + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, trp); + return; + } + recff_nyiu(J, rd); /* this case is too rare to be interesting */ +} + +void LJ_FASTCALL recff_thread_exdata2(jit_State *J, RecordFFData *rd) +{ + TRef tr = J->base[0]; + if (!tr) { + TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0); + TRef trp = emitir(IRT(IR_FLOAD, IRT_PTR), trl, IRFL_THREAD_EXDATA2); + TRef trid = lj_ir_kint(J, CTID_P_VOID); + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, trp); + return; + } + recff_nyiu(J, rd); /* this case is too rare to be interesting */ +} +#endif + /* -- I/O library fast functions ------------------------------------------ */ /* Get FILE* for I/O function. Any I/O error aborts recording, so there's diff --git a/src/lj_frame.h b/src/lj_frame.h index aa1dc11a..40583119 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -210,6 +210,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #define CFRAME_OFS_MULTRES 408 #define CFRAME_SIZE 384 #define CFRAME_SHIFT_MULTRES 3 +#elif LJ_ARCH_PPC_ELFV2 +#define CFRAME_OFS_ERRF 360 +#define CFRAME_OFS_NRES 356 +#define CFRAME_OFS_PREV 336 +#define CFRAME_OFS_L 352 +#define CFRAME_OFS_PC 348 +#define CFRAME_OFS_MULTRES 344 +#define CFRAME_SIZE 368 +#define CFRAME_SHIFT_MULTRES 3 #elif LJ_ARCH_PPC32ON64 #define CFRAME_OFS_ERRF 472 #define CFRAME_OFS_NRES 468 @@ -264,6 +273,20 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. 
*/ #endif #define CFRAME_OFS_MULTRES 0 #define CFRAME_SHIFT_MULTRES 3 +#elif LJ_TARGET_S390X +#define CFRAME_OFS_ERRF 280 +#define CFRAME_OFS_NRES 272 +#define CFRAME_OFS_PREV 264 +#define CFRAME_OFS_L 256 +#define CFRAME_OFS_PC 168 +#define CFRAME_OFS_MULTRES 160 +#define CFRAME_SIZE 240 +/* +** TODO: it would be good if we always decoded param*8 like +** the RISC architectures do. If so then SHIFT_MULTRES will +** need to change to 3. +*/ +#define CFRAME_SHIFT_MULTRES 0 #else #error "Missing CFRAME_* definitions for this architecture" #endif diff --git a/src/lj_init.c b/src/lj_init.c new file mode 100644 index 00000000..a6816e1e --- /dev/null +++ b/src/lj_init.c @@ -0,0 +1,69 @@ +#include +#include "lj_arch.h" +#include "lj_jit.h" +#include "lj_vm.h" +#include "lj_str.h" + +#if LJ_TARGET_ARM && LJ_TARGET_LINUX +#include +#endif + +#ifdef _MSC_VER +/* +** Append a function pointer to the static constructor table executed by +** the C runtime. +** Based on https://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc +** see also https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization. +*/ +#pragma section(".CRT$XCU",read) +#define LJ_INITIALIZER2_(f,p) \ + static void f(void); \ + __declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \ + __pragma(comment(linker,"/include:" p #f "_")) \ + static void f(void) +#ifdef _WIN64 +#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"") +#else +#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"_") +#endif + +#else +#define LJ_INITIALIZER(f) static void __attribute__((constructor)) f(void) +#endif + + +#ifdef LJ_HAS_OPTIMISED_HASH +static void str_hash_init(uint32_t flags) +{ + if (flags & JIT_F_SSE4_2) + str_hash_init_sse42 (); +} + +/* CPU detection for interpreter features such as string hash function + selection. We choose to cherry-pick from lj_cpudetect and not have a single + initializer to make sure that merges with LuaJIT/LuaJIT remain + convenient. 
*/ +LJ_INITIALIZER(lj_init_cpuflags) +{ + uint32_t flags = 0; +#if LJ_TARGET_X86ORX64 + + uint32_t vendor[4]; + uint32_t features[4]; + if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { + flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; + flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; + flags |= ((features[2] >> 20)&1) * JIT_F_SSE4_2; + if (vendor[0] >= 7) { + uint32_t xfeatures[4]; + lj_vm_cpuid(7, xfeatures); + flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; + } + } + +#endif + + /* The reason why we initialized early: select our string hash functions. */ + str_hash_init (flags); +} +#endif diff --git a/src/lj_ir.h b/src/lj_ir.h index ed492e93..8aabbec6 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -196,6 +196,8 @@ IRFPMDEF(FPMENUM) _(FUNC_PC, offsetof(GCfunc, l.pc)) \ _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \ _(THREAD_ENV, offsetof(lua_State, env)) \ + _(THREAD_EXDATA, offsetof(lua_State, exdata)) \ + _(THREAD_EXDATA2, offsetof(lua_State, exdata2)) \ _(TAB_META, offsetof(GCtab, metatable)) \ _(TAB_ARRAY, offsetof(GCtab, array)) \ _(TAB_NODE, offsetof(GCtab, node)) \ diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 67fb58ae..8db18f08 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -190,6 +190,10 @@ typedef struct CCallInfo { _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \ _(ANY, lj_vm_next, 2, FL, PTR, 0) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \ + _(ANY, lj_tab_clone, 2, FS, TAB, CCI_L) \ + _(ANY, lj_tab_isarray, 1, FL, INT, 0) \ + _(ANY, lj_tab_nkeys, 1, FL, INT, 0) \ + _(ANY, lj_tab_isempty, 1, FL, INT, 0) \ _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ diff --git a/src/lj_jit.h b/src/lj_jit.h index 32b3861a..74b40fd9 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -7,7 +7,6 @@ #define _LJ_JIT_H #include "lj_obj.h" -#if LJ_HASJIT #include "lj_ir.h" /* -- JIT engine flags ---------------------------------------------------- */ @@ -23,6 +22,7 @@ #define JIT_F_SSE3 (JIT_F_CPU << 
0) #define JIT_F_SSE4_1 (JIT_F_CPU << 1) #define JIT_F_BMI2 (JIT_F_CPU << 2) +#define JIT_F_SSE4_2 (JIT_F_CPU << 3) #define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" @@ -112,12 +112,12 @@ /* Optimization parameters and their defaults. Length is a char in octal! */ #define JIT_PARAMDEF(_) \ - _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ - _(\011, maxrecord, 4000) /* Max. # of recorded IR instructions. */ \ + _(\010, maxtrace, 8000) /* Max. # of traces in cache. */ \ + _(\011, maxrecord, 16000) /* Max. # of recorded IR instructions. */ \ _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ - _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ + _(\011, minstitch, 3) /* Min. # of IR ins for a stitched trace. */ \ \ _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ @@ -131,7 +131,7 @@ /* Size of each machine code area (in KBytes). */ \ _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \ /* Max. total size of all machine code areas (in KBytes). */ \ - _(\010, maxmcode, 512) \ + _(\010, maxmcode, 40960) \ /* End of list. */ enum { @@ -372,7 +372,6 @@ enum { #endif LJ_K64__MAX, }; -#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) enum { #if LJ_TARGET_X86ORX64 @@ -391,7 +390,6 @@ enum { #endif LJ_K32__MAX }; -#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS) /* Get 16 byte aligned pointer to SIMD constant. */ #define LJ_KSIMD(J, n) \ @@ -446,13 +444,9 @@ typedef struct jit_State { int32_t framedepth; /* Current frame depth. */ int32_t retdepth; /* Return frame depth (count of RETF). */ -#if LJ_K32__USED uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */ -#endif TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. 
*/ -#if LJ_K64__USED TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */ -#endif IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ @@ -516,6 +510,8 @@ typedef struct jit_State { BCLine prev_line; /* Previous line. */ int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ #endif + PRNGState prng; /* PRNG state for the JIT compiler, defaults to prng in + global_State. */ } jit_State; #ifdef LUA_USE_ASSERT @@ -523,6 +519,5 @@ typedef struct jit_State { #else #define lj_assertJ(c, ...) ((void)J) #endif -#endif #endif diff --git a/src/lj_lib.c b/src/lj_lib.c index 82a9e256..10cd254f 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -304,6 +304,14 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst) return def; } +GCcdata *lj_lib_checkcdata(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (!(o < L->top && tviscdata(o))) + lj_err_argt(L, narg, LUA_TCDATA); + return cdataV(o); +} + /* -- Strict type checks -------------------------------------------------- */ /* The following type checks do not coerce between strings and numbers. @@ -356,4 +364,3 @@ badtype: return 0; /* unreachable */ } #endif - diff --git a/src/lj_lib.h b/src/lj_lib.h index a18f52bf..c6053435 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h @@ -45,6 +45,7 @@ LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); +LJ_FUNC GCcdata *lj_lib_checkcdata(lua_State *L, int narg); #if LJ_HASBUFFER LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg); diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 163aada4..537c8333 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -231,7 +231,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) } /* Next try probing 64K-aligned pseudo-random addresses. 
*/ do { - hint = lj_prng_u64(&J2G(J)->prng) & ((1u<prng) & ((1u<mainthref)->th) @@ -697,6 +698,12 @@ struct lua_State { GCRef env; /* Thread environment (table of globals). */ void *cframe; /* End of C stack frame chain. */ MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */ + void *exdata; /* user extra data pointer. added by OpenResty */ + void *exdata2; /* the 2nd user extra data pointer. added by OpenResty */ +#if LJ_TARGET_ARM + uint32_t unused1; + uint32_t unused2; +#endif }; #define G(L) (mref(L->glref, global_State)) diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 09de2f05..a716ddeb 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -370,7 +370,9 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) ** since they are followed by at least one guarded VLOAD. */ for (ir = IR(J->cur.nins-1); ir > store; ir--) - if (irt_isguard(ir->t) || ir->o == IR_ALEN) + if (irt_isguard(ir->t) || ir->o == IR_ALEN || + (ir->o == IR_CALLL && ir->op2 == IRCALL_lj_tab_nkeys) || + (ir->o == IR_CALLS && ir->op2 == IRCALL_lj_tab_clone)) goto doemit; /* No elimination possible. */ /* Remove redundant store from chain and replace with NOP. 
*/ *refp = store->prev; diff --git a/src/lj_prng.c b/src/lj_prng.c index 9e57505e..fd8219de 100644 --- a/src/lj_prng.c +++ b/src/lj_prng.c @@ -87,10 +87,6 @@ extern int sys_get_random_number(void *buf, uint64_t len); extern int sceRandomGetRandomNumber(void *buf, size_t len); -#elif LJ_TARGET_NX - -#include - #elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE #define WIN32_LEAN_AND_MEAN @@ -180,11 +176,6 @@ int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0) goto ok; -#elif LJ_TARGET_NX - - if (getentropy(rs->u, sizeof(rs->u)) == 0) - goto ok; - #elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u), diff --git a/src/lj_record.c b/src/lj_record.c index faa9a508..5cca2425 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -624,7 +624,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) if (bc_j(*pc) != -1 && !innerloopleft(J, pc)) lj_trace_err(J, LJ_TRERR_LINNER); /* Root trace hit an inner loop. */ if ((ev != LOOPEV_ENTERLO && - J->loopref && J->cur.nins - J->loopref > 24) || --J->loopunroll < 0) + J->loopref && J->cur.nins - J->loopref > 100) || --J->loopunroll < 0) lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */ J->loopref = J->cur.nins; } @@ -664,17 +664,12 @@ static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb) RecordIndex ix; /* Since ITERN is recorded at the start, we need our own loop detection. */ if (J->pc == J->startpc && + (J->cur.nins > REF_FIRST+1 || + (J->cur.nins == REF_FIRST+1 && J->cur.ir[REF_FIRST].o != IR_PROF)) && J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) { - IRRef ref = REF_FIRST + LJ_HASPROFILE; -#ifdef LUAJIT_ENABLE_CHECKHOOK - ref += 3; -#endif - if (J->cur.nins > ref || - (LJ_HASPROFILE && J->cur.nins == ref && J->cur.ir[ref-1].o != IR_PROF)) { - J->instunroll = 0; /* Cannot continue unrolling across an ITERN. 
*/ - lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ - return LOOPEV_ENTER; - } + J->instunroll = 0; /* Cannot continue unrolling across an ITERN. */ + lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ + return LOOPEV_ENTER; } J->maxslot = ra; lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */ @@ -1836,7 +1831,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) if (lnk) { /* Possible tail- or up-recursion. */ lj_trace_flush(J, lnk); /* Flush trace that only returns. */ /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ - hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u); + hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J->prng) & 15u); } lj_trace_err(J, LJ_TRERR_CUNROLL); } diff --git a/src/lj_state.c b/src/lj_state.c index 0b9c46ba..e28cfed4 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -260,6 +260,8 @@ LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) return NULL; } L->status = LUA_OK; + L->exdata = NULL; + L->exdata2 = NULL; return L; } @@ -319,6 +321,8 @@ lua_State *lj_state_new(lua_State *L) setgcrefr(L1->env, L->env); stack_init(L1, L); /* init stack */ lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white"); + L1->exdata = L->exdata; + L1->exdata2 = L->exdata2; return L1; } diff --git a/src/lj_str.c b/src/lj_str.c index a5282da6..723bfa63 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -19,6 +19,15 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) { MSize i, n = a->len > b->len ? b->len : a->len; +#ifdef LUAJIT_USE_VALGRIND + for (i = 0; i < n; i++) { + uint8_t va = *(const uint8_t *)(strdata(a)+i); + uint8_t vb = *(const uint8_t *)(strdata(b)+i); + if (va != vb) { + return va < vb ? -1 : 1; + } + } +#else for (i = 0; i < n; i += 4) { /* Note: innocuous access up to end of string + 3. 
*/ uint32_t va = *(const uint32_t *)(strdata(a)+i); @@ -35,6 +44,7 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) return va < vb ? -1 : 1; } } +#endif return (int32_t)(a->len - b->len); } @@ -72,8 +82,22 @@ int lj_str_haspattern(GCstr *s) /* -- String hashing ------------------------------------------------------ */ +#ifdef LJ_HAS_OPTIMISED_HASH +static StrHash hash_sparse_def (uint64_t, const char *, MSize); +str_sparse_hashfn hash_sparse = hash_sparse_def; +#if LUAJIT_SECURITY_STRHASH +static StrHash hash_dense_def(uint64_t, StrHash, const char *, MSize); +str_dense_hashfn hash_dense = hash_dense_def; +#endif +#else +#define hash_sparse hash_sparse_def +#if LUAJIT_SECURITY_STRHASH +#define hash_dense hash_dense_def +#endif +#endif + /* Keyed sparse ARX string hash. Constant time. */ -static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) +static StrHash hash_sparse_def(uint64_t seed, const char *str, MSize len) { /* Constants taken from lookup3 hash by Bob Jenkins. */ StrHash a, b, h = len ^ (StrHash)seed; @@ -97,8 +121,8 @@ static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) #if LUAJIT_SECURITY_STRHASH /* Keyed dense ARX string hash. Linear time. */ -static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h, - const char *str, MSize len) +static LJ_NOINLINE StrHash hash_dense_def(uint64_t seed, StrHash h, + const char *str, MSize len) { StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); if (len > 12) { @@ -282,8 +306,21 @@ static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, s->gct = ~LJ_TSTR; s->len = len; s->hash = hash; + +#ifdef LUAJIT_TEST_FIXED_ORDER + /* If you need predictable key iteration order in lua tables (eg: in data driven test), + * build with + * "XCFLAGS=-DLUAJIT_TEST_FIXED_ORDER=1 -DLUAJIT_SECURITY_STRID=0 + * -DLUAJIT_SECURITY_STRHASH=0 -DLUAJIT_SECURITY_PRNG=0 -DLUAJIT_SECURITY_MCODE=0" + * + * This is for testing only. Please don't use it in production builds. 
+ */ + s->sid = hash; +#else #ifndef STRID_RESEED_INTERVAL - s->sid = g->str.id++; + /* s->sid = g->str.id++; */ + /* if use g->str.id++ as sid, the order of the tab will be indeterminate. */ + s->sid = hash; #elif STRID_RESEED_INTERVAL if (!g->str.idreseed--) { uint64_t r = lj_prng_u64(&g->prng); @@ -293,6 +330,7 @@ static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, s->sid = g->str.id++; #else s->sid = (StrID)lj_prng_u64(&g->prng); +#endif #endif s->reserved = 0; s->hashalg = (uint8_t)hashalg; diff --git a/src/lj_str.h b/src/lj_str.h index 28edb5a5..f7b9234b 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -28,4 +28,16 @@ LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L); #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) #define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3)) +#ifdef LJ_HAS_OPTIMISED_HASH +typedef StrHash (*str_sparse_hashfn) (uint64_t, const char *, MSize); +extern str_sparse_hashfn hash_sparse; + +#if LUAJIT_SECURITY_STRHASH +typedef StrHash (*str_dense_hashfn) (uint64_t, StrHash, const char *, MSize); +extern str_dense_hashfn hash_dense; +#endif + +extern void str_hash_init_sse42 (void); +#endif + #endif diff --git a/src/lj_str_hash.c b/src/lj_str_hash.c new file mode 100644 index 00000000..0ee4b5f6 --- /dev/null +++ b/src/lj_str_hash.c @@ -0,0 +1,309 @@ +/* + * This file defines string hash function using CRC32. It takes advantage of + * Intel hardware support (crc32 instruction, SSE 4.2) to speedup the CRC32 + * computation. The hash functions try to compute CRC32 of length and up + * to 128 bytes of given string. 
+ */ + +#include "lj_arch.h" + +#if LJ_HAS_OPTIMISED_HASH == 1 || defined(SMOKETEST) +#include +#include +#include +#include + +#if defined(_MSC_VER) +#include +/* Silence deprecated name warning */ +#define getpid _getpid +#else +#include +#endif + +#include "lj_def.h" +#include "lj_str.h" +#include "lj_jit.h" + + +#if defined(_MSC_VER) +/* + * MSVC doesn't seem to restrict intrinsics used based on /arch: value set + * while clang-cl will error on it. + */ +#if defined(__clang__) && !defined(__SSE4_2__) +#error "This file must be built with /arch:AVX1 or higher" +#endif +#else +#if !defined(__SSE4_2__) +#error "This file must be built with -msse4.2" +#endif +#endif + +#define lj_crc32_u32 _mm_crc32_u32 +#define lj_crc32_u64 _mm_crc32_u64 + +#undef LJ_AINLINE +#define LJ_AINLINE + +#if defined(__MINGW32__) || defined(_MSC_VER) +#define random() ((long) rand()) +#define srandom(seed) srand(seed) +#endif + +static const uint64_t* cast_uint64p(const char* str) +{ + return (const uint64_t*)(void*)str; +} + +static const uint32_t* cast_uint32p(const char* str) +{ + return (const uint32_t*)(void*)str; +} + +/* hash string with len in [1, 4) */ +static LJ_AINLINE uint32_t hash_sparse_1_4(uint64_t seed, const char* str, + uint32_t len) +{ +#if 0 + /* TODO: The if-1 part (i.e the original algorithm) is working better when + * the load-factor is high, as revealed by conflict benchmark (via + * 'make benchmark' command); need to understand why it's so. 
+ */ + uint32_t v = str[0]; + v = (v << 8) | str[len >> 1]; + v = (v << 8) | str[len - 1]; + v = (v << 8) | len; + return lj_crc32_u32(0, v); +#else + uint32_t a, b, h = len ^ seed; + + a = *(const uint8_t *)str; + h ^= *(const uint8_t *)(str+len-1); + b = *(const uint8_t *)(str+(len>>1)); + h ^= b; h -= lj_rol(b, 14); + + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + h ^= b; h -= lj_rol(b, 16); + + return h; +#endif +} + +/* hash string with len in [4, 16) */ +static LJ_AINLINE uint32_t hash_sparse_4_16(uint64_t seed, const char* str, + uint32_t len) +{ + uint64_t v1, v2, h; + + if (len >= 8) { + v1 = *cast_uint64p(str); + v2 = *cast_uint64p(str + len - 8); + } else { + v1 = *cast_uint32p(str); + v2 = *cast_uint32p(str + len - 4); + } + + h = lj_crc32_u32(0, len ^ seed); + h = lj_crc32_u64(h, v1); + h = lj_crc32_u64(h, v2); + return h; +} + +/* hash string with length in [16, 128) */ +static uint32_t hash_16_128(uint64_t seed, const char* str, + uint32_t len) +{ + uint64_t h1, h2; + uint32_t i; + + h1 = lj_crc32_u32(0, len ^ seed); + h2 = 0; + + for (i = 0; i < len - 16; i += 16) { + h1 += lj_crc32_u64(h1, *cast_uint64p(str + i)); + h2 += lj_crc32_u64(h2, *cast_uint64p(str + i + 8)); + }; + + h1 = lj_crc32_u64(h1, *cast_uint64p(str + len - 16)); + h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8)); + + return lj_crc32_u32(h1, h2); +} + +/* ************************************************************************** + * + * Following is code about hashing string with length >= 128 + * + * ************************************************************************** + */ +static uint32_t random_pos[32][2]; +static const int8_t log2_tab[128] = { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 }; + +/* return floor(log2(n)) */ +static 
LJ_AINLINE uint32_t log2_floor(uint32_t n) +{ + if (n <= 127) { + return log2_tab[n]; + } + + if ((n >> 8) <= 127) { + return log2_tab[n >> 8] + 8; + } + + if ((n >> 16) <= 127) { + return log2_tab[n >> 16] + 16; + } + + if ((n >> 24) <= 127) { + return log2_tab[n >> 24] + 24; + } + + return 31; +} + +#define POW2_MASK(n) ((1L << (n)) - 1) + +/* This function is to populate `random_pos` such that random_pos[i][*] + * contains random value in the range of [2**i, 2**(i+1)). + */ +static void str_hash_init_random(void) +{ + int i, seed, rml; + + /* Calculate the ceil(log2(RAND_MAX)) */ + rml = log2_floor(RAND_MAX); + if (RAND_MAX & (RAND_MAX - 1)) { + rml += 1; + } + + /* Init seed */ + seed = lj_crc32_u32(0, getpid()); + seed = lj_crc32_u32(seed, time(NULL)); + srandom(seed); + + /* Now start to populate the random_pos[][]. */ + for (i = 0; i < 3; i++) { + /* No need to provide random value for chunk smaller than 8 bytes */ + random_pos[i][0] = random_pos[i][1] = 0; + } + + for (; i < rml; i++) { + random_pos[i][0] = random() & POW2_MASK(i+1); + random_pos[i][1] = random() & POW2_MASK(i+1); + } + + for (; i < 31; i++) { + int j; + for (j = 0; j < 2; j++) { + uint32_t v, scale; + scale = random_pos[i - rml][0]; + if (scale == 0) { + scale = 1; + } + v = (random() * scale) & POW2_MASK(i+1); + random_pos[i][j] = v; + } + } +} +#undef POW2_MASK + +/* Return a pre-computed random number in the range of [1**chunk_sz_order, + * 1**(chunk_sz_order+1)). It is "unsafe" in the sense that the return value + * may be greater than chunk-size; it is up to the caller to make sure + * "chunk-base + return-value-of-this-func" has valid virtual address. 
+ */ +static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order, + uint32_t idx) +{ + uint32_t pos = random_pos[chunk_sz_order][idx & 1]; + return pos; +} + +static LJ_NOINLINE uint32_t hash_128_above(uint64_t seed, const char* str, + uint32_t len) +{ + uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2; + uint64_t h1, h2, v; + const char* chunk_ptr; + + chunk_num = 16; + chunk_sz = len / chunk_num; + chunk_sz_log2 = log2_floor(chunk_sz); + + pos1 = get_random_pos_unsafe(chunk_sz_log2, 0); + pos2 = get_random_pos_unsafe(chunk_sz_log2, 1); + + h1 = lj_crc32_u32(0, len ^ seed); + h2 = 0; + + /* loop over 14 chunks, 2 chunks at a time */ + for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1); + chunk_ptr += chunk_sz, i++) { + + v = *cast_uint64p(chunk_ptr + pos1); + h1 = lj_crc32_u64(h1, v); + + v = *cast_uint64p(chunk_ptr + chunk_sz + pos2); + h2 = lj_crc32_u64(h2, v); + } + + /* the last two chunks */ + v = *cast_uint64p(chunk_ptr + pos1); + h1 = lj_crc32_u64(h1, v); + + v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2); + h2 = lj_crc32_u64(h2, v); + + /* process the trailing part */ + h1 = lj_crc32_u64(h1, *cast_uint64p(str)); + h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8)); + + h1 = lj_crc32_u32(h1, h2); + return h1; +} + +/* NOTE: the "len" should not be zero */ +static StrHash hash_sparse_sse42(uint64_t seed, const char* str, MSize len) +{ + if (len < 4 || len >= 128) + return hash_sparse_1_4(seed, str, len); + + if (len >= 16) /* [16, 128) */ + return hash_16_128(seed, str, len); + + /* [4, 16) */ + return hash_sparse_4_16(seed, str, len); +} + +#if LUAJIT_SECURITY_STRHASH +static StrHash hash_dense_sse42(uint64_t seed, uint32_t h, const char* str, + MSize len) +{ + uint32_t b = lj_bswap(lj_rol(h ^ (uint32_t)(seed >> 32), 4)); + + if (len <= 16) + return b; + + if (len < 128) /* [16, 128), try with a different seed. */ + return hash_16_128(b, str, len); + + /* Otherwise, do the slow crc32 randomization for long strings. 
*/ + return hash_128_above(b, str, len); +} +#endif + +void str_hash_init_sse42(void) +{ + hash_sparse = hash_sparse_sse42; +#if LUAJIT_SECURITY_STRHASH + hash_dense = hash_dense_sse42; +#endif + str_hash_init_random(); +} +#endif diff --git a/src/lj_tab.c b/src/lj_tab.c index c3609b38..9b93ffe1 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -14,6 +14,8 @@ #include "lj_err.h" #include "lj_tab.h" +#include + /* -- Object hashing ------------------------------------------------------ */ /* Hash an arbitrary key and return its anchor position in the hash table. */ @@ -691,3 +693,85 @@ MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint) } #endif + +GCtab * LJ_FASTCALL lj_tab_clone(lua_State *L, const GCtab *src) +{ + return lj_tab_dup(L, src); +} + +int LJ_FASTCALL lj_tab_isarray(const GCtab *src) +{ + Node *node; + cTValue *o; + ptrdiff_t i; + + node = noderef(src->node); + for (i = (ptrdiff_t)src->hmask; i >= 0; i--) + if (!tvisnil(&node[i].val)) { + o = &node[i].key; + if (LJ_UNLIKELY(tvisint(o))) { + continue; + } + if (LJ_UNLIKELY(tvisnum(o))) { + lua_Number n = numberVnum(o); + if (LJ_LIKELY(rint((double) n) == n)) { + continue; + } + } + return 0; + } + + return 1; +} + +MSize LJ_FASTCALL lj_tab_nkeys(const GCtab *t) +{ + MSize narr = (MSize)t->asize; + cTValue *e; + Node *node; + MSize i, cnt = 0; + + e = tvref(t->array); + for (i = 0; i < narr; i++) + if (LJ_LIKELY(!tvisnil(&e[i]))) + cnt++; + + if (t->hmask <= 0) + return cnt; + + node = noderef(t->node); + for (i = 0; i <= (MSize)t->hmask; i++) { + Node *n = &node[i]; + if (LJ_LIKELY(!tvisnil(&n->val))) { + cnt++; + } + } + + return cnt; +} + +int LJ_FASTCALL lj_tab_isempty(const GCtab *t) +{ + MSize narr = (MSize)t->asize; + cTValue *e; + Node *node; + MSize i; + + e = tvref(t->array); + for (i = 0; i < narr; i++) + if (LJ_LIKELY(!tvisnil(&e[i]))) + return 0; + + if (t->hmask <= 0) + return 1; + + node = noderef(t->node); + for (i = 0; i <= (MSize)t->hmask; i++) { + Node *n = &node[i]; + if 
(LJ_LIKELY(!tvisnil(&n->val))) { + return 0; + } + } + + return 1; +} diff --git a/src/lj_tab.h b/src/lj_tab.h index 2a3f76bf..ed0348a9 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h @@ -93,4 +93,9 @@ LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint); #endif +LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_clone(lua_State *L, const GCtab *src); +LJ_FUNCA int LJ_FASTCALL lj_tab_isarray(const GCtab *src); +LJ_FUNCA MSize LJ_FASTCALL lj_tab_nkeys(const GCtab *src); +LJ_FUNCA int LJ_FASTCALL lj_tab_isempty(const GCtab *t); + #endif diff --git a/src/lj_target.h b/src/lj_target.h index 19716928..3831cb60 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -144,6 +144,8 @@ typedef uint32_t RegCost; #include "lj_target_ppc.h" #elif LJ_TARGET_MIPS #include "lj_target_mips.h" +#elif LJ_TARGET_S390X +#include "lj_target_s390x.h" #else #error "Missing include for target CPU" #endif diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index bc9802a4..41378c9d 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -30,8 +30,13 @@ enum { /* Calling conventions. 
*/ RID_RET = RID_R3, +#if LJ_LE + RID_RETHI = RID_R4, + RID_RETLO = RID_R3, +#else RID_RETHI = RID_R3, RID_RETLO = RID_R4, +#endif RID_FPRET = RID_F1, /* These definitions must match with the *.dasc file(s): */ @@ -131,6 +136,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) #define PPCF_C(r) ((r) << 6) #define PPCF_MB(n) ((n) << 6) #define PPCF_ME(n) ((n) << 1) +#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1))) +#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5))) #define PPCF_Y 0x00200000 #define PPCF_DOT 0x00000001 @@ -200,6 +207,13 @@ typedef enum PPCIns { PPCI_RLWINM = 0x54000000, PPCI_RLWIMI = 0x50000000, + PPCI_RLDICL = 0x78000000, + PPCI_RLDICR = 0x78000004, + PPCI_RLDIC = 0x78000008, + PPCI_RLDIMI = 0x7800000c, + PPCI_RLDCL = 0x78000010, + PPCI_RLDCR = 0x78000012, + PPCI_B = 0x48000000, PPCI_BL = 0x48000001, PPCI_BC = 0x40800000, diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h new file mode 100644 index 00000000..10b4bd58 --- /dev/null +++ b/src/lj_target_s390x.h @@ -0,0 +1,81 @@ +/* +** Definitions for IBM z/Architecture (s390x) CPUs. +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_S390X_H +#define _LJ_TARGET_S390X_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#define GPRDEF(_) \ + _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ + _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) +#define FPRDEF(_) \ + _(F0) _(F1) _(F2) _(F3) \ + _(F4) _(F5) _(F6) _(F7) \ + _(F8) _(F9) _(F10) _(F11) \ + _(F12) _(F13) _(F14) _(F15) +// TODO: VREG? + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + + /* Calling conventions. 
*/ + RID_SP = RID_R15, + RID_RET = RID_R2, + RID_FPRET = RID_F0, + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_R7, /* Interpreter BASE. */ + RID_LPC = RID_R9, /* Interpreter PC. */ + RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */ + + /* Register ranges [min, max) and number of registers. */ + RID_MIN_GPR = RID_R0, + RID_MIN_FPR = RID_F0, + RID_MAX_GPR = RID_MIN_FPR, + RID_MAX_FPR = RID_MAX, + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR, +}; + +/* -- Register sets ------------------------------------------------------- */ + +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the *.dasc file(s). +** +** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. +*/ +#define SPS_FIXED 2 +#define SPS_FIRST 2 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +#define EXITSTUB_SPACING 4 +#define EXITSTUBS_PER_GROUP 32 + +/* -- Instructions -------------------------------------------------------- */ + +#endif + diff --git a/src/lj_trace.c b/src/lj_trace.c index c2329394..f816337c 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -312,6 +312,8 @@ void lj_trace_initstate(global_State *g) jit_State *J = G2J(g); TValue *tv; + J->prng = g->prng; + /* Initialize aligned SIMD constants. 
*/ tv = LJ_KSIMD(J, LJ_KSIMD_ABS); tv[0].u64 = U64x(7fffffff,ffffffff); @@ -390,7 +392,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ /* First try to bump its hotcount several times. */ val = ((uint32_t)J->penalty[i].val << 1) + - (lj_prng_u64(&J2G(J)->prng) & ((1u<prng) & ((1u< PENALTY_MAX) { blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ return; @@ -414,6 +416,9 @@ static void trace_start(jit_State *J) { lua_State *L; TraceNo traceno; +#ifdef LUA_USE_TRACE_LOGS + const BCIns *pc = J->pc; +#endif if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) { @@ -474,6 +479,9 @@ static void trace_start(jit_State *J) } ); lj_record_setup(J); +#ifdef LUA_USE_TRACE_LOGS + lj_log_trace_start_record(L, (unsigned) J->cur.traceno, pc, J->fn); +#endif } /* Stop tracing. */ @@ -604,21 +612,22 @@ static int trace_abort(jit_State *J) J->cur.link = 0; J->cur.linktype = LJ_TRLINK_NONE; lj_vmevent_send(L, TRACE, - TValue *frame; - const BCIns *pc; + cTValue *frame; + int size; + BCIns pc; GCfunc *fn; setstrV(L, L->top++, lj_str_newlit(L, "abort")); setintV(L->top++, traceno); - /* Find original Lua function call to generate a better error message. */ - frame = J->L->base-1; - pc = J->pc; - while (!isluafunc(frame_func(frame))) { - pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1; - frame = frame_prev(frame); - } + /* Find original function call to generate a better error message. 
*/ + frame = lj_debug_frame(L, 0, &size); + lj_assertL(frame != NULL, "missing debug frame"); fn = frame_func(frame); + if (frame == L->base-1 && isluafunc(fn)) + pc = proto_bcpos(funcproto(fn), J->pc); + else + pc = lj_debug_framepc(L, fn, frame); setfuncV(L, L->top++, fn); - setintV(L->top++, proto_bcpos(funcproto(fn), pc)); + setintV(L->top++, pc); copyTV(L, L->top++, restorestack(L, errobj)); copyTV(L, L->top++, &J->errinfo); ); @@ -931,6 +940,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) } } } +#ifdef LUA_USE_TRACE_LOGS + lj_log_trace_normal_exit(L, (int) T->traceno, pc); +#endif /* Return MULTRES or 0. */ ERRNO_RESTORE switch (bc_op(*pc)) { diff --git a/src/ljamalg.c b/src/ljamalg.c index cae8356c..4d85950a 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -88,4 +88,3 @@ #include "lib_ffi.c" #include "lib_buffer.c" #include "lib_init.c" - diff --git a/src/lua.h b/src/lua.h index 6d1634d1..3f631aa1 100644 --- a/src/lua.h +++ b/src/lua.h @@ -112,6 +112,9 @@ LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud); LUA_API void (lua_close) (lua_State *L); LUA_API lua_State *(lua_newthread) (lua_State *L); +#define HAVE_LUA_RESETTHREAD 1 +LUA_API void (lua_resetthread) (lua_State *L, lua_State *th); + LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf); @@ -245,7 +248,12 @@ LUA_API void (lua_concat) (lua_State *L, int n); LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud); LUA_API void lua_setallocf (lua_State *L, lua_Alloc f, void *ud); +LUA_API void lua_setexdata(lua_State *L, void *exdata); +LUA_API void *lua_getexdata(lua_State *L); +#define HAVE_LUA_EXDATA2 1 +LUA_API void lua_setexdata2(lua_State *L, void *exdata2); +LUA_API void *lua_getexdata2(lua_State *L); /* ** =============================================================== diff --git a/src/luajit.c b/src/luajit.c index 6dd64026..6e309260 100644 --- a/src/luajit.c +++ b/src/luajit.c @@ -303,8 +303,9 @@ static int loadjitmodule(lua_State *L) lua_concat(L, 
2); if (lua_pcall(L, 1, 1, 0)) { const char *msg = lua_tostring(L, -1); - if (msg && !strncmp(msg, "module ", 7)) - goto nomodule; + if (msg && !strncmp(msg, "module ", 7)){ + printf("hehe\n"); + goto nomodule;} return report(L, 1); } lua_getfield(L, -1, "start"); @@ -542,7 +543,6 @@ static int pmain(lua_State *L) } if ((flags & FLAGS_VERSION)) print_version(); - s->status = runargs(L, argv, argn); if (s->status != LUA_OK) return 0; diff --git a/src/luajit.h b/src/luajit.h index 31f1eb1f..a4d33001 100644 --- a/src/luajit.h +++ b/src/luajit.h @@ -30,6 +30,8 @@ #include "lua.h" +#define OPENRESTY_LUAJIT + #define LUAJIT_VERSION "LuaJIT 2.1.0-beta3" #define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ #define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3 diff --git a/src/lualib.h b/src/lualib.h index 87748456..fd84cc54 100644 --- a/src/lualib.h +++ b/src/lualib.h @@ -21,6 +21,7 @@ #define LUA_BITLIBNAME "bit" #define LUA_JITLIBNAME "jit" #define LUA_FFILIBNAME "ffi" +#define LUA_THRLIBNAME "thread" LUALIB_API int luaopen_base(lua_State *L); LUALIB_API int luaopen_math(lua_State *L); diff --git a/src/nxbuild.bat b/src/nxbuild.bat deleted file mode 100644 index c4a21f05..00000000 --- a/src/nxbuild.bat +++ /dev/null @@ -1,159 +0,0 @@ -@rem Script to build LuaJIT with NintendoSDK + NX Addon. -@rem Donated to the public domain by Swyter. -@rem -@rem To run this script you must open a "Native Tools Command Prompt for VS". -@rem -@rem Either the x86 version for NX32, or x64 for the NX64 target. -@rem This is because the pointer size of the LuaJIT host tools (buildvm.exe) -@rem must match the cross-compiled target (32 or 64 bits). -@rem -@rem Then cd to this directory and run this script. 
-@rem -@rem Recommended invocation: -@rem -@rem nxbuild # release build, amalgamated -@rem nxbuild debug # debug build, amalgamated -@rem -@rem Additional command-line options (not generally recommended): -@rem -@rem noamalg # (after debug) non-amalgamated build - -@if not defined INCLUDE goto :FAIL -@if not defined NINTENDO_SDK_ROOT goto :FAIL -@if not defined PLATFORM goto :FAIL - -@if "%platform%" == "x86" goto :DO_NX32 -@if "%platform%" == "x64" goto :DO_NX64 - -@echo Error: Current host platform is %platform%! -@echo. -@goto :FAIL - -@setlocal - -:DO_NX32 -@set DASC=vm_arm.dasc -@set DASMFLAGS= -D HFABI -D FPU -@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM -@set HOST_PTR_SIZE=4 -goto :BEGIN - -:DO_NX64 -@set DASC=vm_arm64.dasc -@set DASMFLAGS= -D ENDIAN_LE -@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM64 -@set HOST_PTR_SIZE=8 - -:BEGIN -@rem ---- Host compiler ---- -@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /wo4146 /wo4244 /D_CRT_SECURE_NO_DEPRECATE -@set LJLINK=link /nologo -@set LJMT=mt /nologo -@set DASMDIR=..\dynasm -@set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c - -%LJCOMPILE% host\minilua.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:minilua.exe minilua.obj -@if errorlevel 1 goto :BAD -if exist minilua.exe.manifest^ - %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe - -@rem Check that we have the right 32/64 bit host compiler to generate the right virtual machine files. -@minilua -@if "%ERRORLEVEL%" == "%HOST_PTR_SIZE%" goto :PASSED_PTR_CHECK - -@echo The pointer size of the host in bytes (%HOST_PTR_SIZE%) does not match the expected value (%errorlevel%). -@echo Check that the script is being ran under the correct x86/x64 VS prompt. 
-@goto :BAD - -:PASSED_PTR_CHECK -@set DASMFLAGS=%DASMFLAGS% %DASMTARGET% -D LJ_TARGET_NX -D LUAJIT_OS=LUAJIT_OS_OTHER -D LUAJIT_DISABLE_JIT -D LUAJIT_DISABLE_FFI -minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% -@if errorlevel 1 goto :BAD -%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% -D LJ_TARGET_NX -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:buildvm.exe buildvm*.obj -@if errorlevel 1 goto :BAD -if exist buildvm.exe.manifest^ - %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe - -buildvm -m elfasm -o lj_vm.s -@if errorlevel 1 goto :BAD -buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m libdef -o lj_libdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m recdef -o lj_recdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m folddef -o lj_folddef.h lj_opt_fold.c -@if errorlevel 1 goto :BAD - -@rem ---- Cross compiler ---- -@if "%platform%" neq "x64" goto :NX32_CROSSBUILD -@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c -@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-ar" rc -@set TARGETLIB_SUFFIX=nx64 - -%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-as -o lj_vm.o lj_vm.s -goto :DEBUGCHECK - -:NX32_CROSSBUILD -@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c -@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-ar" rc -@set TARGETLIB_SUFFIX=nx32 - 
-%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-as -o lj_vm.o lj_vm.s -:DEBUGCHECK - -@if "%1" neq "debug" goto :NODEBUG -@shift -@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_DEBUG -g -O0 -@set TARGETLIB=libluajitD_%TARGETLIB_SUFFIX%.a -goto :BUILD -:NODEBUG -@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_RELEASE -O3 -@set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a -:BUILD -del %TARGETLIB% -@if "%1" neq "noamalg" goto :AMALG -for %%f in (lj_*.c lib_*.c) do ( - %LJCOMPILE% %%f - @if errorlevel 1 goto :BAD -) - -%LJLIB% %TARGETLIB% lj_*.o lib_*.o -@if errorlevel 1 goto :BAD -@goto :NOAMALG -:AMALG -%LJCOMPILE% ljamalg.c -@if errorlevel 1 goto :BAD -%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o -@if errorlevel 1 goto :BAD -:NOAMALG - -@del *.o *.obj *.manifest minilua.exe buildvm.exe -@echo. -@echo === Successfully built LuaJIT for Nintendo Switch (%TARGETLIB_SUFFIX%) === - -@goto :END -:BAD -@echo. -@echo ******************************************************* -@echo *** Build FAILED -- Please check the error messages *** -@echo ******************************************************* -@goto :END -:FAIL -@echo To run this script you must open a "Native Tools Command Prompt for VS". -@echo. -@echo Either the x86 version for NX32, or x64 for the NX64 target. -@echo This is because the pointer size of the LuaJIT host tools (buildvm.exe) -@echo must match the cross-compiled target (32 or 64 bits). -@echo. -@echo Keep in mind that NintendoSDK + NX Addon must be installed, too. -:END diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 3cad37d2..14a7f821 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -21,35 +21,40 @@ |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). |// Affects reg saves, stack layout, carry/overflow/dot flags etc. |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). -|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3). +|// OPD Need function descriptors (64 bit or 32 bit variant, e.g. PS3). 
|// Function pointers are really a struct: code, TOC, env (optional). -|// TOCENV Function pointers have an environment pointer, too (not on PS3). +|// OPDENV Function pointers have an environment pointer, too (not on PS3). +|// ELFV2 The 64-bit ELF V2 ABI is in use. |// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360). |// Must avoid (slow) micro-coded instructions. | |.if P64 -|.define TOC, 1 -|.define TOCENV, 1 |.macro lpx, a, b, c; ldx a, b, c; .endmacro |.macro lp, a, b; ld a, b; .endmacro |.macro stp, a, b; std a, b; .endmacro +|.macro stpx, a, b, c; stdx a, b, c; .endmacro |.define decode_OPP, decode_OP8 -|.if FFI -|// Missing: Calling conventions, 64 bit regs, TOC. -|.error lib_ffi not yet implemented for PPC64 -|.endif +|.define PSIZE, 8 |.else |.macro lpx, a, b, c; lwzx a, b, c; .endmacro |.macro lp, a, b; lwz a, b; .endmacro |.macro stp, a, b; stw a, b; .endmacro +|.macro stpx, a, b, c; stwx a, b, c; .endmacro |.define decode_OPP, decode_OP4 +|.define PSIZE, 4 |.endif | |// Convenience macros for TOC handling. -|.if TOC +|.if OPD or ELFV2 |// Linker needs a TOC patch area for every external call relocation. -|.macro blex, target; bl extern target@plt; nop; .endmacro +|.macro blex, target; bl extern target; nop; .endmacro |.macro .toc, a, b; a, b; .endmacro +|.else +|.macro blex, target; bl extern target@plt; .endmacro +|.macro .toc, a, b; .endmacro +|.endif +|.if OPD +|.macro .opd, a, b; a, b; .endmacro |.if P64 |.define TOC_OFS, 8 |.define ENV_OFS, 16 @@ -57,13 +62,13 @@ |.define TOC_OFS, 4 |.define ENV_OFS, 8 |.endif -|.else // No TOC. -|.macro blex, target; bl extern target@plt; .endmacro -|.macro .toc, a, b; .endmacro +|.else // No OPD. 
+|.macro .opd, a, b; .endmacro |.endif -|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro +|.macro .opdenv, a, b; .if OPDENV; a, b; .endif; .endmacro | |.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro +|.macro .elfv2, a, b; .if ELFV2; a, b; .endif; .endmacro | |.macro andix., y, a, i |.if PPE @@ -74,29 +79,6 @@ |.endif |.endmacro | -|.macro clrso, reg -|.if PPE -| li reg, 0 -| mtxer reg -|.else -| mcrxr cr0 -|.endif -|.endmacro -| -|.macro checkov, reg, noov -|.if PPE -| mfxer reg -| add reg, reg, reg -| cmpwi reg, 0 -| li reg, 0 -| mtxer reg -| bgey noov -|.else -| mcrxr cr0 -| bley noov -|.endif -|.endmacro -| |//----------------------------------------------------------------------- | |// Fixed register assignments for the interpreter. @@ -122,6 +104,7 @@ |.define LREG, r18 // Register holding lua_State (also in SAVE_L). |.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. |.define JGL, r31 // On-trace: global_State + 32768. +|.define BASEP4, r26 // Equal to BASE + 4 | |// Constants for type-comparisons, stores and conversions. C callee-save. |.define TISNUM, r22 @@ -158,6 +141,12 @@ |.if FPU |.define FARG1, f1 |.define FARG2, f2 +|.define FARG3, f3 +|.define FARG4, f4 +|.define FARG5, f5 +|.define FARG6, f6 +|.define FARG7, f7 +|.define FARG8, f8 |.endif | |.define CRET1, r3 @@ -165,6 +154,7 @@ | |.define TOCREG, r2 // TOC register (only used by C code). |.define ENVREG, r11 // Environment pointer (nested C functions). +|.define FUNCREG, r12 // ELFv2 function pointer (overlaps RD) | |// Stack layout while in interpreter. Must match with lj_frame.h. |.if GPR64 @@ -198,6 +188,49 @@ |.define TMPD, TMPD_HI |.define TONUM_D, TONUM_HI | +|.elif ELFV2 +| +|// 392(sp) // \ 32 bit C frame info. +|.define SAVE_LR, 384(sp) +|.define SAVE_CR, 376(sp) // 64 bit CR save. +|.define CFRAME_SPACE, 368 // Delta for sp. 
+|// Back chain for sp: 368(sp) <-- sp entering interpreter +|.define SAVE_ERRF, 360(sp) // | +|.define SAVE_NRES, 356(sp) // | +|.define SAVE_L, 352(sp) // > Parameter save area. +|.define SAVE_PC, 348(sp) // | +|.define SAVE_MULTRES, 344(sp) // | +|.define SAVE_CFRAME, 336(sp) // / 64 bit C frame chain. +|.define SAVE_FPR_, 192 // .. 192+18*8: 64 bit FPR saves. +|.define SAVE_GPR_, 48 // .. 48+18*8: 64 bit GPR saves. +|.if ENDIAN_LE +|.define TMPD_HI, 44(sp) +|.define TMPD_LO, 40(sp) +|.define TONUM_HI, 36(sp) +|.define TONUM_LO, 32(sp) +|.else +|.define TMPD_LO, 44(sp) +|.define TMPD_HI, 40(sp) +|.define TONUM_LO, 36(sp) +|.define TONUM_HI, 32(sp) +|.endif +|.define SAVE_TOC, 24(sp) // TOC save area. +|// Next frame lr: 16(sp) +|// Next frame cr: 8(sp) +|// Back chain for sp: 0(sp) <-- sp while in interpreter +| +|.if ENDIAN_LE +|.define TMPD_BLO, 32(sp) +|.define TMPD, TMPD_LO +|.define TONUM_D, TONUM_LO +|.else +|.define TMPD_BLO, 39(sp) +|.define TMPD, TMPD_HI +|.define TONUM_D, TONUM_HI +|.endif +| +|.define EXIT_OFFSET, 32 +| |.else | |// 508(sp) // \ 32 bit C frame info. @@ -208,23 +241,39 @@ |.define SAVE_MULTRES, 456(sp) // | |.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain. |.define SAVE_LR, 416(sp) +|.define SAVE_CR, 408(sp) // 64 bit CR save. |.define CFRAME_SPACE, 400 // Delta for sp. |// Back chain for sp: 400(sp) <-- sp entering interpreter |.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves. |.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves. |// 48(sp) // Callee parameter save area (ABI mandated). |.define SAVE_TOC, 40(sp) // TOC save area. +|.if ENDIAN_LE +|.define TMPD_HI, 36(sp) // \ Link editor temp (ABI mandated). +|.define TMPD_LO, 32(sp) // / +|.define TONUM_HI, 28(sp) // \ Compiler temp (ABI mandated). +|.define TONUM_LO, 24(sp) // / +|.else |.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated). |.define TMPD_HI, 32(sp) // / |.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated). 
|.define TONUM_HI, 24(sp) // / +|.endif |// Next frame lr: 16(sp) -|.define SAVE_CR, 8(sp) // 64 bit CR save. +|// Next frame cr: 8(sp) |// Back chain for sp: 0(sp) <-- sp while in interpreter | +|.if ENDIAN_LE +|.define TMPD_BLO, 32(sp) +|.define TMPD, TMPD_LO +|.define TONUM_D, TONUM_LO +|.else |.define TMPD_BLO, 39(sp) |.define TMPD, TMPD_HI |.define TONUM_D, TONUM_HI +|.endif +| +|.define EXIT_OFFSET, 112 | |.endif |.else @@ -249,10 +298,17 @@ |.define SAVE_MULTRES, 28(sp) |.define UNUSED1, 24(sp) |.if FPU +|.if ENDIAN_LE +|.define TMPD_HI, 20(sp) +|.define TMPD_LO, 16(sp) +|.define TONUM_HI, 12(sp) +|.define TONUM_LO, 8(sp) +|.else |.define TMPD_LO, 20(sp) |.define TMPD_HI, 16(sp) |.define TONUM_LO, 12(sp) |.define TONUM_HI, 8(sp) +|.endif |.else |.define SFSAVE_4, 20(sp) |.define SFSAVE_3, 16(sp) @@ -263,10 +319,22 @@ |// Back chain for sp: 0(sp) <-- sp while in interpreter | |.if FPU +|.if ENDIAN_LE +|.define TMPD_BLO, 16(sp) +|.define TMPD, TMPD_LO +|.define TONUM_D, TONUM_LO +|.else |.define TMPD_BLO, 23(sp) |.define TMPD, TMPD_HI |.define TONUM_D, TONUM_HI |.endif +|.else +|.define TMPD_BLO, 23(sp) +|.define TMPD, TMPD_HI +|.define TONUM_D, TONUM_HI +|.endif +| +|.define EXIT_OFFSET, 16 | |.endif | @@ -383,8 +451,35 @@ |//----------------------------------------------------------------------- | |// Access to frame relative to BASE. 
+|.if ENDIAN_LE +|.define FRAME_PC, -4 +|.define FRAME_FUNC, -8 +|.define FRAME_CONTPC, -12 +|.define FRAME_CONTRET, -16 +|.define WORD_LO, 0 +|.define WORD_HI, 4 +|.define WORD_BLO, 0 +|.define BASE_LO, BASE +|.define BASE_HI, BASEP4 +|.macro lwzux2, hi, lo, base, idx +| lwzux lo, base, idx +| lwz hi, 4(base) +|.endmacro +|.else |.define FRAME_PC, -8 |.define FRAME_FUNC, -4 +|.define FRAME_CONTPC, -16 +|.define FRAME_CONTRET, -12 +|.define WORD_LO, 4 +|.define WORD_HI, 0 +|.define WORD_BLO, 7 +|.define BASE_LO, BASEP4 +|.define BASE_HI, BASE +|.macro lwzux2, hi, lo, base, idx +| lwzux hi, base, idx +| lwz lo, 4(base) +|.endmacro +|.endif | |// Instruction decode. |.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro @@ -445,6 +540,7 @@ |// Call decode and dispatch. |.macro ins_callt | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| addi BASEP4, BASE, 4 | lwz PC, LFUNC:RB->pc | lwz INS, 0(PC) | addi PC, PC, 4 @@ -537,7 +633,12 @@ static void build_subroutines(BuildCtx *ctx) | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. | mr BASE, TMP2 // Restore caller base. | // Prepending may overwrite the pcall frame, so do it at the end. - | stwu TMP1, FRAME_PC(RA) // Prepend true to results. + | .if ENDIAN_LE + | addi RA, RA, -8 + | stw TMP1, WORD_HI(RA) // Prepend true to results. + | .else + | stwu TMP1, -8(RA) // Prepend true to results. + | .endif | |->vm_returnc: | addi RD, RD, 8 // RD = (nresults+1)*8. @@ -603,7 +704,7 @@ static void build_subroutines(BuildCtx *ctx) | lwz TMP1, L->maxstack | cmplw BASE, TMP1 | bge >8 - | stw TISNIL, 0(BASE) + | stw TISNIL, WORD_HI(BASE) | addi RD, RD, 8 | addi BASE, BASE, 8 | b <2 @@ -654,7 +755,12 @@ static void build_subroutines(BuildCtx *ctx) |->vm_unwind_ff_eh: // Landing pad for external unwinder. | lwz L, SAVE_L | .toc ld TOCREG, SAVE_TOC + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. 
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM // Setup type comparison constants. + |.endif | lp BASE, L->base | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | lwz DISPATCH, L->glref // Setup pointer to dispatch table. @@ -669,7 +775,7 @@ static void build_subroutines(BuildCtx *ctx) | la RA, -8(BASE) // Results start at BASE-8. | .FPU stw TMP3, TMPD | addi DISPATCH, DISPATCH, GG_G2DISP - | stw TMP1, 0(RA) // Prepend false to error message. + | stw TMP1, WORD_HI(RA) // Prepend false to error message. | li RD, 16 // 2 results: false + error message. | st_vmstate | .FPU lfs TONUM, TMPD @@ -730,7 +836,12 @@ static void build_subroutines(BuildCtx *ctx) | stw L, DISPATCH_GL(cur_L)(DISPATCH) | mr RA, BASE | lp BASE, L->base + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. + | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM // Setup type comparison constants. + |.endif | lp TMP1, L->top | lwz PC, FRAME_PC(BASE) | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). @@ -780,7 +891,12 @@ static void build_subroutines(BuildCtx *ctx) |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | stw L, DISPATCH_GL(cur_L)(DISPATCH) | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. + | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM // Setup type comparison constants. + |.endif | lp TMP1, L->top | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
| add PC, PC, BASE @@ -800,8 +916,8 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_call_dispatch: | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | lwz TMP0, FRAME_PC(BASE) - | lwz LFUNC:RB, FRAME_FUNC(BASE) + | lwz TMP0, WORD_HI-8(BASE) + | lwz LFUNC:RB, WORD_LO-8(BASE) | checkfunc TMP0; bne ->vmeta_call | |->vm_call_dispatch_f: @@ -820,7 +936,9 @@ static void build_subroutines(BuildCtx *ctx) | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | lp TMP1, L->cframe | addi DISPATCH, DISPATCH, GG_G2DISP - | .toc lp CARG4, 0(CARG4) + | .opd lp TOCREG, TOC_OFS(CARG4) + | .opdenv lp ENVREG, ENV_OFS(CARG4) + | .opd lp CARG4, 0(CARG4) | li TMP2, 0 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | stw TMP2, SAVE_ERRF // No error function. @@ -828,7 +946,9 @@ static void build_subroutines(BuildCtx *ctx) | stp sp, L->cframe // Add our C frame to cframe chain. | stw L, DISPATCH_GL(cur_L)(DISPATCH) | mtctr CARG4 + | .elfv2 mr FUNCREG, CARG4 | bctrl // (lua_State *L, lua_CFunction func, void *ud) + | .toc lp TOCREG, SAVE_TOC |.if PPE | mr BASE, CRET1 | cmpwi CRET1, 0 @@ -850,7 +970,7 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_dispatch: | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lwz TMP0, -12(BASE) // Continuation. + | lwz TMP0, FRAME_CONTRET(BASE) // Continuation. | mr RB, BASE | mr BASE, TMP2 // Restore caller BASE. | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) @@ -860,6 +980,9 @@ static void build_subroutines(BuildCtx *ctx) | lwz PC, -16(RB) // Restore PC from [cont|PC]. | subi TMP2, RD, 8 | stwx TISNIL, RA, TMP2 // Ensure one valid arg. 
+ |.if P64 + | ld TMP3, 0(DISPATCH) + |.endif |.if FFI | ble >1 |.endif @@ -914,20 +1037,20 @@ static void build_subroutines(BuildCtx *ctx) | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | li TMP0, LJ_TSTR | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) + | stw STR:RC, WORD_LO(CARG3) | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) + | stw TMP0, WORD_HI(CARG3) | b >1 | |->vmeta_tgets: | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) + | stw TAB:RB, WORD_LO(CARG2) | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) + | stw TMP0, WORD_HI(CARG2) | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) + | stw STR:RC, WORD_LO(CARG3) + | stw TMP1, WORD_HI(CARG3) | b >1 | |->vmeta_tgetb: // TMP0 = index @@ -938,8 +1061,8 @@ static void build_subroutines(BuildCtx *ctx) | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | add CARG2, BASE, RB |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) + | stw TISNUM, WORD_HI(CARG3) + | stw TMP0, WORD_LO(CARG3) |.else | stfd f0, 0(CARG3) |.endif @@ -977,7 +1100,7 @@ static void build_subroutines(BuildCtx *ctx) | // BASE = base, L->top = new base, stack = cont/func/t/k | subfic TMP1, BASE, FRAME_CONT | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] + | stw PC, FRAME_CONTPC(BASE) // [cont|PC] | add PC, TMP1, BASE | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | li NARGS8:RC, 16 // 2 args for func(t, k). 
@@ -996,7 +1119,10 @@ static void build_subroutines(BuildCtx *ctx) |.endif | b ->BC_TGETR_Z |1: - | stwx TISNIL, BASE, RA + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | stwx TISNIL, BASE_HI, RA | b ->cont_nop | |//----------------------------------------------------------------------- @@ -1005,20 +1131,20 @@ static void build_subroutines(BuildCtx *ctx) | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | li TMP0, LJ_TSTR | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) + | stw STR:RC, WORD_LO(CARG3) | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) + | stw TMP0, WORD_HI(CARG3) | b >1 | |->vmeta_tsets: | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) + | stw TAB:RB, WORD_LO(CARG2) | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) + | stw TMP0, WORD_HI(CARG2) | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) + | stw STR:RC, WORD_LO(CARG3) + | stw TMP1, WORD_HI(CARG3) | b >1 | |->vmeta_tsetb: // TMP0 = index @@ -1029,8 +1155,8 @@ static void build_subroutines(BuildCtx *ctx) | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | add CARG2, BASE, RB |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) + | stw TISNUM, WORD_HI(CARG3) + | stw TMP0, WORD_LO(CARG3) |.else | stfd f0, 0(CARG3) |.endif @@ -1069,7 +1195,7 @@ static void build_subroutines(BuildCtx *ctx) | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | subfic TMP1, BASE, FRAME_CONT | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] + | stw PC, FRAME_CONTPC(BASE) // [cont|PC] | add PC, TMP1, BASE | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 
| li NARGS8:RC, 24 // 3 args for func(t, k, v) @@ -1100,17 +1226,9 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_comp: | mr CARG1, L | subi PC, PC, 4 - |.if DUALNUM - | mr CARG2, RA - |.else | add CARG2, BASE, RA - |.endif | stw PC, SAVE_PC - |.if DUALNUM - | mr CARG3, RD - |.else | add CARG3, BASE, RD - |.endif | stp BASE, L->base | decode_OP1 CARG4, INS | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) @@ -1147,7 +1265,7 @@ static void build_subroutines(BuildCtx *ctx) | b ->cont_nop | |->cont_condt: // RA = resultptr - | lwz TMP0, 0(RA) + | lwz TMP0, WORD_HI(RA) | .gpr64 extsw TMP0, TMP0 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. | subfe CRET1, CRET1, CRET1 @@ -1155,7 +1273,7 @@ static void build_subroutines(BuildCtx *ctx) | b <4 | |->cont_condf: // RA = resultptr - | lwz TMP0, 0(RA) + | lwz TMP0, WORD_HI(RA) | .gpr64 extsw TMP0, TMP0 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false. | subfe CRET1, CRET1, CRET1 @@ -1207,8 +1325,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vmeta_unm: - | mr CARG3, RD - | mr CARG4, RD + | add CARG3, BASE, RD + | add CARG4, BASE, RD | b >1 | |->vmeta_arith_vn: @@ -1243,7 +1361,7 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_binop: | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 | sub TMP1, CRET1, BASE - | stw PC, -16(CRET1) // [cont|PC] + | stw PC, FRAME_CONTPC(CRET1) // [cont|PC] | mr TMP2, BASE | addi PC, TMP1, FRAME_CONT | mr BASE, CRET1 @@ -1254,7 +1372,7 @@ static void build_subroutines(BuildCtx *ctx) #if LJ_52 | mr SAVE0, CARG1 #endif - | mr CARG2, RD + | add CARG2, BASE, RD | stp BASE, L->base | mr CARG1, L | stw PC, SAVE_PC @@ -1331,25 +1449,25 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_1, name |->ff_ .. name: | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz CARG1, 4(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz CARG1, WORD_LO(BASE) | blt ->fff_fallback |.endmacro | |.macro .ffunc_2, name |->ff_ .. 
name: | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz CARG4, 8(BASE) - | lwz CARG1, 4(BASE) - | lwz CARG2, 12(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz CARG4, WORD_HI+8(BASE) + | lwz CARG1, WORD_LO(BASE) + | lwz CARG2, WORD_LO+8(BASE) | blt ->fff_fallback |.endmacro | |.macro .ffunc_n, name |->ff_ .. name: | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) |.if FPU | lfd FARG1, 0(BASE) |.else @@ -1362,15 +1480,15 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_nn, name |->ff_ .. name: | cmplwi NARGS8:RC, 16 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) |.if FPU | lfd FARG1, 0(BASE) - | lwz CARG3, 8(BASE) + | lwz CARG3, WORD_HI+8(BASE) | lfd FARG2, 8(BASE) |.else - | lwz CARG2, 4(BASE) - | lwz CARG3, 8(BASE) - | lwz CARG4, 12(BASE) + | lwz CARG2, WORD_LO(BASE) + | lwz CARG3, WORD_HI+8(BASE) + | lwz CARG4, WORD_LO+8(BASE) |.endif | blt ->fff_fallback | checknum CARG1; bge ->fff_fallback @@ -1393,17 +1511,17 @@ static void build_subroutines(BuildCtx *ctx) | cmplw cr1, CARG3, TMP1 | lwz PC, FRAME_PC(BASE) | bge cr1, ->fff_fallback - | stw CARG3, 0(RA) + | stw CARG3, WORD_HI(RA) | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | addi TMP1, BASE, 8 | add TMP2, RA, NARGS8:RC - | stw CARG1, 4(RA) + | stw CARG1, WORD_LO(RA) | beq ->fff_res // Done if exactly 1 argument. 
|1: | cmplw TMP1, TMP2 |.if FPU | lfd f0, 0(TMP1) - | stfd f0, 0(TMP1) + | stfd f0, -8(TMP1) |.else | lwz CARG1, 0(TMP1) | lwz CARG2, 4(TMP1) @@ -1416,14 +1534,28 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc type | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) | blt ->fff_fallback | .gpr64 extsw CARG1, CARG1 + |.if P64 + | li TMP0, LJ_TNUMX + | srawi TMP3, CARG1, 15 + | subfc TMP1, TMP0, CARG1 + |.else | subfc TMP0, TISNUM, CARG1 - | subfe TMP2, CARG1, CARG1 + |.endif + | subfe TMP2, CARG1, CARG1 + |.if P64 + | cmpwi TMP3, -2 + | orc TMP1, TMP2, TMP1 + | subf TMP1, TMP0, TMP1 + | beq >1 + |.else | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 + | subf TMP1, TISNUM, TMP1 + |.endif | slwi TMP1, TMP1, 3 + |2: |.if FPU | la TMP2, CFUNC:RB->upvalue | lfdx FARG1, TMP2, TMP1 @@ -1433,6 +1565,11 @@ static void build_subroutines(BuildCtx *ctx) | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo |.endif | b ->fff_resn + |.if P64 + |1: + | li TMP1, ~LJ_TLIGHTUD<<3 + | b <2 + |.endif | |//-- Base library: getters and setters --------------------------------- | @@ -1455,10 +1592,10 @@ static void build_subroutines(BuildCtx *ctx) | sub TMP1, TMP0, TMP1 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |3: // Rearranged logic, because we expect _not_ to find the key. 
- | lwz CARG4, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) + | lwz CARG4, WORD_HI+offsetof(Node, key)(NODE:TMP2) + | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) + | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) + | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2) | checkstr CARG4; bne >4 | cmpw TMP0, STR:RC; beq >5 |4: @@ -1476,14 +1613,33 @@ static void build_subroutines(BuildCtx *ctx) |6: | cmpwi CARG3, LJ_TUDATA; beq <1 | .gpr64 extsw CARG3, CARG3 + |.if P64 + | li TMP0, LJ_TNUMX + | srawi TMP3, CARG3, 15 + | subfc TMP1, TMP0, CARG3 + |.else | subfc TMP0, TISNUM, CARG3 + |.endif | subfe TMP2, CARG3, CARG3 + |.if P64 + | cmpwi TMP3, -2 + | orc TMP1, TMP2, TMP1 + | subf TMP1, TMP0, TMP1 + | beq >7 + |.else | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 + | subf TMP1, TISNUM, TMP1 + |.endif | slwi TMP1, TMP1, 2 + |8: | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) | lwzx TAB:CARG1, TMP2, TMP1 | b <2 + |.if P64 + |7: + | li TMP1, ~LJ_TLIGHTUD<<2 + | b <8 + |.endif | |.ffunc_2 setmetatable | // Fast path: no mt for table yet and not clearing the mt. @@ -1501,8 +1657,8 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc rawget | cmplwi NARGS8:RC, 16 - | lwz CARG4, 0(BASE) - | lwz TAB:CARG2, 4(BASE) + | lwz CARG4, WORD_HI(BASE) + | lwz TAB:CARG2, WORD_LO(BASE) | blt ->fff_fallback | checktab CARG4; bne ->fff_fallback | la CARG3, 8(BASE) @@ -1522,11 +1678,11 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc tonumber | // Only handles the number case inline (without a base argument). | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) |.if FPU | lfd FARG1, 0(BASE) |.else - | lwz CARG2, 4(BASE) + | lwz CARG2, WORD_LO(BASE) |.endif | bne ->fff_fallback // Exactly one argument. 
| checknum CARG1; bgt ->fff_fallback @@ -1602,7 +1758,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | la RA, -8(BASE) #endif - | stw TISNIL, 8(BASE) + | stw TISNIL, 8+WORD_HI(BASE) | li RD, (3+1)*8 |.if FPU | stfd f0, 0(RA) @@ -1614,11 +1770,11 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc ipairs_aux | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz TAB:CARG1, 4(BASE) - | lwz CARG4, 8(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz TAB:CARG1, WORD_LO(BASE) + | lwz CARG4, 8+WORD_HI(BASE) |.if DUALNUM - | lwz TMP2, 12(BASE) + | lwz TMP2, 8+WORD_LO(BASE) |.else | lfd FARG2, 8(BASE) |.endif @@ -1647,20 +1803,20 @@ static void build_subroutines(BuildCtx *ctx) | la RA, -8(BASE) | cmplw TMP0, TMP2 |.if DUALNUM - | stw TISNUM, 0(RA) + | stw TISNUM, WORD_HI(RA) | slwi TMP3, TMP2, 3 - | stw TMP2, 4(RA) + | stw TMP2, WORD_LO(RA) |.else | slwi TMP3, TMP2, 3 | stfd FARG2, 0(RA) |.endif | ble >2 // Not in array part? |.if FPU - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 + | lfdux f0, TMP1, TMP3 + | lwz TMP2, WORD_HI(TMP1) |.else | lwzux TMP2, TMP1, TMP3 - | lwz TMP3, 4(TMP1) + | lwz TMP3, WORD_HI(TMP1) |.endif |1: | checknil TMP2 @@ -1685,7 +1841,7 @@ static void build_subroutines(BuildCtx *ctx) | cmplwi CRET1, 0 | li RD, (0+1)*8 | beq ->fff_res - | lwz TMP2, 0(CRET1) + | lwz TMP2, WORD_HI(CRET1) |.if FPU | lfd f0, 0(CRET1) |.else @@ -1718,11 +1874,11 @@ static void build_subroutines(BuildCtx *ctx) | la RA, -8(BASE) #endif |.if DUALNUM - | stw TISNUM, 8(BASE) + | stw TISNUM, 8+WORD_HI(BASE) |.else - | stw ZERO, 8(BASE) + | stw ZERO, 8+WORD_HI(BASE) |.endif - | stw ZERO, 12(BASE) + | stw ZERO, 8+WORD_LO(BASE) | li RD, (3+1)*8 |.if FPU | stfd f0, 0(RA) @@ -1748,7 +1904,7 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc xpcall | cmplwi NARGS8:RC, 16 - | lwz CARG3, 8(BASE) + | lwz CARG3, 8+WORD_HI(BASE) |.if FPU | lfd FARG2, 8(BASE) | lfd FARG1, 0(BASE) @@ -1879,7 +2035,7 @@ static void build_subroutines(BuildCtx *ctx) |.if resume | li TMP1, 
LJ_TTRUE | la RA, -8(BASE) - | stw TMP1, -8(BASE) // Prepend true to results. + | stw TMP1, WORD_HI-8(BASE) // Prepend true to results. | addi RD, RD, 16 |.else | mr RA, BASE @@ -1904,7 +2060,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. | li RD, (2+1)*8 - | stw TMP1, -8(BASE) // Prepend false to results. + | stw TMP1, WORD_HI-8(BASE) // Prepend false to results. | la RA, -8(BASE) |.if FPU | stfd f0, 0(BASE) // Copy error message. @@ -1962,8 +2118,8 @@ static void build_subroutines(BuildCtx *ctx) |->fff_resi: | lwz PC, FRAME_PC(BASE) | la RA, -8(BASE) - | stw TISNUM, -8(BASE) - | stw CRET1, -4(BASE) + | stw TISNUM, WORD_HI-8(BASE) + | stw CRET1, WORD_LO-8(BASE) | b ->fff_res1 |1: | lus CARG3, 0x41e0 // 2^31. @@ -1978,9 +2134,9 @@ static void build_subroutines(BuildCtx *ctx) |->fff_restv: | // CARG3/CARG1 = TValue result. | lwz PC, FRAME_PC(BASE) - | stw CARG3, -8(BASE) + | stw CARG3, WORD_HI-8(BASE) | la RA, -8(BASE) - | stw CARG1, -4(BASE) + | stw CARG1, WORD_LO-8(BASE) |->fff_res1: | // RA = results, PC = return. | li RD, (1+1)*8 @@ -1998,10 +2154,11 @@ static void build_subroutines(BuildCtx *ctx) | ins_next1 | // Adjust BASE. KBASE is assumed to be set for the calling frame. | sub BASE, RA, TMP0 + | addi BASEP4, BASE, 4 | ins_next2 | |6: // Fill up results with nil. - | subi TMP1, RD, 8 + | addi TMP1, RD, WORD_HI-8 | addi RD, RD, 8 | stwx TISNIL, RA, TMP1 | b <5 @@ -2119,7 +2276,7 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc math_log | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) | bne ->fff_fallback // Need exactly 1 argument. 
| checknum CARG1; bge ->fff_fallback |.if FPU @@ -2148,20 +2305,20 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM |.ffunc math_ldexp | cmplwi NARGS8:RC, 16 - | lwz TMP0, 0(BASE) + | lwz TMP0, WORD_HI(BASE) |.if FPU | lfd FARG1, 0(BASE) |.else - | lwz CARG1, 0(BASE) - | lwz CARG2, 4(BASE) + | lwz CARG1, WORD_HI(BASE) + | lwz CARG2, WORD_LO(BASE) |.endif - | lwz TMP1, 8(BASE) + | lwz TMP1, WORD_HI+8(BASE) |.if GPR64 - | lwz CARG2, 12(BASE) + | lwz CARG2, WORD_LO+8(BASE) |.elif FPU - | lwz CARG1, 12(BASE) + | lwz CARG1, WORD_LO+8(BASE) |.else - | lwz CARG3, 12(BASE) + | lwz CARG3, WORD_LO+8(BASE) |.endif | blt ->fff_fallback | checknum TMP0; bge ->fff_fallback @@ -2200,8 +2357,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif | li RD, (2+1)*8 |.if DUALNUM - | stw TISNUM, 8(RA) - | stw TMP1, 12(RA) + | stw TISNUM, WORD_HI+8(RA) + | stw TMP1, WORD_LO+8(RA) |.else | stfd FARG2, 8(RA) |.endif @@ -2235,9 +2392,9 @@ static void build_subroutines(BuildCtx *ctx) | add SAVE1, BASE, NARGS8:RC | bne >4 |1: // Handle integers. - | lwz CARG4, 0(SAVE0) + | lwz CARG4, WORD_HI(SAVE0) | cmplw cr1, SAVE0, SAVE1 - | lwz CARG2, 4(SAVE0) + | lwz CARG2, WORD_LO(SAVE0) | bge cr1, ->fff_resi | checknum CARG4 | xoris TMP0, CARG1, 0x8000 @@ -2278,7 +2435,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | bge ->fff_fallback |5: // Handle numbers. - | lwz CARG3, 0(SAVE0) + | lwz CARG3, WORD_HI(SAVE0) | cmplw cr1, SAVE0, SAVE1 |.if FPU | lfd FARG2, 0(SAVE0) @@ -2317,7 +2474,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | b <5 |7: // Convert integer to number and continue above. 
- | lwz CARG3, 4(SAVE0) + | lwz CARG3, WORD_LO(SAVE0) | bne ->fff_fallback |.if FPU | tonum_i FARG2, CARG3 @@ -2329,7 +2486,12 @@ static void build_subroutines(BuildCtx *ctx) | .ffunc_n name | li TMP1, 8 |1: + |.if ENDIAN_LE + | add CARG2, BASE, TMP1 + | lwz CARG2, WORD_HI(CARG2) + |.else | lwzx CARG2, BASE, TMP1 + |.endif | lfdx FARG2, BASE, TMP1 | cmplw cr1, TMP1, NARGS8:RC | checknum CARG2 @@ -2353,8 +2515,8 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc string_byte // Only handle the 1-arg case here. | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG1, 4(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz STR:CARG1, WORD_LO(BASE) | bne ->fff_fallback // Need exactly 1 argument. | checkstr CARG3 | bne ->fff_fallback @@ -2385,12 +2547,12 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc string_char // Only handle the 1-arg case here. | ffgccheck | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) + | lwz CARG3, WORD_HI(BASE) |.if DUALNUM - | lwz TMP0, 4(BASE) + | lwz TMP0, WORD_LO(BASE) | bne ->fff_fallback // Exactly 1 argument. | checknum CARG3; bne ->fff_fallback - | la CARG2, 7(BASE) + | la CARG2, WORD_BLO(BASE) |.else | lfd FARG1, 0(BASE) | bne ->fff_fallback // Exactly 1 argument. 
@@ -2414,16 +2576,16 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc string_sub | ffgccheck | cmplwi NARGS8:RC, 16 - | lwz CARG3, 16(BASE) + | lwz CARG3, WORD_HI+16(BASE) |.if not DUALNUM | lfd f0, 16(BASE) |.endif - | lwz TMP0, 0(BASE) - | lwz STR:CARG1, 4(BASE) + | lwz TMP0, WORD_HI(BASE) + | lwz STR:CARG1, WORD_LO(BASE) | blt ->fff_fallback - | lwz CARG2, 8(BASE) + | lwz CARG2, WORD_HI+8(BASE) |.if DUALNUM - | lwz TMP1, 12(BASE) + | lwz TMP1, WORD_LO+8(BASE) |.else | lfd f1, 8(BASE) |.endif @@ -2431,7 +2593,7 @@ static void build_subroutines(BuildCtx *ctx) | beq >1 |.if DUALNUM | checknum CARG3 - | lwz TMP2, 20(BASE) + | lwz TMP2, WORD_LO+16(BASE) | bne ->fff_fallback |1: | checknum CARG2; bne ->fff_fallback @@ -2487,8 +2649,8 @@ static void build_subroutines(BuildCtx *ctx) | .ffunc string_ .. name | ffgccheck | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG2, 4(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz STR:CARG2, WORD_LO(BASE) | blt ->fff_fallback | checkstr CARG3 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) @@ -2526,10 +2688,10 @@ static void build_subroutines(BuildCtx *ctx) | addi SAVE0, BASE, 8 | add SAVE1, BASE, NARGS8:RC |1: - | lwz CARG4, 0(SAVE0) + | lwz CARG4, WORD_HI(SAVE0) | cmplw cr1, SAVE0, SAVE1 |.if DUALNUM - | lwz CARG2, 4(SAVE0) + | lwz CARG2, WORD_LO(SAVE0) |.else | lfd FARG1, 0(SAVE0) |.endif @@ -2696,20 +2858,23 @@ static void build_subroutines(BuildCtx *ctx) | |->fff_fallback: // Call fast function fallback handler. | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lp TMP3, CFUNC:RB->f + | lp FUNCREG, CFUNC:RB->f | add TMP1, BASE, NARGS8:RC | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. | addi TMP0, TMP1, 8*LUA_MINSTACK | lwz TMP2, L->maxstack | stw PC, SAVE_PC // Redundant (but a defined value). 
- | .toc lp TMP3, 0(TMP3) + | .opd lp TOCREG, TOC_OFS(FUNCREG) + | .opdenv lp ENVREG, ENV_OFS(FUNCREG) + | .opd lp FUNCREG, 0(FUNCREG) | cmplw TMP0, TMP2 | stp BASE, L->base | stp TMP1, L->top | mr CARG1, L | bgt >5 // Need to grow stack. - | mtctr TMP3 + | mtctr FUNCREG | bctrl // (lua_State *L) + | .toc lp TOCREG, SAVE_TOC | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | lp BASE, L->base | cmpwi CRET1, 0 @@ -2811,6 +2976,7 @@ static void build_subroutines(BuildCtx *ctx) |3: | lp BASE, L->base |4: // Re-dispatch to static ins. + | addi BASEP4, BASE, 4 | lwz INS, -4(PC) | decode_OPP TMP1, INS | decode_RB8 RB, INS @@ -2824,7 +2990,7 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_hook: // Continue from hook yield. | addi PC, PC, 4 - | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. + | lwz MULTRES, WORD_LO-24(RB) // Restore MULTRES for *M ins. | b <4 | |->vm_hotloop: // Hot loop counter underflow. @@ -2866,6 +3032,7 @@ static void build_subroutines(BuildCtx *ctx) | lp BASE, L->base | lp TMP0, L->top | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. + | addi BASEP4, BASE, 4 | sub NARGS8:RC, TMP0, BASE | add RA, BASE, RA | lwz LFUNC:RB, FRAME_FUNC(BASE) @@ -2877,7 +3044,7 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | // RA = resultptr, RB = meta base | lwz INS, -4(PC) - | lwz TRACE:TMP2, -20(RB) // Save previous trace. + | lwz TRACE:TMP2, WORD_LO-24(RB) // Save previous trace. | addic. TMP1, MULTRES, -8 | decode_RA8 RC, INS // Call base. 
| beq >2 @@ -2923,10 +3090,16 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG2, PC | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | lp BASE, L->base + | addi BASEP4, BASE, 4 | b ->cont_nop | |9: + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + | stwx TISNIL, BASEP4, RC + |.else | stwx TISNIL, BASE, RC + |.endif | addi RC, RC, 8 | b <3 |.endif @@ -2941,6 +3114,7 @@ static void build_subroutines(BuildCtx *ctx) | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | lp BASE, L->base | subi PC, PC, 4 + | addi BASEP4, BASE, 4 | b ->cont_nop #endif | @@ -2950,40 +3124,73 @@ static void build_subroutines(BuildCtx *ctx) | |.macro savex_, a, b, c, d |.if FPU - | stfd f..a, 16+a*8(sp) - | stfd f..b, 16+b*8(sp) - | stfd f..c, 16+c*8(sp) - | stfd f..d, 16+d*8(sp) + | stfd f..a, EXIT_OFFSET+a*8(sp) + | stfd f..b, EXIT_OFFSET+b*8(sp) + | stfd f..c, EXIT_OFFSET+c*8(sp) + | stfd f..d, EXIT_OFFSET+d*8(sp) |.endif |.endmacro | + |.macro saver, a + | stp r..a, EXIT_OFFSET+32*8+a*PSIZE(sp) + |.endmacro + | |->vm_exit_handler: |.if JIT - | addi sp, sp, -(16+32*8+32*4) - | stmw r2, 16+32*8+2*4(sp) + | addi sp, sp, -(EXIT_OFFSET+32*8+32*PSIZE) + | saver 3 // CARG1 + | saver 4 // CARG2 + | saver 5 // CARG3 + | saver 17 // DISPATCH | addi DISPATCH, JGL, -GG_DISP2G-32768 | li CARG2, ~LJ_VMST_EXIT - | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain. + | lp CARG1, EXIT_OFFSET+32*8+32*PSIZE(sp) // Get stack chain. | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH) + | saver 2 + | saver 6 + | saver 7 + | saver 8 + | saver 9 + | saver 10 + | saver 11 + | saver 12 + | saver 13 | savex_ 0,1,2,3 - | stw CARG1, 0(sp) // Store extended stack chain. - | clrso TMP1 + | stp CARG1, 0(sp) // Store extended stack chain. + | savex_ 4,5,6,7 - | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp. + | saver 14 + | saver 15 + | saver 16 + | saver 18 + | addi CARG2, sp, EXIT_OFFSET+32*8+32*PSIZE // Recompute original value of sp. 
| savex_ 8,9,10,11 - | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP. + | stp CARG2, EXIT_OFFSET+32*8+1*PSIZE(sp) // Store sp in RID_SP. | savex_ 12,13,14,15 | mflr CARG3 | li TMP1, 0 | savex_ 16,17,18,19 - | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP. + | stw TMP1, EXIT_OFFSET+32*8+0*PSIZE(sp) // Clear RID_TMP. | savex_ 20,21,22,23 | lhz CARG4, 2(CARG3) // Load trace number. | savex_ 24,25,26,27 | lwz L, DISPATCH_GL(cur_L)(DISPATCH) | savex_ 28,29,30,31 + | saver 19 + | saver 20 + | saver 21 + | saver 22 + | saver 23 + | saver 24 + | saver 25 + | saver 26 + | saver 27 + | saver 28 + | saver 29 + | saver 30 + | saver 31 | sub CARG3, TMP0, CARG3 // Compute exit number. - | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) + | lwz BASE, DISPATCH_GL(jit_base)(DISPATCH) | srwi CARG3, CARG3, 2 | stp L, DISPATCH_J(L)(DISPATCH) | subi CARG3, CARG3, 2 @@ -2992,11 +3199,11 @@ static void build_subroutines(BuildCtx *ctx) | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) | addi CARG1, DISPATCH, GG_DISP2J | stw CARG3, DISPATCH_J(exitno)(DISPATCH) - | addi CARG2, sp, 16 + | addi CARG2, sp, EXIT_OFFSET | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | // Returns MULTRES (unscaled) or negated error code. | lp TMP1, L->cframe - | lwz TMP2, 0(sp) + | lp TMP2, 0(sp) | lp BASE, L->base |.if GPR64 | rldicr sp, TMP1, 0, 61 @@ -3004,7 +3211,7 @@ static void build_subroutines(BuildCtx *ctx) | rlwinm sp, TMP1, 0, 0, 29 |.endif | lwz PC, SAVE_PC // Get SAVE_PC. - | stw TMP2, 0(sp) + | stp TMP2, 0(sp) | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield). | b >1 |.endif @@ -3025,7 +3232,12 @@ static void build_subroutines(BuildCtx *ctx) | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) | lwz KBASE, PC2PROTO(k)(TMP1) | // Setup type comparison constants. + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 + | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM + |.endif | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
| .FPU stw TMP3, TMPD | li ZERO, 0 @@ -3045,14 +3257,14 @@ static void build_subroutines(BuildCtx *ctx) | decode_RA8 RA, INS | lpx TMP0, DISPATCH, TMP1 | mtctr TMP0 - | cmplwi TMP1, BC_FUNCF*4 // Function header? + | cmplwi TMP1, BC_FUNCF*PSIZE // Function header? | bge >2 | decode_RB8 RB, INS | decode_RD8 RD, INS | decode_RC8 RC, INS | bctr |2: - | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? + | cmplwi TMP1, (BC_FUNCC+2)*PSIZE // Fast function? | blt >3 | // Check frame below fast function. | lwz TMP1, FRAME_PC(BASE) @@ -3062,7 +3274,7 @@ static void build_subroutines(BuildCtx *ctx) | lwz TMP2, -4(TMP1) | decode_RA8 TMP0, TMP2 | sub TMP1, BASE, TMP0 - | lwz LFUNC:TMP2, -12(TMP1) + | lwz LFUNC:TMP2, WORD_LO-16(TMP1) | lwz TMP1, LFUNC:TMP2->pc | lwz KBASE, PC2PROTO(k)(TMP1) |3: @@ -3111,6 +3323,8 @@ static void build_subroutines(BuildCtx *ctx) | sfi2d CARG3, CARG4 | |->vm_modi: + | li TMP1, 0 + | mtxer TMP1 | divwo. TMP0, CARG1, CARG2 | bso >1 |.if GPR64 @@ -3129,7 +3343,8 @@ static void build_subroutines(BuildCtx *ctx) | cmpwi CARG2, 0 | li CARG1, 0 | beqlr - | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. + | // Clear SO for -2147483648 % -1 and return 0. + | crxor 4*cr0+so, 4*cr0+so, 4*cr0+so | blr | |//----------------------------------------------------------------------- @@ -3142,10 +3357,18 @@ static void build_subroutines(BuildCtx *ctx) |->vm_cachesync: |.if JIT or FFI | // Compute start of first cache line and number of cache lines. + | .if GPR64 + | rldicr CARG1, CARG1, 0, 58 + | .else | rlwinm CARG1, CARG1, 0, 0, 26 + | .endif | sub CARG2, CARG2, CARG1 | addi CARG2, CARG2, 31 + | .if GPR64 + | srdi. CARG2, CARG2, 5 + | .else | rlwinm. 
CARG2, CARG2, 27, 5, 31 + | .endif | beqlr | mtctr CARG2 | mr CARG3, CARG1 @@ -3172,39 +3395,70 @@ static void build_subroutines(BuildCtx *ctx) |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- | - |// Handler for callback functions. Callback slot number in r11, g in r12. + |// Handler for callback functions. + |// 32-bit: Callback slot number in r12, g in r11. + |// 64-bit v1: Callback slot number in bits 47+ of r11, g in 0-46, TOC in r2. + |// 64-bit v2: Callback slot number in bits 2-11 of r12, g in r11, + |// vm_ffi_callback in r2. |->vm_ffi_callback: |.if FFI |.type CTSTATE, CTState, PC + | .if OPD + | rldicl r12, r11, 17, 47 + | rldicl r11, r11, 0, 17 + | .endif + | .if ELFV2 + | rlwinm r12, r12, 30, 22, 31 + | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha + | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l + | .endif | saveregs - | lwz CTSTATE, GL:r12->ctype_state - | addi DISPATCH, r12, GG_G2DISP - | stw r11, CTSTATE->cb.slot - | stw r3, CTSTATE->cb.gpr[0] + | lwz CTSTATE, GL:r11->ctype_state + | addi DISPATCH, r11, GG_G2DISP + | stw r12, CTSTATE->cb.slot + | stp r3, CTSTATE->cb.gpr[0] | .FPU stfd f1, CTSTATE->cb.fpr[0] - | stw r4, CTSTATE->cb.gpr[1] + | stp r4, CTSTATE->cb.gpr[1] | .FPU stfd f2, CTSTATE->cb.fpr[1] - | stw r5, CTSTATE->cb.gpr[2] + | stp r5, CTSTATE->cb.gpr[2] | .FPU stfd f3, CTSTATE->cb.fpr[2] - | stw r6, CTSTATE->cb.gpr[3] + | stp r6, CTSTATE->cb.gpr[3] | .FPU stfd f4, CTSTATE->cb.fpr[3] - | stw r7, CTSTATE->cb.gpr[4] + | stp r7, CTSTATE->cb.gpr[4] | .FPU stfd f5, CTSTATE->cb.fpr[4] - | stw r8, CTSTATE->cb.gpr[5] + | stp r8, CTSTATE->cb.gpr[5] | .FPU stfd f6, CTSTATE->cb.fpr[5] - | stw r9, CTSTATE->cb.gpr[6] + | stp r9, CTSTATE->cb.gpr[6] | .FPU stfd f7, CTSTATE->cb.fpr[6] - | stw r10, CTSTATE->cb.gpr[7] + | stp r10, CTSTATE->cb.gpr[7] | .FPU stfd f8, CTSTATE->cb.fpr[7] + | .if GPR64 + | stfd f9, CTSTATE->cb.fpr[8] + | 
stfd f10, CTSTATE->cb.fpr[9] + | stfd f11, CTSTATE->cb.fpr[10] + | stfd f12, CTSTATE->cb.fpr[11] + | stfd f13, CTSTATE->cb.fpr[12] + | .endif + | .if ELFV2 + | addi TMP0, sp, CFRAME_SPACE+96 + | .elif GPR64 + | addi TMP0, sp, CFRAME_SPACE+112 + | .else | addi TMP0, sp, CFRAME_SPACE+8 - | stw TMP0, CTSTATE->cb.stack + | .endif + | stp TMP0, CTSTATE->cb.stack | mr CARG1, CTSTATE | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. | mr CARG2, sp | bl extern lj_ccallback_enter // (CTState *cts, void *cf) | // Returns lua_State *. | lp BASE, L:CRET1->base + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. + | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM // Setup type comparison constants. + |.endif | lp RC, L:CRET1->top | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | li ZERO, 0 @@ -3233,9 +3487,21 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG1, CTSTATE | mr CARG2, RA | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | lwz CRET1, CTSTATE->cb.gpr[0] + | lp CRET1, CTSTATE->cb.gpr[0] | .FPU lfd FARG1, CTSTATE->cb.fpr[0] - | lwz CRET2, CTSTATE->cb.gpr[1] + | lp CRET2, CTSTATE->cb.gpr[1] + | .if GPR64 + | lfd FARG2, CTSTATE->cb.fpr[1] + | .else + | lp CARG3, CTSTATE->cb.gpr[2] + | lp CARG4, CTSTATE->cb.gpr[3] + | .endif + | .elfv2 lfd f3, CTSTATE->cb.fpr[2] + | .elfv2 lfd f4, CTSTATE->cb.fpr[3] + | .elfv2 lfd f5, CTSTATE->cb.fpr[4] + | .elfv2 lfd f6, CTSTATE->cb.fpr[5] + | .elfv2 lfd f7, CTSTATE->cb.fpr[6] + | .elfv2 lfd f8, CTSTATE->cb.fpr[7] | b ->vm_leave_unw |.endif | @@ -3248,23 +3514,46 @@ static void build_subroutines(BuildCtx *ctx) | lbz CARG2, CCSTATE->nsp | lbz CARG3, CCSTATE->nfpr | neg TMP1, TMP1 + | .if GPR64 + | std TMP0, 16(sp) + | .else | stw TMP0, 4(sp) + | .endif | cmpwi cr1, CARG3, 0 | mr TMP2, sp | addic. CARG2, CARG2, -1 + | .if GPR64 + | stdux sp, sp, TMP1 + | .else | stwux sp, sp, TMP1 + | .endif | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. 
- | stw r14, -4(TMP2) - | stw CCSTATE, -8(TMP2) + | .if GPR64 + | std r14, -8(TMP2) + | std CCSTATE, -16(TMP2) + | .else + | stw r14, -4(TMP2) + | stw CCSTATE, -8(TMP2) + | .endif | mr r14, TMP2 | la TMP1, CCSTATE->stack + | .if GPR64 + | sldi CARG2, CARG2, 3 + | .else | slwi CARG2, CARG2, 2 + | .endif | blty >2 - | la TMP2, 8(sp) + | .if ELFV2 + | la TMP2, 96(sp) + | .elif GPR64 + | la TMP2, 112(sp) + | .else + | la TMP2, 8(sp) + | .endif |1: - | lwzx TMP0, TMP1, CARG2 - | stwx TMP0, TMP2, CARG2 - | addic. CARG2, CARG2, -4 + | lpx TMP0, TMP1, CARG2 + | stpx TMP0, TMP2, CARG2 + | addic. CARG2, CARG2, -PSIZE | bge <1 |2: | bney cr1, >3 @@ -3276,28 +3565,55 @@ static void build_subroutines(BuildCtx *ctx) | .FPU lfd f6, CCSTATE->fpr[5] | .FPU lfd f7, CCSTATE->fpr[6] | .FPU lfd f8, CCSTATE->fpr[7] + | .if GPR64 + | .FPU lfd f9, CCSTATE->fpr[8] + | .FPU lfd f10, CCSTATE->fpr[9] + | .FPU lfd f11, CCSTATE->fpr[10] + | .FPU lfd f12, CCSTATE->fpr[11] + | .FPU lfd f13, CCSTATE->fpr[12] + | .endif |3: - | lp TMP0, CCSTATE->func - | lwz CARG2, CCSTATE->gpr[1] - | lwz CARG3, CCSTATE->gpr[2] - | lwz CARG4, CCSTATE->gpr[3] - | lwz CARG5, CCSTATE->gpr[4] - | mtctr TMP0 - | lwz r8, CCSTATE->gpr[5] - | lwz r9, CCSTATE->gpr[6] - | lwz r10, CCSTATE->gpr[7] - | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | .toc std TOCREG, SAVE_TOC + | lp FUNCREG, CCSTATE->func + | lp CARG2, CCSTATE->gpr[1] + | lp CARG3, CCSTATE->gpr[2] + | .opd lp TOCREG, TOC_OFS(FUNCREG) + | .opdenv lp ENVREG, ENV_OFS(FUNCREG) + | .opd lp FUNCREG, 0(FUNCREG) + | lp CARG4, CCSTATE->gpr[3] + | lp CARG5, CCSTATE->gpr[4] + | mtctr FUNCREG + | lp r8, CCSTATE->gpr[5] + | lp r9, CCSTATE->gpr[6] + | lp r10, CCSTATE->gpr[7] + | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 
| bctrl - | lwz CCSTATE:TMP1, -8(r14) - | lwz TMP2, -4(r14) + | .toc lp TOCREG, SAVE_TOC + | .if GPR64 + | ld CCSTATE:TMP1, -16(r14) + | ld TMP2, -8(r14) + | ld TMP0, 16(r14) + | .else + | lwz CCSTATE:TMP1, -8(r14) + | lwz TMP2, -4(r14) | lwz TMP0, 4(r14) - | stw CARG1, CCSTATE:TMP1->gpr[0] + | .endif + | stp CARG1, CCSTATE:TMP1->gpr[0] | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] - | stw CARG2, CCSTATE:TMP1->gpr[1] + | stp CARG2, CCSTATE:TMP1->gpr[1] + | .if GPR64 + | stfd FARG2, CCSTATE:TMP1->fpr[1] + | .endif + | .elfv2 stfd FARG3, CCSTATE:TMP1->fpr[2] + | .elfv2 stfd FARG4, CCSTATE:TMP1->fpr[3] + | .elfv2 stfd FARG5, CCSTATE:TMP1->fpr[4] + | .elfv2 stfd FARG6, CCSTATE:TMP1->fpr[5] + | .elfv2 stfd FARG7, CCSTATE:TMP1->fpr[6] + | .elfv2 stfd FARG8, CCSTATE:TMP1->fpr[7] | mtlr TMP0 - | stw CARG3, CCSTATE:TMP1->gpr[2] + | stp CARG3, CCSTATE:TMP1->gpr[2] | mr sp, r14 - | stw CARG4, CCSTATE:TMP1->gpr[3] + | stp CARG4, CCSTATE:TMP1->gpr[3] | mr r14, TMP2 | blr |.endif @@ -3320,14 +3636,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1*8, RD = src2*8, JMP with RD = target + | addi BASEP4, BASE, 4 |.if DUALNUM - | lwzux CARG1, RA, BASE + | lwzx CARG1, BASE_HI, RA | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux CARG3, RD, BASE + | lwzx CARG2, BASE_LO, RA + | lwzx CARG3, BASE_HI, RD | lwz TMP2, -4(PC) | checknum cr0, CARG1 - | lwz CARG4, 4(RD) + | lwzx CARG4, BASE_LO, RD | decode_RD4 TMP2, TMP2 | checknum cr1, CARG3 | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) @@ -3351,7 +3668,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |7: // RA is not an integer. | bgt cr0, ->vmeta_comp | // RA is a number. - | .FPU lfd f0, 0(RA) + | .FPU lfdx f0, BASE, RA | bgt cr1, ->vmeta_comp | blt cr1, >4 | // RA is a number, RD is an integer. 
@@ -3371,7 +3688,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bl ->vm_sfi2d_1 |.endif |4: - | .FPU lfd f1, 0(RD) + | .FPU lfdx f1, BASE, RD |5: |.if FPU | fcmpu cr0, f0, f1 @@ -3392,10 +3709,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | b <1 |.else - | lwzx TMP0, BASE, RA + | lwzx TMP0, BASE_HI, RA | addi PC, PC, 4 | lfdx f0, BASE, RA - | lwzx TMP1, BASE, RD + | lwzx TMP1, BASE_HI, RD | checknum cr0, TMP0 | lwz TMP2, -4(PC) | lfdx f1, BASE, RD @@ -3425,16 +3742,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISEQV: case BC_ISNEV: vk = op == BC_ISEQV; | // RA = src1*8, RD = src2*8, JMP with RD = target + | addi BASEP4, BASE, 4 |.if DUALNUM - | lwzux CARG1, RA, BASE + | lwzx CARG1, BASE_HI, RA | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux CARG3, RD, BASE + | lwzx CARG2, BASE_LO, RA + | .if ENDIAN_LE + | lwzx CARG3, BASE_HI, RD + | .else + | lwzux CARG3, RD, BASE_HI + | .endif | checknum cr0, CARG1 | lwz SAVE0, -4(PC) | checknum cr1, CARG3 | decode_RD4 SAVE0, SAVE0 - | lwz CARG4, 4(RD) + | .if ENDIAN_LE + | lwzux CARG4, RD, BASE_LO + | .else + | lwz CARG4, WORD_LO(RD) + | .endif | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) if (vk) { @@ -3443,11 +3769,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ble cr7, ->BC_ISNEN_Z } |.else - | lwzux CARG1, RA, BASE + | lwzx CARG1, BASE_HI, RA | lwz SAVE0, 0(PC) - | lfd f0, 0(RA) + | lfdx f0, BASE, RA | addi PC, PC, 4 - | lwzux CARG3, RD, BASE + | lwzx CARG3, BASE_HI, RD | checknum cr0, CARG1 | decode_RD4 SAVE0, SAVE0 | lfd f1, 0(RD) @@ -3468,8 +3794,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |5: // Either or both types are not numbers. 
|.if not DUALNUM - | lwz CARG2, 4(RA) - | lwz CARG4, 4(RD) + | lwzx CARG2, BASE_LO, RA + | lwzx CARG4, BASE_LO, RD |.endif |.if FFI | cmpwi cr7, CARG1, LJ_TCDATA @@ -3485,10 +3811,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if FFI | beq cr7, ->vmeta_equal_cd |.endif + |.if P64 + | cmplwi cr7, TMP2, ~LJ_TUDATA // Avoid 64 bit lightuserdata. + |.endif | cmplw cr5, CARG2, CARG4 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. + |.if P64 + | cror 4*cr6+lt, 4*cr6+lt, 4*cr7+gt + |.endif | mr SAVE1, PC | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. @@ -3528,9 +3860,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISEQS: case BC_ISNES: vk = op == BC_ISEQS; | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | lwzux TMP0, RA, BASE + | addi BASEP4, BASE, 4 + | lwzx TMP0, BASE_HI, RA | srwi RD, RD, 1 - | lwz STR:TMP3, 4(RA) + | lwzx STR:TMP3, BASE_LO, RA | lwz TMP2, 0(PC) | subfic RD, RD, -4 | addi PC, PC, 4 @@ -3561,16 +3894,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISEQN: case BC_ISNEN: vk = op == BC_ISEQN; | // RA = src*8, RD = num_const*8, JMP with RD = target + | addi BASEP4, BASE, 4 |.if DUALNUM - | lwzux CARG1, RA, BASE + | lwzx CARG1, BASE_HI, RA | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux CARG3, RD, KBASE + | lwzx CARG2, BASE_LO, RA + | lwzux2 CARG3, CARG4, RD, KBASE | checknum cr0, CARG1 | lwz SAVE0, -4(PC) | checknum cr1, CARG3 | decode_RD4 SAVE0, SAVE0 - | lwz CARG4, 4(RD) | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) if (vk) { |->BC_ISEQN_Z: @@ -3587,7 +3920,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } else { |->BC_ISNEN_Z: // Dummy label. 
} - | lwzx CARG1, BASE, RA + | lwzx CARG1, BASE_HI, RA | addi PC, PC, 4 | lfdx f0, BASE, RA | lwz SAVE0, -4(PC) @@ -3625,7 +3958,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |7: // RA is not an integer. | bge cr0, <3 | // RA is a number. - | .FPU lfd f0, 0(RA) + | .FPU lfdx f0, BASE, RA | blt cr1, >1 | // RA is a number, RD is an integer. |.if FPU @@ -3657,7 +3990,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISEQP: case BC_ISNEP: vk = op == BC_ISEQP; | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | lwzx TMP0, BASE, RA + | addi BASEP4, BASE, 4 + | lwzx TMP0, BASE_HI, RA | srwi TMP1, RD, 3 | lwz TMP2, 0(PC) | not TMP1, TMP1 @@ -3687,7 +4021,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | lwzx TMP0, BASE, RD + | addi BASEP4, BASE, 4 + | lwzx TMP0, BASE_HI, RD | lwz INS, 0(PC) | addi PC, PC, 4 if (op == BC_IST || op == BC_ISF) { @@ -3732,7 +4067,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISTYPE: | // RA = src*8, RD = -type*8 - | lwzx TMP0, BASE, RA + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | lwzx TMP0, BASE_HI, RA | srwi TMP1, RD, 3 | ins_next1 |.if not PPE and not GPR64 @@ -3746,7 +4084,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ISNUM: | // RA = src*8, RD = -(TISNUM-1)*8 - | lwzx TMP0, BASE, RA + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | lwzx TMP0, BASE_HI, RA | ins_next1 | checknum TMP0 | bge ->vmeta_istype @@ -3771,18 +4112,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_NOT: | // RA = dst*8, RD = src*8 + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif | ins_next1 - | lwzx TMP0, BASE, RD + | lwzx TMP0, BASE_HI, RD | .gpr64 extsw TMP0, TMP0 | subfic TMP1, TMP0, LJ_TTRUE | adde TMP0, TMP0, TMP1 - | stwx TMP0, BASE, RA + | stwx TMP0, BASE_HI, RA | ins_next2 
break; case BC_UNM: | // RA = dst*8, RD = src*8 - | lwzux TMP1, RD, BASE - | lwz TMP0, 4(RD) + | addi BASEP4, BASE, 4 + | lwzx TMP1, BASE_HI, RD + | lwzx TMP0, BASE_LO, RD + |.if DUALNUM and not GPR64 + | mtxer ZERO + |.endif | checknum TMP1 |.if DUALNUM | bne >5 @@ -3794,18 +4142,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.else | nego. TMP0, TMP0 | bso >4 - |1: |.endif | ins_next1 - | stwux TISNUM, RA, BASE - | stw TMP0, 4(RA) + | stwx TISNUM, BASE_HI, RA + | stwx TMP0, BASE_LO, RA |3: | ins_next2 |4: - |.if not GPR64 - | // Potential overflow. - | checkov TMP1, <1 // Ignore unrelated overflow. - |.endif | lus TMP1, 0x41e0 // 2^31. | li TMP0, 0 | b >7 @@ -3815,8 +4158,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | xoris TMP1, TMP1, 0x8000 |7: | ins_next1 - | stwux TMP1, RA, BASE - | stw TMP0, 4(RA) + | stwx TMP1, BASE_HI, RA + | stwx TMP0, BASE_LO, RA |.if DUALNUM | b <3 |.else @@ -3825,15 +4168,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_LEN: | // RA = dst*8, RD = src*8 - | lwzux TMP0, RD, BASE - | lwz CARG1, 4(RD) + | addi BASEP4, BASE, 4 + | lwzx TMP0, BASE_HI, RD + | lwzx CARG1, BASE_LO, RD | checkstr TMP0; bne >2 | lwz CRET1, STR:CARG1->len |1: |.if DUALNUM | ins_next1 - | stwux TISNUM, RA, BASE - | stw CRET1, 4(RA) + | stwx TISNUM, BASE_HI, RA + | stwx CRET1, BASE_LO, RA |.else | tonum_u f0, CRET1 // Result is a non-negative integer. 
| ins_next1 @@ -3865,12 +4209,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.macro ins_arithpre | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + | addi BASEP4, BASE, 4 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: - | lwzx CARG1, BASE, RB + | .if ENDIAN_LE and DUALNUM + | addi CARG3, RC, 4 + | .endif + | lwzx CARG1, BASE_HI, RB | .if DUALNUM - | lwzx CARG3, KBASE, RC + | .if ENDIAN_LE + | lwzx CARG3, KBASE, CARG3 + | .else + | lwzx CARG3, KBASE, RC + | .endif | .endif | .if FPU | lfdx f14, BASE, RB @@ -3891,9 +4243,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | .endif || break; ||case 1: - | lwzx CARG1, BASE, RB + | .if ENDIAN_LE and DUALNUM + | addi CARG3, RC, 4 + | .endif + | lwzx CARG1, BASE_HI, RB | .if DUALNUM - | lwzx CARG3, KBASE, RC + | .if ENDIAN_LE + | lwzx CARG3, KBASE, CARG3 + | .else + | lwzx CARG3, KBASE, RC + | .endif | .endif | .if FPU | lfdx f15, BASE, RB @@ -3914,8 +4273,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | .endif || break; ||default: - | lwzx CARG1, BASE, RB - | lwzx CARG3, BASE, RC + | lwzx CARG1, BASE_HI, RB + | lwzx CARG3, BASE_HI, RC | .if FPU | lfdx f14, BASE, RB | lfdx f15, BASE, RC @@ -3998,50 +4357,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |.macro ins_arithdn, intins, fpins, fpcall | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + | addi BASEP4, BASE, 4 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: - | lwzux CARG1, RB, BASE - | lwzux CARG3, RC, KBASE + | .if ENDIAN_LE + | lwzx TMP1, RB, BASE_HI + | lwzux CARG2, RC, KBASE + | lwz TMP2, 4(RC) + | checknum cr0, TMP1 + | lwzux CARG1, RB, BASE + | checknum cr1, TMP2 + | .else + | lwzux CARG1, RB, BASE + | lwzux CARG3, RC, KBASE | lwz CARG2, 4(RB) - | checknum cr0, CARG1 - | lwz CARG4, 4(RC) - | checknum cr1, CARG3 + | checknum cr0, CARG1 + | lwz CARG4, 4(RC) + | checknum cr1, CARG3 + | .endif || break; ||case 1: - | lwzux CARG3, 
RB, BASE - | lwzux CARG1, RC, KBASE - | lwz CARG4, 4(RB) - | checknum cr0, CARG3 - | lwz CARG2, 4(RC) - | checknum cr1, CARG1 + | .if ENDIAN_LE + | lwzux CARG1, RC, KBASE + | lwzx TMP1, RB, BASE_HI + | lwz TMP2, 4(RC) + | checknum cr0, TMP1 + | lwzux CARG2, RB, BASE + | checknum cr1, TMP2 + | .else + | lwzux CARG3, RB, BASE + | lwzux CARG1, RC, KBASE + | lwz CARG4, 4(RB) + | checknum cr0, CARG3 + | lwz CARG2, 4(RC) + | checknum cr1, CARG1 + | .endif || break; ||default: - | lwzux CARG1, RB, BASE - | lwzux CARG3, RC, BASE - | lwz CARG2, 4(RB) - | checknum cr0, CARG1 - | lwz CARG4, 4(RC) - | checknum cr1, CARG3 + | .if ENDIAN_LE + | lwzx TMP1, RB, BASE_HI + | lwzx TMP2, RC, BASE_HI + | lwzux CARG1, RB, BASE + | checknum cr0, TMP1 + | lwzux CARG2, RC, BASE + | checknum cr1, TMP2 + | .else + | lwzux CARG1, RB, BASE + | lwzux CARG3, RC, BASE + | lwz CARG2, 4(RB) + | checknum cr0, CARG1 + | lwz CARG4, 4(RC) + | checknum cr1, CARG3 + | .endif || break; ||} | bne >5 | bne cr1, >5 - |.if "intins" == "intmod" - | mr CARG1, CARG2 - | mr CARG2, CARG4 - |.endif + |.if ENDIAN_LE + | intins CARG1, CARG1, CARG2 + |.else + | .if "intins" == "intmod" + | mr CARG1, CARG2 + | mr CARG2, CARG4 + | .endif | intins CARG1, CARG2, CARG4 - | bso >4 - |1: + |.endif + | ins_arithfallback bso | ins_next1 - | stwux TISNUM, RA, BASE - | stw CARG1, 4(RA) + | stwx TISNUM, BASE_HI, RA + | stwx CARG1, BASE_LO, RA |2: | ins_next2 - |4: // Overflow. - | checkov TMP0, <1 // Ignore unrelated overflow. - | ins_arithfallback b |5: // FP variant. |.if FPU ||if (vk == 1) { @@ -4124,8 +4511,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_POW: | // NYI: (partial) integer arithmetic. 
- | lwzx CARG1, BASE, RB - | lwzx CARG3, BASE, RC + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | lwzx CARG1, BASE_HI, RB + | lwzx CARG3, BASE_HI, RC |.if FPU | lfdx FARG1, BASE, RB | lfdx FARG2, BASE, RC @@ -4164,6 +4554,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Returns NULL (finished) or TValue * (metamethod). | cmplwi CRET1, 0 | lp BASE, L->base + | addi BASEP4, BASE, 4 | bne ->vmeta_binop | ins_next1 |.if FPU @@ -4182,42 +4573,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_KSTR: | // RA = dst*8, RD = str_const*8 (~) + | addi BASEP4, BASE, 4 | srwi TMP1, RD, 1 | subfic TMP1, TMP1, -4 | ins_next1 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 | li TMP2, LJ_TSTR - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) + | stwx TMP2, BASE_HI, RA + | stwx TMP0, BASE_LO, RA | ins_next2 break; case BC_KCDATA: |.if FFI | // RA = dst*8, RD = cdata_const*8 (~) + | addi BASEP4, BASE, 4 | srwi TMP1, RD, 1 | subfic TMP1, TMP1, -4 | ins_next1 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 | li TMP2, LJ_TCDATA - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) + | stwx TMP2, BASE_HI, RA + | stwx TMP0, BASE_LO, RA | ins_next2 |.endif break; case BC_KSHORT: | // RA = dst*8, RD = int16_literal*8 + | addi BASEP4, BASE, 4 |.if DUALNUM | slwi RD, RD, 13 | srawi RD, RD, 16 | ins_next1 - | stwux TISNUM, RA, BASE - | stw RD, 4(RA) + | stwx TISNUM, BASE_HI, RA + | stwx RD, BASE_LO, RA | ins_next2 |.else | // The soft-float approach is faster. | slwi RD, RD, 13 | srawi TMP1, RD, 31 | xor TMP2, TMP1, RD + | .gpr64 extsw RD, RD | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) | cntlzw TMP3, TMP2 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 @@ -4229,8 +4624,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add RD, RD, TMP1 // hi = hi + exponent-1 | and RD, RD, TMP0 // hi = x == 0 ? 
0 : hi | ins_next1 - | stwux RD, RA, BASE - | stw ZERO, 4(RA) + | stwx RD, BASE_HI, RA + | stwx ZERO, BASE_LO, RA | ins_next2 |.endif break; @@ -4250,18 +4645,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_KPRI: | // RA = dst*8, RD = primitive_type*8 (~) + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif | srwi TMP1, RD, 3 | not TMP0, TMP1 | ins_next1 - | stwx TMP0, BASE, RA + | stwx TMP0, BASE_HI, RA | ins_next2 break; case BC_KNIL: | // RA = base*8, RD = end*8 - | stwx TISNIL, BASE, RA + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | stwx TISNIL, BASE_HI, RA | addi RA, RA, 8 |1: - | stwx TISNIL, BASE, RA + | stwx TISNIL, BASE_HI, RA | cmpw RA, RD | addi RA, RA, 8 | blt <1 @@ -4305,7 +4706,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz CARG2, UPVAL:RB->v | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | lbz TMP0, UPVAL:RB->closed - | lwz TMP2, 0(RD) + | lwz TMP2, WORD_HI(RD) |.if FPU | stfd f0, 0(CARG2) |.else @@ -4313,7 +4714,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stw CARG3, 4(CARG2) |.endif | cmplwi cr1, TMP0, 0 - | lwz TMP1, 4(RD) + | lwz TMP1, WORD_LO(RD) | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | subi TMP2, TMP2, (LJ_TNUMX+1) | bne >2 // Upvalue is closed and black? 
@@ -4346,8 +4747,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lbz TMP3, STR:TMP1->marked | lbz TMP2, UPVAL:RB->closed | li TMP0, LJ_TSTR - | stw STR:TMP1, 4(CARG2) - | stw TMP0, 0(CARG2) + | stw STR:TMP1, WORD_LO(CARG2) + | stw TMP0, WORD_HI(CARG2) | bne >2 |1: | ins_next @@ -4394,7 +4795,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwzx UPVAL:RB, LFUNC:RB, RA | ins_next1 | lwz TMP1, UPVAL:RB->v - | stw TMP0, 0(TMP1) + | stw TMP0, WORD_HI(TMP1) | ins_next2 break; @@ -4409,6 +4810,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add CARG2, BASE, RA | bl extern lj_func_closeuv // (lua_State *L, TValue *level) | lp BASE, L->base + | addi BASEP4, BASE, 4 |1: | ins_next break; @@ -4427,8 +4829,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Returns GCfuncL *. | lp BASE, L->base | li TMP0, LJ_TFUNC - | stwux TMP0, RA, BASE - | stw LFUNC:CRET1, 4(RA) + | addi BASEP4, BASE, 4 + | stwx TMP0, BASE_HI, RA + | stwx LFUNC:CRET1, BASE_LO, RA | ins_next break; @@ -4461,8 +4864,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | lp BASE, L->base | li TMP0, LJ_TTAB - | stwux TMP0, RA, BASE - | stw TAB:CRET1, 4(RA) + | addi BASEP4, BASE, 4 + | stwx TMP0, BASE_HI, RA + | stwx TAB:CRET1, BASE_LO, RA | ins_next if (op == BC_TNEW) { |3: @@ -4495,13 +4899,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_TGETV: | // RA = dst*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB + | lwzx CARG2, BASE_HI, RC + | lwzx TAB:RB, BASE_LO, RB |.if DUALNUM - | lwz RC, 4(RC) + | lwzx RC, BASE_LO, RC |.else - | lfd f0, 0(RC) + | lfdx f0, BASE, RC |.endif | checktab CARG1 | checknum cr1, CARG2 @@ -4528,9 +4933,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slwi TMP2, TMP2, 3 |.endif | ble ->vmeta_tgetv // Integer key and in array part? 
- | lwzx TMP0, TMP1, TMP2 |.if FPU - | lfdx f14, TMP1, TMP2 + | .if ENDIAN_LE + | lfdux f14, TMP1, TMP2 + | lwz TMP0, WORD_HI(TMP1) + | .else + | lwzx TMP0, TMP1, TMP2 + | lfdx f14, TMP1, TMP2 + | .endif |.else | lwzux SAVE0, TMP1, TMP2 | lwz SAVE1, 4(TMP1) @@ -4558,21 +4968,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: | checkstr CARG2; bne ->vmeta_tgetv |.if not DUALNUM - | lwz STR:RC, 4(RC) + | lwzx STR:RC, BASE_LO, RC |.endif | b ->BC_TGETS_Z // String key? break; case BC_TGETS: | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) + | lwzx TAB:RB, BASE_LO, RB | subfic TMP1, TMP1, -4 | checktab CARG1 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 | bne ->vmeta_tgets1 |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | addi BASEP4, BASE, 4 | lwz TMP0, TAB:RB->hmask | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:RB->node @@ -4582,16 +4994,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sub TMP1, TMP0, TMP1 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) + | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2) + | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) + | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) + | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2) | checkstr CARG1; bne >4 | cmpw TMP0, STR:RC; bne >4 | checknil CARG2; beq >5 // Key found, but nil value? |3: - | stwux CARG2, RA, BASE - | stw TMP1, 4(RA) + | stwx CARG2, BASE_HI, RA + | stwx TMP1, BASE_LO, RA | ins_next | |4: // Follow hash chain. 
@@ -4612,16 +5024,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TGETB: | // RA = dst*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) + | lwzx TAB:RB, BASE_LO, RB | checktab CARG1; bne ->vmeta_tgetb | lwz TMP1, TAB:RB->asize | lwz TMP2, TAB:RB->array | cmplw TMP0, TMP1; bge ->vmeta_tgetb |.if FPU - | lwzx TMP1, TMP2, RC - | lfdx f0, TMP2, RC + | .if ENDIAN_LE + | lfdux f0, TMP2, RC + | lwz TMP1, WORD_HI(TMP2) + | .else + | lwzx TMP1, TMP2, RC + | lfdx f0, TMP2, RC + | .endif |.else | lwzux TMP1, TMP2, RC | lwz TMP3, 4(TMP2) @@ -4648,12 +5066,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TGETR: | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG1, 4(RB) + | addi BASEP4, BASE, 4 + | lwzx TAB:CARG1, BASE_LO, RB |.if DUALNUM - | add RC, BASE, RC | lwz TMP0, TAB:CARG1->asize - | lwz CARG2, 4(RC) + | lwzx CARG2, BASE_LO, RC | lwz TMP1, TAB:CARG1->array |.else | lfdx f0, BASE, RC @@ -4683,13 +5100,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_TSETV: | // RA = src*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB + | lwzx CARG2, BASE_HI, RC + | lwzx TAB:RB, BASE_LO, RB |.if DUALNUM - | lwz RC, 4(RC) + | lwzx RC, BASE_LO, RC |.else - | lfd f0, 0(RC) + | lfdx f0, BASE, RC |.endif | checktab CARG1 | checknum cr1, CARG2 @@ -4716,7 +5134,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slwi TMP0, TMP2, 3 |.endif | ble ->vmeta_tsetv // Integer key and in array part? 
+ | .if ENDIAN_LE + | addi TMP2, TMP1, 4 + | lwzx TMP2, TMP2, TMP0 + | .else | lwzx TMP2, TMP1, TMP0 + | .endif | lbz TMP3, TAB:RB->marked |.if FPU | lfdx f14, BASE, RA @@ -4750,7 +5173,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: | checkstr CARG2; bne ->vmeta_tsetv |.if not DUALNUM - | lwz STR:RC, 4(RC) + | lwzx STR:RC, BASE_LO, RC |.endif | b ->BC_TSETS_Z // String key? | @@ -4760,9 +5183,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TSETS: | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) + | lwzx TAB:RB, BASE_LO, RB | subfic TMP1, TMP1, -4 | checktab CARG1 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 @@ -4787,9 +5211,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lbz TMP3, TAB:RB->marked | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val + | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2) + | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) + | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) | lwz NODE:TMP1, NODE:TMP2->next | checkstr CARG1; bne >5 | cmpw TMP0, STR:RC; bne >5 @@ -4834,9 +5258,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. |6: | li TMP0, LJ_TSTR - | stw STR:RC, 4(CARG3) + | stw STR:RC, WORD_LO(CARG3) | mr CARG2, TAB:RB - | stw TMP0, 0(CARG3) + | stw TMP0, WORD_HI(CARG3) | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | // Returns TValue *. | lp BASE, L->base @@ -4846,6 +5270,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stw SAVE0, 0(CRET1) | stw SAVE1, 4(CRET1) |.endif + | addi BASEP4, BASE, 4 | b <3 // No 2nd write barrier needed. | |7: // Possible table write barrier for the value. Skip valiswhite check. 
@@ -4854,9 +5279,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TSETB: | // RA = src*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) + | lwzx TAB:RB, BASE_LO, RB | checktab CARG1; bne ->vmeta_tsetb | lwz TMP1, TAB:RB->asize | lwz TMP2, TAB:RB->array @@ -4870,7 +5296,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz SAVE1, 4(CARG2) |.endif | bge ->vmeta_tsetb - | lwzx TMP1, TMP2, RC + | .if ENDIAN_LE + | addi TMP1, TMP2, 4 + | lwzx TMP1, TMP1, RC + | .else + | lwzx TMP1, TMP2, RC + | .endif | checknil TMP1; beq >5 |1: | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) @@ -4899,13 +5330,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TSETR: | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG2, 4(RB) + | addi BASEP4, BASE, 4 + | lwzx TAB:CARG2, BASE_LO, RB |.if DUALNUM - | add RC, BASE, RC | lbz TMP3, TAB:CARG2->marked | lwz TMP0, TAB:CARG2->asize - | lwz CARG3, 4(RC) + | lwzx CARG3, BASE_LO, RC | lwz TMP1, TAB:CARG2->array |.else | lfdx f0, BASE, RC @@ -4946,9 +5376,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add RA, BASE, RA |1: | add TMP3, KBASE, RD - | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. + | lwz TAB:CARG2, WORD_LO-8(RA) // Guaranteed to be a table. | addic. TMP0, MULTRES, -8 - | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. + | lwz TMP3, WORD_LO(TMP3) // Integer constant is in lo-word. | srwi CARG3, TMP0, 3 | beq >4 // Nothing to copy? 
| add CARG3, CARG3, TMP3 @@ -5007,8 +5437,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_CALL: | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 | mr TMP2, BASE - | lwzux TMP0, BASE, RA - | lwz LFUNC:RB, 4(BASE) + | lwzux2 TMP0, LFUNC:RB, BASE, RA | subi NARGS8:RC, NARGS8:RC, 8 | addi BASE, BASE, 8 | checkfunc TMP0; bne ->vmeta_call @@ -5022,8 +5451,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_CALLT: | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | lwzux TMP0, RA, BASE - | lwz LFUNC:RB, 4(RA) + | lwzux2 TMP0, LFUNC:RB, RA, BASE | subi NARGS8:RC, NARGS8:RC, 8 | lwz TMP1, FRAME_PC(BASE) | checkfunc TMP0 @@ -5086,8 +5514,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) | mr TMP2, BASE | add BASE, BASE, RA - | lwz TMP1, -24(BASE) - | lwz LFUNC:RB, -20(BASE) + | lwz TMP1, WORD_HI-24(BASE) + | lwz LFUNC:RB, WORD_LO-24(BASE) |.if FPU | lfd f1, -8(BASE) | lfd f0, -16(BASE) @@ -5097,8 +5525,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz CARG3, -16(BASE) | lwz CARG4, -12(BASE) |.endif - | stw TMP1, 0(BASE) // Copy callable. - | stw LFUNC:RB, 4(BASE) + | stw TMP1, WORD_HI(BASE) // Copy callable. + | stw LFUNC:RB, WORD_LO(BASE) | checkfunc TMP1 | li NARGS8:RC, 16 // Iterators get 2 arguments. |.if FPU @@ -5121,8 +5549,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |->vm_IITERN: | add RA, BASE, RA - | lwz TAB:RB, -12(RA) - | lwz RC, -4(RA) // Get index from control var. + | lwz TAB:RB, WORD_LO-16(RA) + | lwz RC, WORD_LO-8(RA) // Get index from control var. | lwz TMP0, TAB:RB->asize | lwz TMP1, TAB:RB->array | addi PC, PC, 4 @@ -5130,10 +5558,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmplw RC, TMP0 | slwi TMP3, RC, 3 | bge >5 // Index points after array part? 
- | lwzx TMP2, TMP1, TMP3 |.if FPU - | lfdx f0, TMP1, TMP3 + | lfdux f0, TMP3, TMP1 + | lwz TMP2, WORD_HI(TMP3) |.else + | lwzx TMP2, TMP1, TMP3 | lwzux CARG1, TMP3, TMP1 | lwz CARG2, 4(TMP3) |.endif @@ -5141,8 +5570,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz INS, -4(PC) | beq >4 |.if DUALNUM - | stw RC, 4(RA) - | stw TISNUM, 0(RA) + | stw RC, WORD_LO(RA) + | stw TISNUM, WORD_HI(RA) |.else | tonum_u f1, RC |.endif @@ -5155,7 +5584,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stw CARG2, 12(RA) |.endif | decode_RD4 TMP1, INS - | stw RC, -4(RA) // Update control var. + | stw RC, WORD_LO-8(RA) // Update control var. | add PC, TMP1, TMP3 |.if not DUALNUM | stfd f1, 0(RA) @@ -5177,15 +5606,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bgty <3 | slwi RB, RC, 3 | sub TMP3, TMP3, RB - | lwzx RB, TMP2, TMP3 |.if FPU - | lfdx f0, TMP2, TMP3 + | lfdux f0, TMP3, TMP2 + | lwz RB, WORD_HI(TMP3) |.else | add CARG3, TMP2, TMP3 | lwz CARG1, 0(CARG3) | lwz CARG2, 4(CARG3) - |.endif | add NODE:TMP3, TMP2, TMP3 + |.endif | checknil RB | lwz INS, -4(PC) | beq >7 @@ -5212,7 +5641,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | addi RC, RC, 1 | add PC, TMP1, TMP2 - | stw RC, -4(RA) // Update control var. + | stw RC, WORD_LO-8(RA) // Update control var. | b <3 | |7: // Skip holes in hash part. @@ -5223,10 +5652,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISNEXT: | // RA = base*8, RD = target (points to ITERN) | add RA, BASE, RA - | lwz TMP0, -24(RA) - | lwz CFUNC:TMP1, -20(RA) - | lwz TMP2, -16(RA) - | lwz TMP3, -8(RA) + | lwz TMP0, WORD_HI-24(RA) + | lwz CFUNC:TMP1, WORD_LO-24(RA) + | lwz TMP2, WORD_HI-16(RA) + | lwz TMP3, WORD_HI-8(RA) | cmpwi cr0, TMP2, LJ_TTAB | cmpwi cr1, TMP0, LJ_TFUNC | cmpwi cr6, TMP3, LJ_TNIL @@ -5248,10 +5677,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Despecialize bytecode if any of the checks fail. 
| li TMP0, BC_JMP | li TMP1, BC_ITERC + | .if ENDIAN_LE + | stb TMP0, -4(PC) + | .else | stb TMP0, -1(PC) + | .endif | addis PC, TMP3, -(BCBIAS_J*4 >> 16) + | .if ENDIAN_LE + | stb TMP1, 0(PC) + | .else | // NYI on big-endian: unpatch JLOOP. | stb TMP1, 3(PC) + | .endif | b <1 break; @@ -5295,7 +5732,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addi RA, RA, 8 | blt cr1, <1 // More vararg slots? |2: // Fill up remainder with nil. - | stw TISNIL, 0(RA) + | stw TISNIL, WORD_HI(RA) | cmplw RA, TMP2 | addi RA, RA, 8 | blt <2 @@ -5342,6 +5779,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add RA, BASE, RA | add RC, BASE, SAVE0 | subi TMP3, BASE, 8 + | addi BASEP4, BASE, 4 | b <6 break; @@ -5414,13 +5852,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bgt >6 | sub BASE, TMP2, RA | lwz LFUNC:TMP1, FRAME_FUNC(BASE) + | addi BASEP4, BASE, 4 | ins_next1 | lwz TMP1, LFUNC:TMP1->pc | lwz KBASE, PC2PROTO(k)(TMP1) | ins_next2 | |6: // Fill up results with nil. - | subi TMP1, RD, 8 + | addi TMP1, RD, WORD_HI-8 | addi RD, RD, 8 | stwx TISNIL, TMP2, TMP1 | b <5 @@ -5463,13 +5902,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bgt >6 | sub BASE, TMP2, RA | lwz LFUNC:TMP1, FRAME_FUNC(BASE) + | addi BASEP4, BASE, 4 | ins_next1 | lwz TMP1, LFUNC:TMP1->pc | lwz KBASE, PC2PROTO(k)(TMP1) | ins_next2 | |6: // Fill up results with nil. - | subi TMP1, RD, 8 + | addi TMP1, RD, WORD_HI-8 | addi RD, RD, 8 | stwx TISNIL, TMP2, TMP1 | b <5 @@ -5495,11 +5935,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = (op == BC_IFORL || op == BC_JFORL); |.if DUALNUM | // Integer loop. - | lwzux TMP1, RA, BASE - | lwz CARG1, FORL_IDX*8+4(RA) + | lwzux2 TMP1, CARG1, RA, BASE + if (vk) { + | mtxer ZERO + } | cmplw cr0, TMP1, TISNUM if (vk) { - | lwz CARG3, FORL_STEP*8+4(RA) + | lwz CARG3, FORL_STEP*8+WORD_LO(RA) | bne >9 |.if GPR64 | // Need to check overflow for (a<<32) + (b<<32). 
@@ -5511,15 +5953,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addo. CARG1, CARG1, CARG3 |.endif | cmpwi cr6, CARG3, 0 - | lwz CARG2, FORL_STOP*8+4(RA) - | bso >6 + | lwz CARG2, FORL_STOP*8+WORD_LO(RA) + | bso >2 |4: - | stw CARG1, FORL_IDX*8+4(RA) + | stw CARG1, FORL_IDX*8+WORD_LO(RA) } else { - | lwz SAVE0, FORL_STEP*8(RA) - | lwz CARG3, FORL_STEP*8+4(RA) - | lwz TMP2, FORL_STOP*8(RA) - | lwz CARG2, FORL_STOP*8+4(RA) + | lwz SAVE0, FORL_STEP*8+WORD_HI(RA) + | lwz CARG3, FORL_STEP*8+WORD_LO(RA) + | lwz TMP2, FORL_STOP*8+WORD_HI(RA) + | lwz CARG2, FORL_STOP*8+WORD_LO(RA) | cmplw cr7, SAVE0, TISNUM | cmplw cr1, TMP2, TISNUM | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq @@ -5530,11 +5972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | blt cr6, >5 | cmpw CARG1, CARG2 |1: - | stw TISNUM, FORL_EXT*8(RA) + | stw TISNUM, FORL_EXT*8+WORD_HI(RA) if (op != BC_JFORL) { | srwi RD, RD, 1 } - | stw CARG1, FORL_EXT*8+4(RA) + | stw CARG1, FORL_EXT*8+WORD_LO(RA) if (op != BC_JFORL) { | add RD, PC, RD } @@ -5554,11 +5996,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Invert check for negative step. | cmpw CARG2, CARG1 | b <1 - if (vk) { - |6: // Potential overflow. - | checkov TMP0, <4 // Ignore unrelated overflow. - | b <2 - } |.endif if (vk) { |.if DUALNUM @@ -5588,12 +6025,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz CARG3, FORL_STOP*8(RA) | lwz CARG4, FORL_STOP*8+4(RA) |.endif - | lwz SAVE0, FORL_STEP*8(RA) + | lwz SAVE0, FORL_STEP*8+WORD_HI(RA) } else { |.if DUALNUM |9: // FP loop. 
|.else + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + | lwzx TMP1, RA, BASE_LO + | add RA, RA, BASE + |.else | lwzux TMP1, RA, BASE + |.endif | lwz SAVE0, FORL_STEP*8(RA) | lwz TMP2, FORL_STOP*8(RA) | cmplw cr0, TMP1, TISNUM @@ -5696,17 +6139,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) #endif case BC_IITERL: | // RA = base*8, RD = target - | lwzux TMP1, RA, BASE - | lwz TMP2, 4(RA) + | lwzux2 TMP1, TMP2, RA, BASE | checknil TMP1; beq >1 // Stop if iterator returned nil. if (op == BC_JITERL) { - | stw TMP1, -8(RA) - | stw TMP2, -4(RA) + | stw TMP1, WORD_HI-8(RA) + | stw TMP2, WORD_LO-8(RA) | b =>BC_JLOOP } else { | branch_RD // Otherwise save control var + branch. - | stw TMP1, -8(RA) - | stw TMP2, -4(RA) + | stw TMP1, WORD_HI-8(RA) + | stw TMP2, WORD_LO-8(RA) } |1: | ins_next @@ -5735,7 +6177,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Traces on PPC don't store the trace number, so use 0. | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) | lwzx TRACE:TMP2, TMP1, RD - | clrso TMP1 + | mtxer ZERO | lp TMP2, TRACE:TMP2->mcode | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) | mtctr TMP2 @@ -5787,7 +6229,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | |3: // Clear missing parameters. - | stwx TISNIL, BASE, NARGS8:RC + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | stwx TISNIL, BASE_HI, NARGS8:RC | addi NARGS8:RC, NARGS8:RC, 8 | b <2 break; @@ -5804,11 +6249,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz TMP2, L->maxstack | add TMP1, BASE, RC | add TMP0, RA, RC - | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. + | stw LFUNC:RB, WORD_LO(TMP1) // Store copy of LFUNC. | addi TMP3, RC, 8+FRAME_VARG | lwz KBASE, -4+PC2PROTO(k)(PC) | cmplw TMP0, TMP2 - | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. + | stw TMP3, WORD_HI(TMP1) // Store delta + FRAME_VARG. 
| bge ->vm_growstack_l | lbz TMP2, -4+PC2PROTO(numparams)(PC) | mr RA, BASE @@ -5819,18 +6264,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beq >3 |1: | cmplw RA, RC // Less args than parameters? - | lwz TMP0, 0(RA) - | lwz TMP3, 4(RA) + | lwz TMP0, WORD_HI(RA) + | lwz TMP3, WORD_LO(RA) | bge >4 - | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). + | stw TISNIL, WORD_HI(RA) // Clear old fixarg slot (help the GC). | addi RA, RA, 8 |2: | addic. TMP2, TMP2, -1 - | stw TMP0, 8(TMP1) - | stw TMP3, 12(TMP1) + | stw TMP0, WORD_HI+8(TMP1) + | stw TMP3, WORD_LO+8(TMP1) | addi TMP1, TMP1, 8 | bne <1 |3: + | addi BASEP4, BASE, 4 | ins_next2 | |4: // Clear missing parameters. @@ -5842,35 +6288,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FUNCCW: | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 if (op == BC_FUNCC) { - | lp RD, CFUNC:RB->f + | lp FUNCREG, CFUNC:RB->f } else { - | lp RD, DISPATCH_GL(wrapf)(DISPATCH) + | lp FUNCREG, DISPATCH_GL(wrapf)(DISPATCH) } | add TMP1, RA, NARGS8:RC | lwz TMP2, L->maxstack - | .toc lp TMP3, 0(RD) + | .opd lp TMP3, 0(FUNCREG) | add RC, BASE, NARGS8:RC | stp BASE, L->base | cmplw TMP1, TMP2 | stp RC, L->top | li_vmstate C - |.if TOC + |.if OPD | mtctr TMP3 |.else - | mtctr RD + | mtctr FUNCREG |.endif if (op == BC_FUNCCW) { | lp CARG2, CFUNC:RB->f } | mr CARG1, L | bgt ->vm_growstack_c // Need to grow stack. - | .toc lp TOCREG, TOC_OFS(RD) - | .tocenv lp ENVREG, ENV_OFS(RD) + | .opd lp TOCREG, TOC_OFS(FUNCREG) + | .opdenv lp ENVREG, ENV_OFS(FUNCREG) | st_vmstate | bctrl // (lua_State *L [, lua_CFunction f]) + | .toc lp TOCREG, SAVE_TOC | // Returns nresults. 
| lp BASE, L->base - | .toc ld TOCREG, SAVE_TOC | slwi RD, CRET1, 3 | lp TMP1, L->top | li_vmstate INTERP @@ -5921,7 +6367,11 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 0x1\n" "\t.string \"\"\n" "\t.uleb128 0x1\n" +#if LJ_ARCH_PPC32ON64 + "\t.sleb128 -8\n" +#else "\t.sleb128 -4\n" +#endif "\t.byte 65\n" "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" "\t.align 2\n" @@ -5934,14 +6384,24 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long .Lbegin\n" "\t.long %d\n" "\t.byte 0xe\n\t.uleb128 %d\n" +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" + "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n", +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", +#endif fcofs, CFRAME_SIZE); for (i = 14; i <= 31; i++) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n" "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); +#if LJ_ARCH_PPC32ON64 + 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i) +#else + 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i) +#endif + ); fprintf(ctx->fp, "\t.align 2\n" ".LEFDE0:\n\n"); @@ -5957,8 +6417,12 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long lj_vm_ffi_call\n" #endif "\t.long %d\n" +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" +#endif + "\t.byte 0x8e\n\t.uleb128 1\n" "\t.byte 0xd\n\t.uleb128 0xe\n" "\t.align 2\n" ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); @@ -5973,7 +6437,11 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 0x1\n" "\t.string \"zPR\"\n" "\t.uleb128 0x1\n" +#if LJ_ARCH_PPC32ON64 + "\t.sleb128 -8\n" +#else "\t.sleb128 -4\n" +#endif "\t.byte 65\n" "\t.uleb128 6\n" /* augmentation length */ "\t.byte 0x1b\n" /* pcrel|sdata4 */ @@ -5991,14 +6459,24 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ "\t.byte 0xe\n\t.uleb128 %d\n" +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 
65\n\t.sleb128 -2\n" + "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n", +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", +#endif fcofs, CFRAME_SIZE); for (i = 14; i <= 31; i++) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n" "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); +#if LJ_ARCH_PPC32ON64 + 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i) +#else + 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i) +#endif + ); fprintf(ctx->fp, "\t.align 2\n" ".LEFDE2:\n\n"); @@ -6026,8 +6504,12 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long lj_vm_ffi_call-.\n" "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" +#endif + "\t.byte 0x8e\n\t.uleb128 1\n" "\t.byte 0xd\n\t.uleb128 0xe\n" "\t.align 2\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc new file mode 100644 index 00000000..b765f662 --- /dev/null +++ b/src/vm_s390x.dasc @@ -0,0 +1,4306 @@ +|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +| +|// This assembly targets the instruction set available on z10 (and newer) +|// machines. +| +|// ELF ABI registers: +|// r0,r1 | | volatile | +|// r2 | parameter and return value | volatile | +|// r3-r5 | parameter | volatile | +|// r6 | parameter | saved | +|// r7-r11 | | saved | +|// r12 | GOT pointer (needed?) 
| saved | +|// r13 | literal pool (not needed) | saved | +|// r14 | return address | volatile | +|// r15 | stack pointer | saved | +|// f0,f2,f4,f6 | parameter and return value | volatile | +|// f1,f3,f5,f7 | | volatile | +|// f8-f15 | | saved | +|// ar0,ar1 | TLS | volatile | +|// ar2-ar15 | | volatile | +| +|.arch s390x +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter, callee-saved. +|.define KBASE, r8 // Constants of current Lua function. +|.define PC, r9 // Next PC. +|.define DISPATCH, r10 // Opcode dispatch table. +|.define ITYPE, r11 // Temporary used for type information. +|.define BASE, r13 // Base of current Lua stack frame. +| +|// The following temporaries are not saved across C calls, except for RB. +|.define RA, r4 // Overlaps CARG3. +|.define RB, r7 // Must be callee-save. +|.define RC, r5 // Overlaps CARG4. +|.define RD, r6 // Overlaps CARG5. +| +|// Calling conventions. Also used as temporaries. +|.define CARG1, r2 +|.define CARG2, r3 +|.define CARG3, r4 +|.define CARG4, r5 +|.define CARG5, r6 +| +|.define FARG1, f0 +|.define FARG2, f2 +|.define FARG3, f4 +|.define FARG4, f6 +| +|.define CRET1, r2 +| +|.define TMPR0, r0 +|.define TMPR1, r1 +|.define OP, r2 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned. +| +|// Register save area. +|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes). +|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated). +| +|// Argument save area. +|.define SAVE_ERRF, 280(sp) // Argument 4, in r5. +|.define SAVE_NRES, 272(sp) // Argument 3, in r4. Size is 4-bytes. +|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3. +|.define SAVE_L, 256(sp) // Argument 1, in r2. 
+|.define RESERVED, 248(sp) // Reserved for compiler use. +|.define BACKCHAIN, 240(sp) // <- sp entering interpreter. +| +|// Interpreter stack frame. +|.define SAVE_FPR15, 232(sp) +|.define SAVE_FPR14, 224(sp) +|.define SAVE_FPR13, 216(sp) +|.define SAVE_FPR12, 208(sp) +|.define SAVE_FPR11, 200(sp) +|.define SAVE_FPR10, 192(sp) +|.define SAVE_FPR9, 184(sp) +|.define SAVE_FPR8, 176(sp) +|.define SAVE_PC, 168(sp) +|.define SAVE_MULTRES, 160(sp) +|.define SAVE_TMP, 160(sp) // Overlaps SAVE_MULTRES +|.define SAVE_TMP_HI, 164(sp) // Bytes 4-7 of the SAVE_TMP slot (to avoid SAVE_MULTRES). +| +|// Callee save area (allocated by interpreter). +|.define CALLEESAVE, 000(sp) // <- sp in interpreter. +| +|.macro saveregs +| stmg r6, r15, SAVE_GPRS_P // Store r6-r15 before sp is moved. +| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame. +| std f8, SAVE_FPR8 // f8-f15 are callee-saved. +| std f9, SAVE_FPR9 +| std f10, SAVE_FPR10 +| std f11, SAVE_FPR11 +| std f12, SAVE_FPR12 +| std f13, SAVE_FPR13 +| std f14, SAVE_FPR14 +| std f15, SAVE_FPR15 +|.endmacro +| +|.macro restoreregs +| ld f8, SAVE_FPR8 // f8-f15 are callee-saved. +| ld f9, SAVE_FPR9 +| ld f10, SAVE_FPR10 +| ld f11, SAVE_FPR11 +| ld f12, SAVE_FPR12 +| ld f13, SAVE_FPR13 +| ld f14, SAVE_FPR14 +| ld f15, SAVE_FPR15 +| lmg r6, r15, SAVE_GPRS // Reloads r6-r15; r15 is sp, so this restores the stack pointer. +|.endmacro +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Instruction headers. 
+|.macro ins_A; .endmacro +|.macro ins_AD; .endmacro +|.macro ins_AJ; .endmacro +|.macro ins_ABC; srlg RB, RD, 8; llgcr RC, RD; .endmacro +|.macro ins_AB_; srlg RB, RD, 8; .endmacro +|.macro ins_A_C; llgcr RC, RD; .endmacro +|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD +| +|// Instruction decode+dispatch. +|.macro ins_NEXT +| llgc OP, 3(PC) // OP = byte 3 of the 4-byte instruction (zero-extended). +| llgh RD, 0(PC) // RD = halfword at bytes 0-1 (zero-extended). +| llgc RA, 2(PC) // RA = byte 2 (zero-extended). +| sllg TMPR1, OP, 3 // TMPR1 = OP*8, byte offset into the dispatch table. +| lg TMPR1, 0(TMPR1, DISPATCH) // Load the handler address. +| la PC, 4(PC) // Advance PC past this instruction. +| br TMPR1 // Jump to the handler. +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| .macro ins_next +| j ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC +| lg PC, LFUNC:RB->pc // PC = pc field of the function in RB. +| llgc OP, 3(PC) // Decode opcode and A only; RD keeps nargs+1. +| llgc RA, 2(PC) +| sllg TMPR1, OP, 3 // TMPR1 = OP*8, byte offset into the dispatch table. +| la PC, 4(PC) // Advance PC past the first instruction. +| lg TMPR1, 0(TMPR1, DISPATCH) // Load the handler address. +| br TMPR1 +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC, RD = nargs+1 +| stg PC, -8(BASE) // Store the current PC in the slot below the new base. +| ins_callt +|.endmacro +| +|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|//----------------------------------------------------------------------- +| +|// Macros to clear or set tags. 
+|.macro cleartp, reg +| nihf reg, 0x7fff +|.endmacro +|.macro settp, reg, tp +| oihf reg, tp<<15 +|.endmacro +|.macro settp, dst, reg, tp +| llihf dst, tp<<15 +| ogr dst, reg +|.endmacro +|.macro setint, reg +| settp reg, LJ_TISNUM +|.endmacro +|.macro setint, dst, reg +| settp dst, reg, LJ_TISNUM +|.endmacro +| +|// Macros to test operand types. +|.macro checktp_nc, reg, tp, target +| srag ITYPE, reg, 47 // ITYPE = reg >> 47 (arithmetic); reg itself is not modified. +| clfi ITYPE, tp // Compare the shifted tag with tp. +| jne target // Branch if the tag differs. +|.endmacro +|.macro checktp, reg, tp, target +| srag ITYPE, reg, 47 // ITYPE = reg >> 47 (arithmetic). +| cleartp reg // Mask the tag bits out of reg. +| clfi ITYPE, tp +| jne target // Branch if the tag differs (reg is already untagged). +|.endmacro +|.macro checktptp, src, tp, target +| srag ITYPE, src, 47 // Same instruction sequence as checktp_nc. +| clfi ITYPE, tp +| jne target +|.endmacro +|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro +| +|.macro checknumx, reg, target, jump +| srag ITYPE, reg, 47 // ITYPE = reg >> 47 (arithmetic). +| clfi ITYPE, LJ_TISNUM // Unsigned compare against LJ_TISNUM. +| jump target // Branch mnemonic is supplied by the wrapper macro. +|.endmacro +|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro +|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro +|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro +|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro +|.macro checknumber, src, target; checknumx src, target, jh; .endmacro +| +|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47) +|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47) +| +|.define PC_OP, -1(PC) // Byte operands below are addressed relative to PC. +|.define PC_RA, -2(PC) +|.define PC_RB, -4(PC) +|.define PC_RC, -3(PC) +|.define PC_RD, -4(PC) // Same address as PC_RB. +| +|.macro branchPC, reg +| // Must not clobber condition code. +| sllg TMPR1, reg, 2 // TMPR1 = reg*4. +| lay PC, (-BCBIAS_J*4)(TMPR1, PC) // PC = PC + reg*4 - BCBIAS_J*4. +|.endmacro +| +|// Decrement hashed hotcount and trigger trace recorder if zero. 
+|.macro hotloop, reg +| lgr reg, PC +| srlg reg, reg, 1 +| nill reg, HOTCOUNT_PCMASK +| afi reg, GG_DISP2HOT +| agr reg, DISPATCH +| lg TMPR1, (reg) +| aghi TMPR1, -HOTCOUNT_LOOP +| stg TMPR1, (reg) +| jl ->vm_hotloop +|.endmacro +| +|// Set current VM state. +|.macro set_vmstate, st +| lghi TMPR1, ~LJ_VMST_..st // TMPR1 = complement of the LJ_VMST_<st> constant. +| stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH) // NOTE(review): 8-byte store; confirm it matches the width of global_State.vmstate. +|.endmacro +| +|// Synthesize binary floating-point constants. +|.macro bfpconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. +| llihh tmp, 0x4338 // tmp = 0x4338000000000000 (bit pattern of that double). +| ldgr reg, tmp // Copy the bit pattern from the GPR into the FPR. +|.endmacro +| +|// Move table write barrier back. Overwrites reg. +|.macro barrierback, tab, reg +| ni tab->marked, ~LJ_GC_BLACK // black2gray(tab) +| lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH) // reg = current gc.grayagain head. +| stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH) // tab becomes the new head. +| stg reg, tab->gclist // Link tab to the previous head. +|.endmacro + +#if !LJ_DUALNUM +#error "Only dual-number mode supported for s390x target" +#endif + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | tmll PC, FRAME_P // Test the FRAME_P bits of the frame link in PC. + | je ->cont_dispatch // All tested bits zero: go to cont_dispatch. + | + | // Return from pcall or xpcall fast func. + | nill PC, -8 // Clear the low 3 bits of PC, leaving the frame delta. + | sgr BASE, PC // Restore caller base. + | lay RA, -8(RA, PC) // Rebase RA and prepend one result. + | lg PC, -8(BASE) // Fetch PC of previous frame. + | // Prepending may overwrite the pcall frame, so do it at the end. + | load_true ITYPE + | stg ITYPE, 0(RA, BASE) // Prepend true to results. + | + |->vm_returnc: + | aghi RD, 1 // RD = nresults+1 + | je ->vm_unwind_yield // Sum is zero: take the yield path. + | st RD, SAVE_MULTRES // 32-bit store of nresults+1. + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z // Handle regular return to Lua. 
+ | + |->vm_return: + | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return + | lghi TMPR1, FRAME_C + | xgr PC, TMPR1 + | tmll PC, FRAME_TYPE + | jne ->vm_returnp + | + | // Return to C. + | set_vmstate C + | nill PC, -8 + | sgr PC, BASE + | lcgr PC, PC // Previous base = BASE - delta. + | + | aghi RD, -1 + | je >2 + |1: // Move results down. + | lg RB, 0(BASE, RA) + | stg RB, -16(BASE) + | la BASE, 8(BASE) + | aghi RD, -1 + | jne <1 + |2: + | lg L:RB, SAVE_L + | stg PC, L:RB->base + |3: + | llgf RD, SAVE_MULTRES + | lgf RA, SAVE_NRES // RA = wanted nresults+1 + |4: + | cgr RA, RD + | jne >6 // More/less results wanted? + |5: + | lay BASE, -16(BASE) + | stg BASE, L:RB->top + | + |->vm_leave_cp: + | lg RA, SAVE_CFRAME // Restore previous C frame. + | stg RA, L:RB->cframe + | lghi CRET1, 0 // Ok return status for vm_pcall. + | + |->vm_leave_unw: + | restoreregs + | br r14 + | + |6: + | jl >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + | cg BASE, L:RB->maxstack + | jh >8 + | lghi TMPR1, LJ_TNIL + | stg TMPR1, -16(BASE) + | la BASE, 8(BASE) + | aghi RD, 1 + | j <4 + | + |7: // Fewer results wanted. + | cghi RA, 0 + | je <5 // But check for LUA_MULTRET+1. + | sgr RA, RD // Negative result! + | sllg TMPR1, RA, 3 + | la BASE, 0(TMPR1, BASE) // Correct top. + | j <5 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | stg BASE, L:RB->top // Save current top held in BASE (yes). + | st RD, SAVE_MULTRES // Need to fill only remainder with nil. + | lgr CARG2, RA + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->top // Need the (realloced) L->top in BASE. 
+  |  j <3
+  |
+  |->vm_unwind_yield:
+  |  // Enters the common unwind tail with the status already in CRET1.
+  |  lghi CRET1, LUA_YIELD
+  |  j ->vm_unwind_c_eh
+  |
+  |->vm_unwind_c:  // Unwind C stack, return from vm_pcall.
+  |  // (void *cframe, int errcode)
+  |  lgr sp, CARG1
+  |  // The status must end up in CRET1 (same contract as vm_unwind_yield above).
+  |  // Operands are destination-first: sign-extend errcode from CARG2 into
+  |  // CRET1. CARG1 (which CRET1 aliases, r2) has already been consumed above.
+  |  lgfr CRET1, CARG2  // Error return status for vm_pcall.
+  |->vm_unwind_c_eh:  // Landing pad for external unwinder.
+  |  // CRET1 = return status. Only RB and TMPR1 are clobbered below.
+  |  lg L:RB, SAVE_L
+  |  lg GL:RB, L:RB->glref
+  |  lghi TMPR1, ~LJ_VMST_C
+  |  stg TMPR1, GL:RB->vmstate
+  |  j ->vm_leave_unw
+  |
+  |->vm_unwind_ff:  // Unwind C stack, return from ff pcall.
+  |  // (void *cframe)
+  |  nill CARG1, CFRAME_RAWMASK  // Assumes high 48-bits set in CFRAME_RAWMASK.
+  |  lgr sp, CARG1
+  |->vm_unwind_ff_eh:  // Landing pad for external unwinder.
+  |  lg L:RB, SAVE_L
+  |  lghi RD, 1+1  // Really 1+2 results, incr. later.
+  |  lg BASE, L:RB->base
+  |  lg DISPATCH, L:RB->glref  // Setup pointer to dispatch table.
+  |  la DISPATCH, GG_G2DISP(DISPATCH)
+  |  lg PC, -8(BASE)  // Fetch PC of previous frame.
+  |  load_false RA
+  |  lg RB, 0(BASE)
+  |  stg RA, -16(BASE)  // Prepend false to error message.
+  |  stg RB, -8(BASE)
+  |  lghi RA, -16  // Results start at BASE+RA = BASE-16.
+  |  set_vmstate INTERP
+  |  j ->vm_returnc  // Increments RD/MULTRES and returns.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Grow stack for calls -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_growstack_c:  // Grow stack for C function.
+  |  lghi CARG2, LUA_MINSTACK
+  |  j >2
+  |
+  |->vm_growstack_v:  // Grow stack for vararg Lua function.
+  |  aghi RD, -16  // LJ_FR2
+  |  j >1
+  |
+  |->vm_growstack_f:  // Grow stack for fixarg Lua function.
+  |  // BASE = new base, RD = nargs+1, RB = L, PC = first PC
+  |  // RD = BASE + (nargs+1)*8 - 8, i.e. the new top.
+  |  sllg RD, NARGS:RD, 3
+  |  lay RD, -8(RD, BASE)
+  |1:
+  |  llgc RA, (PC2PROTO(framesize)-4)(PC)
+  |  la PC, 4(PC)  // Must point after first instruction.
+ | stg BASE, L:RB->base + | stg RD, L:RB->top + | stg PC, SAVE_PC + | lgr CARG2, RA + |2: + | // RB = L, L->base = new base, L->top = top + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lg RD, L:RB->top + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | sgr RD, BASE + | srlg RD, RD, 3 + | aghi NARGS:RD, 1 + | // BASE = new base, RB = LFUNC, RD = nargs+1 + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | lgr L:RB, CARG1 + | stg CARG1, SAVE_L + | lgr RA, CARG2 + | lghi PC, FRAME_CP + | lghi RD, 0 + | la KBASE, CFRAME_RESUME(sp) + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | aghi DISPATCH, GG_G2DISP + | stg RD, SAVE_PC // Any value outside of bytecode is ok. + | stg RD, SAVE_CFRAME + | st RD, SAVE_NRES + | stg RD, SAVE_ERRF + | stg KBASE, L:RB->cframe + | clm RD, 1, L:RB->status + | je >2 // Initial resume (like a call). + | + | // Resume after yield (like a return). + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | stc RD, L:RB->status + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, RA + | srlg RD, RD, 3 + | aghi RD, 1 // RD = nresults+1 + | sgr RA, BASE // RA = resultofs + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z + | j ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | lghi PC, FRAME_CP + | llgfr CARG4, CARG4 + | stg CARG4, SAVE_ERRF + | j >1 + | + |->vm_call: // Setup C frame and enter VM. 
+ | // (lua_State *L, TValue *base, int nres1) + | saveregs + | lghi PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | st CARG3, SAVE_NRES + | lgr L:RB, CARG1 + | stg CARG1, SAVE_L + | lgr RA, CARG2 // Caveat: RA = CARG3. + | + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | lg KBASE, L:RB->cframe // Add our C frame to cframe chain. + | stg KBASE, SAVE_CFRAME + | stg L:RB, SAVE_PC // Any value outside of bytecode is ok. + | aghi DISPATCH, GG_G2DISP + | stg sp, L:RB->cframe + | + |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). + | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH) + | set_vmstate INTERP + | lg BASE, L:RB->base // BASE = old base (used in vmeta_call). + | agr PC, RA + | sgr PC, BASE // PC = frame delta + frame type + | + | lg RD, L:RB->top + | sgr RD, RA + | srlg NARGS:RD, NARGS:RD, 3 + | aghi NARGS:RD, 1 // RD = nargs+1 + | + |->vm_call_dispatch: + | lg LFUNC:RB, -16(RA) + | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. + | + |->vm_call_dispatch_f: + | lgr BASE, RA + | ins_call + | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | lgr L:RB, CARG1 + | stg L:RB, SAVE_L + | stg L:RB, SAVE_PC // Any value outside of bytecode is ok. + | + | lg KBASE, L:RB->stack // Compute -savestack(L, L->top). + | sg KBASE, L:RB->top + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | lghi TMPR0, 0 + | stg TMPR0, SAVE_ERRF // No error function. + | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame. + | aghi DISPATCH, GG_G2DISP + | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). + | + | lg KBASE, L:RB->cframe // Add our C frame to cframe chain. 
+ | stg KBASE, SAVE_CFRAME + | stg sp, L:RB->cframe + | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH) + | + | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | // TValue * (new base) or NULL returned in r2 (CRET1/). + | cghi CRET1, 0 + | je ->vm_leave_cp // No base? Just remove C frame. + | lgr RA, CRET1 + | lghi PC, FRAME_CP + | j <2 // Else continue with the call. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) + | agr RA, BASE + | nill PC, -8 + | lgr RB, BASE + | sgr BASE, PC // Restore caller BASE. + | sllg TMPR1, RD, 3 + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -8(RA, TMPR1) // Ensure one valid arg. + | lgr RC, RA // ... in [RC] + | lg PC, -24(RB) // Restore PC from [cont|PC]. + | lg RA, -32(RB) + |.if FFI + | clfi RA, 1 + | jle >1 + |.endif + | lg LFUNC:KBASE, -16(BASE) + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, (PC2PROTO(k))(KBASE) + | // BASE = base, RC = result, RB = meta base + | br RA // Jump to continuation. + | + |.if FFI + |1: + | je ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: Tail call from C function. + | sgr RB, BASE + | srl RB, 3 + | ahi RB, -3 + | llgfr RD, RB + | j ->vm_call_tail + |.endif + | + |->cont_cat: // BASE = base, RC = result, RB = mbase + | llgc RA, PC_RB + | sllg RA, RA, 3 + | aghi RB, -32 + | la RA, 0(RA, BASE) + | sgr RA, RB + | je ->cont_ra + | lcgr RA, RA + | srlg RA, RA, 3 + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgfr CARG3, RA // Caveat: RA == CARG3. 
+ | lg TMPR0, 0(RC) + | stg TMPR0, 0(RB) + | lgr CARG2, RB + | j ->BC_CAT_Z + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | stg STR:RC, SAVE_TMP + | la RC, SAVE_TMP + | llgc TMPR1, PC_OP + | cghi TMPR1, BC_GGET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv. + | stg TAB:RA, 0(RB) + | j >2 + | + |->vmeta_tgetb: + | llgc RC, PC_RC + | setint RC + | stg RC, SAVE_TMP + | la RC, SAVE_TMP + | j >1 + | + |->vmeta_tgetv: + | llgc RC, PC_RC // Reload TValue *k from RC. + | sllg RC, RC, 3 + | la RC, 0(RC, BASE) + |1: + | llgc RB, PC_RB // Reload TValue *t from RB. + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + |2: + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, RB + | lgr CARG3, RC + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | je >3 + |->cont_ra: // BASE = base, RC = result + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RB, 0(RC) + | stg RB, 0(RA, BASE) + | ins_next + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | lg RA, L:RB->top + | stg PC, -24(RA) // [cont|PC] + | la PC, FRAME_CONT(RA) + | sgr PC, BASE + | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here. + | lghi NARGS:RD, 2+1 // 2 args for func(t, k). + | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | lgr CARG1, TAB:RB + | lgfr CARG2, RC + | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in r2 (CRET1). 
+ | llgc RA, PC_RA + | ltgr RC, CRET1 + | jne ->BC_TGETR_Z + | lghi ITYPE, LJ_TNIL + | j ->BC_TGETR2_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | stg STR:RC, SAVE_TMP + | la RC, SAVE_TMP + | llgc TMPR0, PC_OP + | cghi TMPR0, BC_GSET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv. + | stg TAB:RA, 0(RB) + | j >2 + | + |->vmeta_tsetb: + | llgc RC, PC_RC + | setint RC + | stg RC, SAVE_TMP + | la RC, SAVE_TMP + | j >1 + | + |->vmeta_tsetv: + | llgc RC, PC_RC // Reload TValue *k from RC. + | sllg RC, RC, 3 + | la RC, 0(RC, BASE) + |1: + | llgc RB, PC_RB // Reload TValue *t from RB. + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + |2: + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, RB + | lgr CARG3, RC + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | je >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | stg RB, 0(RC) + |->cont_nop: // BASE = base, (RC = result) + | ins_next + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | lg RA, L:RB->top + | stg PC, -24(RA) // [cont|PC] + | llgc RC, PC_RA + | // Copy value to third argument. + | sllg RB, RC, 3 + | lg RB, 0(RB, BASE) + | stg RB, 16(RA) + | la PC, FRAME_CONT(RA) + | sgr PC, BASE + | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here. + | lghi NARGS:RD, 3+1 // 3 args for func(t, k, v). 
+ | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | lg L:CARG1, SAVE_L + | lgr CARG2, TAB:RB + | stg BASE, L:CARG1->base + | lgfr CARG3, RC + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // TValue * returned in r2 (CRET1). + | lgr RC, CRET1 + | llgc RA, PC_RA + | j ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | la CARG2, 0(RA, BASE) + | la CARG3, 0(RD, BASE) // Caveat: RA == CARG3 + | lgr CARG1, L:RB + | llgc CARG4, PC_OP + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). + |3: + | lgr RC, CRET1 + | lg BASE, L:RB->base + | clgfi RC, 1 + | jh ->vmeta_binop + |4: + | la PC, 4(PC) + | jl >6 + |5: + | llgh RD, PC_RD + | branchPC RD + |6: + | ins_next + | + |->cont_condt: // BASE = base, RC = result + | la PC, 4(PC) + | lg ITYPE, 0(RC) + | srag ITYPE, ITYPE, 47 + | lghi TMPR0, LJ_TISTRUECOND + | clr ITYPE, TMPR0 // Branch if result is true. + | jl <5 + | j <6 + | + |->cont_condf: // BASE = base, RC = result + | lg ITYPE, 0(RC) + | srag ITYPE, ITYPE, 47 + | lghi TMPR0, LJ_TISTRUECOND + | clr ITYPE, TMPR0 // Branch if result is false. + | j <4 + | + |->vmeta_equal: + | cleartp TAB:RD + | lay PC, -4(PC) + | lgr CARG2, RA + | lgfr CARG4, RB + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG3, RD + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). 
+ | j <3 + | + |->vmeta_equal_cd: + |.if FFI + | lay PC, -4(PC) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG1, L:RB + | llgf CARG2, -4(PC) + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). + | j <3 + |.endif + | + |->vmeta_istype: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | llgfr CARG2, RA + | llgfr CARG3, RD // Caveat: CARG3 == RA. + | lgr L:CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | lg BASE, L:RB->base + | j <6 + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vno: + | llgc RB, PC_RB + | llgc RC, PC_RC + |->vmeta_arith_vn: + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | la RB, 0(RB, BASE) + | la RC, 0(RC, KBASE) + | j >1 + | + |->vmeta_arith_nvo: + | llgc RC, PC_RC + | llgc RB, PC_RB + |->vmeta_arith_nv: + | sllg RC, RC, 3 + | sllg RB, RB, 3 + | la TMPR1, 0(RC, KBASE) + | la RC, 0(RB, BASE) + | lgr RB, TMPR1 + | j >1 + | + |->vmeta_unm: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | la RC, 0(RD, BASE) + | lgr RB, RC + | j >1 + | + |->vmeta_arith_vvo: + | llgc RB, PC_RB + | llgc RC, PC_RC + |->vmeta_arith_vv: + | sllg RC, RC, 3 + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + | la RC, 0(RC, BASE) + |1: + | llgc RA, PC_RA + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | llgc CARG5, PC_OP // Caveat: CARG5 == RD. + | lgr CARG2, RA + | lgr CARG3, RB // Caveat: CARG3 == RA. + | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out). + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | cghi CRET1, 0 + | lgr RC, CRET1 + | je ->cont_nop + | + | // Call metamethod for binary op. 
+ |->vmeta_binop: + | // BASE = base, RC = new base, stack = cont/func/o1/o2 + | lgr RA, RC + | sgr RC, BASE + | stg PC, -24(RA) // [cont|PC] + | la PC, FRAME_CONT(RC) + | lghi NARGS:RD, 2+1 // 2 args for func(o1, o2). + | j ->vm_call_dispatch + | + |->vmeta_len: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | la CARG2, 0(RD, BASE) + | lgr L:CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_len // (lua_State *L, TValue *o) + | // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1). + | lgr RC, CRET1 + | lg BASE, L:RB->base +#if LJ_52 + | cghi RC, 0 + | jne ->vmeta_binop // Binop call for compatibility. + | llgh RD, PC_RD + | sllg RD, RD, 3 + | lg TAB:CARG1, 0(RD, BASE) + | cleartp TAB:CARG1 + | j ->BC_LEN_Z +#else + | j ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call_ra: + | la RA, 16(RA, BASE) // RA previously set to RA*8. + |->vmeta_call: // Resolve and call __call metamethod. + | // BASE = old base, RA = new base, RC = nargs+1, PC = return + | stg NARGS:RD, SAVE_TMP // Save RA, RC for us (not sure about this). + | lgr RB, RA + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lay CARG2, -16(RA) + | sllg RD, RD, 3 + | lay CARG3, -8(RA, RD) // Caveat: CARG3 == RA. + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | lgr RA, RB + | lg L:RB, SAVE_L + | lg BASE, L:RB->base + | lg NARGS:RD, SAVE_TMP + | lg LFUNC:RB, -16(RA) + | aghi NARGS:RD, 1 // 32-bit on x64. + | // This is fragile. L->base must not move, KBASE must always be defined. + | cgr KBASE, BASE // Continue with CALLT if flag set. + | je ->BC_CALLT_Z + | cleartp LFUNC:RB + | lgr BASE, RA + | ins_call // Otherwise call resolved metamethod. 
+ | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG2, RA + | lgr CARG1, RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_for // (lua_State *L, TValue *base) + | lg BASE, L:RB->base + | llgc OP, PC_OP + | llgc RA, PC_RA + | llgh RD, PC_RD + | sllg TMPR1, OP, 3 + | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI. + | br TMPR1 + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | clfi NARGS:RD, 1+1; jl ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | clfi NARGS:RD, 2+1; jl ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name, op + | .ffunc_1 name + | lg TMPR0, 0(BASE) + | checknumtp TMPR0, ->fff_fallback + | op f0, 0(BASE) + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc_n name, ld + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc_2 name + | lg TMPR1, 0(BASE) + | lg TMPR0, 8(BASE) + | ld FARG1, 0(BASE) + | ld FARG2, 8(BASE) + | checknumtp TMPR1, ->fff_fallback + | checknumtp TMPR0, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses label 1. 
+ |.macro ffgccheck + | lg RB, (DISPATCH_GL(gc.total))(DISPATCH) + | clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH) + | jl >1 + | brasl r14, ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + | + |.ffunc_1 assert + | lg RB, 0(BASE) + | srag ITYPE, RB, 47 + | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES + | lg RB, 0(BASE) + | stg RB, -16(BASE) + | ahi RD, -2 + | je >2 + | lgr RA, BASE + |1: + | la RA, 8(RA) + | lg RB, 0(RA) + | stg RB, -16(RA) + | brct RD, <1 + |2: + | llgf RD, SAVE_MULTRES + | j ->fff_res_ + | + |.ffunc_1 type + | lg RC, 0(BASE) + | srag RC, RC, 47 + | lghi RB, LJ_TISNUM + | clgr RC, RB + | jnl >1 + | lgr RC, RB + |1: + | lghi TMPR0, -1 + | xgr RC, TMPR0 + |2: + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | sllg RC, RC, 3 + | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB) + | lg PC, -8(BASE) + | settp STR:RC, LJ_TSTR + | stg STR:RC, -16(BASE) + | j ->fff_res1 + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | lg TAB:RB, 0(BASE) + | lg PC, -8(BASE) + | checktab TAB:RB, >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | lg TAB:RB, TAB:RB->metatable + |2: + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -16(BASE) + | cghi TAB:RB, 0 + | je ->fff_res1 + | settp TAB:RC, TAB:RB, LJ_TTAB + | stg TAB:RC, -16(BASE) // Store metatable as default result. + | lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH) + | llgf RA, TAB:RB->hmask + | n RA, STR:RC->sid + | settp STR:RC, LJ_TSTR + | mghi RA, #NODE + | ag NODE:RA, TAB:RB->node + |3: // Rearranged logic, because we expect _not_ to find the key. + | cg STR:RC, NODE:RA->key + | je >5 + |4: + | ltg NODE:RA, NODE:RA->next + | jne <3 + | j ->fff_res1 // Not found, keep default result. + |5: + | lg RB, NODE:RA->val + | cghi RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. 
+ | stg RB, -16(BASE) // Return value of mt.__metatable. + | j ->fff_res1 + | + |6: + | clfi ITYPE, LJ_TUDATA; je <1 + | clfi ITYPE, LJ_TISNUM; jh >7 + | lhi ITYPE, LJ_TISNUM + |7: + | lhi TMPR0, -1 + | xr ITYPE, TMPR0 // not ITYPE + | llgfr ITYPE, ITYPE + | sllg ITYPE, ITYPE, 3 + | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH) + | j <2 + | + |.ffunc_2 setmetatable + | lg TAB:RB, 0(BASE) + | lgr TAB:TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback + | // Fast path: no mt for table yet and not clearing the mt. + | lghi TMPR0, 0 + | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback + | lg TAB:RA, 8(BASE) + | checktab TAB:RA, ->fff_fallback + | stg TAB:RA, TAB:RB->metatable + | lg PC, -8(BASE) + | stg TAB:TMPR1, -16(BASE) // Return original table. + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | je >1 + | // Possible write barrier. Table is black, but skip iswhite(mt) check. + | barrierback TAB:RB, RC + |1: + | j ->fff_res1 + | + |.ffunc_2 rawget + | lg TAB:CARG2, 0(BASE) + | checktab TAB:CARG2, ->fff_fallback + | la CARG3, 8(BASE) + | lg CARG1, SAVE_L + | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // cTValue * returned in r2 (CRET1). + | // Copy table slot. + | lg RB, 0(CRET1) + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | clfi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. + | lg RB, 0(BASE) + | checknumber RB, ->fff_fallback + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | lg PC, -8(BASE) + | lg STR:RB, 0(BASE) + | checktp_nc STR:RB, LJ_TSTR, >3 + | // A __tostring method in the string base metatable is ignored. 
+ |2: + | stg STR:RB, -16(BASE) + | j ->fff_res1 + |3: // Handle numbers inline, unless a number base metatable is present. + | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1 + | lghi TMPR0, 0 + | cg TMPR0, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH) + | jne ->fff_fallback + | ffgccheck // Caveat: uses label 1. + | lg L:RB, SAVE_L + | stg BASE, L:RB->base // Add frame since C call can throw. + | stg PC, SAVE_PC // Redundant (but a defined value). + | lgr CARG2, BASE // Otherwise: CARG2 == BASE + | lgr L:CARG1, L:RB + | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o) + | // GCstr returned in r2 (CRET1). + | lg BASE, L:RB->base + | settp STR:RB, CRET1, LJ_TSTR + | j <2 + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | je >2 // Missing 2nd arg? + |1: + | lg CARG1, 0(BASE) + | lg PC, -8(BASE) + | checktab CARG1, ->fff_fallback + | lgr RB, BASE // Save BASE. + | la CARG2, 8(BASE) + | lay CARG3, -16(BASE) + | brasl r14, extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // 1=found, 0=end, -1=error returned in r2 (CRET1). + | lgr BASE, RB // Restore BASE. + | ltr RD, CRET1; jh ->fff_res2 // Found key/value. + | jl ->fff_fallback_2 // Invalid key. + | // End of traversal: return nil. + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -16(BASE) + | j ->fff_res1 + |2: // Set missing 2nd arg to nil. 
+ | lghi TMPR0, LJ_TNIL + | stg TMPR0, 8(BASE) + | j <1 + | + |.ffunc_1 pairs + | lg TAB:RB, 0(BASE) + | lgr TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback +#if LJ_52 + | ltg TMPR0, TAB:RB->metatable; jne ->fff_fallback +#endif + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | lg CFUNC:RD, CFUNC:RD->upvalue[0] + | settp CFUNC:RD, LJ_TFUNC + | lg PC, -8(BASE) + | stg CFUNC:RD, -16(BASE) + | stg TMPR1, -8(BASE) + | lghi TMPR0, LJ_TNIL + | stg TMPR0, 0(BASE) + | lghi RD, 1+3 + | j ->fff_res + | + |.ffunc_2 ipairs_aux + | lg TAB:RB, 0(BASE) + | checktab TAB:RB, ->fff_fallback + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | lg PC, -8(BASE) + | aghi RA, 1 + | setint ITYPE, RA + | stg ITYPE, -16(BASE) + | cl RA, TAB:RB->asize; jhe >2 // Not in array part? + | lg RD, TAB:RB->array + | lgfr TMPR1, RA + | sllg TMPR1, TMPR1, 3 + | la RD, 0(TMPR1, RD) + |1: + | lg TMPR0, 0(RD) + | cghi TMPR0, LJ_TNIL; je ->fff_res0 + | // Copy array slot. + | stg TMPR0, -8(BASE) + |->fff_res2: + | lghi RD, 1+2 + | j ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | lt TMPR0, TAB:RB->hmask; je ->fff_res0 + | lgr CARG1, TAB:RB + | lgfr CARG2, RA + | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in r2 (CRET1). 
+ | ltgr RD, CRET1 + | jne <1 + |->fff_res0: + | lghi RD, 1+0 + | j ->fff_res + | + |.ffunc_1 ipairs + | lg TAB:RB, 0(BASE) + | lgr TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback +#if LJ_52 + | lghi TMPR0, 0 + | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback +#endif + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | lg CFUNC:RD, CFUNC:RD->upvalue[0] + | settp CFUNC:RD, LJ_TFUNC + | lg PC, -8(BASE) + | stg CFUNC:RD, -16(BASE) + | stg TMPR1, -8(BASE) + | llihf RD, LJ_TISNUM<<15 + | stg RD, 0(BASE) + | lghi RD, 1+3 + | j ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc_1 pcall + | la RA, 16(BASE) + | aghi NARGS:RD, -1 + | lghi PC, 16+FRAME_PCALL + |1: + | llgc RB, (DISPATCH_GL(hookmask))(DISPATCH) + | srlg RB, RB, HOOK_ACTIVE_SHIFT(r0) + | nill RB, 1 // High bits already zero (from load). + | agr PC, RB // Remember active hook before pcall. + | // Note: this does a (harmless) copy of the function to the PC slot, too. + | lgr KBASE, RD + |2: + | sllg TMPR1, KBASE, 3 + | lg RB, -24(TMPR1, RA) + | stg RB, -16(TMPR1, RA) + | aghi KBASE, -1 + | jh <2 + | j ->vm_call_dispatch + | + |.ffunc_2 xpcall + | lg LFUNC:RA, 8(BASE) + | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback + | lg LFUNC:RB, 0(BASE) // Swap function and traceback. + | stg LFUNC:RA, 0(BASE) + | stg LFUNC:RB, 8(BASE) + | la RA, 24(BASE) + | aghi NARGS:RD, -2 + | lghi PC, 24+FRAME_PCALL + | j <1 + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | lg L:RB, 0(BASE) + | lgr L:TMPR0, L:RB // Save type for checktptp. 
+ | cleartp L:RB + |.else + |.ffunc coroutine_wrap_aux + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | lg L:RB, CFUNC:RB->upvalue[0].gcr + | cleartp L:RB + |.endif + | lg PC, -8(BASE) + | stg PC, SAVE_PC + | stg L:RB, SAVE_TMP + |.if resume + | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback + |.endif + | ltg TMPR0, L:RB->cframe; jne ->fff_fallback + | cli L:RB->status, LUA_YIELD; jh ->fff_fallback + | lg RA, L:RB->top + | je >1 // Status != LUA_YIELD (i.e. 0)? + | cg RA, L:RB->base // Check for presence of initial func. + | je ->fff_fallback + | lg PC, -8(RA) // Move initial function up. + | stg PC, 0(RA) + | la RA, 8(RA) + |1: + | sllg TMPR1, NARGS:RD, 3 + |.if resume + | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread). + |.else + | lay PC, -8(TMPR1, RA) // Check stack space (-1). + |.endif + | clg PC, L:RB->maxstack; jh ->fff_fallback + | stg PC, L:RB->top + | + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + |.if resume + | la BASE, 8(BASE) // Keep resumed thread in stack for GC. + |.endif + | stg BASE, L:RB->top + |.if resume + | lay RB, -24(TMPR1, BASE) // RB = end of source for stack move. + |.else + | lay RB, -16(TMPR1, BASE) // RB = end of source for stack move. + |.endif + | sgr RB, PC // Relative to PC. + | + | cgr PC, RA + | je >3 + |2: // Move args to coroutine. + | lg RC, 0(RB, PC) + | stg RC, -8(PC) + | lay PC, -8(PC) + | cgr PC, RA + | jne <2 + |3: + | lgr CARG2, RA + | lg L:CARG1, SAVE_TMP + | lghi CARG3, 0 + | lghi CARG4, 0 + | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | + | lg L:RB, SAVE_L + | lg L:PC, SAVE_TMP + | lg BASE, L:RB->base + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | + | clfi CRET1, LUA_YIELD + | jh >8 + |4: + | lg RA, L:PC->base + | lg KBASE, L:PC->top + | stg RA, L:PC->top // Clear coroutine stack. + | lgr PC, KBASE + | sgr PC, RA + | je >6 // No results? + | la RD, 0(PC, BASE) + | llgfr PC, PC + | srlg PC, PC, 3 + | clg RD, L:RB->maxstack + | jh >9 // Need to grow stack? 
+ | + | lgr RB, BASE + | sgr RB, RA + |5: // Move results from coroutine. + | lg RD, 0(RA) + | stg RD, 0(RA, RB) + | la RA, 8(RA) + | cgr RA, KBASE + | jne <5 + |6: + |.if resume + | la RD, 2(PC) // nresults+1 = 1 + true + results. + | load_true ITYPE // Prepend true to results. + | stg ITYPE, -8(BASE) + |.else + | la RD, 1(PC) // nresults+1 = 1 + results. + |.endif + |7: + | lg PC, SAVE_PC + | st RD, SAVE_MULTRES + |.if resume + | lghi RA, -8 + |.else + | lghi RA, 0 + |.endif + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z + | j ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | load_false ITYPE // Prepend false to results. + | stg ITYPE, -8(BASE) + | lg RA, L:PC->top + | aghi RA, -8 + | stg RA, L:PC->top // Clear error from coroutine stack. + | // Copy error message. + | lg RD, 0(RA) + | stg RD, 0(BASE) + | lghi RD, 1+2 // nresults+1 = 1 + false + error. + | j <7 + |.else + | lgr CARG2, L:PC + | lgr CARG1, L:RB + | brasl r14, extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + | // Error function does not return. + |.endif + | + |9: // Handle stack expansion on return from yield. + | lg L:RA, SAVE_TMP + | stg KBASE, L:RA->top // Undo coroutine stack clearing. + | lgr CARG2, PC + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg L:PC, SAVE_TMP + | lg BASE, L:RB->base + | j <4 // Retry the stack move. 
+ |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | lg L:RB, SAVE_L + | lg TMPR0, L:RB->cframe + | tmll TMPR0, CFRAME_RESUME + | je ->fff_fallback + | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | stg RD, L:RB->top + | lghi RD, 0 + | stg RD, L:RB->cframe + | lghi CRET1, LUA_YIELD + | stc CRET1, L:RB->status + | j ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.ffunc_1 math_abs + | lg RB, 0(BASE) + | checkint RB, >3 + | lpr RB, RB; jo >2 + |->fff_resbit: + |->fff_resi: + | setint RB + |->fff_resRB: + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + |2: + | llihh RB, 0x41e0 // 2^31 + | j ->fff_resRB + |3: + | jh ->fff_fallback + | nihh RB, 0x7fff // Clear sign bit. + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |.ffunc_n math_sqrt, sqdb + |->fff_resf0: + | lg PC, -8(BASE) + | stdy f0, -16(BASE) + | // fallthrough + | + |->fff_res1: + | lghi RD, 1+1 + |->fff_res: + | st RD, SAVE_MULTRES + |->fff_res_: + | tmll PC, FRAME_TYPE + | jne >7 + |5: + | llgc TMPR1, PC_RB + | clgr TMPR1, RD // More results expected? + | jh >6 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8 + | ins_next + | + |6: // Fill up results with nil. + | sllg TMPR1, RD, 3 + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -24(TMPR1, BASE) + | la RD, 1(RD) + | j <5 + | + |7: // Non-standard return case. + | lghi RA, -16 // Results start at BASE+RA = BASE-16. + | j ->vm_return + | + |.macro math_round, func + | .ffunc math_ .. func + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checknumx RB, ->fff_resRB, je + | jh ->fff_fallback + | brasl r14, ->vm_ .. 
func + | cfdbr RB, 0, f0 + | jo ->fff_resf0 + | llgfr RB, RB + | j ->fff_resi + |.endmacro + | + | math_round floor + | math_round ceil + | + |.ffunc math_log + | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. + | lg TMPR0, 0(BASE) + | ld FARG1, 0(BASE) + | checknumtp TMPR0, ->fff_fallback + | brasl r14, extern log + | j ->fff_resf0 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | brasl r14, extern func + | j ->fff_resf0 + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | brasl r14, extern func + | j ->fff_resf0 + |.endmacro + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | lg TMPR0, 0(BASE) + | ld FARG1, 0(BASE) + | lg CARG1, 8(BASE) + | checknumtp TMPR0, ->fff_fallback + | checkinttp CARG1, ->fff_fallback + | lgfr CARG1, CARG1 + | brasl r14, extern ldexp // (double, int) + | j ->fff_resf0 + | + |.ffunc_n math_frexp + | la CARG1, SAVE_TMP + | brasl r14, extern frexp + | llgf RB, SAVE_TMP + | lg PC, -8(BASE) + | stdy f0, -16(BASE) + | setint RB + | stg RB, -8(BASE) + | lghi RD, 1+2 + | j ->fff_res + | + |.ffunc_n math_modf + | lay CARG1, -16(BASE) + | brasl r14, extern modf // (double, double*) + | lg PC, -8(BASE) + | stdy f0, -8(BASE) + | lghi RD, 1+2 + | j ->fff_res + | + |.macro math_minmax, name, cjmp + | .ffunc name + | lghi RA, 2*8 + | sllg TMPR1, RD, 3 + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checkint RB, >4 + |1: // Handle integers. + | clgr RA, TMPR1; jhe ->fff_resRB + | lg TMPR0, -8(RA, BASE) + | checkint TMPR0, >3 + | cr RB, TMPR0 + | cjmp >2 + | lgr RB, TMPR0 + |2: + | aghi RA, 8 + | j <1 + |3: + | jh ->fff_fallback + | // Convert intermediate result to number and continue below. 
+ | cdfbr f0, RB + | ldgr f1, TMPR0 + | j >6 + |4: + | jh ->fff_fallback + |5: // Handle numbers or integers. + | clgr RA, TMPR1; jhe ->fff_resf0 + | lg RB, -8(RA, BASE) + | ldy f1, -8(RA, BASE) + | checknumx RB, >6, jl + | jh ->fff_fallback + | cdfbr f1, RB + |6: + | cdbr f0, f1 + | cjmp >7 + | ldr f0, f1 + |7: + | aghi RA, 8 + | j <5 + |.endmacro + | + | math_minmax math_min, jnh + | math_minmax math_max, jnl + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | chi NARGS:RD, 1+1; jne ->fff_fallback + | lg STR:RB, 0(BASE) + | checkstr STR:RB, ->fff_fallback + | lg PC, -8(BASE) + | ltg TMPR0, STR:RB->len + | je ->fff_res0 // Return no results for empty string. + | llgc RB, STR:RB[1] + | j ->fff_resi + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | chi NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. + | lg RB, 0(BASE) + | checkint RB, ->fff_fallback + | clfi RB, 255; jh ->fff_fallback + | strvh RB, SAVE_TMP // Store [c,0]. + | lghi TMPR1, 1 + | la RD, SAVE_TMP // Points to stack. Little-endian. + |->fff_newstr: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | llgfr CARG3, TMPR1 // Zero-extended to size_t. + | lgr CARG2, RD + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: + | // GCstr * returned in r2 (CRET1). + | lgr STR:RD, CRET1 + | lg BASE, L:RB->base + | lg PC, -8(BASE) + | settp STR:RD, LJ_TSTR + | stg STR:RD, -16(BASE) + | j ->fff_res1 + | + |.ffunc string_sub + | ffgccheck + | lghi TMPR1, -1 + | clfi NARGS:RD, 1+2; jl ->fff_fallback + | jnh >1 + | lg TMPR1, 16(BASE) + | checkint TMPR1, ->fff_fallback + |1: + | lg STR:RB, 0(BASE) + | checkstr STR:RB, ->fff_fallback + | lg ITYPE, 8(BASE) + | lgfr RA, ITYPE + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM + | jne ->fff_fallback + | llgf RC, STR:RB->len + | clr RC, TMPR1 // len < end? 
(unsigned compare) + | jl >5 + |2: + | cghi RA, 0 // start <= 0? + | jle >7 + |3: + | sr TMPR1, RA // start > end? + | jnhe ->fff_emptystr + | la RD, (#STR-1)(RA, STR:RB) + | ahi TMPR1, 1 + |4: + | j ->fff_newstr + | + |5: // Negative end or overflow. + | chi TMPR1, 0 + | jnl >6 + | ahi TMPR1, 1 + | ar TMPR1, RC // end = end+(len+1) + | j <2 + |6: // Overflow. + | lr TMPR1, RC // end = len + | j <2 + | + |7: // Negative start or underflow. + | je >8 + | agr RA, RC // start = start+(len+1) + | aghi RA, 1 + | jh <3 // start > 0? + |8: // Underflow. + | lghi RA, 1 // start = 1 + | j <3 + | + |->fff_emptystr: // Range underflow. + | lghi TMPR1, 0 + | j <4 + | + |.macro ffstring_op, name + | .ffunc_1 string_ .. name + | ffgccheck + | lg STR:CARG2, 0(BASE) + | checkstr STR:CARG2, ->fff_fallback + | lg L:RB, SAVE_L + | lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH) + | stg BASE, L:RB->base + | lg RC, SBUF:CARG1->b + | stg L:RB, SBUF:CARG1->L + | stg RC, SBUF:CARG1->w + | stg PC, SAVE_PC + | brasl r14, extern lj_buf_putstr_ .. name + | // lgr CARG1, CRET1 (nop, CARG1==CRET1) + | brasl r14, extern lj_buf_tostr + | j ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |.macro .ffunc_bit, name, kind, fdef + | fdef name + |.if kind == 2 + | bfpconst_tobit f1, RB + |.endif + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checkint RB, >1 + |.if kind > 0 + | j >2 + |.else + | j ->fff_resbit + |.endif + |1: + | jh ->fff_fallback + |.if kind < 2 + | bfpconst_tobit f1, RB + |.endif + | adbr f0, f1 + | lgdr RB, f0 + | llgfr RB, RB + |2: + |.endmacro + | + |.macro .ffunc_bit, name, kind + | .ffunc_bit name, kind, .ffunc_1 + |.endmacro + | + |.ffunc_bit bit_tobit, 0 + | j ->fff_resbit + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name, 2 + | lgr TMPR1, NARGS:RD // Save for fallback. 
+ | sllg RD, NARGS:RD, 3 + | lay RD, -16(RD, BASE) + |1: + | clgr RD, BASE + | jle ->fff_resbit + | lg RA, 0(RD) + | checkint RA, >2 + | ins RB, RA + | aghi RD, -8 + | j <1 + |2: + | jh ->fff_fallback_bit_op + | ldgr f0, RA + | adbr f0, f1 + | lgdr RA, f0 + | ins RB, RA + | aghi RD, -8 + | j <1 + |.endmacro + | + |.ffunc_bit_op bit_band, nr + |.ffunc_bit_op bit_bor, or + |.ffunc_bit_op bit_bxor, xr + | + |.ffunc_bit bit_bswap, 1 + | lrvr RB, RB + | j ->fff_resbit + | + |.ffunc_bit bit_bnot, 1 + | xilf RB, -1 + | j ->fff_resbit + | + |->fff_fallback_bit_op: + | lgr NARGS:RD, TMPR1 // Restore for fallback + | j ->fff_fallback + | + |.macro .ffunc_bit_sh, name, ins + | .ffunc_bit name, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | nill RA, 0x1f // Limit shift to 5-bits. + | ins RB, 0(RA) + | j ->fff_resbit + |.endmacro + | + |.ffunc_bit_sh bit_lshift, sll + |.ffunc_bit_sh bit_rshift, srl + |.ffunc_bit_sh bit_arshift, sra + | + |.ffunc_bit bit_rol, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | rll RB, RB, 0(RA) + | j ->fff_resbit + | + |.ffunc_bit bit_ror, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | lcr RA, RA // Right rotate equivalent to negative left rotate. + | rll RB, RB, 0(RA) + | j ->fff_resbit + | + |//----------------------------------------------------------------------- + | + |->fff_fallback_2: + | lghi NARGS:RD, 1+2 // Other args are ignored, anyway. + | j ->fff_fallback + |->fff_fallback_1: + | lghi NARGS:RD, 1+1 // Other args are ignored, anyway. + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RD = nargs+1 + | lg L:RB, SAVE_L + | lg PC, -8(BASE) // Fallback may overwrite PC. + | stg PC, SAVE_PC // Redundant (but a defined value). 
+ | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler. + | stg RD, L:RB->top + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | clg RA, L:RB->maxstack + | jh >5 // Need to grow stack. + | lgr CARG1, L:RB + | lg TMPR1, CFUNC:RD->f + | basr r14, TMPR1 // (lua_State *L) + | lg BASE, L:RB->base + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | lgr RD, CRET1 + | cghi RD, 0; jh ->fff_res // Returned nresults+1? + |1: + | lg RA, L:RB->top + | sgr RA, BASE + | srlg RA, RA, 3 + | cghi RD, 0 + | la NARGS:RD, 1(RA) + | lg LFUNC:RB, -16(BASE) + | jne ->vm_call_tail // Returned -1? + | cleartp LFUNC:RB + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | lgr RA, BASE + | tmll PC, FRAME_TYPE + | jne >3 + | llgc RB, PC_RA + | lcgr RB, RB + | sllg RB, RB, 3 + | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8 + | j ->vm_call_dispatch // Resolve again for tailcall. + |3: + | lgr RB, PC + | nill RB, -8 + | sgr BASE, RB + | j ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | lghi CARG2, LUA_MINSTACK + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lghi RD, 0 // Simulate a return 0. + | j <1 // Dumb retry (goes through ff first). + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RD = nargs+1 + | stg r14, SAVE_TMP // Save return address + | lg L:RB, SAVE_L + | stg PC, SAVE_PC // Redundant (but a defined value). + | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | lgr CARG1, L:RB + | stg RD, L:RB->top + | brasl r14, extern lj_gc_step // (lua_State *L) + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, BASE + | srlg RD, RD, 3 + | aghi NARGS:RD, 1 + | lg r14, SAVE_TMP // Restore return address. 
+ | br r14 + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + | stg r0, 0 + | stg r0, 0 + | + |->vm_rethook: // Dispatch target for return hooks. + | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH) + | tmll RD, HOOK_ACTIVE + | jne >5 + | j >1 + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH) + | tmll RD, HOOK_ACTIVE // Hook already active? + | jne >5 + | + | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT + | je >5 + | ly TMPR0, (DISPATCH_GL(hookcount))(DISPATCH) + | ahi TMPR0, -1 + | sty TMPR0, (DISPATCH_GL(hookcount))(DISPATCH) + | je >1 + | tmll RD, LUA_MASKLINE + | je >5 + |1: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG2, PC + | lgr CARG1, L:RB + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | brasl r14, extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | lg BASE, L:RB->base + |4: + | llgc RA, PC_RA + |5: + | llgc OP, PC_OP + | sllg TMPR1, OP, 3 + | llgh RD, PC_RD + | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) + | br TMPR1 + | + |->cont_hook: // Continue from hook yield. + | stg r0, 0 + | stg r0, 0 + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg RB, LFUNC:RB->pc + | llgc RD, (PC2PROTO(framesize))(RB) + | sllg RD, RD, 3 + | la RD, 0(RD, BASE) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | stg RD, L:RB->top + | lgr CARG2, PC + | la CARG1, GG_DISP2J(DISPATCH) + | stg L:RB, (DISPATCH_J(L))(DISPATCH) + | stg PC, SAVE_PC + | brasl r14, extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | j <3 + |.endif + | + |->vm_callhook: // Dispatch target for call hooks. 
+ | stg PC, SAVE_PC + |.if JIT + | j >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | stg PC, SAVE_PC + | oill PC, 1 // Marker for hot call. + | 1: + |.endif + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | stg RD, L:RB->top + | lgr CARG2, PC + | lgr CARG1, L:RB + | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // ASMFunction returned in r2 (CRET1). + | lghi TMPR0, 0 + | stg TMPR0, SAVE_PC // Invalidate for subsequent line hook. + |.if JIT + |// nill PC, -2 + |.endif + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, BASE + | lgr RB, CRET1 + | llgc RA, PC_RA + | srl RD, 3 + | ahi NARGS:RD, 1 + | llgfr RD, RD + | br RB + | + |->cont_stitch: // Trace stitching. + | stg r0, 0 + | stg r0, 0 + | + |->vm_profhook: // Dispatch target for profiler hook. + | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Called from an exit stub with the exit number on the stack. + |// The 16 bit exit number is stored with two (sign-extended) push imm8. + |->vm_exit_handler: + | stg r0, 0 + | stg r0, 0 + |->vm_exit_interp: + | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// FP value rounding. Called by math.floor/math.ceil fast functions. + |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14. + |.macro vm_round, name, mask + |->name: + | ldr f4, f0 + | lghi r0, 1 + | cdfbr f1, r0 + | didbr f0, f2, f1, mask // f0=remainder, f2=quotient. 
+ | fidbra f4, mask, f4, 0 + | ldr f0, f4 + | jnle >1 + | br r14 + |1: // partial remainder (sanity check) + | stg r0, 0 + |.endmacro + | + | vm_round vm_floor, 7 // Round towards -inf. + | vm_round vm_ceil, 6 // Round towards +inf. + | vm_round vm_trunc, 5 // Round towards 0. + | + |// FP modulo x%y. Called by BC_MOD* and vm_arith. + |->vm_mod: // NYI. + | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Assertions --------------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->assert_bad_for_arg_type: + | stg r0, 0 + | stg r0, 0 +#ifdef LUA_USE_ASSERT +#endif + | + |->vm_next: + |.if JIT + |// stg r0, 0 // NYI On big-endian. + |// stg r0, 0 + |.endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in ah/al. + |->vm_ffi_callback: + |// .if FFI + |// .type CTSTATE, CTState, PC + |// saveregs + |// la DISPATCH, GG_G2DISP(RB) + |// lg CTSTATE, GL:RB->ctype_state + |// llgcr RC, RC + |// stg RC, CTSTATE->cb.slot + |// + |// la RC, CFRAME_SIZE(sp) + |// .endif + | + | + | + | + | + | + |->cont_ffi_callback: // Return from FFI callback. + | stg r0, 0 + | stg r0, 0 + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, r8 + | stmg r6, r15, 48(sp) + | lgr r13, sp // Use r13 as frame pointer. + | lgr CCSTATE, CARG1 + | lg r7, CCSTATE->func + | + | // Readjust stack. + | sgf sp, CCSTATE->spadj + | + | // Copy stack slots. + | llgc r1, CCSTATE->nsp + | chi r1, 0 + | jh >2 + |1: + | lmg CARG1, CARG5, CCSTATE->gpr[0] + | // TODO: conditionally load FPRs? 
+ | ld FARG1, CCSTATE->fpr[0] + | ld FARG2, CCSTATE->fpr[1] + | ld FARG3, CCSTATE->fpr[2] + | ld FARG4, CCSTATE->fpr[3] + | basr r14, r7 + | + | stg CRET1, CCSTATE->gpr[0] + | std f0, CCSTATE->fpr[0] + | + | lgr sp, r13 + | lmg r6, r15, 48(sp) + | br r14 + | + |2: + | sll r1, 3 + | la r10, (offsetof(CCallState, stack))(CCSTATE) // Source. + | la r11, (CCALL_SPS_EXTRA*8)(sp) // Destination. + |3: + | chi r1, 256 + | jl >4 + | mvc 0(256, r11), 0(r10) + | la r10, 256(r10) + | la r11, 256(r11) + | ahi r1, -256 + | j <3 + | + |4: + | ahi r1, -1 + | jl <1 + | larl r9, >5 + | ex r1, 0(r9) + | j <1 + | + |5: + | // exrl target + | mvc 0(1, r11), 0(r10) + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + (void)vk; + |// Note: aligning all instructions does not pay off. + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + |.macro jmp_comp, lt, ge, le, gt, target + ||switch (op) { + ||case BC_ISLT: + | lt target + ||break; + ||case BC_ISGE: + | ge target + ||break; + ||case BC_ISLE: + | le target + ||break; + ||case BC_ISGT: + | gt target + ||break; + ||default: break; /* Shut up GCC. */ + ||} + |.endmacro + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1, RD = src2, JMP with RD = target + | ins_AD + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | ld f0, 0(RA, BASE) + | ld f1, 0(RD, BASE) + | lg RA, 0(RA, BASE) + | lg RD, 0(RD, BASE) + | srag ITYPE, RA, 47 + | srag RB, RD, 47 + | + | clfi ITYPE, LJ_TISNUM; jne >7 + | clfi RB, LJ_TISNUM; jne >8 + | // Both are integers. 
+ | la PC, 4(PC) + | cr RA, RD + | jmp_comp jhe, jl, jh, jle, >9 + |6: + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RA is not an integer. + | jh ->vmeta_comp + | // RA is a number. + | clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp + | // RA is a number, RD is an integer. + | cdfbr f1, RD + | j >1 + | + |8: // RA is an integer, RD is not an integer. + | jh ->vmeta_comp + | // RA is an integer, RD is a number. + | cdfbr f0, RA + |1: + | la PC, 4(PC) + | cdbr f0, f1 + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + | jmp_comp jnl, jl, jnle, jle, <9 + | j <6 + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | ins_AD // RA = src1, RD = src2, JMP with RD = target + | sllg RD, RD, 3 + | ld f1, 0(RD, BASE) + | lg RD, 0(RD, BASE) + | sllg RA, RA, 3 + | ld f0, 0(RA, BASE) + | lg RA, 0(RA, BASE) + | la PC, 4(PC) + | srag RB, RD, 47 + | srag ITYPE, RA, 47 + | clfi RB, LJ_TISNUM; jne >7 + | clfi ITYPE, LJ_TISNUM; jne >8 + | cr RD, RA + if (vk) { + | jne >9 + } else { + | je >9 + } + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RD is not an integer. + | jh >5 + | // RD is a number. + | clfi ITYPE, LJ_TISNUM; jl >1; jne >5 + | // RD is a number, RA is an integer. + | cdfbr f0, RA + | j >1 + | + |8: // RD is an integer, RA is not an integer. + | jh >5 + | // RD is an integer, RA is a number. + | cdfbr f1, RD + | j >1 + | + |1: + | cdbr f0, f1 + |4: + iseqne_fp: + if (vk) { + | jne >2 // Unordered means not equal. + } else { + | je >1 // Unordered means not equal. + } + iseqne_end: + if (vk) { + |1: // EQ: Branch to the target. + | llgh RD, PC_RD + | branchPC RD + |2: // NE: Fallthrough to next instruction. + |.if not FFI + |3: + |.endif + } else { + |.if not FFI + |3: + |.endif + |2: // NE: Branch to the target. + | llgh RD, PC_RD + | branchPC RD + |1: // EQ: Fallthrough to next instruction. 
+ } + if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || + op == BC_ISEQN || op == BC_ISNEN)) { + | j <9 + } else { + | ins_next + } + | + if (op == BC_ISEQV || op == BC_ISNEV) { + |5: // Either or both types are not numbers. + |.if FFI + | clfi RB, LJ_TCDATA; je ->vmeta_equal_cd + | clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd + |.endif + | cgr RA, RD + | je <1 // Same GCobjs or pvalues? + | cr RB, ITYPE + | jne <2 // Not the same type? + | clfi RB, LJ_TISTABUD + | jh <2 // Different objects and not table/ud? + | + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | cleartp TAB:RA + | lg TAB:RB, TAB:RA->metatable + | cghi TAB:RB, 0 + | je <2 // No metatable? + | tm TAB:RB->nomm, 1<vmeta_equal // Handle __eq metamethod. + } else { + |.if FFI + |3: + | clfi ITYPE, LJ_TCDATA + if (LJ_DUALNUM && vk) { + | jne <9 + } else { + | jne <2 + } + | j ->vmeta_equal_cd + |.endif + } + break; + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | ins_AND // RA = src, RD = str const, JMP with RD = target + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg RB, 0(RA, BASE) + | la PC, 4(PC) + | checkstr RB, >3 + | cg RB, 0(RD, KBASE) + iseqne_test: + if (vk) { + | jne >2 + } else { + | je >1 + } + goto iseqne_end; + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | ins_AD // RA = src, RD = num const, JMP with RD = target + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | ld f0, 0(RA, BASE) + | lg RB, 0(RA, BASE) + | ld f1, 0(RD, KBASE) + | lg RD, 0(RD, KBASE) + | la PC, 4(PC) + | checkint RB, >7 + | checkint RD, >8 + | cr RB, RD + if (vk) { + | jne >9 + } else { + | je >9 + } + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RA is not an integer. + | jh >3 + | // RA is a number. + | checkint RD, >1 + | // RA is a number, RD is an integer. + | cdfbr f1, RD + | j >1 + | + |8: // RA is an integer, RD is a number. 
+ | cdfbr f0, RB + | cdbr f0, f1 + | j >4 + |1: + | cdbr f0, f1 + |4: + goto iseqne_fp; + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | srag RB, RB, 47 + | la PC, 4(PC) + | cr RB, RD + if (!LJ_HASFFI) goto iseqne_test; + if (vk) { + | jne >3 + | llgh RD, PC_RD + | branchPC RD + |2: + | ins_next + |3: + | cghi RB, LJ_TCDATA; jne <2 + | j ->vmeta_equal_cd + } else { + | je >2 + | cghi RB, LJ_TCDATA; je ->vmeta_equal_cd + | llgh RD, PC_RD + | branchPC RD + |2: + | ins_next + } + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | ins_AD // RA = dst or unused, RD = src, JMP with RD = target + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg ITYPE, 0(RD, BASE) + | la PC, 4(PC) + if (op == BC_ISTC || op == BC_ISFC) { + | lgr RB, ITYPE + } + | srag ITYPE, ITYPE, 47 + | clfi ITYPE, LJ_TISTRUECOND + if (op == BC_IST || op == BC_ISTC) { + | jhe >1 + } else { + | jl >1 + } + if (op == BC_ISTC || op == BC_ISFC) { + | stg RB, 0(RA, BASE) + } + | llgh RD, PC_RD + | branchPC RD + |1: // Fallthrough to the next instruction. 
+ | ins_next + break; + + case BC_ISTYPE: + | ins_AD // RA = src, RD = -type + | lghr RD, RD + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | srag RB, RB, 47 + | agr RB, RD + | jne ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | ins_AD // RA = src, RD = -(TISNUM-1) + | sllg TMPR1, RA, 3 + | lg TMPR1, 0(TMPR1, BASE) + | checknumtp TMPR1, ->vmeta_istype + | ins_next + break; + case BC_MOV: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | lg RB, 0(RD, BASE) + | sllg RA, RA, 3 + | stg RB, 0(RA, BASE) + | ins_next_ + break; + case BC_NOT: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg RB, 0(RD, BASE) + | srag RB, RB, 47 + | load_false RC + | clfi RB, LJ_TISTRUECOND + | jl >1 + | load_true RC + |1: + | stg RC, 0(RA, BASE) + | ins_next + break; + case BC_UNM: + | ins_AD // RA = dst, RD = src + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg RB, 0(RD, BASE) + | checkint RB, >3 + | lcr RB, RB; jo >2 + |1: + | stg RB, 0(RA, BASE) + | ins_next + |2: + | llihh RB, 0x41e0 // (double)2^31 + | j <1 + |3: + | jh ->vmeta_unm + | // Toggle sign bit. + | llihh TMPR0, 0x8000 + | xgr RB, TMPR0 + | j <1 + break; + case BC_LEN: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | lg RD, 0(RD, BASE) + | checkstr RD, >2 + | llgf RD, STR:RD->len + |1: + | sllg RA, RA, 3 + | setint RD + | stg RD, 0(RA, BASE) + | ins_next + |2: + | cghi ITYPE, LJ_TTAB; jne ->vmeta_len + | lgr TAB:CARG1, TAB:RD +#if LJ_52 + | lg TAB:RB, TAB:RD->metatable + | cghi TAB:RB, 0 + | jne >9 + |3: +#endif + |->BC_LEN_Z: + | brasl r14, extern lj_tab_len // (GCtab *t) + | // Length of table returned in r2 (CRET1). + | lgr RD, CRET1 + | llgc RA, PC_RA + | j <1 +#if LJ_52 + |9: // Check for __len. + | tm TAB:RB->nomm, 1<vmeta_len // 'no __len' flag NOT set: check. 
+#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro ins_arithpre + | ins_ABC + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | sllg RA, RA, 3 + |.endmacro + | + |.macro ins_arithfp, ins + | ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | ld f0, 0(RB, BASE) + | ld f1, 0(RC, KBASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checknumtp RB, ->vmeta_arith_vno + | checknumtp RC, ->vmeta_arith_vno + | ins f0, f1 + || break; + ||case 1: + | ld f1, 0(RB, BASE) + | ld f0, 0(RC, KBASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checknumtp RB, ->vmeta_arith_nvo + | checknumtp RC, ->vmeta_arith_nvo + | ins f0, f1 + || break; + ||default: + | ld f0, 0(RB, BASE) + | ld f1, 0(RC, BASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checknumtp RB, ->vmeta_arith_vvo + | checknumtp RC, ->vmeta_arith_vvo + | ins f0, f1 + || break; + ||} + | std f0, 0(RA, BASE) + | ins_next + |.endmacro + | + |.macro ins_arithdn, intins + | ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_vno + | checkint RC, ->vmeta_arith_vno + | intins RB, RC; jo ->vmeta_arith_vno + || break; + ||case 1: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_nvo + | checkint RC, ->vmeta_arith_nvo + | intins RC, RB; jo ->vmeta_arith_nvo + || break; + ||default: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checkint RB, ->vmeta_arith_vvo + | checkint RC, ->vmeta_arith_vvo + | intins RB, RC; jo ->vmeta_arith_vvo + || break; + ||} + ||if (vk == 1) { + | // setint RC + | stg RC, 0(RA, BASE) + ||} else { + | // setint RB + | stg RB, 0(RA, BASE) + ||} + | ins_next + |.endmacro + + | // RA = dst, RB = src1 or num const, RC = src2 or num const + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn ar + break; + case BC_SUBVN: case BC_SUBNV: case 
BC_SUBVV: + | ins_arithdn sr + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithpre + | // For multiplication we use msgfr and check if the result + | // fits in an int32_t. + switch(op) { + case BC_MULVN: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_vno + | checkint RC, ->vmeta_arith_vno + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_vno + break; + case BC_MULNV: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_nvo + | checkint RC, ->vmeta_arith_nvo + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_nvo + break; + default: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checkint RB, ->vmeta_arith_vvo + | checkint RC, ->vmeta_arith_vvo + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_vvo + break; + } + | llgfr RB, RB + | setint RB + | stg RB, 0(RA, BASE) + | ins_next + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithfp ddbr + break; + // TODO: implement fast mod operation. + // x86_64 does floating point mod, however it might be better to use integer mod. + case BC_MODVN: + | j ->vmeta_arith_vno + break; + case BC_MODNV: + | j ->vmeta_arith_nvo + break; + case BC_MODVV: + | j ->vmeta_arith_vvo + break; + case BC_POW: + | ins_ABC + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | ld FARG1, 0(RB, BASE) + | ld FARG2, 0(RC, BASE) + | lg TMPR0, 0(RB, BASE) + | checknumtp TMPR0, ->vmeta_arith_vvo + | lg TMPR0, 0(RC, BASE) + | checknumtp TMPR0, ->vmeta_arith_vvo + | brasl r14, extern pow // double pow(double x, double y), result in f0. 
+ | llgc RA, PC_RA + | sllg RA, RA, 3 + | std f0, 0(RA, BASE) + | ins_next + break; + + case BC_CAT: + | ins_ABC // RA = dst, RB = src_start, RC = src_end + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG3, RC + | sgr CARG3, RB + | sllg RC, RC, 3 + | la CARG2, 0(RC, BASE) + |->BC_CAT_Z: + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | jne ->vmeta_binop + | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. + | sllg RB, RB, 3 + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RC, 0(RB, BASE) + | stg RC, 0(RA, BASE) + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | ins_AND // RA = dst, RD = str const (~) + | sllg RD, RD, 3 + | lg RD, 0(RD, KBASE) + | settp RD, LJ_TSTR + | sllg RA, RA, 3 + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KCDATA: + |.if FFI + | ins_AND // RA = dst, RD = cdata const (~) + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg RD, 0(RD, KBASE) + | settp RD, LJ_TCDATA + | stg RD, 0(RA, BASE) + | ins_next + |.endif + break; + case BC_KSHORT: + | ins_AD // RA = dst, RD = signed int16 literal + | // Assumes DUALNUM. + | lhr RD, RD // Sign-extend literal to 32-bits. 
+ | setint RD + | sllg RA, RA, 3 + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KNUM: + | ins_AD // RA = dst, RD = num const + | sllg RD, RD, 3 + | ld f0, 0(RD, KBASE) + | sllg RA, RA, 3 + | std f0, 0(RA, BASE) + | ins_next + break; + case BC_KPRI: + | ins_AD // RA = dst, RD = primitive type (~) + | sllg RA, RA, 3 + | sllg RD, RD, 47 + | lghi TMPR0, -1 + | xgr RD, TMPR0 // not + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KNIL: + | ins_AD // RA = dst_start, RD = dst_end + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | la RA, 8(RA, BASE) + | la RD, 0(RD, BASE) + | lghi RB, LJ_TNIL + | stg RB, -8(RA) // Sets minimum 2 slots. + |1: + | stg RB, 0(RA) + | la RA, 8(RA) + | clgr RA, RD + | jle <1 + | ins_next + break; + +/* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | ins_AD // RA = dst, RD = upvalue # + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB) + | lg RB, UPVAL:RB->v + | lg RD, 0(RB) + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_USETV: +#define TV2MARKOFS \ + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) + | ins_AD // RA = upvalue #, RD = src + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | sllg RA, RA, 3 + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | tm UPVAL:RB->closed, 0xff + | lg RB, UPVAL:RB->v + | sllg TMPR1, RD, 3 + | lg RA, 0(TMPR1, BASE) + | stg RA, 0(RB) + | je >1 + | // Check barrier for closed upvalue. + | tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv) + | jne >2 + |1: + | ins_next + | + |2: // Upvalue is black. Check if new value is collectable and white. + | srag RD, RA, 47 + | ahi RD, -LJ_TISGCV + | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) + | jle <1 + | cleartp GCOBJ:RA + | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) + | je <1 + | // Crossed a write barrier. Move the barrier forward. 
+ | lgr CARG2, RB + | lay GL:CARG1, GG_DISP2G(DISPATCH) + | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; +#undef TV2MARKOFS + case BC_USETS: + | ins_AND // RA = upvalue #, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | lg STR:RA, 0(RD, KBASE) + | lg RD, UPVAL:RB->v + | settp STR:ITYPE, STR:RA, LJ_TSTR + | stg STR:ITYPE, 0(RD) + | tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) + | jne >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) + | je <1 + | tm UPVAL:RB->closed, 0xff + | je <1 + | // Crossed a write barrier. Move the barrier forward. + | lgr CARG2, RD + | lay GL:CARG1, GG_DISP2G(DISPATCH) + | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETN: + | ins_AD // RA = upvalue #, RD = num const + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | cleartp LFUNC:RB + | ld f0, 0(RD, KBASE) + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | lg RA, UPVAL:RB->v + | std f0, 0(RA) + | ins_next + break; + case BC_USETP: + | ins_AD // RA = upvalue #, RD = primitive type (~) + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | sllg RD, RD, 47 + | lghi TMPR0, -1 + | xgr RD, TMPR0 + | lg RA, UPVAL:RB->v + | stg RD, 0(RA) + | ins_next + break; + case BC_UCLO: + | ins_AD // RA = level, RD = target + | branchPC RD // Do this first to free RD. 
+ | lg L:RB, SAVE_L + | ltg TMPR0, L:RB->openupval + | je >1 + | stg BASE, L:RB->base + | sllg RA, RA, 3 + | la CARG2, 0(RA, BASE) + | lgr L:CARG1, L:RB + | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level) + | lg BASE, L:RB->base + |1: + | ins_next + break; + + case BC_FNEW: + | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lg CARG3, -16(BASE) + | cleartp CARG3 + | sllg RD, RD, 3 + | lg CARG2, 0(RD, KBASE) // Fetch GCproto *. + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | brasl r14, extern lj_func_newL_gc + | // GCfuncL * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | sllg RA, RA, 3 + | settp LFUNC:CRET1, LJ_TFUNC + | stg LFUNC:CRET1, 0(RA, BASE) + | ins_next + break; + case BC_TNEW: + | ins_AD // RA = dst, RD = hbits|asize + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lg RA, (DISPATCH_GL(gc.total))(DISPATCH) + | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH) + | stg PC, SAVE_PC + | jhe >5 + |1: + | srlg CARG3, RD, 11 + | llill TMPR0, 0x7ff + | nr RD, TMPR0 + | cr RD, TMPR0 + | je >3 + |2: + | lgr L:CARG1, L:RB + | llgfr CARG2, RD + | brasl r14, extern lj_tab_new // (lua_State *L, uint32_t asize, uint32_t hbits) + | // Table * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | sllg RA, RA, 3 + | settp TAB:CRET1, LJ_TTAB + | stg TAB:CRET1, 0(RA, BASE) + | ins_next + |3: // Turn 0x7ff into 0x801. 
+ | llill RD, 0x801 + | j <2 + |5: + | lgr L:CARG1, L:RB + | brasl r14, extern lj_gc_step_fixtop // (lua_State *L) + | llgh RD, PC_RD + | j <1 + break; + case BC_TDUP: + | ins_AND // RA = dst, RD = table const (~) (holding template table) + | lg L:RB, SAVE_L + | lg RA, (DISPATCH_GL(gc.total))(DISPATCH) + | stg PC, SAVE_PC + | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH) + | stg BASE, L:RB->base + | jhe >3 + |2: + | sllg RD, RD, 3 + | lg TAB:CARG2, 0(RD, KBASE) + | lgr L:CARG1, L:RB + | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt) + | // Table * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | settp TAB:CRET1, LJ_TTAB + | sllg RA, RA, 3 + | stg TAB:CRET1, 0(RA, BASE) + | ins_next + |3: + | lgr L:CARG1, L:RB + | brasl r14, extern lj_gc_step_fixtop // (lua_State *L) + | llgh RD, PC_RD // Need to reload RD. + | lghi TMPR0, -1 + | xgr RD, TMPR0 // not RD + | j <2 + break; + + case BC_GGET: + | ins_AND // RA = dst, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg TAB:RB, LFUNC:RB->env + | sllg TMPR1, RD, 3 + | lg STR:RC, 0(TMPR1, KBASE) + | j ->BC_TGETS_Z + break; + case BC_GSET: + | ins_AND // RA = src, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg TAB:RB, LFUNC:RB->env + | sllg TMPR1, RD, 3 + | lg STR:RC, 0(TMPR1, KBASE) + | j ->BC_TSETS_Z + break; + + case BC_TGETV: + | ins_ABC // RA = dst, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | checktab TAB:RB, ->vmeta_tgetv + | + | // Integer key? + | checkint RC, >5 + | cl RC, TAB:RB->asize // Takes care of unordered, too. + | jhe ->vmeta_tgetv // Not in array part? Use fallback. + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + | lg ITYPE, 0(RC) + | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. + | je >2 + |1: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |2: // Check for __index if table value is nil. 
+ | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_index + | je ->vmeta_tgetv // 'no __index' flag NOT set: check. + | j <1 + | + |5: // String key? + | cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv + | cleartp STR:RC + | j ->BC_TGETS_Z + break; + case BC_TGETS: + | ins_ABC + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | lghi TMPR1, -1 + | xgr RC, TMPR1 + | sllg RC, RC, 3 + | lg STR:RC, 0(RC, KBASE) + | checktab TAB:RB, ->vmeta_tgets + |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * + | l TMPR1, TAB:RB->hmask + | n TMPR1, STR:RC->sid + | lgfr TMPR1, TMPR1 + | mghi TMPR1, #NODE + | ag NODE:TMPR1, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR + |1: + | cg ITYPE, NODE:TMPR1->key + | jne >4 + | // Get node value. + | lg ITYPE, NODE:TMPR1->val + | cghi ITYPE, LJ_TNIL + | je >5 // Key found, but nil value? + |2: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |4: // Follow hash chain. + | lg NODE:TMPR1, NODE:TMPR1->next + | cghi NODE:TMPR1, 0 + | jne <1 + | // End of hash chain: key not found, nil result. + | lghi ITYPE, LJ_TNIL + | + |5: // Check for __index if table value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <2 // No metatable: done. + | tm TAB:TMPR1->nomm, 1<<MM_index + | jne <2 // 'no __index' flag set: done. + | j ->vmeta_tgets // Caveat: preserve STR:RC. + break; + case BC_TGETB: + | ins_ABC // RA = dst, RB = table, RC = byte literal + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | checktab TAB:RB, ->vmeta_tgetb + | cl RC, TAB:RB->asize + | jhe ->vmeta_tgetb + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + | lg ITYPE, 0(RC) + | cghi ITYPE, LJ_TNIL + | je >2 + |1: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |2: // Check for __index if table value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_index + | je ->vmeta_tgetb // 'no __index' flag NOT set: check.
+ | j <1 + break; + case BC_TGETR: + | ins_ABC // RA = dst, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | cleartp TAB:RB + | sllg RC, RC, 3 + | llgf RC, 4(RC, BASE) // Load low word (big endian). + | cl RC, TAB:RB->asize + | jhe ->vmeta_tgetr // Not in array part? Use fallback. + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + |->BC_TGETR_Z: + | lg ITYPE, 0(RC) + |->BC_TGETR2_Z: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + break; + + case BC_TSETV: + | ins_ABC // RA = src, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | checktab TAB:RB, ->vmeta_tsetv + | + | // Integer key? + | checkint RC, >5 + | cl RC, TAB:RB->asize // Takes care of unordered, too. + | jhe ->vmeta_tsetv + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(RC) + | je >3 // Previous value is nil? + |1: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: // Set array slot. + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | stg RB, 0(RC) + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check. + | j <1 + | + |5: // String key? + | cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv + | cleartp STR:RC + | j ->BC_TSETS_Z + | + |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR1 + | j <2 + break; + case BC_TSETS: + | ins_ABC // RA = src, RB = table, RC = str const (~) + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | lghi TMPR0, -1 + | xgr RC, TMPR0 // ~RC + | sllg RC, RC, 3 + | lg STR:RC, 0(RC, KBASE) + | checktab TAB:RB, ->vmeta_tsets + |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * + | l TMPR1, TAB:RB->hmask + | n TMPR1, STR:RC->sid + | lgfr TMPR1, TMPR1 + | mghi TMPR1, #NODE + | mvi TAB:RB->nomm, 0 // Clear metamethod cache. + | ag NODE:TMPR1, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR + |1: + | cg ITYPE, NODE:TMPR1->key + | jne >5 + | // Ok, key found. Assumes: offsetof(Node, val) == 0 + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(TMPR1) + | je >4 // Previous value is nil? + |2: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |3: // Set node value. + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(TMPR1) + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | lg TAB:ITYPE, TAB:RB->metatable + | cghi TAB:ITYPE, 0 + | je <2 + | tm TAB:ITYPE->nomm, 1<<MM_newindex + | je ->vmeta_tsets // 'no __newindex' flag NOT set: check. + | j <2 + | + |5: // Follow hash chain. + | lg NODE:TMPR1, NODE:TMPR1->next + | cghi NODE:TMPR1, 0 + | jne <1 + | // End of hash chain: key not found, add a new one. + | + | // But check for __newindex first. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je >6 // No metatable: continue. + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | stg ITYPE, SAVE_TMP + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | la CARG3, SAVE_TMP + | lgr CARG2, TAB:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Handles write barrier for the new key. TValue * returned in r2 (CRET1). + | lgr TMPR1, CRET1 + | lg L:CRET1, SAVE_L + | lg BASE, L:CRET1->base + | llgc RA, PC_RA + | j <2 // Must check write barrier for value. + | + |7: // Possible table write barrier for the value.
Skip valiswhite check. + | barrierback TAB:RB, ITYPE + | j <3 + break; + case BC_TSETB: + | ins_ABC // RA = src, RB = table, RC = byte literal + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | checktab TAB:RB, ->vmeta_tsetb + | cl RC, TAB:RB->asize + | jhe ->vmeta_tsetb + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(RC) + | je >3 // Previous value is nil? + |1: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: // Set array slot. + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(RC) + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check. + | j <1 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR1 + | j <2 + break; + case BC_TSETR: + | ins_ABC // RA = src, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | cleartp TAB:RB + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: + | cl RC, TAB:RB->asize + | jhe ->vmeta_tsetr + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Set array slot. + |->BC_TSETR_Z: + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(RC) + | ins_next + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR1 + | j <2 + break; + + case BC_TSETM: + | ins_AD // RA = base (table at base-1), RD = num const (start index) + |1: + | sllg RA, RA, 3 + | sllg TMPR1, RD, 3 + | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word. + | la RA, 0(RA, BASE) + | lg TAB:RB, -8(RA) // Guaranteed to be a table. + | cleartp TAB:RB + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: + | llgf RD, SAVE_MULTRES + | aghi RD, -1 + | je >4 // Nothing to copy? + | agr RD, TMPR1 // Compute needed size.
+ | clgf RD, TAB:RB->asize + | jh >5 // Doesn't fit into array part? + | sgr RD, TMPR1 + | sllg TMPR1, TMPR1, 3 + | ag TMPR1, TAB:RB->array + |3: // Copy result slots to table. + | lg RB, 0(RA) + | la RA, 8(RA) + | stg RB, 0(TMPR1) + | la TMPR1, 8(TMPR1) + | brctg RD, <3 + |4: + | ins_next + | + |5: // Need to resize array part. + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, TAB:RB + | lgfr CARG3, RD + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + | lg BASE, L:RB->base + | llgc RA, PC_RA // Restore RA. + | llgh RD, PC_RD // Restore RD. + | j <1 // Retry. + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:RB, RD + | j <2 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALL: case BC_CALLM: + | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs + | sllg RA, RA, 3 + | lgr RD, RC + if (op == BC_CALLM) { + | agf NARGS:RD, SAVE_MULTRES + } + | lg LFUNC:RB, 0(RA, BASE) + | checkfunc LFUNC:RB, ->vmeta_call_ra + | la BASE, 16(RA, BASE) + | ins_call + break; + + case BC_CALLMT: + | ins_AD // RA = base, RD = extra_nargs + | a NARGS:RD, SAVE_MULTRES + | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. + break; + case BC_CALLT: + | ins_AD // RA = base, RD = nargs+1 + | sllg RA, RA, 3 + | la RA, 16(RA, BASE) + | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint. + | lg LFUNC:RB, -16(RA) + | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call + |->BC_CALLT_Z: + | lg PC, -8(BASE) + | tmll PC, FRAME_TYPE + | jne >7 + |1: + | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below. + | st NARGS:RD, SAVE_MULTRES + | aghi NARGS:RD, -1 + | je >3 + |2: // Move args down. 
+ | lg RB, 0(RA) + | la RA, 8(RA) + | stg RB, 0(KBASE) + | la KBASE, 8(KBASE) + | brctg NARGS:RD, <2 + | + | lg LFUNC:RB, -16(BASE) + |3: + | cleartp LFUNC:RB + | llgf NARGS:RD, SAVE_MULTRES + | llgc TMPR1, LFUNC:RB->ffid + | cghi TMPR1, 1 // (> FF_C) Calling a fast function? + | jh >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function. + | tmll PC, FRAME_TYPE // Lua frame below? + | jne <4 + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE. + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, (PC2PROTO(k))(KBASE) + | j <4 + | + |7: // Tailcall from a vararg function. + | aghi PC, -FRAME_VARG + | tmll PC, FRAME_TYPEP + | jne >8 // Vararg frame below? + | sgr BASE, PC // Need to relocate BASE/KBASE down. + | lgr KBASE, BASE + | lg PC, -8(BASE) + | j <1 + |8: + | aghi PC, FRAME_VARG + | j <1 + break; + + case BC_ITERC: + | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) + | sllg RA, RA, 3 + | la RA, 16(RA, BASE) // fb = base+2 + | lg RB, -32(RA) // Copy state. fb[0] = fb[-4]. + | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3]. + | stg RB, 0(RA) + | stg RC, 8(RA) + | lg LFUNC:RB, -40(RA) // Copy callable. fb[-2] = fb[-5] + | stg LFUNC:RB, -16(RA) + | lghi NARGS:RD, 2+1 // Handle like a regular 2-arg call. + | checkfunc LFUNC:RB, ->vmeta_call + | lgr BASE, RA + | ins_call + break; + + case BC_ITERN: + |.if JIT + | hotloop RB // NYI: add hotloop, record BC_ITERN. + |.endif + |->vm_IITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + | sllg RA, RA, 3 + | lg TAB:RB, -16(RA, BASE) + | cleartp TAB:RB + | llgf RC, -4(RA, BASE) // Get index from control var. + | llgf TMPR1, TAB:RB->asize + | la PC, 4(PC) + | lg ITYPE, TAB:RB->array + |1: // Traverse array part. + | clr RC, TMPR1; jhe >5 // Index points after array part? + | sllg RD, RC, 3 // Warning: won't work if RD==RC! 
+ | lg TMPR0, 0(RD, ITYPE) + | cghi TMPR0, LJ_TNIL; je >4 + | // Copy array slot to returned value. + | lgr RB, TMPR0 + | stg RB, 8(RA, BASE) + | // Return array index as a numeric key. + | setint ITYPE, RC + | stg ITYPE, 0(RA, BASE) + | ahi RC, 1 + | sty RC, -4(RA, BASE) // Update control var. + |2: + | llgh RD, PC_RD // Get target from ITERL. + | branchPC RD + |3: + | ins_next + | + |4: // Skip holes in array part. + | ahi RC, 1 + | j <1 + | + |5: // Traverse hash part. + | sr RC, TMPR1 + |6: + | cl RC, TAB:RB->hmask; jh <3 // End of iteration? Branch to ITERL+1. + | llgfr ITYPE, RC + | mghi ITYPE, #NODE + | ag NODE:ITYPE, TAB:RB->node + | lghi TMPR0, LJ_TNIL + | cg TMPR0, NODE:ITYPE->val; je >7 + | ar TMPR1, RC + | ahi TMPR1, 1 + | // Copy key and value from hash slot. + | lg RB, NODE:ITYPE->key + | lg RC, NODE:ITYPE->val + | stg RB, 0(RA, BASE) + | stg RC, 8(RA, BASE) + | sty TMPR1, -4(RA, BASE) + | j <2 + | + |7: // Skip holes in hash part. + | ahi RC, 1 + | j <6 + break; + + case BC_ISNEXT: + | ins_AD // RA = base, RD = target (points to ITERN) + | sllg RA, RA, 3 + | lg CFUNC:RB, -24(RA, BASE) + | checkfunc CFUNC:RB, >5 + | lg TMPR1, -16(RA, BASE) + | checktptp TMPR1, LJ_TTAB, >5 + | lghi TMPR0, LJ_TNIL + | cg TMPR0, -8(RA, BASE); jne >5 + | llgc TMPR1, CFUNC:RB->ffid + | clfi TMPR1, (uint8_t)FF_next_N; jne >5 + | branchPC RD + | llihl TMPR1, 0x7fff + | iihh TMPR1, 0xfffe + | stg TMPR1, -8(RA, BASE) // Initialize control var. + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | lghi TMPR0, BC_JMP + | stcy TMPR0, PC_OP + | branchPC RD + | mvi 3(PC), BC_ITERC + | j <1 + break; + + case BC_VARG: + | ins_ABC // RA = base, RB = nresults+1, RC = numparams + | sllg RA, RA, 3 + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | la TMPR1, (16+FRAME_VARG)(RC, BASE) + | la RA, 0(RA, BASE) + | sg TMPR1, -8(BASE) + | // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams. + | cghi RB, 0 + | je >5 // Copy all varargs? 
+ | lay RB, -8(RA, RB) + | clgr TMPR1, BASE // No vararg slots? + | lghi TMPR0, LJ_TNIL + | jnl >2 + |1: // Copy vararg slots to destination slots. + | lg RC, -16(TMPR1) + | la TMPR1, 8(TMPR1) + | stg RC, 0(RA) + | la RA, 8(RA) + | clgr RA, RB // All destination slots filled? + | jnl >3 + | clgr TMPR1, BASE // No more vararg slots? + | jl <1 + |2: // Fill up remainder with nil. + | stg TMPR0, 0(RA) + | la RA, 8(RA) + | clgr RA, RB + | jl <2 + |3: + | ins_next + | + |5: // Copy all varargs. + | lghi TMPR0, 1 + | st TMPR0, SAVE_MULTRES // MULTRES = 0+1 + | lgr RC, BASE + | slgr RC, TMPR1 + | jno <3 // No vararg slots? (borrow or zero) + | llgfr RB, RC + | srlg RB, RB, 3 + | ahi RB, 1 + | st RB, SAVE_MULTRES // MULTRES = #varargs+1 + | lg L:RB, SAVE_L + | agr RC, RA + | clg RC, L:RB->maxstack + | jh >7 // Need to grow stack? + |6: // Copy all vararg slots. + | lg RC, -16(TMPR1) + | la TMPR1, 8(TMPR1) + | stg RC, 0(RA) + | la RA, 8(RA) + | clgr TMPR1, BASE // No more vararg slots? + | jl <6 + | j <3 + | + |7: // Grow stack for varargs. + | stg BASE, L:RB->base + | stg RA, L:RB->top + | stg PC, SAVE_PC + | sgr TMPR1, BASE // Need delta, because BASE may change. + | st TMPR1, SAVE_TMP_HI + | llgf CARG2, SAVE_MULTRES + | aghi CARG2, -1 + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lgf TMPR1, SAVE_TMP_HI + | lg RA, L:RB->top + | agr TMPR1, BASE + | j <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | ins_AD // RA = results, RD = extra_nresults + | agf RD, SAVE_MULTRES // MULTRES >=1, so RD >=1. + | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. + break; + + case BC_RET: case BC_RET0: case BC_RET1: + | ins_AD // RA = results, RD = nresults+1 + if (op != BC_RET0) { + | sllg RA, RA, 3 + } + |1: + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES // Save nresults+1. + | tmll PC, FRAME_TYPE // Check frame type marker. 
+ | jne >7 // Not returning to a fixarg Lua func? + switch (op) { + case BC_RET: + |->BC_RET_Z: + | lgr KBASE, BASE // Use KBASE for result move. + | aghi RD, -1 + | je >3 + |2: // Move results down. + | lg RB, 0(KBASE, RA) + | stg RB, -16(KBASE) + | la KBASE, 8(KBASE) + | brctg RD, <2 + |3: + | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256. + | llgc RB, PC_RB + |5: + | cgr RB, RD // More results expected? + | jh >6 + break; + case BC_RET1: + | lg RB, 0(BASE, RA) + | stg RB, -16(BASE) + /* fallthrough */ + case BC_RET0: + |5: + | llgc TMPR1, PC_RB + | cgr TMPR1, RD + | jh >6 + default: + break; + } + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8 + | lg LFUNC:KBASE, -16(BASE) + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, PC2PROTO(k)(KBASE) + | ins_next + | + |6: // Fill up results with nil. + | lghi TMPR1, LJ_TNIL + if (op == BC_RET) { + | stg TMPR1, -16(KBASE) // Note: relies on shifted base. + | la KBASE, 8(KBASE) + } else { + | sllg RC, RD, 3 // RC used as temp. + | stg TMPR1, -24(RC, BASE) + } + | la RD, 1(RD) + | j <5 + | + |7: // Non-standard return case. + | lay RB, -FRAME_VARG(PC) + | tmll RB, FRAME_TYPEP + | jne ->vm_return + | // Return from vararg function: relocate BASE down and RA up. + | sgr BASE, RB + if (op != BC_RET0) { + | agr RA, RB + } + | j <1 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + |.define FOR_IDX, 0(RA) + |.define FOR_STOP, 8(RA) + |.define FOR_STEP, 16(RA) + |.define FOR_EXT, 24(RA) + + case BC_FORL: + { + |.if JIT + | hotloop RB + |.endif + | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 
+ break; + } + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + vk = (op == BC_IFORL || op == BC_JFORL); + | ins_AJ // RA = base, RD = target (after end of loop or start of loop) + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | lg RB, FOR_IDX + | checkint RB, >9 + | lg TMPR1, FOR_STOP + if (!vk) { + | checkint TMPR1, ->vmeta_for + | lg ITYPE, FOR_STEP + | chi ITYPE, 0; jl >5 + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for + } else { +#ifdef LUA_USE_ASSERT + | // lg TMPR1, FOR_STOP + | checkinttp TMPR1, ->assert_bad_for_arg_type + | lg TMPR0, FOR_STEP + | checkinttp TMPR0, ->assert_bad_for_arg_type +#endif + | lg ITYPE, FOR_STEP + | chi ITYPE, 0; jl >5 + | ar RB, ITYPE; jo >1 + | setint RB + | stg RB, FOR_IDX + } + | cr RB, TMPR1 + | stg RB, FOR_EXT + if (op == BC_FORI) { + | jle >7 + |1: + |6: + | branchPC RD + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jle =>BC_JLOOP + |1: + |6: + } else if (op == BC_IFORL) { + | jh >7 + |6: + | branchPC RD + |1: + } else { + | jle =>BC_JLOOP + |1: + |6: + } + |7: + | ins_next + | + |5: // Invert check for negative step. + if (!vk) { + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for + } else { + | ar RB, ITYPE; jo <1 + | setint RB + | stg RB, FOR_IDX + } + | cr RB, TMPR1 + | stg RB, FOR_EXT + if (op == BC_FORI) { + | jhe <7 + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jhe =>BC_JLOOP + } else if (op == BC_IFORL) { + | jl <7 + } else { + | jhe =>BC_JLOOP + } + | j <6 + |9: // Fallback to FP variant. 
+ if (!vk) { + | jhe ->vmeta_for + } + if (!vk) { + | lg TMPR0, FOR_STOP + | checknumtp TMPR0, ->vmeta_for + } else { +#ifdef LUA_USE_ASSERT + | lg TMPR0, FOR_STOP + | checknumtp TMPR0, ->assert_bad_for_arg_type + | lg TMPR0, FOR_STEP + | checknumtp TMPR0, ->assert_bad_for_arg_type +#endif + } + | lg RB, FOR_STEP + if (!vk) { + | checknum RB, ->vmeta_for + } + | ld f0, FOR_IDX + | ld f1, FOR_STOP + if (vk) { + | adb f0, FOR_STEP + | std f0, FOR_IDX + } + | cghi RB, 0; jl >3 + | cdbr f1, f0 + |1: + | std f0, FOR_EXT + if (op == BC_FORI) { + | jnl <7 + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jnl =>BC_JLOOP + } else if (op == BC_IFORL) { + | jl <7 + } else { + | jnl =>BC_JLOOP + } + | j <6 + | + |3: // Invert comparison if step is negative. + | cdbr f0, f1 + | j <1 + break; + + case BC_ITERL: + |.if JIT + | hotloop RB + |.endif + | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | ins_AJ // RA = base, RD = target + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | lg RB, 0(RA) + | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { + | stg RB, -8(RA) + | j =>BC_JLOOP + } else { + | branchPC RD // Otherwise save control var + branch. + | stg RB, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | ins_A // RA = base, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop RB + |.endif + | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 
+ break; + + case BC_ILOOP: + | ins_A // RA = base, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + | stg r0, 0 + | stg r0, 0 + break; + + case BC_JMP: + | ins_AJ // RA = unused, RD = target + | branchPC RD + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + /* + ** Reminder: A function may be called with func/args above L->maxstack, + ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, + ** too. This means all FUNC* ops (including fast functions) must check + ** for stack overflow _before_ adding more slots! + */ + + case BC_FUNCF: + |.if JIT + |// stg r0, 0 + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + | lg KBASE, (PC2PROTO(k)-4)(PC) + | lg L:RB, SAVE_L + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) // Top of frame. + | clg RA, L:RB->maxstack + | jh ->vm_growstack_f + | llgc RA, (PC2PROTO(numparams)-4)(PC) + | clgr NARGS:RD, RA // Check for missing parameters. + | jle >3 + |2: + if (op == BC_JFUNCF) { + | llgh RD, PC_RD + | j =>BC_JLOOP + } else { + | ins_next + } + | + |3: // Clear missing parameters. + | sllg TMPR1, NARGS:RD, 3 + | lghi TMPR0, LJ_TNIL + |4: + | stg TMPR0, -8(TMPR1, BASE) + | la TMPR1, 8(TMPR1) + | la RD, 1(RD) + | clgr RD, RA + | jle <4 + | j <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | stg r0, 0 // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + | sllg TMPR1, NARGS:RD, 3 + | la RB, (FRAME_VARG+8)(TMPR1) + | la RD, 8(TMPR1, BASE) + | lg LFUNC:KBASE, -16(BASE) + | stg RB, -8(RD) // Store delta + FRAME_VARG. + | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC. 
+ | lg L:RB, SAVE_L + | sllg RA, RA, 3 + | la RA, 0(RA, RD) + | cg RA, L:RB->maxstack + | jh ->vm_growstack_v // Need to grow stack. + | lgr RA, BASE + | lgr BASE, RD + | llgc RB, (PC2PROTO(numparams)-4)(PC) + | cghi RB, 0 + | je >2 + | aghi RA, 8 + | lghi TMPR1, LJ_TNIL + |1: // Copy fixarg slots up to new frame. + | la RA, 8(RA) + | cgr RA, BASE + | jnl >3 // Less args than parameters? + | lg KBASE, -16(RA) + | stg KBASE, 0(RD) + | la RD, 8(RD) + | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC). + | brctg RB, <1 + |2: + if (op == BC_JFUNCV) { + | llgh RD, PC_RD + | j =>BC_JLOOP + } else { + | lg KBASE, (PC2PROTO(k)-4)(PC) + | ins_next + } + | + |3: // Clear missing parameters. + | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here. + | la RD, 8(RD) + | brctg RB, <3 + | j <2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | ins_AD // BASE = new base, RD = nargs+1 + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | lg KBASE, CFUNC:RB->f + | lg L:RB, SAVE_L + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD,BASE) + | stg BASE, L:RB->base + | la RA, (8*LUA_MINSTACK)(RD) + | clg RA, L:RB->maxstack + | stg RD, L:RB->top + | lgr CARG1, L:RB + if (op != BC_FUNCC) { + | lgr CARG2, KBASE + } + | jh ->vm_growstack_c // Need to grow stack. + | set_vmstate C + if (op == BC_FUNCC) { + | basr r14, KBASE // (lua_State *L) + } else { + | // (lua_State *L, lua_CFunction f) + | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH) + | basr r14, TMPR1 + } + | // nresults returned in r2 (CRET1). + | lgr RD, CRET1 + | lg BASE, L:RB->base + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | sllg TMPR1, RD, 3 + | la RA, 0(TMPR1, BASE) + | lcgr RA, RA + | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 + | lg PC, -8(BASE) // Fetch PC of caller. 
+ | j ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} +/* Build the VM backend: grow the DynASM pc-label table to BC__MAX entries, call build_subroutines(), then switch to the .code_op section and call build_ins() once for every opcode in [0, BC__MAX). Returns BC__MAX. */ +static int build_backend(BuildCtx *ctx) +{ + int op; + dasm_growpc(Dst, BC__MAX); + build_subroutines(ctx); + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. Output is written to ctx->fp and only for BUILD_elfasm mode: a .debug_frame section and, unless LJ_NO_UNWIND is set, an .eh_frame section. fcofs is the byte offset of the vm_ffi_call global inside the generated code: the main FDE starts at .Lbegin and spans fcofs bytes, and (with LJ_HASFFI) a second FDE starts at lj_vm_ffi_call and spans the remaining codesz - fcofs bytes. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + switch (ctx->mode) { + case BUILD_elfasm: /* The only mode that gets frame info. */ + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 1\n" + "\t.sleb128 -8\n" + "\t.byte 0xe\n" + "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" + "\t.align 8\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.quad .Lbegin\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.long .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.long .Lframe0\n" + "\t.quad lj_vm_ffi_call\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */ +
"\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND + fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); /* This section's first CIE references lj_err_unwind_dwarf. */ + fprintf(ctx->fp, + ".Lframe1:\n" + "\t.long .LECIE1-.LSCIE1\n" + ".LSCIE1:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.string \"zPR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0xe\n" + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.long lj_err_unwind_dwarf-.\n" + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" + "\t.align 8\n" + ".LECIE1:\n\n"); + fprintf(ctx->fp, + ".LSFDE2:\n" + "\t.long .LEFDE2-.LASFDE2\n" + ".LASFDE2:\n" + "\t.long .LASFDE2-.Lframe1\n" + "\t.long .Lbegin-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
+#if LJ_HASFFI + fprintf(ctx->fp, + ".Lframe2:\n" + "\t.long .LECIE2-.LSCIE2\n" + ".LSCIE2:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.string \"zR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0xe\n" + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" + "\t.align 8\n" + ".LECIE2:\n\n"); + fprintf(ctx->fp, + ".LSFDE3:\n" + "\t.long .LEFDE3-.LASFDE3\n" + ".LASFDE3:\n" + "\t.long .LASFDE3-.Lframe2\n" + "\t.long lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */ + "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); +#endif +#endif + break; + default: /* No other modes. */ + break; + } +} + diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 8dd48b84..02054033 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -2455,6 +2455,19 @@ static void build_subroutines(BuildCtx *ctx) | mov r13, [RA-8] | mov r12, [RA] | mov rsp, RA // Reposition stack to C frame.
+#ifdef LUA_USE_TRACE_LOGS + | mov CARG1, SAVE_L + | mov L:CARG1->base, BASE + | mov RB, RD // Save RD + | mov TMP1, PC // Save PC + | mov CARG3, PC // CARG3 == BASE + | mov CARG2d, dword [DISPATCH+DISPATCH_GL(vmstate)] + | call extern lj_log_trace_direct_exit@8 + | mov PC, TMP1 + | mov RD, RB + | mov RB, SAVE_L + | mov BASE, L:RB->base +#endif |.endif | test RDd, RDd; js >9 // Check for error from exit. | mov L:RB, SAVE_L @@ -4512,6 +4525,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_JLOOP: |.if JIT | ins_AD // RA = base (ignored), RD = traceno +#ifdef LUA_USE_TRACE_LOGS + |.if not X64WIN + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Save BASE + | mov TMP1, RD // Save RD + | mov CARG3, PC // CARG3 == BASE + | mov CARG2, RD + | mov CARG1, RB + | call extern lj_log_trace_entry@8 + | mov RD, TMP1 + | mov BASE, L:RB->base + |.endif +#endif | mov RA, [DISPATCH+DISPATCH_J(trace)] | mov TRACE:RD, [RA+RD*8] | mov RD, TRACE:RD->mcode diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index de12ac64..8d3c82c4 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -2905,6 +2905,21 @@ static void build_subroutines(BuildCtx *ctx) | mov r13, TMPa | mov r12, TMPQ |.endif +#ifdef LUA_USE_TRACE_LOGS + |.if X64 + | mov FCARG1, SAVE_L + | mov L:FCARG1->base, BASE + | mov RB, RD // Save RD + | mov TMP1, PC // Save PC + | mov CARG3d, PC // CARG3d == BASE + | mov FCARG2, dword [DISPATCH+DISPATCH_GL(vmstate)] + | call extern lj_log_trace_direct_exit@8 + | mov PC, TMP1 + | mov RD, RB + | mov RB, SAVE_L + | mov BASE, L:RB->base + |.endif +#endif | test RD, RD; js >9 // Check for error from exit. 
| mov L:RB, SAVE_L | mov MULTRES, RD @@ -5306,6 +5321,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_JLOOP: |.if JIT | ins_AD // RA = base (ignored), RD = traceno +#ifdef LUA_USE_TRACE_LOGS + |.if X64 + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Save BASE + | mov TMP1, RD // Save RD + | mov CARG3d, PC // CARG3d == BASE + | mov FCARG2, RD + | mov FCARG1, RB + | call extern lj_log_trace_entry@8 + | mov RD, TMP1 + | mov BASE, L:RB->base + |.endif +#endif | mov RA, [DISPATCH+DISPATCH_J(trace)] | mov TRACE:RD, [RA+RD*4] | mov RDa, TRACE:RD->mcode diff --git a/src/x64/Makefile b/src/x64/Makefile new file mode 100644 index 00000000..27277140 --- /dev/null +++ b/src/x64/Makefile @@ -0,0 +1,13 @@ +.PHONY: default test benchmark clean + +default: + @echo "make target include: test bechmark clean" + +test: + $(MAKE) -C test test + +benchmark: + $(MAKE) -C test benchmark + +clean: + $(MAKE) -C test clean diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile new file mode 100644 index 00000000..4326ab3d --- /dev/null +++ b/src/x64/test/Makefile @@ -0,0 +1,47 @@ +.PHONY: default test benchmark + +default: test benchmark + +COMMON_OBJ := test_util.o + +TEST_PROGRAM := ht_test +BENCHMARK_PROGRAM := ht_benchmark + +TEST_PROGRAM_OBJ := $(COMMON_OBJ) test.o +BENCHMARK_PROGRAM_OBJ := $(COMMON_OBJ) benchmark.o + +ifeq ($(WITH_VALGRIND), 1) + VALGRIND := valgrind --leak-check=full +else + VALGRIND := +endif + +CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src + +%.o: %.cxx + $(CXX) $(CXXFLAGS) -MD -c $< + +test: $(TEST_PROGRAM) + @echo "some unit test" + $(VALGRIND) ./$(TEST_PROGRAM) + + @echo "smoke test" + ../../luajit test_str_comp.lua + +benchmark: $(BENCHMARK_PROGRAM) + # micro benchmark + ./$(BENCHMARK_PROGRAM) + +$(TEST_PROGRAM) : $(TEST_PROGRAM_OBJ) + cat $(TEST_PROGRAM_OBJ:.o=.d) > dep1.txt + $(CXX) $+ $(CXXFLAGS) -lm -o $@ + +$(BENCHMARK_PROGRAM): $(BENCHMARK_PROGRAM_OBJ) + cat $(BENCHMARK_PROGRAM_OBJ:.o=.d) > dep2.txt + $(CXX) $+ 
$(CXXFLAGS) -o $@ + +-include dep1.txt +-include dep2.txt + +clean: + -rm -f *.o *.d dep*.txt $(BENCHMARK_PROGRAM) $(TEST_PROGRAM) diff --git a/src/x64/test/benchmark.cxx b/src/x64/test/benchmark.cxx new file mode 100644 index 00000000..1ea8fb6b --- /dev/null +++ b/src/x64/test/benchmark.cxx @@ -0,0 +1,360 @@ +#include // for gettimeofday() +extern "C" { +#define LUAJIT_SECURITY_STRHASH 1 +#include "../../lj_str.h" +str_sparse_hashfn hash_sparse; +str_dense_hashfn hash_dense; +#include "../../lj_str_hash.c" +} +#include +#include +#include +#include +#include "test_util.hpp" +#include +#include + +using namespace std; + +#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) +#define lj_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n))) + +const char* separator = "-------------------------------------------"; + +static uint32_t LJ_AINLINE +original_hash_sparse(uint64_t seed, const char *str, size_t len) +{ + uint32_t a, b, h = len ^ seed; + if (len >= 4) { + a = lj_getu32(str); h ^= lj_getu32(str+len-4); + b = lj_getu32(str+(len>>1)-2); + h ^= b; h -= lj_rol(b, 14); + b += lj_getu32(str+(len>>2)-1); + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + h ^= b; h -= lj_rol(b, 16); + } else { + a = *(const uint8_t *)str; + h ^= *(const uint8_t *)(str+len-1); + b = *(const uint8_t *)(str+(len>>1)); + h ^= b; h -= lj_rol(b, 14); + } + + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + h ^= b; h -= lj_rol(b, 16); + + return h; +} + +static uint32_t original_hash_dense(uint64_t seed, uint32_t h, + const char *str, size_t len) +{ + uint32_t b = lj_bswap(lj_rol(h ^ (uint32_t)(seed >> 32), 4)); + if (len > 12) { + uint32_t a = (uint32_t)seed; + const char *pe = str+len-12, *p = pe, *q = str; + do { + a += lj_getu32(p); + b += lj_getu32(p+4); + h += lj_getu32(p+8); + p = q; q += 12; + h ^= b; h -= lj_rol(b, 14); + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + } while (p < pe); + h ^= b; h -= lj_rol(b, 16); + a ^= h; a 
-= lj_rol(h, 4); + b ^= a; b -= lj_rol(a, 14); + } + return b; +} + + +template double +BenchmarkHashTmpl(T func, uint64_t seed, char* buf, size_t len) +{ + TestClock timer; + uint32_t h = 0; + + timer.start(); + for(int i = 1; i < 1000000 * 100; i++) { + // So the buf is not loop invariant, hence the F(...) + buf[i % 4096] = i; + h += func(seed, buf, len) ^ i; + } + timer.stop(); + + // make h alive + test_printf("%x", h); + return timer.getElapseInSecond(); +} + +struct TestFuncWasSparse +{ + uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) { + return original_hash_sparse(seed, buf, len); + } +}; + +struct TestFuncIsSparse +{ + uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) { + return hash_sparse_sse42(seed, buf, len); + } +}; + +struct TestFuncWasDense +{ + uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) { + return original_hash_dense(seed, 42, buf, len); + } +}; + +struct TestFuncIsDense +{ + uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) { + return hash_dense_sse42(seed, 42, buf, len); + } +}; + +static void +benchmarkIndividual(uint64_t seed, char* buf) +{ + fprintf(stdout,"\n\nCompare performance of particular len (in second)\n"); + fprintf(stdout, "%-12s%-8s%-8s%s%-8s%-8s%s\n", "len", + "was (s)", "is (s)", "diff (s)", + "was (d)", "is (d)", "diff (d)"); + fprintf(stdout, "-------------------------------------------\n"); + + uint32_t lens[] = {3, 4, 7, 10, 15, 16, 20, 32, 36, 63, 80, 100, + 120, 127, 280, 290, 400}; + for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) { + uint32_t len = lens[i]; + double e1 = BenchmarkHashTmpl(TestFuncWasSparse(), seed, buf, len); + double e2 = BenchmarkHashTmpl(TestFuncIsSparse(), seed, buf, len); + double e3 = BenchmarkHashTmpl(TestFuncWasDense(), seed, buf, len); + double e4 = BenchmarkHashTmpl(TestFuncIsDense(), seed, buf, len); + fprintf(stdout, "len = %4d: %-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n", + len, e1, e2, 100*(e1-e2)/e1, 
e3, e4, 100*(e3-e4)/e3); + } +} + +template double +BenchmarkChangeLenTmpl(T func, uint64_t seed, char* buf, uint32_t* len_vect, + uint32_t len_num) +{ + TestClock timer; + uint32_t h = 0; + + timer.start(); + for(int i = 1; i < 1000000 * 100; i++) { + for (int j = 0; j < (int)len_num; j++) { + // So the buf is not loop invariant, hence the F(...) + buf[(i + j) % 4096] = i; + h += func(seed, buf, len_vect[j]) ^ j; + } + } + timer.stop(); + + // make h alive + test_printf("%x", h); + return timer.getElapseInSecond(); +} + +// It is to measure the performance when length is changing. +// The purpose is to see how balanced branches impact the performance. +// +static void +benchmarkToggleLens(uint64_t seed, char* buf) +{ + double e1, e2, e3, e4; + fprintf(stdout,"\nChanging length (in second):"); + fprintf(stdout, "\n%-24s%-8s%-8s%s%-8s%-8s%s\n%s\n", "len", + "was (s)", "is (s)", "diff (s)", + "was (d)", "is (d)", "diff (d)", + separator); + + uint32_t lens1[] = {4, 9}; + e1 = BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens1, 2); + e2 = BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens1, 2); + e3 = BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens1, 2); + e4 = BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens1, 2); + fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n", "4,9", + e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3); + + uint32_t lens2[] = {1, 4, 9}; + e1 = BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens2, 3); + e2 = BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens2, 3); + e3 = BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens2, 3); + e4 = BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens2, 3); + fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n", "1,4,9", + e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3); + + uint32_t lens3[] = {1, 33, 4, 9}; + e1 = BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens3, 4); + e2 = 
BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens3, 4); + e3 = BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens3, 4); + e4 = BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens3, 4); + fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n", + "1,33,4,9", e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3); + + uint32_t lens4[] = {16, 33, 64, 89}; + e1 = BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens4, 4); + e2 = BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens4, 4); + e3 = BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens4, 4); + e4 = BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens4, 4); + fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n", + "16,33,64,89", e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3); +} + +static void +genRandomString(uint32_t min, uint32_t max, + uint32_t num, vector& result) +{ + double scale = (max - min) / (RAND_MAX + 1.0); + result.clear(); + result.reserve(num); + for (uint32_t i = 0; i < num; i++) { + uint32_t len = (rand() * scale) + min; + + char* buf = new char[len]; + for (uint32_t l = 0; l < len; l++) { + buf[l] = rand() % 255; + } + result.push_back(string(buf, len)); + delete[] buf; + } +} + +// Return the standard deviation of given array of number +static double +standarDeviation(const vector& v) +{ + uint64_t total = 0; + for (vector::const_iterator i = v.begin(), e = v.end(); + i != e; ++i) { + total += *i; + } + + double avg = total / (double)v.size(); + double sd = 0; + + for (vector::const_iterator i = v.begin(), e = v.end(); + i != e; ++i) { + double t = avg - *i; + sd = sd + t*t; + } + + return sqrt(sd/v.size()); +} + +static vector +benchmarkConflictHelper(uint64_t seed, uint32_t bucketNum, + const vector& strs) +{ + if (bucketNum & (bucketNum - 1)) { + bucketNum = (1L << (log2_floor(bucketNum) + 1)); + } + uint32_t mask = bucketNum - 1; + + vector conflictWasSparse(bucketNum); + vector conflictIsSparse(bucketNum); + 
vector conflictWasDense(bucketNum); + vector conflictIsDense(bucketNum); + + conflictWasSparse.resize(bucketNum); + conflictIsSparse.resize(bucketNum); + conflictWasDense.resize(bucketNum); + conflictIsDense.resize(bucketNum); + + for (vector::const_iterator i = strs.begin(), e = strs.end(); + i != e; ++i) { + uint32_t h1 = original_hash_sparse(seed, i->c_str(), i->size()); + uint32_t h2 = hash_sparse_sse42(seed, i->c_str(), i->size()); + uint32_t h3 = original_hash_dense(seed, h1, i->c_str(), i->size()); + uint32_t h4 = hash_dense_sse42(seed, h2, i->c_str(), i->size()); + + conflictWasSparse[h1 & mask]++; + conflictIsSparse[h2 & mask]++; + conflictWasDense[h3 & mask]++; + conflictIsDense[h4 & mask]++; + } + +#if 0 + std::sort(conflictWas.begin(), conflictWas.end(), std::greater()); + std::sort(conflictIs.begin(), conflictIs.end(), std::greater()); + + fprintf(stderr, "%d %d %d %d vs %d %d %d %d\n", + conflictWas[0], conflictWas[1], conflictWas[2], conflictWas[3], + conflictIs[0], conflictIs[1], conflictIs[2], conflictIs[3]); +#endif + vector ret(4); + ret[0] = standarDeviation(conflictWasSparse); + ret[1] = standarDeviation(conflictIsSparse); + ret[2] = standarDeviation(conflictWasDense); + ret[3] = standarDeviation(conflictIsDense); + + return ret; +} + +static void +benchmarkConflict(uint64_t seed) +{ + float loadFactor[] = { 0.5f, 1.0f, 2.0f, 4.0f, 8.0f }; + int bucketNum[] = { 512, 1024, 2048, 4096, 8192, 16384}; + int lenRange[][2] = { {1,3}, {4, 15}, {16, 127}, {128, 1024}, {1, 1024}}; + + fprintf(stdout, + "\nBechmarking conflict (stand deviation of conflict)\n%s\n", + separator); + + for (uint32_t k = 0; k < sizeof(lenRange)/sizeof(lenRange[0]); k++) { + fprintf(stdout, "\nlen range from %d - %d\n", lenRange[k][0], + lenRange[k][1]); + fprintf(stdout, "%-10s %-12s %-10s %-10s diff (s) %-10s %-10s diff (d)\n%s\n", + "bucket", "load-factor", "was (s)", "is (s)", "was (d)", "is (d)", + separator); + for (uint32_t i = 0; i < 
sizeof(bucketNum)/sizeof(bucketNum[0]); ++i) { + for (uint32_t j = 0; + j < sizeof(loadFactor)/sizeof(loadFactor[0]); + ++j) { + int strNum = bucketNum[i] * loadFactor[j]; + vector strs(strNum); + genRandomString(lenRange[k][0], lenRange[k][1], strNum, strs); + + vector p; + p = benchmarkConflictHelper(seed, bucketNum[i], strs); + fprintf(stdout, "%-10d %-12.2f %-10.2f %-10.2f %-10.2f %-10.2f %-10.2f %.2f\n", + bucketNum[i], loadFactor[j], + p[0], p[1], p[0] - p[1], + p[2], p[3], p[2] - p[3]); + } + } + } +} + +static void +benchmarkHashFunc() +{ + srand(time(0)); + + uint64_t seed = (uint32_t) rand(); + char buf[4096]; + char c = getpid() % 'a'; + for (int i = 0; i < (int)sizeof(buf); i++) { + buf[i] = (c + i) % 255; + } + + benchmarkConflict(seed); + benchmarkIndividual(seed, buf); + benchmarkToggleLens(seed, buf); +} + +int +main(int argc, char** argv) +{ + fprintf(stdout, "========================\nMicro benchmark...\n"); + benchmarkHashFunc(); + return 0; +} diff --git a/src/x64/test/test.cpp b/src/x64/test/test.cpp new file mode 100644 index 00000000..432c7bbb --- /dev/null +++ b/src/x64/test/test.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#define LUAJIT_SECURITY_STRHASH 1 +#include "test_util.hpp" +#include "../../lj_str.h" +str_sparse_hashfn hash_sparse; +str_dense_hashfn hash_dense; +#include "../../lj_str_hash.c" + +using namespace std; + + +static bool +smoke_test() +{ + fprintf(stdout, "running smoke tests...\n"); + char buf[1024]; + char c = getpid() % 'a'; + srand(time(0)); + + for (int i = 0; i < (int)sizeof(buf); i++) { + buf[i] = (c + i) % 255; + } + + uint32_t lens[] = {3, 4, 5, 7, 8, 16, 17, 24, 25, 32, 33, 127, 128, + 255, 256, 257}; + for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) { + string s(buf, lens[i]); + uint32_t h = hash_sparse_sse42(rand(), s.c_str(), lens[i]); + test_printf("%d", h); + test_printf("%d", hash_dense_sse42(rand(), h, s.c_str(), lens[i])); + } + + return true; +} + +static bool 
+verify_log2() +{ + fprintf(stdout, "verify log2...\n"); + bool err = false; + std::map lm; + lm[0] =(uint32_t)-1; + lm[1] = 0; + lm[2] = 1; + for (int i = 2; i < 31; i++) { + lm[(1<::iterator iter = lm.begin(), iter_e = lm.end(); + iter != iter_e; ++iter) { + uint32_t v = (*iter).first; + uint32_t log2_expect = (*iter).second; + uint32_t log2_get = log2_floor(v); + if (log2_expect != log2_get) { + err = true; + fprintf(stderr, "log2(%u) expect %u, get %u\n", v, log2_expect, log2_get); + exit(1); + } + } + return !err; +} + +int +main(int argc, char** argv) +{ + fprintf(stdout, "=======================\nRun unit testing...\n"); + + ASSERT(smoke_test(), "smoke_test test failed"); + ASSERT(verify_log2(), "log2 failed"); + + fprintf(stdout, TestErrMsgMgr::noError() ? "succ\n\n" : "fail\n\n"); + + return TestErrMsgMgr::noError() ? 0 : -1; +} diff --git a/src/x64/test/test_str_comp.lua b/src/x64/test/test_str_comp.lua new file mode 100644 index 00000000..3a5c3e67 --- /dev/null +++ b/src/x64/test/test_str_comp.lua @@ -0,0 +1,67 @@ +--[[ + Given two content-identical strings s1, s2, test if they end up being the + same string object. The purpose of this test is to make sure the hash function + does not accidentally include extraneous bytes before and after the string in + question. 
+]] + +local ffi = require("ffi") +local C = ffi.C + +ffi.cdef[[ + void free(void*); + char* malloc(size_t); + void *memset(void*, int, size_t); + void *memcpy(void*, void*, size_t); + long time(void*); + void srandom(unsigned); + long random(void); +]] + + +local function test_equal(len_min, len_max) + -- source string is wrapped by 16-byte-junk both before and after the + -- string + local x = C.random() + local l = len_min + x % (len_max - len_min); + local buf_len = tonumber(l + 16 * 2) + + local src_buf = C.malloc(buf_len) + for i = 0, buf_len - 1 do + src_buf[i] = C.random() % 255 + end + + -- dest string is the clone of the source string, but it is sandwiched + -- by different junk bytes + local dest_buf = C.malloc(buf_len) + C.memset(dest_buf, 0x5a, buf_len) + + local ofst = 8 + (C.random() % 8) + C.memcpy(dest_buf + ofst, src_buf + 16, l); + + local str1 = ffi.string(src_buf + 16, l) + local str2 = ffi.string(dest_buf + ofst, l) + + C.free(src_buf) + C.free(dest_buf) + + if str1 ~= str2 then + -- Oops, look like hash function mistakenly include extraneous bytes + -- close to the string + return 1 -- wtf + end +end + +--local lens = {1, 4, 16, 128, 1024} +local lens = {128, 1024} +local iter = 1000 + +for i = 1, #lens - 1 do + for j = 1, iter do + if test_equal(lens[i], lens[i+1]) ~= nil then + os.exit(1) + end + end +end + +os.exit(0) diff --git a/src/x64/test/test_util.cxx b/src/x64/test/test_util.cxx new file mode 100644 index 00000000..34b7d675 --- /dev/null +++ b/src/x64/test/test_util.cxx @@ -0,0 +1,21 @@ +#include +#include +#include "test_util.hpp" + +using namespace std; + +std::vector TestErrMsgMgr::_errMsg; + +void +test_printf(const char* format, ...) 
+{ + va_list args; + va_start (args, format); + + FILE* devNull = fopen("/dev/null", "w"); + if (devNull != 0) { + (void)vfprintf (devNull, format, args); + } + fclose(devNull); + va_end (args); +} diff --git a/src/x64/test/test_util.d b/src/x64/test/test_util.d new file mode 100644 index 00000000..e539432e --- /dev/null +++ b/src/x64/test/test_util.d @@ -0,0 +1,107 @@ +test_util.o: test_util.cxx /usr/include/stdc-predef.h \ + /usr/lib/gcc/x86_64-redhat-linux/10/include/stdarg.h \ + /usr/include/stdio.h /usr/include/bits/libc-header-start.h \ + /usr/include/features.h /usr/include/sys/cdefs.h \ + /usr/include/bits/wordsize.h /usr/include/bits/long-double.h \ + /usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \ + /usr/lib/gcc/x86_64-redhat-linux/10/include/stddef.h \ + /usr/include/bits/types.h /usr/include/bits/timesize.h \ + /usr/include/bits/typesizes.h /usr/include/bits/time64.h \ + /usr/include/bits/types/__fpos_t.h /usr/include/bits/types/__mbstate_t.h \ + /usr/include/bits/types/__fpos64_t.h /usr/include/bits/types/__FILE.h \ + /usr/include/bits/types/FILE.h /usr/include/bits/types/struct_FILE.h \ + /usr/include/bits/types/cookie_io_functions_t.h \ + /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \ + /usr/include/bits/stdio.h test_util.hpp /usr/include/sys/time.h \ + /usr/include/bits/types/time_t.h \ + /usr/include/bits/types/struct_timeval.h /usr/include/sys/select.h \ + /usr/include/bits/select.h /usr/include/bits/types/sigset_t.h \ + /usr/include/bits/types/__sigset_t.h \ + /usr/include/bits/types/struct_timespec.h /usr/include/bits/endian.h \ + /usr/include/bits/endianness.h /usr/include/c++/10/string \ + /usr/include/c++/10/x86_64-redhat-linux/bits/c++config.h \ + /usr/include/c++/10/x86_64-redhat-linux/bits/os_defines.h \ + /usr/include/c++/10/x86_64-redhat-linux/bits/cpu_defines.h \ + /usr/include/c++/10/bits/stringfwd.h \ + /usr/include/c++/10/bits/memoryfwd.h \ + /usr/include/c++/10/bits/char_traits.h \ + 
/usr/include/c++/10/bits/stl_algobase.h \ + /usr/include/c++/10/bits/functexcept.h \ + /usr/include/c++/10/bits/exception_defines.h \ + /usr/include/c++/10/bits/cpp_type_traits.h \ + /usr/include/c++/10/ext/type_traits.h \ + /usr/include/c++/10/ext/numeric_traits.h \ + /usr/include/c++/10/bits/stl_pair.h /usr/include/c++/10/bits/move.h \ + /usr/include/c++/10/type_traits \ + /usr/include/c++/10/bits/stl_iterator_base_types.h \ + /usr/include/c++/10/bits/stl_iterator_base_funcs.h \ + /usr/include/c++/10/bits/concept_check.h \ + /usr/include/c++/10/debug/assertions.h \ + /usr/include/c++/10/bits/stl_iterator.h \ + /usr/include/c++/10/bits/ptr_traits.h /usr/include/c++/10/debug/debug.h \ + /usr/include/c++/10/bits/predefined_ops.h \ + /usr/include/c++/10/bits/postypes.h /usr/include/c++/10/cwchar \ + /usr/include/wchar.h /usr/include/bits/floatn.h \ + /usr/include/bits/floatn-common.h /usr/include/bits/wchar.h \ + /usr/include/bits/types/wint_t.h /usr/include/bits/types/mbstate_t.h \ + /usr/include/bits/types/locale_t.h /usr/include/bits/types/__locale_t.h \ + /usr/include/c++/10/cstdint \ + /usr/lib/gcc/x86_64-redhat-linux/10/include/stdint.h \ + /usr/include/stdint.h /usr/include/bits/stdint-intn.h \ + /usr/include/bits/stdint-uintn.h /usr/include/c++/10/bits/allocator.h \ + /usr/include/c++/10/x86_64-redhat-linux/bits/c++allocator.h \ + /usr/include/c++/10/ext/new_allocator.h /usr/include/c++/10/new \ + /usr/include/c++/10/exception /usr/include/c++/10/bits/exception.h \ + /usr/include/c++/10/bits/exception_ptr.h \ + /usr/include/c++/10/bits/cxxabi_init_exception.h \ + /usr/include/c++/10/typeinfo /usr/include/c++/10/bits/hash_bytes.h \ + /usr/include/c++/10/bits/nested_exception.h \ + /usr/include/c++/10/bits/localefwd.h \ + /usr/include/c++/10/x86_64-redhat-linux/bits/c++locale.h \ + /usr/include/c++/10/clocale /usr/include/locale.h \ + /usr/include/bits/locale.h /usr/include/c++/10/iosfwd \ + /usr/include/c++/10/cctype /usr/include/ctype.h \ + 
/usr/include/c++/10/bits/ostream_insert.h \ + /usr/include/c++/10/bits/cxxabi_forced.h \ + /usr/include/c++/10/bits/stl_function.h \ + /usr/include/c++/10/backward/binders.h \ + /usr/include/c++/10/bits/range_access.h \ + /usr/include/c++/10/initializer_list \ + /usr/include/c++/10/bits/iterator_concepts.h \ + /usr/include/c++/10/concepts /usr/include/c++/10/bits/range_cmp.h \ + /usr/include/c++/10/bits/int_limits.h \ + /usr/include/c++/10/bits/basic_string.h \ + /usr/include/c++/10/ext/atomicity.h \ + /usr/include/c++/10/x86_64-redhat-linux/bits/gthr.h \ + /usr/include/c++/10/x86_64-redhat-linux/bits/gthr-default.h \ + /usr/include/pthread.h /usr/include/sched.h /usr/include/bits/sched.h \ + /usr/include/bits/types/struct_sched_param.h /usr/include/bits/cpu-set.h \ + /usr/include/time.h /usr/include/bits/time.h /usr/include/bits/timex.h \ + /usr/include/bits/types/clock_t.h /usr/include/bits/types/struct_tm.h \ + /usr/include/bits/types/clockid_t.h /usr/include/bits/types/timer_t.h \ + /usr/include/bits/types/struct_itimerspec.h \ + /usr/include/bits/pthreadtypes.h /usr/include/bits/thread-shared-types.h \ + /usr/include/bits/pthreadtypes-arch.h /usr/include/bits/struct_mutex.h \ + /usr/include/bits/struct_rwlock.h /usr/include/bits/setjmp.h \ + /usr/include/c++/10/x86_64-redhat-linux/bits/atomic_word.h \ + /usr/include/c++/10/ext/alloc_traits.h \ + /usr/include/c++/10/bits/alloc_traits.h \ + /usr/include/c++/10/bits/stl_construct.h \ + /usr/include/c++/10/ext/string_conversions.h /usr/include/c++/10/cstdlib \ + /usr/include/stdlib.h /usr/include/bits/waitflags.h \ + /usr/include/bits/waitstatus.h /usr/include/sys/types.h \ + /usr/include/endian.h /usr/include/bits/byteswap.h \ + /usr/include/bits/uintn-identity.h /usr/include/alloca.h \ + /usr/include/bits/stdlib-bsearch.h /usr/include/bits/stdlib-float.h \ + /usr/include/c++/10/bits/std_abs.h /usr/include/c++/10/cstdio \ + /usr/include/c++/10/cerrno /usr/include/errno.h \ + /usr/include/bits/errno.h 
/usr/include/linux/errno.h \ + /usr/include/asm/errno.h /usr/include/asm-generic/errno.h \ + /usr/include/asm-generic/errno-base.h /usr/include/bits/types/error_t.h \ + /usr/include/c++/10/bits/charconv.h \ + /usr/include/c++/10/bits/functional_hash.h \ + /usr/include/c++/10/bits/basic_string.tcc /usr/include/c++/10/vector \ + /usr/include/c++/10/bits/stl_uninitialized.h \ + /usr/include/c++/10/bits/stl_vector.h \ + /usr/include/c++/10/bits/stl_bvector.h \ + /usr/include/c++/10/bits/vector.tcc diff --git a/src/x64/test/test_util.hpp b/src/x64/test/test_util.hpp new file mode 100644 index 00000000..6cc2ea2c --- /dev/null +++ b/src/x64/test/test_util.hpp @@ -0,0 +1,57 @@ +#ifndef _TEST_UTIL_HPP_ +#define _TEST_UTIL_HPP_ + +#include // gettimeofday() +#include +#include + +struct TestErrMsg +{ + const char* fileName; + unsigned lineNo; + std::string errMsg; + + TestErrMsg(const char* FN, unsigned LN, const char* Err): + fileName(FN), lineNo(LN), errMsg(Err) {} +}; + +class TestErrMsgMgr +{ +public: + static std::vector getError(); + static void + addError(const char* fileName, unsigned lineNo, const char* Err) { + _errMsg.push_back(TestErrMsg(fileName, lineNo, Err)); + } + + static bool noError() { + return _errMsg.empty(); + } + +private: + static std::vector _errMsg; +}; + +#define ASSERT(c, e) \ + if (!(c)) { TestErrMsgMgr::addError(__FILE__, __LINE__, (e)); } + +class TestClock +{ +public: + void start() { gettimeofday(&_start, 0); } + void stop() { gettimeofday(&_end, 0); } + double getElapseInSecond() { + return (_end.tv_sec - _start.tv_sec) + + ((long)_end.tv_usec - (long)_start.tv_usec) / 1000000.0; + } + +private: + struct timeval _start, _end; +}; + +// write to /dev/null, the only purpose is to make the data fed to the +// function alive. +extern void test_printf(const char* format, ...) + __attribute__ ((format (printf, 1, 2))); + +#endif //_TEST_UTIL_HPP_