Adding s390x support for LuaJIT

This commit is contained in:
Aditya Bisht 2022-08-29 22:43:19 +05:30
parent 0065cff7e0
commit 5047ff0de0
No known key found for this signature in database
GPG Key ID: 0A812A321D95394B
73 changed files with 9603 additions and 970 deletions

View File

@ -54,9 +54,9 @@ CCOPT_arm64=
CCOPT_ppc=
CCOPT_mips=
#
CCDEBUG=
#CCDEBUG=
# Uncomment the next line to generate debug information:
#CCDEBUG= -g
CCDEBUG= -g
#
CCWARN= -Wall
# Uncomment the next line to enable more warnings:
@ -244,6 +244,9 @@ else
ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm
else
ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
TARGET_LJARCH= s390x
else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__AARCH64EB__=1
@ -275,6 +278,7 @@ endif
endif
endif
endif
endif
ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
TARGET_SYS= PS3
@ -461,7 +465,16 @@ ifeq (ppc,$(TARGET_LJARCH))
DASM_AFLAGS+= -D GPR64
endif
ifeq (PS3,$(TARGET_SYS))
DASM_AFLAGS+= -D PPE -D TOC
DASM_AFLAGS+= -D PPE
endif
ifneq (,$(findstring LJ_ARCH_PPC_OPD 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D OPD
endif
ifneq (,$(findstring LJ_ARCH_PPC_OPDENV 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D OPDENV
endif
ifneq (,$(findstring LJ_ARCH_PPC_ELFV2 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D ELFV2
endif
endif
endif
@ -501,10 +514,16 @@ LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
lj_carith.o lj_clib.o lj_cparse.o \
lj_lib.o lj_alloc.o lib_aux.o \
$(LJLIB_O) lib_init.o
$(LJLIB_O) lib_init.o lj_str_hash.o
ifeq (x64,$(TARGET_LJARCH))
lj_str_hash-CFLAGS = -msse4.2
endif
F_CFLAGS = $($(patsubst %.c,%-CFLAGS,$<))
LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) lj_init_dyn.o
LIB_VMDEF= jit/vmdef.lua
LIB_VMDEFP= $(LIB_VMDEF)
@ -526,7 +545,7 @@ ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
##############################################################################
# Mixed mode defaults.
TARGET_O= $(LUAJIT_A)
TARGET_O= lj_init.o $(LUAJIT_A)
TARGET_T= $(LUAJIT_T) $(LUAJIT_SO)
TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)
@ -608,7 +627,7 @@ E= @echo
default all: $(TARGET_T)
amalg:
$(MAKE) all "LJCORE_O=ljamalg.o"
$(MAKE) all "LJCORE_O=ljamalg.o lj_str_hash.o"
clean:
$(HOST_RM) $(ALL_RM)
@ -685,8 +704,8 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
%.o: %.c
$(E) "CC $@"
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $(@:.o=_dyn.o) $<
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $@ $<
%.o: %.S
$(E) "ASM $@"

View File

@ -18,10 +18,8 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_bc.h"
#if LJ_HASJIT
#include "lj_ir.h"
#include "lj_ircall.h"
#endif
#include "lj_frame.h"
#include "lj_dispatch.h"
#if LJ_HASFFI
@ -67,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#include "../dynasm/dasm_ppc.h"
#elif LJ_TARGET_MIPS
#include "../dynasm/dasm_mips.h"
#elif LJ_TARGET_S390X
#include "../dynasm/dasm_s390x.h"
#else
#error "No support for this architecture (yet)"
#endif
@ -252,7 +252,6 @@ BCDEF(BCNAME)
NULL
};
#if LJ_HASJIT
const char *const ir_names[] = {
#define IRNAME(name, m, m1, m2) #name,
IRDEF(IRNAME)
@ -293,9 +292,7 @@ static const char *const trace_errors[] = {
#include "lj_traceerr.h"
NULL
};
#endif
#if LJ_HASJIT
static const char *lower(char *buf, const char *s)
{
char *p = buf;
@ -306,7 +303,6 @@ static const char *lower(char *buf, const char *s)
*p = '\0';
return buf;
}
#endif
/* Emit C source code for bytecode-related definitions. */
static void emit_bcdef(BuildCtx *ctx)
@ -324,9 +320,7 @@ static void emit_bcdef(BuildCtx *ctx)
/* Emit VM definitions as Lua code for debug modules. */
static void emit_vmdef(BuildCtx *ctx)
{
#if LJ_HASJIT
char buf[80];
#endif
int i;
fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
fprintf(ctx->fp, "return {\n\n");
@ -335,7 +329,6 @@ static void emit_vmdef(BuildCtx *ctx)
for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
fprintf(ctx->fp, "\",\n\n");
#if LJ_HASJIT
fprintf(ctx->fp, "irnames = \"");
for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
fprintf(ctx->fp, "\",\n\n");
@ -364,7 +357,6 @@ static void emit_vmdef(BuildCtx *ctx)
for (i = 0; trace_errors[i]; i++)
fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
fprintf(ctx->fp, "},\n\n");
#endif
}
/* -- Argument parsing ---------------------------------------------------- */

View File

@ -87,6 +87,54 @@ err:
}
fprintf(ctx->fp, "\t%s %s\n", opname, sym);
}
#elif LJ_TARGET_S390X
/* Emit halfwords piecewise as assembler text.
**
** Writes the first n/2 halfwords found at p to ctx->fp as ".hword"
** directives, eight values per output line. An odd trailing byte is not
** emitted. Each halfword is read in host byte order; memcpy is used for
** the read so that no uint16_t lvalue aliases the uint8_t buffer and no
** alignment of p is required.
*/
static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n)
{
  int i;
  int nhw = n / 2;  /* Number of complete halfwords. */
  for (i = 0; i < nhw; i++) {
    uint16_t hw;
    memcpy(&hw, p + 2*i, sizeof(hw));
    if ((i & 7) == 0)
      fprintf(ctx->fp, "\t.hword 0x%hx", hw);  /* Start a new directive. */
    else
      fprintf(ctx->fp, ",0x%hx", hw);
    if ((i & 7) == 7) putc('\n', ctx->fp);  /* Line is full. */
  }
  /* Terminate a partially filled last line. */
  if ((nhw & 7) != 0) putc('\n', ctx->fp);
}
/* Emit s390x text relocations.
**
** cp points at n bytes of machine code. The last two of these bytes are
** decoded as the opcode halfword of a relative branch (bras, brasl, brc or
** brcl); bits 4-7 of that halfword are printed as the register (for
** bras/brasl) or as a plain number (for brc/brcl). The n-2 bytes before it
** are emitted verbatim as halfwords, then the branch is emitted in symbolic
** form against sym. Symbols that do not start with LABEL_PREFIX (after an
** optional leading underscore) get an "@PLT" suffix.
**
** Exits the program with an error message if n is odd or less than 2, or
** if the opcode is not one of the four supported branches.
*/
static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
				const char *sym)
{
  const char *opname = NULL;
  const char *argt = "";  /* Inserted before argument. */
  const char *suffix;
  uint16_t hw;
  int opcode, arg;
  if ((n & 1) || n < 2) {
    fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n);
    exit(1);
  }
  n -= 2;
  /* Read the opcode halfword in host order without aliasing the buffer. */
  memcpy(&hw, cp + n, sizeof(hw));
  opcode = hw;
  arg = (opcode >> 4) & 0xf;
  switch (opcode & 0xff0f) {
  case 0xa705: opname = "bras"; argt = "%r"; break;
  case 0xc005: opname = "brasl"; argt = "%r"; break;
  case 0xa704: opname = "brc"; break;
  case 0xc004: opname = "brcl"; break;
  default:
    fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
	    sym);
    exit(1);
  }
  emit_asm_halfwords(ctx, cp, n);
  /* Various fixups for external symbols outside of our binary. */
  suffix = strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1) ?
	   "@PLT" : "";
  fprintf(ctx->fp, "\t%s %s%d, %s%s\n", opname, argt, arg, sym, suffix);
}
#else
/* Emit words piecewise as assembler text. */
static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
@ -140,7 +188,11 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
#else
#define TOCPREFIX ""
#endif
if ((ins >> 26) == 16) {
if ((ins >> 26) == 14) {
fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
} else if ((ins >> 26) == 15) {
fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
} else if ((ins >> 26) == 16) {
fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
(ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
} else if ((ins >> 26) == 18) {
@ -242,6 +294,9 @@ void emit_asm(BuildCtx *ctx)
int i, rel;
fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
#if LJ_ARCH_PPC_ELFV2
fprintf(ctx->fp, "\t.abiversion 2\n");
#endif
fprintf(ctx->fp, "\t.text\n");
emit_asm_align(ctx, 4);
@ -299,6 +354,9 @@ void emit_asm(BuildCtx *ctx)
emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
}
ofs += n+4;
#elif LJ_TARGET_S390X
emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
ofs += n+4;
#else
emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
ofs += n;
@ -307,6 +365,8 @@ void emit_asm(BuildCtx *ctx)
}
#if LJ_TARGET_X86ORX64
emit_asm_bytes(ctx, ctx->code+ofs, next-ofs);
#elif LJ_TARGET_S390X
emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs);
#else
emit_asm_words(ctx, ctx->code+ofs, next-ofs);
#endif

View File

@ -5,7 +5,6 @@
#include "buildvm.h"
#include "lj_obj.h"
#if LJ_HASJIT
#include "lj_ir.h"
/* Context for the folding hash table generator. */
@ -227,10 +226,4 @@ void emit_fold(BuildCtx *ctx)
makehash(ctx);
}
#else
void emit_fold(BuildCtx *ctx)
{
UNUSED(ctx);
}
#endif

View File

@ -4,67 +4,42 @@ static const int libbc_endian = 0;
static const uint8_t libbc_code[] = {
#if LJ_FR2
/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
220,203,178,130,4,
/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
198,190,199,252,3,
/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
/* table.foreachi */ 0,2,10,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
BC_MOV,8,5,0,BC_TGETR,9,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
/* table.foreach */ 0,2,11,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,9,5,0,
BC_MOV,10,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,1,16,16,0,12,0,16,1,9,0,43,2,
0,0,18,3,0,0,42,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,1,255,255,249,255,15,
0,1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,
0,11,1,0,0,88,3,7,128,8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,
0,88,3,18,128,16,1,14,0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,
3,1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,
252,127,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,
1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,
5,24,128,33,5,1,3,0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,
18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,
127,88,6,8,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,
0,64,11,10,4,79,6,252,127,76,4,2,0,0
#else
/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
220,203,178,130,4,
/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
198,190,199,252,3,
/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
/* table.foreachi */ 0,2,9,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
BC_MOV,7,5,0,BC_TGETR,8,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
/* table.foreach */ 0,2,10,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,8,5,0,
BC_MOV,9,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,1,16,16,0,12,0,16,1,9,0,43,2,
0,0,18,3,0,0,42,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,1,255,255,249,255,15,0,
1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,
11,1,0,0,88,3,7,128,8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,
88,3,18,128,16,1,14,0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,
1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,
127,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,
14,0,16,2,14,0,16,3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,
5,24,128,33,5,1,3,0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,
18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,
127,88,6,8,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,
0,64,11,10,4,79,6,252,127,76,4,2,0,0
#endif
0
};
static const struct { const char *name; int ofs; } libbc_map[] = {

View File

@ -79,11 +79,9 @@ local name2itype = {
str = 5, func = 9, tab = 12, int = 14, num = 15
}
local BC, BCN = {}, {}
local BC = {}
for i=0,#bcnames/6-1 do
local name = bcnames:sub(i*6+1, i*6+6):gsub(" ", "")
BC[name] = i
BCN[i] = name
BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
end
local xop, xra = isbe and 3 or 0, isbe and 2 or 1
local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
@ -98,7 +96,6 @@ local function fixup_dump(dump, fixup)
p = read_uleb128(p)
p = read_uleb128(p)
p, sizebc = read_uleb128(p)
local startbc = tonumber(p - start)
local rawtab = {}
for i=0,sizebc-1 do
local op = p[xop]
@ -135,7 +132,7 @@ local function fixup_dump(dump, fixup)
local ndump = ffi.string(start, n)
-- Fixup hi-part of 0x4dp80 to LJ_KEYINDEX.
ndump = ndump:gsub("\x80\x80\xcd\xaa\x04", "\xff\xff\xf9\xff\x0f")
return { dump = ndump, startbc = startbc, sizebc = sizebc }
return ndump
end
local function find_defs(src)
@ -155,46 +152,24 @@ local function gen_header(defs)
local function w(x) t[#t+1] = x end
w("/* This is a generated file. DO NOT EDIT! */\n\n")
w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
local s, sb = "", ""
for i,name in ipairs(defs) do
local d = defs[name]
s = s .. d.dump
sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1)
.. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
.. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
local s = ""
for _,name in ipairs(defs) do
s = s .. defs[name]
end
w("static const uint8_t libbc_code[] = {\n")
local n = 0
for i=1,#s do
local x = string.byte(s, i)
local xb = string.byte(sb, i)
if xb == 255 then
local name = BCN[x]
local m = #name + 4
if n + m > 78 then n = 0; w("\n") end
n = n + m
w("BC_"); w(name)
else
local m = x < 10 and 2 or (x < 100 and 3 or 4)
if xb == 0 then
if n + m > 78 then n = 0; w("\n") end
else
local name = defs[xb]:gsub("_", ".")
if n ~= 0 then w("\n") end
w("/* "); w(name); w(" */ ")
n = #name + 7
w(x); w(",")
n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
if n >= 75 then n = 0; w("\n") end
end
n = n + m
w(x)
end
w(",")
end
w("\n0\n};\n\n")
w("0\n};\n\n")
w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
local m = 0
for _,name in ipairs(defs) do
w('{"'); w(name); w('",'); w(m) w('},\n')
m = m + #defs[name].dump
m = m + #defs[name]
end
w("{NULL,"); w(m); w("}\n};\n\n")
return table.concat(t)

View File

@ -327,12 +327,6 @@ local function rename_tokens2(src)
return gsub(src, "ZY([%w_]+)", "union %1")
end
local function fix_bugs_and_warnings(src)
src = gsub(src, "(luaD_checkstack%(L,p%->maxstacksize)%)", "%1+p->numparams)")
src = gsub(src, "if%(sep==%-1%)(return'%[';)\nelse (luaX_lexerror%b();)", "if (sep!=-1)%2\n%1")
return gsub(src, "(default:{\nNode%*n=mainposition)", "/*fallthrough*/\n%1")
end
local function func_gather(src)
local nodes, list = {}, {}
local pos, len = 1, #src
@ -431,6 +425,5 @@ src = rename_tokens1(src)
src = func_collect(src)
src = rename_tokens2(src)
src = restore_strings(src)
src = fix_bugs_and_warnings(src)
src = merge_header(src, license)
io.write(src)

View File

@ -1639,7 +1639,6 @@ lua_number2int(k,n);
if(luai_numeq(cast_num(k),nvalue(key)))
return luaH_getnum(t,k);
}
/*fallthrough*/
default:{
Node*n=mainposition(t,key);
do{
@ -2906,8 +2905,8 @@ if(sep>=0){
read_long_string(ls,seminfo,sep);
return TK_STRING;
}
else if (sep!=-1)luaX_lexerror(ls,"invalid long string delimiter",TK_STRING);
return'[';
else if(sep==-1)return'[';
else luaX_lexerror(ls,"invalid long string delimiter",TK_STRING);
}
case'=':{
next(ls);

View File

@ -63,15 +63,21 @@ local function ctlsub(c)
end
-- Return one bytecode line.
local function bcline(func, pc, prefix)
local ins, m = funcbc(func, pc)
local function bcline(func, pc, prefix, lineinfo)
local ins, m, l = funcbc(func, pc, lineinfo and 1 or 0)
if not ins then return end
local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128)
local a = band(shr(ins, 8), 0xff)
local oidx = 6*band(ins, 0xff)
local op = sub(bcnames, oidx+1, oidx+6)
local s = format("%04d %s %-6s %3s ",
local s
if lineinfo then
s = format("%04d %7s %s %-6s %3s ",
pc, "["..l.."]", prefix or " ", op, ma == 0 and "" or a)
else
s = format("%04d %s %-6s %3s ",
pc, prefix or " ", op, ma == 0 and "" or a)
end
local d = shr(ins, 16)
if mc == 13*128 then -- BCMjump
return format("%s=> %04d\n", s, pc+d-0x7fff)
@ -124,20 +130,52 @@ local function bctargets(func)
end
-- Dump bytecode instructions of a function.
local function bcdump(func, out, all)
local function bcdump(func, out, all, lineinfo)
if not out then out = stdout end
local fi = funcinfo(func)
if all and fi.children then
for n=-1,-1000000000,-1 do
local k = funck(func, n)
if not k then break end
if type(k) == "proto" then bcdump(k, out, true) end
if type(k) == "proto" then bcdump(k, out, true, lineinfo) end
end
end
out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined))
for n=-1,-1000000000,-1 do
local kc = funck(func, n)
if not kc then break end
local typ = type(kc)
if typ == "string" then
kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub))
out:write(format("KGC %d %s\n", -(n + 1), kc))
elseif typ == "proto" then
local fi = funcinfo(kc)
if fi.ffid then
kc = vmdef.ffnames[fi.ffid]
else
kc = fi.loc
end
out:write(format("KGC %d %s\n", -(n + 1), kc))
elseif typ == "table" then
out:write(format("KGC %d table\n", -(n + 1)))
else
-- error("unknown KGC type: " .. typ)
end
end
for n=1,1000000000 do
local kc = funck(func, n)
if not kc then break end
if type(kc) == "number" then
out:write(format("KN %d %s\n", n, kc))
end
end
local target = bctargets(func)
for pc=1,1000000000 do
local s = bcline(func, pc, target[pc] and "=>")
local s = bcline(func, pc, target[pc] and "=>", lineinfo)
if not s then break end
out:write(s)
end

View File

@ -27,6 +27,7 @@ local function usage()
io.stderr:write[[
Save LuaJIT bytecode: luajit -b[options] input output
-l Only list bytecode.
-L Only list bytecode with lineinfo.
-s Strip debug info (default).
-g Keep debug info.
-n name Set module name (default: auto-detect from input name).
@ -592,9 +593,9 @@ end
------------------------------------------------------------------------------
local function bclist(input, output)
local function bclist(input, output, lineinfo)
local f = readfile(input)
require("jit.bc").dump(f, savefile(output, "w"), true)
require("jit.bc").dump(f, savefile(output, "w"), true, lineinfo)
end
local function bcsave(ctx, input, output)
@ -621,6 +622,7 @@ local function docmd(...)
local arg = {...}
local n = 1
local list = false
local lineinfo = false
local ctx = {
strip = true, arch = jit.arch, os = jit.os:lower(),
type = false, modname = false,
@ -634,6 +636,9 @@ local function docmd(...)
local opt = a:sub(m, m)
if opt == "l" then
list = true
elseif opt == "L" then
list = true
lineinfo = true
elseif opt == "s" then
ctx.strip = true
elseif opt == "g" then
@ -662,7 +667,7 @@ local function docmd(...)
end
if list then
if #arg == 0 or #arg > 2 then usage() end
bclist(arg[1], arg[2] or "-")
bclist(arg[1], arg[2] or "-", lineinfo)
else
if #arg ~= 2 then usage() end
bcsave(ctx, arg[1], arg[2])

1594
src/jit/dis_s390x.lua Normal file

File diff suppressed because it is too large Load Diff

View File

@ -144,10 +144,11 @@ local function dump_mcode(tr)
if not mcode then return end
if not disass then disass = require("jit.dis_"..jit.arch) end
if addr < 0 then addr = addr + 2^32 end
out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
out:write("---- TRACE REallly ", tr, " mcode ", #mcode, "\n")
local ctx = disass.create(mcode, addr, dumpwrite)
ctx.hexdump = 0
ctx.symtab = fillsymtab(tr, info.nexit)
print(info.nexit)
if loop ~= 0 then
symtab[addr+loop] = "LOOP"
ctx:disass(0, loop)
@ -387,13 +388,13 @@ end
-- Dump snapshots (not interleaved with IR).
local function dump_snap(tr)
out:write("---- TRACE ", tr, " snapshots\n")
for i=0,1000000000 do
local snap = tracesnap(tr, i)
if not snap then break end
out:write(format("#%-3d %04d [ ", i, snap[0]))
printsnap(tr, snap)
end
-- out:write("---- TRACE ", tr, " snapshots\n")
-- for i=0,1000000000 do
-- local snap = tracesnap(tr, i)
-- if not snap then break end
-- out:write(format("#%-3d %04d [ ", i, snap[0]))
-- printsnap(tr, snap)
-- end
end
-- Return a register name or stack slot for a rid/sp location.
@ -410,20 +411,20 @@ end
-- Dump CALL* function ref and return optional ctype.
local function dumpcallfunc(tr, ins)
local ctype
if ins > 0 then
local m, ot, op1, op2 = traceir(tr, ins)
if band(ot, 31) == 0 then -- nil type means CARG(func, ctype).
ins = op1
ctype = formatk(tr, op2)
end
end
if ins < 0 then
out:write(format("[0x%x](", tonumber((tracek(tr, ins)))))
else
out:write(format("%04d (", ins))
end
return ctype
-- local ctype
-- if ins > 0 then
-- local m, ot, op1, op2 = traceir(tr, ins)
-- if band(ot, 31) == 0 then -- nil type means CARG(func, ctype).
-- ins = op1
-- ctype = formatk(tr, op2)
-- end
-- end
-- if ins < 0 then
-- out:write(format("[0x%x](", tonumber((tracek(tr, ins)))))
-- else
-- out:write(format("%04d (", ins))
-- end
-- return ctype
end
-- Recursively gather CALL* args and dump them.
@ -449,99 +450,99 @@ end
-- Dump IR and interleaved snapshots.
local function dump_ir(tr, dumpsnap, dumpreg)
local info = traceinfo(tr)
if not info then return end
local nins = info.nins
out:write("---- TRACE ", tr, " IR\n")
local irnames = vmdef.irnames
local snapref = 65536
local snap, snapno
if dumpsnap then
snap = tracesnap(tr, 0)
snapref = snap[0]
snapno = 0
end
for ins=1,nins do
if ins >= snapref then
if dumpreg then
out:write(format(".... SNAP #%-3d [ ", snapno))
else
out:write(format(".... SNAP #%-3d [ ", snapno))
end
printsnap(tr, snap)
snapno = snapno + 1
snap = tracesnap(tr, snapno)
snapref = snap and snap[0] or 65536
end
local m, ot, op1, op2, ridsp = traceir(tr, ins)
local oidx, t = 6*shr(ot, 8), band(ot, 31)
local op = sub(irnames, oidx+1, oidx+6)
if op == "LOOP " then
if dumpreg then
out:write(format("%04d ------------ LOOP ------------\n", ins))
else
out:write(format("%04d ------ LOOP ------------\n", ins))
end
elseif op ~= "NOP " and op ~= "CARG " and
(dumpreg or op ~= "RENAME") then
local rid = band(ridsp, 255)
if dumpreg then
out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
else
out:write(format("%04d ", ins))
end
out:write(format("%s%s %s %s ",
(rid == 254 or rid == 253) and "}" or
(band(ot, 128) == 0 and " " or ">"),
band(ot, 64) == 0 and " " or "+",
irtype[t], op))
local m1, m2 = band(m, 3), band(m, 3*4)
if sub(op, 1, 4) == "CALL" then
local ctype
if m2 == 1*4 then -- op2 == IRMlit
out:write(format("%-10s (", vmdef.ircall[op2]))
else
ctype = dumpcallfunc(tr, op2)
end
if op1 ~= -1 then dumpcallargs(tr, op1) end
out:write(")")
if ctype then out:write(" ctype ", ctype) end
elseif op == "CNEW " and op2 == -1 then
out:write(formatk(tr, op1))
elseif m1 ~= 3 then -- op1 != IRMnone
if op1 < 0 then
out:write(formatk(tr, op1))
else
out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
end
if m2 ~= 3*4 then -- op2 != IRMnone
if m2 == 1*4 then -- op2 == IRMlit
local litn = litname[op]
if litn and litn[op2] then
out:write(" ", litn[op2])
elseif op == "UREFO " or op == "UREFC " then
out:write(format(" #%-3d", shr(op2, 8)))
else
out:write(format(" #%-3d", op2))
end
elseif op2 < 0 then
out:write(" ", formatk(tr, op2))
else
out:write(format(" %04d", op2))
end
end
end
out:write("\n")
end
end
if snap then
if dumpreg then
out:write(format(".... SNAP #%-3d [ ", snapno))
else
out:write(format(".... SNAP #%-3d [ ", snapno))
end
printsnap(tr, snap)
end
-- local info = traceinfo(tr)
-- if not info then return end
-- local nins = info.nins
-- out:write("---- TRACE ", tr, " IR\n")
-- local irnames = vmdef.irnames
-- local snapref = 65536
-- local snap, snapno
-- if dumpsnap then
-- snap = tracesnap(tr, 0)
-- snapref = snap[0]
-- snapno = 0
-- end
-- for ins=1,nins do
-- if ins >= snapref then
-- if dumpreg then
-- out:write(format(".... SNAP #%-3d [ ", snapno))
-- else
-- out:write(format(".... SNAP #%-3d [ ", snapno))
-- end
-- printsnap(tr, snap)
-- snapno = snapno + 1
-- snap = tracesnap(tr, snapno)
-- snapref = snap and snap[0] or 65536
-- end
-- local m, ot, op1, op2, ridsp = traceir(tr, ins)
-- local oidx, t = 6*shr(ot, 8), band(ot, 31)
-- local op = sub(irnames, oidx+1, oidx+6)
-- if op == "LOOP " then
-- if dumpreg then
-- out:write(format("%04d ------------ LOOP ------------\n", ins))
-- else
-- out:write(format("%04d ------ LOOP ------------\n", ins))
-- end
-- elseif op ~= "NOP " and op ~= "CARG " and
-- (dumpreg or op ~= "RENAME") then
-- local rid = band(ridsp, 255)
-- if dumpreg then
-- out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
-- else
-- out:write(format("%04d ", ins))
-- end
-- out:write(format("%s%s %s %s ",
-- (rid == 254 or rid == 253) and "}" or
-- (band(ot, 128) == 0 and " " or ">"),
-- band(ot, 64) == 0 and " " or "+",
-- irtype[t], op))
-- local m1, m2 = band(m, 3), band(m, 3*4)
-- if sub(op, 1, 4) == "CALL" then
-- local ctype
-- if m2 == 1*4 then -- op2 == IRMlit
-- out:write(format("%-10s (", vmdef.ircall[op2]))
-- else
-- ctype = dumpcallfunc(tr, op2)
-- end
-- if op1 ~= -1 then dumpcallargs(tr, op1) end
-- out:write(")")
-- if ctype then out:write(" ctype ", ctype) end
-- elseif op == "CNEW " and op2 == -1 then
-- out:write(formatk(tr, op1))
-- elseif m1 ~= 3 then -- op1 != IRMnone
-- if op1 < 0 then
-- out:write(formatk(tr, op1))
-- else
-- out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
-- end
-- if m2 ~= 3*4 then -- op2 != IRMnone
-- if m2 == 1*4 then -- op2 == IRMlit
-- local litn = litname[op]
-- if litn and litn[op2] then
-- out:write(" ", litn[op2])
-- elseif op == "UREFO " or op == "UREFC " then
-- out:write(format(" #%-3d", shr(op2, 8)))
-- else
-- out:write(format(" #%-3d", op2))
-- end
-- elseif op2 < 0 then
-- out:write(" ", formatk(tr, op2))
-- else
-- out:write(format(" %04d", op2))
-- end
-- end
-- end
-- out:write("\n")
-- end
-- end
-- if snap then
-- if dumpreg then
-- out:write(format(".... SNAP #%-3d [ ", snapno))
-- else
-- out:write(format(".... SNAP #%-3d [ ", snapno))
-- end
-- printsnap(tr, snap)
-- end
end
------------------------------------------------------------------------------
@ -560,37 +561,37 @@ end
-- Dump trace states.
local function dump_trace(what, tr, func, pc, otr, oex)
if what == "stop" or (what == "abort" and dumpmode.a) then
if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
elseif dumpmode.s then dump_snap(tr) end
if dumpmode.m then dump_mcode(tr) end
end
if what == "start" then
if dumpmode.H then out:write('<pre class="ljdump">\n') end
out:write("---- TRACE ", tr, " ", what)
if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
out:write(" ", fmtfunc(func, pc), "\n")
elseif what == "stop" or what == "abort" then
out:write("---- TRACE ", tr, " ", what)
if what == "abort" then
out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
else
local info = traceinfo(tr)
local link, ltype = info.link, info.linktype
if link == tr or link == 0 then
out:write(" -> ", ltype, "\n")
elseif ltype == "root" then
out:write(" -> ", link, "\n")
else
out:write(" -> ", link, " ", ltype, "\n")
end
end
if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
else
if what == "flush" then symtab, nexitsym = {}, 0 end
out:write("---- TRACE ", what, "\n\n")
end
out:flush()
-- if what == "stop" or (what == "abort" and dumpmode.a) then
-- if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
-- elseif dumpmode.s then dump_snap(tr) end
-- if dumpmode.m then dump_mcode(tr) end
-- end
-- if what == "start" then
-- if dumpmode.H then out:write('<pre class="ljdump">\n') end
-- out:write("---- TRACE ", tr, " ", what)
-- if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
-- out:write(" ", fmtfunc(func, pc), "\n")
-- elseif what == "stop" or what == "abort" then
-- out:write("---- TRACE ", tr, " ", what)
-- if what == "abort" then
-- out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
-- else
-- local info = traceinfo(tr)
-- local link, ltype = info.link, info.linktype
-- if link == tr or link == 0 then
-- out:write(" -> ", ltype, "\n")
-- elseif ltype == "root" then
-- out:write(" -> ", link, "\n")
-- else
-- out:write(" -> ", link, " ", ltype, "\n")
-- end
-- end
-- if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
-- else
-- if what == "flush" then symtab, nexitsym = {}, 0 end
-- out:write("---- TRACE ", what, "\n\n")
-- end
-- out:flush()
end
-- Dump recorded bytecode.
@ -603,6 +604,9 @@ local function dump_record(tr, func, pc, depth)
if pc >= 0 then
line = bcline(func, pc, recprefix)
if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
if pc > 0 then
line = sub(line, 1, -2) .. " (" .. fmtfunc(func, pc) .. ")\n"
end
else
line = "0000 "..recprefix.." FUNCC \n"
end

View File

@ -36,6 +36,7 @@
#include "lj_strscan.h"
#include "lj_strfmt.h"
#include "lj_lib.h"
#include "lj_cdata.h"
/* -- Base library: checks ------------------------------------------------ */
@ -669,6 +670,52 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn)
setmref(fn->c.pc, &L2GG(L)->bcff[lj_lib_init_coroutine[1]+2]);
}
#if LJ_HASFFI
LJLIB_NOREG LJLIB_CF(thread_exdata) LJLIB_REC(.)
{
  /* Called with an argument: store the cdata's pointer as L->exdata. */
  if (L->top != L->base) {
    GCcdata *arg = lj_lib_checkcdata(L, 1);
    L->exdata = cdata_getptr(cdataptr(arg), CTSIZE_PTR);
    return 0;
  } else {
    /* Called without arguments: box L->exdata in a new CTID_P_VOID cdata. */
    CTState *cts = ctype_ctsG(G(L));
    GCcdata *box;
    if (!cts)
      lj_err_caller(L, LJ_ERR_FFI_NOTLOAD);
    cts->L = L;  /* Save L for errors and allocations. */
    box = lj_cdata_new(cts, CTID_P_VOID, CTSIZE_PTR);
    cdata_setptr(cdataptr(box), CTSIZE_PTR, L->exdata);
    setcdataV(L, L->top++, box);
    return 1;
  }
}
LJLIB_NOREG LJLIB_CF(thread_exdata2) LJLIB_REC(.)
{
  /* Called with an argument: store the cdata's pointer as L->exdata2. */
  if (L->top != L->base) {
    GCcdata *arg = lj_lib_checkcdata(L, 1);
    L->exdata2 = cdata_getptr(cdataptr(arg), CTSIZE_PTR);
    return 0;
  } else {
    /* Called without arguments: box L->exdata2 in a new CTID_P_VOID cdata. */
    CTState *cts = ctype_ctsG(G(L));
    GCcdata *box;
    if (!cts)
      lj_err_caller(L, LJ_ERR_FFI_NOTLOAD);
    cts->L = L;  /* Save L for errors and allocations. */
    box = lj_cdata_new(cts, CTID_P_VOID, CTSIZE_PTR);
    cdata_setptr(cdataptr(box), CTSIZE_PTR, L->exdata2);
    setcdataV(L, L->top++, box);
    return 1;
  }
}
#endif
/* ------------------------------------------------------------------------ */
static void newproxy_weaktable(lua_State *L)
@ -682,6 +729,18 @@ static void newproxy_weaktable(lua_State *L)
t->nomm = (uint8_t)(~(1u<<MM_mode));
}
#if LJ_HASFFI
/* Loader callback: hands lj_cf_thread_exdata to lj_lib_postreg under the name "exdata". */
static int luaopen_thread_exdata(lua_State *L)
{
return lj_lib_postreg(L, lj_cf_thread_exdata, FF_thread_exdata, "exdata");
}
/* Loader callback: hands lj_cf_thread_exdata2 to lj_lib_postreg under the name "exdata2". */
static int luaopen_thread_exdata2(lua_State *L)
{
return lj_lib_postreg(L, lj_cf_thread_exdata2, FF_thread_exdata2, "exdata2");
}
#endif
LUALIB_API int luaopen_base(lua_State *L)
{
/* NOBARRIER: Table and value are the same. */
@ -691,6 +750,12 @@ LUALIB_API int luaopen_base(lua_State *L)
newproxy_weaktable(L); /* top-2. */
LJ_LIB_REG(L, "_G", base);
LJ_LIB_REG(L, LUA_COLIBNAME, coroutine);
#if LJ_HASFFI
lj_lib_prereg(L, LUA_THRLIBNAME ".exdata", luaopen_thread_exdata, env);
lj_lib_prereg(L, LUA_THRLIBNAME ".exdata2", luaopen_thread_exdata2, env);
#endif
return 2;
}

View File

@ -439,7 +439,7 @@ LJLIB_CF(io_popen)
LJLIB_CF(io_tmpfile)
{
IOFileUD *iof = io_file_new(L);
#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
iof->fp = NULL; errno = ENOSYS;
#else
iof->fp = tmpfile();

View File

@ -148,6 +148,66 @@ LJLIB_CF(jit_attach)
return 0;
}
LJLIB_CF(jit_prngstate)
{
  /* Returns the JIT PRNG state as a table of eight 32-bit words (low word
  ** first for each 64-bit element). If the first argument is a number or a
  ** table of up to eight words, it becomes the new state; missing words
  ** are zero. Without JIT support a table of eight zeros is returned.
  */
  GCtab *cur = lj_tab_new(L, 8, 0);
  int i;
#if LJ_HASJIT
  jit_State *J = L2J(L);

  /* The old state. */
  for (i = 1; i <= 4; i++) {
    setintV(lj_tab_setint(L, cur, i*2-1), J->prng.u[i-1] & 0xffffffff);
    setintV(lj_tab_setint(L, cur, i*2), J->prng.u[i-1] >> 32);
  }

  /* We need to set new state using the input array. */
  if (L->base < L->top && !tvisnil(L->base)) {
    PRNGState prng;
    if (tvisnumber(L->base)) {
      TValue *o = L->base;
      if (!tvisint(o) && ((double)(uint32_t)numV(o) != numV(o)))
        lj_err_arg(L, 1, LJ_ERR_PRNGSTATE);
      /* Take the value as an unsigned 32-bit word: no sign extension. */
      prng.u[0] = tvisint(o) ? (uint32_t)intV(o) : (uint32_t)numV(o);
      for (i = 1; i < 4; i++)
        prng.u[i] = 0;
    } else {
      GCtab *t = lj_lib_checktab(L, 1);
      int len = lj_tab_len(t);

      /* The input array must have at most 8 elements. */
      if (len > 8)
        lj_err_arg(L, 1, LJ_ERR_PRNGSTATE);

      for (i = 1; i <= len; i++) {
        cTValue *v = lj_tab_getint(t, i);
        uint64_t word;
        /* A missing slot (NULL) is rejected like any other non-word value. */
        if (!v || (!tvisint(v) &&
                   (!tvisnum(v) || (double)(uint32_t)numV(v) != numV(v))))
          lj_err_arg(L, 1, LJ_ERR_PRNGSTATE);
        /* Unsigned 32-bit word, so a low word with bit 31 set cannot
        ** smear ones into the high half of the 64-bit element.
        */
        word = tvisint(v) ? (uint32_t)intV(v) : (uint32_t)numV(v);
        if (i & 1)
          prng.u[(i-1)/2] = word;
        else
          prng.u[(i-1)/2] |= word << 32;
      }
      /* i == len+1 here: zero every element that received no word. */
      for (i /= 2; i < 4; i++)
        prng.u[i] = 0;
    }

    /* Re-initialize the JIT prng. */
    J->prng = prng;
  }
#else
  for (i = 1; i <= 8; i++)
    setintV(lj_tab_setint(L, cur, i), 0);
#endif
  settabV(L, L->top++, cur);
  return 1;
}
LJLIB_PUSH(top-5) LJLIB_SET(os)
LJLIB_PUSH(top-4) LJLIB_SET(arch)
LJLIB_PUSH(top-3) LJLIB_SET(version_num)
@ -231,6 +291,7 @@ LJLIB_CF(jit_util_funcbc)
{
GCproto *pt = check_Lproto(L, 0);
BCPos pc = (BCPos)lj_lib_checkint(L, 2);
int lineinfo = lj_lib_optint(L, 3, 0);
if (pc < pt->sizebc) {
BCIns ins = proto_bc(pt)[pc];
BCOp op = bc_op(ins);
@ -238,6 +299,11 @@ LJLIB_CF(jit_util_funcbc)
setintV(L->top, ins);
setintV(L->top+1, lj_bc_mode[op]);
L->top += 2;
if (lineinfo) {
setintV(L->top, lj_debug_line(pt, pc));
L->top += 1;
return 3;
}
return 2;
}
return 0;
@ -718,7 +784,8 @@ static uint32_t jit_cpudetect(void)
if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
}
#endif
#elif LJ_TARGET_S390X
/* No optional CPU features to detect (for now). */
#else
#error "Missing CPU detection for this architecture"
#endif

View File

@ -76,7 +76,7 @@ LJLIB_CF(os_rename)
LJLIB_CF(os_tmpname)
{
#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
lj_err_caller(L, LJ_ERR_OSUNIQF);
return 0;
#else
@ -185,6 +185,7 @@ LJLIB_CF(os_date)
#endif
} else {
#if LJ_TARGET_POSIX
tzset();
stm = localtime_r(&t, &rtm);
#else
stm = localtime(&t);

View File

@ -169,6 +169,47 @@ LJLIB_CF(table_concat) LJLIB_REC(.)
return 1;
}
LJLIB_NOREG LJLIB_CF(table_clone) LJLIB_REC(.)
{
  /* Duplicate the table argument and return the copy as the only result. */
  GCtab *copy = lj_tab_dup(L, lj_lib_checktab(L, 1));
  settabV(L, L->base, copy);
  L->top = L->base+1;
  return 1;
}
LJLIB_NOREG LJLIB_CF(table_isarray) LJLIB_REC(.)
{
  /* Return the lj_tab_isarray() verdict for the table argument as a boolean. */
  GCtab *t = lj_lib_checktab(L, 1);
  int verdict = lj_tab_isarray(t);
  setboolV(L->base, verdict);
  L->top = L->base+1;
  return 1;
}
LJLIB_NOREG LJLIB_CF(table_nkeys) LJLIB_REC(.)
{
  /* Return the lj_tab_nkeys() count for the table argument as an integer. */
  GCtab *t = lj_lib_checktab(L, 1);
  TValue *res = L->base;
  setintV(res, lj_tab_nkeys(t));
  L->top = res+1;
  return 1;
}
LJLIB_NOREG LJLIB_CF(table_isempty) LJLIB_REC(.)
{
  /* Return the lj_tab_isempty() verdict for the table argument as a boolean. */
  GCtab *t = lj_lib_checktab(L, 1);
  int verdict = lj_tab_isempty(t);
  setboolV(L->base, verdict);
  L->top = L->base+1;
  return 1;
}
/* ------------------------------------------------------------------------ */
static void set2(lua_State *L, int i, int j)
@ -304,6 +345,26 @@ static int luaopen_table_new(lua_State *L)
return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
}
/* Loader callback: hands lj_cf_table_clone to lj_lib_postreg under the name "clone". */
static int luaopen_table_clone(lua_State *L)
{
return lj_lib_postreg(L, lj_cf_table_clone, FF_table_clone, "clone");
}
/* Loader callback: hands lj_cf_table_nkeys to lj_lib_postreg under the name "nkeys". */
static int luaopen_table_nkeys(lua_State *L)
{
return lj_lib_postreg(L, lj_cf_table_nkeys, FF_table_nkeys, "nkeys");
}
/* Loader callback: hands lj_cf_table_isarray to lj_lib_postreg under the name "isarray". */
static int luaopen_table_isarray(lua_State *L)
{
return lj_lib_postreg(L, lj_cf_table_isarray, FF_table_isarray, "isarray");
}
/* Loader callback: hands lj_cf_table_isempty to lj_lib_postreg under the name "isempty". */
static int luaopen_table_isempty(lua_State *L)
{
return lj_lib_postreg(L, lj_cf_table_isempty, FF_table_isempty, "isempty");
}
static int luaopen_table_clear(lua_State *L)
{
return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
@ -321,6 +382,10 @@ LUALIB_API int luaopen_table(lua_State *L)
lua_setfield(L, -2, "unpack");
#endif
lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
lj_lib_prereg(L, LUA_TABLIBNAME ".clone", luaopen_table_clone, tabV(L->top-1));
lj_lib_prereg(L, LUA_TABLIBNAME ".isarray", luaopen_table_isarray, tabV(L->top-1));
lj_lib_prereg(L, LUA_TABLIBNAME ".nkeys", luaopen_table_nkeys, tabV(L->top-1));
lj_lib_prereg(L, LUA_TABLIBNAME ".isempty", luaopen_table_isempty, tabV(L->top-1));
lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
return 1;
}

View File

@ -1143,6 +1143,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
ef = savestack(L, o);
}
/* Run the protected call; on a non-zero status restore the saved hook state. */
status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
if (status) hook_restore(g, oldh);
return status;
}
@ -1195,6 +1196,36 @@ LUA_API int lua_isyieldable(lua_State *L)
return cframe_canyield(L->cframe);
}
/* Reset the coroutine th so it can be reused.
** Status, C frame and stack are reset; the global state reference, the
** environment and both exdata pointers are copied from L.
*/
LUA_API void lua_resetthread(lua_State *L, lua_State *th)
{
  TValue *stend, *st;

  th->dummy_ffid = FF_C;
  th->status = LUA_OK;

  setmrefr(th->glref, L->glref);
  setgcrefr(th->env, L->env);

  th->cframe = NULL;

  if (tvref(th->stack) != NULL) {
    lj_state_relimitstack(th);
    /* Read the stack base only after relimiting, in case that call
    ** replaced the stack (NOTE(review): confirm it can reallocate).
    */
    st = tvref(th->stack);
    stend = st + th->stacksize;
    st++;  /* Needed for curr_funcisL() on empty stack. */
    if (LJ_FR2) st++;
    th->base = th->top = st;
    /* Close the open upvalues of the thread being reset. The level st
    ** points into th's stack, so the upvalue list walked must be th's,
    ** not the caller's.
    */
    lj_func_closeuv(th, st);
    while (st < stend)  /* Clear new slots. */
      setnilV(st++);
  }

  th->exdata = L->exdata;
  th->exdata2 = L->exdata2;
}
LUA_API int lua_yield(lua_State *L, int nresults)
{
void *cf = L->cframe;
@ -1311,3 +1342,22 @@ LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud)
g->allocf = f;
}
/* Store the caller-supplied pointer in the exdata slot of thread L. */
LUA_API void lua_setexdata(lua_State *L, void *exdata)
{
L->exdata = exdata;
}
/* Return the pointer currently held in the exdata slot of thread L. */
LUA_API void *lua_getexdata(lua_State *L)
{
return L->exdata;
}
/* Store the caller-supplied pointer in the exdata2 slot of thread L. */
LUA_API void lua_setexdata2(lua_State *L, void *exdata2)
{
L->exdata2 = exdata2;
}
/* Return the pointer currently held in the exdata2 slot of thread L. */
LUA_API void *lua_getexdata2(lua_State *L)
{
return L->exdata2;
}

View File

@ -31,6 +31,8 @@
#define LUAJIT_ARCH_mips32 6
#define LUAJIT_ARCH_MIPS64 7
#define LUAJIT_ARCH_mips64 7
#define LUAJIT_ARCH_S390X 8
#define LUAJIT_ARCH_s390x 8
/* Target OS. */
#define LUAJIT_OS_OTHER 0
@ -59,6 +61,8 @@
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__s390x__) || defined(__s390x)
#define LUAJIT_TARGET LUAJIT_ARCH_S390X
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
@ -162,13 +166,6 @@
#define LJ_TARGET_GC64 1
#endif
#ifdef __NX__
#define LJ_TARGET_NX 1
#define LJ_TARGET_CONSOLE 1
#undef NULL
#define NULL ((void*)0)
#endif
#ifdef _UWP
#define LJ_TARGET_UWP 1
#if LUAJIT_TARGET == LUAJIT_ARCH_X64
@ -213,6 +210,10 @@
#error "macOS requires GC64 -- don't disable it"
#endif
#ifdef __GNUC__
#define LJ_HAS_OPTIMISED_HASH 1
#endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
#define LJ_ARCH_NAME "arm"
@ -323,8 +324,18 @@
#if LJ_TARGET_CONSOLE
#define LJ_ARCH_PPC32ON64 1
#define LJ_ARCH_NOFFI 1
#if LJ_TARGET_PS3
#define LJ_ARCH_PPC_OPD 1
#endif
#elif LJ_ARCH_BITS == 64
#error "No support for PPC64"
#define LJ_ARCH_PPC32ON64 1
#define LJ_ARCH_NOJIT 1 /* NYI */
#if _CALL_ELF == 2
#define LJ_ARCH_PPC_ELFV2 1
#else
#define LJ_ARCH_PPC_OPD 1
#define LJ_ARCH_PPC_OPDENV 1
#endif
#endif
#if _ARCH_PWR7
@ -432,6 +443,20 @@
#define LJ_ARCH_VERSION 10
#endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X
#define LJ_ARCH_NAME "s390x"
#define LJ_ARCH_BITS 64
#define LJ_ARCH_ENDIAN LUAJIT_BE
#define LJ_TARGET_S390X 1
#define LJ_TARGET_EHRETREG 0xe
#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNALIGNED 1
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#define LJ_TARGET_GC64 1
#else
#error "No target architecture defined"
#endif
@ -445,7 +470,7 @@
#error "Need at least GCC 3.4 or newer"
#endif
#elif LJ_TARGET_X64
#if __GNUC__ < 4
#if 0 && __GNUC__ < 4
#error "Need at least GCC 4.0 or newer"
#endif
#elif LJ_TARGET_ARM
@ -490,9 +515,6 @@
#error "No support for ILP32 model on ARM64"
#endif
#elif LJ_TARGET_PPC
#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
#error "No support for little-endian PPC32"
#endif
#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif

View File

@ -1662,6 +1662,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_asm_mips.h"
#elif LJ_TARGET_S390X
#include "lj_asm_s390x.h"
#else
#error "Missing assembler for target CPU"
#endif

View File

@ -353,6 +353,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
return 0;
}
/* Fuse FP neg-multiply-add/sub.
** ir is the negation; its operand must be a fusable ADD or SUB where one
** side is a fusable MUL that has no register yet. On success one
** instruction is emitted (ai for ADD, air for SUB) and 1 is returned;
** otherwise nothing is emitted and 0 is returned.
*/
static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
{
IRRef ref = ir->op1;
IRIns *irn = IR(ref);
/* Only a negated ADD/SUB is a candidate. */
if (irn->o != IR_ADD && irn->o != IR_SUB)
return 0;
if (!mayfuse(as, ref))
return 0;
IRRef lref = irn->op1, rref = irn->op2;
IRIns *irm;
/* Find the MUL operand: try the left side first, then the right side.
** In the right-side case rref is overwritten with lref, so that rref
** always names the non-MUL (addend) operand afterwards.
*/
if (lref != rref &&
((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
ra_noreg(irm->r)) ||
(mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
(rref = lref, ra_noreg(irm->r))))) {
Reg dest = ra_dest(as, ir, RSET_FPR);
Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
/* Allocate both MUL operands, excluding dest and the addend register. */
Reg left = ra_alloc2(as, irm,
rset_exclude(rset_exclude(RSET_FPR, dest), add));
/* ra_alloc2 packs two registers: low byte is left, next byte is right. */
Reg right = (left >> 8); left &= 255;
emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31));
return 1;
}
return 0;
}
}
/* Fuse BAND + BSHL/BSHR into UBFM. */
static int asm_fuseandshift(ASMState *as, IRIns *ir)
{
@ -1051,10 +1080,30 @@ static void asm_xload(ASMState *as, IRIns *ir)
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}
/* Return 1 if ref is a KNULL constant or a KINT/KINT64 constant equal to
** zero; return 0 for everything else.
*/
static int maybe_zero_val(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (ir->o == IR_KNULL)
    return 1;
  if (ir->o == IR_KINT)
    return ir->i == 0;
  if (ir->o == IR_KINT64)
    return ir_kint64(ir)->u64 == 0;
  return 0;
}
static void asm_xstore(ASMState *as, IRIns *ir)
{
if (ir->r != RID_SINK) {
Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
Reg src;
if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
src = RID_ZERO;
else
src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
rset_exclude(RSET_GPR, src));
}
@ -1250,7 +1299,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize immutable cdata object. */
if (ir->o == IR_CNEWI) {
int32_t ofs = sizeof(GCcdata);
Reg r = ra_alloc1(as, ir->op2, allow);
Reg r;
if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
r = RID_ZERO;
else
r = ra_alloc1(as, ir->op2, allow);
lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
} else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
@ -1266,7 +1320,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
{
Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
Reg r = id == 0 ? RID_ZERO : (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
@ -1466,6 +1520,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
static void asm_neg(ASMState *as, IRIns *ir)
{
if (irt_isnum(ir->t)) {
if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd))
asm_fpunary(as, ir, A64I_FNEGd);
return;
}
@ -1919,6 +1974,17 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
IRIns *ir;
asm_head_lreg(as);
ir = IR(REF_BASE);
/* IRRefs that get into the side trace from the parent trace may restore
* REF_BASE under severe register pressure and thus reach here holding on to
* the register. Restore such references so that REF_BASE gets RID_BASE back
* when it tries to allocate below. */
if (!ra_hasreg(ir->r)) {
Reg r = ra_gethint(ir->r);
if (!rset_test(as->freeset, r))
ra_restore(as, regcost_ref(as->cost[r]));
}
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
ra_spill(as, ir);
if (ra_hasspill(irp->s)) {

View File

@ -1298,7 +1298,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
#if !LJ_64
#if !LJ_64 || (defined(LUAJIT_USE_VALGRIND) && !LJ_GC64)
MCLabel l_exit;
#endif
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@ -1313,7 +1313,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
}
}
asm_guardcc(as, CC_NE);
#if LJ_64
#if LJ_64 && (!defined(LUAJIT_USE_VALGRIND) || LJ_GC64)
if (!irt_ispri(irkey->t)) {
Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
emit_rmro(as, XO_CMP, key|REX_64, node,

View File

@ -370,6 +370,82 @@
#elif LJ_TARGET_PPC
/* -- PPC calling conventions --------------------------------------------- */
#if LJ_ARCH_BITS == 64
#if LJ_ARCH_PPC_ELFV2
#define CCALL_HANDLE_STRUCTRET \
if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \
cc->retref = 1; /* Return by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp; \
}
#define CCALL_HANDLE_STRUCTRET2 \
int isfp = ccall_classify_fp(cts, ctr); \
int i; \
if (isfp == FTYPE_FLOAT) { \
for (i = 0; i < ctr->size / 4; i++) \
((float *)dp)[i] = cc->fpr[i]; \
} else if (isfp == FTYPE_DOUBLE) { \
for (i = 0; i < ctr->size / 8; i++) \
((double *)dp)[i] = cc->fpr[i]; \
} else { \
if (ctr->size < 8 && LJ_BE) { \
sp += 8 - ctr->size; \
} \
memcpy(dp, sp, ctr->size); \
}
#else
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
#endif
#define CCALL_HANDLE_COMPLEXRET \
/* Complex values are returned in 2 or 4 GPRs. */ \
cc->retref = 0;
#define CCALL_HANDLE_STRUCTARG
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
((float *)dp)[0] = cc->fpr[0]; \
((float *)dp)[1] = cc->fpr[1]; \
} else { /* Copy complex double from FPRs. */ \
((double *)dp)[0] = cc->fpr[0]; \
((double *)dp)[1] = cc->fpr[1]; \
}
#define CCALL_HANDLE_COMPLEXARG \
isfp = 1; \
if (d->size == sizeof(float) * 2) { \
d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \
isf32 = 1; \
}
#define CCALL_HANDLE_REGARG \
if (isfp && d->size == sizeof(float)) { \
d = ctype_get(cts, CTID_DOUBLE); \
isf32 = 1; \
} \
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
ngpr += n; \
if (ngpr > maxgpr) { \
nsp += ngpr - 8; \
ngpr = 8; \
if (nsp > CCALL_MAXSTACK) { \
goto err_nyi; \
} \
} \
goto done; \
}
#else
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
@ -378,13 +454,13 @@
/* Complex values are returned in 2 or 4 GPRs. */ \
cc->retref = 0;
#define CCALL_HANDLE_COMPLEXRET2 \
memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
#define CCALL_HANDLE_STRUCTARG \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; /* Pass all structs by reference. */
#define CCALL_HANDLE_COMPLEXRET2 \
memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
@ -420,6 +496,8 @@
}
#endif
#endif
#if !LJ_ABI_SOFTFP
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
@ -574,6 +652,40 @@
goto done; \
}
#elif LJ_TARGET_S390X
/* -- POSIX/s390x calling conventions --------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
#define CCALL_HANDLE_COMPLEXRET \
cc->retref = 1; /* Return all complex values by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
#define CCALL_HANDLE_COMPLEXRET2 \
UNUSED(dp); /* Nothing to do. */
#define CCALL_HANDLE_STRUCTARG \
/* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \
}
/* Pass complex numbers by reference: copy into a fresh cdata object and
** pass a pointer to it. The last line must not end in a backslash, or the
** following source line is spliced into this macro.
*/
#define CCALL_HANDLE_COMPLEXARG \
  /* TODO: not sure why this is different to structs. */ \
  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
  sz = CTSIZE_PTR;
#define CCALL_HANDLE_REGARG \
if (isfp) { \
if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
} else { \
if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif
@ -816,6 +928,50 @@ noth: /* Not a homogeneous float/double aggregate. */
#endif
/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */
#if LJ_ARCH_PPC_ELFV2
#define FTYPE_FLOAT 1
#define FTYPE_DOUBLE 2
/* Classify a C type by its floating-point content.
** Returns FTYPE_FLOAT or FTYPE_DOUBLE for an FP scalar, a complex number,
** or a struct whose classifiable fields all share that kind and whose size
** does not exceed eight such elements. Returns 0 for anything else and -1
** for a struct in which no field was classified. The return type is signed
** so that the -1 sentinel survives and "<= 0" checks at call sites work.
*/
static int ccall_classify_fp(CTState *cts, CType *ct)
{
  if (ctype_isfp(ct->info)) {
    if (ct->size == sizeof(float))
      return FTYPE_FLOAT;
    else
      return FTYPE_DOUBLE;
  } else if (ctype_iscomplex(ct->info)) {
    if (ct->size == sizeof(float) * 2)
      return FTYPE_FLOAT;
    else
      return FTYPE_DOUBLE;
  } else if (ctype_isstruct(ct->info)) {
    int res = -1;  /* -1: no field classified yet. */
    int sz = ct->size;
    while (ct->sib) {
      ct = ctype_get(cts, ct->sib);
      if (ctype_isfield(ct->info)) {
        int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct));
        if (res == -1)
          res = sub;
        if (sub != -1 && sub != res)
          return 0;  /* Mixed kinds: not homogeneous. */
      } else if (ctype_isbitfield(ct->info) ||
                 ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
        return 0;
      }
    }
    /* res*4 is the element size in bytes; at most 8 elements qualify. */
    if (res > 0 && sz > res * 4 * 8)
      return 0;
    return res;
  } else {
    return 0;
  }
}
#endif
/* -- MIPS64 ABI struct classification ---------------------------- */
#if LJ_TARGET_MIPS64
@ -990,6 +1146,13 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CTSize sz;
MSize n, isfp = 0, isva = 0;
void *dp, *rp = NULL;
#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
int isf32 = 0;
#endif
#if LJ_TARGET_S390X
uint32_t onstack = 0;
#endif
if (fid) { /* Get argument type from field. */
CType *ctf = ctype_get(cts, fid);
@ -1028,6 +1191,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */
#if LJ_TARGET_S390X
onstack = 1;
#endif
if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
nsp = (nsp + align) & ~align; /* Align argument on stack. */
@ -1046,7 +1212,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(void **)dp = rp;
dp = rp;
}
#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE
if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) {
dp = (char *)dp + (CTSIZE_PTR - sz);
}
#endif
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
if (isfp) {
int i;
for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
cc->fpr[nfpr++] = ((double *)dp)[i];
}
if (isf32) {
int i;
for (i = 0; i < d->size / 8; i++)
((float *)dp)[i*2] = ((double *)dp)[i];
}
#endif
#if LJ_ARCH_PPC_ELFV2
if (ctype_isstruct(d->info)) {
isfp = ccall_classify_fp(cts, d);
int i;
if (isfp == FTYPE_FLOAT) {
for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++)
cc->fpr[nfpr++] = ((float *)dp)[i];
} else if (isfp == FTYPE_DOUBLE) {
for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
cc->fpr[nfpr++] = ((double *)dp)[i];
}
}
#endif
/* Extend passed integers to 32 bits at least. */
if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
if (d->info & CTF_UNSIGNED)
@ -1060,6 +1256,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif
#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info))
&& d->size <= 4) {
if (d->info & CTF_UNSIGNED)
*(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
else
*(int64_t *)dp = (int64_t)*(int32_t *)dp;
}
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64
@ -1069,6 +1274,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
}
#endif
#if LJ_TARGET_S390X
/* Arguments need to be sign-/zero-extended to 64-bits. */
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
(isfp && onstack)) && d->size <= 4) {
if (d->info & CTF_UNSIGNED || isfp)
*(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
else
*(int64_t *)dp = (int64_t)*(int32_t *)dp;
}
#endif
#if LJ_TARGET_X64 && LJ_ABI_WIN
if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
if (nfpr == ngpr)

View File

@ -86,10 +86,23 @@ typedef union FPRArg {
#elif LJ_TARGET_PPC
#define CCALL_NARG_GPR 8
#if LJ_ARCH_BITS == 64
#define CCALL_NARG_FPR 13
#if LJ_ARCH_PPC_ELFV2
#define CCALL_NRET_GPR 2
#define CCALL_NRET_FPR 8
#define CCALL_SPS_EXTRA 14
#else
#define CCALL_NRET_GPR 1
#define CCALL_NRET_FPR 2
#define CCALL_SPS_EXTRA 16
#endif
#else
#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
#define CCALL_NRET_GPR 4 /* For complex double. */
#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
#define CCALL_SPS_EXTRA 4
#endif
#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
@ -126,6 +139,21 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#elif LJ_TARGET_S390X
/* s390x: register counts for arguments and results, and stack slot setup. */
#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */
#define CCALL_NARG_FPR 4 /* FPR 0,2,4,6 */
#define CCALL_NRET_GPR 1 /* GPR 2 */
#define CCALL_NRET_FPR 1 /* FPR 0 */
#define CCALL_SPS_EXTRA 20 /* 20 slots = 160-byte callee save area (not sure if this is the right place) */
#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
/* One FP argument register slot, viewed as either a double or a float. */
typedef union FPRArg {
double d;
float f;
} FPRArg;
#else
#error "Missing calling convention definitions for this architecture"
#endif

View File

@ -21,6 +21,10 @@
#include "lj_trace.h"
#include "lj_vm.h"
#if LJ_ARCH_PPC_ELFV2
#include "lualib.h"
#endif
/* -- Target-specific handling of callback slots -------------------------- */
#define CALLBACK_MCODE_SIZE (LJ_PAGESIZE * LJ_NUM_CBPAGE)
@ -61,8 +65,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#elif LJ_TARGET_PPC
#if LJ_ARCH_PPC_OPD
#define CALLBACK_SLOT2OFS(slot) (24*(slot))
#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24)
#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
#elif LJ_ARCH_PPC_ELFV2
#define CALLBACK_SLOT2OFS(slot) (4*(slot))
#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4)
#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10)
#else
#define CALLBACK_MCODE_HEAD 24
#endif
#elif LJ_TARGET_MIPS32
#define CALLBACK_MCODE_HEAD 20
@ -188,24 +208,59 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
return p;
}
#elif LJ_TARGET_PPC
#if LJ_ARCH_PPC_OPD
register void *vm_toc __asm__("r2");
/* Fill the callback page with one three-word entry per slot: the address
** of lj_vm_ffi_callback, the value of vm_toc, and g combined with the slot
** number shifted left by 47. Returns the address just past the last entry.
*/
static void *callback_mcode_init(global_State *g, uint64_t *page)
{
  void *target = (void *)lj_vm_ffi_callback;
  uint64_t *entry = page;
  MSize slot = 0;
  while (slot < CALLBACK_MAX_SLOT) {
    entry[0] = (uint64_t)target;
    entry[1] = (uint64_t)vm_toc;
    entry[2] = (uint64_t)g | ((uint64_t)slot << 47);
    entry += 3;
    slot++;
  }
  return entry;
}
#else
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
MSize slot;
#if LJ_ARCH_PPC_ELFV2
// Needs to be in sync with lj_vm_ffi_callback.
lua_assert(CALLBACK_MCODE_SIZE == 4096);
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
*p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2);
p++;
}
*p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff);
*p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff);
*p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
*p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
*p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff);
*p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff);
*p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff);
*p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff);
*p++ = PPCI_MTCTR | PPCF_T(RID_SYS1);
*p++ = PPCI_BCTR;
#else
*p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16);
*p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16);
*p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16);
*p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff);
*p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff);
*p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff);
*p++ = PPCI_MTCTR | PPCF_T(RID_TMP);
*p++ = PPCI_BCTR;
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
*p++ = PPCI_LI | PPCF_T(RID_R11) | slot;
*p++ = PPCI_LI | PPCF_T(RID_R12) | slot;
*p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
p++;
}
#endif
return p;
}
#endif
#elif LJ_TARGET_MIPS
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
@ -516,6 +571,15 @@ void lj_ccallback_mcode_free(CTState *cts)
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
#elif LJ_TARGET_S390X
/* s390x: fetch the next callback argument from the saved register area.
** FP arguments come from cts->cb.fpr while nfpr < CCALL_NARG_FPR, others
** from cts->cb.gpr while ngpr < maxgpr. When a register slot is taken, sp
** is set to it and control jumps to the done label; otherwise execution
** falls through past the macro.
*/
#define CALLBACK_HANDLE_REGARG \
if (isfp) { \
if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
} else { \
if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif
@ -662,6 +726,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
if (ctr->size <= 4 &&
(ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) {
if (ctr->info & CTF_UNSIGNED)
*(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
else
*(int64_t *)dp = (int64_t)*(int32_t *)dp;
}
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&

View File

@ -153,7 +153,7 @@ typedef struct CType {
/* Simplify target-specific configuration. Checked in lj_ccall.h. */
#define CCALL_MAX_GPR 8
#define CCALL_MAX_FPR 8
#define CCALL_MAX_FPR 14
typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg;

View File

@ -109,6 +109,11 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
return pos;
}
/* Non-static wrapper that forwards unchanged to debug_framepc(). */
LJ_FUNC BCPos lj_debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
{
return debug_framepc(L, fn, nextframe);
}
/* -- Line numbers -------------------------------------------------------- */
/* Get line number for a bytecode position. */
@ -703,3 +708,128 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
lua_concat(L, (int)(L->top - L->base) - top);
}
#ifdef LUA_USE_TRACE_LOGS
#include "lj_dispatch.h"
#define MAX_TRACE_EVENTS 64
enum {
LJ_TRACE_EVENT_ENTER,
LJ_TRACE_EVENT_EXIT,
LJ_TRACE_EVENT_START
};
/* One entry of the trace event log. */
typedef struct {
int event; /* One of the LJ_TRACE_EVENT_* values. */
unsigned traceno; /* Trace number (for exit events: the vmstate value). */
unsigned exitno; /* Copied from J->exitno; set for exit events only. */
int directexit; /* 1 for a direct exit, 0 for a normal exit; exit events only. */
const BCIns *ins; /* Bytecode pc passed in by the caller. */
lua_State *thread; /* Thread the event was logged from. */
GCfunc *fn; /* Function supplied by the caller or taken from the top frame. */
} lj_trace_event_record_t;
static lj_trace_event_record_t lj_trace_events[MAX_TRACE_EVENTS];
static int rb_start = 0;
static int rb_end = 0;
static int rb_full = 0;
/* Append a copy of *rec to the event ring buffer.
** rb_end is the next write position. Once the buffer has wrapped, rb_full
** is set and rb_start follows rb_end, so the oldest entry is overwritten.
*/
static void
lj_trace_log_event(lj_trace_event_record_t *rec)
{
  int wrapped;

  lj_trace_events[rb_end] = *rec;

  rb_end++;
  wrapped = (rb_end == MAX_TRACE_EVENTS);
  if (wrapped)
    rb_end = 0;

  if (rb_full)
    rb_start = rb_end;
  else if (wrapped)
    rb_full = MAX_TRACE_EVENTS;
}
/* Return the function of the level-0 frame of L, or NULL if
** lj_debug_frame() finds no such frame. The pc argument is not used.
*/
static GCfunc*
lj_debug_top_frame_fn(lua_State *L, const BCIns *pc)
{
  int size;
  cTValue *top = lj_debug_frame(L, 0, &size);
  return top != NULL ? frame_func(top) : NULL;
}
/* Log a LJ_TRACE_EVENT_START record for trace traceno, with the given
** bytecode pc and function.
*/
LJ_FUNC void LJ_FASTCALL
lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc,
                          GCfunc *fn)
{
  lj_trace_event_record_t r;

  r.event = LJ_TRACE_EVENT_START;
  r.thread = L;
  r.ins = pc;
  r.traceno = traceno;
  r.fn = fn;
  /* Exit-only fields: zero them so the log never holds indeterminate data. */
  r.exitno = 0;
  r.directexit = 0;

  lj_trace_log_event(&r);
}
/* Log a LJ_TRACE_EVENT_ENTER record for trace traceno; the function is
** taken from the top frame of L.
*/
LJ_FUNC void LJ_FASTCALL
lj_log_trace_entry(lua_State *L, unsigned traceno, const BCIns *pc)
{
  lj_trace_event_record_t r;

  r.event = LJ_TRACE_EVENT_ENTER;
  r.thread = L;
  r.ins = pc;
  r.traceno = traceno;
  r.fn = lj_debug_top_frame_fn(L, pc);
  /* Exit-only fields: zero them so the log never holds indeterminate data. */
  r.exitno = 0;
  r.directexit = 0;

  lj_trace_log_event(&r);
}
/* Log a LJ_TRACE_EVENT_EXIT record. Nothing is logged when vmstate is
** negative; otherwise vmstate is recorded as the trace number and direct
** is stored in the directexit field.
*/
static void
lj_log_trace_exit_helper(lua_State *L, int vmstate, const BCIns *pc, int direct)
{
  lj_trace_event_record_t r;
  jit_State *J;

  if (vmstate < 0)
    return;

  J = L2J(L);
  r.event = LJ_TRACE_EVENT_EXIT;
  r.thread = L;
  r.ins = pc;
  r.traceno = vmstate;
  r.exitno = J->exitno;
  r.directexit = direct;
  r.fn = lj_debug_top_frame_fn(L, pc);

  lj_trace_log_event(&r);
}
/* Log a trace exit with the directexit field set to 0. */
LJ_FUNC void LJ_FASTCALL
lj_log_trace_normal_exit(lua_State *L, int vmstate, const BCIns *pc)
{
lj_log_trace_exit_helper(L, vmstate, pc, 0);
}
/* Log a trace exit with the directexit field set to 1. */
LJ_FUNC void LJ_FASTCALL
lj_log_trace_direct_exit(lua_State *L, int vmstate, const BCIns *pc)
{
lj_log_trace_exit_helper(L, vmstate, pc, 1);
}
#endif /* LUA_USE_TRACE_LOGS */

View File

@ -26,6 +26,7 @@ typedef struct lj_Debug {
int isvararg;
} lj_Debug;
LJ_FUNC BCPos lj_debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe);
LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size);
LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc);
LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
@ -63,4 +64,15 @@ enum {
VARNAME__MAX
};
#ifdef LUA_USE_TRACE_LOGS
LJ_FUNC void LJ_FASTCALL lj_log_trace_direct_exit(lua_State *L,
int vmstate, const BCIns *pc);
LJ_FUNC void LJ_FASTCALL lj_log_trace_normal_exit(lua_State *L,
int vmstate, const BCIns *pc);
LJ_FUNC void LJ_FASTCALL lj_log_trace_entry(lua_State *L,
unsigned traceno, const BCIns *pc);
LJ_FUNC void LJ_FASTCALL lj_log_trace_start_record(lua_State *L, unsigned traceno,
const BCIns *pc, GCfunc *fn);
#endif
#endif

View File

@ -66,12 +66,16 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
#define LJ_MAX_LOCVAR 200 /* Max. # of local variables. */
#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
#define LJ_MAX_UPVAL 120 /* Max. # of upvalues. */
#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
#if defined(__powerpc64__) && _CALL_ELF != 2
#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */
#else
#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
#endif
/* Minimum table/buffer sizes. */
#define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */
@ -107,7 +111,11 @@ typedef unsigned int uintptr_t;
#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0)
#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0)
#if defined(__powerpc64__) && _CALL_ELF == 2
#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1)
#else
#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1)
#endif
/* Every half-decent C compiler transforms this into a rotate instruction. */
#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))

View File

@ -89,7 +89,7 @@ typedef uint16_t HotCount;
typedef struct GG_State {
lua_State L; /* Main thread. */
global_State g; /* Global state. */
#if LJ_TARGET_ARM && !LJ_TARGET_NX
#if LJ_TARGET_ARM
/* Make g reachable via K12 encoded DISPATCH-relative addressing. */
uint8_t align1[(16-sizeof(global_State))&15];
#endif
@ -99,7 +99,7 @@ typedef struct GG_State {
#if LJ_HASJIT
jit_State J; /* JIT state. */
HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
#if LJ_TARGET_ARM && !LJ_TARGET_NX
#if LJ_TARGET_ARM
/* Ditto for J. */
uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
#endif

View File

@ -419,6 +419,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
if (version != 1)
return _URC_FATAL_PHASE1_ERROR;
cf = (void *)_Unwind_GetCFA(ctx);
#ifdef LJ_TARGET_S390X
cf -= 160; /* CFA points 160 bytes above r15. */
#endif
L = cframe_L(cf);
if ((actions & _UA_SEARCH_PHASE)) {
#if LJ_UNWIND_EXT
@ -753,6 +756,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
G(L)->panic(L);
#else
#if LJ_HASJIT
g->saved_jit_base = g->jit_base;
setmref(g->jit_base, NULL);
#endif
{

View File

@ -109,6 +109,8 @@ ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)")
ERRDEF(NOJIT, "JIT compiler permanently disabled by build option")
#endif
ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
ERRDEF(PRNGSTATE, "PRNG state must be an array with up to 8 integers "
"or an integer")
/* Lexer/parser errors. */
ERRDEF(XMODE, "attempt to load chunk with wrong mode")
@ -178,6 +180,7 @@ ERRDEF(FFI_CBACKOV, "too many callbacks")
#endif
ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields")
ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)")
ERRDEF(FFI_NOTLOAD, "ffi module not loaded (yet)")
#endif
#if LJ_HASBUFFER

View File

@ -29,6 +29,7 @@
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"
#include "lj_cdata.h"
#include "lj_serialize.h"
/* Some local macros to save typing. Undef'd at the end. */
@ -1459,6 +1460,77 @@ static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
} /* else: Interpreter will throw. */
}
/*
** Recorder for the table clone fast function: replaces the first slot with
** the result of an lj_tab_clone call on the source table.
**
** The call is only emitted when the first argument is known to be a table,
** matching the other table recorders in this file. For any other argument
** nothing is recorded and the slot is left untouched.
*/
static void LJ_FASTCALL recff_table_clone(jit_State *J, RecordFFData *rd)
{
  TRef src = J->base[0];
  if (LJ_LIKELY(tref_istab(src))) {
    J->base[0] = lj_ir_call(J, IRCALL_lj_tab_clone, src);
  }  /* else: Interpreter will throw. */
  UNUSED(rd);
}
/*
** Recorder for the lj_tab_isarray fast function.
**
** The predicate is evaluated twice: once as an IR call (so the trace
** re-checks it at run time) and once directly on the actual argument at
** record time. The trace is then specialized to the recorded outcome and a
** constant true/false is returned in the first slot.
*/
static void LJ_FASTCALL recff_table_isarray(jit_State *J, RecordFFData *rd)
{
TRef src = J->base[0];
if (LJ_LIKELY(tref_istab(src))) {
/* Run-time result of the predicate inside the trace. */
TRef trres = lj_ir_call(J, IRCALL_lj_tab_isarray, src);
/* Record-time result, computed on the real table argument. */
GCtab *t = tabV(&rd->argv[0]);
int isarr = lj_tab_isarray(t);
TRef tr0 = lj_ir_kint(J, 0);
/* Compare the call result with 0 in the sense seen at record time:
** NE when the predicate held, EQ when it did not. IRTGI presumably marks
** the comparison as a guard -- confirm against its definition.
*/
emitir(isarr ? IRTGI(IR_NE) : IRTGI(IR_EQ), trres, tr0);
J->base[0] = isarr ? TREF_TRUE : TREF_FALSE;
} /* else: Interpreter will throw. */
}
/*
** Recorder for the lj_tab_nkeys fast function: for a table argument the
** first slot becomes the result of an lj_tab_nkeys call. Any other argument
** records nothing (the interpreter will throw).
*/
static void LJ_FASTCALL recff_table_nkeys(jit_State *J, RecordFFData *rd)
{
  TRef tab = J->base[0];
  if (!LJ_LIKELY(tref_istab(tab)))
    return;  /* Interpreter will throw. */
  J->base[0] = lj_ir_call(J, IRCALL_lj_tab_nkeys, tab);
}
/*
** Recorder for the lj_tab_isempty fast function.
**
** Same scheme as the isarray recorder: emit the call, evaluate the
** predicate on the actual argument at record time, compare the call result
** with 0 in the recorded sense, and return a constant true/false.
*/
static void LJ_FASTCALL recff_table_isempty(jit_State *J, RecordFFData *rd)
{
TRef src = J->base[0];
if (LJ_LIKELY(tref_istab(src))) {
/* Run-time result of the predicate inside the trace. */
TRef trres = lj_ir_call(J, IRCALL_lj_tab_isempty, src);
/* Record-time result, computed on the real table argument. */
GCtab *t = tabV(&rd->argv[0]);
int isempty = lj_tab_isempty(t);
TRef tr0 = lj_ir_kint(J, 0);
/* NE against 0 when the table was empty at record time, EQ otherwise. */
emitir(isempty ? IRTGI(IR_NE) : IRTGI(IR_EQ), trres, tr0);
J->base[0] = isempty ? TREF_TRUE : TREF_FALSE;
} /* else: Interpreter will throw. */
}
/* -- thread library fast functions ------------------------------------------ */
#if LJ_HASFFI
/*
** Recorder for the thread exdata accessor. Only the form where the first
** slot is empty is recorded: the exdata pointer is loaded from the current
** lua_State and boxed as a void* cdata. When the first slot is occupied,
** recording is handed to recff_nyiu().
*/
void LJ_FASTCALL recff_thread_exdata(jit_State *J, RecordFFData *rd)
{
  TRef trl, trp, trid;
  if (J->base[0]) {
    recff_nyiu(J, rd);  /* This case is too rare to be interesting. */
    return;
  }
  trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
  trp = emitir(IRT(IR_FLOAD, IRT_PTR), trl, IRFL_THREAD_EXDATA);
  trid = lj_ir_kint(J, CTID_P_VOID);
  J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, trp);
}
/*
** Recorder for the second thread exdata accessor. Only the form where the
** first slot is empty is recorded: the exdata2 pointer is loaded from the
** current lua_State and boxed as a void* cdata. When the first slot is
** occupied, recording is handed to recff_nyiu().
*/
void LJ_FASTCALL recff_thread_exdata2(jit_State *J, RecordFFData *rd)
{
  TRef trl, trp, trid;
  if (J->base[0]) {
    recff_nyiu(J, rd);  /* This case is too rare to be interesting. */
    return;
  }
  trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
  trp = emitir(IRT(IR_FLOAD, IRT_PTR), trl, IRFL_THREAD_EXDATA2);
  trid = lj_ir_kint(J, CTID_P_VOID);
  J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, trp);
}
#endif
/* -- I/O library fast functions ------------------------------------------ */
/* Get FILE* for I/O function. Any I/O error aborts recording, so there's

View File

@ -210,6 +210,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_OFS_MULTRES 408
#define CFRAME_SIZE 384
#define CFRAME_SHIFT_MULTRES 3
#elif LJ_ARCH_PPC_ELFV2
#define CFRAME_OFS_ERRF 360
#define CFRAME_OFS_NRES 356
#define CFRAME_OFS_PREV 336
#define CFRAME_OFS_L 352
#define CFRAME_OFS_PC 348
#define CFRAME_OFS_MULTRES 344
#define CFRAME_SIZE 368
#define CFRAME_SHIFT_MULTRES 3
#elif LJ_ARCH_PPC32ON64
#define CFRAME_OFS_ERRF 472
#define CFRAME_OFS_NRES 468
@ -264,6 +273,20 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#endif
#define CFRAME_OFS_MULTRES 0
#define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_S390X
#define CFRAME_OFS_ERRF 280
#define CFRAME_OFS_NRES 272
#define CFRAME_OFS_PREV 264
#define CFRAME_OFS_L 256
#define CFRAME_OFS_PC 168
#define CFRAME_OFS_MULTRES 160
#define CFRAME_SIZE 240
/*
** TODO: it would be good if we always decoded param*8 like
** the RISC architectures do. If so then SHIFT_MULTRES will
** need to change to 3.
*/
#define CFRAME_SHIFT_MULTRES 0
#else
#error "Missing CFRAME_* definitions for this architecture"
#endif

69
src/lj_init.c Normal file
View File

@ -0,0 +1,69 @@
#include <stdint.h>
#include "lj_arch.h"
#include "lj_jit.h"
#include "lj_vm.h"
#include "lj_str.h"
#if LJ_TARGET_ARM && LJ_TARGET_LINUX
#include <sys/utsname.h>
#endif
#ifdef _MSC_VER
/*
** Append a function pointer to the static constructor table executed by
** the C runtime.
** Based on https://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc
** see also https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization.
*/
#pragma section(".CRT$XCU",read)
#define LJ_INITIALIZER2_(f,p) \
static void f(void); \
__declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \
__pragma(comment(linker,"/include:" p #f "_")) \
static void f(void)
#ifdef _WIN64
#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"")
#else
#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"_")
#endif
#else
#define LJ_INITIALIZER(f) static void __attribute__((constructor)) f(void)
#endif
#ifdef LJ_HAS_OPTIMISED_HASH
/* Switch to the SSE4.2 string hash functions when `flags` has JIT_F_SSE4_2
** set; otherwise leave the current hash functions in place.
*/
static void str_hash_init(uint32_t flags)
{
  if (!(flags & JIT_F_SSE4_2))
    return;
  str_hash_init_sse42();
}
/* CPU detection for interpreter features such as string hash function
selection. We choose to cherry-pick from lj_cpudetect and not have a single
initializer to make sure that merges with LuaJIT/LuaJIT remain
convenient.

Declared through LJ_INITIALIZER, so it is registered as a static
constructor (CRT initializer table on MSVC, __attribute__((constructor))
elsewhere). On non-x86/x64 targets `flags` stays 0. */
LJ_INITIALIZER(lj_init_cpuflags)
{
uint32_t flags = 0;
#if LJ_TARGET_X86ORX64
uint32_t vendor[4];
uint32_t features[4];
/* Query cpuid leaves 0 and 1; skip detection if either query fails. */
if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
/* Leaf 1, word 2 (presumably ECX): bit 0 -> SSE3, bit 19 -> SSE4.1,
bit 20 -> SSE4.2. Each bit is multiplied into its JIT_F_* flag. */
flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
flags |= ((features[2] >> 20)&1) * JIT_F_SSE4_2;
/* Leaf 7 is only queried when word 0 of leaf 0 is at least 7
(presumably the highest supported leaf -- confirm). */
if (vendor[0] >= 7) {
uint32_t xfeatures[4];
lj_vm_cpuid(7, xfeatures);
/* Leaf 7, word 1 (presumably EBX): bit 8 -> BMI2. */
flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
}
}
#endif
/* The reason why we initialized early: select our string hash functions. */
str_hash_init (flags);
}
#endif

View File

@ -196,6 +196,8 @@ IRFPMDEF(FPMENUM)
_(FUNC_PC, offsetof(GCfunc, l.pc)) \
_(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
_(THREAD_ENV, offsetof(lua_State, env)) \
_(THREAD_EXDATA, offsetof(lua_State, exdata)) \
_(THREAD_EXDATA2, offsetof(lua_State, exdata2)) \
_(TAB_META, offsetof(GCtab, metatable)) \
_(TAB_ARRAY, offsetof(GCtab, array)) \
_(TAB_NODE, offsetof(GCtab, node)) \

View File

@ -190,6 +190,10 @@ typedef struct CCallInfo {
_(ANY, lj_tab_keyindex, 2, FL, INT, 0) \
_(ANY, lj_vm_next, 2, FL, PTR, 0) \
_(ANY, lj_tab_len, 1, FL, INT, 0) \
_(ANY, lj_tab_clone, 2, FS, TAB, CCI_L) \
_(ANY, lj_tab_isarray, 1, FL, INT, 0) \
_(ANY, lj_tab_nkeys, 1, FL, INT, 0) \
_(ANY, lj_tab_isempty, 1, FL, INT, 0) \
_(ANY, lj_tab_len_hint, 2, FL, INT, 0) \
_(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
_(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \

View File

@ -7,7 +7,6 @@
#define _LJ_JIT_H
#include "lj_obj.h"
#if LJ_HASJIT
#include "lj_ir.h"
/* -- JIT engine flags ---------------------------------------------------- */
@ -23,6 +22,7 @@
#define JIT_F_SSE3 (JIT_F_CPU << 0)
#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
#define JIT_F_BMI2 (JIT_F_CPU << 2)
#define JIT_F_SSE4_2 (JIT_F_CPU << 3)
#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
@ -112,12 +112,12 @@
/* Optimization parameters and their defaults. Length is a char in octal! */
#define JIT_PARAMDEF(_) \
_(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
_(\011, maxrecord, 4000) /* Max. # of recorded IR instructions. */ \
_(\010, maxtrace, 8000) /* Max. # of traces in cache. */ \
_(\011, maxrecord, 16000) /* Max. # of recorded IR instructions. */ \
_(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
_(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
_(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
_(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
_(\011, minstitch, 3) /* Min. # of IR ins for a stitched trace. */ \
\
_(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
_(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@ -131,7 +131,7 @@
/* Size of each machine code area (in KBytes). */ \
_(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \
/* Max. total size of all machine code areas (in KBytes). */ \
_(\010, maxmcode, 512) \
_(\010, maxmcode, 40960) \
/* End of list. */
enum {
@ -372,7 +372,6 @@ enum {
#endif
LJ_K64__MAX,
};
#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
enum {
#if LJ_TARGET_X86ORX64
@ -391,7 +390,6 @@ enum {
#endif
LJ_K32__MAX
};
#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS)
/* Get 16 byte aligned pointer to SIMD constant. */
#define LJ_KSIMD(J, n) \
@ -446,13 +444,9 @@ typedef struct jit_State {
int32_t framedepth; /* Current frame depth. */
int32_t retdepth; /* Return frame depth (count of RETF). */
#if LJ_K32__USED
uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */
#endif
TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
#if LJ_K64__USED
TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */
#endif
IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
@ -516,6 +510,8 @@ typedef struct jit_State {
BCLine prev_line; /* Previous line. */
int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
#endif
PRNGState prng; /* PRNG state for the JIT compiler, defaults to prng in
global_State. */
} jit_State;
#ifdef LUA_USE_ASSERT
@ -523,6 +519,5 @@ typedef struct jit_State {
#else
#define lj_assertJ(c, ...) ((void)J)
#endif
#endif
#endif

View File

@ -304,6 +304,14 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
return def;
}
/*
** Return argument `narg` (1-based) as a cdata object. lj_err_argt() is
** called with LUA_TCDATA when the argument is missing (at or beyond L->top)
** or is not a cdata.
*/
GCcdata *lj_lib_checkcdata(lua_State *L, int narg)
{
  TValue *arg = L->base + narg-1;
  if (arg >= L->top || !tviscdata(arg))
    lj_err_argt(L, narg, LUA_TCDATA);
  return cdataV(arg);
}
/* -- Strict type checks -------------------------------------------------- */
/* The following type checks do not coerce between strings and numbers.
@ -356,4 +364,3 @@ badtype:
return 0; /* unreachable */
}
#endif

View File

@ -45,6 +45,7 @@ LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
LJ_FUNC GCcdata *lj_lib_checkcdata(lua_State *L, int narg);
#if LJ_HASBUFFER
LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg);

View File

@ -231,7 +231,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
}
/* Next try probing 64K-aligned pseudo-random addresses. */
do {
hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000);
hint = lj_prng_u64(&J->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000);
} while (!(hint + sz < range+range));
hint = target + hint - range;
}

View File

@ -657,6 +657,7 @@ typedef struct global_State {
MRef ctype_state; /* Pointer to C type state. */
PRNGState prng; /* Global PRNG state. */
GCRef gcroot[GCROOT_MAX]; /* GC roots. */
MRef saved_jit_base; /* saved jit_base for lj_err_throw */
} global_State;
#define mainthread(g) (&gcref(g->mainthref)->th)
@ -697,6 +698,12 @@ struct lua_State {
GCRef env; /* Thread environment (table of globals). */
void *cframe; /* End of C stack frame chain. */
MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */
void *exdata; /* user extra data pointer. added by OpenResty */
void *exdata2; /* the 2nd user extra data pointer. added by OpenResty */
#if LJ_TARGET_ARM
uint32_t unused1;
uint32_t unused2;
#endif
};
#define G(L) (mref(L->glref, global_State))

View File

@ -370,7 +370,9 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
** since they are followed by at least one guarded VLOAD.
*/
for (ir = IR(J->cur.nins-1); ir > store; ir--)
if (irt_isguard(ir->t) || ir->o == IR_ALEN)
if (irt_isguard(ir->t) || ir->o == IR_ALEN ||
(ir->o == IR_CALLL && ir->op2 == IRCALL_lj_tab_nkeys) ||
(ir->o == IR_CALLS && ir->op2 == IRCALL_lj_tab_clone))
goto doemit; /* No elimination possible. */
/* Remove redundant store from chain and replace with NOP. */
*refp = store->prev;

View File

@ -87,10 +87,6 @@ extern int sys_get_random_number(void *buf, uint64_t len);
extern int sceRandomGetRandomNumber(void *buf, size_t len);
#elif LJ_TARGET_NX
#include <unistd.h>
#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE
#define WIN32_LEAN_AND_MEAN
@ -180,11 +176,6 @@ int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0)
goto ok;
#elif LJ_TARGET_NX
if (getentropy(rs->u, sizeof(rs->u)) == 0)
goto ok;
#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE
if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u),

View File

@ -624,7 +624,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
if (bc_j(*pc) != -1 && !innerloopleft(J, pc))
lj_trace_err(J, LJ_TRERR_LINNER); /* Root trace hit an inner loop. */
if ((ev != LOOPEV_ENTERLO &&
J->loopref && J->cur.nins - J->loopref > 24) || --J->loopunroll < 0)
J->loopref && J->cur.nins - J->loopref > 100) || --J->loopunroll < 0)
lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */
J->loopref = J->cur.nins;
}
@ -664,18 +664,13 @@ static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb)
RecordIndex ix;
/* Since ITERN is recorded at the start, we need our own loop detection. */
if (J->pc == J->startpc &&
(J->cur.nins > REF_FIRST+1 ||
(J->cur.nins == REF_FIRST+1 && J->cur.ir[REF_FIRST].o != IR_PROF)) &&
J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) {
IRRef ref = REF_FIRST + LJ_HASPROFILE;
#ifdef LUAJIT_ENABLE_CHECKHOOK
ref += 3;
#endif
if (J->cur.nins > ref ||
(LJ_HASPROFILE && J->cur.nins == ref && J->cur.ir[ref-1].o != IR_PROF)) {
J->instunroll = 0; /* Cannot continue unrolling across an ITERN. */
lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
return LOOPEV_ENTER;
}
}
J->maxslot = ra;
lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */
ix.tab = getslot(J, ra-2);
@ -1836,7 +1831,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
if (lnk) { /* Possible tail- or up-recursion. */
lj_trace_flush(J, lnk); /* Flush trace that only returns. */
/* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */
hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u);
hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J->prng) & 15u);
}
lj_trace_err(J, LJ_TRERR_CUNROLL);
}

View File

@ -260,6 +260,8 @@ LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd)
return NULL;
}
L->status = LUA_OK;
L->exdata = NULL;
L->exdata2 = NULL;
return L;
}
@ -319,6 +321,8 @@ lua_State *lj_state_new(lua_State *L)
setgcrefr(L1->env, L->env);
stack_init(L1, L); /* init stack */
lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white");
L1->exdata = L->exdata;
L1->exdata2 = L->exdata2;
return L1;
}

View File

@ -19,6 +19,15 @@
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
{
MSize i, n = a->len > b->len ? b->len : a->len;
#ifdef LUAJIT_USE_VALGRIND
for (i = 0; i < n; i++) {
uint8_t va = *(const uint8_t *)(strdata(a)+i);
uint8_t vb = *(const uint8_t *)(strdata(b)+i);
if (va != vb) {
return va < vb ? -1 : 1;
}
}
#else
for (i = 0; i < n; i += 4) {
/* Note: innocuous access up to end of string + 3. */
uint32_t va = *(const uint32_t *)(strdata(a)+i);
@ -35,6 +44,7 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
return va < vb ? -1 : 1;
}
}
#endif
return (int32_t)(a->len - b->len);
}
@ -72,8 +82,22 @@ int lj_str_haspattern(GCstr *s)
/* -- String hashing ------------------------------------------------------ */
#ifdef LJ_HAS_OPTIMISED_HASH
static StrHash hash_sparse_def (uint64_t, const char *, MSize);
str_sparse_hashfn hash_sparse = hash_sparse_def;
#if LUAJIT_SECURITY_STRHASH
static StrHash hash_dense_def(uint64_t, StrHash, const char *, MSize);
str_dense_hashfn hash_dense = hash_dense_def;
#endif
#else
#define hash_sparse hash_sparse_def
#if LUAJIT_SECURITY_STRHASH
#define hash_dense hash_dense_def
#endif
#endif
/* Keyed sparse ARX string hash. Constant time. */
static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
static StrHash hash_sparse_def(uint64_t seed, const char *str, MSize len)
{
/* Constants taken from lookup3 hash by Bob Jenkins. */
StrHash a, b, h = len ^ (StrHash)seed;
@ -97,7 +121,7 @@ static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
#if LUAJIT_SECURITY_STRHASH
/* Keyed dense ARX string hash. Linear time. */
static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
static LJ_NOINLINE StrHash hash_dense_def(uint64_t seed, StrHash h,
const char *str, MSize len)
{
StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
@ -282,8 +306,21 @@ static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
s->gct = ~LJ_TSTR;
s->len = len;
s->hash = hash;
#ifdef LUAJIT_TEST_FIXED_ORDER
/* If you need predictable key iteration order in lua tables (eg: in data driven test),
* build with
* "XCFLAGS=-DLUAJIT_TEST_FIXED_ORDER=1 -DLUAJIT_SECURITY_STRID=0
* -DLUAJIT_SECURITY_STRHASH=0 -DLUAJIT_SECURITY_PRNG=0 -DLUAJIT_SECURITY_MCODE=0"
*
* This is for testing only. Please don't use it in production builds.
*/
s->sid = hash;
#else
#ifndef STRID_RESEED_INTERVAL
s->sid = g->str.id++;
/* s->sid = g->str.id++; */
/* if use g->str.id++ as sid, the order of the tab will be indeterminate. */
s->sid = hash;
#elif STRID_RESEED_INTERVAL
if (!g->str.idreseed--) {
uint64_t r = lj_prng_u64(&g->prng);
@ -293,6 +330,7 @@ static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
s->sid = g->str.id++;
#else
s->sid = (StrID)lj_prng_u64(&g->prng);
#endif
#endif
s->reserved = 0;
s->hashalg = (uint8_t)hashalg;

View File

@ -28,4 +28,16 @@ LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))
#ifdef LJ_HAS_OPTIMISED_HASH
typedef StrHash (*str_sparse_hashfn) (uint64_t, const char *, MSize);
extern str_sparse_hashfn hash_sparse;
#if LUAJIT_SECURITY_STRHASH
typedef StrHash (*str_dense_hashfn) (uint64_t, StrHash, const char *, MSize);
extern str_dense_hashfn hash_dense;
#endif
extern void str_hash_init_sse42 (void);
#endif
#endif

309
src/lj_str_hash.c Normal file
View File

@ -0,0 +1,309 @@
/*
* This file defines string hash function using CRC32. It takes advantage of
* Intel hardware support (crc32 instruction, SSE 4.2) to speedup the CRC32
* computation. The hash functions try to compute CRC32 of length and up
* to 128 bytes of given string.
*/
#include "lj_arch.h"
#if LJ_HAS_OPTIMISED_HASH == 1 || defined(SMOKETEST)
#include <stdint.h>
#include <sys/types.h>
#include <time.h>
#include <smmintrin.h>
#if defined(_MSC_VER)
#include <process.h>
/* Silence deprecated name warning */
#define getpid _getpid
#else
#include <unistd.h>
#endif
#include "lj_def.h"
#include "lj_str.h"
#include "lj_jit.h"
#if defined(_MSC_VER)
/*
* MSVC doesn't seem to restrict intrinsics used based on /arch: value set
* while clang-cl will error on it.
*/
#if defined(__clang__) && !defined(__SSE4_2__)
#error "This file must be built with /arch:AVX1 or higher"
#endif
#else
#if !defined(__SSE4_2__)
#error "This file must be built with -msse4.2"
#endif
#endif
#define lj_crc32_u32 _mm_crc32_u32
#define lj_crc32_u64 _mm_crc32_u64
#undef LJ_AINLINE
#define LJ_AINLINE
#if defined(__MINGW32__) || defined(_MSC_VER)
#define random() ((long) rand())
#define srandom(seed) srand(seed)
#endif
/* Reinterpret a byte pointer as a pointer to 64-bit words. The address is
 * returned unchanged; going through void* avoids a direct char* -> uint64_t*
 * cast.
 */
static const uint64_t* cast_uint64p(const char* str)
{
  const void* raw = str;
  return raw;
}
/* Reinterpret a byte pointer as a pointer to 32-bit words. The address is
 * returned unchanged; going through void* avoids a direct char* -> uint32_t*
 * cast.
 */
static const uint32_t* cast_uint32p(const char* str)
{
  const void* raw = str;
  return raw;
}
/* Hash a string by sampling three of its bytes: the first, the middle
 * (index len >> 1) and the last, mixed with len and the low 32 bits of seed.
 *
 * Written for len in [1, 4); hash_sparse_sse42() also routes len >= 128
 * here. len must not be zero, because str[len-1] is read.
 */
static LJ_AINLINE uint32_t hash_sparse_1_4(uint64_t seed, const char* str,
uint32_t len)
{
#if 0
/* TODO: The if-1 part (i.e the original algorithm) is working better when
* the load-factor is high, as revealed by conflict benchmark (via
* 'make benchmark' command); need to understand why it's so.
*/
/* NOTE(review): this CRC32 variant is compiled out; the #else branch below
 * is the code in effect, and is presumably the "original algorithm" the
 * TODO above refers to -- confirm.
 */
uint32_t v = str[0];
v = (v << 8) | str[len >> 1];
v = (v << 8) | str[len - 1];
v = (v << 8) | len;
return lj_crc32_u32(0, v);
#else
/* seed is truncated to 32 bits by the assignment to h. */
uint32_t a, b, h = len ^ seed;
a = *(const uint8_t *)str;
h ^= *(const uint8_t *)(str+len-1);
b = *(const uint8_t *)(str+(len>>1));
/* Four rounds of xor / subtract-rotate mixing across h, a and b. */
h ^= b; h -= lj_rol(b, 14);
a ^= h; a -= lj_rol(h, 11);
b ^= a; b -= lj_rol(a, 25);
h ^= b; h -= lj_rol(b, 16);
return h;
#endif
}
/* Hash a string with len in [4, 16) using two loads that together cover the
 * whole string: one from the start and one ending at the last byte. The two
 * loads overlap when len is not exactly 8 (or 16 is not reached).
 */
static LJ_AINLINE uint32_t hash_sparse_4_16(uint64_t seed, const char* str,
uint32_t len)
{
uint64_t v1, v2, h;
if (len >= 8) {
/* len in [8, 16): two 8-byte loads (head and tail). */
v1 = *cast_uint64p(str);
v2 = *cast_uint64p(str + len - 8);
} else {
/* len in [4, 8): two 4-byte loads, widened to 64 bits. */
v1 = *cast_uint32p(str);
v2 = *cast_uint32p(str + len - 4);
}
/* Start the CRC from len ^ seed, then fold in both words. */
h = lj_crc32_u32(0, len ^ seed);
h = lj_crc32_u64(h, v1);
h = lj_crc32_u64(h, v2);
/* The 64-bit accumulator is narrowed to the 32-bit return type. */
return h;
}
/* Hash a string with length in [16, 128).
 *
 * Two accumulators are run in parallel over 16-byte steps (h1 takes the low
 * 8 bytes of each step, h2 the high 8 bytes), then the final 16 bytes of the
 * string are folded in and the accumulators are combined. len must be at
 * least 16, because str + len - 16 is read.
 */
static uint32_t hash_16_128(uint64_t seed, const char* str,
uint32_t len)
{
uint64_t h1, h2;
uint32_t i;
h1 = lj_crc32_u32(0, len ^ seed);
h2 = 0;
/* Runs while i < len - 16, so it does not execute at all for len == 16.
 * Note that the CRC result is added to the accumulator, not assigned.
 */
for (i = 0; i < len - 16; i += 16) {
h1 += lj_crc32_u64(h1, *cast_uint64p(str + i));
h2 += lj_crc32_u64(h2, *cast_uint64p(str + i + 8));
};
/* Tail: the last 16 bytes, which may overlap the final loop step. */
h1 = lj_crc32_u64(h1, *cast_uint64p(str + len - 16));
h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8));
return lj_crc32_u32(h1, h2);
}
/* **************************************************************************
*
* Following is code about hashing string with length >= 128
*
* **************************************************************************
*/
static uint32_t random_pos[32][2];
/* floor(log2(i)) for i in [1, 127]; entry 0 is -1. */
static const int8_t log2_tab[128] = {
  -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
};

/* Return floor(log2(n)).
 *
 * n is examined one byte-shift at a time (0, 8, 16, 24). At the first shift
 * where the remaining value fits the 128-entry table, the table value plus
 * the shift is the answer. When the value left after a shift is 0 the table
 * yields -1, which makes the result shift - 1 (e.g. 7 for n in [128, 255]).
 * For n == 0 the result is (uint32_t)-1.
 */
static LJ_AINLINE uint32_t log2_floor(uint32_t n)
{
  uint32_t shift;
  for (shift = 0; shift < 32; shift += 8) {
    uint32_t top = n >> shift;
    if (top <= 127)
      return log2_tab[top] + shift;
  }
  return 31;  /* Bit 31 is set. */
}
/* Mask with the low n bits set, for n in [1, 31]. Built from an unsigned
 * 32-bit one so that n == 31 stays well defined on platforms where long is
 * only 32 bits wide (a signed 1L << 31 would overflow there).
 */
#define POW2_MASK(n) ((((uint32_t)1) << (n)) - 1u)

/* Populate `random_pos` with the random offsets used by hash_128_above().
 *
 * Rows 0..2 are set to 0. Every other row i < 31 holds two random values
 * masked to the low i+1 bits, i.e. in the range [0, 2**(i+1)). Rows below
 * ceil(log2(RAND_MAX)) take a random() value directly; higher rows scale a
 * random() value by an entry of an already-filled lower row so that bits
 * above RAND_MAX can be set as well.
 *
 * NOTE: this reseeds the C library generator via srandom()/srand(), using a
 * seed derived from the process id and the current time.
 */
static void str_hash_init_random(void)
{
  int i, rml;
  uint32_t seed;  /* Unsigned: holds CRC32 output and feeds srandom(). */

  /* Calculate the ceil(log2(RAND_MAX)) */
  rml = log2_floor(RAND_MAX);
  if (RAND_MAX & (RAND_MAX - 1)) {
    rml += 1;
  }

  /* Init seed */
  seed = lj_crc32_u32(0, getpid());
  seed = lj_crc32_u32(seed, time(NULL));
  srandom(seed);

  /* Now start to populate the random_pos[][]. */
  for (i = 0; i < 3; i++) {
    /* No need to provide random value for chunk smaller than 8 bytes */
    random_pos[i][0] = random_pos[i][1] = 0;
  }

  for (; i < rml; i++) {
    random_pos[i][0] = random() & POW2_MASK(i+1);
    random_pos[i][1] = random() & POW2_MASK(i+1);
  }

  /* Rows that need more bits than a single random() call provides. */
  for (; i < 31; i++) {
    int j;
    for (j = 0; j < 2; j++) {
      uint32_t v, scale;
      scale = random_pos[i - rml][0];
      if (scale == 0) {
        scale = 1;
      }
      v = (random() * scale) & POW2_MASK(i+1);
      random_pos[i][j] = v;
    }
  }
}
#undef POW2_MASK
/* Return one of the two pre-computed random offsets stored for chunks whose
 * size has floor(log2) equal to chunk_sz_order; the low bit of idx selects
 * which one. The stored values are masked to chunk_sz_order+1 bits, i.e.
 * they are below 2**(chunk_sz_order+1). It is "unsafe" in the sense that the
 * return value may be greater than chunk-size; it is up to the caller to
 * make sure "chunk-base + return-value-of-this-func" has valid virtual
 * address.
 */
static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order,
                                                 uint32_t idx)
{
  return random_pos[chunk_sz_order][idx & 1];
}
/* Hash a long string (callers pass len >= 128) by sampling 8-byte words at
 * pre-computed random offsets inside 16 equal chunks, plus the first and
 * last 8 bytes of the string. Two CRC32 accumulators are kept and combined
 * at the end.
 */
static LJ_NOINLINE uint32_t hash_128_above(uint64_t seed, const char* str,
uint32_t len)
{
uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2;
uint64_t h1, h2, v;
const char* chunk_ptr;
chunk_num = 16;
chunk_sz = len / chunk_num;
chunk_sz_log2 = log2_floor(chunk_sz);
/* Two offsets chosen by the order of magnitude of the chunk size. */
pos1 = get_random_pos_unsafe(chunk_sz_log2, 0);
pos2 = get_random_pos_unsafe(chunk_sz_log2, 1);
h1 = lj_crc32_u32(0, len ^ seed);
h2 = 0;
/* loop over 14 chunks, 2 chunks at a time */
/* NOTE(review): the loop runs chunk_num/2 - 1 = 7 times and advances
 * chunk_ptr by a single chunk_sz per iteration, so consecutive iterations
 * overlap by one chunk. The "14 chunks" wording above may be stale --
 * confirm before relying on it.
 */
for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1);
chunk_ptr += chunk_sz, i++) {
v = *cast_uint64p(chunk_ptr + pos1);
h1 = lj_crc32_u64(h1, v);
v = *cast_uint64p(chunk_ptr + chunk_sz + pos2);
h2 = lj_crc32_u64(h2, v);
}
/* the last two chunks */
v = *cast_uint64p(chunk_ptr + pos1);
h1 = lj_crc32_u64(h1, v);
/* Here pos2 is applied backwards from the end of the current chunk. */
v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2);
h2 = lj_crc32_u64(h2, v);
/* process the trailing part */
/* First 8 bytes go into h1, last 8 bytes of the string into h2. */
h1 = lj_crc32_u64(h1, *cast_uint64p(str));
h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8));
h1 = lj_crc32_u32(h1, h2);
return h1;
}
/* Sparse string hash, dispatching on length:
 *   [4, 16)    -> hash_sparse_4_16
 *   [16, 128)  -> hash_16_128
 *   otherwise  -> hash_sparse_1_4 (len < 4 and len >= 128)
 * NOTE: the "len" should not be zero.
 */
static StrHash hash_sparse_sse42(uint64_t seed, const char* str, MSize len)
{
  if (len >= 4 && len < 16)
    return hash_sparse_4_16(seed, str, len);
  if (len >= 16 && len < 128)
    return hash_16_128(seed, str, len);
  return hash_sparse_1_4(seed, str, len);
}
#if LUAJIT_SECURITY_STRHASH
/* Dense string hash. A value derived from the previous hash h and the upper
 * 32 bits of seed is returned as-is for len <= 16. Longer strings are
 * rehashed with that value as the seed: hash_16_128 for len in (16, 128),
 * and the slower randomized hash_128_above for len >= 128.
 */
static StrHash hash_dense_sse42(uint64_t seed, uint32_t h, const char* str,
                                MSize len)
{
  uint32_t mixed = lj_bswap(lj_rol(h ^ (uint32_t)(seed >> 32), 4));
  if (len > 16) {
    if (len < 128)
      return hash_16_128(mixed, str, len);
    return hash_128_above(mixed, str, len);
  }
  return mixed;
}
#endif
/* Install the SSE4.2 hash functions: fill the random offset table they rely
 * on and point hash_sparse (and hash_dense, when LUAJIT_SECURITY_STRHASH is
 * enabled) at the SSE4.2 implementations.
 */
void str_hash_init_sse42(void)
{
  str_hash_init_random();
#if LUAJIT_SECURITY_STRHASH
  hash_dense = hash_dense_sse42;
#endif
  hash_sparse = hash_sparse_sse42;
}
#endif

View File

@ -14,6 +14,8 @@
#include "lj_err.h"
#include "lj_tab.h"
#include <math.h>
/* -- Object hashing ------------------------------------------------------ */
/* Hash an arbitrary key and return its anchor position in the hash table. */
@ -691,3 +693,85 @@ MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint)
}
#endif
/* Clone a table: thin wrapper that returns lj_tab_dup(L, src). */
GCtab * LJ_FASTCALL lj_tab_clone(lua_State *L, const GCtab *src)
{
return lj_tab_dup(L, src);
}
/*
** Return 1 if every key stored in the hash part of the table is an integer
** or a number with an integral value, 0 otherwise. Only hash slots with a
** non-nil value are considered; the array part is not examined. The sign
** and range of the keys are not checked.
*/
int LJ_FASTCALL lj_tab_isarray(const GCtab *src)
{
  Node *nodes = noderef(src->node);
  ptrdiff_t idx;
  for (idx = (ptrdiff_t)src->hmask; idx >= 0; idx--) {
    cTValue *key;
    if (tvisnil(&nodes[idx].val))
      continue;  /* Unused slot. */
    key = &nodes[idx].key;
    if (LJ_UNLIKELY(tvisint(key)))
      continue;  /* Integer key. */
    if (LJ_UNLIKELY(tvisnum(key))) {
      lua_Number num = numberVnum(key);
      if (LJ_LIKELY(rint((double) num) == num))
        continue;  /* Number key with an integral value. */
    }
    return 0;  /* Any other key disqualifies the table. */
  }
  return 1;
}
/*
** Count the non-nil values of a table: every non-nil slot of the array part
** plus, when hmask is non-zero, every hash node with a non-nil value.
*/
MSize LJ_FASTCALL lj_tab_nkeys(const GCtab *t)
{
  cTValue *arr = tvref(t->array);
  MSize alen = (MSize)t->asize;
  MSize total = 0;
  MSize idx;
  for (idx = 0; idx < alen; idx++) {
    if (LJ_LIKELY(!tvisnil(&arr[idx])))
      total++;
  }
  if (t->hmask > 0) {
    Node *nodes = noderef(t->node);
    for (idx = 0; idx <= (MSize)t->hmask; idx++) {
      if (LJ_LIKELY(!tvisnil(&nodes[idx].val)))
        total++;
    }
  }
  return total;
}
/*
** Return 1 if the table holds no non-nil value, 0 as soon as one is found.
** The array part is scanned first; the hash part is only scanned when hmask
** is non-zero.
*/
int LJ_FASTCALL lj_tab_isempty(const GCtab *t)
{
  cTValue *arr = tvref(t->array);
  MSize alen = (MSize)t->asize;
  MSize idx;
  for (idx = 0; idx < alen; idx++) {
    if (LJ_LIKELY(!tvisnil(&arr[idx])))
      return 0;
  }
  if (t->hmask > 0) {
    Node *nodes = noderef(t->node);
    for (idx = 0; idx <= (MSize)t->hmask; idx++) {
      if (LJ_LIKELY(!tvisnil(&nodes[idx].val)))
        return 0;
    }
  }
  return 1;
}

View File

@ -93,4 +93,9 @@ LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
#endif
LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_clone(lua_State *L, const GCtab *src);
LJ_FUNCA int LJ_FASTCALL lj_tab_isarray(const GCtab *src);
LJ_FUNCA MSize LJ_FASTCALL lj_tab_nkeys(const GCtab *src);
LJ_FUNCA int LJ_FASTCALL lj_tab_isempty(const GCtab *t);
#endif

View File

@ -144,6 +144,8 @@ typedef uint32_t RegCost;
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_target_mips.h"
#elif LJ_TARGET_S390X
#include "lj_target_s390x.h"
#else
#error "Missing include for target CPU"
#endif

View File

@ -30,8 +30,13 @@ enum {
/* Calling conventions. */
RID_RET = RID_R3,
#if LJ_LE
RID_RETHI = RID_R4,
RID_RETLO = RID_R3,
#else
RID_RETHI = RID_R3,
RID_RETLO = RID_R4,
#endif
RID_FPRET = RID_F1,
/* These definitions must match with the *.dasc file(s): */
@ -131,6 +136,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
#define PPCF_C(r) ((r) << 6)
#define PPCF_MB(n) ((n) << 6)
#define PPCF_ME(n) ((n) << 1)
#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1)))
#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5)))
#define PPCF_Y 0x00200000
#define PPCF_DOT 0x00000001
@ -200,6 +207,13 @@ typedef enum PPCIns {
PPCI_RLWINM = 0x54000000,
PPCI_RLWIMI = 0x50000000,
PPCI_RLDICL = 0x78000000,
PPCI_RLDICR = 0x78000004,
PPCI_RLDIC = 0x78000008,
PPCI_RLDIMI = 0x7800000c,
PPCI_RLDCL = 0x78000010,
PPCI_RLDCR = 0x78000012,
PPCI_B = 0x48000000,
PPCI_BL = 0x48000001,
PPCI_BC = 0x40800000,

81
src/lj_target_s390x.h Normal file
View File

@ -0,0 +1,81 @@
/*
** Definitions for IBM z/Architecture (s390x) CPUs.
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_S390X_H
#define _LJ_TARGET_S390X_H
/* -- Registers IDs ------------------------------------------------------- */
#define GPRDEF(_) \
_(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
_(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
#define FPRDEF(_) \
_(F0) _(F1) _(F2) _(F3) \
_(F4) _(F5) _(F6) _(F7) \
_(F8) _(F9) _(F10) _(F11) \
_(F12) _(F13) _(F14) _(F15)
// TODO: VREG?
#define RIDENUM(name) RID_##name,
/*
** Register IDs. GPRDEF and FPRDEF expand through RIDENUM into one RID_*
** enumerator per register: the 16 GPRs first, directly followed by the
** 16 FPRs. RID_MAX is therefore the total number of register IDs. All
** later enumerators are aliases of, or values derived from, those IDs.
*/
enum {
GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
/* Calling conventions. */
RID_SP = RID_R15, /* Stack pointer. */
RID_RET = RID_R2, /* Integer return value. */
RID_FPRET = RID_F0, /* Floating-point return value. */
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_R7, /* Interpreter BASE. */
RID_LPC = RID_R9, /* Interpreter PC. */
RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */
/* Register ranges [min, max) and number of registers. */
RID_MIN_GPR = RID_R0,
RID_MIN_FPR = RID_F0,
RID_MAX_GPR = RID_MIN_FPR, /* GPR range ends where the FPR range starts. */
RID_MAX_FPR = RID_MAX,
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
};
/* -- Register sets ------------------------------------------------------- */
/* -- Spill slots --------------------------------------------------------- */
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
**
** SPS_FIXED: Available fixed spill slots in interpreter frame.
** This definition must match with the *.dasc file(s).
**
** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
*/
#define SPS_FIXED 2
#define SPS_FIRST 2
#define SPOFS_TMP 0
#define sps_scale(slot) (4 * (int32_t)(slot))
#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
/* -- Exit state ---------------------------------------------------------- */
/* This definition must match with the *.dasc file(s). */
/*
** Saved register and spill-slot contents, laid out as: all FPRs, then all
** GPRs, then 256 spill slots.
** NOTE(review): the GPR slots are int32_t although s390x GPRs are 64 bit
** wide -- presumably carried over from a 32 bit target. Confirm against the
** *.dasc exit handling before relying on this layout.
*/
typedef struct {
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
#define EXITSTUB_SPACING 4
#define EXITSTUBS_PER_GROUP 32
/* -- Instructions -------------------------------------------------------- */
#endif

View File

@ -312,6 +312,8 @@ void lj_trace_initstate(global_State *g)
jit_State *J = G2J(g);
TValue *tv;
J->prng = g->prng;
/* Initialize aligned SIMD constants. */
tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
tv[0].u64 = U64x(7fffffff,ffffffff);
@ -390,7 +392,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e)
if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */
/* First try to bump its hotcount several times. */
val = ((uint32_t)J->penalty[i].val << 1) +
(lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1));
(lj_prng_u64(&J->prng) & ((1u<<PENALTY_RNDBITS)-1));
if (val > PENALTY_MAX) {
blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */
return;
@ -414,6 +416,9 @@ static void trace_start(jit_State *J)
{
lua_State *L;
TraceNo traceno;
#ifdef LUA_USE_TRACE_LOGS
const BCIns *pc = J->pc;
#endif
if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) {
@ -474,6 +479,9 @@ static void trace_start(jit_State *J)
}
);
lj_record_setup(J);
#ifdef LUA_USE_TRACE_LOGS
lj_log_trace_start_record(L, (unsigned) J->cur.traceno, pc, J->fn);
#endif
}
/* Stop tracing. */
@ -604,21 +612,22 @@ static int trace_abort(jit_State *J)
J->cur.link = 0;
J->cur.linktype = LJ_TRLINK_NONE;
lj_vmevent_send(L, TRACE,
TValue *frame;
const BCIns *pc;
cTValue *frame;
int size;
BCIns pc;
GCfunc *fn;
setstrV(L, L->top++, lj_str_newlit(L, "abort"));
setintV(L->top++, traceno);
/* Find original Lua function call to generate a better error message. */
frame = J->L->base-1;
pc = J->pc;
while (!isluafunc(frame_func(frame))) {
pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1;
frame = frame_prev(frame);
}
/* Find original function call to generate a better error message. */
frame = lj_debug_frame(L, 0, &size);
lj_assertL(frame != NULL, "missing debug frame");
fn = frame_func(frame);
if (frame == L->base-1 && isluafunc(fn))
pc = proto_bcpos(funcproto(fn), J->pc);
else
pc = lj_debug_framepc(L, fn, frame);
setfuncV(L, L->top++, fn);
setintV(L->top++, proto_bcpos(funcproto(fn), pc));
setintV(L->top++, pc);
copyTV(L, L->top++, restorestack(L, errobj));
copyTV(L, L->top++, &J->errinfo);
);
@ -931,6 +940,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
}
}
}
#ifdef LUA_USE_TRACE_LOGS
lj_log_trace_normal_exit(L, (int) T->traceno, pc);
#endif
/* Return MULTRES or 0. */
ERRNO_RESTORE
switch (bc_op(*pc)) {

View File

@ -88,4 +88,3 @@
#include "lib_ffi.c"
#include "lib_buffer.c"
#include "lib_init.c"

View File

@ -112,6 +112,9 @@ LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud);
LUA_API void (lua_close) (lua_State *L);
LUA_API lua_State *(lua_newthread) (lua_State *L);
#define HAVE_LUA_RESETTHREAD 1
LUA_API void (lua_resetthread) (lua_State *L, lua_State *th);
LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf);
@ -245,7 +248,12 @@ LUA_API void (lua_concat) (lua_State *L, int n);
LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud);
LUA_API void lua_setallocf (lua_State *L, lua_Alloc f, void *ud);
LUA_API void lua_setexdata(lua_State *L, void *exdata);
LUA_API void *lua_getexdata(lua_State *L);
#define HAVE_LUA_EXDATA2 1
LUA_API void lua_setexdata2(lua_State *L, void *exdata2);
LUA_API void *lua_getexdata2(lua_State *L);
/*
** ===============================================================

View File

@ -303,8 +303,9 @@ static int loadjitmodule(lua_State *L)
lua_concat(L, 2);
if (lua_pcall(L, 1, 1, 0)) {
const char *msg = lua_tostring(L, -1);
if (msg && !strncmp(msg, "module ", 7))
goto nomodule;
if (msg && !strncmp(msg, "module ", 7)){
printf("hehe\n");
goto nomodule;}
return report(L, 1);
}
lua_getfield(L, -1, "start");
@ -542,7 +543,6 @@ static int pmain(lua_State *L)
}
if ((flags & FLAGS_VERSION)) print_version();
s->status = runargs(L, argv, argn);
if (s->status != LUA_OK) return 0;

View File

@ -30,6 +30,8 @@
#include "lua.h"
#define OPENRESTY_LUAJIT
#define LUAJIT_VERSION "LuaJIT 2.1.0-beta3"
#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3

View File

@ -21,6 +21,7 @@
#define LUA_BITLIBNAME "bit"
#define LUA_JITLIBNAME "jit"
#define LUA_FFILIBNAME "ffi"
#define LUA_THRLIBNAME "thread"
LUALIB_API int luaopen_base(lua_State *L);
LUALIB_API int luaopen_math(lua_State *L);

View File

@ -1,159 +0,0 @@
@rem Script to build LuaJIT with NintendoSDK + NX Addon.
@rem Donated to the public domain by Swyter.
@rem
@rem To run this script you must open a "Native Tools Command Prompt for VS".
@rem
@rem Either the x86 version for NX32, or x64 for the NX64 target.
@rem This is because the pointer size of the LuaJIT host tools (buildvm.exe)
@rem must match the cross-compiled target (32 or 64 bits).
@rem
@rem Then cd to this directory and run this script.
@rem
@rem Recommended invocation:
@rem
@rem nxbuild # release build, amalgamated
@rem nxbuild debug # debug build, amalgamated
@rem
@rem Additional command-line options (not generally recommended):
@rem
@rem noamalg # (after debug) non-amalgamated build
@if not defined INCLUDE goto :FAIL
@if not defined NINTENDO_SDK_ROOT goto :FAIL
@if not defined PLATFORM goto :FAIL
@if "%platform%" == "x86" goto :DO_NX32
@if "%platform%" == "x64" goto :DO_NX64
@echo Error: Current host platform is %platform%!
@echo.
@goto :FAIL
@setlocal
:DO_NX32
@set DASC=vm_arm.dasc
@set DASMFLAGS= -D HFABI -D FPU
@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM
@set HOST_PTR_SIZE=4
goto :BEGIN
:DO_NX64
@set DASC=vm_arm64.dasc
@set DASMFLAGS= -D ENDIAN_LE
@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
@set HOST_PTR_SIZE=8
:BEGIN
@rem ---- Host compiler ----
@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /wo4146 /wo4244 /D_CRT_SECURE_NO_DEPRECATE
@set LJLINK=link /nologo
@set LJMT=mt /nologo
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:minilua.exe minilua.obj
@if errorlevel 1 goto :BAD
if exist minilua.exe.manifest^
%LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
@rem Check that we have the right 32/64 bit host compiler to generate the right virtual machine files.
@minilua
@if "%ERRORLEVEL%" == "%HOST_PTR_SIZE%" goto :PASSED_PTR_CHECK
@echo The pointer size of the host in bytes (%HOST_PTR_SIZE%) does not match the expected value (%errorlevel%).
@echo Check that the script is being ran under the correct x86/x64 VS prompt.
@goto :BAD
:PASSED_PTR_CHECK
@set DASMFLAGS=%DASMFLAGS% %DASMTARGET% -D LJ_TARGET_NX -D LUAJIT_OS=LUAJIT_OS_OTHER -D LUAJIT_DISABLE_JIT -D LUAJIT_DISABLE_FFI
minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% -D LJ_TARGET_NX -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:buildvm.exe buildvm*.obj
@if errorlevel 1 goto :BAD
if exist buildvm.exe.manifest^
%LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
buildvm -m elfasm -o lj_vm.s
@if errorlevel 1 goto :BAD
buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m libdef -o lj_libdef.h %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m recdef -o lj_recdef.h %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
@if errorlevel 1 goto :BAD
buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
@if errorlevel 1 goto :BAD
@rem ---- Cross compiler ----
@if "%platform%" neq "x64" goto :NX32_CROSSBUILD
@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-ar" rc
@set TARGETLIB_SUFFIX=nx64
%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-as -o lj_vm.o lj_vm.s
goto :DEBUGCHECK
:NX32_CROSSBUILD
@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-ar" rc
@set TARGETLIB_SUFFIX=nx32
%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-as -o lj_vm.o lj_vm.s
:DEBUGCHECK
@if "%1" neq "debug" goto :NODEBUG
@shift
@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_DEBUG -g -O0
@set TARGETLIB=libluajitD_%TARGETLIB_SUFFIX%.a
goto :BUILD
:NODEBUG
@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_RELEASE -O3
@set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a
:BUILD
del %TARGETLIB%
@if "%1" neq "noamalg" goto :AMALG
for %%f in (lj_*.c lib_*.c) do (
%LJCOMPILE% %%f
@if errorlevel 1 goto :BAD
)
%LJLIB% %TARGETLIB% lj_*.o lib_*.o
@if errorlevel 1 goto :BAD
@goto :NOAMALG
:AMALG
%LJCOMPILE% ljamalg.c
@if errorlevel 1 goto :BAD
%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
@if errorlevel 1 goto :BAD
:NOAMALG
@del *.o *.obj *.manifest minilua.exe buildvm.exe
@echo.
@echo === Successfully built LuaJIT for Nintendo Switch (%TARGETLIB_SUFFIX%) ===
@goto :END
:BAD
@echo.
@echo *******************************************************
@echo *** Build FAILED -- Please check the error messages ***
@echo *******************************************************
@goto :END
:FAIL
@echo To run this script you must open a "Native Tools Command Prompt for VS".
@echo.
@echo Either the x86 version for NX32, or x64 for the NX64 target.
@echo This is because the pointer size of the LuaJIT host tools (buildvm.exe)
@echo must match the cross-compiled target (32 or 64 bits).
@echo.
@echo Keep in mind that NintendoSDK + NX Addon must be installed, too.
:END

File diff suppressed because it is too large Load Diff

4306
src/vm_s390x.dasc Normal file

File diff suppressed because it is too large Load Diff

View File

@ -2455,6 +2455,19 @@ static void build_subroutines(BuildCtx *ctx)
| mov r13, [RA-8]
| mov r12, [RA]
| mov rsp, RA // Reposition stack to C frame.
#ifdef LUA_USE_TRACE_LOGS
| mov CARG1, SAVE_L
| mov L:CARG1->base, BASE
| mov RB, RD // Save RD
| mov TMP1, PC // Save PC
| mov CARG3, PC // CARG3 == BASE
| mov CARG2d, dword [DISPATCH+DISPATCH_GL(vmstate)]
| call extern lj_log_trace_direct_exit@8
| mov PC, TMP1
| mov RD, RB
| mov RB, SAVE_L
| mov BASE, L:RB->base
#endif
|.endif
| test RDd, RDd; js >9 // Check for error from exit.
| mov L:RB, SAVE_L
@ -4512,6 +4525,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_JLOOP:
|.if JIT
| ins_AD // RA = base (ignored), RD = traceno
#ifdef LUA_USE_TRACE_LOGS
|.if not X64WIN
| mov L:RB, SAVE_L
| mov L:RB->base, BASE // Save BASE
| mov TMP1, RD // Save RD
| mov CARG3, PC // CARG3 == BASE
| mov CARG2, RD
| mov CARG1, RB
| call extern lj_log_trace_entry@8
| mov RD, TMP1
| mov BASE, L:RB->base
|.endif
#endif
| mov RA, [DISPATCH+DISPATCH_J(trace)]
| mov TRACE:RD, [RA+RD*8]
| mov RD, TRACE:RD->mcode

View File

@ -2905,6 +2905,21 @@ static void build_subroutines(BuildCtx *ctx)
| mov r13, TMPa
| mov r12, TMPQ
|.endif
#ifdef LUA_USE_TRACE_LOGS
|.if X64
| mov FCARG1, SAVE_L
| mov L:FCARG1->base, BASE
| mov RB, RD // Save RD
| mov TMP1, PC // Save PC
| mov CARG3d, PC // CARG3d == BASE
| mov FCARG2, dword [DISPATCH+DISPATCH_GL(vmstate)]
| call extern lj_log_trace_direct_exit@8
| mov PC, TMP1
| mov RD, RB
| mov RB, SAVE_L
| mov BASE, L:RB->base
|.endif
#endif
| test RD, RD; js >9 // Check for error from exit.
| mov L:RB, SAVE_L
| mov MULTRES, RD
@ -5306,6 +5321,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_JLOOP:
|.if JIT
| ins_AD // RA = base (ignored), RD = traceno
#ifdef LUA_USE_TRACE_LOGS
|.if X64
| mov L:RB, SAVE_L
| mov L:RB->base, BASE // Save BASE
| mov TMP1, RD // Save RD
| mov CARG3d, PC // CARG3d == BASE
| mov FCARG2, RD
| mov FCARG1, RB
| call extern lj_log_trace_entry@8
| mov RD, TMP1
| mov BASE, L:RB->base
|.endif
#endif
| mov RA, [DISPATCH+DISPATCH_J(trace)]
| mov TRACE:RD, [RA+RD*4]
| mov RDa, TRACE:RD->mcode

13
src/x64/Makefile Normal file
View File

@ -0,0 +1,13 @@
# Convenience wrapper: every real target is forwarded to the Makefile in test/.
.PHONY: default test benchmark clean

# Without an explicit target, only list the targets that are available.
default:
	@echo "make targets include: test benchmark clean"

test:
	$(MAKE) -C test test

benchmark:
	$(MAKE) -C test benchmark

clean:
	$(MAKE) -C test clean

47
src/x64/test/Makefile Normal file
View File

@ -0,0 +1,47 @@
# Builds and runs the unit test (ht_test) and the micro benchmark
# (ht_benchmark) in this directory.
# clean is phony too, so a stray file named "clean" cannot disable it.
.PHONY: default test benchmark clean

default: test benchmark

COMMON_OBJ := test_util.o

TEST_PROGRAM := ht_test
BENCHMARK_PROGRAM := ht_benchmark

TEST_PROGRAM_OBJ := $(COMMON_OBJ) test.o
BENCHMARK_PROGRAM_OBJ := $(COMMON_OBJ) benchmark.o

# Set WITH_VALGRIND=1 to run the unit test under valgrind.
ifeq ($(WITH_VALGRIND), 1)
  VALGRIND := valgrind --leak-check=full
else
  VALGRIND :=
endif

# -MD makes the compiler write a .d dependency file next to each object.
CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src

# CXXFLAGS already carries -MD, so it is not repeated here.
%.o: %.cxx
	$(CXX) $(CXXFLAGS) -c $<

test: $(TEST_PROGRAM)
	@echo "some unit test"
	$(VALGRIND) ./$(TEST_PROGRAM)
	@echo "smoke test"
	../../luajit test_str_comp.lua

benchmark: $(BENCHMARK_PROGRAM)
	# micro benchmark
	./$(BENCHMARK_PROGRAM)

# The per-object .d files are concatenated into dep1.txt / dep2.txt, which
# are included below so header changes trigger a rebuild.
$(TEST_PROGRAM) : $(TEST_PROGRAM_OBJ)
	cat $(TEST_PROGRAM_OBJ:.o=.d) > dep1.txt
	$(CXX) $+ $(CXXFLAGS) -lm -o $@

$(BENCHMARK_PROGRAM): $(BENCHMARK_PROGRAM_OBJ)
	cat $(BENCHMARK_PROGRAM_OBJ:.o=.d) > dep2.txt
	$(CXX) $+ $(CXXFLAGS) -o $@

-include dep1.txt
-include dep2.txt

clean:
	-rm -f *.o *.d dep*.txt $(BENCHMARK_PROGRAM) $(TEST_PROGRAM)

360
src/x64/test/benchmark.cxx Normal file
View File

@ -0,0 +1,360 @@
#include <sys/time.h> // for gettimeofday()
extern "C" {
#define LUAJIT_SECURITY_STRHASH 1
#include "../../lj_str.h"
str_sparse_hashfn hash_sparse;
str_dense_hashfn hash_dense;
#include "../../lj_str_hash.c"
}
#include <string>
#include <vector>
#include <utility>
#include <algorithm>
#include "test_util.hpp"
#include <stdio.h>
#include <math.h>
using namespace std;
#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
#define lj_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
const char* separator = "-------------------------------------------";
// Baseline ("was") sparse string hash that the benchmarks compare against.
// Starts from len ^ seed and mixes in a few samples of the string:
//  - len >= 4: 32-bit words read at the start, the end (len-4), around the
//    middle ((len>>1)-2) and around the first quarter ((len>>2)-1);
//  - len < 4:  single bytes at the start, the end (len-1) and the middle.
// Assumes len >= 1: with len == 0 the short path would read str[-1].
static uint32_t LJ_AINLINE
original_hash_sparse(uint64_t seed, const char *str, size_t len)
{
uint32_t a, b, h = len ^ seed;
if (len >= 4) {
a = lj_getu32(str); h ^= lj_getu32(str+len-4);
b = lj_getu32(str+(len>>1)-2);
h ^= b; h -= lj_rol(b, 14);
b += lj_getu32(str+(len>>2)-1);
// NOTE(review): these three mixing steps are repeated after the if/else,
// so the len >= 4 path runs them twice. Confirm this matches the hash this
// function is meant to reproduce.
a ^= h; a -= lj_rol(h, 11);
b ^= a; b -= lj_rol(a, 25);
h ^= b; h -= lj_rol(b, 16);
} else {
a = *(const uint8_t *)str;
h ^= *(const uint8_t *)(str+len-1);
b = *(const uint8_t *)(str+(len>>1));
h ^= b; h -= lj_rol(b, 14);
}
// Final mix, applied on both paths.
a ^= h; a -= lj_rol(h, 11);
b ^= a; b -= lj_rol(a, 25);
h ^= b; h -= lj_rol(b, 16);
return h;
}
// Baseline ("was") dense string hash that the benchmarks compare against.
// h is an existing hash value for the same string (the callers in this file
// pass either a sparse hash or a constant). For len <= 12 only h and the
// upper half of the seed contribute; longer strings are consumed in 12-byte
// steps.
static uint32_t original_hash_dense(uint64_t seed, uint32_t h,
const char *str, size_t len)
{
uint32_t b = lj_bswap(lj_rol(h ^ (uint32_t)(seed >> 32), 4));
if (len > 12) {
uint32_t a = (uint32_t)seed;
// The first iteration reads the last 12 bytes (p == pe); p then restarts at
// the beginning of the string and advances by 12 until it reaches pe.
const char *pe = str+len-12, *p = pe, *q = str;
do {
a += lj_getu32(p);
b += lj_getu32(p+4);
h += lj_getu32(p+8);
p = q; q += 12;
h ^= b; h -= lj_rol(b, 14);
a ^= h; a -= lj_rol(h, 11);
b ^= a; b -= lj_rol(a, 25);
} while (p < pe);
// Final mix after all 12-byte steps.
h ^= b; h -= lj_rol(b, 16);
a ^= h; a -= lj_rol(h, 4);
b ^= a; b -= lj_rol(a, 14);
}
return b;
}
// Time just under 100 million calls of func(seed, buf, len) with a fixed len
// and return the elapsed time as reported by
// TestClock::getElapseInSecond().
// buf must be at least 4096 bytes long: the loop writes to buf[i % 4096].
template<class T> double
BenchmarkHashTmpl(T func, uint64_t seed, char* buf, size_t len)
{
TestClock timer;
uint32_t h = 0;
timer.start();
for(int i = 1; i < 1000000 * 100; i++) {
// Modify one byte of buf per iteration so that the call to func() is not
// loop invariant and cannot be hoisted out of the loop.
buf[i % 4096] = i;
h += func(seed, buf, len) ^ i;
}
timer.stop();
// Consume h so that the hash computations above are not dead code.
test_printf("%x", h);
return timer.getElapseInSecond();
}
// Benchmark functor: forwards to the baseline sparse hash,
// original_hash_sparse().
struct TestFuncWasSparse
{
uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) {
return original_hash_sparse(seed, buf, len);
}
};
// Benchmark functor: forwards to the sparse hash under test,
// hash_sparse_sse42().
struct TestFuncIsSparse
{
uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) {
return hash_sparse_sse42(seed, buf, len);
}
};
// Benchmark functor: forwards to the baseline dense hash,
// original_hash_dense(), with a fixed input hash of 42.
struct TestFuncWasDense
{
uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) {
return original_hash_dense(seed, 42, buf, len);
}
};
// Benchmark functor: forwards to the dense hash under test,
// hash_dense_sse42(), with a fixed input hash of 42.
struct TestFuncIsDense
{
uint32_t operator()(uint64_t seed, const char* buf, uint32_t len) {
return hash_dense_sse42(seed, 42, buf, len);
}
};
// Benchmark all four hash variants at a list of fixed string lengths and
// print one table row per length: the elapsed time of the baseline ("was")
// and new ("is") sparse and dense hashes, plus the relative improvement in
// percent.
// buf must be at least 4096 bytes long (see BenchmarkHashTmpl).
static void
benchmarkIndividual(uint64_t seed, char* buf)
{
  fprintf(stdout,"\n\nCompare performance of particular len (in second)\n");
  fprintf(stdout, "%-12s%-8s%-8s%s%-8s%-8s%s\n", "len",
          "was (s)", "is (s)", "diff (s)",
          "was (d)", "is (d)", "diff (d)");
  fprintf(stdout, "-------------------------------------------\n");

  uint32_t lens[] = {3, 4, 7, 10, 15, 16, 20, 32, 36, 63, 80, 100,
                     120, 127, 280, 290, 400};
  for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) {
    uint32_t len = lens[i];
    double e1 = BenchmarkHashTmpl(TestFuncWasSparse(), seed, buf, len);
    double e2 = BenchmarkHashTmpl(TestFuncIsSparse(), seed, buf, len);
    double e3 = BenchmarkHashTmpl(TestFuncWasDense(), seed, buf, len);
    double e4 = BenchmarkHashTmpl(TestFuncIsDense(), seed, buf, len);
    // len is unsigned, so it is printed with %u (the conversion must match
    // the argument type).
    fprintf(stdout, "len = %4u: %-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n",
            (unsigned)len, e1, e2, 100*(e1-e2)/e1, e3, e4, 100*(e3-e4)/e3);
  }
}
// Like BenchmarkHashTmpl, but every outer iteration hashes buf once for each
// of the len_num lengths in len_vect, so consecutive calls see different
// lengths. Returns the elapsed time as reported by
// TestClock::getElapseInSecond().
// buf must be at least 4096 bytes long: the loop writes to
// buf[(i + j) % 4096].
template<class T> double
BenchmarkChangeLenTmpl(T func, uint64_t seed, char* buf, uint32_t* len_vect,
uint32_t len_num)
{
TestClock timer;
uint32_t h = 0;
timer.start();
for(int i = 1; i < 1000000 * 100; i++) {
for (int j = 0; j < (int)len_num; j++) {
// Modify one byte of buf per call so that the call to func() is not loop
// invariant and cannot be hoisted out of the loop.
buf[(i + j) % 4096] = i;
h += func(seed, buf, len_vect[j]) ^ j;
}
}
timer.stop();
// Consume h so that the hash computations above are not dead code.
test_printf("%x", h);
return timer.getElapseInSecond();
}
// Time the four hash variants over one set of alternating lengths and print
// the matching table row.
static void
reportToggleRow(const char* label, uint64_t seed, char* buf,
                uint32_t* lens, uint32_t lenNum)
{
  const double wasSparse =
    BenchmarkChangeLenTmpl(TestFuncWasSparse(), seed, buf, lens, lenNum);
  const double isSparse =
    BenchmarkChangeLenTmpl(TestFuncIsSparse(), seed, buf, lens, lenNum);
  const double wasDense =
    BenchmarkChangeLenTmpl(TestFuncWasDense(), seed, buf, lens, lenNum);
  const double isDense =
    BenchmarkChangeLenTmpl(TestFuncIsDense(), seed, buf, lens, lenNum);

  fprintf(stdout, "%-20s%-7.3lf %-7.3lf %-7.2f%% %-7.3lf %-7.3lf %.2f%%\n",
          label,
          wasSparse, isSparse, 100*(wasSparse-isSparse)/wasSparse,
          wasDense, isDense, 100*(wasDense-isDense)/wasDense);
}

// Measures performance while the string length changes from call to call.
// The purpose is to see how balanced branches impact the performance.
//
static void
benchmarkToggleLens(uint64_t seed, char* buf)
{
  fprintf(stdout,"\nChanging length (in second):");
  fprintf(stdout, "\n%-24s%-8s%-8s%s%-8s%-8s%s\n%s\n", "len",
          "was (s)", "is (s)", "diff (s)",
          "was (d)", "is (d)", "diff (d)",
          separator);

  uint32_t pair[] = {4, 9};
  reportToggleRow("4,9", seed, buf, pair, 2);

  uint32_t triple[] = {1, 4, 9};
  reportToggleRow("1,4,9", seed, buf, triple, 3);

  uint32_t mixedShort[] = {1, 33, 4, 9};
  reportToggleRow("1,33,4,9", seed, buf, mixedShort, 4);

  uint32_t mixedLong[] = {16, 33, 64, 89};
  reportToggleRow("16,33,64,89", seed, buf, mixedLong, 4);
}
// Replace the contents of result with num random strings. Each length is
// derived from rand() scaled into the range starting at min and ending
// before max; each byte is rand() % 255.
static void
genRandomString(uint32_t min, uint32_t max,
                uint32_t num, std::vector<std::string>& result)
{
  // Maps a rand() value onto the width of the requested length range.
  const double span = (max - min) / (RAND_MAX + 1.0);

  result.clear();
  result.reserve(num);

  for (uint32_t remaining = num; remaining > 0; remaining--) {
    const uint32_t len = (rand() * span) + min;
    std::string str(len, '\0');
    for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
      *it = rand() % 255;
    }
    result.push_back(str);
  }
}
// Return the population standard deviation of the given numbers, i.e.
// sqrt(sum((avg - x)^2) / count). An empty vector yields 0.0.
static double
standarDeviation(const std::vector<uint32_t>& v)
{
  // Without this guard an empty input would compute 0/0 and return NaN.
  if (v.empty())
    return 0.0;

  uint64_t total = 0;
  for (std::vector<uint32_t>::const_iterator i = v.begin(), e = v.end();
       i != e; ++i) {
    total += *i;
  }

  const double avg = total / (double)v.size();
  double sd = 0;

  for (std::vector<uint32_t>::const_iterator i = v.begin(), e = v.end();
       i != e; ++i) {
    const double t = avg - *i;
    sd = sd + t*t;
  }

  return sqrt(sd/v.size());
}
static vector<double>
benchmarkConflictHelper(uint64_t seed, uint32_t bucketNum,
const vector<string>& strs)
{
if (bucketNum & (bucketNum - 1)) {
bucketNum = (1L << (log2_floor(bucketNum) + 1));
}
uint32_t mask = bucketNum - 1;
vector<uint32_t> conflictWasSparse(bucketNum);
vector<uint32_t> conflictIsSparse(bucketNum);
vector<uint32_t> conflictWasDense(bucketNum);
vector<uint32_t> conflictIsDense(bucketNum);
conflictWasSparse.resize(bucketNum);
conflictIsSparse.resize(bucketNum);
conflictWasDense.resize(bucketNum);
conflictIsDense.resize(bucketNum);
for (vector<string>::const_iterator i = strs.begin(), e = strs.end();
i != e; ++i) {
uint32_t h1 = original_hash_sparse(seed, i->c_str(), i->size());
uint32_t h2 = hash_sparse_sse42(seed, i->c_str(), i->size());
uint32_t h3 = original_hash_dense(seed, h1, i->c_str(), i->size());
uint32_t h4 = hash_dense_sse42(seed, h2, i->c_str(), i->size());
conflictWasSparse[h1 & mask]++;
conflictIsSparse[h2 & mask]++;
conflictWasDense[h3 & mask]++;
conflictIsDense[h4 & mask]++;
}
#if 0
std::sort(conflictWas.begin(), conflictWas.end(), std::greater<int>());
std::sort(conflictIs.begin(), conflictIs.end(), std::greater<int>());
fprintf(stderr, "%d %d %d %d vs %d %d %d %d\n",
conflictWas[0], conflictWas[1], conflictWas[2], conflictWas[3],
conflictIs[0], conflictIs[1], conflictIs[2], conflictIs[3]);
#endif
vector<double> ret(4);
ret[0] = standarDeviation(conflictWasSparse);
ret[1] = standarDeviation(conflictIsSparse);
ret[2] = standarDeviation(conflictWasDense);
ret[3] = standarDeviation(conflictIsDense);
return ret;
}
// For every combination of string-length range, bucket count and load
// factor, generate bucketCount * loadFactor random strings and print the
// standard deviation of the per-bucket counts for all four hash variants,
// together with the baseline-minus-new differences.
static void
benchmarkConflict(uint64_t seed)
{
  static const float kLoadFactor[] = { 0.5f, 1.0f, 2.0f, 4.0f, 8.0f };
  static const int kBucketNum[] = { 512, 1024, 2048, 4096, 8192, 16384};
  static const int kLenRange[][2] = { {1,3}, {4, 15}, {16, 127}, {128, 1024}, {1, 1024}};

  const uint32_t loadCount = sizeof(kLoadFactor)/sizeof(kLoadFactor[0]);
  const uint32_t bucketCount = sizeof(kBucketNum)/sizeof(kBucketNum[0]);
  const uint32_t rangeCount = sizeof(kLenRange)/sizeof(kLenRange[0]);

  fprintf(stdout,
          "\nBechmarking conflict (stand deviation of conflict)\n%s\n",
          separator);

  for (uint32_t r = 0; r < rangeCount; r++) {
    const int lenMin = kLenRange[r][0];
    const int lenMax = kLenRange[r][1];

    fprintf(stdout, "\nlen range from %d - %d\n", lenMin, lenMax);
    fprintf(stdout, "%-10s %-12s %-10s %-10s diff (s) %-10s %-10s diff (d)\n%s\n",
            "bucket", "load-factor", "was (s)", "is (s)", "was (d)", "is (d)",
            separator);

    for (uint32_t b = 0; b < bucketCount; ++b) {
      for (uint32_t f = 0; f < loadCount; ++f) {
        const int strNum = kBucketNum[b] * kLoadFactor[f];
        std::vector<std::string> strs(strNum);
        genRandomString(lenMin, lenMax, strNum, strs);

        const std::vector<double> sd =
          benchmarkConflictHelper(seed, kBucketNum[b], strs);

        fprintf(stdout, "%-10d %-12.2f %-10.2f %-10.2f %-10.2f %-10.2f %-10.2f %.2f\n",
                kBucketNum[b], kLoadFactor[f],
                sd[0], sd[1], sd[0] - sd[1],
                sd[2], sd[3], sd[2] - sd[3]);
      }
    }
  }
}
// Seed the C random number generator from the current time, fill a 4096-byte
// work buffer and run the three benchmark groups with one random seed.
static void
benchmarkHashFunc()
{
  char buf[4096];

  srand(time(0));
  const uint64_t seed = (uint32_t) rand();

  // Buffer contents depend on the process id, so they differ between runs.
  const char first = getpid() % 'a';
  int pos = 0;
  while (pos < (int)sizeof(buf)) {
    buf[pos] = (first + pos) % 255;
    pos++;
  }

  benchmarkConflict(seed);
  benchmarkIndividual(seed, buf);
  benchmarkToggleLens(seed, buf);
}
// Entry point of the micro benchmark; command-line arguments are ignored.
int
main(int argc, char** argv)
{
  (void)argc;
  (void)argv;

  fputs("========================\nMicro benchmark...\n", stdout);
  benchmarkHashFunc();
  return 0;
}

81
src/x64/test/test.cpp Normal file
View File

@ -0,0 +1,81 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <map>
#define LUAJIT_SECURITY_STRHASH 1
#include "test_util.hpp"
#include "../../lj_str.h"
str_sparse_hashfn hash_sparse;
str_dense_hashfn hash_dense;
#include "../../lj_str_hash.c"
using namespace std;
// Run the sparse and dense hash under test over prefixes of several lengths
// taken from a process-dependent buffer. Nothing is compared; the test only
// checks that the calls complete. Always returns true.
static bool
smoke_test()
{
  static const uint32_t kLens[] = {3, 4, 5, 7, 8, 16, 17, 24, 25, 32, 33, 127, 128,
                                   255, 256, 257};
  const unsigned lenCount = sizeof(kLens)/sizeof(kLens[0]);
  char buf[1024];

  fprintf(stdout, "running smoke tests...\n");

  const char first = getpid() % 'a';
  srand(time(0));

  int pos = 0;
  while (pos < (int)sizeof(buf)) {
    buf[pos] = (first + pos) % 255;
    pos++;
  }

  for (unsigned k = 0; k < lenCount; k++) {
    const uint32_t len = kLens[k];
    std::string s(buf, len);
    // The sparse hash is the input hash of the dense hash.
    const uint32_t h = hash_sparse_sse42(rand(), s.c_str(), len);
    test_printf("%d", h);
    test_printf("%d", hash_dense_sse42(rand(), h, s.c_str(), len));
  }

  return true;
}
// Check log2_floor() against a table of expected results: 0, 1, 2, the
// values around every power of two from 2^2 to 2^30, and UINT32 max.
// The first mismatch is reported on stderr and terminates the process with
// exit status 1; otherwise true is returned.
static bool
verify_log2()
{
  typedef std::map<uint32_t, uint32_t> ExpectMap;

  fprintf(stdout, "verify log2...\n");

  // value -> expected log2_floor(value)
  ExpectMap expected;
  expected[0] = (uint32_t)-1;
  expected[1] = 0;
  expected[2] = 1;
  for (int bit = 2; bit < 31; bit++) {
    const int pow2 = 1 << bit;
    expected[pow2 - 2] = bit - 1;
    expected[pow2 - 1] = bit - 1;
    expected[pow2] = bit;
    expected[pow2 + 1] = bit;
  }
  expected[(uint32_t)-1] = 31;

  bool ok = true;
  for (ExpectMap::iterator it = expected.begin(); it != expected.end(); ++it) {
    const uint32_t value = it->first;
    const uint32_t want = it->second;
    const uint32_t got = log2_floor(value);
    if (want == got)
      continue;
    ok = false;
    fprintf(stderr, "log2(%u) expect %u, get %u\n", value, want, got);
    exit(1);
  }
  return ok;
}
// Entry point of the unit tests; command-line arguments are ignored.
// Prints "succ" and returns 0 when TestErrMsgMgr reports no error,
// otherwise prints "fail" and returns -1.
int
main(int argc, char** argv)
{
  (void)argc;
  (void)argv;

  fputs("=======================\nRun unit testing...\n", stdout);

  ASSERT(smoke_test(), "smoke_test test failed");
  ASSERT(verify_log2(), "log2 failed");

  fputs(TestErrMsgMgr::noError() ? "succ\n\n" : "fail\n\n", stdout);
  if (TestErrMsgMgr::noError())
    return 0;
  return -1;
}

View File

@ -0,0 +1,67 @@
--[[
Given two strings s1 and s2 with identical content, test that they end up
being the same string object. The purpose of this test is to make sure the
hash function does not accidentally include extraneous bytes before or after
the string in question.
]]
local ffi = require("ffi")
local C = ffi.C
ffi.cdef[[
void free(void*);
char* malloc(size_t);
void *memset(void*, int, size_t);
void *memcpy(void*, void*, size_t);
long time(void*);
void srandom(unsigned);
long random(void);
]]
-- Create one random byte string at two different memory locations, each
-- surrounded by different junk bytes, turn both into Lua strings and compare
-- them. The string length is len_min + random() % (len_max - len_min).
-- Returns 1 if the two Lua strings differ; returns nothing if they are equal.
local function test_equal(len_min, len_max)
-- source string is wrapped by 16-byte-junk both before and after the
-- string
local x = C.random()
local l = len_min + x % (len_max - len_min);
local buf_len = tonumber(l + 16 * 2)
local src_buf = C.malloc(buf_len)
-- fill the whole source buffer (junk + string + junk) with random bytes
for i = 0, buf_len - 1 do
src_buf[i] = C.random() % 255
end
-- dest string is the clone of the source string, but it is sandwiched
-- by different junk bytes
local dest_buf = C.malloc(buf_len)
C.memset(dest_buf, 0x5a, buf_len)
-- the copy starts at a random offset in 8..15, so it also sits at a
-- different position in its buffer than the source string (offset 16)
local ofst = 8 + (C.random() % 8)
C.memcpy(dest_buf + ofst, src_buf + 16, l);
local str1 = ffi.string(src_buf + 16, l)
local str2 = ffi.string(dest_buf + ofst, l)
-- ffi.string() has copied the bytes, so the buffers can be released now
C.free(src_buf)
C.free(dest_buf)
if str1 ~= str2 then
-- Oops, look like hash function mistakenly include extraneous bytes
-- close to the string
return 1 -- mismatch: signal failure to the caller
end
end
-- Test driver: each pair of adjacent entries in lens is passed to
-- test_equal() as (len_min, len_max), iter times per pair. The process exits
-- with status 1 on the first mismatch and with status 0 otherwise.
-- A wider list of boundaries is kept here, disabled:
--local lens = {1, 4, 16, 128, 1024}
local lens = {128, 1024}
local iter = 1000
for i = 1, #lens - 1 do
for j = 1, iter do
-- test_equal() returns nil on success and 1 on a mismatch
if test_equal(lens[i], lens[i+1]) ~= nil then
os.exit(1)
end
end
end
os.exit(0)

View File

@ -0,0 +1,21 @@
#include <stdarg.h>
#include <stdio.h>
#include "test_util.hpp"
using namespace std;
std::vector<TestErrMsg> TestErrMsgMgr::_errMsg;
// printf-style sink: formats the arguments and writes the result to
// /dev/null. It exists so that callers can "use" a computed value without
// producing visible output.
// If /dev/null cannot be opened, the call does nothing.
void
test_printf(const char* format, ...)
{
  FILE* devNull = fopen("/dev/null", "w");
  if (devNull == NULL) {
    // Nothing to write to and nothing to close: fclose() must not be called
    // with a null stream.
    return;
  }

  va_list args;
  va_start(args, format);
  (void)vfprintf(devNull, format, args);
  va_end(args);

  fclose(devNull);
}

107
src/x64/test/test_util.d Normal file
View File

@ -0,0 +1,107 @@
test_util.o: test_util.cxx /usr/include/stdc-predef.h \
/usr/lib/gcc/x86_64-redhat-linux/10/include/stdarg.h \
/usr/include/stdio.h /usr/include/bits/libc-header-start.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/bits/wordsize.h /usr/include/bits/long-double.h \
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
/usr/lib/gcc/x86_64-redhat-linux/10/include/stddef.h \
/usr/include/bits/types.h /usr/include/bits/timesize.h \
/usr/include/bits/typesizes.h /usr/include/bits/time64.h \
/usr/include/bits/types/__fpos_t.h /usr/include/bits/types/__mbstate_t.h \
/usr/include/bits/types/__fpos64_t.h /usr/include/bits/types/__FILE.h \
/usr/include/bits/types/FILE.h /usr/include/bits/types/struct_FILE.h \
/usr/include/bits/types/cookie_io_functions_t.h \
/usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \
/usr/include/bits/stdio.h test_util.hpp /usr/include/sys/time.h \
/usr/include/bits/types/time_t.h \
/usr/include/bits/types/struct_timeval.h /usr/include/sys/select.h \
/usr/include/bits/select.h /usr/include/bits/types/sigset_t.h \
/usr/include/bits/types/__sigset_t.h \
/usr/include/bits/types/struct_timespec.h /usr/include/bits/endian.h \
/usr/include/bits/endianness.h /usr/include/c++/10/string \
/usr/include/c++/10/x86_64-redhat-linux/bits/c++config.h \
/usr/include/c++/10/x86_64-redhat-linux/bits/os_defines.h \
/usr/include/c++/10/x86_64-redhat-linux/bits/cpu_defines.h \
/usr/include/c++/10/bits/stringfwd.h \
/usr/include/c++/10/bits/memoryfwd.h \
/usr/include/c++/10/bits/char_traits.h \
/usr/include/c++/10/bits/stl_algobase.h \
/usr/include/c++/10/bits/functexcept.h \
/usr/include/c++/10/bits/exception_defines.h \
/usr/include/c++/10/bits/cpp_type_traits.h \
/usr/include/c++/10/ext/type_traits.h \
/usr/include/c++/10/ext/numeric_traits.h \
/usr/include/c++/10/bits/stl_pair.h /usr/include/c++/10/bits/move.h \
/usr/include/c++/10/type_traits \
/usr/include/c++/10/bits/stl_iterator_base_types.h \
/usr/include/c++/10/bits/stl_iterator_base_funcs.h \
/usr/include/c++/10/bits/concept_check.h \
/usr/include/c++/10/debug/assertions.h \
/usr/include/c++/10/bits/stl_iterator.h \
/usr/include/c++/10/bits/ptr_traits.h /usr/include/c++/10/debug/debug.h \
/usr/include/c++/10/bits/predefined_ops.h \
/usr/include/c++/10/bits/postypes.h /usr/include/c++/10/cwchar \
/usr/include/wchar.h /usr/include/bits/floatn.h \
/usr/include/bits/floatn-common.h /usr/include/bits/wchar.h \
/usr/include/bits/types/wint_t.h /usr/include/bits/types/mbstate_t.h \
/usr/include/bits/types/locale_t.h /usr/include/bits/types/__locale_t.h \
/usr/include/c++/10/cstdint \
/usr/lib/gcc/x86_64-redhat-linux/10/include/stdint.h \
/usr/include/stdint.h /usr/include/bits/stdint-intn.h \
/usr/include/bits/stdint-uintn.h /usr/include/c++/10/bits/allocator.h \
/usr/include/c++/10/x86_64-redhat-linux/bits/c++allocator.h \
/usr/include/c++/10/ext/new_allocator.h /usr/include/c++/10/new \
/usr/include/c++/10/exception /usr/include/c++/10/bits/exception.h \
/usr/include/c++/10/bits/exception_ptr.h \
/usr/include/c++/10/bits/cxxabi_init_exception.h \
/usr/include/c++/10/typeinfo /usr/include/c++/10/bits/hash_bytes.h \
/usr/include/c++/10/bits/nested_exception.h \
/usr/include/c++/10/bits/localefwd.h \
/usr/include/c++/10/x86_64-redhat-linux/bits/c++locale.h \
/usr/include/c++/10/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/10/iosfwd \
/usr/include/c++/10/cctype /usr/include/ctype.h \
/usr/include/c++/10/bits/ostream_insert.h \
/usr/include/c++/10/bits/cxxabi_forced.h \
/usr/include/c++/10/bits/stl_function.h \
/usr/include/c++/10/backward/binders.h \
/usr/include/c++/10/bits/range_access.h \
/usr/include/c++/10/initializer_list \
/usr/include/c++/10/bits/iterator_concepts.h \
/usr/include/c++/10/concepts /usr/include/c++/10/bits/range_cmp.h \
/usr/include/c++/10/bits/int_limits.h \
/usr/include/c++/10/bits/basic_string.h \
/usr/include/c++/10/ext/atomicity.h \
/usr/include/c++/10/x86_64-redhat-linux/bits/gthr.h \
/usr/include/c++/10/x86_64-redhat-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/bits/sched.h \
/usr/include/bits/types/struct_sched_param.h /usr/include/bits/cpu-set.h \
/usr/include/time.h /usr/include/bits/time.h /usr/include/bits/timex.h \
/usr/include/bits/types/clock_t.h /usr/include/bits/types/struct_tm.h \
/usr/include/bits/types/clockid_t.h /usr/include/bits/types/timer_t.h \
/usr/include/bits/types/struct_itimerspec.h \
/usr/include/bits/pthreadtypes.h /usr/include/bits/thread-shared-types.h \
/usr/include/bits/pthreadtypes-arch.h /usr/include/bits/struct_mutex.h \
/usr/include/bits/struct_rwlock.h /usr/include/bits/setjmp.h \
/usr/include/c++/10/x86_64-redhat-linux/bits/atomic_word.h \
/usr/include/c++/10/ext/alloc_traits.h \
/usr/include/c++/10/bits/alloc_traits.h \
/usr/include/c++/10/bits/stl_construct.h \
/usr/include/c++/10/ext/string_conversions.h /usr/include/c++/10/cstdlib \
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
/usr/include/endian.h /usr/include/bits/byteswap.h \
/usr/include/bits/uintn-identity.h /usr/include/alloca.h \
/usr/include/bits/stdlib-bsearch.h /usr/include/bits/stdlib-float.h \
/usr/include/c++/10/bits/std_abs.h /usr/include/c++/10/cstdio \
/usr/include/c++/10/cerrno /usr/include/errno.h \
/usr/include/bits/errno.h /usr/include/linux/errno.h \
/usr/include/asm/errno.h /usr/include/asm-generic/errno.h \
/usr/include/asm-generic/errno-base.h /usr/include/bits/types/error_t.h \
/usr/include/c++/10/bits/charconv.h \
/usr/include/c++/10/bits/functional_hash.h \
/usr/include/c++/10/bits/basic_string.tcc /usr/include/c++/10/vector \
/usr/include/c++/10/bits/stl_uninitialized.h \
/usr/include/c++/10/bits/stl_vector.h \
/usr/include/c++/10/bits/stl_bvector.h \
/usr/include/c++/10/bits/vector.tcc

View File

@ -0,0 +1,57 @@
#ifndef _TEST_UTIL_HPP_
#define _TEST_UTIL_HPP_
#include <sys/time.h> // gettimeofday()
#include <string>
#include <vector>
// One recorded test failure: the location it was reported from and its
// message text.
struct TestErrMsg
{
  const char* fileName;  // stored as passed in; the text itself is not copied
  unsigned lineNo;
  std::string errMsg;    // holds its own copy of the message

  // FN: file name, LN: line number, Err: message text.
  TestErrMsg(const char* FN, unsigned LN, const char* Err)
    : fileName(FN),
      lineNo(LN),
      errMsg(Err)
  {
  }
};
class TestErrMsgMgr
{
public:
static std::vector<TestErrMsg> getError();
static void
addError(const char* fileName, unsigned lineNo, const char* Err) {
_errMsg.push_back(TestErrMsg(fileName, lineNo, Err));
}
static bool noError() {
return _errMsg.empty();
}
private:
static std::vector<TestErrMsg> _errMsg;
};
// Record a failure, tagged with the current file and line, when condition
// `c` is false; `e` is the message handed to TestErrMsgMgr::addError.
// Execution continues either way: the macro neither aborts nor returns.
// NOTE(review): this expands to a bare `if` statement, so take care when
// using it as the body of an unbraced if/else.
#define ASSERT(c, e) \
if (!(c)) { TestErrMsgMgr::addError(__FILE__, __LINE__, (e)); }
// Simple wall-clock stopwatch built on gettimeofday().
//
// Call start(), run the code being measured, call stop(), then read the
// interval with getElapseInSecond(). Both time stamps start out zeroed, so
// reading the interval before start()/stop() yields 0 rather than an
// indeterminate value.
class TestClock
{
public:
  // Value-initialize both time stamps (all fields zero).
  TestClock() : _start(), _end() {}

  // Record the current time as the beginning of the interval.
  void start() { gettimeofday(&_start, 0); }

  // Record the current time as the end of the interval.
  void stop() { gettimeofday(&_end, 0); }

  // Interval between the recorded start and stop times, in seconds.
  double getElapseInSecond() const {
    // Whole seconds plus the (possibly negative) microsecond difference.
    return (_end.tv_sec - _start.tv_sec)
           + ((long)_end.tv_usec - (long)_start.tv_usec) / 1000000.0;
  }

private:
  struct timeval _start, _end;
};
// printf-style sink: the formatted output is written to /dev/null. Its only
// purpose is to keep the data passed to it alive (i.e. actually used).
// The format attribute asks the compiler to check the variadic arguments
// against the format string (parameter 1, arguments starting at 2).
extern void test_printf(const char* format, ...)
__attribute__ ((format (printf, 1, 2)));
#endif //_TEST_UTIL_HPP_