From b95294572ce8efa527e0b0118bb7168117afd171 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 24 Feb 2010 23:17:17 +0100 Subject: [PATCH] Move SIMD constants to jit_State to keep them in the low 4GB. --- src/lj_ir.c | 7 +------ src/lj_iropt.h | 7 +++---- src/lj_jit.h | 28 ++++++++++++++++++++-------- src/lj_state.c | 1 + src/lj_trace.c | 14 ++++++++++++++ src/lj_trace.h | 2 ++ 6 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/lj_ir.c b/src/lj_ir.c index 30ac026b..602be31a 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -235,6 +235,7 @@ TRef lj_ir_knum_addr(jit_State *J, cTValue *tv) goto found; ref = ir_nextk(J); ir = IR(ref); + lua_assert(checkptr32(tv)); setmref(ir->ptr, tv); ir->t.irt = IRT_NUM; ir->o = IR_KNUM; @@ -250,12 +251,6 @@ TRef lj_ir_knum_nn(jit_State *J, uint64_t nn) return lj_ir_knum_addr(J, ir_knum_find(J, nn)); } -/* Special 16 byte aligned SIMD constants. */ -LJ_DATADEF LJ_ALIGN(16) cTValue lj_ir_knum_tv[4] = { - { U64x(7fffffff,ffffffff) }, { U64x(7fffffff,ffffffff) }, - { U64x(80000000,00000000) }, { U64x(80000000,00000000) } -}; - /* Check whether a number is int and return it. -0 is NOT considered an int. */ static int numistrueint(lua_Number n, int32_t *kp) { diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 947fa820..1884892a 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -64,10 +64,9 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) #define lj_ir_knum_one(J) lj_ir_knum_nn(J, U64x(3ff00000,00000000)) #define lj_ir_knum_tobit(J) lj_ir_knum_nn(J, U64x(43380000,00000000)) -/* Special 16 byte aligned SIMD constants. */ -LJ_DATA LJ_ALIGN(16) cTValue lj_ir_knum_tv[4]; -#define lj_ir_knum_abs(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[0]) -#define lj_ir_knum_neg(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[2]) +/* Special 128 bit SIMD constants. */ +#define lj_ir_knum_abs(J) lj_ir_knum_addr(J, LJ_KSIMD(J, LJ_KSIMD_ABS)) +#define lj_ir_knum_neg(J) lj_ir_knum_addr(J, LJ_KSIMD(J, LJ_KSIMD_NEG)) /* Access to constants. */ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); diff --git a/src/lj_jit.h b/src/lj_jit.h index 23adf30d..f0472282 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -201,6 +201,17 @@ typedef struct BPropEntry { /* Number of slots for the backpropagation cache. Must be a power of 2. */ #define BPROP_SLOTS 16 +/* 128 bit SIMD constants. */ +enum { + LJ_KSIMD_ABS, + LJ_KSIMD_NEG, + LJ_KSIMD__MAX +}; + +/* Get 16 byte aligned pointer to SIMD constant. */ +#define LJ_KSIMD(J, n) \ + ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) + /* Fold state is used to fold instructions on-the-fly. */ typedef struct FoldState { IRIns ins; /* Currently emitted instruction. */ @@ -214,21 +225,21 @@ typedef struct jit_State { lua_State *L; /* Current Lua state. */ const BCIns *pc; /* Current PC. */ - BCReg maxslot; /* Relative to baseslot. */ - - uint32_t flags; /* JIT engine flags. */ - TRef *base; /* Current frame base, points into J->slots. */ - BCReg baseslot; /* Current frame base, offset into J->slots. */ GCfunc *fn; /* Current function. */ GCproto *pt; /* Current prototype. */ + TRef *base; /* Current frame base, points into J->slots. */ - FoldState fold; /* Fold state. */ + uint32_t flags; /* JIT engine flags. */ + BCReg maxslot; /* Relative to baseslot. */ + BCReg baseslot; /* Current frame base, offset into J->slots. */ uint8_t mergesnap; /* Allowed to merge with next snapshot. */ uint8_t needsnap; /* Need snapshot before recording next bytecode. */ IRType1 guardemit; /* Accumulated IRT_GUARD for emitted instructions. */ uint8_t unused1; + FoldState fold; /* Fold state. */ + const BCIns *bc_min; /* Start of allowed bytecode range for root trace. */ MSize bc_extent; /* Extent of the range. */ @@ -241,19 +252,20 @@ typedef struct jit_State { int32_t retdepth; /* Return frame depth (count of RETF). */ MRef knum; /* Pointer to chained array of KNUM constants. */ + TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ IRRef irbotlim; /* Lower limit of instuction buffer (biased). */ IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */ + MSize sizesnap; /* Size of temp. snapshot buffer. */ SnapShot *snapbuf; /* Temp. snapshot buffer. */ SnapEntry *snapmapbuf; /* Temp. snapshot map buffer. */ - MSize sizesnap; /* Size of temp. snapshot buffer. */ MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ - Trace **trace; /* Array of traces. */ TraceNo curtrace; /* Current trace number (if not 0). Kept in J->cur. */ + Trace **trace; /* Array of traces. */ TraceNo freetrace; /* Start of scan for next free trace. */ MSize sizetrace; /* Size of trace array. */ diff --git a/src/lj_state.c b/src/lj_state.c index 8f8be97b..3305fd18 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -134,6 +134,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) lj_lex_init(L); fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ g->gc.threshold = 4*g->gc.total; + lj_trace_initstate(g); return NULL; } diff --git a/src/lj_trace.c b/src/lj_trace.c index d55d3a85..e476122c 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -275,6 +275,20 @@ int lj_trace_flushall(lua_State *L) return 0; } +/* Initialize JIT compiler state. */ +void lj_trace_initstate(global_State *g) +{ + jit_State *J = G2J(g); + TValue *tv; + /* Initialize SIMD constants. */ + tv = LJ_KSIMD(J, LJ_KSIMD_ABS); + tv[0].u64 = U64x(7fffffff,ffffffff); + tv[1].u64 = U64x(7fffffff,ffffffff); + tv = LJ_KSIMD(J, LJ_KSIMD_NEG); + tv[0].u64 = U64x(80000000,00000000); + tv[1].u64 = U64x(80000000,00000000); +} + /* Free everything associated with the JIT compiler state. */ void lj_trace_freestate(global_State *g) { diff --git a/src/lj_trace.h b/src/lj_trace.h index 6944bd9c..4d19454f 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h @@ -28,6 +28,7 @@ LJ_FUNC void lj_trace_reenableproto(GCproto *pt); LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); LJ_FUNC int lj_trace_flush(jit_State *J, TraceNo traceno); LJ_FUNC int lj_trace_flushall(lua_State *L); +LJ_FUNC void lj_trace_initstate(global_State *g); LJ_FUNC void lj_trace_freestate(global_State *g); /* Event handling. */ @@ -42,6 +43,7 @@ LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); #else #define lj_trace_flushall(L) (UNUSED(L), 0) +#define lj_trace_initstate(g) UNUSED(g) #define lj_trace_freestate(g) UNUSED(g) #define lj_trace_freeproto(g, pt) (UNUSED(g), UNUSED(pt), (void)0) #define lj_trace_abort(g) UNUSED(g)