From a98aede37772797b4471e1a094452051edff5862 Mon Sep 17 00:00:00 2001
From: Mike Pall
+
+Unlike the other bit.* operations, bit.tobit()
+converts a cdata number via int64_t to int32_t and
+returns a Lua number.
+
+For bit.band(), bit.bor() and bit.bxor(), the
+conversion to int64_t or uint64_t applies to
+all arguments, if any argument is a cdata number.
+
+For all other operations, only the first argument is used to determine
+the output type. This implies that a cdata number is accepted as the
+shift count for shifts and rotates, but a cdata shift count alone does
+not cause the result to be a cdata number.
+
Comparisons of cdata objects
@@ -1222,7 +1238,6 @@ value.
Please make sure to require the module before using any of diff --git a/src/Makefile.dep b/src/Makefile.dep index f841767b..902d2912 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -7,7 +7,8 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ lj_lib.h lj_libdef.h lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h + lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_gc.h lj_cdata.h \ + lj_cconv.h lj_carith.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ lj_libdef.h @@ -17,7 +18,7 @@ lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_state.h \ + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \ lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ @@ -58,8 +59,8 @@ lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ - lj_cdata.h lj_carith.h + lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \ + lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h 
lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ diff --git a/src/lib_bit.c b/src/lib_bit.c index 93fead92..85821b81 100644 --- a/src/lib_bit.c +++ b/src/lib_bit.c @@ -13,25 +13,82 @@ #include "lj_obj.h" #include "lj_err.h" #include "lj_str.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#include "lj_cdata.h" +#include "lj_cconv.h" +#include "lj_carith.h" +#endif +#include "lj_ff.h" #include "lj_lib.h" /* ------------------------------------------------------------------------ */ #define LJLIB_MODULE_bit -LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) +#if LJ_HASFFI +static int bit_result64(lua_State *L, CTypeID id, uint64_t x) { + GCcdata *cd = lj_cdata_new_(L, id, 8); + *(uint64_t *)cdataptr(cd) = x; + setcdataV(L, L->base-1, cd); + return FFH_RES(1); +} +#endif + +LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit) +{ +#if LJ_HASFFI + CTypeID id = 0; + setintV(L->base-1, (int32_t)lj_carith_check64(L, 1, &id)); + return FFH_RES(1); +#else lj_lib_checknumber(L, 1); return FFH_RETRY; +#endif +} + +LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) +{ +#if LJ_HASFFI + CTypeID id = 0; + uint64_t x = lj_carith_check64(L, 1, &id); + return id ? bit_result64(L, id, ~x) : FFH_RETRY; +#else + lj_lib_checknumber(L, 1); + return FFH_RETRY; +#endif +} + +LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) +{ +#if LJ_HASFFI + CTypeID id = 0; + uint64_t x = lj_carith_check64(L, 1, &id); + return id ? 
bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY; +#else + lj_lib_checknumber(L, 1); + return FFH_RETRY; +#endif } -LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) -LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) { +#if LJ_HASFFI + CTypeID id = 0, id2 = 0; + uint64_t x = lj_carith_check64(L, 1, &id); + int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2); + if (id) { + x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift); + return bit_result64(L, id, x); + } + if (id2) setintV(L->base+1, sh); + return FFH_RETRY; +#else lj_lib_checknumber(L, 1); lj_lib_checkbit(L, 2); return FFH_RETRY; +#endif } LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) @@ -40,9 +97,29 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR) LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) { +#if LJ_HASFFI + CTypeID id = 0; + TValue *o = L->base, *top = L->top; + int i = 0; + do { lj_carith_check64(L, ++i, &id); } while (++o < top); + if (id) { + CTState *cts = ctype_cts(L); + CType *ct = ctype_get(cts, id); + int op = curr_func(L)->c.ffid - (int)FF_bit_bor; + uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0; + o = L->base; + do { + lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0); + if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x; + } while (++o < top); + return bit_result64(L, id, y); + } + return FFH_RETRY; +#else int i = 0; do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); return FFH_RETRY; +#endif } LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) @@ -51,12 +128,21 @@ LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) LJLIB_CF(bit_tohex) { +#if LJ_HASFFI + CTypeID id = 0, id2 = 0; + uint64_t b = lj_carith_check64(L, 1, &id); + int32_t i, dig = id ? 16 : 8; + int32_t n = L->base+1>=L->top ? 
dig : (int32_t)lj_carith_check64(L, 2, &id2); + char buf[16]; +#else uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); - int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); - const char *hexdigits = "0123456789abcdef"; + int32_t i, dig = 8; + int32_t n = L->base+1>=L->top ? dig : lj_lib_checkbit(L, 2); char buf[8]; +#endif + const char *hexdigits = "0123456789abcdef"; if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } - if (n > 8) n = 8; + if (n > dig) n = dig; for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } lua_pushlstring(L, buf, (size_t)n); return 1; diff --git a/src/lj_carith.c b/src/lj_carith.c index 18708d66..9f3208a8 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c @@ -11,10 +11,12 @@ #include "lj_err.h" #include "lj_tab.h" #include "lj_meta.h" +#include "lj_ir.h" #include "lj_ctype.h" #include "lj_cconv.h" #include "lj_cdata.h" #include "lj_carith.h" +#include "lj_strscan.h" /* -- C data arithmetic --------------------------------------------------- */ @@ -270,6 +272,80 @@ int lj_carith_op(lua_State *L, MMS mm) return lj_carith_meta(L, cts, &ca, mm); } +/* -- 64 bit bit operations helpers --------------------------------------- */ + +#if LJ_64 +#define B64DEF(name) \ + static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh) +#else +/* Not inlined on 32 bit archs, since some of these are quite lengthy. 
*/ +#define B64DEF(name) \ + uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh) +#endif + +B64DEF(shl64) { return x << (sh&63); } +B64DEF(shr64) { return x >> (sh&63); } +B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); } +B64DEF(rol64) { return lj_rol(x, (sh&63)); } +B64DEF(ror64) { return lj_ror(x, (sh&63)); } + +#undef B64DEF + +uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op) +{ + switch (op) { + case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break; + case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break; + case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break; + case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break; + case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break; + default: lua_assert(0); break; + } + return x; +} + +/* Equivalent to lj_lib_checkbit(), but handles cdata. */ +uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id) +{ + TValue *o = L->base + narg-1; + if (o >= L->top) { + err: + lj_err_argt(L, narg, LUA_TNUMBER); + } else if (LJ_LIKELY(tvisnumber(o))) { + /* Handled below. */ + } else if (tviscdata(o)) { + CTState *cts = ctype_cts(L); + uint8_t *sp = (uint8_t *)cdataptr(cdataV(o)); + CTypeID sid = cdataV(o)->ctypeid; + CType *s = ctype_get(cts, sid); + uint64_t x; + if (ctype_isref(s->info)) { + sp = *(void **)sp; + sid = ctype_cid(s->info); + } + s = ctype_raw(cts, sid); + if (ctype_isenum(s->info)) s = ctype_child(cts, s); + if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) == + CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8) + *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */ + else if (!*id) + *id = CTID_INT64; /* Use int64_t, unless already set. 
*/ + lj_cconv_ct_ct(cts, ctype_get(cts, *id), s, + (uint8_t *)&x, sp, CCF_ARG(narg)); + return x; + } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) { + goto err; + } + if (LJ_LIKELY(tvisint(o))) { + return intV(o); + } else { + int32_t i = lj_num2bit(numV(o)); + if (LJ_DUALNUM) setintV(o, i); + return i; + } +} + + /* -- 64 bit integer arithmetic helpers ----------------------------------- */ #if LJ_32 && LJ_HASJIT diff --git a/src/lj_carith.h b/src/lj_carith.h index ae17df00..b1a65d35 100644 --- a/src/lj_carith.h +++ b/src/lj_carith.h @@ -12,6 +12,16 @@ LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); +#if LJ_32 +LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); +LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh); +LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh); +LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh); +LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh); +#endif +LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op); +LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id); + #if LJ_32 && LJ_HASJIT LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); #endif diff --git a/src/lj_crecord.c b/src/lj_crecord.c index a5d896eb..2bf0bc1d 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1626,6 +1626,101 @@ void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) crec_finalizer(J, J->base[0], &rd->argv[1]); } +/* -- 64 bit bit.* library functions -------------------------------------- */ + +/* Determine bit operation type from argument type. */ +static CTypeID crec_bit64_type(CTState *cts, cTValue *tv) +{ + if (tviscdata(tv)) { + CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid); + if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); + if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) == + CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8) + return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */ + return CTID_INT64; /* Otherwise use int64_t. 
*/ + } + return 0; /* Use regular 32 bit ops. */ +} + +void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, + J->base[0], &rd->argv[0]); + if (!tref_isinteger(tr)) + tr = emitconv(tr, IRT_INT, tref_type(tr), 0); + J->base[0] = tr; +} + +int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + CTypeID id = crec_bit64_type(cts, &rd->argv[0]); + if (id) { + TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); + tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0); + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); + return 1; + } + return 0; +} + +int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + CTypeID id = 0; + MSize i; + for (i = 0; J->base[i] != 0; i++) { + CTypeID aid = crec_bit64_type(cts, &rd->argv[i]); + if (id < aid) id = aid; /* Determine highest type rank of all arguments. 
*/ + } + if (id) { + CType *ct = ctype_get(cts, id); + uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64); + TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]); + for (i = 1; J->base[i] != 0; i++) { + TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]); + tr = emitir(ot, tr, tr2); + } + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); + return 1; + } + return 0; +} + +int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + CTypeID id; + TRef tsh = 0; + if (J->base[0] && tref_iscdata(J->base[1])) { + tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, + J->base[1], &rd->argv[1]); + if (!tref_isinteger(tsh)) + tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); + J->base[1] = tsh; + } + id = crec_bit64_type(cts, &rd->argv[0]); + if (id) { + TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); + uint32_t op = rd->data; + if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); + if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && + !tref_isk(tsh)) + tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); +#ifdef LJ_TARGET_UNIFYROT + if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { + op = LJ_TARGET_UNIFYROT == 1 ? 
IR_BROL : IR_BROR; + tsh = emitir(IRTI(IR_NEG), tsh, tsh); + } +#endif + tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); + return 1; + } + return 0; +} + /* -- Miscellaneous library functions ------------------------------------- */ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) diff --git a/src/lj_crecord.h b/src/lj_crecord.h index dea05f78..92d777b8 100644 --- a/src/lj_crecord.h +++ b/src/lj_crecord.h @@ -25,6 +25,12 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd); LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); + +LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd); +LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd); +LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd); +LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd); + LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); #endif diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 730d5c39..4f6aeb37 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -584,40 +584,66 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) /* -- Bit library fast functions ------------------------------------------ */ -/* Record unary bit.tobit, bit.bnot, bit.bswap. */ +/* Record bit.tobit. */ +static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd) +{ + TRef tr = J->base[0]; +#if LJ_HASFFI + if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; } +#endif + J->base[0] = lj_opt_narrow_tobit(J, tr); + UNUSED(rd); +} + +/* Record unary bit.bnot, bit.bswap. 
*/ static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) { - TRef tr = lj_opt_narrow_tobit(J, J->base[0]); - J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); +#if LJ_HASFFI + if (recff_bit64_unary(J, rd)) + return; +#endif + J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0); } /* Record N-ary bit.band, bit.bor, bit.bxor. */ static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) { - TRef tr = lj_opt_narrow_tobit(J, J->base[0]); - uint32_t op = rd->data; - BCReg i; - for (i = 1; J->base[i] != 0; i++) - tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); - J->base[0] = tr; +#if LJ_HASFFI + if (recff_bit64_nary(J, rd)) + return; +#endif + { + TRef tr = lj_opt_narrow_tobit(J, J->base[0]); + uint32_t ot = IRTI(rd->data); + BCReg i; + for (i = 1; J->base[i] != 0; i++) + tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i])); + J->base[0] = tr; + } } /* Record bit shifts. */ static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) { - TRef tr = lj_opt_narrow_tobit(J, J->base[0]); - TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); - IROp op = (IROp)rd->data; - if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && - !tref_isk(tsh)) - tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); -#ifdef LJ_TARGET_UNIFYROT - if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { - op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; - tsh = emitir(IRTI(IR_NEG), tsh, tsh); - } +#if LJ_HASFFI + if (recff_bit64_shift(J, rd)) + return; #endif - J->base[0] = emitir(IRTI(op), tr, tsh); + { + TRef tr = lj_opt_narrow_tobit(J, J->base[0]); + TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); + IROp op = (IROp)rd->data; + if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && + !tref_isk(tsh)) + tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); +#ifdef LJ_TARGET_UNIFYROT + if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { + op = LJ_TARGET_UNIFYROT == 1 ? 
IR_BROL : IR_BROR; + tsh = emitir(IRTI(IR_NEG), tsh, tsh); + } +#endif + J->base[0] = emitir(IRTI(op), tr, tsh); + } } /* -- String library fast functions --------------------------------------- */ diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 7fcc532e..2c160bdf 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -172,7 +172,12 @@ typedef struct CCallInfo { _(FFI, memcpy, 3, S, PTR, 0) \ _(FFI, memset, 3, S, PTR, 0) \ _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ - _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) + _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_shl64, 3, N, U64, CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_shr64, 3, N, U64, CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_sar64, 3, N, U64, CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_rol64, 3, N, U64, CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_ror64, 3, N, U64, CCI_NOFPRCLOBBER) \ \ /* End of list. */ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index e67f3ee6..75db47df 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -22,8 +22,8 @@ #include "lj_trace.h" #if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_carith.h" +#endif #include "lj_vm.h" #include "lj_strscan.h" @@ -336,11 +336,9 @@ LJFOLDF(kfold_intcomp0) static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) { switch (op) { -#if LJ_64 || LJ_HASFFI +#if LJ_HASFFI case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; -#endif -#if LJ_HASFFI case IR_MUL: k1 *= k2; break; case IR_BAND: k1 &= k2; break; case IR_BOR: k1 |= k2; break; @@ -392,20 +390,10 @@ LJFOLD(BROL KINT64 KINT) LJFOLD(BROR KINT64 KINT) LJFOLDF(kfold_int64shift) { -#if LJ_HASFFI || LJ_64 +#if LJ_HASFFI uint64_t k = ir_k64(fleft)->u64; int32_t sh = (fright->i & 63); - switch ((IROp)fins->o) { - case IR_BSHL: k <<= sh; break; -#if LJ_HASFFI - case IR_BSHR: k >>= sh; break; - case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break; - case IR_BROL: k = lj_rol(k, sh); break; - case IR_BROR: k = lj_ror(k, 
sh); break; -#endif - default: lua_assert(0); break; - } - return INT64FOLD(k); + return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); #else UNUSED(J); lua_assert(0); return FAILFOLD; #endif @@ -1192,7 +1180,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) ** But this is mainly intended for simple address arithmetic. ** Also it's easier for the backend to optimize the original multiplies. */ - if (k == 1) { /* i * 1 ==> i */ + if (k == 0) { /* i * 0 ==> 0 */ + return RIGHTFOLD; + } else if (k == 1) { /* i * 1 ==> i */ return LEFTFOLD; } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ fins->o = IR_BSHL; @@ -1205,9 +1195,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) LJFOLD(MUL any KINT) LJFOLDF(simplify_intmul_k32) { - if (fright->i == 0) /* i * 0 ==> 0 */ - return INTFOLD(0); - else if (fright->i > 0) + if (fright->i >= 0) return simplify_intmul_k(J, fright->i); return NEXTFOLD; } @@ -1215,14 +1203,13 @@ LJFOLDF(simplify_intmul_k32) LJFOLD(MUL any KINT64) LJFOLDF(simplify_intmul_k64) { - if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ - return INT64FOLD(0); -#if LJ_64 - /* NYI: SPLIT for BSHL and 32 bit backend support. 
*/ - else if (ir_kint64(fright)->u64 < 0x80000000u) +#if LJ_HASFFI + if (ir_kint64(fright)->u64 < 0x80000000u) return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); -#endif return NEXTFOLD; +#else + UNUSED(J); lua_assert(0); return FAILFOLD; +#endif } LJFOLD(MOD any KINT) @@ -1522,7 +1509,7 @@ LJFOLD(BOR BOR KINT64) LJFOLD(BXOR BXOR KINT64) LJFOLDF(reassoc_intarith_k64) { -#if LJ_HASFFI || LJ_64 +#if LJ_HASFFI IRIns *irk = IR(fleft->op2); if (irk->o == IR_KINT64) { uint64_t k = kfold_int64arith(ir_k64(irk)->u64, diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 2b04e77d..a0526c9d 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -140,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); } +#endif /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, @@ -156,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); } -#endif /* Emit a CALLN with two split 64 bit arguments. */ static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, @@ -196,6 +196,118 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); } +#if LJ_HASFFI +static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, + IRIns *oir, IRIns *nir, IRIns *ir) +{ + IROp op = ir->o; + IRRef kref = nir->op2; + if (irref_isk(kref)) { /* Optimize constant shifts. 
*/ + int32_t k = (IR(kref)->i & 63); + IRRef lo = nir->op1, hi = hisubst[ir->op1]; + if (op == IR_BROL || op == IR_BROR) { + if (op == IR_BROR) k = (-k & 63); + if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; } + if (k == 0) { + passthrough: + J->cur.nins--; + ir->prev = lo; + return hi; + } else { + TRef k1, k2; + IRRef t1, t2, t3, t4; + J->cur.nins--; + k1 = lj_ir_kint(J, k); + k2 = lj_ir_kint(J, (-k & 31)); + t1 = split_emit(J, IRTI(IR_BSHL), lo, k1); + t2 = split_emit(J, IRTI(IR_BSHL), hi, k1); + t3 = split_emit(J, IRTI(IR_BSHR), lo, k2); + t4 = split_emit(J, IRTI(IR_BSHR), hi, k2); + ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4); + return split_emit(J, IRTI(IR_BOR), t2, t3); + } + } else if (k == 0) { + goto passthrough; + } else if (k < 32) { + if (op == IR_BSHL) { + IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref); + IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31))); + return split_emit(J, IRTI(IR_BOR), t1, t2); + } else { + IRRef t1 = ir->prev, t2; + lua_assert(op == IR_BSHR || op == IR_BSAR); + nir->o = IR_BSHR; + t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); + ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); + return split_emit(J, IRTI(op), hi, kref); + } + } else { + if (op == IR_BSHL) { + if (k == 32) + J->cur.nins--; + else + lo = ir->prev; + ir->prev = lj_ir_kint(J, 0); + return lo; + } else { + lua_assert(op == IR_BSHR || op == IR_BSAR); + if (k == 32) { + J->cur.nins--; + ir->prev = hi; + } else { + nir->op1 = hi; + } + if (op == IR_BSHR) + return lj_ir_kint(J, 0); + else + return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31)); + } + } + } + return split_call_li(J, hisubst, oir, ir, + op - IR_BSHL + IRCALL_lj_carith_shl64); +} + +static IRRef split_bitop(jit_State *J, IRRef1 *hisubst, + IRIns *nir, IRIns *ir) +{ + IROp op = ir->o; + IRRef hi, kref = nir->op2; + if (irref_isk(kref)) { /* Optimize bit operations with lo constant. 
*/ + int32_t k = IR(kref)->i; + if (k == 0 || k == -1) { + if (op == IR_BAND) k = ~k; + if (k == 0) { + J->cur.nins--; + ir->prev = nir->op1; + } else if (op == IR_BXOR) { + nir->o = IR_BNOT; + nir->op2 = 0; + } else { + J->cur.nins--; + ir->prev = kref; + } + } + } + hi = hisubst[ir->op1]; + kref = hisubst[ir->op2]; + if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */ + int32_t k = IR(kref)->i; + if (k == 0 || k == -1) { + if (op == IR_BAND) k = ~k; + if (k == 0) { + return hi; + } else if (op == IR_BXOR) { + return split_emit(J, IRTI(IR_BNOT), hi, 0); + } else { + return kref; + } + } + } + return split_emit(J, IRTI(op), hi, kref); +} +#endif + /* Transform the old IR to the new IR. */ static void split_ir(jit_State *J) { @@ -417,6 +529,19 @@ static void split_ir(jit_State *J) irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; + case IR_BNOT: + hi = split_emit(J, IRTI(IR_BNOT), hiref, 0); + break; + case IR_BSWAP: + ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0); + hi = nref; + break; + case IR_BAND: case IR_BOR: case IR_BXOR: + hi = split_bitop(J, hisubst, nir, ir); + break; + case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: + hi = split_bitshift(J, hisubst, oir, nir, ir); + break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);