diff --git a/src/lj_def.h b/src/lj_def.h
index c60bc118..86f041a4 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -184,6 +184,28 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x)
 #error "missing define for lj_bswap()"
 #endif
 
+typedef union __attribute__((packed)) Unaligned16 {
+  uint16_t u;
+  uint8_t b[2];
+} Unaligned16;
+
+typedef union __attribute__((packed)) Unaligned32 {
+  uint32_t u;
+  uint8_t b[4];
+} Unaligned32;
+
+/* Unaligned load of uint16_t. */
+static LJ_AINLINE uint16_t lj_getu16(const void *p)
+{
+  return ((const Unaligned16 *)p)->u;
+}
+
+/* Unaligned load of uint32_t. */
+static LJ_AINLINE uint32_t lj_getu32(const void *p)
+{
+  return ((const Unaligned32 *)p)->u;
+}
+
 #elif defined(_MSC_VER)
 
 #define LJ_NORET __declspec(noreturn)
@@ -208,6 +230,10 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
 #define lj_bswap(x) (_byteswap_ulong((x)))
 #define lj_bswap64(x) (_byteswap_uint64((x)))
 
+/* MSVC is only supported on x86/x64, where unaligned loads are always ok. */
+#define lj_getu16(p) (*(uint16_t *)(p))
+#define lj_getu32(p) (*(uint32_t *)(p))
+
 #else
 #error "missing defines for your compiler"
 #endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 48f2c40d..c8b4edfe 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -1647,16 +1647,13 @@ LJFOLDF(comm_bxor)
 static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
 {
   int32_t k;
-#if !LJ_TARGET_X86ORX64
-#error "Missing support for unaligned loads"
-#endif
   switch (irt_type(ir->t)) {
   case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
   case IRT_I8: k = (int32_t)*(int8_t *)p; break;
   case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
-  case IRT_I16: k = (int32_t)*(int16_t *)p; break;
-  case IRT_U16: k = (int32_t)*(uint16_t *)p; break;
-  case IRT_INT: case IRT_U32: k = *(int32_t *)p; break;
+  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
+  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
+  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
   case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
   default: return 0;
   }
diff --git a/src/lj_str.c b/src/lj_str.c
index 2b94d977..516acefe 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -43,18 +43,6 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
   return (int32_t)(a->len - b->len);
 }
 
-typedef union
-#ifdef __GNUC__
-__attribute__((packed))
-#endif
-Unaligned32 { uint32_t u; uint8_t b[4]; } Unaligned32;
-
-/* Unaligned read of uint32_t. */
-static LJ_AINLINE uint32_t str_getu32(const void *p)
-{
-  return ((const Unaligned32 *)p)->u;
-}
-
 /* Fast string data comparison. Caveat: unaligned access to 1st string! */
 static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
 {
@@ -62,7 +50,7 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
   lua_assert(len > 0);
   lua_assert((((uintptr_t)a + len) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
   do {  /* Note: innocuous access up to end of string + 3. */
-    uint32_t v = str_getu32(a+i) ^ *(const uint32_t *)(b+i);
+    uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
     if (v) {
       i -= len;
 #if LJ_LE
@@ -115,11 +103,11 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
   g = G(L);
   /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
   if (len >= 4) {  /* Caveat: unaligned access! */
-    a = str_getu32(str);
-    h ^= str_getu32(str+len-4);
-    b = str_getu32(str+(len>>1)-2);
+    a = lj_getu32(str);
+    h ^= lj_getu32(str+len-4);
+    b = lj_getu32(str+(len>>1)-2);
     h ^= b; h -= lj_rol(b, 14);
-    b += str_getu32(str+(len>>2)-1);
+    b += lj_getu32(str+(len>>2)-1);
   } else if (len > 0) {
     a = *(const uint8_t *)str;
     h ^= *(const uint8_t *)(str+len-1);