ARM64: Add big-endian support.

Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
This commit is contained in:
Mike Pall 2017-03-30 11:17:15 +02:00
parent 78f5f1cef1
commit 3143b21894
14 changed files with 149 additions and 63 deletions

View File

@ -87,8 +87,8 @@ FILE_PC= luajit.pc
FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
dis_ppc.lua dis_mips.lua dis_mipsel.lua dis_mips64.lua \ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
dis_mips64el.lua vmdef.lua dis_mips64.lua dis_mips64el.lua vmdef.lua
ifeq (,$(findstring Windows,$(OS))) ifeq (,$(findstring Windows,$(OS)))
HOST_SYS:= $(shell uname -s) HOST_SYS:= $(shell uname -s)

View File

@ -242,6 +242,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm TARGET_LJARCH= arm
else else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__AARCH64EB__=1
endif
TARGET_LJARCH= arm64 TARGET_LJARCH= arm64
else else
ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))

View File

@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
{ {
int i; int i;
for (i = 0; i < n; i += 4) { for (i = 0; i < n; i += 4) {
uint32_t ins = *(uint32_t *)(p+i);
#if LJ_TARGET_ARM64 && LJ_BE
ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
#endif
if ((i & 15) == 0) if ((i & 15) == 0)
fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); fprintf(ctx->fp, "\t.long 0x%08x", ins);
else else
fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); fprintf(ctx->fp, ",0x%08x", ins);
if ((i & 15) == 12) putc('\n', ctx->fp); if ((i & 15) == 12) putc('\n', ctx->fp);
} }
if ((n & 15) != 0) putc('\n', ctx->fp); if ((n & 15) != 0) putc('\n', ctx->fp);

View File

@ -63,8 +63,8 @@ local map_type = {
} }
local map_arch = { local map_arch = {
x86 = true, x64 = true, arm = true, arm64 = true, ppc = true, x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true,
mips = true, mipsel = true, ppc = true, mips = true, mipsel = true,
} }
local map_os = { local map_os = {
@ -200,7 +200,7 @@ typedef struct {
]] ]]
local symname = LJBC_PREFIX..ctx.modname local symname = LJBC_PREFIX..ctx.modname
local is64, isbe = false, false local is64, isbe = false, false
if ctx.arch == "x64" or ctx.arch == "arm64" then if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then
is64 = true is64 = true
elseif ctx.arch == "ppc" or ctx.arch == "mips" then elseif ctx.arch == "ppc" or ctx.arch == "mips" then
isbe = true isbe = true
@ -237,7 +237,7 @@ typedef struct {
hdr.eendian = isbe and 2 or 1 hdr.eendian = isbe and 2 or 1
hdr.eversion = 1 hdr.eversion = 1
hdr.type = f16(1) hdr.type = f16(1)
hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
if ctx.arch == "mips" or ctx.arch == "mipsel" then if ctx.arch == "mips" or ctx.arch == "mipsel" then
hdr.flags = f32(0x50001006) hdr.flags = f32(0x50001006)
end end

12
src/jit/dis_arm64be.lua Normal file
View File

@ -0,0 +1,12 @@
----------------------------------------------------------------------------
-- LuaJIT ARM64BE disassembler wrapper module.
--
-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- ARM64 instructions are always little-endian. So just forward to the
-- common ARM64 disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------
-- The chunk's first vararg is the name this module was loaded under. Keep
-- everything up to and including its last dot as the package prefix (empty
-- if the name has no dot) and load the sibling ARM64 disassembler from there.
local prefix = string.match(..., ".*%.") or ""
return require(prefix.."dis_arm64")

View File

@ -215,9 +215,14 @@
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
#define LJ_ARCH_NAME "arm64"
#define LJ_ARCH_BITS 64 #define LJ_ARCH_BITS 64
#if defined(__AARCH64EB__)
#define LJ_ARCH_NAME "arm64be"
#define LJ_ARCH_ENDIAN LUAJIT_BE
#else
#define LJ_ARCH_NAME "arm64"
#define LJ_ARCH_ENDIAN LUAJIT_LE #define LJ_ARCH_ENDIAN LUAJIT_LE
#endif
#define LJ_TARGET_ARM64 1 #define LJ_TARGET_ARM64 1
#define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_EHRETREG 0
#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
@ -409,9 +414,6 @@
#error "Only ARM EABI or iOS 3.0+ ABI is supported" #error "Only ARM EABI or iOS 3.0+ ABI is supported"
#endif #endif
#elif LJ_TARGET_ARM64 #elif LJ_TARGET_ARM64
#if defined(__AARCH64EB__)
#error "No support for big-endian ARM64"
#endif
#if defined(_ILP32) #if defined(_ILP32)
#error "No support for ILP32 model on ARM64" #error "No support for ILP32 model on ARM64"
#endif #endif

View File

@ -2393,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
if (!as->loopref) if (!as->loopref)
asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
#if LJ_TARGET_MCODE_FIXUP
asm_mcode_fixup(T->mcode, T->szmcode);
#endif
lj_mcode_sync(T->mcode, origtop); lj_mcode_sync(T->mcode, origtop);
} }

View File

@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
asm_mclimit(as); asm_mclimit(as);
/* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
for (i = nexits-1; (int32_t)i >= 0; i--) for (i = nexits-1; (int32_t)i >= 0; i--)
*--mxp = A64I_BL|((-3-i)&0x03ffffffu); *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
*--mxp = A64I_MOVZw|A64F_U16(as->T->traceno); *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
mxp--; mxp--;
*mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
*--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP); *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
as->mctop = mxp; as->mctop = mxp;
} }
@ -431,7 +431,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
fpr++; fpr++;
} else { } else {
Reg r = ra_alloc1(as, ref, RSET_FPR); Reg r = ra_alloc1(as, ref, RSET_FPR);
emit_spstore(as, ir, r, ofs); emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
ofs += 8; ofs += 8;
} }
} else { } else {
@ -441,7 +441,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
gpr++; gpr++;
} else { } else {
Reg r = ra_alloc1(as, ref, RSET_GPR); Reg r = ra_alloc1(as, ref, RSET_GPR);
emit_spstore(as, ir, r, ofs); emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
ofs += 8; ofs += 8;
} }
} }
@ -1082,7 +1082,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
src = ra_alloc1(as, ir->op2, allow); src = ra_alloc1(as, ir->op2, allow);
rset_clear(allow, src); rset_clear(allow, src);
if (irt_isinteger(ir->t)) if (irt_isinteger(ir->t))
type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow); type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
else else
type = ra_allock(as, irt_toitype(ir->t), allow); type = ra_allock(as, irt_toitype(ir->t), allow);
} else { } else {
@ -1179,7 +1179,8 @@ dotypecheck:
} }
if (ra_hasreg(dest)) { if (ra_hasreg(dest)) {
emit_lso(as, irt_isnum(t) ? A64I_LDRd : emit_lso(as, irt_isnum(t) ? A64I_LDRd :
(irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs); (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
} }
} }
@ -1909,7 +1910,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
/* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
if (spadj == 0) { if (spadj == 0) {
*--p = A64I_NOP; *--p = A64I_LE(A64I_NOP);
as->mctop = p; as->mctop = p;
} else { } else {
/* Patch stack adjustment. */ /* Patch stack adjustment. */
@ -1962,6 +1963,19 @@ static void asm_setup_target(ASMState *as)
asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
} }
#if LJ_BE
/*
** ARM64 instructions are always little-endian. On a big-endian build,
** byte-swap every instruction word of a trace in place.
**
** mcode  First instruction word of the region to convert.
** size   Length of the region in bytes.
**
** Defining LJ_TARGET_MCODE_FIXUP enables the call site in lj_asm_trace().
*/
static void asm_mcode_fixup(MCode *mcode, MSize size)
{
  MCode *p;
  MCode *end = (MCode *)((char *)mcode + size);
  for (p = mcode; p < end; p++)
    *p = lj_bswap(*p);
}
#define LJ_TARGET_MCODE_FIXUP 1
#endif
/* -- Trace patching ------------------------------------------------------ */ /* -- Trace patching ------------------------------------------------------ */
/* Patch exit jumps of existing machine code to a new target. */ /* Patch exit jumps of existing machine code to a new target. */
@ -1974,29 +1988,29 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
MCode *px = exitstub_trace_addr(T, exitno); MCode *px = exitstub_trace_addr(T, exitno);
for (; p < pe; p++) { for (; p < pe; p++) {
/* Look for exitstub branch, replace with branch to target. */ /* Look for exitstub branch, replace with branch to target. */
uint32_t ins = *p; MCode ins = A64I_LE(*p);
if ((ins & 0xff000000u) == 0x54000000u && if ((ins & 0xff000000u) == 0x54000000u &&
((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
/* Patch bcc exitstub. */ /* Patch bcc exitstub. */
*p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
cend = p+1; cend = p+1;
if (!cstart) cstart = p; if (!cstart) cstart = p;
} else if ((ins & 0xfc000000u) == 0x14000000u && } else if ((ins & 0xfc000000u) == 0x14000000u &&
((ins ^ (px-p)) & 0x03ffffffu) == 0) { ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
/* Patch b exitstub. */ /* Patch b exitstub. */
*p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
cend = p+1; cend = p+1;
if (!cstart) cstart = p; if (!cstart) cstart = p;
} else if ((ins & 0x7e000000u) == 0x34000000u && } else if ((ins & 0x7e000000u) == 0x34000000u &&
((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
/* Patch cbz/cbnz exitstub. */ /* Patch cbz/cbnz exitstub. */
*p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u); *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u));
cend = p+1; cend = p+1;
if (!cstart) cstart = p; if (!cstart) cstart = p;
} else if ((ins & 0x7e000000u) == 0x36000000u && } else if ((ins & 0x7e000000u) == 0x36000000u &&
((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
/* Patch tbz/tbnz exitstub. */ /* Patch tbz/tbnz exitstub. */
*p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u); *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u));
cend = p+1; cend = p+1;
if (!cstart) cstart = p; if (!cstart) cstart = p;
} }

View File

@ -301,7 +301,7 @@
unsigned int cl = ccall_classify_struct(cts, ctr); \ unsigned int cl = ccall_classify_struct(cts, ctr); \
if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
CTSize i = (cl >> 8) - 1; \ CTSize i = (cl >> 8) - 1; \
do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
} else { \ } else { \
if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
memcpy(dp, sp, ctr->size); \ memcpy(dp, sp, ctr->size); \
@ -359,6 +359,13 @@
} \ } \
} }
#if LJ_BE
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
#endif
#elif LJ_TARGET_PPC #elif LJ_TARGET_PPC
/* -- PPC calling conventions --------------------------------------------- */ /* -- PPC calling conventions --------------------------------------------- */
@ -1033,9 +1040,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp; (int32_t)*(int16_t *)dp;
} }
#if LJ_TARGET_ARM64 && LJ_BE
if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64 #if LJ_TARGET_MIPS64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || || (isfp && nsp == 0)
(isfp && nsp == 0)) && d->size <= 4) { #endif
) && d->size <= 4) {
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
} }
#endif #endif

View File

@ -79,8 +79,8 @@ typedef union FPRArg {
typedef intptr_t GPRArg; typedef intptr_t GPRArg;
typedef union FPRArg { typedef union FPRArg {
double d; double d;
float f; struct { LJ_ENDIAN_LOHI(float f; , float g;) };
uint32_t u32; struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
} FPRArg; } FPRArg;
#elif LJ_TARGET_PPC #elif LJ_TARGET_PPC

View File

@ -173,16 +173,16 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
uint32_t *p = page; uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback; void *target = (void *)lj_vm_ffi_callback;
MSize slot; MSize slot;
*p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
*p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
*p++ = A64I_BR | A64F_N(RID_X11); *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
*p++ = A64I_NOP; *p++ = A64I_LE(A64I_NOP);
((void **)p)[0] = target; ((void **)p)[0] = target;
((void **)p)[1] = g; ((void **)p)[1] = g;
p += 4; p += 4;
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
*p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
*p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
p++; p++;
} }
lua_assert(p - page <= CALLBACK_MCODE_SIZE); lua_assert(p - page <= CALLBACK_MCODE_SIZE);
@ -623,6 +623,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
#if CCALL_NUM_FPR #if CCALL_NUM_FPR
if (ctype_isfp(ctr->info)) if (ctype_isfp(ctr->info))
dp = (uint8_t *)&cts->cb.fpr[0]; dp = (uint8_t *)&cts->cb.fpr[0];
#endif
#if LJ_TARGET_ARM64 && LJ_BE
  /* On ARM64BE redirect a float result to the 'f' member of the first FPR
  ** slot. The ARM64 FPRArg union declares 'f' as a scalar float positioned
  ** with LJ_ENDIAN_LOHI, so its address is taken directly -- it is not an
  ** array and cannot be subscripted. This matches CCALL_HANDLE_RET, which
  ** reads a float result from &cc->fpr[0].f.
  */
  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
    dp = (uint8_t *)&cts->cb.fpr[0].f;
#endif #endif
lj_cconv_ct_tv(cts, ctr, dp, o, 0); lj_cconv_ct_tv(cts, ctr, dp, o, 0);
#ifdef CALLBACK_HANDLE_RET #ifdef CALLBACK_HANDLE_RET
@ -637,7 +641,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp; (int32_t)*(int16_t *)dp;
} }
#if LJ_TARGET_MIPS64 #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 && if (ctr->size <= 4 &&
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))

View File

@ -140,7 +140,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
} else { } else {
goto nopair; goto nopair;
} }
if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) { if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
*as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
(ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
return; return;

View File

@ -107,7 +107,7 @@ typedef struct {
/* Return the address of a per-trace exit stub. */ /* Return the address of a per-trace exit stub. */
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{ {
while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */ while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
return p + 3 + exitno; return p + 3 + exitno;
} }
/* Avoid dependence on lj_jit.h if only including lj_target.h. */ /* Avoid dependence on lj_jit.h if only including lj_target.h. */
@ -116,6 +116,13 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
/* -- Instructions -------------------------------------------------------- */ /* -- Instructions -------------------------------------------------------- */
/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
/*
** A64I_LE(x) maps an instruction word between the host representation and
** its in-memory form. On big-endian builds it goes through lj_bswap(); on
** little-endian builds it is the identity. It is applied in both directions:
** when storing a freshly built instruction and when reading an existing one
** back for patching.
*/
#if LJ_BE
#define A64I_LE(x) (lj_bswap(x))
#else
#define A64I_LE(x) (x)
#endif
/* Instruction fields. */ /* Instruction fields. */
#define A64F_D(r) (r) #define A64F_D(r) (r)
#define A64F_N(r) ((r) << 5) #define A64F_N(r) ((r) << 5)

View File

@ -151,6 +151,21 @@
|.define FRAME_FUNC, #-16 |.define FRAME_FUNC, #-16
|.define FRAME_PC, #-8 |.define FRAME_PC, #-8
| |
|// Endian-specific defines.
|// LO is the byte offset of the low 32 bit word inside a 64 bit stack slot
|// (used for 32 bit accesses such as [RA, #-8+LO]).
|// OFS_* are the byte offsets of the bytecode fields inside the 32 bit
|// instruction word, for byte/halfword loads and stores relative to PC:
|// OP = bits 0-7, RA = bits 8-15, RB = bits 24-31, RD = bits 16-31 (16 bit).
|.if ENDIAN_LE
|.define LO, 0
|.define OFS_RD, 2
|.define OFS_RB, 3
|.define OFS_RA, 1
|.define OFS_OP, 0
|.else
|// Big-endian: the most significant byte comes first, so RB and the 16 bit
|// RD field both start at offset 0 and the opcode is the last byte.
|.define LO, 4
|.define OFS_RD, 0
|.define OFS_RB, 0
|.define OFS_RA, 2
|.define OFS_OP, 3
|.endif
|
|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro |.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro
|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro |.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro
|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro |.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro
@ -717,7 +732,7 @@ static void build_subroutines(BuildCtx *ctx)
| cmp CRET1, #1 | cmp CRET1, #1
| bhi ->vmeta_binop | bhi ->vmeta_binop
|4: |4:
| ldrh RBw, [PC, #2] | ldrh RBw, [PC, # OFS_RD]
| add PC, PC, #4 | add PC, PC, #4
| add RB, PC, RB, lsl #2 | add RB, PC, RB, lsl #2
| sub RB, RB, #0x20000 | sub RB, RB, #0x20000
@ -1500,7 +1515,12 @@ static void build_subroutines(BuildCtx *ctx)
| bne ->fff_fallback | bne ->fff_fallback
| checkint CARG1, ->fff_fallback | checkint CARG1, ->fff_fallback
| mov CARG3, #1 | mov CARG3, #1
| mov CARG2, BASE // Points to stack. Little-endian. | // Point to the char inside the integer in the stack slot.
|.if ENDIAN_LE
| mov CARG2, BASE
|.else
| add CARG2, BASE, #7
|.endif
|->fff_newstr: |->fff_newstr:
| // CARG2 = str, CARG3 = len. | // CARG2 = str, CARG3 = len.
| str BASE, L->base | str BASE, L->base
@ -1703,7 +1723,7 @@ static void build_subroutines(BuildCtx *ctx)
| ands TMP0, PC, #FRAME_TYPE | ands TMP0, PC, #FRAME_TYPE
| and TMP1, PC, #~FRAME_TYPEP | and TMP1, PC, #~FRAME_TYPEP
| bne >3 | bne >3
| ldrb RAw, [PC, #-3] | ldrb RAw, [PC, #-4+OFS_RA]
| lsl RA, RA, #3 | lsl RA, RA, #3
| add TMP1, RA, #16 | add TMP1, RA, #16
|3: |3:
@ -1838,7 +1858,7 @@ static void build_subroutines(BuildCtx *ctx)
|->cont_stitch: // Trace stitching. |->cont_stitch: // Trace stitching.
|.if JIT |.if JIT
| // RA = resultptr, CARG4 = meta base | // RA = resultptr, CARG4 = meta base
| ldr RB, SAVE_MULTRES | ldr RBw, SAVE_MULTRES
| ldr INSw, [PC, #-4] | ldr INSw, [PC, #-4]
| ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
| subs RB, RB, #8 | subs RB, RB, #8
@ -1869,7 +1889,7 @@ static void build_subroutines(BuildCtx *ctx)
| |
| // Stitch a new trace to the previous trace. | // Stitch a new trace to the previous trace.
| mov CARG1, #GL_J(exitno) | mov CARG1, #GL_J(exitno)
| str RA, [GL, CARG1] | str RAw, [GL, CARG1]
| mov CARG1, #GL_J(L) | mov CARG1, #GL_J(L)
| str L, [GL, CARG1] | str L, [GL, CARG1]
| str BASE, L->base | str BASE, L->base
@ -1936,6 +1956,9 @@ static void build_subroutines(BuildCtx *ctx)
| sub CARG1, CARG1, #2 | sub CARG1, CARG1, #2
| ldr CARG2w, [lr] // Load trace number. | ldr CARG2w, [lr] // Load trace number.
| st_vmstate CARG4 | st_vmstate CARG4
|.if ENDIAN_BE
| rev32 CARG2, CARG2
|.endif
| str BASE, L->base | str BASE, L->base
| ubfx CARG2w, CARG2w, #5, #16 | ubfx CARG2w, CARG2w, #5, #16
| str CARG1w, [GL, #GL_J(exitno)] | str CARG1w, [GL, #GL_J(exitno)]
@ -1967,14 +1990,14 @@ static void build_subroutines(BuildCtx *ctx)
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
| movn TISNIL, #0 | movn TISNIL, #0
| and LFUNC:CARG2, CARG2, #LJ_GCVMASK | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
| str RC, SAVE_MULTRES | str RCw, SAVE_MULTRES
| str BASE, L->base | str BASE, L->base
| ldr CARG2, LFUNC:CARG2->pc | ldr CARG2, LFUNC:CARG2->pc
| str xzr, GL->jit_base | str xzr, GL->jit_base
| mv_vmstate CARG4, INTERP | mv_vmstate CARG4, INTERP
| ldr KBASE, [CARG2, #PC2PROTO(k)] | ldr KBASE, [CARG2, #PC2PROTO(k)]
| // Modified copy of ins_next which handles function header dispatch, too. | // Modified copy of ins_next which handles function header dispatch, too.
| ldrb RBw, [PC] | ldrb RBw, [PC, # OFS_OP]
| ldr INSw, [PC], #4 | ldr INSw, [PC], #4
| st_vmstate CARG4 | st_vmstate CARG4
| cmp RBw, #BC_FUNCC+2 // Fast function? | cmp RBw, #BC_FUNCC+2 // Fast function?
@ -2000,7 +2023,7 @@ static void build_subroutines(BuildCtx *ctx)
| ands CARG2, CARG1, #FRAME_TYPE | ands CARG2, CARG1, #FRAME_TYPE
| bne <2 // Trace stitching continuation? | bne <2 // Trace stitching continuation?
| // Otherwise set KBASE for Lua function below fast function. | // Otherwise set KBASE for Lua function below fast function.
| ldr CARG3, [CARG1, #-4] | ldr CARG3w, [CARG1, #-4]
| decode_RA CARG1, CARG3 | decode_RA CARG1, CARG3
| sub CARG2, BASE, CARG1, lsl #3 | sub CARG2, BASE, CARG1, lsl #3
| ldr LFUNC:CARG3, [CARG2, #-32] | ldr LFUNC:CARG3, [CARG2, #-32]
@ -2153,7 +2176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1, RC = src2, JMP with RC = target | // RA = src1, RC = src2, JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3] | ldr CARG1, [BASE, RA, lsl #3]
| ldrh RBw, [PC, #2] | ldrh RBw, [PC, # OFS_RD]
| ldr CARG2, [BASE, RC, lsl #3] | ldr CARG2, [BASE, RC, lsl #3]
| add PC, PC, #4 | add PC, PC, #4
| add RB, PC, RB, lsl #2 | add RB, PC, RB, lsl #2
@ -2210,7 +2233,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src1, RC = src2, JMP with RC = target | // RA = src1, RC = src2, JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3] | ldr CARG1, [BASE, RA, lsl #3]
| add RC, BASE, RC, lsl #3 | add RC, BASE, RC, lsl #3
| ldrh RBw, [PC, #2] | ldrh RBw, [PC, # OFS_RD]
| ldr CARG3, [RC] | ldr CARG3, [RC]
| add PC, PC, #4 | add PC, PC, #4
| add RB, PC, RB, lsl #2 | add RB, PC, RB, lsl #2
@ -2271,7 +2294,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src, RC = str_const (~), JMP with RC = target | // RA = src, RC = str_const (~), JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3] | ldr CARG1, [BASE, RA, lsl #3]
| mvn RC, RC | mvn RC, RC
| ldrh RBw, [PC, #2] | ldrh RBw, [PC, # OFS_RD]
| ldr CARG2, [KBASE, RC, lsl #3] | ldr CARG2, [KBASE, RC, lsl #3]
| add PC, PC, #4 | add PC, PC, #4
| movn TMP0, #~LJ_TSTR | movn TMP0, #~LJ_TSTR
@ -2299,7 +2322,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src, RC = num_const (~), JMP with RC = target | // RA = src, RC = num_const (~), JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3] | ldr CARG1, [BASE, RA, lsl #3]
| add RC, KBASE, RC, lsl #3 | add RC, KBASE, RC, lsl #3
| ldrh RBw, [PC, #2] | ldrh RBw, [PC, # OFS_RD]
| ldr CARG3, [RC] | ldr CARG3, [RC]
| add PC, PC, #4 | add PC, PC, #4
| add RB, PC, RB, lsl #2 | add RB, PC, RB, lsl #2
@ -2359,7 +2382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQP; vk = op == BC_ISEQP;
| // RA = src, RC = primitive_type (~), JMP with RC = target | // RA = src, RC = primitive_type (~), JMP with RC = target
| ldr TMP0, [BASE, RA, lsl #3] | ldr TMP0, [BASE, RA, lsl #3]
| ldrh RBw, [PC, #2] | ldrh RBw, [PC, # OFS_RD]
| add PC, PC, #4 | add PC, PC, #4
| add RC, RC, #1 | add RC, RC, #1
| add RB, PC, RB, lsl #2 | add RB, PC, RB, lsl #2
@ -2384,7 +2407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
| // RA = dst or unused, RC = src, JMP with RC = target | // RA = dst or unused, RC = src, JMP with RC = target
| ldrh RBw, [PC, #2] | ldrh RBw, [PC, # OFS_RD]
| ldr TMP0, [BASE, RC, lsl #3] | ldr TMP0, [BASE, RC, lsl #3]
| add PC, PC, #4 | add PC, PC, #4
| mov_false TMP1 | mov_false TMP1
@ -2631,7 +2654,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| str PC, SAVE_PC | str PC, SAVE_PC
| bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
| // Returns NULL (finished) or TValue * (metamethod). | // Returns NULL (finished) or TValue * (metamethod).
| ldrb RBw, [PC, #-1] | ldrb RBw, [PC, #-4+OFS_RB]
| ldr BASE, L->base | ldr BASE, L->base
| cbnz CRET1, ->vmeta_binop | cbnz CRET1, ->vmeta_binop
| ldr TMP0, [BASE, RB, lsl #3] | ldr TMP0, [BASE, RB, lsl #3]
@ -3262,7 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_callt | ins_callt
| |
|5: // Tailcall to a fast function with a Lua frame below. |5: // Tailcall to a fast function with a Lua frame below.
| ldrb RAw, [PC, #-3] | ldrb RAw, [PC, #-4+OFS_RA]
| sub CARG1, BASE, RA, lsl #3 | sub CARG1, BASE, RA, lsl #3
| ldr LFUNC:CARG1, [CARG1, #-32] | ldr LFUNC:CARG1, [CARG1, #-32]
| and LFUNC:CARG1, CARG1, #LJ_GCVMASK | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
@ -3303,8 +3326,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif |.endif
| add RA, BASE, RA, lsl #3 | add RA, BASE, RA, lsl #3
| ldr TAB:RB, [RA, #-16] | ldr TAB:RB, [RA, #-16]
| ldrh TMP3w, [PC, #2] | ldrh TMP3w, [PC, # OFS_RD]
| ldr CARG1w, [RA, #-8] // Get index from control var. | ldr CARG1w, [RA, #-8+LO] // Get index from control var.
| add PC, PC, #4 | add PC, PC, #4
| add TMP3, PC, TMP3, lsl #2 | add TMP3, PC, TMP3, lsl #2
| and TAB:RB, RB, #LJ_GCVMASK | and TAB:RB, RB, #LJ_GCVMASK
@ -3323,7 +3346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| stp CARG1, TMP0, [RA] | stp CARG1, TMP0, [RA]
| add CARG1, CARG1, #1 | add CARG1, CARG1, #1
|3: |3:
| str CARG1w, [RA, #-8] // Update control var. | str CARG1w, [RA, #-8+LO] // Update control var.
| mov PC, TMP3 | mov PC, TMP3
|4: |4:
| ins_next | ins_next
@ -3369,8 +3392,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Despecialize bytecode if any of the checks fail. |5: // Despecialize bytecode if any of the checks fail.
| mov TMP0, #BC_JMP | mov TMP0, #BC_JMP
| mov TMP1, #BC_ITERC | mov TMP1, #BC_ITERC
| strb TMP0w, [PC, #-4] | strb TMP0w, [PC, #-4+OFS_OP]
| strb TMP1w, [RC] | strb TMP1w, [RC, # OFS_OP]
| b <1 | b <1
break; break;
@ -3576,7 +3599,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| csel PC, RC, PC, gt | csel PC, RC, PC, gt
} else if (op == BC_JFORI) { } else if (op == BC_JFORI) {
| mov PC, RC | mov PC, RC
| ldrh RCw, [RC, #-2] | ldrh RCw, [RC, #-4+OFS_RD]
} else if (op == BC_IFORL) { } else if (op == BC_IFORL) {
| csel PC, RC, PC, le | csel PC, RC, PC, le
} }
@ -3617,7 +3640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (op == BC_FORI) { if (op == BC_FORI) {
| csel PC, RC, PC, hi | csel PC, RC, PC, hi
} else if (op == BC_JFORI) { } else if (op == BC_JFORI) {
| ldrh RCw, [RC, #-2] | ldrh RCw, [RC, #-4+OFS_RD]
| bls =>BC_JLOOP | bls =>BC_JLOOP
} else if (op == BC_IFORL) { } else if (op == BC_IFORL) {
| csel PC, RC, PC, ls | csel PC, RC, PC, ls