ARM64: Add big-endian support.

Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
Author: Mike Pall, 2017-03-30 11:17:15 +02:00
parent 78f5f1cef1
commit 3143b21894
14 changed files with 149 additions and 63 deletions


@@ -87,8 +87,8 @@ FILE_PC= luajit.pc
FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
dis_ppc.lua dis_mips.lua dis_mipsel.lua dis_mips64.lua \
dis_mips64el.lua vmdef.lua
dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
dis_mips64.lua dis_mips64el.lua vmdef.lua
ifeq (,$(findstring Windows,$(OS)))
HOST_SYS:= $(shell uname -s)


@@ -242,6 +242,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm
else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__AARCH64EB__=1
endif
TARGET_LJARCH= arm64
else
ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))


@@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
{
int i;
for (i = 0; i < n; i += 4) {
uint32_t ins = *(uint32_t *)(p+i);
#if LJ_TARGET_ARM64 && LJ_BE
ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
#endif
if ((i & 15) == 0)
fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i));
fprintf(ctx->fp, "\t.long 0x%08x", ins);
else
fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i));
fprintf(ctx->fp, ",0x%08x", ins);
if ((i & 15) == 12) putc('\n', ctx->fp);
}
if ((n & 15) != 0) putc('\n', ctx->fp);
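
Note: lj_bswap() is LuaJIT's 32-bit byte-swap helper (defined in lj_def.h, where it maps to a compiler builtin where available). For readers following the transformation of the .long words, here is a minimal hedged sketch of an equivalent swap; the name is illustrative and not part of the tree:

#include <stdint.h>

/* Illustrative 32-bit byte swap, equivalent to what lj_bswap() does.
** On GCC/Clang this would normally be __builtin_bswap32(x).
*/
static uint32_t bswap32_sketch(uint32_t x)
{
  return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
         ((x << 8) & 0x00ff0000u) | (x << 24);
}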


@@ -63,8 +63,8 @@ local map_type = {
}
local map_arch = {
x86 = true, x64 = true, arm = true, arm64 = true, ppc = true,
mips = true, mipsel = true,
x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true,
ppc = true, mips = true, mipsel = true,
}
local map_os = {
@@ -200,7 +200,7 @@ typedef struct {
]]
local symname = LJBC_PREFIX..ctx.modname
local is64, isbe = false, false
if ctx.arch == "x64" or ctx.arch == "arm64" then
if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then
is64 = true
elseif ctx.arch == "ppc" or ctx.arch == "mips" then
isbe = true
@@ -237,7 +237,7 @@ typedef struct {
hdr.eendian = isbe and 2 or 1
hdr.eversion = 1
hdr.type = f16(1)
hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
if ctx.arch == "mips" or ctx.arch == "mipsel" then
hdr.flags = f32(0x50001006)
end

src/jit/dis_arm64be.lua (new file, 12 lines)

@@ -0,0 +1,12 @@
----------------------------------------------------------------------------
-- LuaJIT ARM64BE disassembler wrapper module.
--
-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- ARM64 instructions are always little-endian. So just forward to the
-- common ARM64 disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------
return require((string.match(..., ".*%.") or "").."dis_arm64")


@@ -215,9 +215,14 @@
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
#define LJ_ARCH_NAME "arm64"
#define LJ_ARCH_BITS 64
#if defined(__AARCH64EB__)
#define LJ_ARCH_NAME "arm64be"
#define LJ_ARCH_ENDIAN LUAJIT_BE
#else
#define LJ_ARCH_NAME "arm64"
#define LJ_ARCH_ENDIAN LUAJIT_LE
#endif
#define LJ_TARGET_ARM64 1
#define LJ_TARGET_EHRETREG 0
#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
@@ -409,9 +414,6 @@
#error "Only ARM EABI or iOS 3.0+ ABI is supported"
#endif
#elif LJ_TARGET_ARM64
#if defined(__AARCH64EB__)
#error "No support for big-endian ARM64"
#endif
#if defined(_ILP32)
#error "No support for ILP32 model on ARM64"
#endif
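
Note: the arch name and LJ_ARCH_ENDIAN now follow the compiler's __AARCH64EB__ define, and the old #error for big-endian ARM64 is gone. A small hedged sanity check (hypothetical program, not part of the build) that the toolchain's idea of endianness matches the runtime byte order:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
  uint32_t w = 0x01020304u;
  unsigned char b[4];
  memcpy(b, &w, sizeof(w));
#if defined(__AARCH64EB__)
  int compile_be = 1;   /* toolchain targets big-endian ARM64 */
#else
  int compile_be = 0;
#endif
  printf("compile-time BE: %d, runtime BE: %d\n", compile_be, b[0] == 0x01);
  return compile_be != (b[0] == 0x01);  /* non-zero exit on mismatch */
}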


@@ -2393,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
if (!as->loopref)
asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
#if LJ_TARGET_MCODE_FIXUP
asm_mcode_fixup(T->mcode, T->szmcode);
#endif
lj_mcode_sync(T->mcode, origtop);
}


@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
asm_mclimit(as);
/* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
for (i = nexits-1; (int32_t)i >= 0; i--)
*--mxp = A64I_BL|((-3-i)&0x03ffffffu);
*--mxp = A64I_MOVZw|A64F_U16(as->T->traceno);
*--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
*--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
mxp--;
*mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu);
*--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP);
*mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
*--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
as->mctop = mxp;
}
@@ -431,7 +431,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
fpr++;
} else {
Reg r = ra_alloc1(as, ref, RSET_FPR);
emit_spstore(as, ir, r, ofs);
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
ofs += 8;
}
} else {
@@ -441,7 +441,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
gpr++;
} else {
Reg r = ra_alloc1(as, ref, RSET_GPR);
emit_spstore(as, ir, r, ofs);
emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
ofs += 8;
}
}
@@ -1082,7 +1082,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
src = ra_alloc1(as, ir->op2, allow);
rset_clear(allow, src);
if (irt_isinteger(ir->t))
type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow);
type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
else
type = ra_allock(as, irt_toitype(ir->t), allow);
} else {
@@ -1179,7 +1179,8 @@ dotypecheck:
}
if (ra_hasreg(dest)) {
emit_lso(as, irt_isnum(t) ? A64I_LDRd :
(irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs);
(irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0));
}
}
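
Note: the ofs adjustments above (adding or XORing 4 when LJ_BE and the value is a 32-bit payload) reflect where the low half of a 64-bit stack slot or TValue lives in memory. A hedged stand-alone illustration (hypothetical example, not LuaJIT code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
  uint64_t slot = (uint64_t)(uint32_t)42;  /* 32-bit payload in the low half */
  unsigned char b[8];
  memcpy(b, &slot, sizeof(slot));
  /* Little-endian: the low half starts at byte offset 0 (b[0] == 42).
  ** Big-endian:    the low half starts at byte offset 4 (b[7] == 42).
  */
  printf("32-bit payload starts at byte offset %d\n", (b[0] == 42) ? 0 : 4);
  return 0;
}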
@@ -1909,7 +1910,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
/* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
if (spadj == 0) {
*--p = A64I_NOP;
*--p = A64I_LE(A64I_NOP);
as->mctop = p;
} else {
/* Patch stack adjustment. */
@@ -1962,6 +1963,19 @@ static void asm_setup_target(ASMState *as)
asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}
#if LJ_BE
/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
static void asm_mcode_fixup(MCode *mcode, MSize size)
{
MCode *pe = (MCode *)((char *)mcode + size);
while (mcode < pe) {
MCode ins = *mcode;
*mcode++ = lj_bswap(ins);
}
}
#define LJ_TARGET_MCODE_FIXUP 1
#endif
/* -- Trace patching ------------------------------------------------------ */
/* Patch exit jumps of existing machine code to a new target. */
@@ -1974,29 +1988,29 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
MCode *px = exitstub_trace_addr(T, exitno);
for (; p < pe; p++) {
/* Look for exitstub branch, replace with branch to target. */
uint32_t ins = *p;
MCode ins = A64I_LE(*p);
if ((ins & 0xff000000u) == 0x54000000u &&
((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
/* Patch bcc exitstub. */
*p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u);
*p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
cend = p+1;
if (!cstart) cstart = p;
} else if ((ins & 0xfc000000u) == 0x14000000u &&
((ins ^ (px-p)) & 0x03ffffffu) == 0) {
/* Patch b exitstub. */
*p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu);
*p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
cend = p+1;
if (!cstart) cstart = p;
} else if ((ins & 0x7e000000u) == 0x34000000u &&
((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
/* Patch cbz/cbnz exitstub. */
*p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u);
*p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u));
cend = p+1;
if (!cstart) cstart = p;
} else if ((ins & 0x7e000000u) == 0x36000000u &&
((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
/* Patch tbz/tbnz exitstub. */
*p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u);
*p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u));
cend = p+1;
if (!cstart) cstart = p;
}


@@ -301,7 +301,7 @@
unsigned int cl = ccall_classify_struct(cts, ctr); \
if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
CTSize i = (cl >> 8) - 1; \
do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \
do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
} else { \
if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
memcpy(dp, sp, ctr->size); \
@@ -359,6 +359,13 @@
} \
}
#if LJ_BE
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
#endif
#elif LJ_TARGET_PPC
/* -- PPC calling conventions --------------------------------------------- */
@@ -1033,9 +1040,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
#if LJ_TARGET_ARM64 && LJ_BE
if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
(isfp && nsp == 0)) && d->size <= 4) {
|| (isfp && nsp == 0)
#endif
) && d->size <= 4) {
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
}
#endif
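
Note: the "Floats occupy high slot" fixup above mirrors a float argument from the start of its 8-byte stack slot into the slot's second 32-bit word (offset 4), which is where a big-endian callee picks it up. A hedged sketch of that step in isolation (hypothetical helper name, not LuaJIT code):

#include <string.h>

/* Copy the float from offset 0 of the 8-byte argument slot to offset 4,
** so the value is present in the half a big-endian callee reads.
*/
static void mirror_float_to_high_slot(void *dp)
{
  memcpy((char *)dp + 4, dp, sizeof(float));
}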


@@ -79,8 +79,8 @@ typedef union FPRArg {
typedef intptr_t GPRArg;
typedef union FPRArg {
double d;
float f;
uint32_t u32;
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
} FPRArg;
#elif LJ_TARGET_PPC
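
Note: LJ_ENDIAN_LOHI (from lj_def.h) orders the two members low-half-first on little-endian and high-half-first on big-endian, so the intent appears to be that FPRArg.f and FPRArg.lo always alias the low 32-bit half of the 64-bit register image on either byte order. A hedged sketch of that pattern, with illustrative macro/union names and assuming C11 anonymous structs as LuaJIT uses:

#include <stdint.h>

/* Modeled on LJ_ENDIAN_LOHI; LuaJIT's real definition lives in lj_def.h. */
#if defined(__AARCH64EB__) || defined(__BIG_ENDIAN__)
#define ENDIAN_LOHI_SKETCH(lo, hi)  hi lo   /* high half at the lower address */
#else
#define ENDIAN_LOHI_SKETCH(lo, hi)  lo hi   /* low half at the lower address */
#endif

typedef union FPRArgSketch {
  double d;
  struct { ENDIAN_LOHI_SKETCH(float f; , float g;) };
  struct { ENDIAN_LOHI_SKETCH(uint32_t lo; , uint32_t hi;) };
} FPRArgSketch;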


@@ -173,16 +173,16 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
MSize slot;
*p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4);
*p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5);
*p++ = A64I_BR | A64F_N(RID_X11);
*p++ = A64I_NOP;
*p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
*p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
*p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
*p++ = A64I_LE(A64I_NOP);
((void **)p)[0] = target;
((void **)p)[1] = g;
p += 4;
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
*p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot);
*p = A64I_B | A64F_S26((page-p) & 0x03ffffffu);
*p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
*p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
p++;
}
lua_assert(p - page <= CALLBACK_MCODE_SIZE);
@@ -623,6 +623,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
#if CCALL_NUM_FPR
if (ctype_isfp(ctr->info))
dp = (uint8_t *)&cts->cb.fpr[0];
#endif
#if LJ_TARGET_ARM64 && LJ_BE
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
dp = (uint8_t *)&cts->cb.fpr[0].f[1];
#endif
lj_cconv_ct_tv(cts, ctr, dp, o, 0);
#ifdef CALLBACK_HANDLE_RET
@@ -637,7 +641,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
#if LJ_TARGET_MIPS64
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))


@@ -140,7 +140,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
} else {
goto nopair;
}
if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) {
if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
*as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
(ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
return;


@@ -107,7 +107,7 @@ typedef struct {
/* Return the address of a per-trace exit stub. */
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{
while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */
while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
return p + 3 + exitno;
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
@@ -116,6 +116,13 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
/* -- Instructions -------------------------------------------------------- */
/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
#if LJ_BE
#define A64I_LE(x) (lj_bswap(x))
#else
#define A64I_LE(x) (x)
#endif
/* Instruction fields. */
#define A64F_D(r) (r)
#define A64F_N(r) ((r) << 5)
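
Note: A64I_LE() leaves instruction words untouched on little-endian builds and byte-swaps them on ARM64BE, because ARM64 instructions are always fetched little-endian. The 0x1f2003d5 constant in exitstub_trace_addr_() above is simply A64I_NOP (0xd503201f) after that swap, i.e. the NOP pattern as it appears in memory on a big-endian host. A quick hedged check (stand-alone, illustrative names):

#include <assert.h>
#include <stdint.h>

static uint32_t bswap32(uint32_t x)
{
  return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
         ((x << 8) & 0x00ff0000u) | (x << 24);
}

int main(void)
{
  /* A64I_NOP as stored in memory on a big-endian host. */
  assert(bswap32(0xd503201fu) == 0x1f2003d5u);
  return 0;
}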


@@ -151,6 +151,21 @@
|.define FRAME_FUNC, #-16
|.define FRAME_PC, #-8
|
|// Endian-specific defines.
|.if ENDIAN_LE
|.define LO, 0
|.define OFS_RD, 2
|.define OFS_RB, 3
|.define OFS_RA, 1
|.define OFS_OP, 0
|.else
|.define LO, 4
|.define OFS_RD, 0
|.define OFS_RB, 0
|.define OFS_RA, 2
|.define OFS_OP, 3
|.endif
|
|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro
|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro
|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro
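
Note: the OFS_* defines above are the byte offsets of the bytecode fields inside one 32-bit instruction word (OP in bits 0-7, A in bits 8-15, D in bits 16-31, with B in the top byte), as seen through byte loads like ldrb/ldrh. A hedged stand-alone check of those offsets (hypothetical example, not generated code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
  /* OP=0x01 (bits 0-7), A=0x02 (bits 8-15), C=0x03, B=0x04 (D = bits 16-31). */
  uint32_t ins = 0x04030201u;
  unsigned char b[4];
  memcpy(b, &ins, sizeof(ins));
  /* Little-endian: OP at 0, A at 1, D at 2, B at 3  (the ENDIAN_LE branch).
  ** Big-endian:    OP at 3, A at 2, D at 0, B at 0  (the .else branch).
  */
  printf("OP byte offset: %d\n", (b[0] == 0x01) ? 0 : 3);
  return 0;
}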
@@ -717,7 +732,7 @@ static void build_subroutines(BuildCtx *ctx)
| cmp CRET1, #1
| bhi ->vmeta_binop
|4:
| ldrh RBw, [PC, #2]
| ldrh RBw, [PC, # OFS_RD]
| add PC, PC, #4
| add RB, PC, RB, lsl #2
| sub RB, RB, #0x20000
@@ -1500,7 +1515,12 @@ static void build_subroutines(BuildCtx *ctx)
| bne ->fff_fallback
| checkint CARG1, ->fff_fallback
| mov CARG3, #1
| mov CARG2, BASE // Points to stack. Little-endian.
| // Point to the char inside the integer in the stack slot.
|.if ENDIAN_LE
| mov CARG2, BASE
|.else
| add CARG2, BASE, #7
|.endif
|->fff_newstr:
| // CARG2 = str, CARG3 = len.
| str BASE, L->base
@@ -1703,7 +1723,7 @@ static void build_subroutines(BuildCtx *ctx)
| ands TMP0, PC, #FRAME_TYPE
| and TMP1, PC, #~FRAME_TYPEP
| bne >3
| ldrb RAw, [PC, #-3]
| ldrb RAw, [PC, #-4+OFS_RA]
| lsl RA, RA, #3
| add TMP1, RA, #16
|3:
@@ -1838,7 +1858,7 @@ static void build_subroutines(BuildCtx *ctx)
|->cont_stitch: // Trace stitching.
|.if JIT
| // RA = resultptr, CARG4 = meta base
| ldr RB, SAVE_MULTRES
| ldr RBw, SAVE_MULTRES
| ldr INSw, [PC, #-4]
| ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
| subs RB, RB, #8
@@ -1869,7 +1889,7 @@ static void build_subroutines(BuildCtx *ctx)
|
| // Stitch a new trace to the previous trace.
| mov CARG1, #GL_J(exitno)
| str RA, [GL, CARG1]
| str RAw, [GL, CARG1]
| mov CARG1, #GL_J(L)
| str L, [GL, CARG1]
| str BASE, L->base
@@ -1936,6 +1956,9 @@ static void build_subroutines(BuildCtx *ctx)
| sub CARG1, CARG1, #2
| ldr CARG2w, [lr] // Load trace number.
| st_vmstate CARG4
|.if ENDIAN_BE
| rev32 CARG2, CARG2
|.endif
| str BASE, L->base
| ubfx CARG2w, CARG2w, #5, #16
| str CARG1w, [GL, #GL_J(exitno)]
@@ -1967,14 +1990,14 @@ static void build_subroutines(BuildCtx *ctx)
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
| movn TISNIL, #0
| and LFUNC:CARG2, CARG2, #LJ_GCVMASK
| str RC, SAVE_MULTRES
| str RCw, SAVE_MULTRES
| str BASE, L->base
| ldr CARG2, LFUNC:CARG2->pc
| str xzr, GL->jit_base
| mv_vmstate CARG4, INTERP
| ldr KBASE, [CARG2, #PC2PROTO(k)]
| // Modified copy of ins_next which handles function header dispatch, too.
| ldrb RBw, [PC]
| ldrb RBw, [PC, # OFS_OP]
| ldr INSw, [PC], #4
| st_vmstate CARG4
| cmp RBw, #BC_FUNCC+2 // Fast function?
@@ -2000,7 +2023,7 @@ static void build_subroutines(BuildCtx *ctx)
| ands CARG2, CARG1, #FRAME_TYPE
| bne <2 // Trace stitching continuation?
| // Otherwise set KBASE for Lua function below fast function.
| ldr CARG3, [CARG1, #-4]
| ldr CARG3w, [CARG1, #-4]
| decode_RA CARG1, CARG3
| sub CARG2, BASE, CARG1, lsl #3
| ldr LFUNC:CARG3, [CARG2, #-32]
@@ -2153,7 +2176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1, RC = src2, JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3]
| ldrh RBw, [PC, #2]
| ldrh RBw, [PC, # OFS_RD]
| ldr CARG2, [BASE, RC, lsl #3]
| add PC, PC, #4
| add RB, PC, RB, lsl #2
@@ -2210,7 +2233,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src1, RC = src2, JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3]
| add RC, BASE, RC, lsl #3
| ldrh RBw, [PC, #2]
| ldrh RBw, [PC, # OFS_RD]
| ldr CARG3, [RC]
| add PC, PC, #4
| add RB, PC, RB, lsl #2
@@ -2271,7 +2294,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src, RC = str_const (~), JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3]
| mvn RC, RC
| ldrh RBw, [PC, #2]
| ldrh RBw, [PC, # OFS_RD]
| ldr CARG2, [KBASE, RC, lsl #3]
| add PC, PC, #4
| movn TMP0, #~LJ_TSTR
@@ -2299,7 +2322,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src, RC = num_const (~), JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3]
| add RC, KBASE, RC, lsl #3
| ldrh RBw, [PC, #2]
| ldrh RBw, [PC, # OFS_RD]
| ldr CARG3, [RC]
| add PC, PC, #4
| add RB, PC, RB, lsl #2
@@ -2359,7 +2382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQP;
| // RA = src, RC = primitive_type (~), JMP with RC = target
| ldr TMP0, [BASE, RA, lsl #3]
| ldrh RBw, [PC, #2]
| ldrh RBw, [PC, # OFS_RD]
| add PC, PC, #4
| add RC, RC, #1
| add RB, PC, RB, lsl #2
@@ -2384,7 +2407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
| // RA = dst or unused, RC = src, JMP with RC = target
| ldrh RBw, [PC, #2]
| ldrh RBw, [PC, # OFS_RD]
| ldr TMP0, [BASE, RC, lsl #3]
| add PC, PC, #4
| mov_false TMP1
@@ -2631,7 +2654,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| str PC, SAVE_PC
| bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
| // Returns NULL (finished) or TValue * (metamethod).
| ldrb RBw, [PC, #-1]
| ldrb RBw, [PC, #-4+OFS_RB]
| ldr BASE, L->base
| cbnz CRET1, ->vmeta_binop
| ldr TMP0, [BASE, RB, lsl #3]
@@ -3262,7 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_callt
|
|5: // Tailcall to a fast function with a Lua frame below.
| ldrb RAw, [PC, #-3]
| ldrb RAw, [PC, #-4+OFS_RA]
| sub CARG1, BASE, RA, lsl #3
| ldr LFUNC:CARG1, [CARG1, #-32]
| and LFUNC:CARG1, CARG1, #LJ_GCVMASK
@@ -3303,8 +3326,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
| add RA, BASE, RA, lsl #3
| ldr TAB:RB, [RA, #-16]
| ldrh TMP3w, [PC, #2]
| ldr CARG1w, [RA, #-8] // Get index from control var.
| ldrh TMP3w, [PC, # OFS_RD]
| ldr CARG1w, [RA, #-8+LO] // Get index from control var.
| add PC, PC, #4
| add TMP3, PC, TMP3, lsl #2
| and TAB:RB, RB, #LJ_GCVMASK
@@ -3323,7 +3346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| stp CARG1, TMP0, [RA]
| add CARG1, CARG1, #1
|3:
| str CARG1w, [RA, #-8] // Update control var.
| str CARG1w, [RA, #-8+LO] // Update control var.
| mov PC, TMP3
|4:
| ins_next
@@ -3369,8 +3392,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Despecialize bytecode if any of the checks fail.
| mov TMP0, #BC_JMP
| mov TMP1, #BC_ITERC
| strb TMP0w, [PC, #-4]
| strb TMP1w, [RC]
| strb TMP0w, [PC, #-4+OFS_OP]
| strb TMP1w, [RC, # OFS_OP]
| b <1
break;
@@ -3576,7 +3599,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| csel PC, RC, PC, gt
} else if (op == BC_JFORI) {
| mov PC, RC
| ldrh RCw, [RC, #-2]
| ldrh RCw, [RC, #-4+OFS_RD]
} else if (op == BC_IFORL) {
| csel PC, RC, PC, le
}
@@ -3617,7 +3640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (op == BC_FORI) {
| csel PC, RC, PC, hi
} else if (op == BC_JFORI) {
| ldrh RCw, [RC, #-2]
| ldrh RCw, [RC, #-4+OFS_RD]
| bls =>BC_JLOOP
} else if (op == BC_IFORL) {
| csel PC, RC, PC, ls