ARM64: Add JIT compiler backend.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. Sponsored by Cisco Systems, Inc.
parent 13642b75ac
commit 04b60707d7

Makefile (4 changed lines)
@@ -86,8 +86,8 @@ FILE_MAN= luajit.1
FILE_PC= luajit.pc
FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
dis_x86.lua dis_x64.lua dis_arm.lua dis_ppc.lua \
dis_mips.lua dis_mipsel.lua vmdef.lua
dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
dis_ppc.lua dis_mips.lua dis_mipsel.lua vmdef.lua

ifeq (,$(findstring Windows,$(OS)))
HOST_SYS:= $(shell uname -s)
src/jit/dis_arm64.lua (new file, 1215 lines; diff suppressed because it is too large)
@@ -226,7 +226,6 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_TARGET_GC64 1
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#define LJ_ARCH_NOJIT 1 /* NYI */

#define LJ_ARCH_VERSION 80
@@ -171,6 +171,8 @@ IRFLDEF(FLOFS)
#include "lj_emit_x86.h"
#elif LJ_TARGET_ARM
#include "lj_emit_arm.h"
#elif LJ_TARGET_ARM64
#include "lj_emit_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_emit_ppc.h"
#elif LJ_TARGET_MIPS
@@ -1563,6 +1565,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_x86.h"
#elif LJ_TARGET_ARM
#include "lj_asm_arm.h"
#elif LJ_TARGET_ARM64
#include "lj_asm_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
src/lj_asm_arm64.h (new file, 1823 lines; diff suppressed because it is too large)
@@ -331,7 +331,7 @@

#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in separate (!) FPRs or on stack. */ \
isfp = ctr->size == 2*sizeof(float) ? 2 : 1;
isfp = sz == 2*sizeof(float) ? 2 : 1;

#define CCALL_HANDLE_REGARG \
if (LJ_TARGET_IOS && isva) { \
@@ -107,6 +107,7 @@ typedef struct GG_State {
#define J2G(J) (&J2GG(J)->g)
#define G2J(gl) (&G2GG(gl)->J)
#define L2J(L) (&L2GG(L)->J)
#define GG_G2J (GG_OFS(J) - GG_OFS(g))
#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g))
#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch))
#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch))
src/lj_emit_arm64.h (new file, 397 lines)
@@ -0,0 +1,397 @@
/*
** ARM64 instruction emitter.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
**
** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
** Sponsored by Cisco Systems, Inc.
*/

/* -- Constant encoding --------------------------------------------------- */

static uint64_t get_k64val(IRIns *ir)
{
  if (ir->o == IR_KINT64) {
    return ir_kint64(ir)->u64;
  } else if (ir->o == IR_KGC) {
    return (uint64_t)ir_kgc(ir);
  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
    return (uint64_t)ir_kptr(ir);
  } else {
    lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
    return ir->i;  /* Sign-extended. */
  }
}

/* Encode constant in K12 format for data processing instructions. */
static uint32_t emit_isk12(int64_t n)
{
  uint64_t k = (n < 0) ? -n : n;
  uint32_t m = (n < 0) ? 0x40000000 : 0;
  if (k < 0x1000) {
    return A64I_K12|m|A64F_U12(k);
  } else if ((k & 0xfff000) == k) {
    return A64I_K12|m|0x400000|A64F_U12(k>>12);
  }
  return 0;
}
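
Note (illustrative, not part of the patch): the K12 form is an unsigned 12-bit immediate, optionally shifted left by 12, with the 0x40000000 bit turning ADD into SUB for negative inputs. A hedged sketch of values the check above accepts, assuming it is compiled together with this header and <assert.h>:

    assert(emit_isk12(42) != 0);       /* 0..4095 encode directly. */
    assert(emit_isk12(0x7f000) != 0);  /* A 12-bit value shifted left by 12. */
    assert(emit_isk12(-4095) != 0);    /* Magnitude encodes; ADD turns into SUB. */
    assert(emit_isk12(0x1001) == 0);   /* Bits in both halves: no K12 encoding. */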

#define emit_clz64(n) __builtin_clzll(n)
#define emit_ctz64(n) __builtin_ctzll(n)

/* Encode constant in K13 format for logical data processing instructions. */
static uint32_t emit_isk13(uint64_t n, int is64)
{
  int inv = 0, w = 128, lz, tz;
  if (n & 1) { n = ~n; w = 64; inv = 1; }  /* Avoid wrap-around of ones. */
  if (!n) return 0;  /* Neither all-zero nor all-ones are allowed. */
  do {  /* Find the repeat width. */
    if (is64 && (uint32_t)(n^(n>>32))) break;
    n = (uint32_t)n; w = 32; if ((n^(n>>16)) & 0xffff) break;
    n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
    n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
    n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
    n = n & 0x3; w = 2;
  } while (0);
  lz = emit_clz64(n);
  tz = emit_ctz64(n);
  if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0;  /* Non-contiguous? */
  if (inv)
    return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
  else
    return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
}
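
Note (illustrative, not part of the patch): K13 covers the AArch64 "bitmask immediate" class, i.e. a contiguous run of ones inside an element of width 2/4/8/16/32/64 bits, replicated across the register. A hedged sketch of accept/reject cases (assuming <assert.h>):

    assert(emit_isk13(0x00000000ffffffffull, 1) != 0);  /* One contiguous run. */
    assert(emit_isk13(0x5555555555555555ull, 1) != 0);  /* 2-bit element, repeated. */
    assert(emit_isk13(0x0001000100010001ull, 1) != 0);  /* 16-bit element, repeated. */
    assert(emit_isk13(0, 1) == 0);                      /* All-zero is rejected. */
    assert(emit_isk13(0x0000000100000003ull, 1) == 0);  /* Not a repeated run. */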

static uint32_t emit_isfpk64(uint64_t n)
{
  uint64_t etop9 = ((n >> 54) & 0x1ff);
  if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) {
    return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
  }
  return ~0u;
}
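
Note (illustrative, not part of the patch): this matches the FMOV (scalar, immediate) class, so only doubles expressible with a sign, 3-bit exponent and 4-bit mantissa get an 8-bit immediate. A small hedged example:

    union { double d; uint64_t u; } x;
    x.d = 2.5;  assert(emit_isfpk64(x.u) != ~0u);  /* 1.25 * 2^1 has an FP8 form. */
    x.d = 0.1;  assert(emit_isfpk64(x.u) == ~0u);  /* Low mantissa bits set: no FP8 form. */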

/* -- Emit basic instructions --------------------------------------------- */

static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
{
  *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
}

static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
{
  *--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
}

static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
{
  *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
}

static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
{
  *--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
}

static void emit_d(ASMState *as, A64Ins ai, Reg rd)
{
  *--as->mcp = ai | A64F_D(rd);
}

static void emit_n(ASMState *as, A64Ins ai, Reg rn)
{
  *--as->mcp = ai | A64F_N(rn);
}

static int emit_checkofs(A64Ins ai, int64_t ofs)
{
  int scale = (ai >> 30) & 3;
  if (ofs < 0 || (ofs & ((1<<scale)-1))) {
    return (ofs >= -256 && ofs <= 255) ? -1 : 0;
  } else {
    return (ofs < (4096<<scale)) ? 1 : 0;
  }
}

static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
{
  int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
  lua_assert(ot);
  /* Combine LDR/STR pairs to LDP/STP. */
  if ((sc == 2 || sc == 3) &&
      (!(ai & 0x400000) || rd != rn) &&
      as->mcp != as->mcloop) {
    uint32_t prev = *as->mcp & ~A64F_D(31);
    int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
    A64Ins aip;
    if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
        prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
      aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
    } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
               prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
      aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
      ofsm = ofs;
    } else {
      goto nopair;
    }
    if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) {
      *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
        (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
      return;
    }
  }
nopair:
  if (ot == 1)
    *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc);
  else
    *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff);
}
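
Note (illustrative, not part of the patch): the pairing only fires for 4/8-byte accesses whose offset sits directly next to the previously emitted load/store, and the fused LDP/STP needs a signed 7-bit offset scaled by the access size. A hedged sketch of that range check for 8-byte slots:

    int sc = 3, ofs = 16, ofsm = ofs - (1 << sc);       /* Slot right below ofs. */
    assert(ofsm >= (-64 << sc) && ofsm <= (63 << sc));  /* Fits the scaled 7-bit field. */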

/* -- Emit loads/stores --------------------------------------------------- */

/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= ASMREF_L)

/* Try to find an N-step delta relative to other consts with N < lim. */
static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
{
  RegSet work = ~as->freeset & RSET_GPR;
  if (lim <= 1) return 0;  /* Can't beat that. */
  while (work) {
    Reg r = rset_picktop(work);
    IRRef ref = regcost_ref(as->cost[r]);
    lua_assert(r != rd);
    if (ref < REF_TRUE) {
      uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
                                     get_k64val(IR(ref));
      int64_t delta = (int64_t)(k - kx);
      if (delta == 0) {
        emit_dm(as, A64I_MOVx, rd, r);
        return 1;
      } else {
        uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta);
        if (k12) {
          emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
          return 1;
        }
        /* Do other ops or multi-step deltas pay off? Probably not.
        ** E.g. XOR rarely helps with pointer consts.
        */
      }
    }
    rset_clear(work, r);
  }
  return 0;  /* Failed. */
}
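
Note (illustrative, not part of the patch): in other words, a constant is cheap if it is a K12-sized delta away from a constant that already lives in a register. A hedged example with made-up values:

    uint64_t k = 0x100000fffull, kx = 0x100000000ull;  /* kx assumed register-resident. */
    assert(emit_isk12((int64_t)(k - kx)) != 0);        /* A single ADD #0xfff reuses it. */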

static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
{
  uint32_t k13 = emit_isk13(u64, is64);
  if (k13) {  /* Can the constant be represented as a bitmask immediate? */
    emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
  } else {
    int i, zeros = 0, ones = 0, neg;
    if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
    /* Count homogeneous 16 bit fragments. */
    for (i = 0; i < 4; i++) {
      uint64_t frag = (u64 >> i*16) & 0xffff;
      zeros += (frag == 0);
      ones += (frag == 0xffff);
    }
    neg = ones > zeros;  /* Use MOVN if it pays off. */
    if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
      int shift = 0, lshift = 0;
      uint64_t n64 = neg ? ~u64 : u64;
      if (n64 != 0) {
        /* Find first/last fragment to be filled. */
        shift = (63-emit_clz64(n64)) & ~15;
        lshift = emit_ctz64(n64) & ~15;
      }
      /* MOVK requires the original value (u64). */
      while (shift > lshift) {
        uint32_t u16 = (u64 >> shift) & 0xffff;
        /* Skip fragments that are correctly filled by MOVN/MOVZ. */
        if (u16 != (neg ? 0xffff : 0))
          emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
        shift -= 16;
      }
      /* But MOVN needs an inverted value (n64). */
      emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
                 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
    }
  }
}
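
Note (illustrative, not part of the patch): the fragment counting chooses between a MOVZ- and a MOVN-based sequence, with MOVK filling only the fragments that differ. A hedged worked example:

    uint64_t k = 0xffffffffffff1234ull;  /* Three 0xffff fragments, one odd fragment. */
    /* ones > zeros, so the loop above emits a single MOVN xD, #0xedcb
    ** instead of MOVZ #0x1234 plus three MOVK #0xffff. */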

/* Load a 32 bit constant into a GPR. */
#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0)

/* Load a 64 bit constant into a GPR. */
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)

#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))

#define glofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
#define mcpofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
#define checkmcpofs(as, k) \
  ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0)

static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);

/* Get/set from constant pointer. */
static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
{
  /* First, check if ip + offset is in range. */
  if ((ai & 0x00400000) && checkmcpofs(as, p)) {
    emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
  } else {
    Reg base = RID_GL;  /* Next, try GL + offset. */
    int64_t ofs = glofs(as, p);
    if (!emit_checkofs(ai, ofs)) {  /* Else split up into base reg + offset. */
      int64_t i64 = i64ptr(p);
      base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
      ofs = i64 & 0x7fffull;
    }
    emit_lso(as, ai, r, base, ofs);
  }
}

/* Load 64 bit IR constant into register. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
  const uint64_t *k = &ir_k64(ir)->u64;
  int64_t ofs;
  if (r >= RID_MAX_GPR) {
    uint32_t fpk = emit_isfpk64(*k);
    if (fpk != ~0u) {
      emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31));
      return;
    }
  }
  ofs = glofs(as, k);
  if (emit_checkofs(A64I_LDRx, ofs)) {
    emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx,
             (r & 31), RID_GL, ofs);
  } else {
    if (r >= RID_MAX_GPR) {
      emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP);
      r = RID_TMP;
    }
    if (checkmcpofs(as, k))
      emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r);
    else
      emit_loadu64(as, r, *k);
  }
}

/* Get/set global_State fields. */
#define emit_getgl(as, r, field) \
  emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
#define emit_setgl(as, r, field) \
  emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)

/* Trace number is determined from pc of exit instruction. */
#define emit_setvmstate(as, i) UNUSED(i)

/* -- Emit control-flow instructions -------------------------------------- */

/* Label for internal jumps. */
typedef MCode *MCLabel;

/* Return label pointing to current PC. */
#define emit_label(as) ((as)->mcp)

static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - (p - 1);
  lua_assert(((delta + 0x40000) >> 19) == 0);
  *--p = A64I_BCC | A64F_S19((uint32_t)delta & 0x7ffff) | cond;
  as->mcp = p;
}

static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - (p - 1);
  lua_assert(((delta + 0x02000000) >> 26) == 0);
  *--p = ai | ((uint32_t)delta & 0x03ffffffu);
  as->mcp = p;
}

#define emit_jmp(as, target) emit_branch(as, A64I_B, (target))

static void emit_call(ASMState *as, void *target)
{
  MCode *p = --as->mcp;
  ptrdiff_t delta = (char *)target - (char *)p;
  if ((((delta>>2) + 0x02000000) >> 26) == 0) {
    *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu);
  } else {  /* Target out of range: need indirect call. But don't use R0-R7. */
    Reg r = ra_allock(as, i64ptr(target),
                      RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
    *p = A64I_BLR | A64F_N(r);
  }
}
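
Note (illustrative, not part of the patch): BL reaches +-128 MB (a signed 26-bit word offset); anything farther is materialized into a scratch register outside X0-X7, so argument registers survive, and called via BLR. A hedged sketch of the range test with made-up addresses:

    char *p = (char *)0x10000000, *target = (char *)0x11000000;  /* Hypothetical. */
    ptrdiff_t delta = target - p;
    assert((((delta >> 2) + 0x02000000) >> 26) == 0);  /* In range: a direct BL is used. */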

/* -- Emit generic operations --------------------------------------------- */

/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  if (dst >= RID_MAX_GPR) {
    emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
            (dst & 31), (src & 31));
    return;
  }
  if (as->mcp != as->mcloop) {  /* Swap early registers for loads/stores. */
    MCode ins = *as->mcp, swp = (src^dst);
    if ((ins & 0xbf800000) == 0xb9000000) {
      if (!((ins ^ (dst << 5)) & 0x000003e0))
        *as->mcp = ins ^ (swp << 5);  /* Swap N in load/store. */
      if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
        *as->mcp = ins ^ swp;  /* Swap D in store. */
    }
  }
  emit_dm(as, A64I_MOVx, dst, src);
}

/* Generic load of register with base and (small) offset address. */
static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r >= RID_MAX_GPR)
    emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs);
  else
    emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs);
}

/* Generic store of register with base and (small) offset address. */
static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r >= RID_MAX_GPR)
    emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs);
  else
    emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs);
}

/* Emit an arithmetic operation with a constant operand. */
static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
                     int32_t i, RegSet allow)
{
  uint32_t k = emit_isk12(i);
  if (k)
    emit_dn(as, ai^k, dest, src);
  else
    emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
}

/* Add offset to pointer. */
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (ofs)
    emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
             ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r));
}

#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
@@ -296,6 +296,9 @@ enum {
#elif LJ_TARGET_ARM
  DW_REG_SP = 13,
  DW_REG_RA = 14,
#elif LJ_TARGET_ARM64
  DW_REG_SP = 31,
  DW_REG_RA = 30,
#elif LJ_TARGET_PPC
  DW_REG_SP = 1,
  DW_REG_RA = 65,
@@ -374,6 +377,8 @@ static const ELFheader elfhdr_template = {
  .machine = 62,
#elif LJ_TARGET_ARM
  .machine = 40,
#elif LJ_TARGET_ARM64
  .machine = 183,
#elif LJ_TARGET_PPC
  .machine = 20,
#elif LJ_TARGET_MIPS
@@ -563,6 +568,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
    int i;
    for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); }
  }
#elif LJ_TARGET_ARM64
  {
    int i;
    DB(DW_CFA_offset|31); DUV(2);
    for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); }
    for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); }
  }
#elif LJ_TARGET_PPC
  {
    int i;
@@ -55,7 +55,7 @@ typedef uint32_t RegSP;
/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
typedef uint64_t RegSet;
#else
typedef uint32_t RegSet;
@@ -69,7 +69,7 @@ typedef uint32_t RegSet;
#define rset_set(rs, r) (rs |= RID2RSET(r))
#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63))
#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs))
#else
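
Note (illustrative, not part of the patch): ARM64 joins PPC and MIPS in needing a 64-bit RegSet because its 32 GPRs plus 32 FPRs exceed 32 register ids. A hedged sketch, assuming FPR ids start at 32 as on the other 64-bit-RegSet targets:

    typedef uint64_t RegSet;
    RegSet rs = (1ull << 5) | (1ull << 40);  /* x5 and one FPR id (32 + 8). */
    int top = __builtin_clzll(rs) ^ 63;      /* rset_picktop: 40. */
    int bot = __builtin_ctzll(rs);           /* rset_pickbot: 5. */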
@@ -55,7 +55,8 @@ enum {

/* Make use of all registers, except for x18, fp, lr and sp. */
#define RSET_FIXED \
  (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP))
  (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\
   RID2RSET(RID_GL))
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
#define RSET_ALL (RSET_GPR|RSET_FPR)
@@ -73,25 +74,235 @@ enum {
#define REGARG_LASTFPR RID_D7
#define REGARG_NUMFPR 8

/* -- Spill slots --------------------------------------------------------- */

/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
**
** SPS_FIXED: Available fixed spill slots in interpreter frame.
** This definition must match with the vm_arm64.dasc file.
** Pre-allocate some slots to avoid sp adjust in every root trace.
**
** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
*/
#define SPS_FIXED 4
#define SPS_FIRST 2

#define SPOFS_TMP 0

#define sps_scale(slot) (4 * (int32_t)(slot))
#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)

/* -- Exit state ---------------------------------------------------------- */

/* This definition must match with the *.dasc file(s). */
typedef struct {
  lua_Number fpr[RID_NUM_FPR];  /* Floating-point registers. */
  intptr_t gpr[RID_NUM_GPR];    /* General-purpose registers. */
  int32_t spill[256];           /* Spill slots. */
} ExitState;

/* PC after instruction that caused an exit. Used to find the trace number. */
#define EXITSTATE_PCREG RID_LR
/* Highest exit + 1 indicates stack check. */
#define EXITSTATE_CHECKEXIT 1

#define EXITSTUB_SPACING 4
#define EXITSTUBS_PER_GROUP 32


/* -- Instructions -------------------------------------------------------- */

/* Instruction fields. */
#define A64F_D(r) (r)
#define A64F_N(r) ((r) << 5)
#define A64F_A(r) ((r) << 10)
#define A64F_M(r) ((r) << 16)
#define A64F_N(r) ((r) << 5)
#define A64F_A(r) ((r) << 10)
#define A64F_M(r) ((r) << 16)
#define A64F_IMMS(x) ((x) << 10)
#define A64F_IMMR(x) ((x) << 16)
#define A64F_U16(x) ((x) << 5)
#define A64F_U12(x) ((x) << 10)
#define A64F_S26(x) (x)
#define A64F_S19(x) ((x) << 5)
#define A64F_S9(x) ((x) << 12)
#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
#define A64F_EX(ex) (A64I_EX | ((ex) << 13))
#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10))
#define A64F_FP8(x) ((x) << 13)
#define A64F_CC(cc) ((cc) << 12)
#define A64F_LSL16(x) (((x) / 16) << 21)
#define A64F_BSH(sh) ((sh) << 10)

typedef enum A64Ins {
  A64I_S = 0x20000000,
  A64I_X = 0x80000000,
  A64I_EX = 0x00200000,
  A64I_K12 = 0x1a000000,
  A64I_K13 = 0x18000000,
  A64I_LS_U = 0x01000000,
  A64I_LS_S = 0x00800000,
  A64I_LS_R = 0x01200800,
  A64I_LS_UXTWx = 0x00005000,
  A64I_LS_LSLx = 0x00007000,

  A64I_ADDw = 0x0b000000,
  A64I_ADDx = 0x8b000000,
  A64I_ADDSw = 0x2b000000,
  A64I_ADDSx = 0xab000000,
  A64I_NEGw = 0x4b0003e0,
  A64I_NEGx = 0xcb0003e0,
  A64I_SUBw = 0x4b000000,
  A64I_SUBx = 0xcb000000,
  A64I_SUBSw = 0x6b000000,
  A64I_SUBSx = 0xeb000000,

  A64I_MULw = 0x1b007c00,
  A64I_MULx = 0x9b007c00,
  A64I_SMULL = 0x9b207c00,

  A64I_ANDw = 0x0a000000,
  A64I_ANDx = 0x8a000000,
  A64I_ANDSw = 0x6a000000,
  A64I_ANDSx = 0xea000000,
  A64I_EORw = 0x4a000000,
  A64I_EORx = 0xca000000,
  A64I_ORRw = 0x2a000000,
  A64I_ORRx = 0xaa000000,
  A64I_TSTw = 0x6a00001f,
  A64I_TSTx = 0xea00001f,

  A64I_CMPw = 0x6b00001f,
  A64I_CMPx = 0xeb00001f,
  A64I_CMNw = 0x2b00001f,
  A64I_CMNx = 0xab00001f,
  A64I_CCMPw = 0x7a400000,
  A64I_CCMPx = 0xfa400000,
  A64I_CSELw = 0x1a800000,
  A64I_CSELx = 0x9a800000,

  A64I_ASRw = 0x13007c00,
  A64I_ASRx = 0x9340fc00,
  A64I_LSLx = 0xd3400000,
  A64I_LSRx = 0xd340fc00,
  A64I_SHRw = 0x1ac02000,
  A64I_SHRx = 0x9ac02000,  /* lsl/lsr/asr/ror x0, x0, x0 */
  A64I_REVw = 0x5ac00800,
  A64I_REVx = 0xdac00c00,

  A64I_EXTRw = 0x13800000,
  A64I_EXTRx = 0x93c00000,
  A64I_SBFMw = 0x13000000,
  A64I_SBFMx = 0x93400000,
  A64I_SXTBw = 0x13001c00,
  A64I_SXTHw = 0x13003c00,
  A64I_SXTW = 0x93407c00,
  A64I_UBFMw = 0x53000000,
  A64I_UBFMx = 0xd3400000,
  A64I_UXTBw = 0x53001c00,
  A64I_UXTHw = 0x53003c00,

  A64I_MOVw = 0x2a0003e0,
  A64I_MOVx = 0xaa0003e0,
  A64I_MVNw = 0x2a2003e0,
  A64I_MVNx = 0xaa2003e0,
  A64I_MOVKw = 0x72800000,
  A64I_MOVKx = 0xf2800000,
  A64I_MOVZw = 0x52800000,
  A64I_MOVZx = 0xd2800000,
  A64I_MOVNw = 0x12800000,
  A64I_MOVNx = 0x92800000,

  A64I_LDRB = 0x39400000,
  A64I_LDRH = 0x79400000,
  A64I_LDRw = 0xb9400000,
  A64I_LDRx = 0xf9400000,
  A64I_LDRLw = 0x18000000,
  A64I_LDRLx = 0x58000000,
  A64I_NOP = 0xd503201f,
  A64I_STRB = 0x39000000,
  A64I_STRH = 0x79000000,
  A64I_STRw = 0xb9000000,
  A64I_STRx = 0xf9000000,
  A64I_STPw = 0x29000000,
  A64I_STPx = 0xa9000000,
  A64I_LDPw = 0x29400000,
  A64I_LDPx = 0xa9400000,

  A64I_B = 0x14000000,
  A64I_BCC = 0x54000000,
  A64I_BL = 0x94000000,
  A64I_BR = 0xd61f0000,
  A64I_BLR = 0xd63f0000,

  A64I_NOP = 0xd503201f,

  /* FP */
  A64I_FADDd = 0x1e602800,
  A64I_FSUBd = 0x1e603800,
  A64I_FMADDd = 0x1f400000,
  A64I_FMSUBd = 0x1f408000,
  A64I_FNMADDd = 0x1f600000,
  A64I_FNMSUBd = 0x1f608000,
  A64I_FMULd = 0x1e600800,
  A64I_FDIVd = 0x1e601800,
  A64I_FNEGd = 0x1e614000,
  A64I_FABS = 0x1e60c000,
  A64I_FSQRTd = 0x1e61c000,
  A64I_LDRs = 0xbd400000,
  A64I_LDRd = 0xfd400000,
  A64I_STRs = 0xbd000000,
  A64I_STRd = 0xfd000000,
  A64I_LDPs = 0x2d400000,
  A64I_LDPd = 0x6d400000,
  A64I_STPs = 0x2d000000,
  A64I_STPd = 0x6d000000,
  A64I_FCMPd = 0x1e602000,
  A64I_FCMPZd = 0x1e602008,
  A64I_FCSELd = 0x1e600c00,
  A64I_FRINTMd = 0x1e654000,
  A64I_FRINTPd = 0x1e64c000,
  A64I_FRINTZd = 0x1e65c000,

  A64I_FCVT_F32_F64 = 0x1e624000,
  A64I_FCVT_F64_F32 = 0x1e22c000,
  A64I_FCVT_F32_S32 = 0x1e220000,
  A64I_FCVT_F64_S32 = 0x1e620000,
  A64I_FCVT_F32_U32 = 0x1e230000,
  A64I_FCVT_F64_U32 = 0x1e630000,
  A64I_FCVT_F32_S64 = 0x9e220000,
  A64I_FCVT_F64_S64 = 0x9e620000,
  A64I_FCVT_F32_U64 = 0x9e230000,
  A64I_FCVT_F64_U64 = 0x9e630000,
  A64I_FCVT_S32_F64 = 0x1e780000,
  A64I_FCVT_S32_F32 = 0x1e380000,
  A64I_FCVT_U32_F64 = 0x1e790000,
  A64I_FCVT_U32_F32 = 0x1e390000,
  A64I_FCVT_S64_F64 = 0x9e780000,
  A64I_FCVT_S64_F32 = 0x9e380000,
  A64I_FCVT_U64_F64 = 0x9e790000,
  A64I_FCVT_U64_F32 = 0x9e390000,

  A64I_FMOV_S = 0x1e204000,
  A64I_FMOV_D = 0x1e604000,
  A64I_FMOV_R_S = 0x1e260000,
  A64I_FMOV_S_R = 0x1e270000,
  A64I_FMOV_R_D = 0x9e660000,
  A64I_FMOV_D_R = 0x9e670000,
  A64I_FMOV_DI = 0x1e601000,
} A64Ins;
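
Note (illustrative, not part of the patch): the emitter composes an instruction word by OR-ing one of these base opcodes with the A64F_* field encoders above. For example:

    uint32_t ins = A64I_ADDx | A64F_D(0) | A64F_N(1) | A64F_M(2);
    assert(ins == 0x8b020020u);  /* add x0, x1, x2 */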

typedef enum A64Shift {
  A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
} A64Shift;

typedef enum A64Extend {
  A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX,
  A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX,
} A64Extend;

/* ARM condition codes. */
typedef enum A64CC {
  CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
  CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
  CC_HS = CC_CS, CC_LO = CC_CC
} A64CC;

#endif
@@ -236,12 +236,17 @@
|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
|
#define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field))
#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|.macro hotcheck, delta
| NYI
| lsr CARG1, PC, #1
| and CARG1, CARG1, #126
| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT
| ldrh CARG2w, [GL, CARG1]
| subs CARG2, CARG2, #delta
| strh CARG2w, [GL, CARG1]
|.endmacro
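
Note (illustrative, not part of the patch): the replacement of the NYI stub maps each bytecode PC onto one of 64 two-byte hot counters stored GG_DISP2HOT bytes past the dispatch table, decrements it by `delta`, and traps to the JIT on underflow. Roughly, in C, with made-up values:

    uintptr_t pc = 0x5a0104;             /* Hypothetical bytecode address. */
    unsigned byteofs = (pc >> 1) & 126;  /* lsr #1; and #126: even index into uint16_t[64]. */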
|
|.macro hotloop
@@ -869,7 +874,7 @@ static void build_subroutines(BuildCtx *ctx)
| bl extern lj_meta_for // (lua_State *L, TValue *base)
| ldr INSw, [PC, #-4]
|.if JIT
| uxtb TMP0, INS
| uxtb TMP0w, INSw
|.endif
| decode_RA RA, INS
| decode_RD RC, INS
@@ -1732,7 +1737,20 @@ static void build_subroutines(BuildCtx *ctx)
|//-----------------------------------------------------------------------
|
|->vm_record: // Dispatch target for recording phase.
| NYI
|.if JIT
| ldrb CARG1w, GL->hookmask
| tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
| bne >5
| // Decrement the hookcount for consistency, but always do the call.
| ldr CARG2w, GL->hookcount
| tst CARG1, #HOOK_ACTIVE
| bne >1
| sub CARG2w, CARG2w, #1
| tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
| beq >1
| str CARG2w, GL->hookcount
| b >1
|.endif
|
|->vm_rethook: // Dispatch target for return hooks.
| ldrb TMP2w, GL->hookmask
@@ -1774,7 +1792,21 @@ static void build_subroutines(BuildCtx *ctx)
| b <4
|
|->vm_hotloop: // Hot loop counter underflow.
| NYI
|.if JIT
| ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L).
| add CARG1, GL, #GG_G2DISP+GG_DISP2J
| and LFUNC:CARG3, CARG3, #LJ_GCVMASK
| str PC, SAVE_PC
| ldr CARG3, LFUNC:CARG3->pc
| mov CARG2, PC
| str L, [GL, #GL_J(L)]
| ldrb CARG3w, [CARG3, #PC2PROTO(framesize)]
| str BASE, L->base
| add CARG3, BASE, CARG3, lsl #3
| str CARG3, L->top
| bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
| b <3
|.endif
|
|->vm_callhook: // Dispatch target for call hooks.
| mov CARG2, PC
@@ -1804,7 +1836,54 @@ static void build_subroutines(BuildCtx *ctx)
| br CRET1
|
|->cont_stitch: // Trace stitching.
| NYI
|.if JIT
| // RA = resultptr, CARG4 = meta base
| ldr RB, SAVE_MULTRES
| ldr INSw, [PC, #-4]
| ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
| subs RB, RB, #8
| decode_RA RC, INS // Call base.
| and CARG3, CARG3, #LJ_GCVMASK
| beq >2
|1: // Move results down.
| ldr CARG1, [RA]
| add RA, RA, #8
| subs RB, RB, #8
| str CARG1, [BASE, RC, lsl #3]
| add RC, RC, #1
| bne <1
|2:
| decode_RA RA, INS
| decode_RB RB, INS
| add RA, RA, RB
|3:
| cmp RA, RC
| bhi >9 // More results wanted?
|
| ldrh RAw, TRACE:CARG3->traceno
| ldrh RCw, TRACE:CARG3->link
| cmp RCw, RAw
| beq ->cont_nop // Blacklisted.
| cmp RCw, #0
| bne =>BC_JLOOP // Jump to stitched trace.
|
| // Stitch a new trace to the previous trace.
| mov CARG1, #GL_J(exitno)
| str RA, [GL, CARG1]
| mov CARG1, #GL_J(L)
| str L, [GL, CARG1]
| str BASE, L->base
| add CARG1, GL, #GG_G2J
| mov CARG2, PC
| bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
| ldr BASE, L->base
| b ->cont_nop
|
|9: // Fill up results with nil.
| str TISNIL, [BASE, RC, lsl #3]
| add RC, RC, #1
| b <3
|.endif
|
|->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE
@@ -1822,10 +1901,120 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|.macro savex_, a, b
| stp d..a, d..b, [sp, #a*8]
| stp x..a, x..b, [sp, #32*8+a*8]
|.endmacro
|
|->vm_exit_handler:
| NYI
|.if JIT
| sub sp, sp, #(64*8)
| savex_, 0, 1
| savex_, 2, 3
| savex_, 4, 5
| savex_, 6, 7
| savex_, 8, 9
| savex_, 10, 11
| savex_, 12, 13
| savex_, 14, 15
| savex_, 16, 17
| savex_, 18, 19
| savex_, 20, 21
| savex_, 22, 23
| savex_, 24, 25
| savex_, 26, 27
| savex_, 28, 29
| stp d30, d31, [sp, #30*8]
| ldr CARG1, [sp, #64*8] // Load original value of lr.
| add CARG3, sp, #64*8 // Recompute original value of sp.
| mv_vmstate CARG4, EXIT
| ldr CARG2w, [CARG1, #-4]! // Get exit instruction.
| stp CARG1, CARG3, [sp, #62*8] // Store exit pc/sp in RID_LR/RID_SP.
| lsl CARG2, CARG2, #38
| add CARG1, CARG1, CARG2, asr #36
| ldr CARG2w, [lr] // Load exit stub group offset.
| sub CARG1, CARG1, lr
| sub CARG1, CARG1, #4
| ldr L, GL->cur_L
| add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
| ldr BASE, GL->jit_base
| st_vmstate CARG4
| str CARG1w, [GL, #GL_J(exitno)]
| str BASE, L->base
| str L, [GL, #GL_J(L)]
| str xzr, GL->jit_base
| add CARG1, GL, #GG_G2J
| mov CARG2, sp
| bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
| // Returns MULTRES (unscaled) or negated error code.
| ldr CARG2, L->cframe
| ldr BASE, L->base
| and sp, CARG2, #CFRAME_RAWMASK
| ldr PC, SAVE_PC // Get SAVE_PC.
| str L, SAVE_L // Set SAVE_L (on-trace resume/yield).
| b >1
|.endif
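
Note (illustrative, not part of the patch): the exit number is recovered without storing it in the trace. The handler re-reads the branch that jumped to the exit stub, follows its signed 26-bit displacement into the stub group, and adds the group's base number (the word the group's BL returns past), relying on EXITSTUB_SPACING being 4. A rough, hedged C rendering with illustrative names:

    uint8_t *trace_lr, *stub_lr;  /* From the saved register state (illustrative). */
    uint32_t ins = *(uint32_t *)(trace_lr - 4);           /* Exit branch in the trace. */
    int64_t disp = (int64_t)((uint64_t)ins << 38) >> 36;  /* Sign-extended imm26, in bytes. */
    uint8_t *stub = (trace_lr - 4) + disp;                /* Stub inside the 32-entry group. */
    uint32_t exitno = *(uint32_t *)stub_lr + (uint32_t)((stub - stub_lr - 4) >> 2);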
|
|->vm_exit_interp:
| NYI
| // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
|.if JIT
| ldr L, SAVE_L
|1:
| cmp CARG1w, #0
| blt >9 // Check for error from exit.
| lsl RC, CARG1, #3
| ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
| movn TISNIL, #0
| and LFUNC:CARG2, CARG2, #LJ_GCVMASK
| str RC, SAVE_MULTRES
| str BASE, L->base
| ldr CARG2, LFUNC:CARG2->pc
| str xzr, GL->jit_base
| mv_vmstate CARG4, INTERP
| ldr KBASE, [CARG2, #PC2PROTO(k)]
| // Modified copy of ins_next which handles function header dispatch, too.
| ldrb RBw, [PC]
| ldr INSw, [PC], #4
| st_vmstate CARG4
| cmp RBw, #BC_FUNCC+2 // Fast function?
| add TMP1, GL, INS, uxtb #3
| bhs >4
|2:
| cmp RBw, #BC_FUNCF // Function header?
| add TMP0, GL, RB, uxtb #3
| ldr RB, [TMP0, #GG_G2DISP]
| decode_RA RA, INS
| lsr TMP0, INS, #16
| csel RC, TMP0, RC, lo
| blo >5
| ldr CARG3, [BASE, FRAME_FUNC]
| sub RC, RC, #8
| add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8
| and LFUNC:CARG3, CARG3, #LJ_GCVMASK
|5:
| br RB
|
|4: // Check frame below fast function.
| ldr CARG1, [BASE, FRAME_PC]
| ands CARG2, CARG1, #FRAME_TYPE
| bne <2 // Trace stitching continuation?
| // Otherwise set KBASE for Lua function below fast function.
| ldr CARG3, [CARG1, #-4]
| decode_RA CARG1, CARG3
| sub CARG2, BASE, CARG1, lsl #3
| ldr LFUNC:CARG3, [CARG2, #-32]
| and LFUNC:CARG3, CARG3, #LJ_GCVMASK
| ldr CARG3, LFUNC:CARG3->pc
| ldr KBASE, [CARG3, #PC2PROTO(k)]
| b <2
|
|9: // Rethrow error from the right C frame.
| neg CARG2, CARG1
| mov CARG1, L
| bl extern lj_err_throw // (lua_State *L, int errcode)
|.endif
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
@@ -3387,6 +3576,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (op == BC_FORI) {
| csel PC, RC, PC, gt
} else if (op == BC_JFORI) {
| mov PC, RC
| ldrh RCw, [RC, #-2]
} else if (op == BC_IFORL) {
| csel PC, RC, PC, le
@@ -3488,7 +3678,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

case BC_JLOOP:
|.if JIT
| NYI
| // RA = base (ignored), RC = traceno
| ldr CARG1, [GL, #GL_J(trace)]
| mov CARG2, #0 // Traces on ARM64 don't store the trace #, so use 0.
| ldr TRACE:RC, [CARG1, RC, lsl #3]
| st_vmstate CARG2
| ldr RA, TRACE:RC->mcode
| str BASE, GL->jit_base
| str L, GL->tmpbuf.L
| sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace.
| br RA
|.endif
break;

@@ -3546,10 +3745,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_IFUNCV:
| // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
| ldr CARG1, L->maxstack
| movn TMP0, #~LJ_TFUNC
| add TMP2, BASE, RC
| add LFUNC:CARG3, CARG3, TMP0, lsl #47
| add RA, RA, RC
| add TMP0, RC, #16+FRAME_VARG
| str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC.
| str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC.
| ldr KBASE, [PC, #-4+PC2PROTO(k)]
| cmp RA, CARG1
| str TMP0, [TMP2], #8 // Store delta + FRAME_VARG.
@@ -3736,8 +3937,8 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.uleb128 0x1\n"
"\t.sleb128 -8\n"
"\t.byte 30\n" /* Return address is in lr. */
"\t.uleb128 1\n" /* augmentation length */
"\t.byte 0x1b\n" /* pcrel|sdata4 */
"\t.uleb128 1\n" /* augmentation length */
"\t.byte 0x1b\n" /* pcrel|sdata4 */
"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
"\t.align 3\n"
".LECIE2:\n\n");
@@ -3748,7 +3949,7 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long .LASFDE3-.Lframe2\n"
"\t.long lj_vm_ffi_call-.\n"
"\t.long %d\n"
"\t.uleb128 0\n" /* augmentation length */
"\t.uleb128 0\n" /* augmentation length */
"\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
"\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
"\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */