Mirror of https://github.com/LuaJIT/LuaJIT.git
ARM64: Emit more efficient trace exits.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
commit 81259898ea
parent 1131fa22a2
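The change drops the shared exit stub groups that were generated at the bottom of the reserved machine-code area and instead appends a private stub block to the top of each trace's machine code: str lr, [sp]; bl ->vm_exit_handler; a movz w0, #traceno word; and one bl <1 back-branch per exit. A guard now branches conditionally straight to its bl <1 slot (one instruction per guard instead of an inverted branch around an unconditional bl), and the exit handler recovers both the exit number and the trace number from the two link-register values and the movz word. The sketch below is a rough host-side model of that layout and arithmetic, not LuaJIT code: word indices stand in for addresses, and the opcode constants follow the standard A64 encodings.

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Standard A64 opcodes as used in the patch (assumed values). */
#define A64I_BL     0x94000000u
#define A64I_MOVZw  0x52800000u
#define A64F_U16(x) ((uint32_t)(x) << 5)

int main(void)
{
  enum { NEXITS = 8, TRACENO = 17 };
  /* Per-trace stub block, as laid out by the new asm_exitstub_setup():
  ** [0] str lr, [sp]   [1] bl ->vm_exit_handler   [2] movz w0, #traceno
  ** [3+i] bl <1        (one back-branch per exit i)
  */
  uint32_t stub[3 + NEXITS];
  int i;
  stub[0] = 0;          /* str lr, [sp]; exact encoding irrelevant for the model. */
  stub[1] = A64I_BL;    /* bl ->vm_exit_handler; offset irrelevant here. */
  stub[2] = A64I_MOVZw | A64F_U16(TRACENO);
  for (i = 0; i < NEXITS; i++)
    stub[3+i] = A64I_BL | ((uint32_t)(-3-i) & 0x03ffffffu);  /* bl back to stub[0]. */

  for (i = 0; i < NEXITS; i++) {
    /* A guard for exit i branches to stub[3+i], cf. asm_exitstub_addr(). */
    uint32_t lr_trace = (uint32_t)(3 + i + 1);  /* lr after the taken bl <1. */
    uint32_t lr_handler = 2;  /* lr after bl ->vm_exit_handler: the movz word. */
    /* ->vm_exit_handler computes ((saved lr - lr) >> 2) - 2; the model uses
    ** word indices, so the byte-to-word shift is already implied. */
    uint32_t exitno = (lr_trace - lr_handler) - 2;
    uint32_t traceno = (stub[lr_handler] >> 5) & 0xffffu;  /* ubfx w, #5, #16. */
    assert(exitno == (uint32_t)i && traceno == TRACENO);
  }
  printf("recovered exits 0..%d and trace %d from the stub layout\n",
         NEXITS-1, TRACENO);
  return 0;
}

In the emitted code the bl <1 of each exit jumps back to the common head, so every exit shares the str/bl/movz prologue and each additional exit costs a single instruction word.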
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -47,53 +47,41 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
 
 /* -- Guard handling ------------------------------------------------------ */
 
-/* Generate an exit stub group at the bottom of the reserved MCode memory. */
-static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
-{
-  MCode *mxp = as->mcbot;
-  int i;
-  if (mxp + 3*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
-    asm_mclimit(as);
-  /* str lr, [sp]; bl ->vm_exit_handler; .long group. */
-  *mxp++ = A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP);
-  *mxp = A64I_BL | (((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu);
-  mxp++;
-  *mxp++ = group*EXITSTUBS_PER_GROUP;
-  for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
-    *mxp++ = A64I_B | ((-3-i)&0x03ffffffu);
-  lj_mcode_sync(as->mcbot, mxp);
-  lj_mcode_commitbot(as->J, mxp);
-  as->mcbot = mxp;
-  as->mclim = as->mcbot + MCLIM_REDZONE;
-  return mxp - EXITSTUBS_PER_GROUP;
-}
-
 /* Setup all needed exit stubs. */
 static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
 {
   ExitNo i;
-  if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
-    lj_trace_err(as->J, LJ_TRERR_SNAPOV);
-  for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
-    if (as->J->exitstubgroup[i] == NULL)
-      as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
+  MCode *mxp = as->mctop;
+  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
+  /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+  for (i = nexits-1; (int32_t)i >= 0; i--)
+    *--mxp = A64I_BL|((-3-i)&0x03ffffffu);
+  *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno);
+  mxp--;
+  *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu);
+  *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP);
+  as->mctop = mxp;
+}
+
+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
+{
+  /* Keep this in-sync with exitstub_trace_addr(). */
+  return as->mctop + exitno + 3;
 }
 
 /* Emit conditional branch to exit for guard. */
 static void asm_guardcc(ASMState *as, A64CC cc)
 {
-  MCode *target = exitstub_addr(as->J, as->snapno);
+  MCode *target = asm_exitstub_addr(as, as->snapno);
   MCode *p = as->mcp;
   if (LJ_UNLIKELY(p == as->invmcp)) {
     as->loopinv = 1;
-    *p = A64I_BL | ((target-p) & 0x03ffffffu);
+    *p = A64I_B | ((target-p) & 0x03ffffffu);
     emit_cond_branch(as, cc^1, p-1);
     return;
   }
-  /* No conditional calls. Emit b.cc/bl instead. */
-  /* That's a bad idea. NYI: emit per-trace exit stubs instead, see PPC. */
-  emit_branch(as, A64I_BL, target);
-  emit_cond_branch(as, cc^1, p);
+  emit_cond_branch(as, cc, target);
 }
 
 /* -- Operand fusion ------------------------------------------------------ */
@@ -1568,8 +1556,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   } else {
     pbase = RID_BASE;
   }
-  emit_branch(as, A64I_BL, exitstub_addr(as->J, exitno));
-  emit_cond_branch(as, CC_LS^1, as->mcp+1);
+  emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
   k = emit_isk12((8*topslot));
   lua_assert(k);
   emit_n(as, A64I_CMPx^k, RID_TMP);
@@ -1744,7 +1731,8 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
   /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
   int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
   if (spadj == 0) {
-    as->mctop = --p;
+    *--p = A64I_NOP;
+    as->mctop = p;
   } else {
     /* Patch stack adjustment. */
     uint32_t k = emit_isk12(spadj);
@@ -1805,13 +1793,18 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
   MCode *pe = (MCode *)((char *)p + T->szmcode);
   MCode *cstart = NULL, *cend = p;
   MCode *mcarea = lj_mcode_patch(J, p, 0);
-  MCode *px = exitstub_addr(J, exitno);
+  MCode *px = exitstub_trace_addr(T, exitno);
   for (; p < pe; p++) {
-    /* Look for bl exitstub, replace with b target. */
+    /* Look for bcc/b exitstub, replace with bcc/b target. */
     uint32_t ins = *p;
-    if ((ins & 0xfc000000u) == 0x94000000u &&
+    if ((ins & 0xff000000u) == 0x54000000u &&
+	((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+      *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u);
+      cend = p+1;
+      if (!cstart) cstart = p;
+    } else if ((ins & 0xfc000000u) == 0x14000000u &&
 	((ins ^ (px-p)) & 0x03ffffffu) == 0) {
-      *p = (ins & 0x7c000000u) | ((target-p) & 0x03ffffffu);
+      *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu);
       cend = p+1;
       if (!cstart) cstart = p;
     }
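lj_asm_patchexit() now has to recognize and re-target two shapes of exit branches: the conditional b.cc emitted directly by asm_guardcc() (opcode 0x54000000, signed 19-bit word offset in bits 23..5, condition in bits 3..0) and the unconditional b (opcode 0x14000000, 26-bit offset), instead of the old bl (0x94000000). The standalone sketch below walks the same mask logic with plain word indices in place of real code addresses; the decode helper assumes an arithmetic right shift of negative values.

#include <stdint.h>
#include <assert.h>

/* Decode the signed 19-bit word offset of a b.cc instruction. */
static int32_t bcc_offset(uint32_t ins)
{
  return (int32_t)(ins << 8) >> 13;
}

int main(void)
{
  int32_t p = 100, px = 140, target = 20;  /* Guard, old exit stub, new target. */
  /* b.eq to the exit stub, as asm_guardcc() would have emitted it. */
  uint32_t ins = 0x54000000u | (((uint32_t)(px - p) << 5) & 0x00ffffe0u);

  /* Match test from the patch loop: a b.cc whose target is the exit stub. */
  assert((ins & 0xff000000u) == 0x54000000u &&
         ((ins ^ ((uint32_t)(px - p) << 5)) & 0x00ffffe0u) == 0);

  /* Re-target it, keeping the opcode and condition bits (mask 0xff00001f). */
  ins = (ins & 0xff00001fu) | (((uint32_t)(target - p) << 5) & 0x00ffffe0u);
  assert(bcc_offset(ins) == target - p);
  return 0;
}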
--- a/src/lj_target_arm64.h
+++ b/src/lj_target_arm64.h
@@ -101,14 +101,18 @@ typedef struct {
   int32_t spill[256];		/* Spill slots. */
 } ExitState;
 
-/* PC after instruction that caused an exit. Used to find the trace number. */
-#define EXITSTATE_PCREG		RID_LR
-
 /* Highest exit + 1 indicates stack check. */
 #define EXITSTATE_CHECKEXIT	1
 
-#define EXITSTUB_SPACING	4
-#define EXITSTUBS_PER_GROUP	32
+/* Return the address of a per-trace exit stub. */
+static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
+{
+  while (*p == 0xd503201f) p++;  /* Skip A64I_NOP. */
+  return p + 3 + exitno;
+}
+/* Avoid dependence on lj_jit.h if only including lj_target.h. */
+#define exitstub_trace_addr(T, exitno) \
+  exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
 
 /* -- Instructions -------------------------------------------------------- */
 
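exitstub_trace_addr() locates a trace's stubs relative to the end of its machine code (mcode + szmcode). Because asm_tail_fixup() now writes an A64I_NOP into the slot it no longer needs instead of shrinking mctop past it, the stub head can sit one NOP past that end, which is why the helper skips NOPs before stepping over the three-word stub head. A minimal illustration, again with word indices instead of addresses:

#include <stdint.h>
#include <assert.h>

#define A64I_NOP 0xd503201fu

/* Same logic as the new helper in lj_target_arm64.h. */
static uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{
  while (*p == A64I_NOP) p++;  /* Skip A64I_NOP. */
  return p + 3 + exitno;       /* Skip str lr,[sp]; bl handler; movz w0,#traceno. */
}

int main(void)
{
  /* [0] nop left by asm_tail_fixup, [1..3] stub head, [4+i] bl <1 per exit. */
  uint32_t mc[1 + 3 + 4] = { A64I_NOP, 1, 2, 3, 40, 41, 42, 43 };
  assert(exitstub_trace_addr_(mc, 2) == &mc[1 + 3 + 2]);  /* Exit 2 -> its bl <1. */
  return 0;
}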
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -1928,19 +1928,18 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr CARG1, [sp, #64*8]		// Load original value of lr.
   |  add CARG3, sp, #64*8		// Recompute original value of sp.
   |  mv_vmstate CARG4, EXIT
-  |  ldr CARG2w, [CARG1, #-4]!		// Get exit instruction.
-  |  stp CARG1, CARG3, [sp, #62*8]	// Store exit pc/sp in RID_LR/RID_SP.
-  |  lsl CARG2, CARG2, #38
-  |  add CARG1, CARG1, CARG2, asr #36
-  |  ldr CARG2w, [lr]			// Load exit stub group offset.
+  |  stp xzr, CARG3, [sp, #62*8]	// Store 0/sp in RID_LR/RID_SP.
   |  sub CARG1, CARG1, lr
-  |  sub CARG1, CARG1, #4
   |  ldr L, GL->cur_L
-  |  add CARG1, CARG2, CARG1, lsr #2	// Compute exit number.
+  |  lsr CARG1, CARG1, #2
   |  ldr BASE, GL->jit_base
+  |  sub CARG1, CARG1, #2
+  |  ldr CARG2w, [lr]			// Load trace number.
   |  st_vmstate CARG4
-  |  str CARG1w, [GL, #GL_J(exitno)]
   |  str BASE, L->base
+  |  ubfx CARG2w, CARG2w, #5, #16
+  |  str CARG1w, [GL, #GL_J(exitno)]
+  |  str CARG2w, [GL, #GL_J(parent)]
   |  str L, [GL, #GL_J(L)]
   |  str xzr, GL->jit_base
   |  add CARG1, GL, #GG_G2J
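In the new ->vm_exit_handler the exit number is recovered purely from link registers: the lr value saved by the stub head points just past the taken bl <1, while the current lr (set by bl ->vm_exit_handler) points at the movz w0, #traceno word, which is also reloaded to extract the trace number for GL_J(parent). A worked example of that arithmetic with a hypothetical stub block at address 0x1000:

#include <stdint.h>
#include <assert.h>

int main(void)
{
  /* Hypothetical per-trace stub block, 4-byte instructions:
  ** 0x1000  str lr, [sp]
  ** 0x1004  bl ->vm_exit_handler
  ** 0x1008  movz w0, #traceno
  ** 0x100c  bl <1  (exit 0),  0x1010  bl <1  (exit 1), ...
  */
  uint64_t base = 0x1000, exitno = 5;
  uint64_t lr_saved = base + 0x0c + 4*exitno + 4;  /* Past the taken bl <1, stored by the stub head. */
  uint64_t lr = base + 0x08;                       /* Past bl ->vm_exit_handler: the movz word. */
  uint64_t x = lr_saved - lr;  /* sub CARG1, CARG1, lr */
  x >>= 2;                     /* lsr CARG1, CARG1, #2  -- bytes to instructions. */
  x -= 2;                      /* sub CARG1, CARG1, #2  -- lr sits two words before exit 0's return address. */
  assert(x == exitno);         /* Stored by: str CARG1w, [GL, #GL_J(exitno)]. */
  return 0;
}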