/*
** mikepaul-LuaJIT/src/vm_s390x.dasc
** Michael Munday a3501b062d Various fixes for coroutines.
** Now passing the tests.
** 2017-01-05 23:39:25 -05:00
**
** 4113 lines
** 111 KiB
** Plaintext
*/

|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
|
|// ELF ABI registers:
|// r0,r1 | | volatile |
|// r2 | parameter and return value | volatile |
|// r3-r5 | parameter | volatile |
|// r6 | parameter | saved |
|// r7-r11 | | saved |
|// r12 | GOT pointer (needed?) | saved |
|// r13 | literal pool (needed?) | saved |
|// r14 | return address | volatile |
|// r15 | stack pointer | saved |
|// f0,f2,f4,f6 | parameter and return value | volatile |
|// f1,f3,f5,f7 | | volatile |
|// f8-f15 | | saved |
|// ar0,ar1 | TLS | volatile |
|// ar2-ar15 | | volatile |
|
|// Instructions used that are not in base z/Architecture:
|// clfi (compare logical immediate) [requires z9-109]
|// ldgr (load FPR from GPR) [requires z9-109 GA3]
|// lgdr (load GPR from FPR) [requires z9-109 GA3]
|// ldy (load (long bfp)) [requires z900 GA2]
|// stdy (store (long bfp)) [requires z900 GA2]
|// TODO: alternative instructions?
|
|.arch s390x
|.section code_op, code_sub
|
|.actionlist build_actionlist
|.globals GLOB_
|.globalnames globnames
|.externnames extnames
|
|//-----------------------------------------------------------------------
|
|// Fixed register assignments for the interpreter, callee-saved.
|.define BASE, r7 // Base of current Lua stack frame.
|.define KBASE, r8 // Constants of current Lua function.
|.define PC, r9 // Next PC.
|.define DISPATCH, r10 // Opcode dispatch table.
|.define LREG, r11 // Register holding lua_State (also in SAVE_L).
|.define ITYPE, r13 // Scratch for the type tag extracted by the check* macros.
|
|// Temporaries. RA (r1) and RC (r5) are volatile and are lost across C calls.
|// RB (r12) and RD (r6) are callee-saved in the ELF ABI and survive C calls.
|.define RA, r1
|.define RB, r12
|.define RC, r5 // Overlaps CARG4.
|.define RD, r6 // Overlaps CARG5. Callee-saved.
|
|// Calling conventions. Also used as temporaries.
|.define CARG1, r2
|.define CARG2, r3
|.define CARG3, r4
|.define CARG4, r5
|.define CARG5, r6
|
|.define FARG1, f0
|.define FARG2, f2
|.define FARG3, f4
|.define FARG4, f6
|
|// Integer return value. Same register as CARG1 and OP.
|.define CRET1, r2
|
|// OP shares r2 with CARG1/CRET1. TMPR1 is r14, the return address register,
|// so it is overwritten by every brasl/basr. TMPR2 is r0 and therefore cannot
|// be used as a base or index register in an address.
|.define OP, r2
|.define TMPR1, r14
|.define TMPR2, r0
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|// All offsets below (except SAVE_GPRS_P) are relative to sp after the
|// saveregs macro has lowered it by CFRAME_SPACE.
|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
|
|// Register save area.
|// SAVE_GPRS and SAVE_GPRS_P name the same memory: 288 = 48 + CFRAME_SPACE.
|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated).
|
|// Argument save area.
|.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
|.define SAVE_NRES, 272(sp) // Argument 3, in r4. Size is 4-bytes.
|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3.
|.define SAVE_L, 256(sp) // Argument 1, in r2.
|.define RESERVED, 248(sp) // Reserved for compiler use.
|.define BACKCHAIN, 240(sp) // <- sp entering interpreter.
|
|// Interpreter stack frame.
|.define SAVE_FPR15, 232(sp)
|.define SAVE_FPR14, 224(sp)
|.define SAVE_FPR13, 216(sp)
|.define SAVE_FPR12, 208(sp)
|.define SAVE_FPR11, 200(sp)
|.define SAVE_FPR10, 192(sp)
|.define SAVE_FPR9, 184(sp)
|.define SAVE_FPR8, 176(sp)
|.define SAVE_PC, 168(sp)
|// SAVE_MULTRES is accessed with 4-byte loads/stores (st/llgf) at 160(sp).
|// An 8-byte store to TMP_STACK therefore overwrites SAVE_MULTRES as well.
|.define SAVE_MULTRES, 160(sp)
|.define TMP_STACK, 160(sp) // Overlaps SAVE_MULTRES
|.define TMP_STACK_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES).
|
|// Callee save area (allocated by interpreter).
|.define CALLEESAVE, 000(sp) // <- sp in interpreter.
|
|// Prologue for every C entry point into the VM.
|// Stores r6-r15 into the save area at 48(sp) of the incoming frame, lowers
|// sp by CFRAME_SPACE, then stores f8-f15 into the new frame.
|// r0-r5 are left untouched, so the incoming arguments stay live.
|.macro saveregs
| stmg r6, r15, SAVE_GPRS_P
| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
| // TODO: save backchain?
| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
| std f9, SAVE_FPR9
| std f10, SAVE_FPR10
| std f11, SAVE_FPR11
| std f12, SAVE_FPR12
| std f13, SAVE_FPR13
| std f14, SAVE_FPR14
| std f15, SAVE_FPR15
|.endmacro
|
|// Epilogue matching saveregs: reloads f8-f15, then r6-r15.
|// The lmg reloads r15 (sp) and r14 (return address) from the save area, so
|// the frame is released without an explicit sp adjustment and the caller
|// of this macro can return with "br r14".
|.macro restoreregs
| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
| ld f9, SAVE_FPR9
| ld f10, SAVE_FPR10
| ld f11, SAVE_FPR11
| ld f12, SAVE_FPR12
| ld f13, SAVE_FPR13
| ld f14, SAVE_FPR14
| ld f15, SAVE_FPR15
| lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
|.endmacro
|
|// Type definitions. Some of these are only used for documentation.
|.type L, lua_State
|.type GL, global_State
|.type TVALUE, TValue
|.type GCOBJ, GCobj
|.type STR, GCstr
|.type TAB, GCtab
|.type LFUNC, GCfuncL
|.type CFUNC, GCfuncC
|.type PROTO, GCproto
|.type UPVAL, GCupval
|.type NODE, Node
|.type NARGS, int
|.type TRACE, GCtrace
|.type SBUF, SBuf
|
|//-----------------------------------------------------------------------
|
|// Instruction headers.
|// These mark the operand format of a bytecode handler. All of them are
|// empty because ins_NEXT already decodes every operand; only ins_AND emits
|// code (it complements RD).
|.macro ins_A; .endmacro
|.macro ins_AD; .endmacro
|.macro ins_AJ; .endmacro
|.macro ins_ABC; .endmacro
|.macro ins_AB_; .endmacro
|.macro ins_A_C; .endmacro
|// Clobbers TMPR1.
|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
|
|// Instruction decode+dispatch.
|// Loads the 32-bit instruction at PC, splits it into OP, RA, RB, RC and RD,
|// advances PC by 4 and branches through the dispatch table entry for OP.
|// On exit: RA = A, RB = B, RC = C, RD = D (the upper 16 bits), OP = opcode.
|// Clobbers TMPR1.
| // TODO: tune this, right now we always decode RA-D even if they aren't used.
|.macro ins_NEXT
| llgf RD, 0(PC)
| // 32 63
| // [ B | C | A | OP ]
| // [ D | A | OP ]
| llghr RA, RD
| srlg RA, RA, 8(r0)
| llgcr OP, RD
| srlg RD, RD, 16(r0)
| lgr RB, RD
| srlg RB, RB, 8(r0)
| llgcr RC, RD
| la PC, 4(PC)
| llgfr TMPR1, OP
| sllg TMPR1, TMPR1, 3(r0) // TMPR1=OP*8
| lg TMPR1, 0(TMPR1, DISPATCH)
| br TMPR1
|.endmacro
|
|// Instruction footer.
|// ins_next ends a bytecode handler. With the ".if 1" below it expands to a
|// full copy of ins_NEXT at every use; the disabled branch would instead jump
|// to a single shared copy emitted where ins_next_ is placed.
|.if 1
| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
| .define ins_next, ins_NEXT
| .define ins_next_, ins_NEXT
|.else
| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
| .macro ins_next
| j ->ins_next
| .endmacro
| .macro ins_next_
| ->ins_next:
| ins_NEXT
| .endmacro
|.endif
|
|// Call decode and dispatch.
|// ins_callt loads the first instruction of the function in RB and
|// dispatches on its opcode. Unlike ins_NEXT it does not split the operands:
|// RA receives the whole 32-bit instruction word and RD is left untouched.
|// Clobbers OP and TMPR1.
|.macro ins_callt
| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
| lg PC, LFUNC:RB->pc
| llgf RA, 0(PC) // TODO: combine loads?
| llgcr OP, RA
| sllg TMPR1, OP, 3(r0)
| la PC, 4(PC)
| lg TMPR1, 0(TMPR1, DISPATCH)
| br TMPR1
|.endmacro
|
|// ins_call first saves the caller's PC in the frame link slot at -8(BASE),
|// then dispatches like ins_callt.
|.macro ins_call
| // BASE = new base, RB = LFUNC, RD = nargs+1
| stg PC, -8(BASE)
| ins_callt
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
|// Displacement from the dispatch table (held in DISPATCH) to a field of
|// global_State or jit_State.
#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
|
|// Displacement from a function's first bytecode instruction back to a field
|// of its GCproto header (the result is negative by sizeof(GCproto)).
#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|//-----------------------------------------------------------------------
|
|// Macros to clear or set tags.
|// A tagged value keeps its type tag in the upper 17 bits (bits 47..63) and
|// its payload in the lower 47 bits.
|
|// cleartp: zero the upper 17 bits of reg, leaving the 47-bit payload.
|.macro cleartp, reg; sllg reg, reg, 17(r0); srlg reg, reg, 17(r0); .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
|// settp reg, tp: OR the tag tp into the upper 17 bits of reg in place.
|// The upper bits of reg must be zero beforehand for the result to be exact.
|.macro settp, reg, tp
| oihh reg, ((tp>>1) &0xffff)
| oihl reg, ((tp<<15)&0x8000)
|.endmacro
|// settp dst, reg, tp: dst = (tp << 47) | reg.
|// dst is overwritten before reg is read, so dst and reg must differ.
|.macro settp, dst, reg, tp
| llihh dst, ((tp>>1) &0xffff)
| iihl dst, ((tp<<15)&0x8000)
| ogr dst, reg
|.endmacro
|// setint: tag a value as an integer (LJ_TISNUM).
|.macro setint, reg
| settp reg, LJ_TISNUM
|.endmacro
|.macro setint, dst, reg
| settp dst, reg, LJ_TISNUM
|.endmacro
|
|// Macros to test operand types.
|// Every macro here leaves the sign-extended tag (reg >> 47) in ITYPE and
|// sets the condition code from an unsigned 32-bit compare against the
|// expected tag. Callers may keep testing ITYPE after the branch falls
|// through or is taken.
|
|// checktp_nc: branch to target if the tag differs from tp; reg is unchanged.
|.macro checktp_nc, reg, tp, target
| srag ITYPE, reg, 47(r0)
| clfi ITYPE, tp
| jne target
|.endmacro
|// checktp: like checktp_nc, but also strips the tag from reg. The tag is
|// stripped before the branch, so reg is untagged on both paths.
|.macro checktp, reg, tp, target
| srag ITYPE, reg, 47(r0)
| cleartp reg
| clfi ITYPE, tp
| jne target
|.endmacro
|// checktptp: same code as checktp_nc.
|.macro checktptp, src, tp, target
| srag ITYPE, src, 47(r0)
| clfi ITYPE, tp
| jne target
|.endmacro
|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
|
|// checknumx: compare the tag with LJ_TISNUM and branch with the given
|// condition. reg is unchanged.
|.macro checknumx, reg, target, jump
| srag ITYPE, reg, 47(r0)
| clfi ITYPE, LJ_TISNUM
| jump target
|.endmacro
|// checkint*: branch unless the tag is exactly LJ_TISNUM.
|// checknum*: branch if the tag is >= LJ_TISNUM (unsigned).
|// checknumber: branch if the tag is > LJ_TISNUM (unsigned).
|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro
|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro
|.macro checknumber, src, target; checknumx src, target, jh; .endmacro
|
|// Materialize the boolean constants: all bits set, then one bit cleared
|// (bit 47 for false, bit 48 for true).
|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47)
|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47)
|
|// Operand fields of the instruction just dispatched. PC has already been
|// advanced by 4, so the fields sit at negative offsets. OP, A, B and C are
|// single bytes; D is the halfword that starts at the B byte.
|.define PC_OP, -1(PC)
|.define PC_RA, -2(PC)
|.define PC_RB, -4(PC)
|.define PC_RC, -3(PC)
|.define PC_RD, -4(PC)
|
|// PC += reg*4 - BCBIAS_J*4, i.e. apply a biased jump operand to PC.
|// TMPR1 is preserved (parked in TMPR2 while it serves as the index
|// register); TMPR2 is clobbered. None of the instructions used changes
|// the condition code.
|.macro branchPC, reg
| // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
| // Can't clobber TMPR1 or condition code.
| lgr TMPR2, TMPR1 // Workaround because TMPR2 == r0 and can't be used in lay.
| sllg TMPR1, reg, 2(r0)
| lay PC, (-BCBIAS_J*4)(TMPR1, PC)
| lgr TMPR1, TMPR2
|.endmacro
|
|// Set current VM state.
|// Stores ~LJ_VMST_<st> into the vmstate field of global_State.
|// Clobbers TMPR1.
|// NOTE(review): this is an 8-byte store (stg). If vmstate is a 32-bit field,
|// as the dword store in the x64 VM suggests, a big-endian 8-byte store puts
|// the sign-extension word into vmstate and the value into the following
|// 4 bytes. Confirm the field width against lj_obj.h and switch to a 4-byte
|// store if so.
|.macro set_vmstate, st
| lghi TMPR1, ~LJ_VMST_..st
| stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH)
|.endmacro
|
|// Synthesize binary floating-point constants.
|// Builds the bit pattern 0x4338000000000000 in the GPR tmp and moves it to
|// the FPR reg. Clobbers tmp.
|.macro bfpconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
| llihh tmp, 0x4338
| ldgr reg, tmp
|.endmacro
|
|// Move table write barrier back. Overwrites reg.
|// Clears the LJ_GC_BLACK bit in tab->marked, then pushes tab onto the head
|// of the gc.grayagain list (the old head is stored in tab->gclist).
|// tab must hold an untagged GCtab pointer.
|.macro barrierback, tab, reg
| // TODO: more efficient way?
| llgc reg, tab->marked
| nill reg, (uint16_t)~LJ_GC_BLACK // black2gray(tab)
| stc reg, tab->marked
| lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH)
| stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH)
| stg reg, tab->gclist
|.endmacro
#if !LJ_DUALNUM
#error "Only dual-number mode supported for s390x target"
#endif
/* Generate subroutines used by opcodes and other parts of the VM. */
/* The .code_sub section should be last to help static branch prediction. */
static void build_subroutines(BuildCtx *ctx)
{
|.code_sub
|
|//-----------------------------------------------------------------------
|//-- Return handling ----------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Return through a protected (pcall/xpcall) frame or a continuation frame.
|// In: BASE = base, RA = resultofs, RD = nresults, PC = frame link.
|// If the FRAME_P bit is clear in PC this is a continuation frame and control
|// goes to cont_dispatch. Otherwise "true" is prepended to the results and
|// execution falls through into vm_returnc.
|->vm_returnp:
| lghi TMPR2, FRAME_P
| nr TMPR2, PC
| je ->cont_dispatch
|
| // Return from pcall or xpcall fast func.
| nill PC, -8
| sgr BASE, PC // Restore caller base.
| lay RA, -8(RA, PC) // Rebase RA and prepend one result.
| lg PC, -8(BASE) // Fetch PC of previous frame.
| // Prepending may overwrite the pcall frame, so do it at the end.
| load_true ITYPE
| stg ITYPE, 0(RA, BASE) // Prepend true to results.
|
|// Common return path after the result count has been adjusted.
|// Increments RD and stores it as MULTRES. A result of zero after the
|// increment is routed to vm_unwind_yield. A frame link with no FRAME_TYPE
|// bits set returns to Lua via BC_RET_Z; anything else falls through into
|// vm_return.
|->vm_returnc:
| aghi RD, 1 // RD = nresults+1
| je ->vm_unwind_yield
| st RD, SAVE_MULTRES
| tmll PC, FRAME_TYPE
| je ->BC_RET_Z // Handle regular return to Lua.
|
|// Return from a Lua frame to a non-Lua frame.
|// After flipping the FRAME_C bits in PC, any remaining FRAME_TYPE bit means
|// the frame is not a C frame and vm_returnp takes over. Otherwise the
|// results are copied down to BASE-16, their number is adjusted to the count
|// requested in SAVE_NRES, L->base/L->top are updated and the C frame is left.
|->vm_return:
| // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
| lghi TMPR1, FRAME_C
| xgr PC, TMPR1
| tmll PC, FRAME_TYPE
| jne ->vm_returnp
|
| // Return to C.
| set_vmstate C
| nill PC, -8
| sgr PC, BASE
| lcgr PC, PC // Previous base = BASE - delta.
|
| aghi RD, -1
| je >2
|1: // Move results down.
| lg RB, 0(BASE, RA)
| stg RB, -16(BASE)
| la BASE, 8(BASE)
| aghi RD, -1
| jne <1
|2:
| lg L:RB, SAVE_L
| stg PC, L:RB->base
|3:
| llgf RD, SAVE_MULTRES
| lgf RA, SAVE_NRES // RA = wanted nresults+1
|4:
| cgr RA, RD
| jne >6 // More/less results wanted?
|5:
| lay BASE, -16(BASE)
| stg BASE, L:RB->top
|
|// Unlink this C frame from L->cframe and return status 0.
|// Uses LREG (not RB) as L, so vm_cpcall can jump here directly.
|->vm_leave_cp:
| lg RA, SAVE_CFRAME // Restore previous C frame.
| stg RA, L:LREG->cframe
| lghi CRET1, 0 // Ok return status for vm_pcall.
|
|// Restore the callee-saved registers and return to the C caller with the
|// status already in CRET1.
|->vm_leave_unw:
| restoreregs
| br r14
|
|6:
| // The condition code is still the one set by "cgr RA, RD" at label 4.
| jl >7 // Less results wanted?
| // More results wanted. Check stack size and fill up results with nil.
| cg BASE, L:RB->maxstack
| jh >8
| lghi TMPR1, LJ_TNIL
| stg TMPR1, -16(BASE)
| la BASE, 8(BASE)
| aghi RD, 1
| j <4
|
|7: // Fewer results wanted.
| cghi RA, 0
| je <5 // But check for LUA_MULTRET+1.
| sgr RA, RD // Negative result!
| sllg TMPR1, RA, 3(r0)
| lay BASE, 0(TMPR1, BASE) // Correct top.
| j <5
|
|8: // Corner case: need to grow stack for filling up results.
| // This can happen if:
| // - A C function grows the stack (a lot).
| // - The GC shrinks the stack in between.
| // - A return back from a lua_call() with (high) nresults adjustment.
| stg BASE, L:RB->top // Save current top held in BASE (yes).
| st RD, SAVE_MULTRES // Need to fill only remainder with nil.
| lgr CARG2, RA
| lgr CARG1, L:RB
| brasl r14, extern lj_state_growstack // (lua_State *L, int n)
| lg BASE, L:RB->top // Need the (realloced) L->top in BASE.
| // RA and RD are reloaded from the stack frame at label 3.
| j <3
|
|// Leave the VM with status LUA_YIELD. sp is left as it is.
|->vm_unwind_yield:
| lghi CRET1, LUA_YIELD
| j ->vm_unwind_c_eh
|
|// Unwind the C stack to the given C frame and return errcode from vm_pcall.
|// In: CARG1 = cframe (new sp), CARG2 = errcode.
|// CRET1 and CARG1 are the same register (r2), so sp must be loaded from
|// CARG1 before the status is moved into CRET1.
|->vm_unwind_c: // Unwind C stack, return from vm_pcall.
| // (void *cframe, int errcode)
| lgr sp, CARG1
| lgfr CRET1, CARG2 // Error return status for vm_pcall.
|// In: sp = C frame to return from, CRET1 = status. Marks the VM state as C
|// and leaves through vm_leave_unw. Clobbers RB and TMPR1.
|// NOTE(review): 8-byte store to vmstate; see if the field is 32 bits wide
|// (the x64 VM uses a dword store) and use a 4-byte store if so.
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| lg L:RB, SAVE_L
| lg GL:RB, L:RB->glref
| lghi TMPR1, ~LJ_VMST_C
| stg TMPR1, GL:RB->vmstate
| j ->vm_leave_unw
|
|// Not implemented. The body is two stores to address 0, the same pattern
|// that is marked "remove once tested" elsewhere in this file; presumably it
|// is meant to fault if this entry point is ever reached.
|->vm_unwind_rethrow:
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|// Unwind the C stack to the given C frame and make the enclosing pcall fast
|// function return (false, errmsg). The error message is taken from the
|// slot at L->base. DISPATCH is recomputed from L->glref.
|->vm_unwind_ff: // Unwind C stack, return from ff pcall.
| // (void *cframe)
| nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK.
| lgr sp, CARG1
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| lg L:RB, SAVE_L
| lghi RD, 1+1 // Really 1+2 results, incr. later.
| lg BASE, L:RB->base
| lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| lay DISPATCH, GG_G2DISP(DISPATCH)
| lg PC, -8(BASE) // Fetch PC of previous frame.
| load_false RA
| lg RB, 0(BASE)
| stg RA, -16(BASE) // Prepend false to error message.
| stg RB, -8(BASE)
| lghi RA, -16 // Results start at BASE+RA = BASE-16.
| set_vmstate INTERP
| j ->vm_returnc // Increments RD/MULTRES and returns.
|
|//-----------------------------------------------------------------------
|//-- Grow stack for calls -----------------------------------------------
|//-----------------------------------------------------------------------
|
|// Stack growth helpers. All three entries end up calling
|// lj_state_growstack(L, n) at label 2 and then retry the call with the
|// reloaded base/top via ins_callt.
|
|// C function: grow by LUA_MINSTACK slots. Expects RB = L with L->base and
|// L->top already set (see the comment at label 2).
|->vm_growstack_c: // Grow stack for C function.
| lghi CARG2, LUA_MINSTACK
| j >2
|
|// Vararg Lua function: RD already holds a stack address; only adjust it.
|->vm_growstack_v: // Grow stack for vararg Lua function.
| aghi RD, -16 // LJ_FR2
| j >1
|
|->vm_growstack_f: // Grow stack for fixarg Lua function.
| // BASE = new base, RD = nargs+1, RB = L, PC = first PC
| sllg RD, NARGS:RD, 3(r0)
| lay RD, -8(RD, BASE)
|1:
| // RA = framesize byte of the prototype; PC points one instruction past
| // the function header here, hence the extra -4.
| llgc RA, (PC2PROTO(framesize)-4)(PC)
| la PC, 4(PC) // Must point after first instruction.
| stg BASE, L:RB->base
| stg RD, L:RB->top
| stg PC, SAVE_PC
| lgr CARG2, RA
|2:
| // RB = L, L->base = new base, L->top = top
| lgr CARG1, L:RB
| brasl r14, extern lj_state_growstack // (lua_State *L, int n)
| lg BASE, L:RB->base
| lg RD, L:RB->top
| lg LFUNC:RB, -16(BASE)
| cleartp LFUNC:RB
| // Recompute nargs+1 from the (possibly relocated) base and top.
| sgr RD, BASE
| srlg RD, RD, 3(r0)
| aghi NARGS:RD, 1
| // BASE = new base, RB = LFUNC, RD = nargs+1
| ins_callt // Just retry the call.
|
|//-----------------------------------------------------------------------
|//-- Entry points into the assembler VM ---------------------------------
|//-----------------------------------------------------------------------
|
|// C entry point: set up a C frame and resume a coroutine.
|// In: CARG1 = L, CARG2 = base. SAVE_PC, SAVE_CFRAME, SAVE_NRES and SAVE_ERRF
|// are all zeroed. If L->status is 0 this is an initial resume and control
|// joins the call path at label 2 (defined after vm_call). Otherwise the
|// status is cleared and the values between base and L->top are returned to
|// the yielded frame.
|->vm_resume: // Setup C frame and resume thread.
| // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
| saveregs
| lgr L:RB, CARG1
| stg CARG1, SAVE_L
| lgr RA, CARG2
| lghi PC, FRAME_CP
| lghi RD, 0
| lay KBASE, CFRAME_RESUME(sp)
| lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| aghi DISPATCH, GG_G2DISP
| stg RD, SAVE_PC // Any value outside of bytecode is ok.
| stg RD, SAVE_CFRAME
| st RD, SAVE_NRES
| stg RD, SAVE_ERRF
| stg KBASE, L:RB->cframe
| lgr LREG, L:RB
| // Compare the low byte of RD (zero) with the status byte.
| clm RD, 1, L:RB->status
| je >2 // Initial resume (like a call).
|
| // Resume after yield (like a return).
| stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
| set_vmstate INTERP
| stc RD, L:RB->status
| lg BASE, L:RB->base
| lg RD, L:RB->top
| sgr RD, RA
| srlg RD, RD, 3(r0)
| aghi RD, 1 // RD = nresults+1
| sgr RA, BASE // RA = resultofs
| lg PC, -8(BASE)
| st RD, SAVE_MULTRES
| tmll PC, FRAME_TYPE
| je ->BC_RET_Z
| j ->vm_return
|
|// C entry point: protected call. Differs from vm_call only in the frame
|// type (FRAME_CP) and in saving the error function argument to SAVE_ERRF.
|->vm_pcall: // Setup protected C frame and enter VM.
| // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
| saveregs
| lghi PC, FRAME_CP
| llgfr CARG4, CARG4
| stg CARG4, SAVE_ERRF
| j >1
|
|// C entry point: unprotected call. SAVE_ERRF is not written on this path.
|->vm_call: // Setup C frame and enter VM.
| // (lua_State *L, TValue *base, int nres1)
| saveregs
| lghi PC, FRAME_C
|
|1: // Entry point for vm_pcall above (PC = ftype).
| st CARG3, SAVE_NRES
| lgr L:RB, CARG1
| stg CARG1, SAVE_L
| lgr RA, CARG2
|
| lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
| stg KBASE, SAVE_CFRAME
| stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
| aghi DISPATCH, GG_G2DISP
| stg sp, L:RB->cframe
| lgr L:LREG, L:RB // TODO: use RB instead of LREG here?
|
|2: // Entry point for vm_resume/vm_cpcall (RA = base, LREG = L, PC = ftype).
| stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
| set_vmstate INTERP
| lg BASE, L:LREG->base // BASE = old base (used in vmeta_call).
| agr PC, RA
| sgr PC, BASE // PC = frame delta + frame type
|
| lg RD, L:LREG->top
| sgr RD, RA
| srlg NARGS:RD, NARGS:RD, 3(r0) // TODO: support '3' on its own in dynasm.
| aghi NARGS:RD, 1 // RD = nargs+1
|
|// In: RA = new base, RD = nargs+1, PC = frame link, BASE = old base.
|// Loads the callee from -16(RA); a non-function goes to vmeta_call.
|->vm_call_dispatch:
| lg LFUNC:RB, -16(RA)
| checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
|
|// In: as above, with RB = untagged function object.
|->vm_call_dispatch_f:
| lgr BASE, RA
| ins_call
| // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
| // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
|
|// C entry point: call the C function cp(L, func, ud) inside a protected
|// C frame. CARG1-CARG3 are passed through unchanged. If cp returns NULL the
|// frame is simply removed; otherwise the returned pointer is used as the
|// new base and the call continues at label 2 of vm_call.
|// SAVE_NRES holds the 32-bit value of L->stack - L->top.
|->vm_cpcall: // Setup protected C frame, call C.
| // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
| saveregs
| lgr LREG, CARG1
| stg LREG, SAVE_L
| stg LREG, SAVE_PC // Any value outside of bytecode is ok.
|
| lg KBASE, L:LREG->stack // Compute -savestack(L, L->top).
| sg KBASE, L:LREG->top
| lg DISPATCH, L:LREG->glref // Setup pointer to dispatch table.
| lghi RA, 0
| stg RA, SAVE_ERRF // No error function.
| st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
| aghi DISPATCH, GG_G2DISP
| // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
|
| lg KBASE, L:LREG->cframe // Add our C frame to cframe chain.
| stg KBASE, SAVE_CFRAME
| stg sp, L:LREG->cframe
| stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
|
| basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud)
| // TValue * (new base) or NULL returned in r2 (CRET1/).
| cghi CRET1, 0
| je ->vm_leave_cp // No base? Just remove C frame.
| lgr RA, CRET1
| lghi PC, FRAME_CP
| j <2 // Else continue with the call.
|
|//-----------------------------------------------------------------------
|//-- Metamethod handling ------------------------------------------------
|//-----------------------------------------------------------------------
|
|//-- Continuation dispatch ----------------------------------------------
|
|// Return from a metamethod call into its continuation.
|// Restores the caller's BASE and PC from the continuation frame, makes sure
|// at least one result slot is valid, reloads KBASE from the caller's
|// prototype and branches to the continuation address stored at -32(RB).
|// On exit: BASE = caller base, RC = address of first result, RB = meta base.
|// Clobbers TMPR1 and TMPR2.
|->cont_dispatch:
| // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
| agr RA, BASE
| nill PC, -8
| lgr RB, BASE
| sgr BASE, PC // Restore caller BASE.
| sllg TMPR1, RD, 3(r0)
| lghi TMPR2, LJ_TNIL
| stg TMPR2, -8(RA, TMPR1) // Ensure one valid arg.
| lgr RC, RA // ... in [RC]
| lg PC, -24(RB) // Restore PC from [cont|PC].
| lg RA, -32(RB)
|.if FFI
| stg r0, 0(r0) // TODO: remove once tested.
| clfi RA, 1
| jle >1
|.endif
| lg LFUNC:KBASE, -16(BASE)
| cleartp LFUNC:KBASE
| lg KBASE, LFUNC:KBASE->pc
| lg KBASE, (PC2PROTO(k))(KBASE)
| // BASE = base, RC = result, RB = meta base
| br RA // Jump to continuation.
|
|.if FFI
|1:
| // The condition code is still the one set by "clfi RA, 1" above; the
| // store below does not change it.
| stg r0, 0(r0) // TODO: remove once tested.
| je ->cont_ffi_callback // cont = 1: return from FFI callback.
| // cont = 0: Tail call from C function.
| sgr RB, BASE
| srl RB, 3(r0)
| ahi RB, -3
| llgfr RD, RB // Register-to-register zero extension (was the load form).
| j ->vm_call_tail
|.endif
|
|// Continuation after a __concat metamethod call.
|// Computes the address of the slot named by operand B and compares it with
|// mbase-32. If they are equal the concatenation is complete and the result
|// is stored via cont_ra. Otherwise the result is stored at mbase-32 and
|// BC_CAT_Z is re-entered with CARG1 = L, CARG2 = that slot and CARG3 = the
|// distance between the two addresses in slots.
|->cont_cat: // BASE = base, RC = result, RB = mbase
| llgc RA, PC_RB
| sllg RA, RA, 3(r0)
| aghi RB, -32
| la RA, 0(RA, BASE)
| sgr RA, RB
| je ->cont_ra
| lcgr RA, RA
| srlg RA, RA, 3(r0)
| lg L:CARG1, SAVE_L
| stg BASE, L:CARG1->base
| lgfr CARG3, RA
| lg RA, 0(RC)
| stg RA, 0(RB)
| lgr CARG2, RB
| j ->BC_CAT_Z
|
|//-- Table indexing metamethods -----------------------------------------
|
|// Slow paths for indexed loads. The three entries differ only in how the
|// key is materialized as a TValue; all of them call lj_meta_tget(L, o, k).
|
|// String key: RC = untagged GCstr *. The tagged key is built in TMP_STACK.
|// For BC_GGET the table in RB is tagged and stored in g->tmptv, which then
|// serves as the object operand.
|->vmeta_tgets:
| settp STR:RC, LJ_TSTR // STR:RC = GCstr *
| stg STR:RC, TMP_STACK
| la RC, TMP_STACK
| llgc TMPR1, PC_OP
| cghi TMPR1, BC_GGET
| jne >1
| settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
| lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
| stg TAB:RA, 0(RB)
| j >2
|
|// Byte literal key: operand C is tagged as an integer in TMP_STACK.
|->vmeta_tgetb:
| llgc RC, PC_RC
| setint RC
| stg RC, TMP_STACK
| la RC, TMP_STACK
| j >1
|
|// Variable key: the key is the stack slot named by operand C.
|->vmeta_tgetv:
| llgc RC, PC_RC // Reload TValue *k from RC.
| sllg RC, RC, 3(r0)
| la RC, 0(RC, BASE)
|1:
| llgc RB, PC_RB // Reload TValue *t from RB.
| sllg RB, RB, 3(r0)
| la RB, 0(RB, BASE)
|2:
| // RB = TValue *o, RC = TValue *k.
| lg L:CARG1, SAVE_L
| stg BASE, L:CARG1->base
| lgr CARG2, RB
| lgr CARG3, RC
| lgr L:RB, L:CARG1
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
| // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
| lg BASE, L:RB->base
| ltgr RC, CRET1
| je >3
|// Copy the TValue at RC into the stack slot named by operand A and
|// continue with the next instruction.
|->cont_ra: // BASE = base, RC = result
| llgc RA, PC_RA
| sllg RA, RA, 3(r0)
| lg RB, 0(RC)
| stg RB, 0(RA, BASE)
| ins_next
|
|3: // Call __index metamethod.
| // BASE = base, L->top = new base, stack = cont/func/t/k
| lg RA, L:RB->top
| stg PC, -24(RA) // [cont|PC]
| lay PC, FRAME_CONT(RA)
| sgr PC, BASE
| lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
| lghi NARGS:RD, 2+1 // 2 args for func(t, k).
| cleartp LFUNC:RB
| j ->vm_call_dispatch_f
|
|// Slow path for a raw integer-keyed load: look the key up with
|// lj_tab_getinth(t, key).
|// In: RB = GCtab *, RC = key. Out: RA = operand A; either RC = slot address
|// (to BC_TGETR_Z) or ITYPE = nil (to BC_TGETR2_Z).
|// The load and register move between the call and the branch do not change
|// the condition code set by ltgr.
|->vmeta_tgetr:
| lgr CARG1, TAB:RB
| lgr RB, BASE // Save BASE.
| lgfr CARG2, RC
| brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
| // cTValue * or NULL returned in r2 (CRET1).
| llgc RA, PC_RA
| lgr BASE, RB // Restore BASE.
| ltgr RC, CRET1
| jne ->BC_TGETR_Z
| lghi ITYPE, LJ_TNIL
| j ->BC_TGETR2_Z
|
|//-----------------------------------------------------------------------
|
|// Slow paths for indexed stores. Same structure as the vmeta_tget* group,
|// but calling lj_meta_tset(L, o, k); the value to store is the stack slot
|// named by operand A.
|
|// String key: RC = untagged GCstr *. For BC_GSET the table in RB is tagged
|// and stored in g->tmptv.
|->vmeta_tsets:
| settp STR:RC, LJ_TSTR // STR:RC = GCstr *
| stg STR:RC, TMP_STACK
| la RC, TMP_STACK
| llgc TMPR2, PC_OP
| cghi TMPR2, BC_GSET
| jne >1
| settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
| lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
| stg TAB:RA, 0(RB)
| j >2
|
|// Byte literal key.
|->vmeta_tsetb:
| llgc RC, PC_RC
| setint RC
| stg RC, TMP_STACK
| la RC, TMP_STACK
| j >1
|
|// Variable key.
|->vmeta_tsetv:
| llgc RC, PC_RC // Reload TValue *k from RC.
| sllg RC, RC, 3(r0)
| la RC, 0(RC, BASE)
|1:
| llgc RB, PC_RB // Reload TValue *t from RB.
| sllg RB, RB, 3(r0)
| la RB, 0(RB, BASE)
|2:
| lg L:CARG1, SAVE_L
| stg BASE, L:CARG1->base // BASE (r7) is distinct from CARG2/CARG3 here.
| lgr CARG2, RB
| lgr CARG3, RC
| lgr L:RB, L:CARG1
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
| // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
| lg BASE, L:RB->base
| ltgr RC, CRET1
| je >3
| // NOBARRIER: lj_meta_tset ensures the table is not black.
| // Store the value from slot A into the returned TValue.
| llgc RA, PC_RA
| sllg RA, RA, 3(r0)
| lg RB, 0(RA, BASE)
| stg RB, 0(RC)
|// Continuation that discards the result and executes the next instruction.
|->cont_nop: // BASE = base, (RC = result)
| ins_next
|
|3: // Call __newindex metamethod.
| // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
| lg RA, L:RB->top
| stg PC, -24(RA) // [cont|PC]
| llgc RC, PC_RA
| // Copy value to third argument.
| sllg RB, RC, 3(r0)
| lg RB, 0(RB, BASE)
| stg RB, 16(RA)
| la PC, FRAME_CONT(RA)
| sgr PC, BASE
| lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
| lghi NARGS:RD, 3+1 // 3 args for func(t, k, v).
| cleartp LFUNC:RB
| j ->vm_call_dispatch_f
|
|// Slow path for a raw integer-keyed store: obtain the slot with
|// lj_tab_setinth(L, t, key).
|// In: RB = GCtab *, RC = key.
|// Out: RC = slot address, RA = operand A; continues at BC_TSETR_Z.
|->vmeta_tsetr:
| lg L:CARG1, SAVE_L
| lgr CARG2, TAB:RB
| stg BASE, L:CARG1->base
| lgr RB, BASE // Save BASE (TODO: BASE is callee-saved anyway on s390x).
| lgfr CARG3, RC
| stg PC, SAVE_PC
| brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
| // TValue * returned in r2 (CRET1).
| lgr RC, CRET1
| // Operand A is a single byte. A halfword load from PC_RA would also pull
| // in the opcode byte that follows it.
| llgc RA, PC_RA
| lgr BASE, RB // Restore BASE.
| j ->BC_TSETR_Z
|
|//-- Comparison metamethods ---------------------------------------------
|
|// Slow path for ordered comparisons: lj_meta_comp(L, o1, o2, op) with
|// o1 = slot A and o2 = slot D.
|// The result is 0 (false), 1 (true) or a TValue * above 1, which is the
|// new base for a metamethod call. Labels 3-6 are shared with cont_condt,
|// cont_condf, vmeta_equal and vmeta_istype.
|->vmeta_comp:
| llgh RD, PC_RD
| sllg RD, RD, 3(r0)
| llgc RA, PC_RA
| sllg RA, RA, 3(r0)
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
| la CARG2, 0(RA, BASE)
| la CARG3, 0(RD, BASE)
| lgr CARG1, L:RB
| llgc CARG4, PC_OP
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
| // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
|3:
| lgr RC, CRET1
| lg BASE, L:RB->base
| clgfi RC, 1
| jh ->vmeta_binop
|4:
| // Entered with a condition code: "low" skips the jump, anything else
| // takes it. la does not change the condition code.
| la PC, 4(PC)
| jl >6
|5:
| llgh RD, PC_RD
| branchPC RD
|6:
| ins_next
|
|// Continuation: take the following jump if the metamethod result is true.
|->cont_condt: // BASE = base, RC = result
| la PC, 4(PC)
| lg ITYPE, 0(RC)
| srag ITYPE, ITYPE, 47(r0)
| lghi TMPR2, LJ_TISTRUECOND
| clr ITYPE, TMPR2 // Branch if result is true.
| jl <5
| j <6
|
|// Continuation: take the following jump if the metamethod result is false.
|->cont_condf: // BASE = base, RC = result
| lg ITYPE, 0(RC)
| srag ITYPE, ITYPE, 47(r0)
| lghi TMPR2, LJ_TISTRUECOND
| clr ITYPE, TMPR2 // Branch if result is false.
| j <4
|
|// Slow path for equality: lj_meta_equal(L, o1, o2, ne).
|// In: RA = first object, RD = tagged second object, RB = ne flag.
|// PC is moved back by 4 before being saved.
|->vmeta_equal:
| cleartp TAB:RD
| lay PC, -4(PC)
| lgr CARG2, RA
| lgfr CARG4, RB
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
| lgr CARG3, RD
| lgr CARG1, L:RB
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
| // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
| j <3
|
|// Not implemented: the body is two stores to address 0 (see the note on
|// the same pattern elsewhere in this file).
|->vmeta_equal_cd:
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|// Slow path for type checks: lj_meta_istype(L, ra, tp), then continue with
|// the next instruction.
|->vmeta_istype:
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
| llgfr CARG2, RA
| llgfr CARG3, RD
| lgr L:CARG1, L:RB
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
| lg BASE, L:RB->base
| j <6
|
|//-- Arithmetic metamethods ---------------------------------------------
|
|// Slow paths for arithmetic. Each entry computes the addresses of the two
|// operands into RB and RC, then joins label 1 which calls
|// lj_meta_arith(L, ra, rb, rc, op).
|// The "o" variants first reload operands B and C from the instruction; the
|// others expect the operand indices in RB and RC.
|// vn: RB = stack slot, RC = constant. nv: RB = constant, RC = stack slot.
|// vv: both are stack slots.
|->vmeta_arith_vno:
| llgc RB, PC_RB
| llgc RC, PC_RC
|->vmeta_arith_vn:
| sllg RB, RB, 3(r0)
| sllg RC, RC, 3(r0)
| lay RB, 0(RB, BASE)
| lay RC, 0(RC, KBASE)
| j >1
|
|->vmeta_arith_nvo:
| llgc RC, PC_RC
| llgc RB, PC_RB
|->vmeta_arith_nv:
| sllg RC, RC, 3(r0)
| sllg RB, RB, 3(r0)
| lay TMPR1, 0(RC, KBASE)
| lay RC, 0(RB, BASE)
| lgr RB, TMPR1
| j >1
|
|// Unary minus: both operands are the stack slot named by operand D.
|->vmeta_unm:
| llgh RD, PC_RD
| sllg RD, RD, 3(r0)
| la RC, 0(RD, BASE)
| lgr RB, RC
| j >1
|
|->vmeta_arith_vvo:
| llgc RB, PC_RB
| llgc RC, PC_RC
|->vmeta_arith_vv:
| sllg RC, RC, 3(r0)
| sllg RB, RB, 3(r0)
| lay RB, 0(RB, BASE)
| lay RC, 0(RC, BASE)
|1:
| // RA = address of the destination slot named by operand A.
| llgc RA, PC_RA
| sllg RA, RA, 3(r0)
| lay RA, 0(RA, BASE)
| llgc CARG5, PC_OP // Caveat: CARG5 == RD.
| lgr CARG2, RA
| lgr CARG3, RB
| // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out).
| lg L:CARG1, SAVE_L
| stg BASE, L:CARG1->base
| lgr L:RB, L:CARG1
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
| // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
| lg BASE, L:RB->base
| cghi CRET1, 0
| lgr RC, CRET1 // Does not change the condition code.
| je ->cont_nop
|
| // Call metamethod for binary op.
|// In: RC = new base. The caller's PC is stored in the continuation frame
|// and PC becomes the frame link (delta to BASE plus FRAME_CONT).
|->vmeta_binop:
| // BASE = base, RC = new base, stack = cont/func/o1/o2
| lgr RA, RC
| sgr RC, BASE
| stg PC, -24(RA) // [cont|PC]
| la PC, FRAME_CONT(RC)
| lghi NARGS:RD, 2+1 // 2 args for func(o1, o2).
| j ->vm_call_dispatch
|
|// Slow path for the length operator: lj_meta_len(L, o) with o = slot D.
|// With LJ_52 a NULL result means "no metamethod": the table is reloaded
|// from slot D, untagged and handed to BC_LEN_Z. Otherwise the result is
|// always treated as the new base for a metamethod call.
|->vmeta_len:
| llgh RD, PC_RD
| sllg RD, RD, 3(r0)
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
| la CARG2, 0(RD, BASE)
| lgr L:CARG1, L:RB
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_len // (lua_State *L, TValue *o)
| // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1).
| lgr RC, CRET1
| lg BASE, L:RB->base
#if LJ_52
| cghi RC, 0
| jne ->vmeta_binop // Binop call for compatibility.
| llgh RD, PC_RD
| sllg RD, RD, 3(r0)
| lg TAB:CARG1, 0(RD, BASE)
| cleartp TAB:CARG1
| j ->BC_LEN_Z
#else
| j ->vmeta_binop // Binop call for compatibility.
#endif
|
|//-- Call metamethod ----------------------------------------------------
|
|// Entry for callers that hold RA as a byte offset: convert it to the
|// address of the new base first.
|->vmeta_call_ra:
| la RA, 16(RA, BASE) // RA previously set to RA*8.
|// Called when the callee slot does not hold a function. lj_meta_call
|// resolves the __call metamethod; afterwards the callee is reloaded from
|// -16(RA) and the call is retried with one more argument.
|// RD is kept across the C call in TMP_STACK and RA in RB.
|->vmeta_call: // Resolve and call __call metamethod.
| // BASE = old base, RA = new base, RD = nargs+1, PC = return
| stg NARGS:RD, TMP_STACK // Save nargs+1 across the call (overwrites SAVE_MULTRES).
| lgr RB, RA
| lg L:CARG1, SAVE_L
| stg BASE, L:CARG1->base
| lay CARG2, -16(RA)
| sllg RD, RD, 3(r0)
| lay CARG3, -8(RA, RD)
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
| lgr RA, RB
| lg L:RB, SAVE_L
| lg BASE, L:RB->base
| lg NARGS:RD, TMP_STACK
| lg LFUNC:RB, -16(RA)
| aghi NARGS:RD, 1 // 32-bit on x64.
| // This is fragile. L->base must not move, KBASE must always be defined.
| cgr KBASE, BASE // Continue with CALLT if flag set.
| je ->BC_CALLT_Z
| cleartp LFUNC:RB
| lgr BASE, RA
| ins_call // Otherwise call resolved metamethod.
|
|//-- Argument coercion for 'for' statement ------------------------------
|
|// Coerce the control values of a numeric 'for' loop with
|// lj_meta_for(L, base), where RA holds the address of the loop slots, then
|// re-decode the current instruction (OP, RA, RD) and dispatch it again
|// through the table at offset GG_DISP2STATIC from DISPATCH.
|->vmeta_for:
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
| lgr CARG2, RA
| lgr CARG1, RB
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_for // (lua_State *L, TValue *base)
| lg BASE, L:RB->base
| llgc OP, PC_OP
| llgc RA, PC_RA
| llgh RD, PC_RD
| sllg TMPR1, OP, 3(r0)
| lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI.
| br TMPR1
|
|//-----------------------------------------------------------------------
|//-- Fast functions -----------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Fast function headers. Each defines the global label ff_<name>.
|// On entry BASE points at the first argument and RD = nargs+1.
|
|// .ffunc: label only, no argument check.
|.macro .ffunc, name
|->ff_ .. name:
|.endmacro
|
|// .ffunc_1: fall back unless at least one argument was passed. The
|// condition code from the compare is still valid after the macro.
|.macro .ffunc_1, name
|->ff_ .. name:
| clfi NARGS:RD, 1+1; jl ->fff_fallback
|.endmacro
|
|// .ffunc_2: fall back unless at least two arguments were passed.
|.macro .ffunc_2, name
|->ff_ .. name:
| clfi NARGS:RD, 2+1; jl ->fff_fallback
|.endmacro
|
|// .ffunc_n: one argument that must pass checknumtp; it is then loaded into
|// f0 with the given instruction (ld by default). Clobbers TMPR2 and ITYPE.
|.macro .ffunc_n, name, op
| .ffunc_1 name
| lg TMPR2, 0(BASE)
| checknumtp TMPR2, ->fff_fallback
| op f0, 0(BASE) // TODO: might be better to unconditionally load into f1.
|.endmacro
|
|.macro .ffunc_n, name
| .ffunc_n name, ld
|.endmacro
|
|// .ffunc_nn: two arguments that must both pass checknumtp; they are loaded
|// into FARG1 and FARG2 before the checks. Clobbers TMPR1, TMPR2 and ITYPE.
|.macro .ffunc_nn, name
| .ffunc_2 name
| lg TMPR1, 0(BASE)
| lg TMPR2, 8(BASE)
| ld FARG1, 0(BASE)
| ld FARG2, 8(BASE)
| checknumtp TMPR1, ->fff_fallback
| checknumtp TMPR2, ->fff_fallback
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses label 1.
|// Calls fff_gcstep when gc.total is not below gc.threshold (unsigned).
|// Clobbers RB, and r14 if the call is made.
|.macro ffgccheck
| lg RB, (DISPATCH_GL(gc.total))(DISPATCH)
| clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH)
| jl >1
| brasl r14, ->fff_gcstep
|1:
|.endmacro
|
|//-- Base library: checks -----------------------------------------------
|
|// assert(v, ...): if the tag of v is not below LJ_TISTRUECOND take the
|// fallback. Otherwise return all arguments by copying each of them two
|// slots down (to BASE-16 onward). RD is restored from MULTRES at the end.
|.ffunc_1 assert
| lg RB, 0(BASE)
| srag ITYPE, RB, 47(r0)
| clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
| lg PC, -8(BASE)
| st RD, SAVE_MULTRES
| lg RB, 0(BASE)
| stg RB, -16(BASE)
| ahi RD, -2
| je >2
| lgr RA, BASE
|1:
| la RA, 8(RA)
| lg RB, 0(RA)
| stg RB, -16(RA)
| ahi RD, -1
| jne <1
| // TODO: replace with branch on count (brctg).
|2:
| llgf RD, SAVE_MULTRES
| j ->fff_res_
|
|// type(v): returns the type name string held in the function's upvalues.
|// The tag of v is clamped to LJ_TISNUM when it compares below it (unsigned),
|// complemented, and used as the index into the upvalue array.
|.ffunc_1 type
| lg RC, 0(BASE)
| srag RC, RC, 47(r0)
| lghi RB, LJ_TISNUM
| clgr RC, RB
| jnl >1
| lgr RC, RB
|1:
| lghi TMPR2, -1
| xgr RC, TMPR2
|2:
| lg CFUNC:RB, -16(BASE)
| cleartp CFUNC:RB
| sllg RC, RC, 3(r0)
| lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB)
| lg PC, -8(BASE)
| settp STR:RC, LJ_TSTR
| stg STR:RC, -16(BASE)
| j ->fff_res1
|
|//-- Base library: getters and setters ---------------------------------
|
|// getmetatable(v): result defaults to nil when there is no metatable and
|// to the metatable itself otherwise. The metatable's hash part is then
|// searched for the MM_metatable name string; a non-nil value found there
|// replaces the result.
|.ffunc_1 getmetatable
| lg TAB:RB, 0(BASE)
| lg PC, -8(BASE)
| checktab TAB:RB, >6
|1: // Field metatable must be at same offset for GCtab and GCudata!
| lg TAB:RB, TAB:RB->metatable
|2:
| lghi TMPR2, LJ_TNIL
| stg TMPR2, -16(BASE)
| cghi TAB:RB, 0
| je ->fff_res1
| settp TAB:RC, TAB:RB, LJ_TTAB
| stg TAB:RC, -16(BASE) // Store metatable as default result.
| lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH)
| // RA = address of the hash node for the name: node + (hmask & hash)*#NODE.
| llgf RA, TAB:RB->hmask
| n RA, STR:RC->hash
| settp STR:RC, LJ_TSTR
| mghi RA, #NODE
| ag NODE:RA, TAB:RB->node
|3: // Rearranged logic, because we expect _not_ to find the key.
| cg STR:RC, NODE:RA->key
| je >5
|4:
| ltg NODE:RA, NODE:RA->next
| jne <3
| j ->fff_res1 // Not found, keep default result.
|5:
| lg RB, NODE:RA->val
| cghi RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
| stg RB, -16(BASE) // Return value of mt.__metatable.
| j ->fff_res1
|
|6:
| // Not a table. ITYPE still holds the tag and RB the untagged payload
| // (checktab strips the tag before branching). Userdata shares the table
| // path; every other type uses its base metatable from gcroot, with tags
| // not above LJ_TISNUM clamped to LJ_TISNUM.
| clfi ITYPE, LJ_TUDATA; je <1
| clfi ITYPE, LJ_TISNUM; jh >7
| lhi ITYPE, LJ_TISNUM
|7:
| lhi TMPR2, -1
| xr ITYPE, TMPR2 // not ITYPE
| llgfr ITYPE, ITYPE
| sllg ITYPE, ITYPE, 3(r0)
| lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
| j <2
|
|// setmetatable(t, mt): inline only when t is a table without a metatable
|// and mt is a table; everything else takes the fallback. Returns t.
|// TMPR1 keeps the original tagged table value for the result.
|.ffunc_2 setmetatable
| lg TAB:RB, 0(BASE)
| lgr TAB:TMPR1, TAB:RB
| checktab TAB:RB, ->fff_fallback
| // Fast path: no mt for table yet and not clearing the mt.
| lghi TMPR2, 0
| cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
| lg TAB:RA, 8(BASE)
| checktab TAB:RA, ->fff_fallback
| stg TAB:RA, TAB:RB->metatable
| lg PC, -8(BASE)
| stg TAB:TMPR1, -16(BASE) // Return original table.
| // TODO: change to tm
| llgc TMPR2, TAB:RB->marked
| tmll TMPR2, LJ_GC_BLACK // isblack(table)
| je >1
| // Possible write barrier. Table is black, but skip iswhite(mt) check.
| barrierback TAB:RB, RC
|1:
| j ->fff_res1
|
|// rawget(t, k): takes the fallback unless t is a table, then returns the
|// TValue found by lj_tab_get(L, t, &k). BASE is parked in RB across the
|// C call.
|.ffunc_2 rawget
| lg TAB:CARG2, 0(BASE)
| checktab TAB:CARG2, ->fff_fallback
| lgr RB, BASE // Save BASE.
| la CARG3, 8(BASE)
| lg CARG1, SAVE_L
| brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
| // cTValue * returned in r2 (CRET1).
| lgr BASE, RB // Restore BASE.
| // Copy table slot.
| lg RB, 0(CRET1)
| lg PC, -8(BASE)
| stg RB, -16(BASE)
| j ->fff_res1
|
|//-- Base library: conversions ------------------------------------------
|
|// tonumber(v): inline only for exactly one argument whose tag is not above
|// LJ_TISNUM; the argument is returned unchanged.
|.ffunc tonumber
| // Only handles the number case inline (without a base argument).
| clfi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
| lg RB, 0(BASE)
| checknumber RB, ->fff_fallback
| lg PC, -8(BASE)
| stg RB, -16(BASE)
| j ->fff_res1
|
|// tostring(v): strings are returned as they are. Numbers are converted with
|// lj_strfmt_number unless the number base metatable is set. Everything else
|// takes a fallback.
|.ffunc_1 tostring
| // Only handles the string or number case inline.
| lg PC, -8(BASE)
| lg STR:RB, 0(BASE)
| checktp_nc STR:RB, LJ_TSTR, >3
| // A __tostring method in the string base metatable is ignored.
|2:
| stg STR:RB, -16(BASE)
| j ->fff_res1
|3: // Handle numbers inline, unless a number base metatable is present.
| // ITYPE still holds the tag computed by checktp_nc.
| clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1
| lghi TMPR2, 0
| cg TMPR2, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
| jne ->fff_fallback
| ffgccheck // Caveat: uses label 1.
| lg L:RB, SAVE_L
| stg BASE, L:RB->base // Add frame since C call can throw.
| stg PC, SAVE_PC // Redundant (but a defined value).
| lgr CARG2, BASE // Otherwise: CARG2 == BASE
| lgr L:CARG1, L:RB
| brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o)
| // GCstr returned in r2 (CRET1).
| lg BASE, L:RB->base
| settp STR:RB, CRET1, LJ_TSTR
| j <2
|
|//-- Base library: iterators -------------------------------------------
|
|// next(t [, k]): a missing second argument is replaced by nil. Calls
|// lj_tab_next(L, t, &k); a zero result returns nil, otherwise the two slots
|// at BASE+8 and BASE+16 are returned as key and value.
|.ffunc_1 next
| // Condition code is still from the argument count compare in .ffunc_1.
| je >2 // Missing 2nd arg?
|1:
| lg CARG2, 0(BASE)
| checktab CARG2, ->fff_fallback
| lg L:RB, SAVE_L
| stg BASE, L:RB->base // Add frame since C call can throw.
| stg BASE, L:RB->top // Dummy frame length is ok.
| lg PC, -8(BASE)
| la CARG3, 8(BASE)
| lgr CARG1, L:RB
| stg PC, SAVE_PC // Needed for ITERN fallback.
| brasl r14, extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
| // Flag returned in r2 (CRET1).
| lg BASE, L:RB->base
| lgr RD, CRET1 // TODO: high bits needed? low bits load/test (ltr) enough?
| ltr RD, CRET1; je >3 // End of traversal?
| // Copy key and value to results.
| lg RB, 8(BASE)
| lg RD, 16(BASE)
| stg RB, -16(BASE)
| stg RD, -8(BASE)
|// Shared exit: return two results already stored at BASE-16 and BASE-8.
|->fff_res2:
| lghi RD, 1+2
| j ->fff_res
|2: // Set missing 2nd arg to nil.
| lghi TMPR2, LJ_TNIL
| stg TMPR2, 8(BASE)
| j <1
|3: // End of traversal: return nil.
| lghi TMPR2, LJ_TNIL
| stg TMPR2, -16(BASE)
| j ->fff_res1
|
|// pairs(t): returns (upvalue[0] tagged as a function, t, nil). Non-tables
|// take the fallback, as do tables with a metatable when LJ_52 is set.
|// TMPR1 keeps the original tagged table value.
|.ffunc_1 pairs
| lg TAB:RB, 0(BASE)
| lgr TMPR1, TAB:RB
| checktab TAB:RB, ->fff_fallback
#if LJ_52
| ltg TMPR2, TAB:RB->metatable; jne ->fff_fallback
#endif
| lg CFUNC:RD, -16(BASE)
| cleartp CFUNC:RD
| lg CFUNC:RD, CFUNC:RD->upvalue[0]
| settp CFUNC:RD, LJ_TFUNC
| lg PC, -8(BASE)
| stg CFUNC:RD, -16(BASE)
| stg TMPR1, -8(BASE)
| lghi TMPR2, LJ_TNIL
| stg TMPR2, 0(BASE)
| lghi RD, 1+3
| j ->fff_res
|
|// ipairs_aux(t, i) fast function (the ipairs iterator).
|// Increments the integer index, stores it as first result and looks the
|// new index up in the array part, or through lj_tab_getinth when it is
|// outside the array part and the hash part is not empty. A nil or missing
|// slot ends the iteration via fff_res0 (no results).
|.ffunc_2 ipairs_aux
| lg TAB:RB, 0(BASE)
| checktab TAB:RB, ->fff_fallback
| lg RA, 8(BASE)
| checkint RA, ->fff_fallback
| lg PC, -8(BASE)
| aghi RA, 1
| // Store the incremented index as first result (BASE-16).
| setint ITYPE, RA
| stg ITYPE, -16(BASE)
| cl RA, TAB:RB->asize; jhe >2 // Not in array part?
| // RD = &t->array[index] (8 bytes per slot).
| lg RD, TAB:RB->array
| lgfr TMPR1, RA
| sllg TMPR1, TMPR1, 3(r0)
| la RD, 0(TMPR1, RD)
|1:
| lg TMPR2, 0(RD)
| cghi TMPR2, LJ_TNIL; je ->fff_res0
| // Copy array slot.
| stg TMPR2, -8(BASE)
| j ->fff_res2
|2: // Check for empty hash part first. Otherwise call C function.
| lt TMPR2, TAB:RB->hmask; je ->fff_res0
| lgr CARG1, TAB:RB
| lgr RB, BASE // Save BASE. // TODO: needed?
| lgfr CARG2, RA
| brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
| // cTValue * or NULL returned in r2 (CRET1).
| lgr BASE, RB
| // Non-NULL slot pointer: share the nil check and copy at label 1.
| ltgr RD, CRET1
| jne <1
|->fff_res0:
| lghi RD, 1+0
| j ->fff_res
|
|// ipairs(t) fast function.
|// Returns three results: upvalue[0] of the ipairs function (re-tagged as a
|// function), the table argument and an initial control value built from
|// the LJ_TISNUM tag. With LJ_52 a table that has a metatable is handed to
|// the fallback handler.
|.ffunc_1 ipairs
| lg TAB:RB, 0(BASE)
| lgr TMPR1, TAB:RB // Unmodified copy of the argument for the result slot.
| checktab TAB:RB, ->fff_fallback
#if LJ_52
| lghi TMPR2, 0
| cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
#endif
| lg CFUNC:RD, -16(BASE)
| cleartp CFUNC:RD
| lg CFUNC:RD, CFUNC:RD->upvalue[0]
| settp CFUNC:RD, LJ_TFUNC
| lg PC, -8(BASE)
| // Results: BASE-16 = iterator function, BASE-8 = table, BASE = control.
| stg CFUNC:RD, -16(BASE)
| stg TMPR1, -8(BASE)
| // llihh sets only the top halfword and zeroes the rest, so the stored
| // control value has an all-zero low part.
| llihh RD, ((int)LJ_TISNUM)>>1 // mov64 RD, ((int64_t)LJ_TISNUM<<47) // TODO: write mov64-macro, use all of TISNUM (currently this is very fragile).
| stg RD, 0(BASE)
| lghi RD, 1+3
| j ->fff_res
|
|//-- Base library: catch errors ----------------------------------------
|
|// pcall(f, ...) fast function.
|// Sets up a FRAME_PCALL frame delta in PC (plus the active-hook bit),
|// shifts the function and its arguments up by one slot and dispatches the
|// call. Label 1 is also entered from xpcall with its own RA/RD/PC setup.
|.ffunc_1 pcall
| la RA, 16(BASE)
| aghi NARGS:RD, -1
| lghi PC, 16+FRAME_PCALL
|1:
| // RB = (hookmask >> HOOK_ACTIVE_SHIFT) & 1.
| llgc RB, (DISPATCH_GL(hookmask))(DISPATCH)
| srlg RB, RB, HOOK_ACTIVE_SHIFT(r0)
| nill RB, 1 // High bits already zero (from load).
| agr PC, RB // Remember active hook before pcall.
| // Note: this does a (harmless) copy of the function to the PC slot, too.
| lgr KBASE, RD
|2:
| // Copy slot (RA + KBASE*8 - 24) one slot up; KBASE counts down to 0.
| sllg TMPR1, KBASE, 3(r0)
| lg RB, -24(TMPR1, RA)
| stg RB, -16(TMPR1, RA)
| aghi KBASE, -1
| jh <2
| j ->vm_call_dispatch
|
|// xpcall(f, handler, ...) fast function.
|// Requires the second argument to be a function, swaps the first two
|// stack slots and then reuses the pcall tail (label 1 above) with a
|// 24-byte frame delta.
|.ffunc_2 xpcall
| lg LFUNC:RA, 8(BASE)
| checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
| lg LFUNC:RB, 0(BASE) // Swap function and traceback.
| stg LFUNC:RA, 0(BASE)
| stg LFUNC:RB, 8(BASE)
| la RA, 24(BASE)
| aghi NARGS:RD, -2
| lghi PC, 24+FRAME_PCALL
| j <1
|
|//-- Coroutine library --------------------------------------------------
|
|// Shared body of coroutine.resume (resume=1) and the function returned by
|// coroutine.wrap (resume=0).
|// Overview of the emitted code:
|//  - fetch the coroutine (argument 1 for resume, upvalue[0] for wrap) and
|//    keep it in TMP_STACK across the call,
|//  - fall back if it has a C frame, a status above LUA_YIELD, no initial
|//    function, or not enough stack space,
|//  - move the arguments onto the coroutine stack and call vm_resume,
|//  - on success move the results back (growing the caller stack if
|//    needed); on error either return false+message (resume) or call
|//    lj_ffh_coroutine_wrap_err (wrap).
|.macro coroutine_resume_wrap, resume
|.if resume
|.ffunc_1 coroutine_resume
| lg L:RB, 0(BASE)
| lgr L:TMPR2, L:RB // Save type for checktptp.
| cleartp L:RB
|.else
|.ffunc coroutine_wrap_aux
| lg CFUNC:RB, -16(BASE)
| cleartp CFUNC:RB
| lg L:RB, CFUNC:RB->upvalue[0].gcr
| cleartp L:RB
|.endif
| lg PC, -8(BASE)
| stg PC, SAVE_PC
| stg L:RB, TMP_STACK
|.if resume
| checktptp L:TMPR2, LJ_TTHREAD, ->fff_fallback
|.endif
| ltg TMPR2, L:RB->cframe; jne ->fff_fallback
| // TODO: replace with cli.
| llgc TMPR1, L:RB->status
| cghi TMPR1, (uint8_t)LUA_YIELD; jh ->fff_fallback
| // The load below does not change the condition code of the compare.
| lg RA, L:RB->top
| je >1 // Status != LUA_YIELD (i.e. 0)?
| cg RA, L:RB->base // Check for presence of initial func.
| je ->fff_fallback
| lg PC, -8(RA) // Move initial function up.
| stg PC, 0(RA)
| la RA, 8(RA)
|1:
| sllg TMPR1, NARGS:RD, 3(r0)
|.if resume
| lay PC, -16(TMPR1, RA) // Check stack space (-1-thread).
|.else
| lay PC, -8(TMPR1, RA) // Check stack space (-1).
|.endif
| clg PC, L:RB->maxstack; jh ->fff_fallback
| stg PC, L:RB->top
|
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
|.if resume
| la BASE, 8(BASE) // Keep resumed thread in stack for GC.
|.endif
| stg BASE, L:RB->top
|.if resume
| lay RB, -24(TMPR1, BASE) // RB = end of source for stack move.
|.else
| lay RB, -16(TMPR1, BASE) // RB = end of source for stack move.
|.endif
| sgr RB, PC // Relative to PC.
|
| cgr PC, RA
| je >3
|2: // Move args to coroutine.
| // Copies downwards: PC walks from the new coroutine top back to RA.
| lg RC, 0(RB, PC)
| stg RC, -8(PC)
| // TODO: replace with branch on count/index?
| lay PC, -8(PC)
| cgr PC, RA
| jne <2
|3:
| lgr CARG2, RA
| lg L:CARG1, TMP_STACK
| lghi CARG3, 0
| lghi CARG4, 0
| brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0)
|
| // Back in the caller: RB = calling lua_State, PC = coroutine.
| lg L:RB, SAVE_L
| lg L:PC, TMP_STACK
| lg BASE, L:RB->base
| stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
| set_vmstate INTERP
|
| clfi CRET1, LUA_YIELD
| jh >8
|4:
| lg RA, L:PC->base
| lg KBASE, L:PC->top
| stg RA, L:PC->top // Clear coroutine stack.
| // PC = byte size of the results (top - base).
| lgr PC, KBASE
| sgr PC, RA
| je >6 // No results?
| la RD, 0(PC, BASE)
| // PC = number of results (bytes / 8).
| llgfr PC, PC
| srlg PC, PC, 3(r0)
| clg RD, L:RB->maxstack
| jh >9 // Need to grow stack?
|
| // RB = BASE - RA, so RA+RB addresses the destination slot.
| lgr RB, BASE
| sgr RB, RA
|5: // Move results from coroutine.
| lg RD, 0(RA)
| stg RD, 0(RA, RB)
| // TODO: branch on count/index?
| la RA, 8(RA)
| cgr RA, KBASE
| jne <5
|6:
|.if resume
| la RD, 2(PC) // nresults+1 = 1 + true + results.
| load_true ITYPE // Prepend true to results.
| stg ITYPE, -8(BASE)
|.else
| la RD, 1(PC) // nresults+1 = 1 + results.
|.endif
|7:
| lg PC, SAVE_PC
| st RD, SAVE_MULTRES
| // RA = offset of the first result relative to BASE.
|.if resume
| lghi RA, -8
|.else
| lghi RA, 0
|.endif
| tmll PC, FRAME_TYPE
| je ->BC_RET_Z
| j ->vm_return
|
|8: // Coroutine returned with error (at co->top-1).
|.if resume
| load_false ITYPE // Prepend false to results.
| stg ITYPE, -8(BASE)
| lg RA, L:PC->top
| aghi RA, -8
| stg RA, L:PC->top // Clear error from coroutine stack.
| // Copy error message.
| lg RD, 0(RA)
| stg RD, 0(BASE)
| lghi RD, 1+2 // nresults+1 = 1 + false + error.
| j <7
|.else
| lgr CARG2, L:PC
| lgr CARG1, L:RB
| brasl r14, extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
| // Error function does not return.
|.endif
|
|9: // Handle stack expansion on return from yield.
| lg L:RA, TMP_STACK
| stg KBASE, L:RA->top // Undo coroutine stack clearing.
| // PC holds the number of result slots computed above.
| lgr CARG2, PC
| lgr CARG1, L:RB
| brasl r14, extern lj_state_growstack // (lua_State *L, int n)
| lg L:PC, TMP_STACK
| lg BASE, L:RB->base
| j <4 // Retry the stack move.
|.endmacro
|
| coroutine_resume_wrap 1 // coroutine.resume
| coroutine_resume_wrap 0 // coroutine.wrap
|
|// coroutine.yield(...) fast function.
|// Falls back unless the CFRAME_RESUME bit is set in L->cframe. Otherwise
|// it records base/top for the yielded values, clears L->cframe, sets the
|// status to LUA_YIELD and leaves through vm_leave_unw with LUA_YIELD in
|// CRET1.
|.ffunc coroutine_yield
| lg L:RB, SAVE_L
| lg TMPR2, L:RB->cframe
| tmll TMPR2, CFRAME_RESUME
| je ->fff_fallback
| stg BASE, L:RB->base
| // L->top = BASE + (nargs+1)*8 - 8.
| sllg RD, NARGS:RD, 3(r0)
| lay RD, -8(RD, BASE)
| stg RD, L:RB->top
| lghi RD, 0
| stg RD, L:RB->cframe
| lghi CRET1, LUA_YIELD
| stc CRET1, L:RB->status
| j ->vm_leave_unw
|
|//-- Math library -------------------------------------------------------
|
|// math.abs(x) fast function.
|// Integers use a 32-bit load-positive; on overflow the result is replaced
|// by the double 2^31. Other numbers have their sign bit cleared. Also
|// defines the shared result labels fff_resbit/fff_resi (box the integer
|// in RB and return it) and fff_resRB (return RB as is).
|.ffunc_1 math_abs
| lg RB, 0(BASE)
| checkint RB, >3
| lpr RB, RB; jo >2
|->fff_resbit:
|->fff_resi:
| setint RB
|->fff_resRB:
| lg PC, -8(BASE)
| stg RB, -16(BASE)
| j ->fff_res1
|2:
| llihh RB, 0x41e0 // 2^31
| j ->fff_resRB
|3:
| // Condition code still comes from the integer type check above.
| jh ->fff_fallback
| nihh RB, 0x7fff // Clear sign bit.
| lg PC, -8(BASE)
| stg RB, -16(BASE)
| j ->fff_res1
|
|// math.sqrt(x) fast function followed by the common fast-function return
|// code. The instruction is emitted by the .ffunc_n macro from the extra
|// 'sqdb' argument (macro defined elsewhere -- presumably applied to f0).
|//
|// Shared return labels:
|//   fff_resf0: store f0 as the single result.
|//   fff_res1:  return one result already stored at BASE-16.
|//   fff_res:   return RD-1 results (RD = nresults+1), PC = caller PC.
|//   fff_res_:  as fff_res, with SAVE_MULTRES already written.
|.ffunc_n math_sqrt, sqdb
|->fff_resf0:
| lg PC, -8(BASE)
| stdy f0, -16(BASE)
| // fallthrough
|
|->fff_res1:
| lghi RD, 1+1
|->fff_res:
| st RD, SAVE_MULTRES
|->fff_res_:
| tmll PC, FRAME_TYPE
| jne >7
|5:
| llgc TMPR1, PC_RB
| clgr TMPR1, RD // More results expected?
| jh >6
| // Adjust BASE. KBASE is assumed to be set for the calling frame.
| llgc RA, PC_RA
| lcgr RA, RA
| sllg RA, RA, 3(r0)
| lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
| ins_next
|
|6: // Fill up results with nil.
| // Writes nil to slot BASE + RD*8 - 24, then bumps RD and re-checks.
| sllg TMPR1, RD, 3(r0)
| lghi TMPR2, LJ_TNIL
| stg TMPR2, -24(TMPR1, BASE)
| la RD, 1(RD)
| j <5
|
|7: // Non-standard return case.
| lghi RA, -16 // Results start at BASE+RA = BASE-16.
| j ->vm_return
|
|// math.floor / math.ceil fast functions.
|// The macro previously emitted only the entry label, so both functions
|// fell through into the code that follows (math_log).
|// Integer arguments are already rounded and are returned unchanged.
|// Other numbers are rounded by the vm_floor/vm_ceil helper (value in f0,
|// return address in r14, may clobber f0-f7 and r0) and returned as a
|// double. Non-numbers and missing arguments take the fallback handler.
|// TODO: narrow the result to an integer when it fits in 32 bits.
|.macro math_round, func
| .ffunc_1 math_ .. func
| lg RB, 0(BASE)
| ld f0, 0(BASE)
| checkint RB, >1
| j ->fff_resRB // Integer: return the argument as is.
|1:
| // Condition code still comes from the integer type check above.
| jh ->fff_fallback // Not a number.
| brasl r14, ->vm_ .. func
| j ->fff_resf0
|.endmacro
|
| math_round floor
| math_round ceil
|
|// math.log(x) fast function.
|// Only the exactly-one-argument case with a number argument is handled
|// inline by calling the C library log(); everything else falls back.
|.ffunc math_log
| chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
| lg TMPR2, 0(BASE)
| ld FARG1, 0(BASE)
| checknumtp TMPR2, ->fff_fallback
| lgr RB, BASE // Keep BASE in RB across the C call.
| brasl r14, extern log
| lgr BASE, RB
| j ->fff_resf0
|
|// Fast functions that forward directly to a C math library function.
|// math_extern is built on .ffunc_n and math_extern2 on .ffunc_nn (both
|// defined elsewhere; presumably they check and load one or two number
|// arguments). BASE is kept in RB across the call and the result in f0 is
|// returned through fff_resf0.
|.macro math_extern, func
| .ffunc_n math_ .. func
| lgr RB, BASE
| brasl r14, extern func
| lgr BASE, RB
| j ->fff_resf0
|.endmacro
|
|.macro math_extern2, func
| .ffunc_nn math_ .. func
| lgr RB, BASE
| brasl r14, extern func
| lgr BASE, RB
| j ->fff_resf0
|.endmacro
|
| math_extern log10
| math_extern exp
| math_extern sin
| math_extern cos
| math_extern tan
| math_extern asin
| math_extern acos
| math_extern atan
| math_extern sinh
| math_extern cosh
| math_extern tanh
| math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
|// math.ldexp(x, e) fast function.
|// Requires a number first argument and an integer second argument, then
|// calls the C library ldexp(); everything else falls back.
|.ffunc_2 math_ldexp
| lg TMPR2, 0(BASE)
| ld FARG1, 0(BASE)
| lg CARG1, 8(BASE)
| checknumtp TMPR2, ->fff_fallback
| checkinttp CARG1, ->fff_fallback
| lgfr CARG1, CARG1 // Sign-extend the 32-bit exponent.
| lgr RB, BASE // Keep BASE in RB across the C call.
| brasl r14, extern ldexp // (double, int)
| lgr BASE, RB
| j ->fff_resf0
|
|// math.frexp(x) fast function.
|// Calls the C library frexp() with TMP_STACK as the output location for
|// the exponent and returns two results: the value from f0 and the
|// exponent boxed as an integer.
|.ffunc_n math_frexp
| lgr RB, BASE // Keep BASE in RB across the C call.
| la CARG1, TMP_STACK
| brasl r14, extern frexp
| lgr BASE, RB
| llgf RB, TMP_STACK // 32-bit exponent written by frexp.
| lg PC, -8(BASE)
| stdy f0, -16(BASE)
| setint RB
| stg RB, -8(BASE)
| lghi RD, 1+2
| j ->fff_res
|
|// math.modf(x) fast function.
|// Calls the C library modf() with the first result slot (BASE-16) as the
|// output pointer, then stores the value returned in f0 as the second
|// result. PC is loaded from BASE-8 before that slot is overwritten.
|.ffunc_n math_modf
| lgr RB, BASE // Keep BASE in RB across the C call.
| lay CARG1, -16(BASE)
| brasl r14, extern modf // (double, double*)
| lgr BASE, RB
| lg PC, -8(BASE)
| stdy f0, -8(BASE)
| lghi RD, 1+2
| j ->fff_res
|
|// math.min / math.max fast functions.
|// 'cjmp' is the conditional jump that keeps the current accumulator.
|// RA is the byte offset one past the next argument and TMPR1 the end
|// offset (RD*8). The loop starts on an integer path (accumulator in RB)
|// and switches to a floating-point path (accumulator in f0) as soon as a
|// non-integer number is seen. Non-number arguments fall back.
|.macro math_minmax, name, cjmp
| .ffunc name
| lghi RA, 2*8
| sllg TMPR1, RD, 3(r0)
| lg RB, 0(BASE)
| ld f0, 0(BASE)
| checkint RB, >4
|1: // Handle integers.
| clgr RA, TMPR1; jhe ->fff_resRB
| lg TMPR2, -8(RA, BASE)
| checkint TMPR2, >3
| cr RB, TMPR2
| cjmp >2
| lgr RB, TMPR2
|2:
| aghi RA, 8
| j <1
|3:
| // Condition code still comes from the integer type check above.
| jh ->fff_fallback
| // Convert intermediate result to number and continue below.
| cdfbr f0, RB
| ldgr f1, TMPR2
| j >6
|4:
| // First argument is not an integer: fall back unless it is a number.
| jh ->fff_fallback
|5: // Handle numbers or integers.
| clgr RA, TMPR1; jhe ->fff_resf0
| lg RB, -8(RA, BASE)
| ldy f1, -8(RA, BASE)
| checknumx RB, >6, jl
| jh ->fff_fallback
| cdfbr f1, RB // Integer argument: convert to double.
|6:
| cdbr f0, f1
| cjmp >7
| ldr f0, f1
|7:
| aghi RA, 8
| j <5
|.endmacro
|
| math_minmax math_min, jnh
| math_minmax math_max, jnl
|
|//-- String library -----------------------------------------------------
|
|// string.byte(s) fast function.
|// Only the exactly-one-argument case is handled inline: it returns the
|// first byte of the string as an integer, or no results for an empty
|// string. Everything else falls back.
|.ffunc string_byte // Only handle the 1-arg case here.
| chi NARGS:RD, 1+1; jne ->fff_fallback
| lg STR:RB, 0(BASE)
| checkstr STR:RB, ->fff_fallback
| lg PC, -8(BASE)
| // The length is loaded as a 32-bit field elsewhere in this file
| // (llgf ...->len), so use a 32-bit load-and-test. A 64-bit test would
| // also look at the four bytes following the field.
| lt TMPR2, STR:RB->len
| je ->fff_res0 // Return no results for empty string.
| llgc RB, STR:RB[1]
| j ->fff_resi
|
|// string.char(c) fast function plus the shared string creation tail.
|// Only exactly one integer argument in the range 0..255 is handled
|// inline.
|//
|// Shared labels:
|//   fff_newstr: create a string from RD (data pointer) and TMPR1 (length)
|//               with lj_str_new and return it.
|//   fff_resstr: return the GCstr * in CRET1; expects L in RB.
|.ffunc string_char // Only handle the 1-arg case here.
| ffgccheck
| chi NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
| lg RB, 0(BASE)
| checkint RB, ->fff_fallback
| clfi RB, 255; jh ->fff_fallback
| // Byte-reversed halfword store puts the character in the first byte.
| strvh RB, TMP_STACK // Store [c,0].
| lghi TMPR1, 1
| la RD, TMP_STACK // Points to stack. Little-endian.
|->fff_newstr:
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
| llgfr CARG3, TMPR1 // Zero-extended to size_t.
| lgr CARG2, RD
| lgr CARG1, L:RB
| stg PC, SAVE_PC
| brasl r14, extern lj_str_new // (lua_State *L, char *str, size_t l)
|->fff_resstr:
| // GCstr * returned in r2 (CRET1).
| lgr STR:RD, CRET1
| lg BASE, L:RB->base
| lg PC, -8(BASE)
| settp STR:RD, LJ_TSTR
| stg STR:RD, -16(BASE)
| j ->fff_res1
|
|// string.sub(s, start [, end]) fast function.
|// Requires a string and integer positions. 'end' defaults to -1.
|// Register use: RA = start, TMPR1 = end, RC = string length. Negative
|// positions are made relative to the string end and both are clamped to
|// the string bounds. The resulting range is passed to fff_newstr; an empty
|// range yields a zero-length string via fff_emptystr.
|.ffunc string_sub
| ffgccheck
| lghi TMPR1, -1
| clfi NARGS:RD, 1+2; jl ->fff_fallback
| jnh >1 // Exactly two arguments: keep the default end.
| lg TMPR1, 16(BASE)
| checkint TMPR1, ->fff_fallback
|1:
| lg STR:RB, 0(BASE)
| checkstr STR:RB, ->fff_fallback
| // Open-coded integer check for 'start' (value kept in RA).
| lg ITYPE, 8(BASE)
| lgfr RA, ITYPE
| srag ITYPE, ITYPE, 47(r0)
| cghi ITYPE, LJ_TISNUM
| jne ->fff_fallback
| llgf RC, STR:RB->len
| clr RC, TMPR1 // len < end? (unsigned compare)
| jl >5
|2:
| cghi RA, 0 // start <= 0?
| jle >7
|3:
| sr TMPR1, RA // start > end?
| jnhe ->fff_emptystr // TODO: not sure about this, was jl in x64.
| // RD = address of the first byte, TMPR1 = end - start + 1.
| la RD, (#STR-1)(RA, STR:RB)
| ahi TMPR1, 1
|4:
| j ->fff_newstr
|
|5: // Negative end or overflow.
| chi TMPR1, 0
| jnl >6
| ahi TMPR1, 1
| ar TMPR1, RC // end = end+(len+1)
| j <2
|6: // Overflow.
| lr TMPR1, RC // end = len
| j <2
|
|7: // Negative start or underflow.
| // Condition code still comes from the 'cghi RA, 0' above.
| je >8
| agr RA, RC // start = start+(len+1)
| aghi RA, 1
| jh <3 // start > 0?
|8: // Underflow.
| lghi RA, 1 // start = 1
| j <3
|
|->fff_emptystr: // Range underflow.
| lghi TMPR1, 0
| j <4
|
|// string.reverse / string.lower / string.upper fast functions.
|// Resets the global temporary buffer (p = b, L = current state), calls
|// lj_buf_putstr_<name> with the buffer and the string argument, converts
|// the buffer to a string with lj_buf_tostr and returns it via fff_resstr.
|.macro ffstring_op, name
| .ffunc_1 string_ .. name
| ffgccheck
| lg STR:CARG2, 0(BASE)
| checkstr STR:CARG2, ->fff_fallback
| lg L:RB, SAVE_L
| lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH)
| stg BASE, L:RB->base
| lg RC, SBUF:CARG1->b
| stg L:RB, SBUF:CARG1->L
| stg RC, SBUF:CARG1->p
| stg PC, SAVE_PC
| brasl r14, extern lj_buf_putstr_ .. name
| // lgr CARG1, CRET1 (nop, CARG1==CRET1)
| brasl r14, extern lj_buf_tostr
| j ->fff_resstr
|.endmacro
|
|ffstring_op reverse
|ffstring_op lower
|ffstring_op upper
|
|//-- Bit library --------------------------------------------------------
|
|// Common prologue of the bit.* fast functions.
|//   name: fast function name.
|//   kind: 0 = an integer argument returns directly via fff_resbit,
|//         1 = continue after the macro with the 32-bit value in RB,
|//         2 = as 1, but the conversion constant is loaded into f1 up
|//             front so that it stays available to the code that follows.
|//   fdef: macro used to define the fast function (.ffunc_1 by default).
|// A non-integer number is converted by adding the constant loaded by
|// bfpconst_tobit and taking the low 32 bits of the result. Non-numbers
|// fall back.
|.macro .ffunc_bit, name, kind, fdef
| fdef name
|.if kind == 2
| bfpconst_tobit f1, RB
|.endif
| lg RB, 0(BASE)
| ld f0, 0(BASE)
| checkint RB, >1
|.if kind > 0
| j >2
|.else
| j ->fff_resbit
|.endif
|1:
| // Condition code still comes from the integer type check above.
| jh ->fff_fallback
|.if kind < 2
| bfpconst_tobit f1, RB
|.endif
| adbr f0, f1
| lgdr RB, f0
| llgfr RB, RB
|2:
|.endmacro
|
|// Two-argument form: uses .ffunc_1 to define the fast function.
|.macro .ffunc_bit, name, kind
| .ffunc_bit name, kind, .ffunc_1
|.endmacro
|
|// bit.tobit(x): the converted value in RB is the result.
|.ffunc_bit bit_tobit, 0
| j ->fff_resbit
|
|// bit.band / bit.bor / bit.bxor fast functions.
|// The first argument is converted by .ffunc_bit (kind 2, conversion
|// constant kept in f1). The remaining arguments are processed from the
|// last one down to the second, combining each into RB with 'ins'. The
|// original NARGS is kept in TMPR1 so that fff_fallback_bit_op can restore
|// it, since RD is reused as the argument pointer.
|.macro .ffunc_bit_op, name, ins
| .ffunc_bit name, 2
| lgr TMPR1, NARGS:RD // Save for fallback.
| // RD = address of the last argument (BASE + nargs*8 - 8).
| sllg RD, NARGS:RD, 3(r0)
| lay RD, -16(RD, BASE)
|1:
| clgr RD, BASE
| jle ->fff_resbit
| lg RA, 0(RD)
| checkint RA, >2
| ins RB, RA
| aghi RD, -8
| j <1
|2:
| // Condition code still comes from the integer type check above.
| jh ->fff_fallback_bit_op
| // Number argument: convert like the first argument.
| ldgr f0, RA
| adbr f0, f1
| lgdr RA, f0
| ins RB, RA
| aghi RD, -8
| j <1
|.endmacro
|
|.ffunc_bit_op bit_band, nr
|.ffunc_bit_op bit_bor, or
|.ffunc_bit_op bit_bxor, xr
|
|// bit.bswap(x): byte-reverse the 32-bit value.
|.ffunc_bit bit_bswap, 1
| lrvr RB, RB
| j ->fff_resbit
|
|// bit.bnot(x): XOR the 32-bit value with all ones.
|.ffunc_bit bit_bnot, 1
| lhi TMPR2, -1
| xr RB, TMPR2 // TODO: use xilf on newer models?
| j ->fff_resbit
|
|// Fallback entry for bit.band/bor/bxor: RD was reused as an argument
|// pointer there, so restore the saved argument count first.
|->fff_fallback_bit_op:
| lgr NARGS:RD, TMPR1 // Restore for fallback
| j ->fff_fallback
|
|// bit.lshift / bit.rshift / bit.arshift fast functions.
|// The first argument is converted by .ffunc_bit; the second must be an
|// integer and is masked to 5 bits before being used as the shift count.
|.macro .ffunc_bit_sh, name, ins
| .ffunc_bit name, 1, .ffunc_2
| // Note: no inline conversion from number for 2nd argument!
| lg RA, 8(BASE)
| checkint RA, ->fff_fallback
| nill RA, 0x1f // Limit shift to 5-bits.
| ins RB, 0(RA) // TODO: fix shift args in DynASM.
| j ->fff_resbit
|.endmacro
|
|.ffunc_bit_sh bit_lshift, sll
|.ffunc_bit_sh bit_rshift, srl
|.ffunc_bit_sh bit_arshift, sra
|
|// bit.rol(x, n): rotate the 32-bit value left by n.
|.ffunc_bit bit_rol, 1, .ffunc_2
| // Note: no inline conversion from number for 2nd argument!
| lg RA, 8(BASE)
| checkint RA, ->fff_fallback
| // Note: no need to limit rotate to 5-bits (wraps).
| rll RB, RB, 0(RA)
| j ->fff_resbit
|
|// bit.ror(x, n): rotate the 32-bit value right by n, implemented as a
|// left rotate by 32 - (n & 31).
|.ffunc_bit bit_ror, 1, .ffunc_2
| // Note: no inline conversion from number for 2nd argument!
| lg RA, 8(BASE)
| checkint RA, ->fff_fallback
| // TODO: shorter sequence of instructions to convert right rotate into left rotate.
| nill RA, 0x1f
| lghi TMPR2, 32
| sr TMPR2, RA
| lr RA, TMPR2
| rll RB, RB, 0(RA)
| j ->fff_resbit
|
|//-----------------------------------------------------------------------
|
|// Fast function fallback.
|// fff_fallback_2 / fff_fallback_1 force the argument count to two / one
|// before entering fff_fallback. fff_fallback sets up L->base and L->top,
|// makes sure LUA_MINSTACK slots are available and calls the C fallback
|// handler found in the function object at BASE-16. The handler's return
|// value selects the continuation: > 0 returns results through fff_res,
|// 0 retries the fast function, anything else performs a tail call
|// through vm_call_tail.
|->fff_fallback_2:
| lghi NARGS:RD, 1+2 // Other args are ignored, anyway.
| j ->fff_fallback
|->fff_fallback_1:
| lghi NARGS:RD, 1+1 // Other args are ignored, anyway.
|->fff_fallback: // Call fast function fallback handler.
| // BASE = new base, RD = nargs+1
| lg L:RB, SAVE_L
| lg PC, -8(BASE) // Fallback may overwrite PC.
| stg PC, SAVE_PC // Redundant (but a defined value).
| stg BASE, L:RB->base
| // RD = BASE + nargs*8 (new top).
| sllg RD, NARGS:RD, 3(r0)
| lay RD, -8(RD, BASE)
| la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler.
| stg RD, L:RB->top
| lg CFUNC:RD, -16(BASE)
| cleartp CFUNC:RD
| clg RA, L:RB->maxstack
| jh >5 // Need to grow stack.
| lgr CARG1, L:RB
| lg TMPR1, CFUNC:RD->f
| basr r14, TMPR1 // (lua_State *L)
| lg BASE, L:RB->base
| // Either throws an error, or recovers and returns -1, 0 or nresults+1.
| lgr RD, CRET1
| cghi RD, 0; jh ->fff_res // Returned nresults+1?
|1:
| // Recompute nargs+1 from L->top, which the handler may have changed.
| lg RA, L:RB->top
| sgr RA, BASE
| srlg RA, RA, 3(r0)
| cghi RD, 0
| // The la/lg below leave the condition code of the compare intact.
| la NARGS:RD, 1(RA)
| lg LFUNC:RB, -16(BASE)
| jne ->vm_call_tail // Returned -1?
| cleartp LFUNC:RB
| ins_callt // Returned 0: retry fast path.
|
|// Reconstruct previous base for vmeta_call during tailcall.
|->vm_call_tail:
| lgr RA, BASE
| tmll PC, FRAME_TYPE
| jne >3
| llgc RB, PC_RA
| lcgr RB, RB
| sllg RB, RB, 3(r0)
| lay BASE, -16(RB, BASE) // base = base - (RB+2)*8
| j ->vm_call_dispatch // Resolve again for tailcall.
|3:
| // Non-Lua frame: subtract the frame delta (PC with the low 3 bits
| // cleared) from BASE.
| lgr RB, PC
| nill RB, -8
| sgr BASE, RB
| j ->vm_call_dispatch // Resolve again for tailcall.
|
|5: // Grow stack for fallback handler.
| lghi CARG2, LUA_MINSTACK
| lgr CARG1, L:RB
| brasl r14, extern lj_state_growstack // (lua_State *L, int n)
| lg BASE, L:RB->base
| lghi RD, 0 // Simulate a return 0.
| j <1 // Dumb retry (goes through ff first).
|
|// GC step helper for fast functions. Called with the return address in
|// r14, which is parked in TMP_STACK across the C call. Publishes L->base
|// and L->top, runs lj_gc_step and then recomputes BASE and NARGS:RD from
|// the lua_State before returning to the caller.
|->fff_gcstep: // Call GC step function.
| // BASE = new base, RD = nargs+1
| stg r14, TMP_STACK // Save return address
| lg L:RB, SAVE_L
| stg PC, SAVE_PC // Redundant (but a defined value).
| stg BASE, L:RB->base
| sllg RD, NARGS:RD, 3(r0)
| lay RD, -8(RD, BASE)
| lgr CARG1, L:RB
| stg RD, L:RB->top
| brasl r14, extern lj_gc_step // (lua_State *L)
| lg BASE, L:RB->base
| // NARGS:RD = (L->top - L->base)/8 + 1.
| lg RD, L:RB->top
| sgr RD, BASE
| srlg RD, RD, 3(r0)
| aghi NARGS:RD, 1
| lg r14, TMP_STACK // Restore return address.
| br r14
|
|//-----------------------------------------------------------------------
|//-- Special dispatch targets -------------------------------------------
|//-----------------------------------------------------------------------
|
|// NOTE: all special dispatch targets below are placeholders. Each one
|// only contains two stores of r0 to address 0 -- presumably a deliberate
|// trap marking code that is not implemented yet for this port.
|->vm_record: // Dispatch target for recording phase.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->vm_rethook: // Dispatch target for return hooks.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->vm_inshook: // Dispatch target for instr/line hooks.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->cont_hook: // Continue from hook yield.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->vm_hotloop: // Hot loop counter underflow.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->vm_callhook: // Dispatch target for call hooks.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->vm_hotcall: // Hot call counter underflow.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->cont_stitch: // Trace stitching.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->vm_profhook: // Dispatch target for profiler hook.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Called from an exit stub with the exit number on the stack.
|// The 16 bit exit number is stored with two (sign-extended) push imm8.
|// Placeholder: not implemented for this port yet (only stores to
|// address 0, presumably to trap).
|->vm_exit_handler:
| stg r0, 0(r0)
| stg r0, 0(r0)
|// Placeholder: not implemented for this port yet.
|->vm_exit_interp:
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
|// FP value rounding. Called by math.floor/math.ceil fast functions.
|// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14.
|// Emits a rounding helper named 'name'.
|// The value in f0 is divided by 1.0 with DIVIDE TO INTEGER; 'mask' is
|// passed through as that instruction's rounding-mode operand. The
|// integral quotient (f2) is returned in f0. If the branch on the
|// instruction's condition code is taken (see label 1), execution ends in
|// a store to address 0 -- presumably a deliberate trap.
|.macro vm_round, name, mask
|->name:
| // TODO: handle edge cases?
| // f1 = 1.0 (divisor).
| lghi r0, 1
| cdfbr f1, r0
| didbr f0, f2, f1, mask // f0=remainder, f2=quotient.
| jnle >1
| ldr f0, f2
| br r14
|1: // partial remainder (sanity check)
| stg r0, 0(r0)
|.endmacro
|
| vm_round vm_floor, 7 // Round towards -inf.
| vm_round vm_ceil, 6 // Round towards +inf.
| vm_round vm_trunc, 5 // Round towards 0.
|
|// FP modulo x%y. Called by BC_MOD* and vm_arith.
|// Placeholder: not implemented for this port yet (only stores to
|// address 0, presumably to trap).
|->vm_mod:
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
|// NOTE(review): the register names above are x86 names, presumably
|// carried over from another port; this routine is only a placeholder
|// here.
|->vm_powi_sse:
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
|// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
|// Placeholder: not implemented for this port yet (only stores to
|// address 0, presumably to trap).
|->vm_cpuid:
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|//-----------------------------------------------------------------------
|//-- Assertions ---------------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Placeholder: only stores to address 0 (presumably to trap). The
|// LUA_USE_ASSERT section below is still empty.
|->assert_bad_for_arg_type:
| stg r0, 0(r0)
| stg r0, 0(r0)
#ifdef LUA_USE_ASSERT
#endif
|
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|
|// Handler for callback functions. Callback slot number in ah/al.
|// NOTE: the FFI helpers below are placeholders (only stores to address 0,
|// presumably to trap); the FFI is not implemented for this port yet.
|->vm_ffi_callback:
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->cont_ffi_callback: // Return from FFI callback.
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|->vm_ffi_call: // Call C function via FFI.
|// Note: vm_ffi_call must be the last function in this object file!
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|//-----------------------------------------------------------------------
}
/* Generate the code for a single instruction. */
static void build_ins(BuildCtx *ctx, BCOp op, int defop)
{
int vk = 0;
(void)vk;
|// Note: aligning all instructions does not pay off.
|=>defop:
switch (op) {
/* -- Comparison ops ---------------------------------------------------- */
/* Remember: all ops branch for a true comparison, fall through otherwise. */
|// Emits exactly one of the four given jump instructions to 'target',
|// selected at build time by the opcode being generated.
|.macro jmp_comp, lt, ge, le, gt, target
||switch (op) {
||case BC_ISLT:
| lt target
||break;
||case BC_ISGE:
| ge target
||break;
||case BC_ISLE:
| le target
||break;
||case BC_ISGT:
| gt target
||break;
||default: break; /* Shut up GCC. */
||}
|.endmacro
/* Ordered comparisons. Both operands are loaded as raw 64-bit values and
** as doubles. Two integers are compared with a 32-bit signed compare; if
** either operand is a non-integer number, integers are converted and the
** comparison is done in floating point. Non-numbers go to vmeta_comp.
** The jumps passed to jmp_comp are the inverted conditions: they skip the
** branch at label 6 and continue at label 9.
*/
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1, RD = src2, JMP with RD = target
| ins_AD
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| ld f0, 0(RA, BASE)
| ld f1, 0(RD, BASE)
| lg RA, 0(RA, BASE)
| lg RD, 0(RD, BASE)
| // ITYPE/RB = type tags of src1/src2.
| srag ITYPE, RA, 47(r0)
| srag RB, RD, 47(r0)
|
| clfi ITYPE, LJ_TISNUM; jne >7
| clfi RB, LJ_TISNUM; jne >8
| // Both are integers.
| la PC, 4(PC)
| cr RA, RD
| jmp_comp jhe, jl, jh, jle, >9
|6:
| // Comparison is true: take the branch encoded in the next instruction.
| llgh RD, PC_RD
| branchPC RD
|9:
| ins_next
|
|7: // RA is not an integer.
| jh ->vmeta_comp
| // RA is a number.
| clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp
| // RA is a number, RD is an integer.
| cdfbr f1, RD
| j >1
|
|8: // RA is an integer, RD is not an integer.
| jh ->vmeta_comp
| // RA is an integer, RD is a number.
| cdfbr f0, RA
|1:
| la PC, 4(PC)
| cdbr f0, f1
| // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
| jmp_comp jnl, jl, jnle, jle, <9
| j <6
break;
/* Equality of two stack slots. vk is 1 for the EQ variant and 0 for NE.
** Two integers are compared directly; mixed integer/number operands are
** converted and compared in floating point. All other combinations are
** handled at label 5: identical values are equal, values of different
** type or of a non-table/userdata type are unequal, and two different
** tables/userdata with a metatable that may have __eq go to vmeta_equal.
** The iseqne_fp and iseqne_end tails are shared with the other ISEQ/ISNE
** cases through C gotos.
*/
case BC_ISEQV: case BC_ISNEV:
vk = op == BC_ISEQV;
| ins_AD // RA = src1, RD = src2, JMP with RD = target
| sllg RD, RD, 3(r0)
| ld f1, 0(RD, BASE)
| lg RD, 0(RD, BASE)
| sllg RA, RA, 3(r0)
| ld f0, 0(RA, BASE)
| lg RA, 0(RA, BASE)
| la PC, 4(PC)
| // RB/ITYPE = type tags of src2/src1.
| srag RB, RD, 47(r0)
| srag ITYPE, RA, 47(r0)
| clfi RB, LJ_TISNUM; jne >7
| clfi ITYPE, LJ_TISNUM; jne >8
| cr RD, RA
if (vk) {
| jne >9
} else {
| je >9
}
| llgh RD, PC_RD
| branchPC RD
|9:
| ins_next
|
|7: // RD is not an integer.
| jh >5
| // RD is a number.
| clfi ITYPE, LJ_TISNUM; jl >1; jne >5
| // RD is a number, RA is an integer.
| cdfbr f0, RA
| j >1
|
|8: // RD is an integer, RA is not an integer.
| jh >5
| // RD is an integer, RA is a number.
| cdfbr f1, RD
| j >1
|
|1:
| cdbr f0, f1
|4:
iseqne_fp:
/* Shared tail: branch on the result of a floating-point compare. */
if (vk) {
| jne >2 // Unordered means not equal.
} else {
| je >1 // Unordered means not equal.
}
iseqne_end:
/* Shared tail: labels 1 (EQ) and 2 (NE) are ordered so that the
** non-branching outcome falls through to the next instruction.
*/
if (vk) {
|1: // EQ: Branch to the target.
| llgh RD, PC_RD
| branchPC RD
|2: // NE: Fallthrough to next instruction.
|.if not FFI
|3:
|.endif
} else {
|.if not FFI
|3:
|.endif
|2: // NE: Branch to the target.
| llgh RD, PC_RD
| branchPC RD
|1: // EQ: Fallthrough to next instruction.
}
if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
op == BC_ISEQN || op == BC_ISNEN)) {
| j <9
} else {
| ins_next
}
|
if (op == BC_ISEQV || op == BC_ISNEV) {
|5: // Either or both types are not numbers.
|.if FFI
| clfi RB, LJ_TCDATA; je ->vmeta_equal_cd
| clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd
|.endif
| cgr RA, RD
| je <1 // Same GCobjs or pvalues?
| cr RB, ITYPE
| jne <2 // Not the same type?
| clfi RB, LJ_TISTABUD
| jh <2 // Different objects and not table/ud?
|
| // Different tables or userdatas. Need to check __eq metamethod.
| // Field metatable must be at same offset for GCtab and GCudata!
| cleartp TAB:RA
| lg TAB:RB, TAB:RA->metatable
| cghi TAB:RB, 0
| je <2 // No metatable?
| llgc TMPR2, TAB:RB->nomm
| tmll TMPR2, 1<<MM_eq
| jne <2 // Or 'no __eq' flag set?
if (vk) {
| lghi RB, 0 // ne = 0 // TODO: should be 32-bit?
} else {
| lghi RB, 1 // ne = 1 // TODO: should be 32-bit?
}
| j ->vmeta_equal // Handle __eq metamethod.
} else {
|.if FFI
|3:
| clfi ITYPE, LJ_TCDATA
if (LJ_DUALNUM && vk) {
| jne <9
} else {
| jne <2
}
| j ->vmeta_equal_cd
|.endif
}
break;
/* Equality of a stack slot with a string constant. After the string type
** check the slot value is compared with the constant as a 64-bit value.
** The iseqne_test label is also reached from BC_ISEQP/BC_ISNEP; the code
** then continues in the shared iseqne_end tail.
*/
case BC_ISEQS: case BC_ISNES:
vk = op == BC_ISEQS;
| ins_AND // RA = src, RD = str const, JMP with RD = target
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| lg RB, 0(RA, BASE)
| la PC, 4(PC)
| checkstr RB, >3
| cg RB, 0(RD, KBASE)
iseqne_test:
if (vk) {
| jne >2
} else {
| je >1
}
goto iseqne_end;
/* Equality of a stack slot with a number constant. Two integers are
** compared directly; otherwise the integer side is converted and the
** comparison is done in floating point, finishing in the shared
** iseqne_fp tail. A non-number slot continues at label 3.
*/
case BC_ISEQN: case BC_ISNEN:
vk = op == BC_ISEQN;
| ins_AD // RA = src, RD = num const, JMP with RD = target
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| ld f0, 0(RA, BASE)
| lg RB, 0(RA, BASE)
| ld f1, 0(RD, KBASE)
| lg RD, 0(RD, KBASE)
| la PC, 4(PC)
| checkint RB, >7
| checkint RD, >8
| cr RB, RD
if (vk) {
| jne >9
} else {
| je >9
}
| llgh RD, PC_RD
| branchPC RD
|9:
| ins_next
|
|7: // RA is not an integer.
| jh >3
| // RA is a number.
| checkint RD, >1
| // RA is a number, RD is an integer.
| cdfbr f1, RD
| j >1
|
|8: // RA is an integer, RD is a number.
| cdfbr f0, RB
| cdbr f0, f1
| j >4
|1:
| cdbr f0, f1
|4:
goto iseqne_fp;
/* Equality of a stack slot with a primitive type. Only the type tag of
** the slot is compared with the operand. Without the FFI the shared
** iseqne_test tail is used; with the FFI a cdata operand is redirected to
** vmeta_equal_cd.
*/
case BC_ISEQP: case BC_ISNEP:
vk = op == BC_ISEQP;
| ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
| sllg RA, RA, 3(r0)
| lg RB, 0(RA, BASE)
| srag RB, RB, 47(r0)
| la PC, 4(PC)
| cr RB, RD
if (!LJ_HASFFI) goto iseqne_test;
if (vk) {
| jne >3
| llgh RD, PC_RD
| branchPC RD
|2:
| ins_next
|3:
| cghi RB, LJ_TCDATA; jne <2
| j ->vmeta_equal_cd
} else {
| je >2
| cghi RB, LJ_TCDATA; je ->vmeta_equal_cd
| llgh RD, PC_RD
| branchPC RD
|2:
| ins_next
}
break;
/* -- Unary test and copy ops ------------------------------------------- */
/* Truthiness tests. The type tag of the source is compared (unsigned)
** with LJ_TISTRUECOND. IST/ISTC skip the branch when the tag is at or
** above it, ISF/ISFC when it is below it. The copying variants
** (ISTC/ISFC) store the source value to the destination before taking the
** branch.
*/
case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
| ins_AD // RA = dst or unused, RD = src, JMP with RD = target
| sllg RD, RD, 3(r0)
| sllg RA, RA, 3(r0)
| lg ITYPE, 0(RD, BASE)
| la PC, 4(PC)
if (op == BC_ISTC || op == BC_ISFC) {
| lgr RB, ITYPE
}
| srag ITYPE, ITYPE, 47(r0)
| clfi ITYPE, LJ_TISTRUECOND
if (op == BC_IST || op == BC_ISTC) {
| jhe >1
} else {
| jl >1
}
if (op == BC_ISTC || op == BC_ISFC) {
| stg RB, 0(RA, BASE)
}
| llgh RD, PC_RD
| branchPC RD
|1: // Fallthrough to the next instruction.
| ins_next
break;
/* Type assertion: the operand holds the negated type, so adding it to the
** slot's type tag gives zero on a match. Any other result goes to
** vmeta_istype.
*/
case BC_ISTYPE:
| ins_AD // RA = src, RD = -type
| lghr RD, RD // TODO: always sign extend RD?
| sllg RA, RA, 3(r0)
| lg RB, 0(RA, BASE)
| srag RB, RB, 47(r0)
| agr RB, RD
| jne ->vmeta_istype
| ins_next
break;
/* Number assertion: continues at vmeta_istype when the number type check
** on the slot fails.
*/
case BC_ISNUM:
| ins_AD // RA = src, RD = -(TISNUM-1)
| sllg TMPR1, RA, 3(r0)
| lg TMPR1, 0(TMPR1, BASE)
| checknumtp TMPR1, ->vmeta_istype
| ins_next
break;
/* Copy one stack slot to another (8 bytes per slot). */
case BC_MOV:
| ins_AD // RA = dst, RD = src
| // Scale both slot indices to byte offsets first, then copy the value.
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| lg RB, 0(RD, BASE)
| stg RB, 0(RA, BASE)
| ins_next_
break;
/* Logical not: the result is true only if the source is nil or false.
** This uses the same unsigned type tag test against LJ_TISTRUECOND as the
** IST/ISF instructions. The code previously compared the tag against
** LJ_TTRUE only, so every other truthy value (numbers, strings, tables,
** ...) was negated to true.
*/
case BC_NOT:
| ins_AD // RA = dst, RD = src
| sllg RD, RD, 3(r0)
| sllg RA, RA, 3(r0)
| lg RB, 0(RD, BASE)
| srag RB, RB, 47(r0)
| load_false RC
| clfi RB, LJ_TISTRUECOND
| jl >1 // Source is truthy: result stays false.
| load_true RC // Source is nil or false.
|1:
| stg RC, 0(RA, BASE)
| ins_next
break;
/* Unary minus. Integers are negated with a 32-bit load-complement; on
** overflow the result is replaced by the double 2^31. Other numbers have
** their sign bit toggled. Non-numbers go to vmeta_unm.
*/
case BC_UNM:
| ins_AD // RA = dst, RD = src
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| lg RB, 0(RD, BASE)
| checkint RB, >3
| lcr RB, RB; jo >2
|1:
| stg RB, 0(RA, BASE)
| ins_next
|2:
| llihh RB, 0x41e0 // (double)2^31
| j <1
|3:
| // Condition code still comes from the integer type check above.
| jh ->vmeta_unm
| // Toggle sign bit.
| llihh TMPR2, 0x8000
| xgr RB, TMPR2
| j <1
break;
/* Length operator. Strings return their len field. Tables call
** lj_tab_len; with LJ_52 a table whose metatable does not have the
** 'no __len' flag set goes to vmeta_len instead. All other types go to
** vmeta_len.
*/
case BC_LEN:
| ins_AD // RA = dst, RD = src
| sllg RD, RD, 3(r0)
| lg RD, 0(RD, BASE)
| checkstr RD, >2
| llgf RD, STR:RD->len
|1:
| // Box the length in RD as an integer and store it to the destination.
| sllg RA, RA, 3(r0)
| setint RD
| stg RD, 0(RA, BASE)
| ins_next
|2:
| cghi ITYPE, LJ_TTAB; jne ->vmeta_len
| lgr TAB:CARG1, TAB:RD
#if LJ_52
| lg TAB:RB, TAB:RD->metatable
| cghi TAB:RB, 0
| jne >9
|3:
#endif
|->BC_LEN_Z:
| lgr RB, BASE // Save BASE.
| brasl r14, extern lj_tab_len // (GCtab *t)
| // Length of table returned in r2 (CRET1).
| lgr RD, CRET1
| lgr BASE, RB // Restore BASE.
| // Reload the destination slot index before rejoining at label 1.
| llgc RA, PC_RA
| j <1
#if LJ_52
|9: // Check for __len.
| llgc TMPR2, TAB:RB->nomm
| tmll TMPR2, 1<<MM_len
| jne <3
| j ->vmeta_len // 'no __len' flag NOT set: check.
#endif
break;
/* -- Binary ops -------------------------------------------------------- */
|// Decodes an ABC instruction and scales all three operands to byte
|// offsets (8 bytes per slot).
|.macro ins_arithpre
| ins_ABC
| sllg RB, RB, 3(r0)
| sllg RC, RC, 3(r0)
| sllg RA, RA, 3(r0)
|.endmacro
|
|// Floating-point arithmetic for the VN/NV/VV opcode variants (vk = 0/1/2).
|// Both operands must pass the number type check, otherwise the matching
|// vmeta_arith_* handler is entered. 'ins' is applied as 'ins f0, f1' and
|// the result in f0 is stored to the destination slot.
|.macro ins_arithfp, ins
| ins_arithpre
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
| ld f0, 0(RB, BASE)
| ld f1, 0(RC, KBASE)
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, KBASE)
| checknumtp RB, ->vmeta_arith_vno
| checknumtp RC, ->vmeta_arith_vno
| ins f0, f1
|| break;
||case 1:
| // Operands are swapped: f0 = constant, f1 = variable.
| ld f1, 0(RB, BASE)
| ld f0, 0(RC, KBASE)
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, KBASE)
| checknumtp RB, ->vmeta_arith_nvo
| checknumtp RC, ->vmeta_arith_nvo
| ins f0, f1
|| break;
||default:
| ld f0, 0(RB, BASE)
| ld f1, 0(RC, BASE)
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, BASE)
| checknumtp RB, ->vmeta_arith_vvo
| checknumtp RC, ->vmeta_arith_vvo
| ins f0, f1
|| break;
||}
| std f0, 0(RA, BASE)
| ins_next
|.endmacro
|
|// Integer arithmetic for the VN/NV/VV opcode variants (vk = 0/1/2).
|// Both operands must pass the integer type check; a failed check or an
|// overflow of the 32-bit 'intins' enters the matching vmeta_arith_*
|// handler. The modified operand register is stored back as the result
|// without re-boxing (the setint lines are commented out).
|.macro ins_arithdn, intins
| ins_arithpre
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, KBASE)
| checkint RB, ->vmeta_arith_vno
| checkint RC, ->vmeta_arith_vno
| intins RB, RC; jo ->vmeta_arith_vno
|| break;
||case 1:
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, KBASE)
| checkint RB, ->vmeta_arith_nvo
| checkint RC, ->vmeta_arith_nvo
| intins RC, RB; jo ->vmeta_arith_nvo
|| break;
||default:
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, BASE)
| checkint RB, ->vmeta_arith_vvo
| checkint RC, ->vmeta_arith_vvo
| intins RB, RC; jo ->vmeta_arith_vvo
|| break;
||}
||if (vk == 1) {
| // setint RC
| stg RC, 0(RA, BASE)
||} else {
| // setint RB
| stg RB, 0(RA, BASE)
||}
| ins_next
|.endmacro
| // RA = dst, RB = src1 or num const, RC = src2 or num const
/* Addition: 32-bit integer add through ins_arithdn. */
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
| ins_arithdn ar
break;
/* Subtraction: 32-bit integer subtract through ins_arithdn. */
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
| ins_arithdn sr
break;
/* Multiplication of two integers. The product is computed as a 64-bit
** value and compared with its own sign-extended low 32 bits; a mismatch
** (result does not fit in 32 bits) or a non-integer operand enters the
** matching vmeta_arith_* handler.
*/
case BC_MULVN: case BC_MULNV: case BC_MULVV:
| ins_arithpre
| // For multiplication we use msgfr and check if the result
| // fits in an int32_t.
switch(op) {
case BC_MULVN:
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, KBASE)
| checkint RB, ->vmeta_arith_vno
| checkint RC, ->vmeta_arith_vno
| lgfr RB, RB
| msgfr RB, RC
| lgfr RC, RB
| cgr RB, RC; jne ->vmeta_arith_vno
break;
case BC_MULNV:
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, KBASE)
| checkint RB, ->vmeta_arith_nvo
| checkint RC, ->vmeta_arith_nvo
| lgfr RB, RB
| msgfr RB, RC
| lgfr RC, RB
| cgr RB, RC; jne ->vmeta_arith_nvo
break;
default:
| lg RB, 0(RB, BASE)
| lg RC, 0(RC, BASE)
| checkint RB, ->vmeta_arith_vvo
| checkint RC, ->vmeta_arith_vvo
| lgfr RB, RB
| msgfr RB, RC
| lgfr RC, RB
| cgr RB, RC; jne ->vmeta_arith_vvo
break;
}
| // Zero-extend the 32-bit product and box it as an integer.
| llgfr RB, RB
| setint RB
| stg RB, 0(RA, BASE)
| ins_next
break;
/* Division: floating-point divide through ins_arithfp. */
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
| ins_arithfp ddbr
break;
// TODO: implement fast mod operation.
// x86_64 does floating point mod, however it might be better to use integer mod.
/* Modulo has no inline fast path yet: every variant jumps straight to
** its vmeta_arith_* handler.
*/
case BC_MODVN:
| j ->vmeta_arith_vno
break;
case BC_MODNV:
| j ->vmeta_arith_nvo
break;
case BC_MODVV:
| j ->vmeta_arith_vvo
break;
case BC_POW:
| ins_ABC
| sllg RB, RB, 3(r0)
| sllg RC, RC, 3(r0)
| ld FARG1, 0(RB, BASE)
| ld FARG2, 0(RC, BASE)
| lg TMPR2, 0(RB, BASE)
| checknumtp TMPR2, ->vmeta_arith_vvo
| lg TMPR2, 0(RC, BASE)
| checknumtp TMPR2, ->vmeta_arith_vvo
| lgr RB, BASE // TODO: redundant, BASE is currently callee-saved.
| brasl r14, extern pow // double pow(double x, double y), result in f0.
| llgc RA, PC_RA
| lgr BASE, RB
| sllg RA, RA, 3(r0)
| std f0, 0(RA, BASE)
| ins_next
break;
case BC_CAT:
| ins_ABC // RA = dst, RB = src_start, RC = src_end
| lg L:CARG1, SAVE_L
| stg BASE, L:CARG1->base
| lgr CARG3, RC
| sgr CARG3, RB
| sllg RC, RC, 3(r0)
| la CARG2, 0(RC, BASE)
|->BC_CAT_Z:
| lgr L:RB, L:CARG1
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_cat // (lua_State *L, TValue *top, int left)
| // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
| lg BASE, L:RB->base
| ltgr RC, CRET1
| jne ->vmeta_binop
| llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
| sllg RB, RB, 3(r0)
| llgc RA, PC_RA
| sllg RA, RA, 3(r0)
| lg RC, 0(RB, BASE)
| stg RC, 0(RA, BASE)
| ins_next
break;
/* -- Constant ops ------------------------------------------------------ */
case BC_KSTR:
| ins_AND // RA = dst, RD = str const (~)
| sllg RD, RD, 3(r0)
| lg RD, 0(RD, KBASE)
| settp RD, LJ_TSTR
| sllg RA, RA, 3(r0)
| stg RD, 0(RA, BASE)
| ins_next
break;
case BC_KCDATA:
| stg r0, 0(r0)
| stg r0, 0(r0)
break;
case BC_KSHORT:
| ins_AD // RA = dst, RD = signed int16 literal
| // Assumes DUALNUM.
| lhr RD, RD // Sign-extend literal to 32-bits.
| setint RD
| sllg RA, RA, 3(r0)
| stg RD, 0(RA, BASE)
| ins_next
break;
case BC_KNUM:
| ins_AD // RA = dst, RD = num const
| sllg RD, RD, 3(r0)
| ld f0, 0(RD, KBASE)
| sllg RA, RA, 3(r0)
| std f0, 0(RA, BASE)
| ins_next
break;
case BC_KPRI:
| ins_AD // RA = dst, RD = primitive type (~)
| sllg RA, RA, 3(r0)
| sllg RD, RD, 47(r0)
| lghi TMPR2, -1
| xgr RD, TMPR2 // not
| stg RD, 0(RA, BASE)
| ins_next
break;
case BC_KNIL:
| ins_AD // RA = dst_start, RD = dst_end
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| la RA, 8(RA, BASE)
| la RD, 0(RD, BASE)
| lghi RB, LJ_TNIL
| stg RB, -8(RA) // Sets minimum 2 slots.
|1:
| stg RB, 0(RA)
| la RA, 8(RA)
| clgr RA, RD
| jle <1
| ins_next
break;
/* -- Upvalue and function ops ------------------------------------------ */
case BC_UGET:
| ins_AD // RA = dst, RD = upvalue #
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| lg LFUNC:RB, -16(BASE)
| cleartp LFUNC:RB
| lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB)
| lg RB, UPVAL:RB->v
| lg RD, 0(RB)
| stg RD, 0(RA, BASE)
| ins_next
break;
case BC_USETV:
#define TV2MARKOFS \
((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
| ins_AD // RA = upvalue #, RD = src
| lg LFUNC:RB, -16(BASE)
| cleartp LFUNC:RB
| sllg RA, RA, 3(r0)
| lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
| // TODO: (instead of next 2 instructions) tm UPVAL:RB->closed, 0xff
| llgc TMPR2, UPVAL:RB->closed
| tmll TMPR2, 0xff
| lg RB, UPVAL:RB->v
| sllg TMPR1, RD, 3(r0)
| lg RA, 0(TMPR1, BASE)
| stg RA, 0(RB)
| je >1
| // Check barrier for closed upvalue.
| // TODO: tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
| llgc TMPR2, TV2MARKOFS(RB)
| tmll TMPR2, LJ_GC_BLACK
| jne >2
|1:
| ins_next
|
|2: // Upvalue is black. Check if new value is collectable and white.
| srag RD, RA, 47(r0)
| ahi RD, -LJ_TISGCV
| clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
| jle <1
| cleartp GCOBJ:RA
| // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
| llgc TMPR2, GCOBJ:RA->gch.marked
| tmll TMPR2, LJ_GC_WHITES
| je <1
| // Crossed a write barrier. Move the barrier forward.
| lgr CARG2, RB
| lgr RB, BASE // Save BASE.
| lay GL:CARG1, GG_DISP2G(DISPATCH)
| brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
| lgr BASE, RB // Restore BASE.
| j <1
break;
#undef TV2MARKOFS
case BC_USETS:
| ins_AND // RA = upvalue #, RD = str const (~)
| lg LFUNC:RB, -16(BASE)
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| cleartp LFUNC:RB
| lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
| lg STR:RA, 0(RD, KBASE)
| lg RD, UPVAL:RB->v
| settp STR:ITYPE, STR:RA, LJ_TSTR
| stg STR:ITYPE, 0(RD)
| // TODO: tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
| llgc TMPR2, UPVAL:RB->marked
| tmll TMPR2, LJ_GC_BLACK
| jne >2
|1:
| ins_next
|
|2: // Check if string is white and ensure upvalue is closed.
| // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
| llgc TMPR2, GCOBJ:RA->gch.marked
| tmll TMPR2, LJ_GC_WHITES
| je <1
| // TODO: tm UPVAL:RB->closed, 0xff
| llgc TMPR2, UPVAL:RB->closed
| tmll TMPR2, 0xff
| je <1
| // Crossed a write barrier. Move the barrier forward.
| lgr RB, BASE
| lgr CARG2, RD
| lay GL:CARG1, GG_DISP2G(DISPATCH)
| brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
| lgr BASE, RB // Restore BASE.
| j <1
break;
case BC_USETN:
| ins_AD // RA = upvalue #, RD = num const
| lg LFUNC:RB, -16(BASE)
| sllg RA, RA, 3(r0)
| sllg RD, RD, 3(r0)
| cleartp LFUNC:RB
| ld f0, 0(RD, KBASE)
| lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
| lg RA, UPVAL:RB->v
| std f0, 0(RA)
| ins_next
break;
case BC_USETP:
| ins_AD // RA = upvalue #, RD = primitive type (~)
| lg LFUNC:RB, -16(BASE)
| sllg RA, RA, 3(r0)
| cleartp LFUNC:RB
| lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
| sllg RD, RD, 47(r0)
| lghi TMPR2, -1
| xgr RD, TMPR2
| lg RA, UPVAL:RB->v
| stg RD, 0(RA)
| ins_next
break;
case BC_UCLO:
| ins_AD // RA = level, RD = target
| branchPC RD // Do this first to free RD.
| lg L:RB, SAVE_L
| ltg TMPR2, L:RB->openupval
| je >1
| stg BASE, L:RB->base
| sllg RA, RA, 3(r0)
| la CARG2, 0(RA, BASE)
| lgr L:CARG1, L:RB
| brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level)
| lg BASE, L:RB->base
|1:
| ins_next
break;
case BC_FNEW:
| ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
| lg CARG3, -16(BASE)
| cleartp CARG3
| sllg RD, RD, 3(r0)
| lg CARG2, 0(RD, KBASE) // Fetch GCproto *.
| lgr CARG1, L:RB
| stg PC, SAVE_PC
| // (lua_State *L, GCproto *pt, GCfuncL *parent)
| brasl r14, extern lj_func_newL_gc
| // GCfuncL * returned in r2 (CRET1).
| lg BASE, L:RB->base
| llgc RA, PC_RA
| sllg RA, RA, 3(r0)
| settp LFUNC:CRET1, LJ_TFUNC
| stg LFUNC:CRET1, 0(RA, BASE)
| ins_next
break;
case BC_TNEW:
| ins_AD // RA = dst, RD = hbits|asize
| lg L:RB, SAVE_L
| stg BASE, L:RB->base
| lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
| clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
| stg PC, SAVE_PC
| jhe >5
|1:
| srlg CARG3, RD, 11(r0)
| llill TMPR2, 0x7ff
| nr RD, TMPR2
| cr RD, TMPR2
| je >3
|2:
| lgr L:CARG1, L:RB
| llgfr CARG2, RD
| brasl r14, extern lj_tab_new // (lua_State *L, uint32_t asize, uint32_t hbits)
| // Table * returned in r2 (CRET1).
| lg BASE, L:RB->base
| llgc RA, PC_RA
| sllg RA, RA, 3(r0)
| settp TAB:CRET1, LJ_TTAB
| stg TAB:CRET1, 0(RA, BASE)
| ins_next
|3: // Turn 0x7ff into 0x801.
| llill RD, 0x801
| j <2
|5:
| lgr L:CARG1, L:RB
| brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
| llgh RD, PC_RD
| j <1
break;
case BC_TDUP:
| ins_AND // RA = dst, RD = table const (~) (holding template table)
| lg L:RB, SAVE_L
| lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
| stg PC, SAVE_PC
| clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
| stg BASE, L:RB->base
| jhe >3
|2:
| sllg RD, RD, 3(r0)
| lg TAB:CARG2, 0(RD, KBASE)
| lgr L:CARG1, L:RB
| brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt)
| // Table * returned in r2 (CRET1).
| lg BASE, L:RB->base
| llgc RA, PC_RA
| settp TAB:CRET1, LJ_TTAB
| sllg RA, RA, 3(r0)
| stg TAB:CRET1, 0(RA, BASE)
| ins_next
|3:
| lgr L:CARG1, L:RB
| brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
| llgh RD, PC_RD // Need to reload RD.
| lghi TMPR2, -1
| xgr RD, TMPR2 // not RD
| j <2
break;
case BC_GGET:
| ins_AND // RA = dst, RD = str const (~)
| lg LFUNC:RB, -16(BASE)
| cleartp LFUNC:RB
| lg TAB:RB, LFUNC:RB->env
| sllg TMPR1, RD, 3(r0)
| lg STR:RC, 0(TMPR1, KBASE)
| j ->BC_TGETS_Z
break;
case BC_GSET:
| ins_AND // RA = src, RD = str const (~)
| lg LFUNC:RB, -16(BASE)
| cleartp LFUNC:RB
| lg TAB:RB, LFUNC:RB->env
| sllg TMPR1, RD, 3(r0)
| lg STR:RC, 0(TMPR1, KBASE)
| j ->BC_TSETS_Z
break;
case BC_TGETV:
| ins_ABC // RA = dst, RB = table, RC = key
| sllg RB, RB, 3(r0)
| lg TAB:RB, 0(RB, BASE)
| sllg RC, RC, 3(r0)
| lg RC, 0(RC, BASE)
| checktab TAB:RB, ->vmeta_tgetv
|
| // Integer key?
| checkint RC, >5
| cl RC, TAB:RB->asize // Takes care of unordered, too.
| jhe ->vmeta_tgetv // Not in array part? Use fallback.
| llgfr RC, RC
| sllg RC, RC, 3(r0)
| ag RC, TAB:RB->array
| // Get array slot.
| lg ITYPE, 0(RC)
| cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
| je >2
|1:
| sllg RA, RA, 3(r0)
| stg ITYPE, 0(RA, BASE)
| ins_next
|
|2: // Check for __index if table value is nil.
| lg TAB:TMPR1, TAB:RB->metatable
| cghi TAB:TMPR1, 0
| je <1
| llgc TMPR2, TAB:TMPR1->nomm
| tmll TMPR2, 1<<MM_index
| je ->vmeta_tgetv // 'no __index' flag NOT set: check.
| j <1
|
|5: // String key?
| cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv
| cleartp STR:RC
| j ->BC_TGETS_Z
break;
case BC_TGETS:
| ins_ABC
| sllg RB, RB, 3(r0)
| lg TAB:RB, 0(RB, BASE)
| lghi TMPR1, -1
| xgr RC, TMPR1
| sllg RC, RC, 3(r0)
| lg STR:RC, 0(RC, KBASE)
| checktab TAB:RB, ->vmeta_tgets
|->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
| l TMPR1, TAB:RB->hmask
| n TMPR1, STR:RC->hash
| lgfr TMPR1, TMPR1
| mghi TMPR1, #NODE // TODO: not sure about this one, original: imul TMPRd, #NODE
| ag NODE:TMPR1, TAB:RB->node
| settp ITYPE, STR:RC, LJ_TSTR
|1:
| cg ITYPE, NODE:TMPR1->key
| jne >4
| // Get node value.
| lg ITYPE, NODE:TMPR1->val
| cghi ITYPE, LJ_TNIL
| je >5 // Key found, but nil value?
|2:
| sllg RA, RA, 3(r0)
| stg ITYPE, 0(RA, BASE)
| ins_next
|
|4: // Follow hash chain.
| lg NODE:TMPR1, NODE:TMPR1->next
| cghi NODE:TMPR1, 0
| jne <1
| // End of hash chain: key not found, nil result.
| lghi ITYPE, LJ_TNIL
|
|5: // Check for __index if table value is nil.
| lg TAB:TMPR1, TAB:RB->metatable
| cghi TAB:TMPR1, 0
| je <2 // No metatable: done.
| llgc TMPR2, TAB:TMPR1->nomm
| tmll TMPR2, 1<<MM_index
| jne <2 // 'no __index' flag set: done.
| j ->vmeta_tgets // Caveat: preserve STR:RC.
break;
case BC_TGETB:
| ins_ABC // RA = dst, RB = table, RC = byte literal
| sllg RB, RB, 3(r0)
| lg TAB:RB, 0(RB, BASE)
| checktab TAB:RB, ->vmeta_tgetb
| cl RC, TAB:RB->asize
| jhe ->vmeta_tgetb
| sllg RC, RC, 3(r0)
| ag RC, TAB:RB->array
| // Get array slot.
| lg ITYPE, 0(RC)
| cghi ITYPE, LJ_TNIL
| je >2
|1:
| sllg RA, RA, 3(r0)
| stg ITYPE, 0(RA, BASE)
| ins_next
|
|2: // Check for __index if table value is nil.
| lg TAB:TMPR1, TAB:RB->metatable
| cghi TAB:TMPR1, 0
| je <1
| llgc TMPR2, TAB:TMPR1->nomm
| tmll TMPR2, 1<<MM_index
| je ->vmeta_tgetb // 'no __index' flag NOT set: check.
| j <1
break;
case BC_TGETR:
| ins_ABC // RA = dst, RB = table, RC = key
| sllg RB, RB, 3(r0)
| lg TAB:RB, 0(RB, BASE)
| cleartp TAB:RB
| sllg RC, RC, 3(r0)
| llgf RC, 4(RC, BASE) // Load low word (big endian).
| cl RC, TAB:RB->asize
| jhe ->vmeta_tgetr // Not in array part? Use fallback.
| sllg RC, RC, 3(r0)
| ag RC, TAB:RB->array
| // Get array slot.
|->BC_TGETR_Z:
| lg ITYPE, 0(RC)
|->BC_TGETR2_Z:
| sllg RA, RA, 3(r0)
| stg ITYPE, 0(RA, BASE)
| ins_next
break;
case BC_TSETV:
| ins_ABC // RA = src, RB = table, RC = key
| sllg RB, RB, 3(r0)
| lg TAB:RB, 0(RB, BASE)
| sllg RC, RC, 3(r0)
| lg RC, 0(RC, BASE)
| checktab TAB:RB, ->vmeta_tsetv
|
| // Integer key?
| checkint RC, >5
| cl RC, TAB:RB->asize // Takes care of unordered, too.
| jhe ->vmeta_tsetv
| llgfr RC, RC
| sllg RC, RC, 3(r0)
| ag RC, TAB:RB->array
| lghi TMPR2, LJ_TNIL
| cg TMPR2, 0(RC)
| je >3 // Previous value is nil?
|1:
| llgc TMPR1, TAB:RB->marked
| tmll TMPR1, LJ_GC_BLACK // isblack(table)
| jne >7
|2: // Set array slot.
| sllg RA, RA, 3(r0)
| lg RB, 0(RA, BASE)
| stg RB, 0(RC)
| ins_next
|
|3: // Check for __newindex if previous value is nil.
| lg TAB:TMPR1, TAB:RB->metatable
| cghi TAB:TMPR1, 0
| je <1
| llgc TMPR2, TAB:TMPR1->nomm
| tmll TMPR2, 1<<MM_newindex
| je ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
| j <1
|
|5: // String key?
| cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv
| cleartp STR:RC
| j ->BC_TSETS_Z
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMPR1
| j <2
break;
case BC_TSETS:
| ins_ABC // RA = src, RB = table, RC = str const (~)
| sllg RB, RB, 3(r0)
| lg TAB:RB, 0(RB, BASE)
| lghi TMPR2, -1
| xgr RC, TMPR2 // ~RC
| sllg RC, RC, 3(r0)
| lg STR:RC, 0(RC, KBASE)
| checktab TAB:RB, ->vmeta_tsets
|->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
| l TMPR1, TAB:RB->hmask
| n TMPR1, STR:RC->hash
| lgfr TMPR1, TMPR1
| mghi TMPR1, #NODE
| xr TMPR2, TMPR2
| stc TMPR2, TAB:RB->nomm // Clear metamethod cache.
| ag NODE:TMPR1, TAB:RB->node
| settp ITYPE, STR:RC, LJ_TSTR
|1:
| cg ITYPE, NODE:TMPR1->key
| jne >5
| // Ok, key found. Assumes: offsetof(Node, val) == 0
| lghi TMPR2, LJ_TNIL
| cg TMPR2, 0(TMPR1)
| je >4 // Previous value is nil?
|2:
| llgc TMPR2, TAB:RB->marked
| tmll TMPR2, LJ_GC_BLACK // isblack(table)
| jne >7
|3: // Set node value.
| sllg RA, RA, 3(r0)
| lg ITYPE, 0(RA, BASE)
| stg ITYPE, 0(TMPR1)
| ins_next
|
|4: // Check for __newindex if previous value is nil.
| lg TAB:ITYPE, TAB:RB->metatable
| cghi TAB:ITYPE, 0
| je <2
| llgc TMPR2, TAB:ITYPE->nomm
| tmll TMPR2, 1<<MM_newindex
| je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
| j <2
|
|5: // Follow hash chain.
| lg NODE:TMPR1, NODE:TMPR1->next
| cghi NODE:TMPR1, 0
| jne <1
| // End of hash chain: key not found, add a new one.
|
| // But check for __newindex first.
| lg TAB:TMPR1, TAB:RB->metatable
| cghi TAB:TMPR1, 0
| je >6 // No metatable: continue.
| llgc TMPR2, TAB:TMPR1->nomm
| tmll TMPR2, 1<<MM_newindex
| je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
|6:
| stg ITYPE, TMP_STACK
| lg L:CARG1, SAVE_L
| stg BASE, L:CARG1->base
| la CARG3, TMP_STACK // TODO: lea CARG3, ITYPE... not sure.
| lgr CARG2, TAB:RB
| stg PC, SAVE_PC
| brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
| // Handles write barrier for the new key. TValue * returned in r2 (CRET1).
| lgr TMPR1, CRET1
| lg L:CRET1, SAVE_L
| lg BASE, L:CRET1->base
| llgc RA, PC_RA
| j <2 // Must check write barrier for value.
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, ITYPE
| j <3
break;
case BC_TSETB:
| ins_ABC // RA = src, RB = table, RC = byte literal
| sllg RB, RB, 3(r0)
| lg TAB:RB, 0(RB, BASE)
| checktab TAB:RB, ->vmeta_tsetb
| cl RC, TAB:RB->asize
| jhe ->vmeta_tsetb
| sllg RC, RC, 3(r0)
| ag RC, TAB:RB->array
| lghi TMPR2, LJ_TNIL
| cg TMPR2, 0(RC)
| je >3 // Previous value is nil?
|1:
| llgc TMPR1, TAB:RB->marked
| tmll TMPR1, LJ_GC_BLACK // isblack(table)
| jne >7
|2: // Set array slot.
| sllg RA, RA, 3(r0)
| lg ITYPE, 0(RA, BASE)
| stg ITYPE, 0(RC)
| ins_next
|
|3: // Check for __newindex if previous value is nil.
| lg TAB:TMPR1, TAB:RB->metatable
| cghi TAB:TMPR1, 0
| je <1
| llgc TMPR2, TAB:TMPR1->nomm
| tmll TMPR2, 1<<MM_newindex
| je ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
| j <1
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMPR1
| j <2
break;
case BC_TSETR:
| ins_ABC // RA = src, RB = table, RC = key
| sllg RB, RB, 3(r0)
| lg TAB:RB, 0(RB, BASE)
| cleartp TAB:RB
| sllg RC, RC, 3(r0)
| lg RC, 0(RC, BASE)
| llgc TMPR2, TAB:RB->marked
| tmll TMPR2, LJ_GC_BLACK // isblack(table)
| jne >7
|2:
| cl RC, TAB:RB->asize
| jhe ->vmeta_tsetr
| llgfr RC, RC
| sllg RC, RC, 3(r0)
| ag RC, TAB:RB->array
| // Set array slot.
|->BC_TSETR_Z:
| sllg RA, RA, 3(r0)
| lg ITYPE, 0(RA, BASE)
| stg ITYPE, 0(RC)
| ins_next
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMPR1
| j <2
break;
case BC_TSETM:
| ins_AD // RA = base (table at base-1), RD = num const (start index)
|1:
| sllg RA, RA, 3(r0)
| sllg TMPR1, RD, 3(r0)
| llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word.
| la RA, 0(RA, BASE)
| lg TAB:RB, -8(RA) // Guaranteed to be a table.
| cleartp TAB:RB
| llgc TMPR2, TAB:RB->marked
| tmll TMPR2, LJ_GC_BLACK // isblack(table)
| jne >7
|2:
| llgf RD, SAVE_MULTRES
| aghi RD, -1
| je >4 // Nothing to copy?
| agr RD, TMPR1 // Compute needed size.
| clgf RD, TAB:RB->asize
| jh >5 // Doesn't fit into array part?
| sgr RD, TMPR1
| sllg TMPR1, TMPR1, 3(r0)
| ag TMPR1, TAB:RB->array
|3: // Copy result slots to table.
| lg RB, 0(RA)
| la RA, 8(RA)
| stg RB, 0(TMPR1)
| la TMPR1, 8(TMPR1)
| aghi RD, -1
| jne <3
| // TODO: replace decrement/branch with branch on count.
|4:
| ins_next
|
|5: // Need to resize array part.
| lg L:CARG1, SAVE_L
| stg BASE, L:CARG1->base
| lgr CARG2, TAB:RB
| lgfr CARG3, RD
| lgr L:RB, L:CARG1
| stg PC, SAVE_PC
| brasl r14, extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
| lg BASE, L:RB->base
| llgc RA, PC_RA // Restore RA.
| llgh RD, PC_RD // Restore RD.
| j <1 // Retry.
|
|7: // Possible table write barrier for any value. Skip valiswhite check.
| barrierback TAB:RB, RD
| j <2
break;
/* -- Calls and vararg handling ----------------------------------------- */
case BC_CALL: case BC_CALLM:
| ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
| lgr RD, RC
if (op == BC_CALLM) {
| agf NARGS:RD, SAVE_MULTRES
}
| sllg RA, RA, 3(r0)
| lg LFUNC:RB, 0(BASE, RA)
| checkfunc LFUNC:RB, ->vmeta_call_ra
| la BASE, 16(RA, BASE)
| ins_call
break;
case BC_CALLMT:
| ins_AD // RA = base, RD = extra_nargs
| a NARGS:RD, SAVE_MULTRES
| // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
break;
case BC_CALLT:
| ins_AD // RA = base, RD = nargs+1
| sllg RA, RA, 3(r0)
| la RA, 16(RA, BASE)
| lgr KBASE, BASE // Use KBASE for move + vmeta_call hint.
| lg LFUNC:RB, -16(RA)
| checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
|->BC_CALLT_Z:
| lg PC, -8(BASE)
| tmll PC, FRAME_TYPE
| jne >7
|1:
| stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below.
| st NARGS:RD, SAVE_MULTRES
| aghi NARGS:RD, -1
| je >3
|2: // Move args down.
| // TODO: mvc or something here?
| lg RB, 0(RA)
| la RA, 8(RA)
| stg RB, 0(KBASE)
| la KBASE, 8(KBASE)
| // TODO: replace decrement/branch with brctg
| aghi NARGS:RD, -1
| jne <2
|
| lg LFUNC:RB, -16(BASE)
|3:
| cleartp LFUNC:RB
| llgf NARGS:RD, SAVE_MULTRES
| llgc TMPR1, LFUNC:RB->ffid
| cghi TMPR1, 1 // (> FF_C) Calling a fast function?
| jh >5
|4:
| ins_callt
|
|5: // Tailcall to a fast function.
| tmll PC, FRAME_TYPE // Lua frame below?
| jne <4
| llgc RA, PC_RA
| lcgr RA, RA
| sllg RA, RA, 3(r0)
| lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE.
| cleartp LFUNC:KBASE
| lg KBASE, LFUNC:KBASE->pc
| lg KBASE, (PC2PROTO(k))(KBASE)
| j <4
|
|7: // Tailcall from a vararg function.
| aghi PC, -FRAME_VARG
| tmll PC, FRAME_TYPEP
| jne >8 // Vararg frame below?
| sgr BASE, PC // Need to relocate BASE/KBASE down.
| lgr KBASE, BASE
| lg PC, -8(BASE)
| j <1
|8:
| aghi PC, FRAME_VARG
| j <1
break;
case BC_ITERC:
| ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
| sllg RA, RA, 3(r0)
| la RA, 16(RA, BASE) // fb = base+2
| lg RB, -32(RA) // Copy state. fb[0] = fb[-4].
| lg RC, -24(RA) // Copy control var. fb[1] = fb[-3].
| stg RB, 0(RA)
| stg RC, 8(RA)
| lg LFUNC:RB, -40(RA) // Copy callable. fb[-2] = fb[-5]
| stg LFUNC:RB, -16(RA)
| lghi NARGS:RD, 2+1 // Handle like a regular 2-arg call.
| checkfunc LFUNC:RB, ->vmeta_call
| lgr BASE, RA
| ins_call
break;
case BC_ITERN:
| ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
|.if JIT
| // NYI: add hotloop, record BC_ITERN.
|.endif
| sllg RA, RA, 3(r0)
| lg TAB:RB, -16(RA, BASE)
| cleartp TAB:RB
| llgf RC, -4(RA, BASE) // Get index from control var. // TODO: ENDIANNESS DRAGONS.
| llgf TMPR1, TAB:RB->asize
| la PC, 4(PC)
| lg ITYPE, TAB:RB->array
|1: // Traverse array part.
| clr RC, TMPR1; jhe >5 // Index points after array part?
| sllg RD, RC, 3(r0) // Warning: won't work if RD==RC!
| lg TMPR2, 0(RD, ITYPE)
| cghi TMPR2, LJ_TNIL; je >4
| // Copy array slot to returned value.
| lgr RB, TMPR2
| stg RB, 8(RA, BASE)
| // Return array index as a numeric key.
| setint ITYPE, RC
| stg ITYPE, 0(RA, BASE)
| ahi RC, 1
| sty RC, -4(RA, BASE) // Update control var. // TODO: ENDIANNESS DRAGONS
|2:
| llgh RD, PC_RD // Get target from ITERL.
| branchPC RD
|3:
| ins_next
|
|4: // Skip holes in array part.
| ahi RC, 1
| j <1
|
|5: // Traverse hash part.
| sr RC, TMPR1
|6:
| cl RC, TAB:RB->hmask; jh <3 // End of iteration? Branch to ITERL+1.
| llgfr ITYPE, RC
| mghi ITYPE, #NODE
| ag NODE:ITYPE, TAB:RB->node
| lghi TMPR2, LJ_TNIL
| cg TMPR2, NODE:ITYPE->val; je >7
| ar TMPR1, RC
| ahi TMPR1, 1
| // Copy key and value from hash slot.
| lg RB, NODE:ITYPE->key
| lg RC, NODE:ITYPE->val
| stg RB, 0(RA, BASE)
| stg RC, 8(RA, BASE)
| sty TMPR1, -4(RA, BASE) // TODO: ENDIANNESS DRAGONS
| j <2
|
|7: // Skip holes in hash part.
| ahi RC, 1
| j <6
break;
case BC_ISNEXT:
| ins_AD // RA = base, RD = target (points to ITERN)
| sllg RA, RA, 3(r0)
| lg CFUNC:RB, -24(RA, BASE)
| checkfunc CFUNC:RB, >5
| lg TMPR1, -16(RA, BASE)
| checktptp TMPR1, LJ_TTAB, >5
| lghi TMPR2, LJ_TNIL
| cg TMPR2, -8(RA, BASE); jne >5
| llgc TMPR1, CFUNC:RB->ffid
| clfi TMPR1, (uint8_t)FF_next_N; jne >5
| branchPC RD
| llihl TMPR1, 0x7fff
| iihh TMPR1, 0xfffe
| stg TMPR1, -8(RA, BASE) // Initialize control var.
|1:
| ins_next
|5: // Despecialize bytecode if any of the checks fail.
| lghi TMPR2, BC_JMP
| stcy TMPR2, PC_OP
| branchPC RD
| lghi TMPR2, BC_ITERC
| stc TMPR2, 3(PC)
| j <1
break;
case BC_VARG:
| // Copy the varargs of the current frame to the slots starting at RA.
| // When RB is zero every vararg is copied and MULTRES is set to
| // #varargs+1 (growing the stack if needed). Otherwise the destination
| // range ends at RA + RB*8 - 8 and missing values are filled with nil.
| // TODO: some opportunities for branch on index in here.
| ins_ABC // RA = base, RB = nresults+1, RC = numparams
| sllg RA, RA, 3(r0)
| sllg RB, RB, 3(r0)
| sllg RC, RC, 3(r0)
| la TMPR1, (16+FRAME_VARG)(RC, BASE)
| la RA, 0(RA, BASE)
| sg TMPR1, -8(BASE)
| // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams.
| cghi RB, 0
| je >5 // Copy all varargs?
| lay RB, -8(RA, RB)
| // Load the nil filler before the first branch to label 2, so that the
| // "no vararg slots" path does not store an uninitialised TMPR2. The copy
| // loop at label 1 never writes TMPR2, so it stays valid on fall-through.
| lghi TMPR2, LJ_TNIL
| clgr TMPR1, BASE // No vararg slots?
| jnl >2
|1: // Copy vararg slots to destination slots.
| lg RC, -16(TMPR1)
| la TMPR1, 8(TMPR1)
| stg RC, 0(RA)
| la RA, 8(RA)
| clgr RA, RB // All destination slots filled?
| jnl >3
| clgr TMPR1, BASE // No more vararg slots?
| jl <1
|2: // Fill up remainder with nil.
| stg TMPR2, 0(RA)
| la RA, 8(RA)
| clgr RA, RB
| jl <2
|3:
| ins_next
|
|5: // Copy all varargs.
| lghi TMPR2, 1
| st TMPR2, SAVE_MULTRES // MULTRES = 0+1
| lgr RC, BASE
| slgr RC, TMPR1
| jno <3 // No vararg slots? (borrow or zero)
| // From here on BASE - TMPR1 > 0, i.e. TMPR1 is strictly below BASE.
| llgfr RB, RC
| srlg RB, RB, 3(r0)
| ahi RB, 1
| st RB, SAVE_MULTRES // MULTRES = #varargs+1
| lg L:RB, SAVE_L
| agr RC, RA
| clg RC, L:RB->maxstack
| jh >7 // Need to grow stack?
|6: // Copy all vararg slots.
| lg RC, -16(TMPR1)
| la TMPR1, 8(TMPR1)
| stg RC, 0(RA)
| la RA, 8(RA)
| clgr TMPR1, BASE // No more vararg slots?
| jl <6
| j <3
|
|7: // Grow stack for varargs.
| stg BASE, L:RB->base
| stg RA, L:RB->top
| stg PC, SAVE_PC
| sgr TMPR1, BASE // Need delta, because BASE may change.
| st TMPR1, TMP_STACK_HI // Save the low 32 bits of the delta.
| llgf CARG2, SAVE_MULTRES
| aghi CARG2, -1
| lgr CARG1, L:RB
| brasl r14, extern lj_state_growstack // (lua_State *L, int n)
| lg BASE, L:RB->base
| lg RA, L:RB->top
| // The saved delta is negative (TMPR1 was below BASE), so it has to be
| // sign-extended when it is added back to the new BASE. agf adds a
| // sign-extended 32-bit memory operand; a zero-extending reload would
| // produce an address far above the stack.
| lgr TMPR1, BASE
| agf TMPR1, TMP_STACK_HI
| j <6
break;
/* -- Returns ----------------------------------------------------------- */
case BC_RETM:
| ins_AD // RA = results, RD = extra_nresults
| agf RD, SAVE_MULTRES // MULTRES >=1, so RD >=1.
| // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
break;
case BC_RET: case BC_RET0: case BC_RET1:
| ins_AD // RA = results, RD = nresults+1
if (op != BC_RET0) {
| sllg RA, RA, 3(r0)
}
|1:
| lg PC, -8(BASE)
| st RD, SAVE_MULTRES // Save nresults+1.
| tmll PC, FRAME_TYPE // Check frame type marker.
| jne >7 // Not returning to a fixarg Lua func?
switch (op) {
case BC_RET:
|->BC_RET_Z:
| lgr KBASE, BASE // Use KBASE for result move.
| aghi RD, -1
| je >3
|2: // Move results down.
| lg RB, 0(KBASE, RA)
| stg RB, -16(KBASE)
| la KBASE, 8(KBASE)
| // TODO: replace with brctg RD, <2 once supported.
| aghi RD, -1
| jne <2
|3:
| llgf RD, SAVE_MULTRES // Note: MULTRES may be >256.
| llgc RB, PC_RB
|5:
| cgr RB, RD // More results expected?
| jh >6
break;
case BC_RET1:
| lg RB, 0(BASE, RA)
| stg RB, -16(BASE)
/* fallthrough */
case BC_RET0:
|5:
| llgc TMPR1, PC_RB
| cgr TMPR1, RD
| jh >6
default:
break;
}
| llgc RA, PC_RA
| lcgr RA, RA
| sllg RA, RA, 3(r0)
| lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
| lg LFUNC:KBASE, -16(BASE)
| cleartp LFUNC:KBASE
| lg KBASE, LFUNC:KBASE->pc
| lg KBASE, PC2PROTO(k)(KBASE)
| ins_next
|
|6: // Fill up results with nil.
| lghi TMPR1, LJ_TNIL
if (op == BC_RET) {
| stg TMPR1, -16(KBASE) // Note: relies on shifted base.
| la KBASE, 8(KBASE)
} else {
| sllg RC, RD, 3(r0) // RC used as temp.
| stg TMPR1, -24(RC, BASE)
}
| la RD, 1(RD)
| j <5
|
|7: // Non-standard return case.
| lay RB, -FRAME_VARG(PC)
| tmll RB, FRAME_TYPEP
| jne ->vm_return
| // Return from vararg function: relocate BASE down and RA up.
| sgr BASE, RB
if (op != BC_RET0) {
| agr RA, RB
}
| j <1
break;
/* -- Loops and branches ------------------------------------------------ */
|.define FOR_IDX, 0(RA)
|.define FOR_STOP, 8(RA)
|.define FOR_STEP, 16(RA)
|.define FOR_EXT, 24(RA)
case BC_FORL:
|.if JIT
| hotloop RB
|.endif
| // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
break;
case BC_JFORI:
case BC_JFORL:
#if !LJ_HASJIT
break;
#endif
case BC_FORI:
case BC_IFORL:
vk = (op == BC_IFORL || op == BC_JFORL);
| ins_AJ // RA = base, RD = target (after end of loop or start of loop)
| sllg RA, RA, 3(r0)
| la RA, 0(RA, BASE)
| lg RB, FOR_IDX
| checkint RB, >9
| lg TMPR1, FOR_STOP
if (!vk) {
| checkint TMPR1, ->vmeta_for
| lg ITYPE, FOR_STEP
| chi ITYPE, 0; jl >5
| srag ITYPE, ITYPE, 47(r0)
| cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
} else {
#ifdef LUA_USE_ASSERT
| // lg TMPR1, FOR_STOP
| checkinttp TMPR1, ->assert_bad_for_arg_type
| lg TMPR2, FOR_STEP
| checkinttp TMPR2, ->assert_bad_for_arg_type
#endif
| lg ITYPE, FOR_STEP
| chi ITYPE, 0; jl >5
| ar RB, ITYPE; jo >1
| setint RB
| stg RB, FOR_IDX
}
| cr RB, TMPR1
| stg RB, FOR_EXT
if (op == BC_FORI) {
| jle >7
|1:
|6:
| branchPC RD
} else if (op == BC_JFORI) {
| branchPC RD
| llgh RD, PC_RD
| jle =>BC_JLOOP
|1:
|6:
} else if (op == BC_IFORL) {
| jh >7
|6:
| branchPC RD
|1:
} else {
| jle =>BC_JLOOP
|1:
|6:
}
|7:
| ins_next
|
|5: // Invert check for negative step.
if (!vk) {
| srag ITYPE, ITYPE, 47(r0)
| cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
} else {
| ar RB, ITYPE; jo <1
| setint RB
| stg RB, FOR_IDX
}
| cr RB, TMPR1
| stg RB, FOR_EXT
if (op == BC_FORI) {
| jhe <7
} else if (op == BC_JFORI) {
| branchPC RD
| llgh RD, PC_RD
| jhe =>BC_JLOOP
} else if (op == BC_IFORL) {
| jl <7
} else {
| jhe =>BC_JLOOP
}
| j <6
|9: // Fallback to FP variant.
if (!vk) {
| jhe ->vmeta_for
}
if (!vk) {
| lg TMPR2, FOR_STOP
| checknumtp TMPR2, ->vmeta_for
} else {
#ifdef LUA_USE_ASSERT
| lg TMPR2, FOR_STOP
| checknumtp TMPR2, ->assert_bad_for_arg_type
| lg TMPR2, FOR_STEP
| checknumtp TMPR2, ->assert_bad_for_arg_type
#endif
}
| lg RB, FOR_STEP
if (!vk) {
| checknum RB, ->vmeta_for
}
| ld f0, FOR_IDX
| ld f1, FOR_STOP
if (vk) {
| adb f0, FOR_STEP
| std f0, FOR_IDX
}
| cghi RB, 0; jl >3
| cdbr f1, f0
|1:
| std f0, FOR_EXT
if (op == BC_FORI) {
| jnl <7
} else if (op == BC_JFORI) {
| branchPC RD
| llgh RD, PC_RD
| jnl =>BC_JLOOP
} else if (op == BC_IFORL) {
| jl <7
} else {
| jnl =>BC_JLOOP
}
| j <6
|
|3: // Invert comparison if step is negative.
| cdbr f0, f1
| j <1
break;
case BC_ITERL:
|.if JIT
| hotloop RB
|.endif
| // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
break;
case BC_JITERL:
#if !LJ_HASJIT
break;
#endif
case BC_IITERL:
| ins_AJ // RA = base, RD = target
| sllg RA, RA, 3(r0)
| la RA, 0(RA, BASE)
| lg RB, 0(RA)
| cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
if (op == BC_JITERL) {
| stg RB, -8(RA)
| j =>BC_JLOOP
} else {
| branchPC RD // Otherwise save control var + branch.
| stg RB, -8(RA)
}
|1:
| ins_next
break;
case BC_LOOP:
| ins_A // RA = base, RD = target (loop extent)
| // Note: RA/RD is only used by trace recorder to determine scope/extent
| // This opcode does NOT jump, its only purpose is to detect a hot loop.
|.if JIT
| // Use the same scratch register as the BC_FORL and BC_ITERL hot-loop
| // checks; "RBd" is an x86 sub-register name and is not defined here.
| hotloop RB
|.endif
| // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
break;
case BC_ILOOP:
| ins_A // RA = base, RD = target (loop extent)
| ins_next
break;
case BC_JLOOP:
| stg r0, 0(r0)
| stg r0, 0(r0)
break;
case BC_JMP:
| ins_AJ // RA = unused, RD = target
| branchPC RD
| ins_next
break;
/* -- Function headers -------------------------------------------------- */
/*
** Reminder: A function may be called with func/args above L->maxstack,
** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
** too. This means all FUNC* ops (including fast functions) must check
** for stack overflow _before_ adding more slots!
*/
case BC_FUNCF:
|.if JIT
| stg r0, 0(r0)
|.endif
case BC_FUNCV: /* NYI: compiled vararg functions. */
| // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
break;
case BC_JFUNCF:
#if !LJ_HASJIT
break;
#endif
case BC_IFUNCF:
| ins_AD // BASE = new base, RA = framesize, RD = nargs+1
| lg KBASE, (PC2PROTO(k)-4)(PC)
| lg L:RB, SAVE_L
| sllg RA, RA, 3(r0)
| la RA, 0(RA, BASE) // Top of frame.
| clg RA, L:RB->maxstack
| jh ->vm_growstack_f
| llgc RA, (PC2PROTO(numparams)-4)(PC)
| clgr NARGS:RD, RA // Check for missing parameters.
| jle >3
|2:
if (op == BC_JFUNCF) {
| llgh RD, PC_RD
| j =>BC_JLOOP
} else {
| ins_next
}
|
|3: // Clear missing parameters.
| // TODO: optimize this. Some of this can be hoisted.
| sllg TMPR1, NARGS:RD, 3(r0)
| lghi TMPR2, LJ_TNIL
| stg TMPR2, -8(TMPR1, BASE)
| la RD, 1(RD)
| clgr RD, RA
| jle <3
| j <2
break;
case BC_JFUNCV:
#if !LJ_HASJIT
break;
#endif
| stg r0, 0(r0) // NYI: compiled vararg functions
break; /* NYI: compiled vararg functions. */
case BC_IFUNCV:
| // Header of a vararg Lua function: build a new frame above the passed
| // arguments, store the frame delta (+FRAME_VARG) and a copy of the
| // function there, then move the fixed parameters up into the new frame.
| // Parameters that were not passed are set to nil.
| ins_AD // BASE = new base, RA = framesize, RD = nargs+1
| sllg TMPR1, NARGS:RD, 3(r0)
| la RB, (FRAME_VARG+8)(TMPR1)
| la RD, 8(TMPR1, BASE)
| lg LFUNC:KBASE, -16(BASE)
| stg RB, -8(RD) // Store delta + FRAME_VARG.
| stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC.
| lg L:RB, SAVE_L
| sllg RA, RA, 3(r0)
| la RA, 0(RA, RD)
| // Stack addresses are compared as unsigned values, matching the
| // maxstack checks in BC_IFUNCF and BC_FUNCC.
| clg RA, L:RB->maxstack
| jh ->vm_growstack_v // Need to grow stack.
| lgr RA, BASE
| lgr BASE, RD
| llgc RB, (PC2PROTO(numparams)-4)(PC)
| cghi RB, 0
| je >2
| aghi RA, 8
| lghi TMPR1, LJ_TNIL
|1: // Copy fixarg slots up to new frame.
| la RA, 8(RA)
| clgr RA, BASE // Unsigned pointer compare, as in BC_VARG.
| jnl >3 // Less args than parameters?
| lg KBASE, -16(RA)
| stg KBASE, 0(RD)
| la RD, 8(RD)
| stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC).
| aghi RB, -1
| jne <1
| // TODO: brctg instead of decrement/branch
|2:
if (op == BC_JFUNCV) {
| llgh RD, PC_RD
| j =>BC_JLOOP
} else {
| lg KBASE, (PC2PROTO(k)-4)(PC)
| ins_next
}
|
|3: // Clear missing parameters.
| stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here.
| la RD, 8(RD)
| aghi RB, -1
| jne <3
| // TODO: brctg instead of decrement/branch
| j <2
break;
case BC_FUNCC:
case BC_FUNCCW:
| // Call a C function. BC_FUNCC branches straight to the function pointer
| // loaded from CFUNC:RB->f; BC_FUNCCW instead branches to the routine
| // stored in the global state's wrapf field and passes the function
| // pointer as the second argument. Afterwards control continues at
| // ->vm_returnc with RD = nresults and PC = the caller's saved PC.
| ins_AD // BASE = new base, RD = nargs+1
| lg CFUNC:RB, -16(BASE)
| cleartp CFUNC:RB
| lg KBASE, CFUNC:RB->f
| lg L:RB, SAVE_L
| sllg RD, NARGS:RD, 3(r0)
| lay RD, -8(RD,BASE)
| stg BASE, L:RB->base
| lay RA, (8*LUA_MINSTACK)(RD)
| // The result of this compare is consumed by the 'jh' further down. Only
| // stores and register moves sit in between.
| // NOTE(review): relies on stg/lgr leaving the condition code unchanged.
| clg RA, L:RB->maxstack
| stg RD, L:RB->top
| lgr CARG1, L:RB // Caveat: CARG1 may be RA.
if (op != BC_FUNCC) {
| lgr CARG2, KBASE
}
| jh ->vm_growstack_c // Need to grow stack.
| set_vmstate C
if (op == BC_FUNCC) {
| basr r14, KBASE // (lua_State *L)
} else {
| // (lua_State *L, lua_CFunction f)
| lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH)
| basr r14, TMPR1 // TODO: TMPR1==r14, is this ok?
}
| // nresults returned in r2 (CRET1).
| lgr RD, CRET1
| lg BASE, L:RB->base
| stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
| set_vmstate INTERP
| // RA = L->top - (L->base + nresults*8), computed by negating and adding.
| sllg TMPR1, RD, 3(r0)
| la RA, 0(TMPR1, BASE)
| lcgr RA, RA
| ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
| lg PC, -8(BASE) // Fetch PC of caller.
| // BUG: PC seems to be -1 here sometimes. Not yet sure why.
| j ->vm_returnc
break;
/* ---------------------------------------------------------------------- */
default:
fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
exit(2);
break;
}
}
/* Generate the backend: reserve one dynamic label per bytecode, emit the
** helper subroutines, then emit the handler for every opcode into the
** code_op section. Returns the number of bytecodes (BC__MAX).
*/
static int build_backend(BuildCtx *ctx)
{
int bc = 0;

dasm_growpc(Dst, BC__MAX);

build_subroutines(ctx);

|.code_op
while (bc < BC__MAX) {
build_ins(ctx, (BCOp)bc, bc);
bc++;
}

return BC__MAX;
}
/* Emit pseudo frame-info for all assembler functions.
** The body is currently empty, so nothing is emitted for this target.
*/
static void emit_asm_debug(BuildCtx *ctx)
{
}