|// Source: mikepaul-LuaJIT/src/vm_s390x.dasc

|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
|
|// ELF ABI registers:
|// r0,r1 | | volatile |
|// r2 | parameter and return value | volatile |
|// r3-r5 | parameter | volatile |
|// r6 | parameter | saved |
|// r7-r11 | | saved |
|// r12 | GOT pointer (needed?) | saved |
|// r13 | literal pool (needed?) | saved |
|// r14 | return address | volatile |
|// r15 | stack pointer | saved |
|// f0,f2,f4,f6 | parameter and return value | volatile |
|// f1,f3,f5,f7 | | volatile |
|// f8-f15 | | saved |
|// ar0,ar1 | TLS | volatile |
|// ar2-ar15 | | volatile |
|
|.arch s390x
|.section code_op, code_sub
|
|.actionlist build_actionlist
|.globals GLOB_
|.globalnames globnames
|.externnames extnames
|
|//-----------------------------------------------------------------------
|
|// Fixed register assignments for the interpreter, callee-saved.
|.define BASE, r7 // Base of current Lua stack frame.
|.define KBASE, r8 // Constants of current Lua function.
|.define PC, r9 // Next PC.
|.define DISPATCH, r10 // Opcode dispatch table.
|.define LREG, r11 // Register holding lua_State (also in SAVE_L).
|.define ITYPE, r13 // Scratch for the type tag extracted by checktp*/checknumx.
|
|// Temporaries. RA (r1) and RC (r5) are volatile across C calls; RB (r12) and
|// RD (r6) are listed as saved registers in the ELF ABI table above.
|.define RA, r1 // Used as base and as index register in address operands.
|.define RB, r12
|.define RC, r5 // Overlaps CARG4.
|.define RD, r6 // Overlaps CARG5. Callee-saved.
|
|// Calling conventions. Also used as temporaries.
|.define CARG1, r2
|.define CARG2, r3
|.define CARG3, r4
|.define CARG4, r5
|.define CARG5, r6
|
|.define FARG1, f0
|.define FARG2, f2
|.define FARG3, f4
|.define FARG4, f6
|
|.define CRET1, r2
|
|.define OP, r2 // Opcode during dispatch. Overlaps CARG1/CRET1.
|.define TMP1, r14 // Scratch. r14 also receives the return address on basr/brasl.
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|// All offsets below are relative to sp after saveregs has lowered it by
|// CFRAME_SPACE, except SAVE_GPRS_P which is used before that adjustment.
|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
|
|// Register save area.
|// Both names address the same slots: 48 + CFRAME_SPACE = 288.
|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated).
|
|// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
|.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
|.define SAVE_NRES, 272(sp) // Argument 3, in r4.
|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3.
|.define SAVE_L, 256(sp) // Argument 1, in r2.
|.define RESERVED, 248(sp) // Reserved for compiler use.
|.define BACKCHAIN, 240(sp) // <- sp entering interpreter.
|
|// Interpreter stack frame.
|// One 8-byte slot per callee-saved FPR, followed by two interpreter slots.
|.define SAVE_FPR15, 232(sp)
|.define SAVE_FPR14, 224(sp)
|.define SAVE_FPR13, 216(sp)
|.define SAVE_FPR12, 208(sp)
|.define SAVE_FPR11, 200(sp)
|.define SAVE_FPR10, 192(sp)
|.define SAVE_FPR9, 184(sp)
|.define SAVE_FPR8, 176(sp)
|.define SAVE_PC, 168(sp)
|.define SAVE_MULTRES, 160(sp)
|
|// Callee save area (allocated by interpreter).
|// Covers the 160 bytes below SAVE_MULTRES.
|.define CALLEESAVE, 000(sp) // <- sp in interpreter.
|
|// Prologue: store r6-r15 relative to the incoming sp, allocate CFRAME_SPACE
|// bytes and save f8-f15 inside the new frame. Counterpart of restoreregs.
|.macro saveregs
| stmg r6, r15, SAVE_GPRS_P // Uses the pre-allocation offset, so it must come first.
| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
| // TODO: save backchain?
| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
| std f9, SAVE_FPR9
| std f10, SAVE_FPR10
| std f11, SAVE_FPR11
| std f12, SAVE_FPR12
| std f13, SAVE_FPR13
| std f14, SAVE_FPR14
| std f15, SAVE_FPR15
|.endmacro
|
|// Epilogue: reload f8-f15 and r6-r15 from the slots written by saveregs.
|// Reloading r15 releases the frame; r14 is reloaded too, so a following
|// "br r14" returns to the original caller.
|.macro restoreregs
| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
| ld f9, SAVE_FPR9
| ld f10, SAVE_FPR10
| ld f11, SAVE_FPR11
| ld f12, SAVE_FPR12
| ld f13, SAVE_FPR13
| ld f14, SAVE_FPR14
| ld f15, SAVE_FPR15
| lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
|.endmacro
|
|// Type definitions. Some of these are only used for documentation.
|// Each .type binds a prefix to a C type so that operands such as
|// L:RB->base or LFUNC:RB->pc can name struct fields.
|.type L, lua_State
|.type GL, global_State
|.type TVALUE, TValue
|.type GCOBJ, GCobj
|.type STR, GCstr
|.type TAB, GCtab
|.type LFUNC, GCfuncL
|.type CFUNC, GCfuncC
|.type PROTO, GCproto
|.type UPVAL, GCupval
|.type NODE, Node
|.type NARGS, int
|.type TRACE, GCtrace
|.type SBUF, SBuf
|
|//-----------------------------------------------------------------------
|
|// Instruction headers.
|// All of these expand to nothing; they only mark the operand format at the
|// top of a bytecode handler.
|.macro ins_A; .endmacro
|.macro ins_AD; .endmacro
|.macro ins_AJ; .endmacro
|.macro ins_ABC; .endmacro
|.macro ins_AB_; .endmacro
|.macro ins_A_C; .endmacro
|.macro ins_AND; .endmacro
|
|// Instruction decode+dispatch.
|// Decodes the 32-bit instruction word at PC into OP/RA/RB/RC/RD, advances PC
|// past it and branches to the handler whose address is stored in the
|// dispatch table at index OP. Clobbers OP, RA, RB, RC, RD and TMP1.
| // TODO: tune this, right now we always decode RA-D even if they aren't used.
|.macro ins_NEXT
| llgf RD, 0(PC)
| // 32 63
| // [ B | C | A | OP ]
| // [ D | A | OP ]
| llghr RA, RD
| srlg RA, RA, 8(r0) // RA = A
| llgcr OP, RD // OP = opcode
| srlg RD, RD, 16(r0) // RD = D
| lgr RB, RD
| srlg RB, RB, 8(r0) // RB = B
| llgcr RC, RD // RC = C
| la PC, 4(PC)
| sllg TMP1, OP, 3(r0) // TMP1=OP*8
| lg TMP1, 0(TMP1, DISPATCH) // Load the handler address, as ins_callt does.
| br TMP1
|.endmacro
|
|// Instruction footer.
|// ins_next ends a handler; ins_next_ marks the spot where the shared dispatch
|// code lives when the common-dispatch variant is selected.
|.if 1
| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
| .define ins_next, ins_NEXT
| .define ins_next_, ins_NEXT
|.else
| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
| .macro ins_next
| j ->ins_next
| .endmacro
| .macro ins_next_
| ->ins_next:
| ins_NEXT
| .endmacro
|.endif
|
|// Call decode and dispatch.
|// Fetches the first instruction of the called function, leaves its opcode in
|// OP and its A operand in RA, advances PC past it and branches to the
|// handler found in the dispatch table. Clobbers OP, RA and TMP1.
|.macro ins_callt
| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
| lg PC, LFUNC:RB->pc
| llgf RA, 0(PC) // TODO: combine loads?
| llgcr OP, RA
| llghr RA, RA
| srlg RA, RA, 8(r0) // RA = A, same decoding as in ins_NEXT.
| sllg TMP1, OP, 3(r0)
| la PC, 4(PC)
| lg TMP1, 0(TMP1, DISPATCH)
| br TMP1
|.endmacro
|
|// Regular call: store the caller's PC in the slot just below the new base,
|// then dispatch exactly like ins_callt.
|.macro ins_call
| // BASE = new base, RB = LFUNC, RD = nargs+1
| stg PC, -8(BASE)
| ins_callt
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
|// The two helpers below yield the displacement from DISPATCH to a field of
|// global_State or jit_State.
#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
|
/* Offset of a GCproto field, measured from the end of the GCproto struct. */
#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|//-----------------------------------------------------------------------
|
|// Macros to clear or set tags.
|// cleartp shifts the upper 17 bits out and shifts zeros back in.
|.macro cleartp, reg; sllg reg, reg, 17(r0); srlg reg, reg, 17(r0); .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
|// settp ORs tp<<47 into reg: tp>>1 goes into the topmost 16 bits (oihh) and
|// the lowest bit of tp into the top bit of the next halfword (oihl).
|.macro settp, reg, tp
| oihh reg, ((tp>>1) &0xffff)
| oihl reg, ((tp<<15)&0x8000)
|.endmacro
|// setint tags reg with LJ_TISNUM.
|.macro setint, reg
| settp reg, LJ_TISNUM
|.endmacro
|
|// Macros to test operand types.
|// All of them put the sign-extended upper 17 bits of the operand into ITYPE
|// and branch to target when that value differs from tp.
|// checktp_nc leaves reg untouched ("no clear").
|.macro checktp_nc, reg, tp, target
| srag ITYPE, reg, 47(r0)
| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
| jne target
|.endmacro
|// checktp additionally strips the tag from reg. cleartp sits between srag
|// and cghi, so the condition code tested by jne is the one set by cghi.
|.macro checktp, reg, tp, target
| srag ITYPE, reg, 47(r0)
| cleartp reg
| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
| jne target
|.endmacro
|// checktptp emits the same instructions as checktp_nc.
|.macro checktptp, src, tp, target
| srag ITYPE, src, 47(r0)
| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
| jne target
|.endmacro
|// Shorthands for the string, table and function tags.
|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
|
|// Generic numeric type check: ITYPE = sign-extended upper 17 bits of reg,
|// compared against LJ_TISNUM; "jump" is the branch mnemonic used to reach
|// target.
|// NOTE(review): cghi is a signed compare. The jhe/jh users below (checknum,
|// checknumtp, checknumber) look like they need an unsigned comparison,
|// because a double with a clear sign bit produces a non-negative ITYPE,
|// which compares higher than LJ_TISNUM -- confirm before relying on them.
|.macro checknumx, reg, target, jump
| srag ITYPE, reg, 47(r0)
| cghi ITYPE, LJ_TISNUM // Sign extend LJ_TISNUM tp from 16- to 64-bits.
| jump target
|.endmacro
|// Branch to target unless the tag equals LJ_TISNUM.
|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
|// Branch to target when the tag compares high or equal to LJ_TISNUM.
|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro
|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro
|// Branch to target when the tag compares high to LJ_TISNUM.
|.macro checknumber, src, target; checknumx src, target, jh; .endmacro
|
|// Load the boxed false/true constants: start from all ones (lghi -1), then
|// overwrite one 16-bit field of the upper word with the tag pattern.
|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47)
|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47)
|
|// Operand bytes of the instruction that precedes PC, i.e. the 4 bytes at
|// -4(PC)..-1(PC). PC_RD starts at the same byte as PC_RB.
|.define PC_OP, -1(PC)
|.define PC_RA, -2(PC)
|.define PC_RB, -4(PC)
|.define PC_RC, -3(PC)
|.define PC_RD, -4(PC)
|
|// Set current VM state.
|// Stores ~LJ_VMST_<st> into the vmstate field reached through DISPATCH.
|// Clobbers TMP1.
|// NOTE(review): stg writes 8 bytes -- confirm that this matches the width of
|// the vmstate field and does not spill into its neighbour.
|.macro set_vmstate, st
| lghi TMP1, ~LJ_VMST_..st
| stg TMP1, DISPATCH_GL(vmstate)(DISPATCH)
|.endmacro
|
/* Generate subroutines used by opcodes and other parts of the VM. */
/* The .code_sub section should be last to help static branch prediction. */
static void build_subroutines(BuildCtx *ctx)
{
|.code_sub
|
|//-----------------------------------------------------------------------
|//-- Return handling ----------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Reached from vm_return with the FRAME_C bit of PC already toggled.
|// Frames without the FRAME_P bit go to cont_dispatch; otherwise this is the
|// return from a pcall/xpcall fast function, which prepends "true".
|->vm_returnp:
| tmll PC, FRAME_P
| je ->cont_dispatch // FRAME_P bit clear.
|
| // Return from pcall or xpcall fast func.
| nill PC, -8 // Drop the low 3 type bits, leaving the frame delta.
| sgr BASE, PC // Restore caller base.
| lay RA, -8(RA, PC) // Rebase RA and prepend one result.
| lg PC, -8(BASE) // Fetch PC of previous frame.
| // Prepending may overwrite the pcall frame, so do it at the end.
| load_true ITYPE
| stg ITYPE, 0(RA, BASE) // Prepend true to results.
|
|// In: BASE = base, RA = resultofs, RD = nresults, PC = return.
|->vm_returnc:
| ahi RD, 1 // RD = nresults+1
| je ->vm_unwind_yield // nresults+1 == 0, i.e. nresults was -1.
| stg RD, SAVE_MULTRES
| tmll PC, FRAME_TYPE
| je ->BC_RET_Z // Handle regular return to Lua.
|
|// Return handling for frames that are not plain Lua frames. Frames whose
|// type bits are non-zero after toggling FRAME_C are passed to vm_returnp;
|// the rest return to C: results are moved down, L->base and L->top are
|// updated, the result count is adjusted to SAVE_NRES and the C frame is left.
|->vm_return:
| // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
| lghi TMP1, FRAME_C
| xgr PC, TMP1
| tmll PC, FRAME_TYPE
| jne ->vm_returnp
|
| // Return to C.
| set_vmstate C
| nill PC, -8
| sgr PC, BASE
| lcgr PC, PC // Previous base = BASE - delta.
|
| ahi RD, -1
| je >2
|1: // Move results down.
| lg RB, 0(BASE, RA)
| stg RB, -16(BASE)
| la BASE, 8(BASE)
| ahi RD, -1
| jne <1
|2:
| lg L:RB, SAVE_L
| stg PC, L:RB->base
|3:
| lg RD, SAVE_MULTRES
| lg RA, SAVE_NRES // RA = wanted nresults+1
|4:
| cgr RA, RD
| jne >6 // More/less results wanted?
|5:
| lay BASE, -16(BASE)
| stg BASE, L:RB->top
|
|// Unlink this C frame from L (addressed through LREG here) and return 0.
|->vm_leave_cp:
| lg RA, SAVE_CFRAME // Restore previous C frame.
| stg RA, L:LREG->cframe
| lghi CRET1, 0 // Ok return status for vm_pcall.
|
|// Common exit: restore the saved registers and return to the C caller.
|->vm_leave_unw:
| restoreregs
| br r14
|
|6:
| // Still uses the condition code of the cgr at label 4.
| jl >7 // Less results wanted?
| // More results wanted. Check stack size and fill up results with nil.
| cg BASE, L:RB->maxstack
| jh >8
| lghi TMP1, LJ_TNIL
| stg TMP1, -16(BASE)
| la BASE, 8(BASE)
| aghi RD, 1
| j <4
|
|7: // Fewer results wanted.
| cghi RA, 0
| je <5 // But check for LUA_MULTRET+1.
| sgr RA, RD // Negative result!
| sllg TMP1, RA, 3(r0)
| lay BASE, 0(TMP1, BASE) // Correct top.
| j <5
|
|8: // Corner case: need to grow stack for filling up results.
| // This can happen if:
| // - A C function grows the stack (a lot).
| // - The GC shrinks the stack in between.
| // - A return back from a lua_call() with (high) nresults adjustment.
| stg BASE, L:RB->top // Save current top held in BASE (yes).
| stg RD, SAVE_MULTRES // Need to fill only remainder with nil.
| lgr CARG2, RA
| lgr CARG1, L:RB
| brasl r14, extern lj_state_growstack // (lua_State *L, int n)
| lg BASE, L:RB->top // Need the (realloced) L->top in BASE.
| j <3 // Label 3 reloads RD and RA from their save slots.
|
|// Placeholder: the only instruction is a store to address 0.
|->vm_unwind_yield:
| stg r0, 0(r0)
|
|// The unwind entry points below are labels only; no code is attached yet.
|->vm_unwind_c: // Unwind C stack, return from vm_pcall.
|->vm_unwind_c_eh: // Landing pad for external unwinder.
|->vm_unwind_rethrow:
|->vm_unwind_ff: // Unwind C stack, return from ff pcall.
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
|
|//-----------------------------------------------------------------------
|//-- Grow stack for calls -----------------------------------------------
|//-----------------------------------------------------------------------
|
|// Three entry points that set up L->base/L->top and the slot count, call
|// lj_state_growstack and then retry the call via ins_callt.
|->vm_growstack_c: // Grow stack for C function.
| // L->base and L->top are expected to be set by the caller already.
| lghi CARG2, LUA_MINSTACK
| j >2
|
|->vm_growstack_v: // Grow stack for vararg Lua function.
| aghi RD, -16 // LJ_FR2
| j >1
|
|->vm_growstack_f: // Grow stack for fixarg Lua function.
| // BASE = new base, RD = nargs+1, RB = L, PC = first PC
| sllg RD, NARGS:RD, 3(r0)
| lay RD, -8(RD, BASE) // RD = BASE + nargs*8 = top
|1:
| llgc RA, (PC2PROTO(framesize)-4)(PC)
| la PC, 4(PC) // Must point after first instruction.
| stg BASE, L:RB->base
| stg RD, L:RB->top
| stg PC, SAVE_PC
| lgr CARG2, RA
|2:
| // RB = L, L->base = new base, L->top = top
| lgr CARG1, L:RB
| brasl r14, extern lj_state_growstack // (lua_State *L, int n)
| lg BASE, L:RB->base
| lg RD, L:RB->top
| lg LFUNC:RB, -16(BASE)
| cleartp LFUNC:RB
| sgr RD, BASE
| srlg RD, RD, 3(r0)
| aghi NARGS:RD, 1
| // BASE = new base, RB = LFUNC, RD = nargs+1
| ins_callt // Just retry the call.
|
|//-----------------------------------------------------------------------
|//-- Entry points into the assembler VM ---------------------------------
|//-----------------------------------------------------------------------
|
|// Sets up a C frame for resuming a thread. A zero status byte is handled
|// like a call (continues at label 2 of vm_call); any other status is handled
|// like a return of the values between base and L->top.
|->vm_resume: // Setup C frame and resume thread.
| // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
| saveregs
| lgr L:RB, CARG1
| lgr LREG, CARG1 // Label 2 and vm_leave_cp address L through LREG.
| stg CARG1, SAVE_L
| lgr RA, CARG2
| lghi PC, FRAME_CP
| lghi RD, 0
| lay KBASE, CFRAME_RESUME(sp)
| lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| aghi DISPATCH, GG_G2DISP
| stg RD, SAVE_PC // Any value outside of bytecode is ok.
| stg RD, SAVE_CFRAME
| stg RD, SAVE_NRES
| stg RD, SAVE_ERRF
| stg KBASE, L:RB->cframe
| clm RD, 1, L:RB->status // Compare the status byte with the low byte of RD (0).
| je >2 // Initial resume (like a call).
|
| // Resume after yield (like a return).
| stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
| set_vmstate INTERP
| stc RD, L:RB->status // Clear the status byte; RD is still 0 here.
| lg BASE, L:RB->base
| lg RD, L:RB->top
| sgr RD, RA
| srlg RD, RD, 3(r0)
| aghi RD, 1 // RD = nresults+1
| sgr RA, BASE // RA = resultofs
| lg PC, -8(BASE)
| stg RD, SAVE_MULTRES
| tmll PC, FRAME_TYPE
| je ->BC_RET_Z
| j ->vm_return
|
|// vm_pcall and vm_call differ only in the frame type placed in PC and in
|// vm_pcall storing the error function argument; both continue at label 1.
|->vm_pcall: // Setup protected C frame and enter VM.
| // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
| saveregs
| lghi PC, FRAME_CP
| // NOTE(review): ef is declared ptrdiff_t above, but only its low 32 bits
| // survive the zero extension below -- confirm against the reader of this slot.
| llgfr CARG4, CARG4
| stg CARG4, SAVE_ERRF
| j >1
|
|->vm_call: // Setup C frame and enter VM.
| // (lua_State *L, TValue *base, int nres1)
| saveregs
| lghi PC, FRAME_C
|
|1: // Entry point for vm_pcall above (PC = ftype).
| lgfr CARG3, CARG3 // Sign-extend the 32-bit nres1.
| stg CARG3, SAVE_NRES
| lgr L:RB, CARG1
| stg CARG1, SAVE_L
| lgr RA, CARG2
|
| lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
| stg KBASE, SAVE_CFRAME
| stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
| aghi DISPATCH, GG_G2DISP
| stg sp, L:RB->cframe
| lgr L:LREG, L:RB // TODO: use RB instead of LREG here?
|
|2: // Entry point for vm_resume/vm_cpcall (RA = base, LREG = L, PC = ftype).
| stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
| set_vmstate INTERP
| lg BASE, L:LREG->base // BASE = old base (used in vmeta_call).
| agr PC, RA
| sgr PC, BASE // PC = frame delta + frame type
|
| lg RD, L:LREG->top
| sgr RD, RA
| srlg NARGS:RD, NARGS:RD, 3(r0) // TODO: support '3' on its own in dynasm.
| aghi NARGS:RD, 1 // RD = nargs+1
|
|// Load the callee from the slot below the new base and verify its tag.
|->vm_call_dispatch:
| lg LFUNC:RB, -16(RA)
| checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
|
|// Callee is known to be a function: switch BASE and dispatch the call.
|->vm_call_dispatch_f:
| lgr BASE, RA
| ins_call
| // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
|
|// Calls cp(L, func, ud) inside a fresh C frame. A zero return value leaves
|// through vm_leave_cp; otherwise the returned pointer becomes the new base
|// and execution continues at label 2 of vm_call.
|->vm_cpcall: // Setup protected C frame, call C.
| // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
| saveregs
| lgr LREG, CARG1
| stg LREG, SAVE_L
| stg LREG, SAVE_PC // Any value outside of bytecode is ok.
|
| lg KBASE, L:LREG->stack // Compute -savestack(L, L->top).
| sg KBASE, L:LREG->top
| lg DISPATCH, L:LREG->glref // Setup pointer to dispatch table.
| lghi RA, 0
| stg RA, SAVE_ERRF // No error function.
| stg KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
| aghi DISPATCH, GG_G2DISP
| // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
|
| lg KBASE, L:LREG->cframe // Add our C frame to cframe chain.
| stg KBASE, SAVE_CFRAME
| stg sp, L:LREG->cframe
| stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
|
| // CARG1-CARG3 still hold L, func and ud from entry.
| basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud)
| // TValue * (new base) or NULL returned in r2 (CRET1/).
| cghi CRET1, 0
| je ->vm_leave_cp // No base? Just remove C frame.
| lgr RA, CRET1
| lghi PC, FRAME_CP
| j <2 // Else continue with the call.
|
|//-----------------------------------------------------------------------
|//-- Metamethod handling ------------------------------------------------
|//-----------------------------------------------------------------------
|
|//-- Continuation dispatch ----------------------------------------------
|// Every label from here up to vmeta_for is a placeholder: it is defined so
|// that references resolve, but no instructions are attached to it yet.
|
|->cont_dispatch:
|
|->cont_cat: // BASE = base, RC = result, RB = mbase
|
|//-- Table indexing metamethods -----------------------------------------
|
|->vmeta_tgets:
|
|->vmeta_tgetb:
|
|->vmeta_tgetv:
|->cont_ra: // BASE = base, RC = result
|
|->vmeta_tgetr:
|
|//-----------------------------------------------------------------------
|
|->vmeta_tsets:
|
|->vmeta_tsetb:
|
|->vmeta_tsetv:
|->cont_nop: // BASE = base, (RC = result)
|
|->vmeta_tsetr:
|
|//-- Comparison metamethods ---------------------------------------------
|
|->cont_condt: // BASE = base, RC = result
|
|->cont_condf: // BASE = base, RC = result
|
|->vmeta_equal:
|
|->vmeta_equal_cd:
|
|->vmeta_istype:
|
|//-- Arithmetic metamethods ---------------------------------------------
|
|->vmeta_arith_vno:
|->vmeta_arith_vn:
|
|->vmeta_arith_nvo:
|->vmeta_arith_nv:
|
|->vmeta_unm:
|
|->vmeta_arith_vvo:
|->vmeta_arith_vv:
|
| // Call metamethod for binary op.
|->vmeta_binop:
|
|->vmeta_len:
|
|//-- Call metamethod ----------------------------------------------------
|
|->vmeta_call_ra:
|->vmeta_call: // Resolve and call __call metamethod.
|
|//-- Argument coercion for 'for' statement ------------------------------
|
|->vmeta_for:
|
|//-----------------------------------------------------------------------
|//-- Fast functions -----------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Fast function entry macros. In their current form each one only defines
|// the global label ff_<name>; no argument checks or loads are emitted.
|.macro .ffunc, name
|->ff_ .. name:
|.endmacro
|
|.macro .ffunc_1, name
|->ff_ .. name:
|.endmacro
|
|.macro .ffunc_2, name
|->ff_ .. name:
|.endmacro
|
|// The op argument is accepted but not used by the body.
|.macro .ffunc_n, name, op
| .ffunc_1 name
|.endmacro
|
|// One-argument form: forwards to the two-argument form with op = mvc.
|.macro .ffunc_n, name
| .ffunc_n name, mvc
|.endmacro
|
|.macro .ffunc_nn, name
| .ffunc_2 name
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses label 1.
|// Currently empty.
|.macro ffgccheck
|.endmacro
|
|//-- Base library: checks -----------------------------------------------
|// The entries in this and the following library sections only define their
|// ff_<name> labels; no implementation follows any of them yet.
|
|.ffunc_1 assert
|
|.ffunc_1 type
|
|//-- Base library: getters and setters ---------------------------------
|
|.ffunc_1 getmetatable
|
|.ffunc_2 setmetatable
|
|.ffunc_2 rawget
|
|//-- Base library: conversions ------------------------------------------
|
|.ffunc tonumber
|
|.ffunc_1 tostring
|
|//-- Base library: iterators -------------------------------------------
|
|.ffunc_1 next
|
|.ffunc_1 pairs
|
|.ffunc_2 ipairs_aux
|->fff_res0:
|
|.ffunc_1 ipairs
|
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc_1 pcall
|
|.ffunc_2 xpcall
|
|//-- Coroutine library --------------------------------------------------
|
|// Expands to the entry label of coroutine.resume (resume != 0) or of the
|// coroutine.wrap helper (resume == 0).
|.macro coroutine_resume_wrap, resume
|.if resume
|.ffunc_1 coroutine_resume
|.else
|.ffunc coroutine_wrap_aux
|.endif
|.endmacro
|
| coroutine_resume_wrap 1 // coroutine.resume
| coroutine_resume_wrap 0 // coroutine.wrap
|
|.ffunc coroutine_yield
|
|//-- Math library -------------------------------------------------------
|// Label-only placeholders. The extra macro arguments (sqrtsd, cmovg, minsd,
|// ...) are passed along but not used by any macro body in this file.
|
| .ffunc_1 math_abs
|->fff_resbit:
|->fff_resi:
|->fff_resRB:
|
|.ffunc_n math_sqrt, sqrtsd
|->fff_resxmm0:
|
|->fff_res1:
|->fff_res:
|->fff_res_:
|
|// Defines ff_math_<func> for the rounding functions.
|.macro math_round, func
| .ffunc math_ .. func
|.endmacro
|
| math_round floor
| math_round ceil
|
|.ffunc math_log
|
|// Entry label for a one-argument math function.
|.macro math_extern, func
| .ffunc_n math_ .. func
|.endmacro
|
|// Entry label for a two-argument math function.
|.macro math_extern2, func
| .ffunc_nn math_ .. func
|.endmacro
|
| math_extern log10
| math_extern exp
| math_extern sin
| math_extern cos
| math_extern tan
| math_extern asin
| math_extern acos
| math_extern atan
| math_extern sinh
| math_extern cosh
| math_extern tanh
| math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
|.ffunc_2 math_ldexp
|
|.ffunc_n math_frexp
|
|.ffunc_n math_modf
|// cmovop and sseop are accepted but unused.
|.macro math_minmax, name, cmovop, sseop
| .ffunc name
|.endmacro
|
| math_minmax math_min, cmovg, minsd
| math_minmax math_max, cmovl, maxsd
|
|//-- String library -----------------------------------------------------
|// Label-only placeholders, like the other library sections.
|
|.ffunc string_byte // Only handle the 1-arg case here.
|
|.ffunc string_char // Only handle the 1-arg case here.
|->fff_newstr:
|->fff_resstr:
|
|.ffunc string_sub
|
|->fff_emptystr: // Range underflow.
|
|// Defines ff_string_<name> for the buffer-based string operations.
|.macro ffstring_op, name
| .ffunc_1 string_ .. name
|.endmacro
|
|ffstring_op reverse
|ffstring_op lower
|ffstring_op upper
|
|//-- Bit library --------------------------------------------------------
|// Label-only placeholders. The shared result label fff_resbit is defined
|// once, next to fff_resi in the math library section, so it is not repeated
|// here.
|
|// fdef names the .ffunc* macro that creates the entry label; kind is unused.
|.macro .ffunc_bit, name, kind, fdef
| fdef name
|.endmacro
|
|// Two-argument form: defaults fdef to .ffunc_1.
|.macro .ffunc_bit, name, kind
| .ffunc_bit name, kind, .ffunc_1
|.endmacro
|
|.ffunc_bit bit_tobit, 0
|
|// ins is accepted but unused.
|.macro .ffunc_bit_op, name, ins
| .ffunc_bit name, 2
|.endmacro
|
|.ffunc_bit_op bit_band, and
|.ffunc_bit_op bit_bor, or
|.ffunc_bit_op bit_bxor, xor
|
|.ffunc_bit bit_bswap, 1
|
|.ffunc_bit bit_bnot, 1
|
|->fff_fallback_bit_op:
|
|// ins is accepted but unused.
|.macro .ffunc_bit_sh, name, ins
| .ffunc_bit name, 1, .ffunc_2
|.endmacro
|
|.ffunc_bit_sh bit_lshift, shl
|.ffunc_bit_sh bit_rshift, shr
|.ffunc_bit_sh bit_arshift, sar
|.ffunc_bit_sh bit_rol, rol
|.ffunc_bit_sh bit_ror, ror
|
|//-----------------------------------------------------------------------
|// Fallback and GC-step entry points for fast functions. Labels only; no
|// code is attached yet.
|
|->fff_fallback_2:
|->fff_fallback_1:
|->fff_fallback: // Call fast function fallback handler.
|
|// Reconstruct previous base for vmeta_call during tailcall.
|->vm_call_tail:
|
|->fff_gcstep: // Call GC step function.
| // BASE = new base, RD = nargs+1
|
|//-----------------------------------------------------------------------
|//-- Special dispatch targets -------------------------------------------
|//-----------------------------------------------------------------------
|// Labels only; none of these targets has code yet.
|
|->vm_record: // Dispatch target for recording phase.
|
|->vm_rethook: // Dispatch target for return hooks.
|
|->vm_inshook: // Dispatch target for instr/line hooks.
|
|->cont_hook: // Continue from hook yield.
|
|->vm_hotloop: // Hot loop counter underflow.
|
|->vm_callhook: // Dispatch target for call hooks.
|
|->vm_hotcall: // Hot call counter underflow.
|
|->cont_stitch: // Trace stitching.
|
|->vm_profhook: // Dispatch target for profiler hook.
|
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Called from an exit stub with the exit number on the stack.
|// The 16 bit exit number is stored with two (sign-extended) push imm8.
|// NOTE(review): "push imm8" is not an s390x instruction; this description
|// needs revisiting once the handler is written.
|->vm_exit_handler:
|->vm_exit_interp:
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
|// Defines the global label <name>; mode and cond are accepted but unused.
|.macro vm_round, name, mode, cond
|->name:
|.endmacro
|
| vm_round vm_floor, 0, 1
| vm_round vm_ceil, 1, JIT
| vm_round vm_trunc, 2, JIT
|
|// FP modulo x%y. Called by BC_MOD* and vm_arith.
|// Label only.
|->vm_mod:
|
|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
|// NOTE(review): xmm0/eax are x86 register names; the s390x register usage
|// is not defined by any code here yet.
|->vm_powi_sse:
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
|// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
|// Label only.
|->vm_cpuid:
|
|//-----------------------------------------------------------------------
|//-- Assertions ---------------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Label only; the LUA_USE_ASSERT section below is empty.
|->assert_bad_for_arg_type:
#ifdef LUA_USE_ASSERT
#endif
|
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|// Labels only; no FFI glue code is emitted yet.
|
|// Handler for callback functions. Callback slot number in ah/al.
|// NOTE(review): ah/al are x86 register names; the s390x register carrying
|// the slot number is not defined by any code here yet.
|->vm_ffi_callback:
|
|->cont_ffi_callback: // Return from FFI callback.
|
|->vm_ffi_call: // Call C function via FFI.
|// Note: vm_ffi_call must be the last function in this object file!
|
|//-----------------------------------------------------------------------
}
/* Generate the code for a single instruction. */
static void build_ins(BuildCtx *ctx, BCOp op, int defop)
{
int vk = 0;
(void)vk;
|// Note: aligning all instructions does not pay off.
|=>defop:
switch (op) {
/* Bytecodes without an implementation yet. They all share one handler body,
** a single store to address 0 that serves as a placeholder.
*/
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
case BC_ISEQV: case BC_ISNEV:
case BC_ISEQS: case BC_ISNES:
case BC_ISEQN: case BC_ISNEN:
case BC_ISEQP: case BC_ISNEP:
case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
case BC_ISTYPE:
case BC_ISNUM:
case BC_MOV:
case BC_NOT:
case BC_UNM:
case BC_LEN:
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
case BC_MULVN: case BC_MULNV: case BC_MULVV:
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
case BC_MODVN:
case BC_MODNV: case BC_MODVV:
case BC_POW:
case BC_CAT:
case BC_KSTR:
case BC_KCDATA:
case BC_KSHORT:
case BC_KNUM:
case BC_KPRI:
case BC_KNIL:
case BC_UGET:
case BC_USETV:
case BC_USETS:
case BC_USETN:
case BC_USETP:
case BC_UCLO:
case BC_FNEW:
case BC_TNEW:
case BC_TDUP:
case BC_GGET:
case BC_GSET:
case BC_TGETV:
case BC_TGETS:
case BC_TGETB:
case BC_TGETR:
case BC_TSETV:
case BC_TSETS:
case BC_TSETB:
case BC_TSETR:
case BC_TSETM:
case BC_CALL: case BC_CALLM:
case BC_CALLMT:
case BC_CALLT:
case BC_ITERC:
case BC_ITERN:
case BC_ISNEXT:
case BC_VARG:
case BC_RETM:
| stg r0, 0(r0) // not implemented
break;
/* Return instructions. The three opcodes share the frame-type check, the
** base restore and the nil fill-up; they differ in how results are moved.
** Frames that are not plain Lua frames are handled at label 7.
*/
case BC_RET: case BC_RET0: case BC_RET1:
| ins_AD // RA = results, RD = nresults+1
if (op != BC_RET0) {
| sllg RA, RA, 3(r0) // Scale the result slot index to a byte offset.
}
|1:
| lg PC, -8(BASE)
| stg RD, SAVE_MULTRES // Save nresults+1.
| tmll PC, FRAME_TYPE // Check frame type marker.
| jne >7 // Not returning to a fixarg Lua func?
switch (op) {
case BC_RET:
|->BC_RET_Z:
| lgr KBASE, BASE // Use KBASE for result move.
| aghi RD, -1
| je >3
|2: // Move results down.
| lg RB, 0(KBASE, RA)
| stg RB, -16(KBASE)
| la KBASE, 8(KBASE)
| // TODO: replace with brctg RD, <2 once supported.
| aghi RD, -1
| jne <2
|3:
| lg RD, SAVE_MULTRES // Note: MULTRES may be >255.
| llgc RB, PC_RB // Operand B of the instruction preceding the return PC.
|5:
| cgr RB, RD // More results expected?
| jh >6
break;
case BC_RET1:
| lg RB, 0(BASE, RA)
| stg RB, -16(BASE)
/* fallthrough */
case BC_RET0:
|5:
| llgc TMP1, PC_RB
| cgr TMP1, RD
| jh >6
default:
break;
}
| llgc RA, PC_RA
| lcgr RA, RA
| sllg RA, RA, 3(r0)
| lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
| lg LFUNC:KBASE, -16(BASE)
| cleartp LFUNC:KBASE
| lg KBASE, LFUNC:KBASE->pc
| lg KBASE, PC2PROTO(k)(KBASE) // KBASE = constants of the function returned to.
| ins_next
|
|6: // Fill up results with nil.
| lghi TMP1, LJ_TNIL
if (op == BC_RET) {
| stg TMP1, -16(KBASE) // Note: relies on shifted base.
| la KBASE, 8(KBASE)
} else {
| sllg RC, RD, 3(r0) // RC used as temp.
| stg TMP1, -24(RC, BASE)
}
| la RD, 1(RD)
| j <5
|
|7: // Non-standard return case.
| lay RB, -FRAME_VARG(PC)
| tmll RB, FRAME_TYPEP
| jne ->vm_return
| // Return from vararg function: relocate BASE down and RA up.
| sgr BASE, RB
if (op != BC_RET0) {
| agr RA, RB
}
| j <1
break;
/* Loop, jump and Lua function header bytecodes without an implementation
** yet. They share one handler body, a single load from address 0.
*/
case BC_FORL:
case BC_JFORI:
case BC_JFORL:
case BC_FORI:
case BC_IFORL:
case BC_ITERL:
case BC_JITERL:
case BC_IITERL:
case BC_LOOP:
case BC_ILOOP:
case BC_JLOOP:
case BC_JMP:
case BC_FUNCF:
case BC_FUNCV: /* NYI: compiled vararg functions. */
case BC_JFUNCF:
case BC_IFUNCF:
case BC_JFUNCV:
case BC_IFUNCV:
| lg r0, 0(r0) // Not implemented, seg fault.
break;
/* C function headers. Sets L->base and L->top, makes sure LUA_MINSTACK
** slots are available, calls the C function (BC_FUNCC) or the wrapper stored
** in the wrapf field (BC_FUNCCW) and hands the result count to vm_returnc.
*/
case BC_FUNCC:
case BC_FUNCCW:
| ins_AD // BASE = new base, RD = nargs+1
| lg CFUNC:RB, -16(BASE)
| cleartp CFUNC:RB
| lg KBASE, CFUNC:RB->f
| lg L:RB, SAVE_L
| sllg RD, NARGS:RD, 3(r0)
| lay RD, -8(RD,BASE) // RD = BASE + nargs*8 = top
| stg BASE, L:RB->base
| lay RA, (8*LUA_MINSTACK)(RD)
| cg RA, L:RB->maxstack // Condition code is consumed by the jh below.
| stg RD, L:RB->top
| lgr CARG1, L:RB
if (op != BC_FUNCC) {
| lgr CARG2, KBASE
}
| jh ->vm_growstack_c // Need to grow stack.
| set_vmstate C
if (op == BC_FUNCC) {
| basr r14, KBASE // (lua_State *L)
} else {
| // (lua_State *L, lua_CFunction f)
| lg TMP1, (DISPATCH_GL(wrapf))(DISPATCH)
| // TMP1 is r14 as well. basr forms the branch address from its second
| // operand before it writes the return address into the first.
| basr r14, TMP1
}
| // nresults returned in r2 (CRET1).
| lgr RD, CRET1
| lg BASE, L:RB->base
| stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
| set_vmstate INTERP
| sllg TMP1, RD, 3(r0)
| la RA, 0(TMP1, BASE)
| lcgr RA, RA
| ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
| lg PC, -8(BASE) // Fetch PC of caller.
| j ->vm_returnc
break;
/* ---------------------------------------------------------------------- */
/* Any opcode not listed above stops the build with an error message. */
default:
fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
exit(2);
break;
}
}
/* Emit the whole backend: the subroutines first, then one handler for each
** bytecode in opcode order. Returns the number of dynamic labels used
** (BC__MAX).
*/
static int build_backend(BuildCtx *ctx)
{
  int op = 0;

  dasm_growpc(Dst, BC__MAX);

  build_subroutines(ctx);

  |.code_op
  while (op < BC__MAX) {
    /* The opcode number doubles as the dynamic label of its handler. */
    build_ins(ctx, (BCOp)op, op);
    op++;
  }

  return BC__MAX;
}
/* Emit pseudo frame-info for all assembler functions. */
/* Nothing is emitted at present; the body is intentionally empty. */
static void emit_asm_debug(BuildCtx *ctx)
{
  (void)ctx;  /* Not used while no frame info is generated. */
}