mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-08 07:34:07 +00:00
![Mike Pall](/assets/img/avatar_default.png)
Commit summary:
- Fix fastcall symbol names for COFF assembler output.
- Add DWARF2 unwind info to COFF assembler output (only works with DWARF2-enabled GCC 4.x, not the default MinGW GCC).
- Use COFF assembler mode for MinGW builds.
- Always enable the DWARF2 handler if compiled with GCC.
5028 lines
136 KiB
Plaintext
5028 lines
136 KiB
Plaintext
|// Low-level VM code for x86 CPUs.
|
|
|// Bytecode interpreter, fast functions and helper functions.
|
|
|// Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h
|
|
|
|
|
|.if X64
|
|
|.arch x64
|
|
|.else
|
|
|.arch x86
|
|
|.endif
|
|
|.section code_op, code_sub
|
|
|
|
|
|.actionlist build_actionlist
|
|
|.globals GLOB_
|
|
|.globalnames globnames
|
|
|.externnames extnames
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|// Fixed register assignments for the interpreter.
|
|
|// This is very fragile and has many dependencies. Caveat emptor.
|
|
|.define BASE, edx // Not C callee-save, refetched anyway.
|
|
|.if not X64
|
|
|.define KBASE, edi // Must be C callee-save.
|
|
|.define KBASEa, KBASE
|
|
|.define PC, esi // Must be C callee-save.
|
|
|.define PCa, PC
|
|
|.define DISPATCH, ebx // Must be C callee-save.
|
|
|.elif X64WIN
|
|
|.define KBASE, edi // Must be C callee-save.
|
|
|.define KBASEa, rdi
|
|
|.define PC, esi // Must be C callee-save.
|
|
|.define PCa, rsi
|
|
|.define DISPATCH, ebx // Must be C callee-save.
|
|
|.else
|
|
|.define KBASE, r15d // Must be C callee-save.
|
|
|.define KBASEa, r15
|
|
|.define PC, ebx // Must be C callee-save.
|
|
|.define PCa, rbx
|
|
|.define DISPATCH, r14d // Must be C callee-save.
|
|
|.endif
|
|
|
|
|
|.define RA, ecx
|
|
|.define RAL, cl
|
|
|.define RB, ebp // Must be ebp (C callee-save).
|
|
|.define RC, eax // Must be eax (fcomparepp and others).
|
|
|.define RCW, ax
|
|
|.define RCH, ah
|
|
|.define RCL, al
|
|
|.define OP, RB
|
|
|.define RD, RC
|
|
|.define RDW, RCW
|
|
|.define RDL, RCL
|
|
|.if X64
|
|
|.define RAa, rcx
|
|
|.define RBa, rbp
|
|
|.define RCa, rax
|
|
|.define RDa, rax
|
|
|.else
|
|
|.define RAa, RA
|
|
|.define RBa, RB
|
|
|.define RCa, RC
|
|
|.define RDa, RD
|
|
|.endif
|
|
|
|
|
|.if not X64
|
|
|.define FCARG1, ecx // x86 fastcall arguments.
|
|
|.define FCARG2, edx
|
|
|.elif X64WIN
|
|
|.define CARG1, rcx // x64/WIN64 C call arguments.
|
|
|.define CARG2, rdx
|
|
|.define CARG3, r8
|
|
|.define CARG4, r9
|
|
|.define CARG1d, ecx
|
|
|.define CARG2d, edx
|
|
|.define CARG3d, r8d
|
|
|.define CARG4d, r9d
|
|
|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
|
|
|.define FCARG2, CARG2d
|
|
|.else
|
|
|.define CARG1, rdi // x64/POSIX C call arguments.
|
|
|.define CARG2, rsi
|
|
|.define CARG3, rdx
|
|
|.define CARG4, rcx
|
|
|.define CARG5, r8
|
|
|.define CARG6, r9
|
|
|.define CARG1d, edi
|
|
|.define CARG2d, esi
|
|
|.define CARG3d, edx
|
|
|.define CARG4d, ecx
|
|
|.define CARG5d, r8d
|
|
|.define CARG6d, r9d
|
|
|.define FCARG1, CARG1d // Simulate x86 fastcall.
|
|
|.define FCARG2, CARG2d
|
|
|.endif
|
|
|
|
|
|// Type definitions. Some of these are only used for documentation.
|
|
|.type L, lua_State
|
|
|.type GL, global_State
|
|
|.type TVALUE, TValue
|
|
|.type GCOBJ, GCobj
|
|
|.type STR, GCstr
|
|
|.type TAB, GCtab
|
|
|.type LFUNC, GCfuncL
|
|
|.type CFUNC, GCfuncC
|
|
|.type PROTO, GCproto
|
|
|.type UPVAL, GCupval
|
|
|.type NODE, Node
|
|
|.type NARGS, int
|
|
|.type TRACE, Trace
|
|
|.type EXITINFO, ExitInfo
|
|
|
|
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|
|
|//-----------------------------------------------------------------------
|
|
|.if not X64 // x86 stack layout.
|
|
|
|
|
|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
|
|
|// Save the four x86 C callee-save registers (ebp/edi/esi/ebx) and
|// reserve CFRAME_SPACE (aword*7) for the interpreter C frame.
|// See the SAVE_*/TMP*/ARG* slot defines below for the resulting layout.
|.macro saveregs
|
|
| push ebp; push edi; push esi; push ebx
|
|
| sub esp, CFRAME_SPACE
|
|
|.endmacro
|
|
|// Undo saveregs: drop the C frame space, then pop the callee-save
|// registers in exact reverse order of the pushes above.
|.macro restoreregs
|
|
| add esp, CFRAME_SPACE
|
|
| pop ebx; pop esi; pop edi; pop ebp
|
|
|.endmacro
|
|
|
|
|
|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
|
|
|.define SAVE_NRES, aword [esp+aword*14]
|
|
|.define SAVE_CFRAME, aword [esp+aword*13]
|
|
|.define SAVE_L, aword [esp+aword*12]
|
|
|//----- 16 byte aligned, ^^^ arguments from C caller
|
|
|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
|
|
|.define SAVE_R4, aword [esp+aword*10]
|
|
|.define SAVE_R3, aword [esp+aword*9]
|
|
|.define SAVE_R2, aword [esp+aword*8]
|
|
|//----- 16 byte aligned
|
|
|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
|
|
|.define SAVE_PC, aword [esp+aword*6]
|
|
|.define TMP2, aword [esp+aword*5]
|
|
|.define TMP1, aword [esp+aword*4]
|
|
|//----- 16 byte aligned
|
|
|.define ARG4, aword [esp+aword*3]
|
|
|.define ARG3, aword [esp+aword*2]
|
|
|.define ARG2, aword [esp+aword*1]
|
|
|.define ARG1, aword [esp] //<-- esp while in interpreter.
|
|
|//----- 16 byte aligned, ^^^ arguments for C callee
|
|
|
|
|
|// FPARGx overlaps ARGx and ARG(x+1) on x86.
|
|
|.define FPARG3, qword [esp+qword*1]
|
|
|.define FPARG1, qword [esp]
|
|
|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
|
|
|.define TMPQ, qword [esp+aword*4]
|
|
|.define TMP3, ARG4
|
|
|.define ARG5, TMP1
|
|
|.define TMPa, TMP1
|
|
|.define MULTRES, TMP2
|
|
|
|
|
|// Arguments for vm_call and vm_pcall.
|
|
|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
|
|
|
|
|
|// Arguments for vm_cpcall.
|
|
|.define INARG_CP_CALL, SAVE_ERRF
|
|
|.define INARG_CP_UD, SAVE_NRES
|
|
|.define INARG_CP_FUNC, SAVE_CFRAME
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|.elif X64WIN // x64/Windows stack layout
|
|
|
|
|
|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
|
|
|// Save the callee-save GPRs used by the interpreter on x64/Windows
|// (rbp/rdi/rsi/rbx -- KBASE/PC/DISPATCH live in edi/esi/ebx here) and
|// reserve CFRAME_SPACE for the interpreter C frame.
|.macro saveregs
|
|
| push rbp; push rdi; push rsi; push rbx
|
|
| sub rsp, CFRAME_SPACE
|
|
|.endmacro
|
|
|// Undo saveregs: drop the C frame space, then pop the callee-save
|// registers in exact reverse order of the pushes above.
|.macro restoreregs
|
|
| add rsp, CFRAME_SPACE
|
|
| pop rbx; pop rsi; pop rdi; pop rbp
|
|
|.endmacro
|
|
|
|
|
|.define SAVE_CFRAME, aword [rsp+aword*13]
|
|
|.define SAVE_PC, dword [rsp+dword*25]
|
|
|.define SAVE_L, dword [rsp+dword*24]
|
|
|.define SAVE_ERRF, dword [rsp+dword*23]
|
|
|.define SAVE_NRES, dword [rsp+dword*22]
|
|
|.define TMP2, dword [rsp+dword*21]
|
|
|.define TMP1, dword [rsp+dword*20]
|
|
|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
|
|
|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
|
|
|.define SAVE_R4, aword [rsp+aword*8]
|
|
|.define SAVE_R3, aword [rsp+aword*7]
|
|
|.define SAVE_R2, aword [rsp+aword*6]
|
|
|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
|
|
|.define ARG5, aword [rsp+aword*4]
|
|
|.define CSAVE_4, aword [rsp+aword*3]
|
|
|.define CSAVE_3, aword [rsp+aword*2]
|
|
|.define CSAVE_2, aword [rsp+aword*1]
|
|
|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
|
|
|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
|
|
|
|
|
|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
|
|
|.define TMPQ, qword [rsp+aword*10]
|
|
|.define MULTRES, TMP2
|
|
|.define TMPa, ARG5
|
|
|.define ARG5d, dword [rsp+aword*4]
|
|
|.define TMP3, ARG5d
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|.else // x64/POSIX stack layout
|
|
|
|
|
|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
|
|
|// Save the callee-save GPRs used by the interpreter on x64/POSIX
|// (rbp/rbx/r15/r14 -- KBASE/PC/DISPATCH live in r15d/ebx/r14d here) and
|// reserve CFRAME_SPACE for the interpreter C frame.
|.macro saveregs
|
|
| push rbp; push rbx; push r15; push r14
|
|
| sub rsp, CFRAME_SPACE
|
|
|.endmacro
|
|
|// Undo saveregs: drop the C frame space, then pop the callee-save
|// registers in exact reverse order of the pushes above.
|.macro restoreregs
|
|
| add rsp, CFRAME_SPACE
|
|
| pop r14; pop r15; pop rbx; pop rbp
|
|
|.endmacro
|
|
|
|
|
|//----- 16 byte aligned,
|
|
|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
|
|
|.define SAVE_R4, aword [rsp+aword*8]
|
|
|.define SAVE_R3, aword [rsp+aword*7]
|
|
|.define SAVE_R2, aword [rsp+aword*6]
|
|
|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
|
|
|.define SAVE_CFRAME, aword [rsp+aword*4]
|
|
|.define TMPa, aword [rsp+aword*3]
|
|
|//----- ^^^ awords above, vvv dwords below
|
|
|.define SAVE_PC, dword [rsp+dword*5]
|
|
|.define SAVE_L, dword [rsp+dword*4]
|
|
|.define SAVE_ERRF, dword [rsp+dword*3]
|
|
|.define SAVE_NRES, dword [rsp+dword*2]
|
|
|.define TMP2, dword [rsp+dword*1]
|
|
|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
|
|
|//----- 16 byte aligned
|
|
|
|
|
|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
|
|
|.define TMPQ, qword [rsp]
|
|
|.define TMP3, dword [rsp+aword*3]
|
|
|.define MULTRES, TMP2
|
|
|
|
|
|.endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|// Instruction headers.
|
|
|// Per-format instruction-header macros, one invoked at the top of each
|// bytecode handler. ins_NEXT (below) has already decoded OP and RA and
|// left the remaining 16 operand bits in RC, so the A/AD/AJ formats need
|// no further decoding here.
|.macro ins_A; .endmacro
|
|
|.macro ins_AD; .endmacro
|
|
|.macro ins_AJ; .endmacro
|
|
|// ABC format: split RC into B (high byte) and C (low byte).
|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
|
|
|// Like ins_ABC, but only the B operand is needed.
|.macro ins_AB_; movzx RB, RCH; .endmacro
|
|
|// Like ins_ABC, but only the C operand is needed.
|.macro ins_A_C; movzx RC, RCL; .endmacro
|
|
|// Complement RD in place (pointer-sized bitwise NOT of RDa).
|.macro ins_AND; not RDa; .endmacro
|
|
|
|
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|
|
|// Decode and dispatch the next bytecode instruction:
|//   RC = whole 32-bit instruction word loaded from PC
|//   RA = operand A (bits 8-15), OP = opcode (bits 0-7)
|//   PC is advanced by 4; RC >>= 16 leaves the 16-bit D (or C:B) operands.
|// Finally an indirect jump through the dispatch table at DISPATCH
|// (pointer-sized entries: 8 bytes on x64, 4 bytes on x86).
|.macro ins_NEXT
|
|
| mov RC, [PC]
|
|
| movzx RA, RCH
|
|
| movzx OP, RCL
|
|
| add PC, 4
|
|
| shr RC, 16
|
|
|.if X64
|
|
| jmp aword [DISPATCH+OP*8]
|
|
|.else
|
|
| jmp aword [DISPATCH+OP*4]
|
|
|.endif
|
|
|.endmacro
|
|
|
|
|
|// Instruction footer.
|
|
|.if 1
|
|
| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|
|
| .define ins_next, ins_NEXT
|
|
| .define ins_next_, ins_NEXT
|
|
|.else
|
|
| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|
|
| // Affects only certain kinds of benchmarks (and only with -j off).
|
|
| // Around 10%-30% slower on Core2, a lot more slower on P4.
|
|
| .macro ins_next
|
|
| jmp ->ins_next
|
|
| .endmacro
|
|
| .macro ins_next_
|
|
| ->ins_next:
|
|
| ins_NEXT
|
|
| .endmacro
|
|
|.endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|// Macros to test operand types.
|
|
|// checktp: compare the type tag (high dword of stack slot reg) against tp.
|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
|
|
|// Branch to target unless the slot holds a number: taken when the tag
|// compares unsigned-above LJ_TISNUM (hence ja, not jne).
|.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro
|
|
|// Branch to target unless the slot holds a string.
|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
|
|
|// Branch to target unless the slot holds a table.
|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
|
|
|
|
|
|// These operands must be used with movzx.
|
|
|.define PC_OP, byte [PC-4]
|
|
|.define PC_RA, byte [PC-3]
|
|
|.define PC_RB, byte [PC-1]
|
|
|.define PC_RC, byte [PC-2]
|
|
|.define PC_RD, word [PC-2]
|
|
|
|
|
|// Apply a biased 16-bit jump operand to PC: PC += (reg - BCBIAS_J) * 4.
|// Uses lea so EFLAGS from a preceding compare are preserved.
|.macro branchPC, reg
|
|
| lea PC, [PC+reg*4-BCBIAS_J*4]
|
|
|.endmacro
|
|
|
|
|
|// Assumes DISPATCH is relative to GL.
|
|
#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
|
|
#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
|
|
|
|
|
|// Decrement hashed hotcount and trigger trace recorder if zero.
|
|
|// Hash the current PC ((PC >> 1) & HOTCOUNT_PCMASK) into the hotcount
|// table at DISPATCH+GG_DISP2HOT, decrement the 16-bit counter and enter
|// the trace recorder via ->vm_hotloop when it reaches zero.
|// Clobbers reg and EFLAGS.
|.macro hotloop, reg
|
|
| mov reg, PC
|
|
| shr reg, 1
|
|
| and reg, HOTCOUNT_PCMASK
|
|
| sub word [DISPATCH+reg+GG_DISP2HOT], 1
|
|
| jz ->vm_hotloop
|
|
|.endmacro
|
|
|
|
|
|// Same counting scheme as hotloop, but triggers ->vm_hotcall for hot
|// call sites instead. Clobbers reg and EFLAGS.
|.macro hotcall, reg
|
|
| mov reg, PC
|
|
| shr reg, 1
|
|
| and reg, HOTCOUNT_PCMASK
|
|
| sub word [DISPATCH+reg+GG_DISP2HOT], 1
|
|
| jz ->vm_hotcall
|
|
|.endmacro
|
|
|
|
|
|// Set current VM state.
|
|
|// Store the complemented VM state constant into g->vmstate, addressed
|// relative to DISPATCH. ".." is DynASM token concatenation: the macro
|// argument st is pasted onto LJ_VMST_ to form the constant name.
|.macro set_vmstate, st
|
|
| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|
|
|.endmacro
|
|
|
|
|
|// Annoying x87 stuff: support for two compare variants.
|
|
|// x87 compare of st0 with st1, popping both operands and leaving the
|// result in EFLAGS. The C-level 'cmov' flag selects the variant at
|// build time: FUCOMIP (P6+) writes EFLAGS directly; the fallback goes
|// through the FPU status word (fnstsw ax + sahf) and clobbers eax.
|.macro fcomparepp // Compare and pop st0 >< st1.
|
|
||if (cmov) {
|
|
| fucomip st1
|
|
|// fucomip already popped st0; discard the remaining operand too.
| fpop
|
|
||} else {
|
|
| fucompp
|
|
| fnstsw ax // eax modified!
|
|
|// Load AH into the low byte of EFLAGS so jcc can test the result.
| sahf
|
|
||}
|
|
|.endmacro
|
|
|
|
|
|// fdup: push a copy of st0. fpop1: store st0 over st1 and pop,
|// i.e. discard the old st1 while keeping the st0 value on top.
|.macro fdup; fld st0; .endmacro
|
|
|.macro fpop1; fstp st1; .endmacro
|
|
|
|
|
|// Synthesize SSE FP constants.
|
|
|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
|
|
|.if X64
|
|
|// Load 0x7fffffffffffffff through a 64-bit GPR and move it into reg.
| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
|
|
|.else
|
|
|// No 64-bit GPR on x86: build all-ones in reg (pcmpeqd of a reg with
|// itself), then shift the sign bit out of the qword.
| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
|
|
|.endif
|
|
|.endmacro
|
|
|
|
|
|// Materialize a double constant whose low 32 bits are zero: val is the
|// hex text of the high dword (sign/exponent/top mantissa bits).
|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
|
|
|.if X64
|
|
| mov64 tmp, U64x(val,00000000); movd reg, tmp
|
|
|.else
|
|
|// ".." pastes val onto 0x. After movd the value sits in the low dword;
|// pshufd 0x51 moves it to the high half of the low qword (low dword
|// ends up zero, taken from the zeroed second dword).
| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51
|
|
|.endif
|
|
|.endmacro
|
|
|
|
|
|// The constants below are doubles whose low 32 bits are all zero, so
|// each is fully described by its high dword, passed to sseconst_hi.
|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
|
|
| sseconst_hi reg, tmp, 80000000
|
|
|.endmacro
|
|
|.macro sseconst_1, reg, tmp // Synthesize 1.0.
|
|
| sseconst_hi reg, tmp, 3ff00000
|
|
|.endmacro
|
|
|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
|
|
| sseconst_hi reg, tmp, bff00000
|
|
|.endmacro
|
|
|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
|
|
| sseconst_hi reg, tmp, 43300000
|
|
|.endmacro
|
|
|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
|
|
| sseconst_hi reg, tmp, 43380000
|
|
|.endmacro
|
|
|
|
|
|// Move table write barrier back. Overwrites reg.
|
|
|// GC backward write barrier: turn a black table gray again and push it
|// onto the gc.grayagain list so the GC revisits it. reg receives the old
|// list head, tab becomes the new head, and tab->gclist links to the old
|// head. Overwrites reg.
|.macro barrierback, tab, reg
|
|
| and byte tab->marked, cast_byte(~LJ_GC_BLACK) // black2gray(tab)
|
|
| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
|
|
| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
|
|
| mov tab->gclist, reg
|
|
|.endmacro
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|
|
/* Generate subroutines used by opcodes and other parts of the VM. */
|
|
/* The .code_sub section should be last to help static branch prediction. */
|
|
static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
|
|
{
|
|
|.code_sub
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Call and return handling -------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|// Reminder: A call gate may be called with func/args above L->maxstack,
|
|
|// i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
|
|
|// too. This means all call gates (L*, C and fast functions) must check
|
|
|// for stack overflow _before_ adding more slots!
|
|
|
|
|
|//-- Call gates ---------------------------------------------------------
|
|
|
|
|
|->gate_lf: // Call gate for fixarg Lua functions.
|
|
| // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
|
|
| // DISPATCH initialized
|
|
| mov BASE, RA
|
|
| mov PROTO:RB, LFUNC:RB->pt
|
|
| mov [BASE-4], PC // Store caller PC.
|
|
| movzx RA, byte PROTO:RB->framesize
|
|
| mov PC, PROTO:RB->bc
|
|
| mov KBASE, PROTO:RB->k
|
|
| mov L:RB, SAVE_L
|
|
| lea RA, [BASE+RA*8] // Top of frame.
|
|
| lea RC, [BASE+NARGS:RC*8-4] // Points to tag of 1st free slot.
|
|
| cmp RA, L:RB->maxstack
|
|
| ja ->gate_lf_growstack
|
|
|9: // Entry point from vararg setup below.
|
|
| mov RB, LJ_TNIL
|
|
|1: // Clear free slots until top of frame.
|
|
| mov [RC], RB
|
|
| mov [RC+8], RB
|
|
| add RC, 16
|
|
| cmp RC, RA
|
|
| jb <1
|
|
#if LJ_HASJIT
|
|
| // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
|
|
| // hotcall RB
|
|
#endif
|
|
| ins_next
|
|
|
|
|
|->gate_lv: // Call gate for vararg Lua functions.
|
|
| // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
|
|
| // DISPATCH initialized
|
|
| mov [RA-4], PC // Store caller PC.
|
|
| lea PC, [NARGS:RC*8+FRAME_VARG]
|
|
| lea BASE, [RA+PC-FRAME_VARG]
|
|
| mov [BASE-8], LFUNC:RB // Store copy of LFUNC.
|
|
| mov PROTO:RB, LFUNC:RB->pt
|
|
| mov [BASE-4], PC // Store delta + FRAME_VARG.
|
|
| movzx PC, byte PROTO:RB->framesize
|
|
| lea KBASE, [BASE+PC*8]
|
|
| mov L:PC, SAVE_L
|
|
| lea RC, [BASE+4]
|
|
| cmp KBASE, L:PC->maxstack
|
|
| ja ->gate_lv_growstack // Need to grow stack.
|
|
| movzx PC, byte PROTO:RB->numparams
|
|
| test PC, PC
|
|
| jz >2
|
|
|1: // Copy fixarg slots up.
|
|
| add RA, 8
|
|
| cmp RA, BASE
|
|
| jnb >2
|
|
| mov KBASE, [RA-8]
|
|
| mov [RC-4], KBASE
|
|
| mov KBASE, [RA-4]
|
|
| mov [RC], KBASE
|
|
| add RC, 8
|
|
| mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
|
|
| sub PC, 1
|
|
| jnz <1
|
|
|2:
|
|
| movzx RA, byte PROTO:RB->framesize
|
|
| mov PC, PROTO:RB->bc
|
|
| mov KBASE, PROTO:RB->k
|
|
| lea RA, [BASE+RA*8]
|
|
| jmp <9
|
|
|
|
|
|->gate_cwrap: // Call gate for wrapped C functions.
|
|
| // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return
|
|
| mov [RA-4], PC
|
|
| mov KBASEa, CFUNC:RB->f
|
|
| mov L:RB, SAVE_L
|
|
| lea RC, [RA+NARGS:RC*8-8]
|
|
| mov L:RB->base, RA
|
|
| lea RA, [RC+8*LUA_MINSTACK]
|
|
| mov L:RB->top, RC
|
|
| cmp RA, L:RB->maxstack
|
|
|.if X64
|
|
| mov CARG2, KBASEa
|
|
| mov CARG1d, L:RB // Caveat: CARG1d may be RA.
|
|
|.else
|
|
| mov ARG2, KBASEa
|
|
| mov ARG1, L:RB
|
|
|.endif
|
|
| ja ->gate_c_growstack // Need to grow stack.
|
|
| set_vmstate C
|
|
| // (lua_State *L, lua_CFunction f)
|
|
| call aword [DISPATCH+DISPATCH_GL(wrapf)]
|
|
| set_vmstate INTERP
|
|
| // nresults returned in eax (RD).
|
|
| mov BASE, L:RB->base
|
|
| lea RA, [BASE+RD*8]
|
|
| neg RA
|
|
| add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
|
|
|->vm_returnc:
|
|
| add RD, 1 // RD = nresults+1
|
|
| mov MULTRES, RD
|
|
| test PC, FRAME_TYPE
|
|
| jz ->BC_RET_Z // Handle regular return to Lua.
|
|
| jmp ->vm_return
|
|
|
|
|
|->gate_c: // Call gate for C functions.
|
|
| // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return
|
|
| mov [RA-4], PC
|
|
| mov KBASEa, CFUNC:RB->f
|
|
| mov L:RB, SAVE_L
|
|
| lea RC, [RA+NARGS:RC*8-8]
|
|
| mov L:RB->base, RA
|
|
| lea RA, [RC+8*LUA_MINSTACK]
|
|
| mov L:RB->top, RC
|
|
| cmp RA, L:RB->maxstack
|
|
|.if X64
|
|
| mov CARG1d, L:RB // Caveat: CARG1d may be RA.
|
|
|.else
|
|
| mov ARG1, L:RB
|
|
|.endif
|
|
| ja ->gate_c_growstack // Need to grow stack.
|
|
| set_vmstate C
|
|
| call KBASEa // (lua_State *L)
|
|
| set_vmstate INTERP
|
|
| // nresults returned in eax (RD).
|
|
| mov BASE, L:RB->base
|
|
| lea RA, [BASE+RD*8]
|
|
| neg RA
|
|
| add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
|
|
|->vm_returnc:
|
|
| add RD, 1 // RD = nresults+1
|
|
| mov MULTRES, RD
|
|
| test PC, FRAME_TYPE
|
|
| jz ->BC_RET_Z // Handle regular return to Lua.
|
|
| // Fallthrough.
|
|
|
|
|
|//-- Return handling (non-inline) ---------------------------------------
|
|
|
|
|
|->vm_return:
|
|
| // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
|
|
| test PC, FRAME_C
|
|
| jz ->vm_returnp
|
|
|
|
|
| // Return to C.
|
|
| set_vmstate C
|
|
| and PC, -8
|
|
| sub PC, BASE
|
|
| neg PC // Previous base = BASE - delta.
|
|
|
|
|
| sub RD, 1
|
|
| jz >2
|
|
|1:
|
|
| mov RB, [BASE+RA] // Move results down.
|
|
| mov [BASE-8], RB
|
|
| mov RB, [BASE+RA+4]
|
|
| mov [BASE-4], RB
|
|
| add BASE, 8
|
|
| sub RD, 1
|
|
| jnz <1
|
|
|2:
|
|
| mov L:RB, SAVE_L
|
|
| mov L:RB->base, PC
|
|
|3:
|
|
| mov RD, MULTRES
|
|
| mov RA, SAVE_NRES // RA = wanted nresults+1
|
|
|4:
|
|
| cmp RA, RD
|
|
| jne >6 // More/less results wanted?
|
|
|5:
|
|
| sub BASE, 8
|
|
| mov L:RB->top, BASE
|
|
|
|
|
|->vm_leave_cp:
|
|
| mov RAa, SAVE_CFRAME // Restore previous C frame.
|
|
| mov L:RB->cframe, RAa
|
|
| xor eax, eax // Ok return status for vm_pcall.
|
|
|
|
|
|->vm_leave_unw:
|
|
| restoreregs
|
|
| ret
|
|
|
|
|
|6:
|
|
| jb >7 // Less results wanted?
|
|
| // More results wanted. Check stack size and fill up results with nil.
|
|
| cmp BASE, L:RB->maxstack
|
|
| ja >8
|
|
| mov dword [BASE-4], LJ_TNIL
|
|
| add BASE, 8
|
|
| add RD, 1
|
|
| jmp <4
|
|
|
|
|
|7: // Less results wanted.
|
|
| test RA, RA
|
|
| jz <5 // But check for LUA_MULTRET+1.
|
|
| sub RA, RD // Negative result!
|
|
| lea BASE, [BASE+RA*8] // Correct top.
|
|
| jmp <5
|
|
|
|
|
|8: // Corner case: need to grow stack for filling up results.
|
|
| // This can happen if:
|
|
| // - A C function grows the stack (a lot).
|
|
| // - The GC shrinks the stack in between.
|
|
| // - A return back from a lua_call() with (high) nresults adjustment.
|
|
| mov L:RB->top, BASE // Save current top held in BASE (yes).
|
|
| mov MULTRES, RD // Need to fill only remainder with nil.
|
|
| mov FCARG2, RA
|
|
| mov FCARG1, L:RB
|
|
| call extern lj_state_growstack@8 // (lua_State *L, int n)
|
|
| mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
|
|
| jmp <3
|
|
|
|
|
|->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
|
|
| // (void *cframe, int errcode)
|
|
|.if X64
|
|
| mov eax, CARG2d // Error return status for vm_pcall.
|
|
| mov rsp, CARG1
|
|
|.else
|
|
| mov eax, FCARG2 // Error return status for vm_pcall.
|
|
| mov esp, FCARG1
|
|
|.endif
|
|
|->vm_unwind_c_eh: // Landing pad for external unwinder.
|
|
| mov L:RB, SAVE_L
|
|
| mov GL:RB, L:RB->glref
|
|
| mov dword GL:RB->vmstate, ~LJ_VMST_C
|
|
| jmp ->vm_leave_unw
|
|
|
|
|
|->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
|
|
| // (void *cframe)
|
|
|.if X64
|
|
| and CARG1, CFRAME_RAWMASK
|
|
| mov rsp, CARG1
|
|
|.else
|
|
| and FCARG1, CFRAME_RAWMASK
|
|
| mov esp, FCARG1
|
|
|.endif
|
|
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
|
|
| mov L:RB, SAVE_L
|
|
| mov RAa, -8 // Results start at BASE+RA = BASE-8.
|
|
| mov RD, 1+1 // Really 1+2 results, incr. later.
|
|
| mov BASE, L:RB->base
|
|
| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
|
|
| add DISPATCH, GG_G2DISP
|
|
| mov PC, [BASE-4] // Fetch PC of previous frame.
|
|
| mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
|
|
| set_vmstate INTERP
|
|
| jmp ->vm_returnc // Increments RD/MULTRES and returns.
|
|
|
|
|
|->vm_returnp:
|
|
| test PC, FRAME_P
|
|
| jz ->cont_dispatch
|
|
|
|
|
| // Return from pcall or xpcall fast func.
|
|
| and PC, -8
|
|
| sub BASE, PC // Restore caller base.
|
|
| lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
|
|
| mov PC, [BASE-4] // Fetch PC of previous frame.
|
|
| // Prepending may overwrite the pcall frame, so do it at the end.
|
|
| mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
|
|
| jmp ->vm_returnc // Increments RD/MULTRES and returns.
|
|
|
|
|
|//-- Grow stack on-demand -----------------------------------------------
|
|
|
|
|
|->gate_c_growstack: // Grow stack for C function.
|
|
| mov FCARG2, LUA_MINSTACK
|
|
| jmp >1
|
|
|
|
|
|->gate_lv_growstack: // Grow stack for vararg Lua function.
|
|
| sub RC, 8
|
|
| mov BASE, RA
|
|
| mov RA, KBASE
|
|
| mov PC, PROTO:RB->bc
|
|
| mov L:RB, SAVE_L
|
|
|
|
|
|->gate_lf_growstack: // Grow stack for fixarg Lua function.
|
|
| // BASE = new base, RA = requested top, RC = top (offset +4 bytes)
|
|
| // RB = L, PC = first PC of called function (or anything if C function)
|
|
| sub RC, 4 // Adjust top.
|
|
| sub RA, BASE
|
|
| shr RA, 3 // n = pt->framesize - L->top
|
|
| add PC, 4 // Must point after first instruction.
|
|
| mov L:RB->base, BASE
|
|
| mov L:RB->top, RC
|
|
| mov SAVE_PC, PC
|
|
| mov FCARG2, RA
|
|
|1:
|
|
| mov FCARG1, L:RB
|
|
| // L:RB = L, L->base = new base, L->top = top
|
|
| // SAVE_PC = initial PC+1 (undefined for C functions)
|
|
| call extern lj_state_growstack@8 // (lua_State *L, int n)
|
|
| mov RA, L:RB->base
|
|
| mov RC, L:RB->top
|
|
| mov LFUNC:RB, [RA-8]
|
|
| mov PC, [RA-4]
|
|
| sub RC, RA
|
|
| shr RC, 3
|
|
| add NARGS:RC, 1
|
|
| // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = invalid), PC restored.
|
|
| jmp aword LFUNC:RB->gate // Just retry call.
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Entry points into the assembler VM ---------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|->vm_resume: // Setup C frame and resume thread.
|
|
| // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
|
|
| saveregs
|
|
|.if X64
|
|
| mov L:RB, CARG1d // Caveat: CARG1d may be RA.
|
|
| mov SAVE_L, CARG1d
|
|
| mov RA, CARG2d
|
|
|.else
|
|
| mov L:RB, SAVE_L
|
|
| mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
|
|
|.endif
|
|
| mov PC, FRAME_CP
|
|
| xor RD, RD
|
|
| lea KBASEa, [esp+CFRAME_RESUME]
|
|
| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
|
|
| add DISPATCH, GG_G2DISP
|
|
| mov L:RB->cframe, KBASEa
|
|
| mov SAVE_PC, RD // Any value outside of bytecode is ok.
|
|
| mov SAVE_CFRAME, RDa
|
|
|.if X64
|
|
| mov SAVE_NRES, RD
|
|
| mov SAVE_ERRF, RD
|
|
|.endif
|
|
| cmp byte L:RB->status, RDL
|
|
| je >3 // Initial resume (like a call).
|
|
|
|
|
| // Resume after yield (like a return).
|
|
| set_vmstate INTERP
|
|
| mov byte L:RB->status, RDL
|
|
| mov BASE, L:RB->base
|
|
| mov RD, L:RB->top
|
|
| sub RD, RA
|
|
| shr RD, 3
|
|
| add RD, 1 // RD = nresults+1
|
|
| sub RA, BASE // RA = resultofs
|
|
| mov PC, [BASE-4]
|
|
| mov MULTRES, RD
|
|
| test PC, FRAME_TYPE
|
|
| jz ->BC_RET_Z
|
|
| jmp ->vm_return
|
|
|
|
|
|->vm_pcall: // Setup protected C frame and enter VM.
|
|
| // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
|
|
| saveregs
|
|
| mov PC, FRAME_CP
|
|
|.if X64
|
|
| mov SAVE_ERRF, CARG4d
|
|
|.endif
|
|
| jmp >1
|
|
|
|
|
|->vm_call: // Setup C frame and enter VM.
|
|
| // (lua_State *L, TValue *base, int nres1)
|
|
| saveregs
|
|
| mov PC, FRAME_C
|
|
|
|
|
|1: // Entry point for vm_pcall above (PC = ftype).
|
|
|.if X64
|
|
| mov SAVE_NRES, CARG3d
|
|
| mov L:RB, CARG1d // Caveat: CARG1d may be RA.
|
|
| mov SAVE_L, CARG1d
|
|
| mov RA, CARG2d
|
|
|.else
|
|
| mov L:RB, SAVE_L
|
|
| mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
|
|
|.endif
|
|
|
|
|
|2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
|
|
| mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
|
|
| mov SAVE_CFRAME, KBASEa
|
|
| mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
|
|
|.if X64
|
|
| mov L:RB->cframe, rsp
|
|
|.else
|
|
| mov L:RB->cframe, esp
|
|
|.endif
|
|
|
|
|
| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
|
|
| add DISPATCH, GG_G2DISP
|
|
|
|
|
|3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
|
|
| set_vmstate INTERP
|
|
| mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
|
|
| add PC, RA
|
|
| sub PC, BASE // PC = frame delta + frame type
|
|
|
|
|
| mov RC, L:RB->top
|
|
| sub RC, RA
|
|
| shr NARGS:RC, 3
|
|
| add NARGS:RC, 1 // RC = nargs+1
|
|
|
|
|
| mov LFUNC:RB, [RA-8]
|
|
| cmp dword [RA-4], LJ_TFUNC
|
|
| jne ->vmeta_call // Ensure KBASE defined and != BASE.
|
|
| jmp aword LFUNC:RB->gate
|
|
| // RA = new base, RB = LFUNC/CFUNC, RC = nargs+1.
|
|
|
|
|
|->vm_cpcall: // Setup protected C frame, call C.
|
|
| // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
|
|
| saveregs
|
|
|.if X64
|
|
| mov L:RB, CARG1d // Caveat: CARG1d may be RA.
|
|
| mov SAVE_L, CARG1d
|
|
|.else
|
|
| mov L:RB, SAVE_L
|
|
| // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
|
|
| mov RC, INARG_CP_UD // Get args before they are overwritten.
|
|
| mov RA, INARG_CP_FUNC
|
|
| mov BASE, INARG_CP_CALL
|
|
|.endif
|
|
| mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
|
|
|
|
|
| mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
|
|
| sub KBASE, L:RB->top
|
|
| mov SAVE_ERRF, 0 // No error function.
|
|
| mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
|
|
| // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
|
|
|
|
|
|.if X64
|
|
| mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
|
|
| mov SAVE_CFRAME, KBASEa
|
|
| mov L:RB->cframe, rsp
|
|
|
|
|
| call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
|
|
|.else
|
|
| mov ARG3, RC // Have to copy args downwards.
|
|
| mov ARG2, RA
|
|
| mov ARG1, L:RB
|
|
|
|
|
| mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
|
|
| mov SAVE_CFRAME, KBASE
|
|
| mov L:RB->cframe, esp
|
|
|
|
|
| call BASE // (lua_State *L, lua_CFunction func, void *ud)
|
|
|.endif
|
|
| // TValue * (new base) or NULL returned in eax (RC).
|
|
| test RC, RC
|
|
| jz ->vm_leave_cp // No base? Just remove C frame.
|
|
| mov RA, RC
|
|
| mov PC, FRAME_CP
|
|
| jmp <2 // Else continue with the call.
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Metamethod handling ------------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|//-- Continuation dispatch ----------------------------------------------
|
|
|
|
|
|->cont_dispatch:
|
|
| // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
|
|
| add RA, BASE
|
|
| and PC, -8
|
|
| mov RB, BASE
|
|
| sub BASE, PC // Restore caller BASE.
|
|
| mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
|
|
| mov RC, RA // ... in [RC]
|
|
| mov PC, [RB-12] // Restore PC from [cont|PC].
|
|
|.if X64
|
|
| movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
|
|
| lea KBASEa, qword [=>0]
|
|
| add RAa, KBASEa
|
|
|.else
|
|
| mov RA, dword [RB-16]
|
|
|.endif
|
|
| mov LFUNC:KBASE, [BASE-8]
|
|
| mov PROTO:KBASE, LFUNC:KBASE->pt
|
|
| mov KBASE, PROTO:KBASE->k
|
|
| // BASE = base, RC = result, RB = meta base
|
|
| jmp RAa // Jump to continuation.
|
|
|
|
|
|->cont_cat: // BASE = base, RC = result, RB = mbase
|
|
| movzx RA, PC_RB
|
|
| sub RB, 16
|
|
| lea RA, [BASE+RA*8]
|
|
| sub RA, RB
|
|
| je ->cont_ra
|
|
| neg RA
|
|
| shr RA, 3
|
|
|.if X64WIN
|
|
| mov CARG3d, RA
|
|
| mov L:CARG1d, SAVE_L
|
|
| mov L:CARG1d->base, BASE
|
|
| mov CARG2d, [RC+4]
|
|
| mov RC, [RC]
|
|
| mov [RB+4], CARG2d
|
|
| mov [RB], RC
|
|
| mov CARG2d, RB
|
|
|.elif X64
|
|
| mov L:CARG1d, SAVE_L
|
|
| mov L:CARG1d->base, BASE
|
|
| mov CARG3d, RA
|
|
| mov RA, [RC+4]
|
|
| mov RC, [RC]
|
|
| mov [RB+4], RA
|
|
| mov [RB], RC
|
|
| mov CARG2d, RB
|
|
|.else
|
|
| mov ARG3, RA
|
|
| mov RA, [RC+4]
|
|
| mov RC, [RC]
|
|
| mov [RB+4], RA
|
|
| mov [RB], RC
|
|
| mov ARG2, RB
|
|
|.endif
|
|
| jmp ->BC_CAT_Z
|
|
|
|
|
|//-- Table indexing metamethods -----------------------------------------
|
|
|
|
|
|->vmeta_tgets:
|
|
| mov TMP1, RC // RC = GCstr *
|
|
| mov TMP2, LJ_TSTR
|
|
| lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
|
|
| cmp PC_OP, BC_GGET
|
|
| jne >1
|
|
| lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
|
|
| mov [RA], TAB:RB // RB = GCtab *
|
|
| mov dword [RA+4], LJ_TTAB
|
|
| mov RB, RA
|
|
| jmp >2
|
|
|
|
|
|->vmeta_tgetb:
|
|
| movzx RC, PC_RC
|
|
if (sse) {
|
|
| cvtsi2sd xmm0, RC
|
|
| movsd TMPQ, xmm0
|
|
} else {
|
|
|.if not X64
|
|
| mov ARG4, RC
|
|
| fild ARG4
|
|
| fstp TMPQ
|
|
|.endif
|
|
}
|
|
| lea RCa, TMPQ // Store temp. TValue in TMPQ.
|
|
| jmp >1
|
|
|
|
|
|->vmeta_tgetv:
|
|
| movzx RC, PC_RC // Reload TValue *k from RC.
|
|
| lea RC, [BASE+RC*8]
|
|
|1:
|
|
| movzx RB, PC_RB // Reload TValue *t from RB.
|
|
| lea RB, [BASE+RB*8]
|
|
|2:
|
|
|.if X64
|
|
| mov L:CARG1d, SAVE_L
|
|
| mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
|
|
| mov CARG2d, RB
|
|
| mov CARG3, RCa // May be 64 bit ptr to stack.
|
|
| mov L:RB, L:CARG1d
|
|
|.else
|
|
| mov ARG2, RB
|
|
| mov L:RB, SAVE_L
|
|
| mov ARG3, RC
|
|
| mov ARG1, L:RB
|
|
| mov L:RB->base, BASE
|
|
|.endif
|
|
| mov SAVE_PC, PC
|
|
| call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
|
|
| // TValue * (finished) or NULL (metamethod) returned in eax (RC).
|
|
| mov BASE, L:RB->base
|
|
| test RC, RC
|
|
| jz >3
|
|
|->cont_ra: // BASE = base, RC = result
|
|
| movzx RA, PC_RA
|
|
| mov RB, [RC+4]
|
|
| mov RC, [RC]
|
|
| mov [BASE+RA*8+4], RB
|
|
| mov [BASE+RA*8], RC
|
|
| ins_next
|
|
|
|
|
|3: // Call __index metamethod.
|
|
| // BASE = base, L->top = new base, stack = cont/func/t/k
|
|
| mov RA, L:RB->top
|
|
| mov [RA-12], PC // [cont|PC]
|
|
| lea PC, [RA+FRAME_CONT]
|
|
| sub PC, BASE
|
|
| mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
|
|
| mov NARGS:RC, 3 // 2+1 args for func(t, k).
|
|
| jmp aword LFUNC:RB->gate
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|->vmeta_tsets:
|  // Metamethod path for table set with string constant key (TSETS/GSET).
|  // Boxes the GCstr * as a temp TValue in TMP1/TMP2; for BC_GSET the
|  // function environment table is boxed in g->tmptv as the object.
|  mov TMP1, RC			// RC = GCstr *
|  mov TMP2, LJ_TSTR
|  lea RCa, TMP1			// Store temp. TValue in TMP1/TMP2.
|  cmp PC_OP, BC_GSET
|  jne >1
|  lea RA, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
|  mov [RA], TAB:RB			// RB = GCtab *
|  mov dword [RA+4], LJ_TTAB
|  mov RB, RA
|  jmp >2
|
|
|
|
|
|->vmeta_tsetb:
|  // Metamethod path for table set with byte-literal key (TSETB).
|  // Converts the 8-bit key operand to a number in TMPQ, then joins
|  // the generic path at label 1 below (vmeta_tsetv).
|  movzx RC, PC_RC
if (sse) {
|  cvtsi2sd xmm0, RC
|  movsd TMPQ, xmm0
} else {
|.if not X64
|  mov ARG4, RC
|  fild ARG4
|  fstp TMPQ
|.endif
}
|  lea RCa, TMPQ			// Store temp. TValue in TMPQ.
|  jmp >1
|
|
|
|
|
|->vmeta_tsetv:
|  // Generic metamethod path for table set. Mirrors vmeta_tgetv, but
|  // calls lj_meta_tset and stores the value on the finished path or
|  // passes it as third argument to the __newindex metamethod.
|  movzx RC, PC_RC			// Reload TValue *k from RC.
|  lea RC, [BASE+RC*8]
|1:
|  movzx RB, PC_RB			// Reload TValue *t from RB.
|  lea RB, [BASE+RB*8]
|2:
|.if X64
|  mov L:CARG1d, SAVE_L
|  mov L:CARG1d->base, BASE		// Caveat: CARG2d/CARG3d may be BASE.
|  mov CARG2d, RB
|  mov CARG3, RCa			// May be 64 bit ptr to stack.
|  mov L:RB, L:CARG1d
|.else
|  mov ARG2, RB
|  mov L:RB, SAVE_L
|  mov ARG3, RC
|  mov ARG1, L:RB
|  mov L:RB->base, BASE
|.endif
|  mov SAVE_PC, PC
|  call extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
|  // TValue * (finished) or NULL (metamethod) returned in eax (RC).
|  mov BASE, L:RB->base
|  test RC, RC
|  jz >3
|  // Finished: store the value from slot RA into the returned TValue slot.
|  // NOBARRIER: lj_meta_tset ensures the table is not black.
|  movzx RA, PC_RA
|  mov RB, [BASE+RA*8+4]
|  mov RA, [BASE+RA*8]
|  mov [RC+4], RB
|  mov [RC], RA
|->cont_nop:				// BASE = base, (RC = result)
|  ins_next
|
|3:  // Call __newindex metamethod.
|  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
|  mov RA, L:RB->top
|  mov [RA-12], PC			// [cont|PC]
|  movzx RC, PC_RA
|  mov RB, [BASE+RC*8+4]		// Copy value to third argument.
|  mov RC, [BASE+RC*8]
|  mov [RA+20], RB
|  mov [RA+16], RC
|  lea PC, [RA+FRAME_CONT]
|  sub PC, BASE
|  mov LFUNC:RB, [RA-8]			// Guaranteed to be a function here.
|  mov NARGS:RC, 4			// 3+1 args for func(t, k, v).
|  jmp aword LFUNC:RB->gate
|
|
|
|
|
|//-- Comparison metamethods ---------------------------------------------
|
|
|
|
|
|->vmeta_comp:
|  // Fallback for comparison ops: calls lj_meta_comp(L, o1, o2, op).
|  // The bytecode opcode itself is passed so the C side can pick the
|  // right metamethod and operand order.
|.if X64
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE			// Caveat: CARG2d/CARG3d == BASE.
|.if X64WIN
|  lea CARG3d, [BASE+RD*8]
|  lea CARG2d, [BASE+RA*8]
|.else
|  lea CARG2d, [BASE+RA*8]
|  lea CARG3d, [BASE+RD*8]
|.endif
|  mov CARG1d, L:RB			// Caveat: CARG1d/CARG4d == RA.
|  movzx CARG4d, PC_OP
|.else
|  movzx RB, PC_OP
|  lea RD, [BASE+RD*8]
|  lea RA, [BASE+RA*8]
|  mov ARG4, RB
|  mov L:RB, SAVE_L
|  mov ARG3, RD
|  mov ARG2, RA
|  mov ARG1, L:RB
|  mov L:RB->base, BASE
|.endif
|  mov SAVE_PC, PC
|  call extern lj_meta_comp		// (lua_State *L, TValue *o1, *o2, int op)
|  // 0/1 or TValue * (metamethod) returned in eax (RC).
|3:
|  // RC > 1 means a metamethod call frame was set up; RC == 0/1 is the
|  // comparison result, consumed by the conditional-branch logic below.
|  mov BASE, L:RB->base
|  cmp RC, 1
|  ja ->vmeta_binop
|4:
|  lea PC, [PC+4]
|  jb >6				// RC == 0: condition false, skip branch.
|5:
|  movzx RD, PC_RD
|  branchPC RD
|6:
|  ins_next
|
|->cont_condt:				// BASE = base, RC = result
|  // Continuation after a comparison metamethod: branch if result is true.
|  add PC, 4
|  cmp dword [RC+4], LJ_TISTRUECOND	// Branch if result is true.
|  jb <5
|  jmp <6
|
|->cont_condf:				// BASE = base, RC = result
|  // Continuation: branch if result is false (reuses logic at 4 above).
|  cmp dword [RC+4], LJ_TISTRUECOND	// Branch if result is false.
|  jmp <4
|
|
|
|
|
|->vmeta_equal:
|  // Fallback for (in)equality: calls lj_meta_equal(L, o1, o2, ne).
|  // PC is rewound by one instruction so the result-branch logic at
|  // vmeta_comp's label 3 can be shared.
|  sub PC, 4
|.if X64WIN
|  mov CARG3d, RD
|  mov CARG4d, RB
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE			// Caveat: CARG2d == BASE.
|  mov CARG2d, RA
|  mov CARG1d, L:RB			// Caveat: CARG1d == RA.
|.elif X64
|  mov CARG2d, RA
|  mov CARG4d, RB			// Caveat: CARG4d == RA.
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE			// Caveat: CARG3d == BASE.
|  mov CARG3d, RD
|  mov CARG1d, L:RB
|.else
|  mov ARG4, RB
|  mov L:RB, SAVE_L
|  mov ARG3, RD
|  mov ARG2, RA
|  mov ARG1, L:RB
|  mov L:RB->base, BASE
|.endif
|  mov SAVE_PC, PC
|  call extern lj_meta_equal		// (lua_State *L, GCobj *o1, *o2, int ne)
|  // 0/1 or TValue * (metamethod) returned in eax (RC).
|  jmp <3				// Shared with vmeta_comp.
|
|
|
|
|
|//-- Arithmetic metamethods ---------------------------------------------
|
|
|
|
|
|->vmeta_arith_vn:
|  // Arithmetic fallback, variant: left operand in stack, right in
|  // constants (KBASE). Joins the common path at label 1.
|  lea RC, [KBASE+RC*8]
|  jmp >1
|
|
|
|
|
|->vmeta_arith_nv:
|  // Arithmetic fallback, variant: left operand from constants, right
|  // from stack. The xchg swaps them into the operand order expected
|  // by the common path at label 2.
|  lea RC, [KBASE+RC*8]
|  lea RB, [BASE+RB*8]
|  xchg RB, RC
|  jmp >2
|
|
|
|
|
|->vmeta_unm:
|  // Unary minus fallback: both operand pointers reference the same
|  // stack slot RD, then joins the common arithmetic path at label 2.
|  lea RC, [BASE+RD*8]
|  mov RB, RC
|  jmp >2
|
|
|
|
|
|->vmeta_arith_vv:
|  // Arithmetic fallback, variant: both operands on stack.
|  // Common path: calls lj_meta_arith(L, ra, rb, rc, op); entered at 1
|  // with RC set (vmeta_arith_vn) or at 2 with RB/RC set (nv/unm).
|  lea RC, [BASE+RC*8]
|1:
|  lea RB, [BASE+RB*8]
|2:
|  lea RA, [BASE+RA*8]
|.if X64WIN
|  mov CARG3d, RB
|  mov CARG4d, RC
|  movzx RC, PC_OP
|  mov ARG5d, RC
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE			// Caveat: CARG2d == BASE.
|  mov CARG2d, RA
|  mov CARG1d, L:RB			// Caveat: CARG1d == RA.
|.elif X64
|  movzx CARG5d, PC_OP
|  mov CARG2d, RA
|  mov CARG4d, RC			// Caveat: CARG4d == RA.
|  mov L:CARG1d, SAVE_L
|  mov L:CARG1d->base, BASE		// Caveat: CARG3d == BASE.
|  mov CARG3d, RB
|  mov L:RB, L:CARG1d
|.else
|  mov ARG3, RB
|  mov L:RB, SAVE_L
|  mov ARG4, RC
|  movzx RC, PC_OP
|  mov ARG2, RA
|  mov ARG5, RC
|  mov ARG1, L:RB
|  mov L:RB->base, BASE
|.endif
|  mov SAVE_PC, PC
|  call extern lj_meta_arith		// (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
|  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
|  mov BASE, L:RB->base
|  test RC, RC
|  jz ->cont_nop
|
|  // Call metamethod for binary op.
|->vmeta_binop:
|  // BASE = base, RC = new base, stack = cont/func/o1/o2
|  // Builds a continuation frame and dispatches through the function
|  // gate; non-function values divert to vmeta_call (__call).
|  mov RA, RC
|  sub RC, BASE
|  mov [RA-12], PC			// [cont|PC]
|  lea PC, [RC+FRAME_CONT]
|  mov LFUNC:RB, [RA-8]
|  mov NARGS:RC, 3			// 2+1 args for func(o1, o2).
|  cmp dword [RA-4], LJ_TFUNC
|  jne ->vmeta_call
|  jmp aword LFUNC:RB->gate
|
|
|
|
|
|->vmeta_len:
|  // Fallback for the length operator: lj_meta_len returns the __len
|  // metamethod frame, which is invoked via the binop continuation.
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  lea FCARG2, [BASE+RD*8]		// Caveat: FCARG2 == BASE
|  mov L:FCARG1, L:RB
|  mov SAVE_PC, PC
|  call extern lj_meta_len@8		// (lua_State *L, TValue *o)
|  // TValue * (metamethod) returned in eax (RC).
|  mov BASE, L:RB->base
|  jmp ->vmeta_binop			// Binop call for compatibility.
|
|
|
|
|
|//-- Call metamethod ----------------------------------------------------
|
|
|
|
|
|->vmeta_call:				// Resolve and call __call metamethod.
|  // RA = new base, RC = nargs+1, BASE = old base, PC = return
|  // lj_meta_call shifts the args up and inserts the __call handler as
|  // the function; afterwards the call proceeds with one extra argument.
|  mov TMP2, RA			// Save RA, RC for us.
|  mov TMP1, NARGS:RC
|  sub RA, 8				// Point at the callee slot itself.
|.if X64
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE			// Caveat: CARG2d/CARG3d may be BASE.
|  mov CARG2d, RA
|  lea CARG3d, [RA+NARGS:RC*8]
|  mov CARG1d, L:RB			// Caveat: CARG1d may be RA.
|.else
|  lea RC, [RA+NARGS:RC*8]
|  mov L:RB, SAVE_L
|  mov ARG2, RA
|  mov ARG3, RC
|  mov ARG1, L:RB
|  mov L:RB->base, BASE			// This is the callers base!
|.endif
|  mov SAVE_PC, PC
|  call extern lj_meta_call		// (lua_State *L, TValue *func, TValue *top)
|  mov BASE, L:RB->base
|  mov RA, TMP2
|  mov NARGS:RC, TMP1
|  mov LFUNC:RB, [RA-8]
|  add NARGS:RC, 1			// The original callee becomes arg #1.
|  // This is fragile. L->base must not move, KBASE must always be defined.
|  cmp KBASE, BASE			// Continue with CALLT if flag set.
|  je ->BC_CALLT_Z
|  jmp aword LFUNC:RB->gate		// Otherwise call resolved metamethod.
|
|
|
|
|
|//-- Argument coercion for 'for' statement ------------------------------
|
|
|
|
|
|->vmeta_for:
|  // Argument coercion for the 'for' statement: lj_meta_for converts
|  // the three control slots to numbers (or throws). Afterwards the
|  // original FORI/JFORI instruction is decoded again from PC-4 and
|  // re-dispatched through the static dispatch table.
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov FCARG2, RA			// Caveat: FCARG2 == BASE
|  mov L:FCARG1, L:RB			// Caveat: FCARG1 == RA
|  mov SAVE_PC, PC
|  call extern lj_meta_for@8		// (lua_State *L, TValue *base)
|  mov BASE, L:RB->base
|  mov RC, [PC-4]			// Reload the whole instruction word.
|  movzx RA, RCH
|  movzx OP, RCL
|  shr RC, 16
|.if X64
|  jmp aword [DISPATCH+OP*8+GG_DISP_STATIC*8]	// Retry FORI or JFORI.
|.else
|  jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4]	// Retry FORI or JFORI.
|.endif
|
|
|.endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Fast functions -----------------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|// Helper macros to define fast-function entry points (->ff_<name>).
|// Naming convention: _1/_2 = minimum argument count check;
|// _n = one number arg loaded on the x87 stack; _nsse = one number arg
|// loaded into xmm0; _nn/_nnsse/_nnr = two number args (x87 / SSE /
|// x87 in reversed order). All bail out to ->fff_fallback on mismatch.
|
|// Plain entry point, no argument checks.
|.macro .ffunc, name
|->ff_ .. name:
|.endmacro
|
|// Require at least 1 argument.
|.macro .ffunc_1, name
|->ff_ .. name:
|  cmp NARGS:RC, 1+1; jb ->fff_fallback
|.endmacro
|
|// Require at least 2 arguments.
|.macro .ffunc_2, name
|->ff_ .. name:
|  cmp NARGS:RC, 2+1; jb ->fff_fallback
|.endmacro
|
|// One number argument, pushed onto the x87 stack.
|.macro .ffunc_n, name
|  .ffunc_1 name
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
|  fld qword [RA]
|.endmacro
|
|// One number argument; 'op' runs between the type check and the load.
|.macro .ffunc_n, name, op
|  .ffunc_1 name
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
|  op
|  fld qword [RA]
|.endmacro
|
|// One number argument, loaded into xmm0 with 'op' (e.g. movsd/sqrtsd).
|.macro .ffunc_nsse, name, op
|  .ffunc_1 name
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
|  op xmm0, qword [RA]
|.endmacro
|
|.macro .ffunc_nsse, name
|  .ffunc_nsse name, movsd
|.endmacro
|
|// Two number arguments on the x87 stack (arg2 on top).
|.macro .ffunc_nn, name
|  .ffunc_2 name
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
|  cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
|  fld qword [RA]
|  fld qword [RA+8]
|.endmacro
|
|// Two number arguments in xmm0/xmm1.
|// NOTE(review): uses .ffunc_1 (not _2) but type-checks the 2nd arg slot;
|// a missing 2nd arg fails the [RA+12] check and falls back anyway.
|.macro .ffunc_nnsse, name
|  .ffunc_1 name
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
|  cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
|  movsd xmm0, qword [RA]
|  movsd xmm1, qword [RA+8]
|.endmacro
|
|// Two number arguments on the x87 stack in reversed order (arg1 on top).
|.macro .ffunc_nnr, name
|  .ffunc_2 name
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
|  cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
|  fld qword [RA+8]
|  fld qword [RA]
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses label 1.
|.macro ffgccheck
|  mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
|  cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
|  jb >1
|  call ->fff_gcstep
|1:
|.endmacro
|
|
|
|
|
|//-- Base library: checks -----------------------------------------------
|
|
|
|
|
|// assert(v, ...): falls back (to raise the error) unless v is truthy;
|// otherwise shifts all arguments down one slot and returns them all.
|.ffunc_1 assert
|  mov RB, [RA+4]
|  cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
|  mov MULTRES, RD
|  mov [RA-4], RB			// Copy first arg to result slot.
|  mov RB, [RA]
|  mov [RA-8], RB
|  sub RD, 2
|  jz >2				// Only one argument?
|  mov TMP1, RA
|1:  // Copy remaining arguments down by one slot each.
|  add RA, 8
|  mov RB, [RA+4]
|  mov [RA-4], RB
|  mov RB, [RA]
|  mov [RA-8], RB
|  sub RD, 1
|  jnz <1
|  mov RA, TMP1
|2:
|  mov RD, MULTRES
|  jmp ->fff_res_
|
|
|
|
|
|// type(v): maps the type tag to a name string kept in the closure's
|// upvalues. Tags above LJ_TNUMX are clamped so all numbers map to the
|// same index (cmov when available, branch otherwise).
|.ffunc_1 type
|  mov RB, [RA+4]
|  mov RC, ~LJ_TNUMX
|  not RB
|  cmp RC, RB
||if (cmov) {
|  cmova RC, RB
||} else {
|  jbe >1; mov RC, RB; 1:
||}
|  mov CFUNC:RB, [RA-8]
|  mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
|  mov dword [RA-4], LJ_TSTR
|  mov [RA-8], STR:RC
|  jmp ->fff_res1
|
|
|
|
|
|//-- Base library: getters and setters ---------------------------------
|
|
|
|
|
|// getmetatable(v): returns the metatable, or mt.__metatable when that
|// field is present. The __metatable lookup is an inlined hash-table
|// probe for the interned "__metatable" string.
|.ffunc_1 getmetatable
|  mov RB, [RA+4]
|  cmp RB, LJ_TTAB; jne >6
|1:  // Field metatable must be at same offset for GCtab and GCudata!
|  mov TAB:RB, [RA]
|  mov TAB:RB, TAB:RB->metatable
|2:
|  test TAB:RB, TAB:RB
|  mov dword [RA-4], LJ_TNIL
|  jz ->fff_res1			// No metatable: return nil.
|  mov CFUNC:RC, [RA-8]
|  mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable]
|  mov dword [RA-4], LJ_TTAB		// Store metatable as default result.
|  mov [RA-8], TAB:RB
|  mov TMP1, RA			// Save result pointer.
|  // Inlined hash lookup of mt["__metatable"]:
|  mov RA, TAB:RB->hmask
|  and RA, STR:RC->hash
|  imul RA, #NODE
|  add NODE:RA, TAB:RB->node
|3:  // Rearranged logic, because we expect _not_ to find the key.
|  cmp dword NODE:RA->key.it, LJ_TSTR
|  jne >4
|  cmp dword NODE:RA->key.gcr, STR:RC
|  je >5
|4:
|  mov NODE:RA, NODE:RA->next
|  test NODE:RA, NODE:RA
|  jnz <3
|  jmp ->fff_res1			// Not found, keep default result.
|5:
|  mov RB, [RA+4]
|  cmp RB, LJ_TNIL; je ->fff_res1	// Dito for nil value.
|  mov RC, [RA]
|  mov RA, TMP1			// Restore result pointer.
|  mov [RA-4], RB			// Return value of mt.__metatable.
|  mov [RA-8], RC
|  jmp ->fff_res1
|
|6:  // Non-table: resolve the per-type base metatable from gcroot.
|  cmp RB, LJ_TUDATA; je <1
|  cmp RB, LJ_TISNUM; ja >7
|  mov RB, LJ_TNUMX			// All numbers share one base metatable.
|7:
|  not RB
|  mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
|  jmp <2
|
|
|
|
|
|// setmetatable(t, mt): fast path only when t has no metatable yet and
|// mt is a table; anything else (incl. clearing) goes to the fallback.
|// Emits a write barrier if the table is black.
|.ffunc_2 setmetatable
|  cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
|  // Fast path: no mt for table yet and not clearing the mt.
|  mov TAB:RB, [RA]
|  cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
|  cmp dword [RA+12], LJ_TTAB; jne ->fff_fallback
|  mov TAB:RC, [RA+8]
|  mov TAB:RB->metatable, TAB:RC
|  mov dword [RA-4], LJ_TTAB		// Return original table.
|  mov [RA-8], TAB:RB
|  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
|  jz >1
|  // Possible write barrier. Table is black, but skip iswhite(mt) check.
|  barrierback TAB:RB, RC
|1:
|  jmp ->fff_res1
|
|
|
|
|
|// rawget(t, k): delegates to lj_tab_get (no metamethods) and copies
|// the returned slot to the result.
|.ffunc_2 rawget
|  cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
|.if X64
|  mov TMP1, BASE			// Save BASE and RA.
|  mov RB, RA
|  mov CARG2d, [RA]
|  lea CARG3d, [RA+8]
|  mov CARG1d, SAVE_L			// Caveat: CARG1d may be RA.
|.else
|  mov TAB:RC, [RA]
|  mov L:RB, SAVE_L
|  mov ARG2, TAB:RC
|  mov ARG1, L:RB
|  mov RB, RA
|  mov TMP1, BASE			// Save BASE and RA.
|  add RA, 8
|  mov ARG3, RA
|.endif
|  call extern lj_tab_get		// (lua_State *L, GCtab *t, cTValue *key)
|  // cTValue * returned in eax (RC).
|  mov RA, RB
|  mov BASE, TMP1
|  mov RB, [RC]			// Copy table slot.
|  mov RC, [RC+4]
|  mov [RA-8], RB
|  mov [RA-4], RC
|  jmp ->fff_res1
|
|
|
|
|
|//-- Base library: conversions ------------------------------------------
|
|
|
|
|
|.ffunc tonumber
|  // Only handles the number case inline (without a base argument).
|  // A number argument is simply passed through as the result.
|  cmp NARGS:RC, 1+1; jne ->fff_fallback	// Exactly one argument.
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
if (sse) {
|  movsd xmm0, qword [RA]; jmp ->fff_resxmm0
} else {
|  fld qword [RA]; jmp ->fff_resn
}
|
|
|
|
|
|.ffunc_1 tostring
|  // Only handles the string or number case inline.
|  cmp dword [RA+4], LJ_TSTR; jne >3
|  // A __tostring method in the string base metatable is ignored.
|  mov STR:RC, [RA]
|2:  // Store string result (STR:RC) and return it.
|  mov dword [RA-4], LJ_TSTR
|  mov [RA-8], STR:RC
|  jmp ->fff_res1
|3:  // Handle numbers inline, unless a number base metatable is present.
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
|  cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
|  jne ->fff_fallback
|  ffgccheck				// Caveat: uses label 1.
|  mov L:RB, SAVE_L
|  mov L:RB->base, RA			// Add frame since C call can throw.
|  mov [RA-4], PC
|  mov SAVE_PC, PC			// Redundant (but a defined value).
|  mov TMP1, BASE			// Save BASE.
|  mov FCARG2, RA			// Caveat: FCARG2 == BASE
|  mov L:FCARG1, L:RB			// Caveat: FCARG1 == RA
|  call extern lj_str_fromnum@8		// (lua_State *L, lua_Number *np)
|  // GCstr returned in eax (RC).
|  mov RA, L:RB->base
|  mov BASE, TMP1
|  jmp <2
|
|
|
|
|
|//-- Base library: iterators -------------------------------------------
|
|
|
|
|
|// next(t [,k]): table traversal via lj_tab_next. A missing key arg is
|// defaulted to nil. Returns key+value, or nil at end of traversal.
|.ffunc_1 next
|  je >2				// Missing 2nd arg?
|1:
|  cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
|.if X64
|  mov TMP1, BASE			// Save BASE.
|  mov CARG2d, [RA]
|  mov L:RB, SAVE_L
|  mov L:RB->base, RA			// Add frame since C call can throw.
|  mov [RA-4], PC
|  lea CARG3d, [RA+8]
|  mov CARG1d, L:RB			// Caveat: CARG1d may be RA.
|.else
|  mov TAB:RB, [RA]
|  mov ARG2, TAB:RB
|  mov L:RB, SAVE_L
|  mov ARG1, L:RB
|  mov L:RB->base, RA			// Add frame since C call can throw.
|  mov [RA-4], PC
|  mov TMP1, BASE			// Save BASE.
|  add RA, 8
|  mov ARG3, RA
|.endif
|  mov SAVE_PC, PC			// Redundant (but a defined value).
|  call extern lj_tab_next		// (lua_State *L, GCtab *t, TValue *key)
|  // Flag returned in eax (RC).
|  mov RA, L:RB->base
|  mov BASE, TMP1
|  test RC, RC; jz >3			// End of traversal?
|  mov RB, [RA+8]			// Copy key and value to results.
|  mov RC, [RA+12]
|  mov [RA-8], RB
|  mov [RA-4], RC
|  mov RB, [RA+16]
|  mov RC, [RA+20]
|  mov [RA], RB
|  mov [RA+4], RC
|->fff_res2:				// Shared: return two results.
|  mov RD, 1+2
|  jmp ->fff_res
|2:  // Set missing 2nd arg to nil.
|  mov dword [RA+12], LJ_TNIL
|  jmp <1
|3:  // End of traversal: return nil.
|  mov dword [RA-4], LJ_TNIL
|  jmp ->fff_res1
|
|
|
|
|
|// pairs(t): returns the iterator closure stored in upvalue[0]
|// (the C 'next'), the table, and nil as the initial control value.
|.ffunc_1 pairs
|  cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
|  mov CFUNC:RC, CFUNC:RB->upvalue[0]
|  mov dword [RA-4], LJ_TFUNC
|  mov [RA-8], CFUNC:RC
|  mov dword [RA+12], LJ_TNIL
|  mov RD, 1+3
|  jmp ->fff_res
|
|
|
|
|
|// ipairs iterator step: increments the numeric index, then looks it up
|// first in the array part, falling back to lj_tab_getinth for the hash
|// part. Returns (index, value) or no results at the first nil.
|.ffunc_1 ipairs_aux
|  cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
|  cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
if (sse) {
|  movsd xmm0, qword [RA+8]
|  sseconst_1 xmm1, RBa
|  addsd xmm0, xmm1			// index = index+1
|  cvtsd2si RC, xmm0
|  movsd qword [RA-8], xmm0		// Store new index as 1st result.
} else {
|.if not X64
|  fld qword [RA+8]
|  fld1
|  faddp st1
|  fist ARG1
|  fstp qword [RA-8]
|  mov RC, ARG1
|.endif
}
|  mov TAB:RB, [RA]
|  cmp RC, TAB:RB->asize; jae >2	// Not in array part?
|  shl RC, 3
|  add RC, TAB:RB->array
|1:
|  cmp dword [RC+4], LJ_TNIL; je ->fff_res0
|  mov RB, [RC]			// Copy array slot.
|  mov RC, [RC+4]
|  mov [RA], RB
|  mov [RA+4], RC
|  jmp ->fff_res2
|2:  // Check for empty hash part first. Otherwise call C function.
|  cmp dword TAB:RB->hmask, 0; je ->fff_res0
|  mov TMP1, BASE			// Save BASE and RA.
|.if X64 and not X64WIN
|  mov FCARG1, TAB:RB
|  mov RB, RA
|.else
|  xchg FCARG1, TAB:RB			// Caveat: FCARG1 == RA
|.endif
|  mov FCARG2, RC
|  call extern lj_tab_getinth@8		// (GCtab *t, int32_t key)
|  // cTValue * or NULL returned in eax (RC).
|  mov RA, RB
|  mov BASE, TMP1
|  test RC, RC
|  jnz <1				// Found: copy the slot.
|->fff_res0:				// Shared: return zero results.
|  mov RD, 1+0
|  jmp ->fff_res
|
|
|
|
|
|// ipairs(t): returns the iterator closure from upvalue[0]
|// (ipairs_aux), the table, and 0 as the initial index.
|.ffunc_1 ipairs
|  cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
|  mov CFUNC:RC, CFUNC:RB->upvalue[0]
|  mov dword [RA-4], LJ_TFUNC
|  mov [RA-8], CFUNC:RC
if (sse) {
|  xorps xmm0, xmm0
|  movsd qword [RA+8], xmm0		// Initial index 0.
} else {
|  fldz
|  fstp qword [RA+8]
}
|  mov RD, 1+3
|  jmp ->fff_res
|
|
|
|
|
|//-- Base library: catch errors ----------------------------------------
|
|
|
|
|
|// pcall(f, ...): sets up a protected-call frame (FRAME_PCALL, or
|// FRAME_PCALLH when a hook is active) and tail-calls f through its gate.
|.ffunc_1 pcall
|  mov [RA-4], PC
|  mov PC, 8+FRAME_PCALL
|  mov BASE, RA
|  add RA, 8				// f's args start one slot up.
|  sub NARGS:RC, 1
|  mov LFUNC:RB, [RA-8]
|1:
|  test byte [DISPATCH+DISPATCH_GL(hookmask)], HOOK_ACTIVE
|  jnz >3				// Hook active before pcall?
|2:
|  cmp dword [RA-4], LJ_TFUNC
|  jne ->vmeta_call			// Ensure KBASE defined and != BASE.
|  jmp aword LFUNC:RB->gate
|3:
|  add PC, 1				// Use FRAME_PCALLH if hook was active.
|  jmp <2
|
|
|
|
|
|// xpcall(f, handler, ...): swaps f and the handler on the stack so the
|// handler sits below the call frame, then reuses the pcall setup at 1.
|.ffunc_2 xpcall
|  cmp dword [RA+12], LJ_TFUNC; jne ->fff_fallback
|  mov [RA-4], PC
|  mov RB, [RA+4]			// Swap function and traceback.
|  mov [RA+12], RB
|  mov dword [RA+4], LJ_TFUNC
|  mov LFUNC:RB, [RA]
|  mov PC, [RA+8]
|  mov [RA+8], LFUNC:RB
|  mov [RA], PC
|  mov PC, 2*8+FRAME_PCALL
|  mov BASE, RA
|  add RA, 2*8
|  sub NARGS:RC, 2
|  jmp <1				// Shared tail with pcall.
|
|
|
|
|
|//-- Coroutine library --------------------------------------------------
|
|
|
|
|
|// Generates the body for coroutine.resume (resume=1) and the wrapper
|// closure used by coroutine.wrap (resume=0). The two differ in where
|// the target lua_State comes from (1st arg vs. closure upvalue), in
|// how many leading stack slots to skip, and in how errors are
|// reported (false+msg vs. rethrow via lj_ffh_coroutine_wrap_err).
|.macro coroutine_resume_wrap, resume
|9:  // Need to restore PC for fallback handler.
|  mov PC, SAVE_PC
|  jmp ->fff_fallback
|
|.if resume
|.ffunc_1 coroutine_resume
|  mov L:RB, [RA]			// Coroutine is the 1st argument.
|.else
|.ffunc coroutine_wrap_aux
|  mov L:RB, CFUNC:RB->upvalue[0].gcr	// Coroutine from closure upvalue.
|.endif
|  mov [RA-4], PC
|  mov SAVE_PC, PC
|.if X64
|  mov TMP1, L:RB
|.else
|  mov ARG1, L:RB
|.endif
|.if resume
|  cmp dword [RA+4], LJ_TTHREAD; jne <9
|.endif
|  // Sanity checks: coroutine must not be running (no C frame) and not
|  // dead (status <= LUA_YIELD).
|  cmp aword L:RB->cframe, 0; jne <9
|  cmp byte L:RB->status, LUA_YIELD; ja <9
|  mov PC, L:RB->top
|.if X64
|  mov TMP2, PC
|.else
|  mov ARG2, PC
|.endif
|  je >1				// Status != LUA_YIELD (i.e. 0)?
|  cmp PC, L:RB->base; je <9		// Check for presence of initial func.
|1:
|  // Check the coroutine's stack has room for the arguments.
|.if resume
|  lea PC, [PC+NARGS:RC*8-16]		// Check stack space (-1-thread).
|.else
|  lea PC, [PC+NARGS:RC*8-8]		// Check stack space (-1).
|.endif
|  cmp PC, L:RB->maxstack; ja <9
|  mov L:RB->top, PC
|
|  mov L:RB, SAVE_L
|  mov L:RB->base, RA
|.if resume
|  add RA, 8				// Keep resumed thread in stack for GC.
|.endif
|  mov L:RB->top, RA
|.if X64
|  mov RB, TMP2
|.else
|  mov RB, ARG2
|.endif
|.if resume
|  lea RA, [RA+NARGS:RC*8-24]		// RA = end of source for stack move.
|.else
|  lea RA, [RA+NARGS:RC*8-16]		// RA = end of source for stack move.
|.endif
|  sub RAa, PCa			// Relative to PC.
|
|  cmp PC, RB
|  je >3
|2:  // Move args to coroutine.
|  mov RC, [PC+RA+4]
|  mov [PC-4], RC
|  mov RC, [PC+RA]
|  mov [PC-8], RC
|  sub PC, 8
|  cmp PC, RB
|  jne <2
|3:
|.if X64
|  mov CARG1d, TMP1
|  mov CARG2d, TMP2
|.else
|  xor RA, RA
|  mov ARG4, RA
|  mov ARG3, RA
|.endif
|  call ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
|  set_vmstate INTERP
|
|  mov L:RB, SAVE_L
|.if X64
|  mov L:PC, TMP1
|.else
|  mov L:PC, ARG1			// The callee doesn't modify SAVE_L.
|.endif
|  mov BASE, L:RB->base
|  cmp eax, LUA_YIELD			// vm_resume status in eax.
|  ja >8				// Error in coroutine?
|4:
|  // Copy the coroutine's results back to our stack.
|  mov RA, L:PC->base
|  mov KBASE, L:PC->top
|  mov L:PC->top, RA			// Clear coroutine stack.
|  mov PC, KBASE
|  sub PC, RA
|  je >6				// No results?
|  lea RD, [BASE+PC]
|  shr PC, 3				// PC = number of results.
|  cmp RD, L:RB->maxstack
|  ja >9				// Need to grow stack?
|
|  mov RB, BASE
|  sub RBa, RAa
|5:  // Move results from coroutine.
|  mov RD, [RA]
|  mov [RA+RB], RD
|  mov RD, [RA+4]
|  mov [RA+RB+4], RD
|  add RA, 8
|  cmp RA, KBASE
|  jne <5
|6:
|.if resume
|  lea RD, [PC+2]			// nresults+1 = 1 + true + results.
|  mov dword [BASE-4], LJ_TTRUE	// Prepend true to results.
|.else
|  lea RD, [PC+1]			// nresults+1 = 1 + results.
|.endif
|7:
|  mov PC, SAVE_PC
|  mov MULTRES, RD
|.if resume
|  mov RAa, -8
|.else
|  xor RA, RA
|.endif
|  test PC, FRAME_TYPE
|  jz ->BC_RET_Z
|  jmp ->vm_return
|
|8:  // Coroutine returned with error (at co->top-1).
|.if resume
|  mov dword [BASE-4], LJ_TFALSE	// Prepend false to results.
|  mov RA, L:PC->top
|  sub RA, 8
|  mov L:PC->top, RA			// Clear error from coroutine stack.
|  mov RD, [RA]			// Copy error message.
|  mov [BASE], RD
|  mov RD, [RA+4]
|  mov [BASE+4], RD
|  mov RD, 1+2				// nresults+1 = 1 + false + error.
|  jmp <7
|.else
|  mov FCARG2, L:PC
|  mov FCARG1, L:RB
|  call extern lj_ffh_coroutine_wrap_err@8  // (lua_State *L, lua_State *co)
|  // Error function does not return.
|.endif
|
|9:  // Handle stack expansion on return from yield.
|.if X64
|  mov L:RA, TMP1
|.else
|  mov L:RA, ARG1			// The callee doesn't modify SAVE_L.
|.endif
|  mov L:RA->top, KBASE		// Undo coroutine stack clearing.
|  mov FCARG2, PC			// PC = number of results needed.
|  mov FCARG1, L:RB
|  call extern lj_state_growstack@8	// (lua_State *L, int n)
|  mov BASE, L:RB->base
|  jmp <4				// Retry the stack move.
|.endmacro
|
|
|
|
|
|  // Instantiate the two coroutine fast functions from the macro above.
|  coroutine_resume_wrap 1		// coroutine.resume
|  coroutine_resume_wrap 0		// coroutine.wrap
|
|
|
|
|
|// coroutine.yield(...): only valid when resumed via vm_resume
|// (CFRAME_RESUME set in L->cframe); stores the yielded values via
|// base/top, marks the state LUA_YIELD and unwinds back to the resumer.
|.ffunc coroutine_yield
|  mov L:RB, SAVE_L
|  mov [RA-4], PC
|  test aword L:RB->cframe, CFRAME_RESUME
|  jz ->fff_fallback
|  mov L:RB->base, RA
|  lea RC, [RA+NARGS:RC*8-8]
|  mov L:RB->top, RC
|  xor RD, RD
|  mov aword L:RB->cframe, RDa		// Clear C frame marker.
|  mov al, LUA_YIELD
|  mov byte L:RB->status, al
|  jmp ->vm_leave_unw
|
|
|
|
|
|//-- Math library -------------------------------------------------------
|
|
|
|
|
|// math.abs plus the shared fast-function result epilogues:
|// fff_resn (result on x87 top), fff_resxmm0 (result in xmm0),
|// fff_res1/fff_res/fff_res_ (store RD = nresults+1 and return).
if (sse) {
|->fff_resn:
|  fstp qword [RA-8]
|  jmp ->fff_res1
|
|.ffunc_nsse math_abs
|  sseconst_abs xmm1, RDa
|  andps xmm0, xmm1			// Clear the sign bit.
|->fff_resxmm0:
|  movsd qword [RA-8], xmm0
|  // fallthrough
} else {
|.ffunc_n math_abs
|  fabs
|  // fallthrough
|->fff_resxmm0:  // Dummy.
|->fff_resn:
|  fstp qword [RA-8]
}
|->fff_res1:
|  mov RD, 1+1
|->fff_res:
|  mov MULTRES, RD
|->fff_res_:
|  test PC, FRAME_TYPE
|  jnz >7
|5:
|  cmp PC_RB, RDL			// More results expected?
|  ja >6
|  // BASE and KBASE are assumed to be set for the calling frame.
|  ins_next
|
|6:  // Fill up results with nil.
|  mov dword [RA+RD*8-12], LJ_TNIL
|  add RD, 1
|  jmp <5
|
|7:  // Non-standard return case.
|  mov BASE, RA
|  mov RAa, -8				// Results start at BASE+RA = BASE-8.
|  jmp ->vm_return
|
|
|
|
|
|// math.sqrt / math.floor / math.ceil: SSE variants use sqrtsd or the
|// shared vm_floor/vm_ceil helpers; x87 variants use fsqrt / helpers.
if (sse) {
|.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
|.ffunc_nsse math_floor; call ->vm_floor; jmp ->fff_resxmm0
|.ffunc_nsse math_ceil; call ->vm_ceil; jmp ->fff_resxmm0
} else {
|.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
|.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn
|.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn
}
|
|
|
|
|
|// x87-based math fast functions. log/log10 preload the conversion
|// constant via the macro's op parameter; asin/acos are computed as
|// atan2(x, sqrt(1-x^2)) / atan2(sqrt(1-x^2), x).
|.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn
|.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
|.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn
|
|.ffunc_n math_sin; fsin; jmp ->fff_resn
|.ffunc_n math_cos; fcos; jmp ->fff_resn
|.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn
|
|.ffunc_n math_asin
|  fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
|  jmp ->fff_resn
|.ffunc_n math_acos
|  fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
|  jmp ->fff_resn
|.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
|
|
|
|
|
|// Defines a math fast function that calls out to an external C wrapper
|// (lj_wrapper_<func>). Argument goes in xmm0 (SSE/X64) or FPARG1;
|// RA/BASE are preserved across the call in TMP1/RB.
|.macro math_extern, func
||if (sse) {
|  .ffunc_nsse math_ .. func
|  .if not X64
|  movsd FPARG1, xmm0
|  .endif
||} else {
|  .if not X64
|  .ffunc_n math_ .. func
|  fstp FPARG1
|  .endif
||}
|  mov TMP1, RA
|  mov RB, BASE
|  call extern lj_wrapper_ .. func
|  mov RA, TMP1
|  mov BASE, RB
|  .if X64
|  jmp ->fff_resxmm0
|  .else
|  jmp ->fff_resn
|  .endif
|.endmacro
|
|  math_extern sinh
|  math_extern cosh
|  math_extern tanh
|
|
|
|
|
|// math.deg and math.rad share one body: both multiply by a conversion
|// factor stored in the closure's upvalue[0].
|->ff_math_deg:
if (sse) {
|.ffunc_nsse math_rad
|  mulsd xmm0, qword CFUNC:RB->upvalue[0]
|  jmp ->fff_resxmm0
} else {
|.ffunc_n math_rad
|  fmul qword CFUNC:RB->upvalue[0]
|  jmp ->fff_resn
}
|
|
|
|
|
|// math.atan2 and math.ldexp via x87 (ldexp loads args reversed for fscale).
|.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
|.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
|
|
|
|
|
|// math.frexp(x): extracts exponent and mantissa by IEEE-754 bit
|// manipulation on the high word. Special cases: +-0/+-Inf/NaN return
|// the value unchanged with exponent 0 (label 3); denormals are scaled
|// by 2^54 first with an adjusted bias (label 4).
|.ffunc_1 math_frexp
|  mov RB, [RA+4]
|  cmp RB, LJ_TISNUM; ja ->fff_fallback
|  mov RC, [RA]
|  mov [RA-4], RB; mov [RA-8], RC
|  shl RB, 1; cmp RB, 0xffe00000; jae >3	// Inf/NaN?
|  or RC, RB; jz >3				// +-0?
|  mov RC, 1022				// IEEE double exponent bias - 1.
|  cmp RB, 0x00200000; jb >4			// Denormal?
|1:
|  shr RB, 21; sub RB, RC		// Extract and unbias exponent.
if (sse) {
|  cvtsi2sd xmm0, RB
} else {
|  mov TMP1, RB; fild TMP1
}
|  mov RB, [RA-4]
|  and RB, 0x800fffff			// Mask off exponent.
|  or RB, 0x3fe00000			// Put mantissa in range [0.5,1) or 0.
|  mov [RA-4], RB
|2:
if (sse) {
|  movsd qword [RA], xmm0		// 2nd result: exponent.
} else {
|  fstp qword [RA]
}
|  mov RD, 1+2
|  jmp ->fff_res
|3:  // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
if (sse) {
|  xorps xmm0, xmm0; jmp <2
} else {
|  fldz; jmp <2
}
|4:  // Handle denormals by multiplying with 2^54 and adjusting the bias.
if (sse) {
|  movsd xmm0, qword [RA]
|  sseconst_hi xmm1, RBa, 43500000	// 2^54.
|  mulsd xmm0, xmm1
|  movsd qword [RA-8], xmm0
} else {
|  fld qword [RA]
|  mov TMP1, 0x5a800000; fmul TMP1	// x = x*2^54
|  fstp qword [RA-8]
}
|  mov RB, [RA-4]; mov RC, 1076; shl RB, 1; jmp <1
|
|
|
|
|
|// math.modf(x): integral part via vm_trunc, fractional part as the
|// difference. +-Inf returns (+-Inf, +-0) (label 4); the fraction's
|// sign bit is copied from the input when they differ (label 3).
if (sse) {
|.ffunc_nsse math_modf
} else {
|.ffunc_n math_modf
}
|  mov RB, [RA+4]
|  shl RB, 1; cmp RB, 0xffe00000; je >4	// +-Inf?
if (sse) {
|  movaps xmm4, xmm0
|  call ->vm_trunc
|  subsd xmm4, xmm0			// frac = x - trunc(x)
|1:
|  movsd qword [RA-8], xmm0		// 1st result: integral part.
|  movsd qword [RA], xmm4		// 2nd result: fractional part.
} else {
|  fdup
|  call ->vm_trunc
|  fsub st1, st0
|1:
|  fstp qword [RA-8]
|  fstp qword [RA]
}
|  mov RC, [RA-4]; mov RB, [RA+4]
|  xor RC, RB; js >3			// Need to adjust sign?
|2:
|  mov RD, 1+2
|  jmp ->fff_res
|3:
|  xor RB, 0x80000000; mov [RA+4], RB; jmp <2	// Flip sign of fraction.
|4:
if (sse) {
|  xorps xmm4, xmm4; jmp <1		// Return +-Inf and +-0.
} else {
|  fldz; fxch; jmp <1			// Return +-Inf and +-0.
}
|
|
|
|
|
|// math.fmod(x,y): x87 fprem is a partial remainder; loop until the C2
|// flag (bit 10 of the FPU status word, tested via sahf/jp) clears.
|.ffunc_nnr math_fmod
|1: ; fprem; fnstsw ax; sahf; jp <1
|  fpop1
|  jmp ->fff_resn
|
|
|
|
|
|// math.pow(x,y): delegates to the shared vm_pow helper.
if (sse) {
|.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
} else {
|.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
}
|
|
|
|
|
|// Variadic math.min/math.max: folds all arguments with minsd/maxsd
|// (SSE) or fcmov/fucom sequences (x87, with or without cmov support).
|// A non-number argument falls back (SSE) or pops and falls back via
|// the shared label 5 below (x87).
|.macro math_minmax, name, cmovop, nocmovop, sseop
||if (sse) {
|.ffunc_nsse name
|  mov RB, 2				// Start with the 2nd argument.
|1:
|  cmp RB, RD
|  jae ->fff_resxmm0			// All args consumed: return xmm0.
|  cmp dword [RA+RB*8-4], LJ_TISNUM; ja ->fff_fallback
|  movsd xmm1, qword [RA+RB*8-8]
|  sseop xmm0, xmm1
|  add RB, 1
|  jmp <1
||} else {
|.if not X64
|.ffunc_n name
|  mov RB, 2
|1:
|  cmp RB, RD
|  jae ->fff_resn
|  cmp dword [RA+RB*8-4], LJ_TISNUM; ja >5
|  fld qword [RA+RB*8-8]
||if (cmov) {
|  fucomi st1; cmovop st1; fpop1
||} else {
|  push eax
|  fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop
|  pop eax
||}
|  add RB, 1
|  jmp <1
|.endif
||}
|.endmacro
|
|  math_minmax math_min, fcmovnbe, jz, minsd
|  math_minmax math_max, fcmovbe, jnz, maxsd
if (!sse) {
|5:  // Shared x87 bailout: pop the loaded arg before falling back.
|  fpop; jmp ->fff_fallback
}
|
|
|
|
|
|//-- String library -----------------------------------------------------
|
|
|
|
|
|// string.len(s): returns the interned string's length field as a number.
|.ffunc_1 string_len
|  cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
|  mov STR:RB, [RA]
if (sse) {
|  cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
} else {
|  fild dword STR:RB->len; jmp ->fff_resn
}
|
|
|
|
|
|.ffunc string_byte			// Only handle the 1-arg case here.
|  // string.byte(s): returns the first byte of s as a number, or no
|  // results for the empty string.
|  cmp NARGS:RC, 1+1; jne ->fff_fallback
|  cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
|  mov STR:RB, [RA]
|  cmp dword STR:RB->len, 1
|  jb ->fff_res0			// Return no results for empty string.
|  movzx RB, byte STR:RB[1]
if (sse) {
|  cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
} else {
|  mov TMP1, RB; fild TMP1; jmp ->fff_resn
}
|
|
|
|
|
|.ffunc string_char			// Only handle the 1-arg case here.
|  // string.char(n): builds a 1-byte string from an integer 0..255,
|  // then interns it via the shared fff_newstr tail (lj_str_new).
|  ffgccheck
|  cmp NARGS:RC, 1+1; jne ->fff_fallback	// *Exactly* 1 arg.
|  cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
if (sse) {
|  cvtsd2si RC, qword [RA]
|  cmp RC, 255; ja ->fff_fallback
|  mov TMP2, RC
} else {
|  fld qword [RA]
|  fistp TMP2
|  cmp TMP2, 255; ja ->fff_fallback
}
|.if X64
|  mov TMP3, 1				// Length 1.
|.else
|  mov ARG3, 1
|.endif
|  lea RDa, TMP2			// Points to stack. Little-endian.
|  mov TMP1, RA			// Save RA.
|->fff_newstr:
|  // Shared tail: intern the string at RD with length TMP3/ARG3 and
|  // return it. Used by string.char and string.sub.
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|.if X64
|  mov CARG3d, TMP3			// Zero-extended to size_t.
|  mov CARG2, RDa			// May be 64 bit ptr to stack.
|  mov CARG1d, L:RB
|.else
|  mov ARG2, RD
|  mov ARG1, L:RB
|.endif
|  mov SAVE_PC, PC
|  call extern lj_str_new		// (lua_State *L, char *str, size_t l)
|  // GCstr * returned in eax (RC).
|  mov RA, TMP1
|  mov BASE, L:RB->base
|  mov dword [RA-4], LJ_TSTR
|  mov [RA-8], STR:RC
|  jmp ->fff_res1
|
|
|
|
|
|.ffunc string_sub
|  // string.sub(s, start [,end]): normalizes negative/out-of-range
|  // indices per Lua semantics (end defaults to -1), then interns the
|  // selected substring via the shared fff_newstr tail.
|  ffgccheck
|  mov TMP1, RA			// Save RA.
|  mov TMP2, -1			// Default end index: -1 (last char).
|  cmp NARGS:RC, 1+2; jb ->fff_fallback
|  jna >1				// No explicit end argument?
|  cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback
if (sse) {
|  cvtsd2si RB, qword [RA+16]
|  mov TMP2, RB
} else {
|  fld qword [RA+16]
|  fistp TMP2
}
|1:
|  cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
|  cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
|  mov STR:RB, [RA]
|  mov TMP3, STR:RB
|  mov RB, STR:RB->len
if (sse) {
|  cvtsd2si RA, qword [RA+8]		// RA = start index.
} else {
|.if not X64
|  fld qword [RA+8]
|  fistp ARG3
|  mov RA, ARG3
|.endif
}
|  mov RC, TMP2			// RC = end index.
|  cmp RB, RC				// len < end? (unsigned compare)
|  jb >5
|2:
|  test RA, RA				// start <= 0?
|  jle >7
|3:
|  // Indices normalized: 1 <= RA (start), RC (end) <= len.
|  mov STR:RB, TMP3
|  sub RC, RA				// start > end?
|  jl ->fff_emptystr
|  lea RB, [STR:RB+RA+#STR-1]		// Ptr to first selected char.
|  add RC, 1				// Length = end-start+1.
|4:
|.if X64
|  mov TMP3, RC
|.else
|  mov ARG3, RC
|.endif
|  mov RD, RB
|  jmp ->fff_newstr
|
|5:  // Negative end or overflow.
|  jl >6
|  lea RC, [RC+RB+1]			// end = end+(len+1)
|  jmp <2
|6:  // Overflow.
|  mov RC, RB				// end = len
|  jmp <2
|
|7:  // Negative start or underflow.
|  je >8
|  add RA, RB				// start = start+(len+1)
|  add RA, 1
|  jg <3				// start > 0?
|8:  // Underflow.
|  mov RA, 1				// start = 1
|  jmp <3
|
|->fff_emptystr:			// Range underflow.
|  xor RC, RC				// Zero length. Any ptr in RB is ok.
|  jmp <4
|
|
|
|
|
|.ffunc_2 string_rep // Only handle the 1-char case inline.
|
|
| ffgccheck
|
|
| mov TMP1, RA // Save RA.
|
|
| cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
|
|
| cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
|
|
| mov STR:RB, [RA]
|
|
if (sse) {
|
|
| cvtsd2si RC, qword [RA+8]
|
|
} else {
|
|
| fld qword [RA+8]
|
|
| fistp TMP2
|
|
| mov RC, TMP2
|
|
}
|
|
| test RC, RC
|
|
| jle ->fff_emptystr // Count <= 0? (or non-int)
|
|
| cmp dword STR:RB->len, 1
|
|
| jb ->fff_emptystr // Zero length string?
|
|
| jne ->fff_fallback_2 // Fallback for > 1-char strings.
|
|
| cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
|
|
| movzx RA, byte STR:RB[1]
|
|
| mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
|
|
|.if X64
|
|
| mov TMP3, RC
|
|
|.else
|
|
| mov ARG3, RC
|
|
|.endif
|
|
|1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
|
|
| mov [RB], RAL
|
|
| add RB, 1
|
|
| sub RC, 1
|
|
| jnz <1
|
|
| mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
|
|
| jmp ->fff_newstr
|
|
|
|
|
|.ffunc_1 string_reverse
|
|
| ffgccheck
|
|
| mov TMP1, RA // Save RA.
|
|
| cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
|
|
| mov STR:RB, [RA]
|
|
| mov RC, STR:RB->len
|
|
| test RC, RC
|
|
| jz ->fff_emptystr // Zero length string?
|
|
| cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
|
|
| add RB, #STR
|
|
| mov TMP2, PC // Need another temp register.
|
|
|.if X64
|
|
| mov TMP3, RC
|
|
|.else
|
|
| mov ARG3, RC
|
|
|.endif
|
|
| mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
|
|
|1:
|
|
| movzx RA, byte [RB]
|
|
| add RB, 1
|
|
| sub RC, 1
|
|
| mov [PC+RC], RAL
|
|
| jnz <1
|
|
| mov RD, PC
|
|
| mov PC, TMP2
|
|
| jmp ->fff_newstr
|
|
|
|
|
|.macro ffstring_case, name, lo, hi
|// Emit a fast function that copies string arg 1 into the global tmpbuf,
|// XOR-ing bit 0x20 (the ASCII case bit) of every byte whose value lies
|// in [lo, hi]. Instantiated below for string.lower and string.upper.
|
|
| .ffunc_1 name
|
|
| ffgccheck
|
|
| mov TMP1, RA // Save RA.
|
|
| cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
|
|
| mov STR:RB, [RA]
|
|
| mov RC, STR:RB->len
|
|
| cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 // Result must fit in tmpbuf.
|
|
| add RB, #STR // RB = pointer to string data (skip GCstr header).
|
|
| mov TMP2, PC // Need another temp register.
|
|
|.if X64
|
|
| mov TMP3, RC
|
|
|.else
|
|
| mov ARG3, RC
|
|
|.endif
|
|
| mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
|
|
| jmp >3 // Enter loop at the decrement, so a zero-length string works, too.
|
|
|1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
|
|
| movzx RA, byte [RB+RC]
|
|
| cmp RA, lo
|
|
| jb >2
|
|
| cmp RA, hi
|
|
| ja >2
|
|
| xor RA, 0x20 // Toggle the ASCII case bit.
|
|
|2:
|
|
| mov [PC+RC], RAL
|
|
|3:
|
|
| sub RC, 1 // Walk RC from len-1 down to 0.
|
|
| jns <1
|
|
| mov RD, PC // RD = tmpbuf; the length is already in TMP3/ARG3.
|
|
| mov PC, TMP2 // Restore PC.
|
|
| jmp ->fff_newstr
|
|
|.endmacro
|
|
|
|
|
|ffstring_case string_lower, 0x41, 0x5a
|
|
|ffstring_case string_upper, 0x61, 0x7a
|
|
|
|
|
|//-- Table library ------------------------------------------------------
|
|
|
|
|
|.ffunc_1 table_getn
|
|
| cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
|
|
| mov TMP1, BASE // Save RA and BASE.
|
|
| mov RB, RA
|
|
| mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA
|
|
| call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
|
|
| // Length of table returned in eax (RC).
|
|
| mov RA, RB // Restore RA and BASE.
|
|
| mov BASE, TMP1
|
|
if (sse) {
|
|
| cvtsi2sd xmm0, RC; jmp ->fff_resxmm0
|
|
} else {
|
|
|.if not X64
|
|
| mov ARG1, RC; fild ARG1; jmp ->fff_resn
|
|
|.endif
|
|
}
|
|
|
|
|
|//-- Bit library --------------------------------------------------------
|
|
|
|
|
|.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
|
|
|
|
|
if (sse) {
|
|
|.ffunc_nsse bit_tobit
|
|
| sseconst_tobit xmm1, RBa
|
|
| addsd xmm0, xmm1
|
|
| movd RB, xmm0
|
|
| cvtsi2sd xmm0, RB
|
|
| jmp ->fff_resxmm0
|
|
} else {
|
|
|.if not X64
|
|
|.ffunc_n bit_tobit
|
|
| mov TMP1, TOBIT_BIAS
|
|
| fadd TMP1
|
|
| fstp FPARG1 // 64 bit FP store.
|
|
| fild ARG1 // 32 bit integer load (s2lfwd ok).
|
|
| jmp ->fff_resn
|
|
|.endif
|
|
}
|
|
|
|
|
|.macro .ffunc_bit, name
|// Emit the prologue of a bit.* fast function with one numeric argument:
|// check the argument and convert it to a 32 bit integer in RB via the
|// 2^52+2^51 bias trick (sseconst_tobit / TOBIT_BIAS, see bit_tobit above).
|
|
||if (sse) {
|
|
| .ffunc_nsse name
|
|
| sseconst_tobit xmm1, RBa
|
|
| addsd xmm0, xmm1 // Bias moves the integer part into the low 32 bits.
|
|
| movd RB, xmm0
|
|
||} else {
|
|
|.if not X64
|
|
| .ffunc_n name
|
|
| mov TMP1, TOBIT_BIAS
|
|
| fadd TMP1
|
|
| fstp FPARG1 // 64 bit FP store; low word holds the integer result.
|
|
| mov RB, ARG1
|
|
|.endif
|
|
||}
|
|
|.endmacro
|
|
|
|
|
|.macro .ffunc_bit_op, name, ins
|// Emit a variadic bit.* fast function (band/bor/bxor): the first arg is
|// converted to an integer in RB by .ffunc_bit, then 'ins' is folded over
|// the remaining args from last to first. Any non-number arg falls back.
|
|
| .ffunc_bit name
|
|
| mov TMP2, NARGS:RC // Save for fallback.
|
|
| lea RC, [RA+NARGS:RC*8-16] // RC = address of the last argument slot.
|
|
||if (sse) {
|
|
| mov TMP1, BASE // Need BASE as a scratch register.
|
|
||}
|
|
|1:
|
|
| cmp RC, RA
|
|
| jbe ->fff_resbit_op // All args consumed: return accumulated RB.
|
|
| cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op
|
|
||if (sse) {
|
|
| movsd xmm0, qword [RC]
|
|
| addsd xmm0, xmm1 // xmm1 still holds the tobit bias from .ffunc_bit.
|
|
| movd BASE, xmm0
|
|
| ins RB, BASE
|
|
||} else {
|
|
|.if not X64
|
|
| fld qword [RC]
|
|
| fadd TMP1 // TMP1 still holds TOBIT_BIAS from .ffunc_bit.
|
|
| fstp FPARG1
|
|
| ins RB, ARG1
|
|
|.endif
|
|
||}
|
|
| sub RC, 8
|
|
| jmp <1
|
|
|.endmacro
|
|
|
|
|
|.ffunc_bit_op bit_band, and
|
|
|.ffunc_bit_op bit_bor, or
|
|
|.ffunc_bit_op bit_bxor, xor
|
|
|
|
|
|.ffunc_bit bit_bswap
|
|
| bswap RB
|
|
| jmp ->fff_resbit
|
|
|
|
|
|.ffunc_bit bit_bnot
|
|
| not RB
|
|
if (sse) {
|
|
|->fff_resbit:
|
|
| cvtsi2sd xmm0, RB
|
|
| jmp ->fff_resxmm0
|
|
|->fff_resbit_op:
|
|
| cvtsi2sd xmm0, RB
|
|
| mov BASE, TMP1
|
|
| jmp ->fff_resxmm0
|
|
} else {
|
|
|.if not X64
|
|
|->fff_resbit:
|
|
|->fff_resbit_op:
|
|
| mov ARG1, RB
|
|
| fild ARG1
|
|
| jmp ->fff_resn
|
|
|.endif
|
|
}
|
|
|
|
|
|->fff_fallback_bit_op:
|
|
if (sse) {
|
|
| mov BASE, TMP1
|
|
}
|
|
| mov NARGS:RC, TMP2 // Restore for fallback
|
|
| jmp ->fff_fallback
|
|
|
|
|
|.macro .ffunc_bit_sh, name, ins
|// Emit a bit shift/rotate fast function: both numeric args are converted
|// to integers (value in RB, shift count in RA == ecx) and 'ins RB, cl' is
|// applied. RA is stashed in RC and restored, since the count must be in cl.
|
|
||if (sse) {
|
|
| .ffunc_nnsse name
|
|
| sseconst_tobit xmm2, RBa
|
|
| addsd xmm0, xmm2 // tobit(arg 1) -> value.
|
|
| addsd xmm1, xmm2 // tobit(arg 2) -> shift count.
|
|
| mov RC, RA // Assumes RA is ecx.
|
|
| movd RB, xmm0
|
|
| movd RA, xmm1
|
|
||} else {
|
|
|.if not X64
|
|
| .ffunc_nn name
|
|
| mov TMP1, TOBIT_BIAS
|
|
| fadd TMP1
|
|
| fstp FPARG3 // Shift count (arg 2, on top of x87 stack).
|
|
| fadd TMP1
|
|
| fstp FPARG1 // Value (arg 1).
|
|
| mov RC, RA // Assumes RA is ecx.
|
|
| mov RA, ARG3
|
|
| mov RB, ARG1
|
|
|.endif
|
|
||}
|
|
| ins RB, cl // Shift count in cl (RA == ecx).
|
|
| mov RA, RC // Restore RA.
|
|
| jmp ->fff_resbit
|
|
|.endmacro
|
|
|
|
|
|.ffunc_bit_sh bit_lshift, shl
|
|
|.ffunc_bit_sh bit_rshift, shr
|
|
|.ffunc_bit_sh bit_arshift, sar
|
|
|.ffunc_bit_sh bit_rol, rol
|
|
|.ffunc_bit_sh bit_ror, ror
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|->fff_fallback_2:
|
|
| mov NARGS:RC, 1+2 // Other args are ignored, anyway.
|
|
| jmp ->fff_fallback
|
|
|->fff_fallback_1:
|
|
| mov NARGS:RC, 1+1 // Other args are ignored, anyway.
|
|
|->fff_fallback: // Call fast function fallback handler.
|
|
| // RA = new base, RC = nargs+1
|
|
| mov L:RB, SAVE_L
|
|
| sub BASE, RA
|
|
| mov [RA-4], PC
|
|
| mov SAVE_PC, PC // Redundant (but a defined value).
|
|
| mov TMP1, BASE // Save old BASE (relative).
|
|
| mov L:RB->base, RA
|
|
| lea RC, [RA+NARGS:RC*8-8]
|
|
| lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler.
|
|
| mov L:RB->top, RC
|
|
| mov CFUNC:RC, [RA-8]
|
|
| cmp BASE, L:RB->maxstack
|
|
| ja >5 // Need to grow stack.
|
|
|.if X64
|
|
| mov CARG1d, L:RB
|
|
|.else
|
|
| mov ARG1, L:RB
|
|
|.endif
|
|
| call aword CFUNC:RC->f // (lua_State *L)
|
|
| // Either throws an error or recovers and returns 0 or MULTRES (+1).
|
|
| test RC, RC; jnz >3
|
|
|1: // Returned 0: retry fast path.
|
|
| mov RA, L:RB->base
|
|
| mov RC, L:RB->top
|
|
| sub RC, RA
|
|
| shr RC, 3
|
|
| add NARGS:RC, 1
|
|
| mov LFUNC:RB, [RA-8]
|
|
| mov BASE, TMP1 // Restore old BASE.
|
|
| add BASE, RA
|
|
| cmp [RA-4], PC; jne >2 // Callable modified by handler?
|
|
| jmp aword LFUNC:RB->gate // Retry the call.
|
|
|
|
|
|2: // Run modified callable.
|
|
| cmp dword [RA-4], LJ_TFUNC
|
|
| jne ->vmeta_call
|
|
| jmp aword LFUNC:RB->gate // Retry the call.
|
|
|
|
|
|3: // Returned MULTRES (already in RC/RD).
|
|
| mov RA, L:RB->base
|
|
| mov BASE, TMP1 // Restore old BASE.
|
|
| add BASE, RA
|
|
| jmp ->fff_res
|
|
|
|
|
|5: // Grow stack for fallback handler.
|
|
| mov FCARG2, LUA_MINSTACK
|
|
| mov FCARG1, L:RB
|
|
| call extern lj_state_growstack@8 // (lua_State *L, int n)
|
|
| jmp <1 // Dumb retry (goes through ff first).
|
|
|
|
|
|->fff_gcstep: // Call GC step function.
|
|
| // RA = new base, RC = nargs+1
|
|
| pop RBa // Must keep stack at same level.
|
|
| mov TMPa, RBa // Save return address
|
|
| mov L:RB, SAVE_L
|
|
| sub BASE, RA
|
|
| mov TMP2, BASE // Save old BASE (relative).
|
|
| mov [RA-4], PC
|
|
| mov SAVE_PC, PC // Redundant (but a defined value).
|
|
| mov L:RB->base, RA
|
|
| lea RC, [RA+NARGS:RC*8-8]
|
|
| mov FCARG1, L:RB
|
|
| mov L:RB->top, RC
|
|
| call extern lj_gc_step@4 // (lua_State *L)
|
|
| mov RA, L:RB->base
|
|
| mov RC, L:RB->top
|
|
| sub RC, RA
|
|
| shr RC, 3
|
|
| add NARGS:RC, 1
|
|
| mov PC, [RA-4]
|
|
| mov BASE, TMP2 // Restore old BASE.
|
|
| add BASE, RA
|
|
| mov RBa, TMPa
|
|
| push RBa // Restore return address.
|
|
| mov LFUNC:RB, [RA-8]
|
|
| ret
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Special dispatch targets -------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|->vm_record: // Dispatch target for recording phase.
|
|
#if LJ_HASJIT
|
|
| movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
|
|
| test RDL, HOOK_VMEVENT // No recording while in vmevent.
|
|
| jnz >5
|
|
| // Decrement the hookcount for consistency, but always do the call.
|
|
| test RDL, HOOK_ACTIVE
|
|
| jnz >1
|
|
| test RDL, LUA_MASKLINE|LUA_MASKCOUNT
|
|
| jz >1
|
|
| dec dword [DISPATCH+DISPATCH_GL(hookcount)]
|
|
| jmp >1
|
|
#endif
|
|
|
|
|
|->vm_hook: // Dispatch target with enabled hooks.
|
|
| movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
|
|
| test RDL, HOOK_ACTIVE // Hook already active?
|
|
| jnz >5
|
|
|
|
|
| test RDL, LUA_MASKLINE|LUA_MASKCOUNT
|
|
| jz >5
|
|
| dec dword [DISPATCH+DISPATCH_GL(hookcount)]
|
|
| jz >1
|
|
| test RDL, LUA_MASKLINE
|
|
| jz >5
|
|
|1:
|
|
| mov L:RB, SAVE_L
|
|
| mov L:RB->base, BASE
|
|
| mov FCARG2, PC // Caveat: FCARG2 == BASE
|
|
| mov FCARG1, L:RB
|
|
| // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
|
|
| call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc)
|
|
|3:
|
|
| mov BASE, L:RB->base
|
|
|4:
|
|
| movzx RA, PC_RA
|
|
|5:
|
|
| movzx OP, PC_OP
|
|
| movzx RD, PC_RD
|
|
|.if X64
|
|
| jmp aword [DISPATCH+OP*8+GG_DISP_STATIC*8] // Re-dispatch to static ins.
|
|
|.else
|
|
| jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Re-dispatch to static ins.
|
|
|.endif
|
|
|
|
|
|->cont_hook: // Continue from hook yield.
|
|
| add PC, 4
|
|
| mov RA, [RB-24]
|
|
| mov MULTRES, RA // Restore MULTRES for *M ins.
|
|
| jmp <4
|
|
|
|
|
|->vm_hotloop: // Hot loop counter underflow.
|
|
#if LJ_HASJIT
|
|
|.if X64
|
|
| int3 // NYI
|
|
|.else
|
|
| mov L:RB, SAVE_L
|
|
| mov L:RB->base, BASE
|
|
| mov FCARG2, PC
|
|
| lea FCARG1, [DISPATCH+GG_DISP2J]
|
|
| mov [DISPATCH+DISPATCH_J(L)], L:RB
|
|
| mov SAVE_PC, PC
|
|
| call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
|
|
| jmp <3
|
|
|.endif
|
|
#endif
|
|
|
|
|
|->vm_hotcall: // Hot call counter underflow.
|
|
#if LJ_HASJIT
|
|
|.if X64
|
|
| int3 // NYI
|
|
|.else
|
|
| mov L:RB, SAVE_L
|
|
| mov L:RB->base, BASE
|
|
| mov FCARG2, PC
|
|
| lea FCARG1, [DISPATCH+GG_DISP2J]
|
|
| mov [DISPATCH+DISPATCH_J(L)], L:RB
|
|
| mov SAVE_PC, PC
|
|
| call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
|
|
| mov BASE, L:RB->base
|
|
| // Dispatch the first instruction and optionally record it.
|
|
| ins_next
|
|
|.endif
|
|
#endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Trace exit handler -------------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|// Called from an exit stub with the exit number on the stack.
|
|
|// The 16 bit exit number is stored with two (sign-extended) push imm8.
|
|
|->vm_exit_handler:
|
|
#if LJ_HASJIT
|
|
|.if X64
|
|
| int3 // NYI
|
|
|.else
|
|
| push ebp; lea ebp, [esp+12]; push ebp
|
|
| push ebx; push edx; push ecx; push eax
|
|
| movzx RC, byte [ebp-4] // Reconstruct exit number.
|
|
| mov RCH, byte [ebp-8]
|
|
| mov [ebp-4], edi; mov [ebp-8], esi
|
|
| // Caveat: DISPATCH is ebx.
|
|
| mov DISPATCH, [ebp]
|
|
| mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
|
|
| set_vmstate EXIT
|
|
| mov [DISPATCH+DISPATCH_J(exitno)], RC
|
|
| mov [DISPATCH+DISPATCH_J(parent)], RA
|
|
| sub esp, 8*8+16 // Room for SSE regs + args.
|
|
|
|
|
| // Must not access SSE regs if SSE2 is not present.
|
|
| test dword [DISPATCH+DISPATCH_J(flags)], JIT_F_SSE2
|
|
| jz >1
|
|
| movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
|
|
| movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
|
|
| movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
|
|
| movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
|
|
|1:
|
|
| // Caveat: RB is ebp.
|
|
| mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
|
|
| mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
|
|
| mov [DISPATCH+DISPATCH_J(L)], L:RB
|
|
| mov L:RB->base, BASE
|
|
| lea FCARG2, [esp+16]
|
|
| lea FCARG1, [DISPATCH+GG_DISP2J]
|
|
| call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
|
|
| // Interpreter C frame returned in eax.
|
|
| mov esp, eax // Reposition stack to C frame.
|
|
| mov BASE, L:RB->base
|
|
| mov PC, SAVE_PC
|
|
| mov SAVE_L, L:RB // Needed for on-trace resume/yield.
|
|
|.endif
|
|
#endif
|
|
|->vm_exit_interp:
|
|
#if LJ_HASJIT
|
|
| mov LFUNC:KBASE, [BASE-8]
|
|
| mov PROTO:KBASE, LFUNC:KBASE->pt
|
|
| mov KBASE, PROTO:KBASE->k
|
|
| mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
|
|
| set_vmstate INTERP
|
|
| ins_next
|
|
#endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Math helper functions ----------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|// FP value rounding. Called by math.floor/math.ceil fast functions
|
|
|// and from JIT code.
|
|
|
|
|
|// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified.
|
|
|.macro vm_round_x87, mode1, mode2
|// Round st0 with frndint under a temporary FPU control word:
|// CW' = (CW | mode1) & mode2 selects the rounding-control bits, then
|// the original control word is restored. eax is saved via [esp+8].
|
|
| fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
|
|
| mov [esp+8], eax
|
|
| mov ax, mode1
|
|
| or ax, [esp+4]
|
|
|.if mode2 ~= 0xffff
|
|
| and ax, mode2 // Skipped when mode2 == 0xffff (mask would be a no-op).
|
|
|.endif
|
|
| mov [esp+6], ax
|
|
| fldcw word [esp+6] // Switch to the requested rounding mode.
|
|
| frndint
|
|
| fldcw word [esp+4] // Restore the original control word.
|
|
| mov eax, [esp+8]
|
|
| ret
|
|
|.endmacro
|
|
|
|
|
|// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
|
|
|.macro vm_round_sse, mode
|// Round xmm0 (mode: 0 = floor, 1 = ceil, 2 = trunc) without touching the
|// FPU/MXCSR rounding state, using the (|x| + 2^52) - 2^52 trick. Inputs
|// with 2^52 <= |x| (or NaN) are already integral and returned unchanged.
|// The sign bit is split off and merged back, so -0 is preserved.
|
|
| sseconst_abs xmm2, RDa
|
|
| sseconst_2p52 xmm3, RDa
|
|
| movaps xmm1, xmm0
|
|
| andpd xmm1, xmm2 // |x|
|
|
| ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
|
|
| jbe >1
|
|
| andnpd xmm2, xmm0 // Isolate sign bit.
|
|
|.if mode == 2 // trunc(x)?
|
|
| movaps xmm0, xmm1
|
|
| addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
|
|
| subsd xmm1, xmm3
|
|
| sseconst_1 xmm3, RDa
|
|
| cmpsd xmm0, xmm1, 1 // |x| < result? (rounded up, must correct down)
|
|
| andpd xmm0, xmm3
|
|
| subsd xmm1, xmm0 // If yes, subtract 1.
|
|
| orpd xmm1, xmm2 // Merge sign bit back in.
|
|
|.else
|
|
| addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
|
|
| subsd xmm1, xmm3
|
|
| orpd xmm1, xmm2 // Merge sign bit back in.
|
|
| .if mode == 1 // ceil(x)?
|
|
| sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
|
|
| cmpsd xmm0, xmm1, 6 // x > result?
|
|
| .else // floor(x)?
|
|
| sseconst_1 xmm2, RDa
|
|
| cmpsd xmm0, xmm1, 1 // x < result?
|
|
| .endif
|
|
| andpd xmm0, xmm2
|
|
| subsd xmm1, xmm0 // If yes, subtract +-1.
|
|
|.endif
|
|
| movaps xmm0, xmm1
|
|
|1:
|
|
| ret
|
|
|.endmacro
|
|
|
|
|
|.macro vm_round, name, ssemode, mode1, mode2
|// Define both entry points for one rounding function: ->name is the x87
|// variant and ->name_sse the SSE variant. In SSE-only builds the x87
|// body is omitted and ->name falls through to the SSE code.
|
|
|->name:
|
|
||if (!sse) {
|
|
| vm_round_x87 mode1, mode2
|
|
||}
|
|
|->name .. _sse:
|
|
| vm_round_sse ssemode
|
|
|.endmacro
|
|
|
|
|
| vm_round vm_floor, 0, 0x0400, 0xf7ff
|
|
| vm_round vm_ceil, 1, 0x0800, 0xfbff
|
|
| vm_round vm_trunc, 2, 0x0c00, 0xffff
|
|
|
|
|
|// FP modulo x%y. Called by BC_MOD* and vm_arith.
|
|
|->vm_mod:
|
|
if (sse) {
|
|
|// Args in xmm0/xmm1, return value in xmm0.
|
|
|// Caveat: xmm0-xmm5 and RC (eax) modified!
|
|
| movaps xmm5, xmm0
|
|
| divsd xmm0, xmm1
|
|
| sseconst_abs xmm2, RDa
|
|
| sseconst_2p52 xmm3, RDa
|
|
| movaps xmm4, xmm0
|
|
| andpd xmm4, xmm2 // |x/y|
|
|
| ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
|
|
| jbe >1
|
|
| andnpd xmm2, xmm0 // Isolate sign bit.
|
|
| addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
|
|
| subsd xmm4, xmm3
|
|
| orpd xmm4, xmm2 // Merge sign bit back in.
|
|
| sseconst_1 xmm2, RDa
|
|
| cmpsd xmm0, xmm4, 1 // x/y < result?
|
|
| andpd xmm0, xmm2
|
|
| subsd xmm4, xmm0 // If yes, subtract 1.0.
|
|
| movaps xmm0, xmm5
|
|
| mulsd xmm1, xmm4
|
|
| subsd xmm0, xmm1
|
|
| ret
|
|
|1:
|
|
| mulsd xmm1, xmm0
|
|
| movaps xmm0, xmm5
|
|
| subsd xmm0, xmm1
|
|
| ret
|
|
} else {
|
|
|// Args/ret on x87 stack (y on top). No xmm registers modified.
|
|
|// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
|
|
| fld st1
|
|
| fdiv st1
|
|
| fnstcw word [esp+4]
|
|
| mov ax, 0x0400
|
|
| or ax, [esp+4]
|
|
| and ax, 0xf7ff
|
|
| mov [esp+6], ax
|
|
| fldcw word [esp+6]
|
|
| frndint
|
|
| fldcw word [esp+4]
|
|
| fmulp st1
|
|
| fsubp st1
|
|
| ret
|
|
}
|
|
|
|
|
|// FP exponentiation e^x and 2^x. Called by math.exp fast function and
|
|
|// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
|
|
|// Caveat: needs 3 slots on x87 stack!
|
|
|->vm_exp:
|
|
| fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
|
|
|->vm_exp2:
|
|
| .if X64WIN
|
|
| .define expscratch, dword [rsp+8] // Use scratch area.
|
|
| .elif X64
|
|
| .define expscratch, dword [rsp-8] // Use red zone.
|
|
| .else
|
|
| .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
|
|
| .endif
|
|
| fst expscratch // Caveat: overwrites ARG1.
|
|
| cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
|
|
| cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
|
|
|->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
|
|
| fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
|
|
| f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
|
|
|1:
|
|
| ret
|
|
|2:
|
|
| fpop; fldz; ret
|
|
|
|
|
|// Generic power function x^y. Called by BC_POW, math.pow fast function,
|
|
|// and vm_arith.
|
|
if (!sse) {
|
|
|.if not X64
|
|
|// Args/ret on x87 stack (y on top). RC (eax) modified.
|
|
|// Caveat: needs 3 slots on x87 stack!
|
|
|->vm_pow:
|
|
| fist dword [esp+4] // Store/reload int before comparison.
|
|
| fild dword [esp+4] // Integral exponent used in vm_powi.
|
|
||if (cmov) {
|
|
| fucomip st1
|
|
||} else {
|
|
| fucomp st1; fnstsw ax; sahf
|
|
||}
|
|
| jnz >8 // Branch for FP exponents.
|
|
| jp >9 // Branch for NaN exponent.
|
|
| fpop // Pop y and fallthrough to vm_powi.
|
|
|
|
|
|// FP/int power function x^i. Arg1/ret on x87 stack.
|
|
|// Arg2 (int) on C stack. RC (eax) modified.
|
|
|// Caveat: needs 2 slots on x87 stack!
|
|
| mov eax, [esp+4]
|
|
| cmp eax, 1; jle >6 // i<=1?
|
|
| // Now 1 < (unsigned)i <= 0x80000000.
|
|
|1: // Handle leading zeros.
|
|
| test eax, 1; jnz >2
|
|
| fmul st0
|
|
| shr eax, 1
|
|
| jmp <1
|
|
|2:
|
|
| shr eax, 1; jz >5
|
|
| fdup
|
|
|3: // Handle trailing bits.
|
|
| fmul st0
|
|
| shr eax, 1; jz >4
|
|
| jnc <3
|
|
| fmul st1, st0
|
|
| jmp <3
|
|
|4:
|
|
| fmulp st1
|
|
|5:
|
|
| ret
|
|
|6:
|
|
| je <5 // x^1 ==> x
|
|
| jb >7
|
|
| fld1; fdivrp st1
|
|
| neg eax
|
|
| cmp eax, 1; je <5 // x^-1 ==> 1/x
|
|
| jmp <1 // x^-i ==> (1/x)^i
|
|
|7:
|
|
| fpop; fld1 // x^0 ==> 1
|
|
| ret
|
|
|
|
|
|8: // FP/FP power function x^y.
|
|
| fst dword [esp+4]
|
|
| fxch
|
|
| fst dword [esp+8]
|
|
| mov eax, [esp+4]; shl eax, 1
|
|
| cmp eax, 0xff000000; je >2 // x^+-Inf?
|
|
| mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
|
|
| cmp eax, 0xff000000; je >4 // +-Inf^y?
|
|
| fyl2x
|
|
| jmp ->vm_exp2raw
|
|
|
|
|
|9: // Handle x^NaN.
|
|
| fld1
|
|
||if (cmov) {
|
|
| fucomip st2
|
|
||} else {
|
|
| fucomp st2; fnstsw ax; sahf
|
|
||}
|
|
| je >1 // 1^NaN ==> 1
|
|
| fxch // x^NaN ==> NaN
|
|
|1:
|
|
| fpop
|
|
| ret
|
|
|
|
|
|2: // Handle x^+-Inf.
|
|
| fabs
|
|
| fld1
|
|
||if (cmov) {
|
|
| fucomip st1
|
|
||} else {
|
|
| fucomp st1; fnstsw ax; sahf
|
|
||}
|
|
| je >3 // +-1^+-Inf ==> 1
|
|
| fpop; fabs; fldz; mov eax, 0; setc al
|
|
| ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
|
|
| fxch
|
|
|3:
|
|
| fpop1; fabs
|
|
| ret
|
|
|
|
|
|4: // Handle +-0^y or +-Inf^y.
|
|
| cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
|
|
| fpop; fpop
|
|
| test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
|
|
| fldz // y < 0, +-Inf^y ==> 0
|
|
| ret
|
|
|5:
|
|
| mov dword [esp+4], 0x7f800000 // Return +Inf.
|
|
| fld dword [esp+4]
|
|
| ret
|
|
|.endif
|
|
} else {
|
|
|->vm_pow:
|
|
}
|
|
|
|
|
|// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
|
|
|// Needs 16 byte scratch area for x86. Also called from JIT code.
|
|
|->vm_pow_sse:
|
|
| cvtsd2si eax, xmm1
|
|
| cvtsi2sd xmm2, eax
|
|
| ucomisd xmm1, xmm2
|
|
| jnz >8 // Branch for FP exponents.
|
|
| jp >9 // Branch for NaN exponent.
|
|
| // Fallthrough to vm_powi_sse.
|
|
|
|
|
|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
|
|
|->vm_powi_sse:
|
|
| cmp eax, 1; jle >6 // i<=1?
|
|
| // Now 1 < (unsigned)i <= 0x80000000.
|
|
|1: // Handle leading zeros.
|
|
| test eax, 1; jnz >2
|
|
| mulsd xmm0, xmm0
|
|
| shr eax, 1
|
|
| jmp <1
|
|
|2:
|
|
| shr eax, 1; jz >5
|
|
| movaps xmm1, xmm0
|
|
|3: // Handle trailing bits.
|
|
| mulsd xmm0, xmm0
|
|
| shr eax, 1; jz >4
|
|
| jnc <3
|
|
| mulsd xmm1, xmm0
|
|
| jmp <3
|
|
|4:
|
|
| mulsd xmm0, xmm1
|
|
|5:
|
|
| ret
|
|
|6:
|
|
| je <5 // x^1 ==> x
|
|
| jb >7
|
|
| push RDa
|
|
| sseconst_1 xmm1, RDa
|
|
| divsd xmm1, xmm0
|
|
| pop RDa
|
|
| movaps xmm0, xmm1
|
|
| neg eax
|
|
| cmp eax, 1; je <5 // x^-1 ==> 1/x
|
|
| jmp <1 // x^-i ==> (1/x)^i
|
|
|7:
|
|
| sseconst_1 xmm0, RDa
|
|
| ret
|
|
|
|
|
|8: // FP/FP power function x^y.
|
|
|.if X64
|
|
| movd rax, xmm1; shl rax, 1
|
|
| rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
|
|
| movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
|
|
| rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
|
|
| .if X64WIN
|
|
| movsd qword [rsp+16], xmm1 // Use scratch area.
|
|
| movsd qword [rsp+8], xmm0
|
|
| fld qword [rsp+16]
|
|
| fld qword [rsp+8]
|
|
| .else
|
|
| movsd qword [rsp-16], xmm1 // Use red zone.
|
|
| movsd qword [rsp-8], xmm0
|
|
| fld qword [rsp-16]
|
|
| fld qword [rsp-8]
|
|
| .endif
|
|
|.else
|
|
| movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
|
|
| movsd qword [esp+4], xmm0
|
|
| cmp dword [esp+12], 0; jne >1
|
|
| mov eax, [esp+16]; shl eax, 1
|
|
| cmp eax, 0xffe00000; je >2 // x^+-Inf?
|
|
|1:
|
|
| cmp dword [esp+4], 0; jne >1
|
|
| mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
|
|
| cmp eax, 0xffe00000; je >5 // +-Inf^y?
|
|
|1:
|
|
| fld qword [esp+12]
|
|
| fld qword [esp+4]
|
|
|.endif
|
|
| fyl2x // y*log2(x)
|
|
| fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
|
|
| f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
|
|
|.if X64WIN
|
|
| fstp qword [rsp+8] // Use scratch area.
|
|
| movsd xmm0, qword [rsp+8]
|
|
|.elif X64
|
|
| fstp qword [rsp-8] // Use red zone.
|
|
| movsd xmm0, qword [rsp-8]
|
|
|.else
|
|
| fstp qword [esp+4] // Needs 8 byte scratch area.
|
|
| movsd xmm0, qword [esp+4]
|
|
|.endif
|
|
| ret
|
|
|
|
|
|9: // Handle x^NaN.
|
|
| sseconst_1 xmm2, RDa
|
|
| ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
|
|
| movaps xmm0, xmm1 // x^NaN ==> NaN
|
|
|1:
|
|
| ret
|
|
|
|
|
|2: // Handle x^+-Inf.
|
|
| sseconst_abs xmm2, RDa
|
|
| andpd xmm0, xmm2 // |x|
|
|
| sseconst_1 xmm2, RDa
|
|
| ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
|
|
| movmskpd eax, xmm1
|
|
| xorps xmm0, xmm0
|
|
| mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
|
|
|3:
|
|
| sseconst_hi xmm0, RDa, 7ff00000 // +Inf
|
|
| ret
|
|
|
|
|
|4: // Handle +-0^y.
|
|
| movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
|
|
| xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
|
|
| ret
|
|
|
|
|
|5: // Handle +-Inf^y.
|
|
| movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
|
|
| xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
|
|
| ret
|
|
|
|
|
|// Callable from C: double lj_vm_foldfpm(double x, int fpm)
|
|
|// Computes fpm(x) for extended math functions. ORDER FPM.
|
|
|->vm_foldfpm:
|
|
if (sse) {
|
|
|.if X64
|
|
|
|
|
| .if X64WIN
|
|
| .define fpmop, CARG2d
|
|
| .else
|
|
| .define fpmop, CARG1d
|
|
| .endif
|
|
| cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
|
|
| cmp fpmop, 3; jb ->vm_trunc; ja >2
|
|
| sqrtsd xmm0, xmm0; ret
|
|
|2:
|
|
| .if X64WIN
|
|
| movsd qword [rsp+8], xmm0 // Use scratch area.
|
|
| fld qword [rsp+8]
|
|
| .else
|
|
| movsd qword [rsp-8], xmm0 // Use red zone.
|
|
| fld qword [rsp-8]
|
|
| .endif
|
|
| cmp fpmop, 5; ja >2
|
|
| .if X64WIN; pop rax; .endif
|
|
| je >1
|
|
| call ->vm_exp
|
|
| .if X64WIN; push rax; .endif
|
|
| jmp >7
|
|
|1:
|
|
| call ->vm_exp2
|
|
| .if X64WIN; push rax; .endif
|
|
| jmp >7
|
|
|2: ; cmp fpmop, 7; je >1; ja >2
|
|
| fldln2; fxch; fyl2x; jmp >7
|
|
|1: ; fld1; fxch; fyl2x; jmp >7
|
|
|2: ; cmp fpmop, 9; je >1; ja >2
|
|
| fldlg2; fxch; fyl2x; jmp >7
|
|
|1: ; fsin; jmp >7
|
|
|2: ; cmp fpmop, 11; je >1; ja >9
|
|
| fcos; jmp >7
|
|
|1: ; fptan; fpop
|
|
|7:
|
|
| .if X64WIN
|
|
| fstp qword [rsp+8] // Use scratch area.
|
|
| movsd xmm0, qword [rsp+8]
|
|
| .else
|
|
| fstp qword [rsp-8] // Use red zone.
|
|
| movsd xmm0, qword [rsp-8]
|
|
| .endif
|
|
| ret
|
|
|
|
|
|.else // x86 calling convention.
|
|
|
|
|
| .define fpmop, eax
|
|
| mov fpmop, [esp+12]
|
|
| movsd xmm0, qword [esp+4]
|
|
| cmp fpmop, 1; je >1; ja >2
|
|
| call ->vm_floor; jmp >7
|
|
|1: ; call ->vm_ceil; jmp >7
|
|
|2: ; cmp fpmop, 3; je >1; ja >2
|
|
| call ->vm_trunc; jmp >7
|
|
|1:
|
|
| sqrtsd xmm0, xmm0
|
|
|7:
|
|
| movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
|
|
| fld qword [esp+4]
|
|
| ret
|
|
|2: ; fld qword [esp+4]
|
|
| cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2
|
|
|2: ; cmp fpmop, 7; je >1; ja >2
|
|
| fldln2; fxch; fyl2x; ret
|
|
|1: ; fld1; fxch; fyl2x; ret
|
|
|2: ; cmp fpmop, 9; je >1; ja >2
|
|
| fldlg2; fxch; fyl2x; ret
|
|
|1: ; fsin; ret
|
|
|2: ; cmp fpmop, 11; je >1; ja >9
|
|
| fcos; ret
|
|
|1: ; fptan; fpop; ret
|
|
|
|
|
|.endif
|
|
} else {
|
|
| mov fpmop, [esp+12]
|
|
| fld qword [esp+4]
|
|
| cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
|
|
| cmp fpmop, 3; jb ->vm_trunc; ja >2
|
|
| fsqrt; ret
|
|
|2: ; cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2
|
|
| cmp fpmop, 7; je >1; ja >2
|
|
| fldln2; fxch; fyl2x; ret
|
|
|1: ; fld1; fxch; fyl2x; ret
|
|
|2: ; cmp fpmop, 9; je >1; ja >2
|
|
| fldlg2; fxch; fyl2x; ret
|
|
|1: ; fsin; ret
|
|
|2: ; cmp fpmop, 11; je >1; ja >9
|
|
| fcos; ret
|
|
|1: ; fptan; fpop; ret
|
|
}
|
|
|9: ; int3 // Bad fpm.
|
|
|
|
|
|// Callable from C: double lj_vm_foldarith(double x, double y, int op)
|
|
|// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
|
|
|// and basic math functions. ORDER ARITH
|
|
|->vm_foldarith:
|
|
if (sse) {
|
|
|.if X64
|
|
|
|
|
| .if X64WIN
|
|
| .define foldop, CARG3d
|
|
| .else
|
|
| .define foldop, CARG1d
|
|
| .endif
|
|
| cmp foldop, 1; je >1; ja >2
|
|
| addsd xmm0, xmm1; ret
|
|
|1: ; subsd xmm0, xmm1; ret
|
|
|2: ; cmp foldop, 3; je >1; ja >2
|
|
| mulsd xmm0, xmm1; ret
|
|
|1: ; divsd xmm0, xmm1; ret
|
|
|2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
|
|
| cmp foldop, 7; je >1; ja >2
|
|
| sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
|
|
|1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
|
|
|2: ; cmp foldop, 9; ja >2
|
|
|.if X64WIN
|
|
| movsd qword [rsp+8], xmm0 // Use scratch area.
|
|
| movsd qword [rsp+16], xmm1
|
|
| fld qword [rsp+8]
|
|
| fld qword [rsp+16]
|
|
|.else
|
|
| movsd qword [rsp-8], xmm0 // Use red zone.
|
|
| movsd qword [rsp-16], xmm1
|
|
| fld qword [rsp-8]
|
|
| fld qword [rsp-16]
|
|
|.endif
|
|
| je >1
|
|
| fpatan
|
|
|7:
|
|
|.if X64WIN
|
|
| fstp qword [rsp+8] // Use scratch area.
|
|
| movsd xmm0, qword [rsp+8]
|
|
|.else
|
|
| fstp qword [rsp-8] // Use red zone.
|
|
| movsd xmm0, qword [rsp-8]
|
|
|.endif
|
|
| ret
|
|
|1: ; fxch; fscale; fpop1; jmp <7
|
|
|2: ; cmp foldop, 11; je >1; ja >9
|
|
| minsd xmm0, xmm1; ret
|
|
|1: ; maxsd xmm0, xmm1; ret
|
|
|9: ; int3 // Bad op.
|
|
|
|
|
|.else // x86 calling convention.
|
|
|
|
|
| .define foldop, eax
|
|
| mov foldop, [esp+20]
|
|
| movsd xmm0, qword [esp+4]
|
|
| movsd xmm1, qword [esp+12]
|
|
| cmp foldop, 1; je >1; ja >2
|
|
| addsd xmm0, xmm1
|
|
|7:
|
|
| movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
|
|
| fld qword [esp+4]
|
|
| ret
|
|
|1: ; subsd xmm0, xmm1; jmp <7
|
|
|2: ; cmp foldop, 3; je >1; ja >2
|
|
| mulsd xmm0, xmm1; jmp <7
|
|
|1: ; divsd xmm0, xmm1; jmp <7
|
|
|2: ; cmp foldop, 5
|
|
| je >1; ja >2
|
|
| call ->vm_mod; jmp <7
|
|
|1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
|
|
|2: ; cmp foldop, 7; je >1; ja >2
|
|
| sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
|
|
|1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
|
|
|2: ; cmp foldop, 9; ja >2
|
|
| fld qword [esp+4] // Reload from stack
|
|
| fld qword [esp+12]
|
|
| je >1
|
|
| fpatan; ret
|
|
|1: ; fxch; fscale; fpop1; ret
|
|
|2: ; cmp foldop, 11; je >1; ja >9
|
|
| minsd xmm0, xmm1; jmp <7
|
|
|1: ; maxsd xmm0, xmm1; jmp <7
|
|
|9: ; int3 // Bad op.
|
|
|
|
|
|.endif
|
|
} else {
|
|
| mov eax, [esp+20]
|
|
| fld qword [esp+4]
|
|
| fld qword [esp+12]
|
|
| cmp eax, 1; je >1; ja >2
|
|
| faddp st1; ret
|
|
|1: ; fsubp st1; ret
|
|
|2: ; cmp eax, 3; je >1; ja >2
|
|
| fmulp st1; ret
|
|
|1: ; fdivp st1; ret
|
|
|2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
|
|
| cmp eax, 7; je >1; ja >2
|
|
| fpop; fchs; ret
|
|
|1: ; fpop; fabs; ret
|
|
|2: ; cmp eax, 9; je >1; ja >2
|
|
| fpatan; ret
|
|
|1: ; fxch; fscale; fpop1; ret
|
|
|2: ; cmp eax, 11; je >1; ja >9
|
|
||if (cmov) {
|
|
| fucomi st1; fcmovnbe st1; fpop1; ret
|
|
|1: ; fucomi st1; fcmovbe st1; fpop1; ret
|
|
||} else {
|
|
| fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret
|
|
|1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret
|
|
||}
|
|
|9: ; int3 // Bad op.
|
|
}
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Miscellaneous functions --------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
|
|
|->vm_cpuid:
|
|
|.if X64
|
|
| mov eax, CARG1d
|
|
| .if X64WIN; push rsi; mov rsi, CARG2; .endif
|
|
| push rbx
|
|
| cpuid
|
|
| mov [rsi], eax
|
|
| mov [rsi+4], ebx
|
|
| mov [rsi+8], ecx
|
|
| mov [rsi+12], edx
|
|
| pop rbx
|
|
| .if X64WIN; pop rsi; .endif
|
|
| ret
|
|
|.else
|
|
| pushfd
|
|
| pop edx
|
|
| mov ecx, edx
|
|
| xor edx, 0x00200000 // Toggle ID bit in flags.
|
|
| push edx
|
|
| popfd
|
|
| pushfd
|
|
| pop edx
|
|
| xor eax, eax // Zero means no features supported.
|
|
| cmp ecx, edx
|
|
| jz >1 // No ID toggle means no CPUID support.
|
|
| mov eax, [esp+4] // Argument 1 is function number.
|
|
| push edi
|
|
| push ebx
|
|
| cpuid
|
|
| mov edi, [esp+16] // Argument 2 is result area.
|
|
| mov [edi], eax
|
|
| mov [edi+4], ebx
|
|
| mov [edi+8], ecx
|
|
| mov [edi+12], edx
|
|
| pop ebx
|
|
| pop edi
|
|
|1:
|
|
| ret
|
|
|.endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
}
|
|
|
|
/* Generate the code for a single instruction. */
|
|
static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
|
|
{
|
|
int vk = 0;
|
|
|// Note: aligning all instructions does not pay off.
|
|
|=>defop:
|
|
|
|
switch (op) {
|
|
|
|
/* -- Comparison ops ---------------------------------------------------- */
|
|
|
|
/* Remember: all ops branch for a true comparison, fall through otherwise. */
|
|
|
|
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
|
|
| // RA = src1, RD = src2, JMP with RD = target
|
|
| ins_AD
|
|
| checknum RA, ->vmeta_comp
|
|
| checknum RD, ->vmeta_comp
|
|
if (sse) {
|
|
| movsd xmm0, qword [BASE+RD*8]
|
|
| add PC, 4
|
|
| ucomisd xmm0, qword [BASE+RA*8]
|
|
} else {
|
|
| fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
|
|
| fld qword [BASE+RD*8]
|
|
| add PC, 4
|
|
| fcomparepp // eax (RD) modified!
|
|
}
|
|
| // Unordered: all of ZF CF PF set, ordered: PF clear.
|
|
| // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
|
|
switch (op) {
|
|
case BC_ISLT:
|
|
| jbe >2
|
|
break;
|
|
case BC_ISGE:
|
|
| ja >2
|
|
break;
|
|
case BC_ISLE:
|
|
| jb >2
|
|
break;
|
|
case BC_ISGT:
|
|
| jae >2
|
|
break;
|
|
default: break; /* Shut up GCC. */
|
|
}
|
|
|1:
|
|
| movzx RD, PC_RD
|
|
| branchPC RD
|
|
|2:
|
|
| ins_next
|
|
break;
|
|
|
|
case BC_ISEQV: case BC_ISNEV:
|
|
vk = op == BC_ISEQV;
|
|
| ins_AD // RA = src1, RD = src2, JMP with RD = target
|
|
| mov RB, [BASE+RD*8+4]
|
|
| add PC, 4
|
|
| cmp RB, LJ_TISNUM; ja >5
|
|
| checknum RA, >5
|
|
if (sse) {
|
|
| movsd xmm0, qword [BASE+RD*8]
|
|
| ucomisd xmm0, qword [BASE+RA*8]
|
|
} else {
|
|
| fld qword [BASE+RA*8]
|
|
| fld qword [BASE+RD*8]
|
|
| fcomparepp // eax (RD) modified!
|
|
}
|
|
iseqne_fp:
|
|
if (vk) {
|
|
| jp >2 // Unordered means not equal.
|
|
| jne >2
|
|
} else {
|
|
| jp >2 // Unordered means not equal.
|
|
| je >1
|
|
}
|
|
iseqne_end:
|
|
if (vk) {
|
|
|1: // EQ: Branch to the target.
|
|
| movzx RD, PC_RD
|
|
| branchPC RD
|
|
|2: // NE: Fallthrough to next instruction.
|
|
} else {
|
|
|2: // NE: Branch to the target.
|
|
| movzx RD, PC_RD
|
|
| branchPC RD
|
|
|1: // EQ: Fallthrough to next instruction.
|
|
}
|
|
| ins_next
|
|
|
|
|
if (op == BC_ISEQV || op == BC_ISNEV) {
|
|
|5: // Either or both types are not numbers.
|
|
| checktp RA, RB // Compare types.
|
|
| jne <2 // Not the same type?
|
|
| cmp RB, LJ_TISPRI
|
|
| jae <1 // Same type and primitive type?
|
|
|
|
|
| // Same types and not a primitive type. Compare GCobj or pvalue.
|
|
| mov RA, [BASE+RA*8]
|
|
| mov RD, [BASE+RD*8]
|
|
| cmp RA, RD
|
|
| je <1 // Same GCobjs or pvalues?
|
|
| cmp RB, LJ_TISTABUD
|
|
| ja <2 // Different objects and not table/ud?
|
|
|
|
|
| // Different tables or userdatas. Need to check __eq metamethod.
|
|
| // Field metatable must be at same offset for GCtab and GCudata!
|
|
| mov TAB:RB, TAB:RA->metatable
|
|
| test TAB:RB, TAB:RB
|
|
| jz <2 // No metatable?
|
|
| test byte TAB:RB->nomm, 1<<MM_eq
|
|
| jnz <2 // Or 'no __eq' flag set?
|
|
if (vk) {
|
|
| xor RB, RB // ne = 0
|
|
} else {
|
|
| mov RB, 1 // ne = 1
|
|
}
|
|
| jmp ->vmeta_equal // Handle __eq metamethod.
|
|
}
|
|
break;
|
|
case BC_ISEQS: case BC_ISNES:
|
|
vk = op == BC_ISEQS;
|
|
| ins_AND // RA = src, RD = str const, JMP with RD = target
|
|
| add PC, 4
|
|
| checkstr RA, >2
|
|
| mov RA, [BASE+RA*8]
|
|
| cmp RA, [KBASE+RD*4]
|
|
iseqne_test:
|
|
if (vk) {
|
|
| jne >2
|
|
} else {
|
|
| je >1
|
|
}
|
|
goto iseqne_end;
|
|
case BC_ISEQN: case BC_ISNEN:
|
|
vk = op == BC_ISEQN;
|
|
| ins_AD // RA = src, RD = num const, JMP with RD = target
|
|
| add PC, 4
|
|
| checknum RA, >2
|
|
if (sse) {
|
|
| movsd xmm0, qword [KBASE+RD*8]
|
|
| ucomisd xmm0, qword [BASE+RA*8]
|
|
} else {
|
|
| fld qword [BASE+RA*8]
|
|
| fld qword [KBASE+RD*8]
|
|
| fcomparepp // eax (RD) modified!
|
|
}
|
|
goto iseqne_fp;
|
|
case BC_ISEQP: case BC_ISNEP:
|
|
vk = op == BC_ISEQP;
|
|
| ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
|
|
| add PC, 4
|
|
| checktp RA, RD
|
|
goto iseqne_test;
|
|
|
|
/* -- Unary test and copy ops ------------------------------------------- */
|
|
|
|
case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
|
|
| ins_AD // RA = dst or unused, RD = src, JMP with RD = target
|
|
| mov RB, [BASE+RD*8+4]
|
|
| add PC, 4
|
|
| cmp RB, LJ_TISTRUECOND
|
|
if (op == BC_IST || op == BC_ISTC) {
|
|
| jae >1
|
|
} else {
|
|
| jb >1
|
|
}
|
|
if (op == BC_ISTC || op == BC_ISFC) {
|
|
| mov [BASE+RA*8+4], RB
|
|
| mov RB, [BASE+RD*8]
|
|
| mov [BASE+RA*8], RB
|
|
}
|
|
| movzx RD, PC_RD
|
|
| branchPC RD
|
|
|1: // Fallthrough to the next instruction.
|
|
| ins_next
|
|
break;
|
|
|
|
/* -- Unary ops --------------------------------------------------------- */
|
|
|
|
case BC_MOV:
|
|
| ins_AD // RA = dst, RD = src
|
|
| mov RB, [BASE+RD*8+4]
|
|
| mov RD, [BASE+RD*8] // Overwrites RD.
|
|
| mov [BASE+RA*8+4], RB
|
|
| mov [BASE+RA*8], RD
|
|
| ins_next_
|
|
break;
|
|
case BC_NOT:
|
|
| ins_AD // RA = dst, RD = src
|
|
| xor RB, RB
|
|
| checktp RD, LJ_TISTRUECOND
|
|
| adc RB, LJ_TTRUE
|
|
| mov [BASE+RA*8+4], RB
|
|
| ins_next
|
|
break;
|
|
case BC_UNM:
|
|
| ins_AD // RA = dst, RD = src
|
|
| checknum RD, ->vmeta_unm
|
|
if (sse) {
|
|
| movsd xmm0, qword [BASE+RD*8]
|
|
| sseconst_sign xmm1, RDa
|
|
| xorps xmm0, xmm1
|
|
| movsd qword [BASE+RA*8], xmm0
|
|
} else {
|
|
| fld qword [BASE+RD*8]
|
|
| fchs
|
|
| fstp qword [BASE+RA*8]
|
|
}
|
|
| ins_next
|
|
break;
|
|
case BC_LEN:
|
|
| ins_AD // RA = dst, RD = src
|
|
| checkstr RD, >2
|
|
| mov STR:RD, [BASE+RD*8]
|
|
if (sse) {
|
|
| xorps xmm0, xmm0
|
|
| cvtsi2sd xmm0, dword STR:RD->len
|
|
|1:
|
|
| movsd qword [BASE+RA*8], xmm0
|
|
} else {
|
|
| fild dword STR:RD->len
|
|
|1:
|
|
| fstp qword [BASE+RA*8]
|
|
}
|
|
| ins_next
|
|
|2:
|
|
| checktab RD, ->vmeta_len
|
|
| mov TAB:FCARG1, [BASE+RD*8]
|
|
| mov RB, BASE // Save BASE.
|
|
| call extern lj_tab_len@4 // (GCtab *t)
|
|
| // Length of table returned in eax (RC).
|
|
if (sse) {
|
|
| cvtsi2sd xmm0, RC
|
|
| mov BASE, RB // Restore BASE.
|
|
} else {
|
|
|.if not X64
|
|
| mov ARG1, RC
|
|
| mov BASE, RB // Restore BASE.
|
|
| fild ARG1
|
|
|.endif
|
|
}
|
|
| movzx RA, PC_RA
|
|
| jmp <1
|
|
break;
|
|
|
|
/* -- Binary ops -------------------------------------------------------- */
|
|
|
|
|// Arithmetic prologue: type-check the operands and apply the binary op.
|// ins     = x87 instruction (operates on st0 with a memory operand).
|// sseins  = SSE2 instruction, ssereg = its destination register.
|// The operand mode is derived at build time from the opcode: VN (var op
|// num const), NV (num const op var) or VV (var op var). Failed number
|// checks branch to the matching vmeta_arith_* fallback.
|.macro ins_arithpre, ins, sseins, ssereg
|
|
| ins_ABC
|
|
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);  /* 0: VN, 1: NV, else VV. */
|
|
||switch (vk) {
|
|
||case 0:
|
|
| checknum RB, ->vmeta_arith_vn
|
|
||if (sse) {
|
|
| movsd xmm0, qword [BASE+RB*8]
|
|
| sseins ssereg, qword [KBASE+RC*8]
|
|
||} else {
|
|
| fld qword [BASE+RB*8]
|
|
| ins qword [KBASE+RC*8]
|
|
||}
|
|
|| break;
|
|
||case 1:
|
|
| checknum RB, ->vmeta_arith_nv
|
|
||if (sse) {
|
|
| movsd xmm0, qword [KBASE+RC*8]
|
|
| sseins ssereg, qword [BASE+RB*8]
|
|
||} else {
|
|
| fld qword [KBASE+RC*8]
|
|
| ins qword [BASE+RB*8]
|
|
||}
|
|
|| break;
|
|
||default:
|
|
| checknum RB, ->vmeta_arith_vv
|
|
| checknum RC, ->vmeta_arith_vv
|
|
||if (sse) {
|
|
| movsd xmm0, qword [BASE+RB*8]
|
|
| sseins ssereg, qword [BASE+RC*8]
|
|
||} else {
|
|
| fld qword [BASE+RB*8]
|
|
| ins qword [BASE+RC*8]
|
|
||}
|
|
|| break;
|
|
||}
|
|
|.endmacro
|
|
|
|
|
|// Arithmetic epilogue: store the result (st0 for x87, xmm0 for SSE2)
|// into the destination slot BASE[RA].
|.macro ins_arithpost
|
|
||if (sse) {
|
|
| movsd qword [BASE+RA*8], xmm0
|
|
||} else {
|
|
| fstp qword [BASE+RA*8]
|
|
||}
|
|
|.endmacro
|
|
|
|
|
|// Complete binary arithmetic instruction template: load and combine the
|// operands with ins/sseins, store the result, then dispatch to the next
|// bytecode instruction.
|.macro ins_arith, ins, sseins
|
|
| ins_arithpre ins, sseins, xmm0
|
|
| ins_arithpost
|
|
| ins_next
|
|
|.endmacro
|
|
|
|
| // RA = dst, RB = src1 or num const, RC = src2 or num const
|
|
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
|
|
| ins_arith fadd, addsd
|
|
break;
|
|
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
|
|
| ins_arith fsub, subsd
|
|
break;
|
|
case BC_MULVN: case BC_MULNV: case BC_MULVV:
|
|
| ins_arith fmul, mulsd
|
|
break;
|
|
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
|
|
| ins_arith fdiv, divsd
|
|
break;
|
|
case BC_MODVN:
|
|
| ins_arithpre fld, movsd, xmm1
|
|
|->BC_MODVN_Z:
|
|
| call ->vm_mod
|
|
| ins_arithpost
|
|
| ins_next
|
|
break;
|
|
case BC_MODNV: case BC_MODVV:
|
|
| ins_arithpre fld, movsd, xmm1
|
|
| jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
|
|
break;
|
|
case BC_POW:
|
|
| ins_arithpre fld, movsd, xmm1
|
|
| call ->vm_pow
|
|
| ins_arithpost
|
|
| ins_next
|
|
break;
|
|
|
|
case BC_CAT:
|
|
| ins_ABC // RA = dst, RB = src_start, RC = src_end
|
|
|.if X64
|
|
| mov L:CARG1d, SAVE_L
|
|
| mov L:CARG1d->base, BASE
|
|
| lea CARG2d, [BASE+RC*8]
|
|
| mov CARG3d, RC
|
|
| sub CARG3d, RB
|
|
|->BC_CAT_Z:
|
|
| mov L:RB, L:CARG1d
|
|
|.else
|
|
| lea RA, [BASE+RC*8]
|
|
| sub RC, RB
|
|
| mov ARG2, RA
|
|
| mov ARG3, RC
|
|
|->BC_CAT_Z:
|
|
| mov L:RB, SAVE_L
|
|
| mov ARG1, L:RB
|
|
| mov L:RB->base, BASE
|
|
|.endif
|
|
| mov SAVE_PC, PC
|
|
| call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
|
|
| // NULL (finished) or TValue * (metamethod) returned in eax (RC).
|
|
| mov BASE, L:RB->base
|
|
| test RC, RC
|
|
| jnz ->vmeta_binop
|
|
| movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
|
|
| movzx RA, PC_RA
|
|
| mov RC, [BASE+RB*8+4]
|
|
| mov RB, [BASE+RB*8]
|
|
| mov [BASE+RA*8+4], RC
|
|
| mov [BASE+RA*8], RB
|
|
| ins_next
|
|
break;
|
|
|
|
/* -- Constant ops ------------------------------------------------------ */
|
|
|
|
case BC_KSTR:
|
|
| ins_AND // RA = dst, RD = str const (~)
|
|
| mov RD, [KBASE+RD*4]
|
|
| mov dword [BASE+RA*8+4], LJ_TSTR
|
|
| mov [BASE+RA*8], RD
|
|
| ins_next
|
|
break;
|
|
case BC_KSHORT:
|
|
| ins_AD // RA = dst, RD = signed int16 literal
|
|
if (sse) {
|
|
| movsx RD, RDW // Sign-extend literal.
|
|
| cvtsi2sd xmm0, RD
|
|
| movsd qword [BASE+RA*8], xmm0
|
|
} else {
|
|
| fild PC_RD // Refetch signed RD from instruction.
|
|
| fstp qword [BASE+RA*8]
|
|
}
|
|
| ins_next
|
|
break;
|
|
case BC_KNUM:
|
|
| ins_AD // RA = dst, RD = num const
|
|
if (sse) {
|
|
| movsd xmm0, qword [KBASE+RD*8]
|
|
| movsd qword [BASE+RA*8], xmm0
|
|
} else {
|
|
| fld qword [KBASE+RD*8]
|
|
| fstp qword [BASE+RA*8]
|
|
}
|
|
| ins_next
|
|
break;
|
|
case BC_KPRI:
|
|
| ins_AND // RA = dst, RD = primitive type (~)
|
|
| mov [BASE+RA*8+4], RD
|
|
| ins_next
|
|
break;
|
|
case BC_KNIL:
|
|
| ins_AD // RA = dst_start, RD = dst_end
|
|
| lea RA, [BASE+RA*8+12]
|
|
| lea RD, [BASE+RD*8+4]
|
|
| mov RB, LJ_TNIL
|
|
| mov [RA-8], RB // Sets minimum 2 slots.
|
|
|1:
|
|
| mov [RA], RB
|
|
| add RA, 8
|
|
| cmp RA, RD
|
|
| jbe <1
|
|
| ins_next
|
|
break;
|
|
|
|
/* -- Upvalue and function ops ------------------------------------------ */
|
|
|
|
case BC_UGET:
|
|
| ins_AD // RA = dst, RD = upvalue #
|
|
| mov LFUNC:RB, [BASE-8]
|
|
| mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
|
|
| mov RB, UPVAL:RB->v
|
|
| mov RD, [RB+4]
|
|
| mov RB, [RB]
|
|
| mov [BASE+RA*8+4], RD
|
|
| mov [BASE+RA*8], RB
|
|
| ins_next
|
|
break;
|
|
case BC_USETV:
|
|
#define TV2MARKOFS \
|
|
((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
|
|
| ins_AD // RA = upvalue #, RD = src
|
|
| mov LFUNC:RB, [BASE-8]
|
|
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
|
|
| cmp byte UPVAL:RB->closed, 0
|
|
| mov RB, UPVAL:RB->v
|
|
| mov RA, [BASE+RD*8]
|
|
| mov RD, [BASE+RD*8+4]
|
|
| mov [RB], RA
|
|
| mov [RB+4], RD
|
|
| jz >1
|
|
| // Check barrier for closed upvalue.
|
|
| test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
|
|
| jnz >2
|
|
|1:
|
|
| ins_next
|
|
|
|
|
|2: // Upvalue is black. Check if new value is collectable and white.
|
|
| sub RD, LJ_TISGCV
|
|
| cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
|
|
| jbe <1
|
|
| test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
|
|
| jz <1
|
|
| // Crossed a write barrier. Move the barrier forward.
|
|
|.if X64 and not X64WIN
|
|
| mov FCARG2, RB
|
|
| mov RB, BASE // Save BASE.
|
|
|.else
|
|
| xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
|
|
|.endif
|
|
| lea GL:FCARG1, [DISPATCH+GG_DISP2G]
|
|
| call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
|
|
| mov BASE, RB // Restore BASE.
|
|
| jmp <1
|
|
break;
|
|
#undef TV2MARKOFS
|
|
case BC_USETS:
|
|
| ins_AND // RA = upvalue #, RD = str const (~)
|
|
| mov LFUNC:RB, [BASE-8]
|
|
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
|
|
| mov GCOBJ:RA, [KBASE+RD*4]
|
|
| mov RD, UPVAL:RB->v
|
|
| mov [RD], GCOBJ:RA
|
|
| mov dword [RD+4], LJ_TSTR
|
|
| test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
|
|
| jnz >2
|
|
|1:
|
|
| ins_next
|
|
|
|
|
|2: // Check if string is white and ensure upvalue is closed.
|
|
| test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
|
|
| jz <1
|
|
| cmp byte UPVAL:RB->closed, 0
|
|
| jz <1
|
|
| // Crossed a write barrier. Move the barrier forward.
|
|
| mov RB, BASE // Save BASE (FCARG2 == BASE).
|
|
| mov FCARG2, RD
|
|
| lea GL:FCARG1, [DISPATCH+GG_DISP2G]
|
|
| call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
|
|
| mov BASE, RB // Restore BASE.
|
|
| jmp <1
|
|
break;
|
|
case BC_USETN:
|
|
| ins_AD // RA = upvalue #, RD = num const
|
|
| mov LFUNC:RB, [BASE-8]
|
|
if (sse) {
|
|
| movsd xmm0, qword [KBASE+RD*8]
|
|
} else {
|
|
| fld qword [KBASE+RD*8]
|
|
}
|
|
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
|
|
| mov RA, UPVAL:RB->v
|
|
if (sse) {
|
|
| movsd qword [RA], xmm0
|
|
} else {
|
|
| fstp qword [RA]
|
|
}
|
|
| ins_next
|
|
break;
|
|
case BC_USETP:
|
|
| ins_AND // RA = upvalue #, RD = primitive type (~)
|
|
| mov LFUNC:RB, [BASE-8]
|
|
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
|
|
| mov RA, UPVAL:RB->v
|
|
| mov [RA+4], RD
|
|
| ins_next
|
|
break;
|
|
case BC_UCLO:
|
|
| ins_AD // RA = level, RD = target
|
|
| branchPC RD // Do this first to free RD.
|
|
| mov L:RB, SAVE_L
|
|
| cmp dword L:RB->openupval, 0
|
|
| je >1
|
|
| mov L:RB->base, BASE
|
|
| lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
|
|
| mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
|
|
| call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
|
|
| mov BASE, L:RB->base
|
|
|1:
|
|
| ins_next
|
|
break;
|
|
|
|
case BC_FNEW:
|
|
| ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
|
|
|.if X64
|
|
| mov L:RB, SAVE_L
|
|
| mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
|
|
| mov CARG3d, [BASE-8]
|
|
| mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
|
|
| mov CARG1d, L:RB
|
|
|.else
|
|
| mov LFUNC:RA, [BASE-8]
|
|
| mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
|
|
| mov L:RB, SAVE_L
|
|
| mov ARG3, LFUNC:RA
|
|
| mov ARG2, PROTO:RD
|
|
| mov ARG1, L:RB
|
|
| mov L:RB->base, BASE
|
|
|.endif
|
|
| mov SAVE_PC, PC
|
|
| // (lua_State *L, GCproto *pt, GCfuncL *parent)
|
|
| call extern lj_func_newL_gc
|
|
| // GCfuncL * returned in eax (RC).
|
|
| mov BASE, L:RB->base
|
|
| movzx RA, PC_RA
|
|
| mov [BASE+RA*8], LFUNC:RC
|
|
| mov dword [BASE+RA*8+4], LJ_TFUNC
|
|
| ins_next
|
|
break;
|
|
|
|
/* -- Table ops --------------------------------------------------------- */
|
|
|
|
case BC_TNEW:
|
|
| ins_AD // RA = dst, RD = hbits|asize
|
|
|.if X64
|
|
| mov L:CARG1d, SAVE_L
|
|
| mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
|
|
|1:
|
|
| mov CARG3d, RD
|
|
| and RD, 0x7ff
|
|
| shr CARG3d, 11
|
|
| cmp RD, 0x7ff
|
|
| je >3
|
|
|2:
|
|
| mov CARG2d, RD
|
|
| mov RD, [DISPATCH+DISPATCH_GL(gc.total)]
|
|
| mov L:RB, L:CARG1d
|
|
| cmp RD, [DISPATCH+DISPATCH_GL(gc.threshold)]
|
|
| mov SAVE_PC, PC
|
|
| jae >5
|
|
|.else
|
|
| mov RB, RD
|
|
| and RD, 0x7ff
|
|
| shr RB, 11
|
|
| cmp RD, 0x7ff
|
|
| je >3
|
|
|2:
|
|
| mov ARG3, RB
|
|
| mov L:RB, SAVE_L
|
|
| mov ARG2, RD
|
|
| mov SAVE_PC, PC
|
|
| mov RD, [DISPATCH+DISPATCH_GL(gc.total)]
|
|
| mov ARG1, L:RB
|
|
| cmp RD, [DISPATCH+DISPATCH_GL(gc.threshold)]
|
|
| mov L:RB->base, BASE
|
|
| jae >5
|
|
|1:
|
|
|.endif
|
|
| call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
|
|
| // Table * returned in eax (RC).
|
|
| mov BASE, L:RB->base
|
|
| movzx RA, PC_RA
|
|
| mov [BASE+RA*8], TAB:RC
|
|
| mov dword [BASE+RA*8+4], LJ_TTAB
|
|
| ins_next
|
|
|3: // Turn 0x7ff into 0x801.
|
|
| mov RD, 0x801
|
|
| jmp <2
|
|
|5:
|
|
|.if X64
|
|
| call extern lj_gc_step_fixtop@4 // (lua_State *L)
|
|
| movzx RD, PC_RD
|
|
| mov L:CARG1d, L:RB
|
|
| jmp <1
|
|
|.else
|
|
| mov L:FCARG1, L:RB
|
|
| call extern lj_gc_step_fixtop@4 // (lua_State *L)
|
|
| jmp <1
|
|
|.endif
|
|
break;
|
|
case BC_TDUP:
|
|
| ins_AND // RA = dst, RD = table const (~) (holding template table)
|
|
| mov L:RB, SAVE_L
|
|
| mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
|
|
| mov SAVE_PC, PC
|
|
| cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
|
|
| mov L:RB->base, BASE
|
|
| jae >3
|
|
|2:
|
|
| mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
|
|
| mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
|
|
| call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
|
|
| // Table * returned in eax (RC).
|
|
| mov BASE, L:RB->base
|
|
| movzx RA, PC_RA
|
|
| mov [BASE+RA*8], TAB:RC
|
|
| mov dword [BASE+RA*8+4], LJ_TTAB
|
|
| ins_next
|
|
|3:
|
|
| mov L:FCARG1, L:RB
|
|
| call extern lj_gc_step_fixtop@4 // (lua_State *L)
|
|
| movzx RD, PC_RD // Need to reload RD.
|
|
| not RDa
|
|
| jmp <2
|
|
break;
|
|
|
|
case BC_GGET:
|
|
| ins_AND // RA = dst, RD = str const (~)
|
|
| mov LFUNC:RB, [BASE-8]
|
|
| mov TAB:RB, LFUNC:RB->env
|
|
| mov STR:RC, [KBASE+RD*4]
|
|
| jmp ->BC_TGETS_Z
|
|
break;
|
|
case BC_GSET:
|
|
| ins_AND // RA = src, RD = str const (~)
|
|
| mov LFUNC:RB, [BASE-8]
|
|
| mov TAB:RB, LFUNC:RB->env
|
|
| mov STR:RC, [KBASE+RD*4]
|
|
| jmp ->BC_TSETS_Z
|
|
break;
|
|
|
|
case BC_TGETV:
|
|
| ins_ABC // RA = dst, RB = table, RC = key
|
|
| checktab RB, ->vmeta_tgetv
|
|
| mov TAB:RB, [BASE+RB*8]
|
|
|
|
|
| // Integer key? Convert number to int and back and compare.
|
|
| checknum RC, >5
|
|
if (sse) {
|
|
| movsd xmm0, qword [BASE+RC*8]
|
|
| cvtsd2si RC, xmm0
|
|
| cvtsi2sd xmm1, RC
|
|
| ucomisd xmm0, xmm1
|
|
} else {
|
|
|.if not X64
|
|
| fld qword [BASE+RC*8]
|
|
| fist ARG1
|
|
| fild ARG1
|
|
| fcomparepp // eax (RC) modified!
|
|
| mov RC, ARG1
|
|
|.endif
|
|
}
|
|
| jne ->vmeta_tgetv // Generic numeric key? Use fallback.
|
|
| cmp RC, TAB:RB->asize // Takes care of unordered, too.
|
|
| jae ->vmeta_tgetv // Not in array part? Use fallback.
|
|
| shl RC, 3
|
|
| add RC, TAB:RB->array
|
|
| cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
|
|
| je >2
|
|
|1:
|
|
| mov RB, [RC] // Get array slot.
|
|
| mov RC, [RC+4]
|
|
| mov [BASE+RA*8], RB
|
|
| mov [BASE+RA*8+4], RC
|
|
| ins_next
|
|
|
|
|
|2: // Check for __index if table value is nil.
|
|
| cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
|
|
| jz <1
|
|
| mov TAB:RA, TAB:RB->metatable
|
|
| test byte TAB:RA->nomm, 1<<MM_index
|
|
| jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
|
|
| movzx RA, PC_RA // Restore RA.
|
|
| jmp <1
|
|
|
|
|
|5: // String key?
|
|
| checkstr RC, ->vmeta_tgetv
|
|
| mov STR:RC, [BASE+RC*8]
|
|
| jmp ->BC_TGETS_Z
|
|
break;
|
|
case BC_TGETS:
|
|
| ins_ABC // RA = dst, RB = table, RC = str const (~)
|
|
| not RCa
|
|
| mov STR:RC, [KBASE+RC*4]
|
|
| checktab RB, ->vmeta_tgets
|
|
| mov TAB:RB, [BASE+RB*8]
|
|
|->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
|
|
| mov RA, TAB:RB->hmask
|
|
| and RA, STR:RC->hash
|
|
| imul RA, #NODE
|
|
| add NODE:RA, TAB:RB->node
|
|
|1:
|
|
| cmp dword NODE:RA->key.it, LJ_TSTR
|
|
| jne >4
|
|
| cmp dword NODE:RA->key.gcr, STR:RC
|
|
| jne >4
|
|
| // Ok, key found. Assumes: offsetof(Node, val) == 0
|
|
| cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
|
|
| je >5 // Key found, but nil value?
|
|
| movzx RC, PC_RA
|
|
| mov RB, [RA] // Get node value.
|
|
| mov RA, [RA+4]
|
|
| mov [BASE+RC*8], RB
|
|
|2:
|
|
| mov [BASE+RC*8+4], RA
|
|
| ins_next
|
|
|
|
|
|3:
|
|
| movzx RC, PC_RA
|
|
| mov RA, LJ_TNIL
|
|
| jmp <2
|
|
|
|
|
|4: // Follow hash chain.
|
|
| mov NODE:RA, NODE:RA->next
|
|
| test NODE:RA, NODE:RA
|
|
| jnz <1
|
|
| // End of hash chain: key not found, nil result.
|
|
|
|
|
|5: // Check for __index if table value is nil.
|
|
| mov TAB:RA, TAB:RB->metatable
|
|
| test TAB:RA, TAB:RA
|
|
| jz <3 // No metatable: done.
|
|
| test byte TAB:RA->nomm, 1<<MM_index
|
|
| jnz <3 // 'no __index' flag set: done.
|
|
| jmp ->vmeta_tgets // Caveat: preserve STR:RC.
|
|
break;
|
|
case BC_TGETB:
|
|
| ins_ABC // RA = dst, RB = table, RC = byte literal
|
|
| checktab RB, ->vmeta_tgetb
|
|
| mov TAB:RB, [BASE+RB*8]
|
|
| cmp RC, TAB:RB->asize
|
|
| jae ->vmeta_tgetb
|
|
| shl RC, 3
|
|
| add RC, TAB:RB->array
|
|
| cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
|
|
| je >2
|
|
|1:
|
|
| mov RB, [RC] // Get array slot.
|
|
| mov RC, [RC+4]
|
|
| mov [BASE+RA*8], RB
|
|
| mov [BASE+RA*8+4], RC
|
|
| ins_next
|
|
|
|
|
|2: // Check for __index if table value is nil.
|
|
| cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
|
|
| jz <1
|
|
| mov TAB:RA, TAB:RB->metatable
|
|
| test byte TAB:RA->nomm, 1<<MM_index
|
|
| jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
|
|
| movzx RA, PC_RA // Restore RA.
|
|
| jmp <1
|
|
break;
|
|
|
|
case BC_TSETV:
|
|
| ins_ABC // RA = src, RB = table, RC = key
|
|
| checktab RB, ->vmeta_tsetv
|
|
| mov TAB:RB, [BASE+RB*8]
|
|
|
|
|
| // Integer key? Convert number to int and back and compare.
|
|
| checknum RC, >5
|
|
if (sse) {
|
|
| movsd xmm0, qword [BASE+RC*8]
|
|
| cvtsd2si RC, xmm0
|
|
| cvtsi2sd xmm1, RC
|
|
| ucomisd xmm0, xmm1
|
|
} else {
|
|
|.if not X64
|
|
| fld qword [BASE+RC*8]
|
|
| fist ARG1
|
|
| fild ARG1
|
|
| fcomparepp // eax (RC) modified!
|
|
| mov RC, ARG1
|
|
|.endif
|
|
}
|
|
| jne ->vmeta_tsetv // Generic numeric key? Use fallback.
|
|
| cmp RC, TAB:RB->asize // Takes care of unordered, too.
|
|
| jae ->vmeta_tsetv
|
|
| shl RC, 3
|
|
| add RC, TAB:RB->array
|
|
| cmp dword [RC+4], LJ_TNIL
|
|
| je >3 // Previous value is nil?
|
|
|1:
|
|
| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
|
|
| jnz >7
|
|
|2:
|
|
| mov RB, [BASE+RA*8+4] // Set array slot.
|
|
| mov RA, [BASE+RA*8]
|
|
| mov [RC+4], RB
|
|
| mov [RC], RA
|
|
| ins_next
|
|
|
|
|
|3: // Check for __newindex if previous value is nil.
|
|
| cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
|
|
| jz <1
|
|
| mov TAB:RA, TAB:RB->metatable
|
|
| test byte TAB:RA->nomm, 1<<MM_newindex
|
|
| jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
|
|
| movzx RA, PC_RA // Restore RA.
|
|
| jmp <1
|
|
|
|
|
|5: // String key?
|
|
| checkstr RC, ->vmeta_tsetv
|
|
| mov STR:RC, [BASE+RC*8]
|
|
| jmp ->BC_TSETS_Z
|
|
|
|
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
| barrierback TAB:RB, RA
|
|
| movzx RA, PC_RA // Restore RA.
|
|
| jmp <2
|
|
break;
|
|
case BC_TSETS:
|
|
| ins_ABC // RA = src, RB = table, RC = str const (~)
|
|
| not RCa
|
|
| mov STR:RC, [KBASE+RC*4]
|
|
| checktab RB, ->vmeta_tsets
|
|
| mov TAB:RB, [BASE+RB*8]
|
|
|->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
|
|
| mov RA, TAB:RB->hmask
|
|
| and RA, STR:RC->hash
|
|
| imul RA, #NODE
|
|
| mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
|
|
| add NODE:RA, TAB:RB->node
|
|
|1:
|
|
| cmp dword NODE:RA->key.it, LJ_TSTR
|
|
| jne >5
|
|
| cmp dword NODE:RA->key.gcr, STR:RC
|
|
| jne >5
|
|
| // Ok, key found. Assumes: offsetof(Node, val) == 0
|
|
| cmp dword [RA+4], LJ_TNIL
|
|
| je >4 // Previous value is nil?
|
|
|2:
|
|
| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
|
|
| jnz >7
|
|
|3:
|
|
| movzx RC, PC_RA
|
|
| mov RB, [BASE+RC*8+4] // Set node value.
|
|
| mov RC, [BASE+RC*8]
|
|
| mov [RA+4], RB
|
|
| mov [RA], RC
|
|
| ins_next
|
|
|
|
|
|4: // Check for __newindex if previous value is nil.
|
|
| cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
|
|
| jz <2
|
|
| mov TMP1, RA // Save RA.
|
|
| mov TAB:RA, TAB:RB->metatable
|
|
| test byte TAB:RA->nomm, 1<<MM_newindex
|
|
| jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
|
|
| mov RA, TMP1 // Restore RA.
|
|
| jmp <2
|
|
|
|
|
|5: // Follow hash chain.
|
|
| mov NODE:RA, NODE:RA->next
|
|
| test NODE:RA, NODE:RA
|
|
| jnz <1
|
|
| // End of hash chain: key not found, add a new one.
|
|
|
|
|
| // But check for __newindex first.
|
|
| mov TAB:RA, TAB:RB->metatable
|
|
| test TAB:RA, TAB:RA
|
|
| jz >6 // No metatable: continue.
|
|
| test byte TAB:RA->nomm, 1<<MM_newindex
|
|
| jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
|
|
|6:
|
|
| mov TMP1, STR:RC
|
|
| mov TMP2, LJ_TSTR
|
|
| mov TMP3, TAB:RB // Save TAB:RB for us.
|
|
|.if X64
|
|
| mov L:CARG1d, SAVE_L
|
|
| mov L:CARG1d->base, BASE
|
|
| lea CARG3, TMP1
|
|
| mov CARG2d, TAB:RB
|
|
| mov L:RB, L:CARG1d
|
|
|.else
|
|
| lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
|
|
| mov ARG2, TAB:RB
|
|
| mov L:RB, SAVE_L
|
|
| mov ARG3, RC
|
|
| mov ARG1, L:RB
|
|
| mov L:RB->base, BASE
|
|
|.endif
|
|
| mov SAVE_PC, PC
|
|
| call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
|
|
| // Handles write barrier for the new key. TValue * returned in eax (RC).
|
|
| mov BASE, L:RB->base
|
|
| mov TAB:RB, TMP3 // Need TAB:RB for barrier.
|
|
| mov RA, eax
|
|
| jmp <2 // Must check write barrier for value.
|
|
|
|
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
| barrierback TAB:RB, RC // Destroys STR:RC.
|
|
| jmp <3
|
|
break;
|
|
case BC_TSETB:
|
|
| ins_ABC // RA = src, RB = table, RC = byte literal
|
|
| checktab RB, ->vmeta_tsetb
|
|
| mov TAB:RB, [BASE+RB*8]
|
|
| cmp RC, TAB:RB->asize
|
|
| jae ->vmeta_tsetb
|
|
| shl RC, 3
|
|
| add RC, TAB:RB->array
|
|
| cmp dword [RC+4], LJ_TNIL
|
|
| je >3 // Previous value is nil?
|
|
|1:
|
|
| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
|
|
| jnz >7
|
|
|2:
|
|
| mov RB, [BASE+RA*8+4] // Set array slot.
|
|
| mov RA, [BASE+RA*8]
|
|
| mov [RC+4], RB
|
|
| mov [RC], RA
|
|
| ins_next
|
|
|
|
|
|3: // Check for __newindex if previous value is nil.
|
|
| cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
|
|
| jz <1
|
|
| mov TAB:RA, TAB:RB->metatable
|
|
| test byte TAB:RA->nomm, 1<<MM_newindex
|
|
| jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
|
|
| movzx RA, PC_RA // Restore RA.
|
|
| jmp <1
|
|
|
|
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
| barrierback TAB:RB, RA
|
|
| movzx RA, PC_RA // Restore RA.
|
|
| jmp <2
|
|
break;
|
|
|
|
case BC_TSETM:
|
|
| ins_AD // RA = base (table at base-1), RD = num const (start index)
|
|
| mov TMP1, KBASE // Need one more free register.
|
|
if (sse) {
|
|
| movsd xmm0, qword [KBASE+RD*8]
|
|
} else {
|
|
|.if not X64
|
|
| fld qword [KBASE+RD*8]
|
|
| fistp ARG4 // Const is guaranteed to be an int.
|
|
|.endif
|
|
}
|
|
|1:
|
|
| lea RA, [BASE+RA*8]
|
|
| mov TAB:RB, [RA-8] // Guaranteed to be a table.
|
|
| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
|
|
| jnz >7
|
|
|2:
|
|
| mov RD, MULTRES
|
|
if (sse) {
|
|
| cvtsd2si KBASE, xmm0 // Const is guaranteed to be an int.
|
|
} else {
|
|
|.if not X64
|
|
| mov KBASE, ARG4
|
|
|.endif
|
|
}
|
|
| sub RD, 1
|
|
| jz >4 // Nothing to copy?
|
|
| add RD, KBASE // Compute needed size.
|
|
| cmp RD, TAB:RB->asize
|
|
| jae >5 // Does not fit into array part?
|
|
| sub RD, KBASE
|
|
| shl KBASE, 3
|
|
| add KBASE, TAB:RB->array
|
|
|3: // Copy result slots to table.
|
|
| mov RB, [RA]
|
|
| mov [KBASE], RB
|
|
| mov RB, [RA+4]
|
|
| add RA, 8
|
|
| mov [KBASE+4], RB
|
|
| add KBASE, 8
|
|
| sub RD, 1
|
|
| jnz <3
|
|
|4:
|
|
| mov KBASE, TMP1
|
|
| ins_next
|
|
|
|
|
|5: // Need to resize array part.
|
|
|.if X64
|
|
| mov L:CARG1d, SAVE_L
|
|
| mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
|
|
| mov CARG2d, TAB:RB
|
|
| mov CARG3d, RD
|
|
| mov L:RB, L:CARG1d
|
|
|.else
|
|
| mov ARG2, TAB:RB
|
|
| mov L:RB, SAVE_L
|
|
| mov L:RB->base, BASE
|
|
| mov ARG3, RD
|
|
| mov ARG1, L:RB
|
|
|.endif
|
|
| mov SAVE_PC, PC
|
|
| call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
|
|
| mov BASE, L:RB->base
|
|
| movzx RA, PC_RA // Restore RA.
|
|
| jmp <1 // Retry.
|
|
|
|
|
|7: // Possible table write barrier for any value. Skip valiswhite check.
|
|
| barrierback TAB:RB, RD
|
|
| jmp <2
|
|
break;
|
|
|
|
/* -- Calls and vararg handling ----------------------------------------- */
|
|
|
|
case BC_CALL: case BC_CALLM:
|
|
| ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
|
|
if (op == BC_CALLM) {
|
|
| add NARGS:RC, MULTRES
|
|
}
|
|
| lea RA, [BASE+RA*8+8]
|
|
| mov LFUNC:RB, [RA-8]
|
|
| cmp dword [RA-4], LJ_TFUNC
|
|
| jne ->vmeta_call
|
|
| jmp aword LFUNC:RB->gate
|
|
break;
|
|
|
|
case BC_CALLMT:
|
|
| ins_AD // RA = base, RD = extra_nargs
|
|
| add NARGS:RD, MULTRES
|
|
| // Fall through. Assumes BC_CALLMT follows and ins_AD is a no-op.
|
|
break;
|
|
case BC_CALLT:
|
|
| ins_AD // RA = base, RD = nargs+1
|
|
| lea RA, [BASE+RA*8+8]
|
|
| mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
|
|
| mov LFUNC:RB, [RA-8]
|
|
| cmp dword [RA-4], LJ_TFUNC
|
|
| jne ->vmeta_call
|
|
|->BC_CALLT_Z:
|
|
| mov PC, [BASE-4]
|
|
| test PC, FRAME_TYPE
|
|
| jnz >7
|
|
|1:
|
|
| mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
|
|
| mov MULTRES, NARGS:RD
|
|
| sub NARGS:RD, 1
|
|
| jz >3
|
|
|2:
|
|
| mov RB, [RA] // Move args down.
|
|
| mov [KBASE], RB
|
|
| mov RB, [RA+4]
|
|
| mov [KBASE+4], RB
|
|
| add KBASE, 8
|
|
| add RA, 8
|
|
| sub NARGS:RD, 1
|
|
| jnz <2
|
|
|
|
|
| mov LFUNC:RB, [BASE-8]
|
|
|3:
|
|
| mov RA, BASE // BASE is ignored, except when ...
|
|
| cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
|
|
| ja >5
|
|
|4:
|
|
| mov NARGS:RD, MULTRES
|
|
| jmp aword LFUNC:RB->gate
|
|
|
|
|
|5: // Tailcall to a fast function.
|
|
| test PC, FRAME_TYPE // Lua frame below?
|
|
| jnz <4
|
|
| movzx RD, PC_RA // Need to prepare BASE/KBASE.
|
|
| not RDa
|
|
| lea BASE, [BASE+RD*8]
|
|
| mov LFUNC:KBASE, [BASE-8]
|
|
| mov PROTO:KBASE, LFUNC:KBASE->pt
|
|
| mov KBASE, PROTO:KBASE->k
|
|
| jmp <4
|
|
|
|
|
|7: // Tailcall from a vararg function.
|
|
| jnp <1 // Vararg frame below?
|
|
| and PC, -8
|
|
| sub BASE, PC // Need to relocate BASE/KBASE down.
|
|
| mov KBASE, BASE
|
|
| mov PC, [BASE-4]
|
|
| jmp <1
|
|
break;
|
|
|
|
case BC_ITERC:
|
|
| ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
|
|
| lea RA, [BASE+RA*8+8] // fb = base+1
|
|
| mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
|
|
| mov RC, [RA-20]
|
|
| mov [RA], RB
|
|
| mov [RA+4], RC
|
|
| mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
|
|
| mov RC, [RA-12]
|
|
| mov [RA+8], RB
|
|
| mov [RA+12], RC
|
|
| mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
|
|
| mov RC, [RA-28]
|
|
| mov [RA-8], LFUNC:RB
|
|
| mov [RA-4], RC
|
|
| cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
|
|
| mov NARGS:RC, 3
|
|
| jne ->vmeta_call
|
|
| jmp aword LFUNC:RB->gate
|
|
break;
|
|
|
|
case BC_VARG:
|
|
| ins_AB_ // RA = base, RB = nresults+1, (RC = 1)
|
|
| mov LFUNC:RC, [BASE-8]
|
|
| lea RA, [BASE+RA*8]
|
|
| mov PROTO:RC, LFUNC:RC->pt
|
|
| movzx RC, byte PROTO:RC->numparams
|
|
| mov TMP1, KBASE // Need one more free register.
|
|
| lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
|
|
| sub KBASE, [BASE-4]
|
|
| // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
|
|
| test RB, RB
|
|
| jz >5 // Copy all varargs?
|
|
| lea RB, [RA+RB*8-8]
|
|
| cmp KBASE, BASE // No vararg slots?
|
|
| jnb >2
|
|
|1: // Copy vararg slots to destination slots.
|
|
| mov RC, [KBASE-8]
|
|
| mov [RA], RC
|
|
| mov RC, [KBASE-4]
|
|
| add KBASE, 8
|
|
| mov [RA+4], RC
|
|
| add RA, 8
|
|
| cmp RA, RB // All destination slots filled?
|
|
| jnb >3
|
|
| cmp KBASE, BASE // No more vararg slots?
|
|
| jb <1
|
|
|2: // Fill up remainder with nil.
|
|
| mov dword [RA+4], LJ_TNIL
|
|
| add RA, 8
|
|
| cmp RA, RB
|
|
| jb <2
|
|
|3:
|
|
| mov KBASE, TMP1
|
|
| ins_next
|
|
|
|
|
|5: // Copy all varargs.
|
|
| mov MULTRES, 1 // MULTRES = 0+1
|
|
| mov RC, BASE
|
|
| sub RC, KBASE
|
|
| jbe <3 // No vararg slots?
|
|
| mov RB, RC
|
|
| shr RB, 3
|
|
| add RB, 1
|
|
| mov MULTRES, RB // MULTRES = #varargs+1
|
|
| mov L:RB, SAVE_L
|
|
| add RC, RA
|
|
| cmp RC, L:RB->maxstack
|
|
| ja >7 // Need to grow stack?
|
|
|6: // Copy all vararg slots.
|
|
| mov RC, [KBASE-8]
|
|
| mov [RA], RC
|
|
| mov RC, [KBASE-4]
|
|
| add KBASE, 8
|
|
| mov [RA+4], RC
|
|
| add RA, 8
|
|
| cmp KBASE, BASE // No more vararg slots?
|
|
| jb <6
|
|
| jmp <3
|
|
|
|
|
|7: // Grow stack for varargs.
|
|
| mov L:RB->base, BASE
|
|
| mov L:RB->top, RA
|
|
| mov SAVE_PC, PC
|
|
| sub KBASE, BASE // Need delta, because BASE may change.
|
|
| mov FCARG2, MULTRES
|
|
| sub FCARG2, 1
|
|
| mov FCARG1, L:RB
|
|
| call extern lj_state_growstack@8 // (lua_State *L, int n)
|
|
| mov BASE, L:RB->base
|
|
| mov RA, L:RB->top
|
|
| add KBASE, BASE
|
|
| jmp <6
|
|
break;
|
|
|
|
/* -- Returns ----------------------------------------------------------- */
|
|
|
|
case BC_RETM:
|
|
| ins_AD // RA = results, RD = extra_nresults
|
|
| add RD, MULTRES // MULTRES >=1, so RD >=1.
|
|
| // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
|
|
break;
|
|
|
|
case BC_RET: case BC_RET0: case BC_RET1:
|
|
| ins_AD // RA = results, RD = nresults+1
|
|
if (op != BC_RET0) {
|
|
| shl RA, 3
|
|
}
|
|
|1:
|
|
| mov PC, [BASE-4]
|
|
| mov MULTRES, RD // Save nresults+1.
|
|
| test PC, FRAME_TYPE // Check frame type marker.
|
|
| jnz >7 // Not returning to a fixarg Lua func?
|
|
switch (op) {
|
|
case BC_RET:
|
|
|->BC_RET_Z:
|
|
| mov KBASE, BASE // Use KBASE for result move.
|
|
| sub RD, 1
|
|
| jz >3
|
|
|2:
|
|
| mov RB, [KBASE+RA] // Move results down.
|
|
| mov [KBASE-8], RB
|
|
| mov RB, [KBASE+RA+4]
|
|
| mov [KBASE-4], RB
|
|
| add KBASE, 8
|
|
| sub RD, 1
|
|
| jnz <2
|
|
|3:
|
|
| mov RD, MULTRES // Note: MULTRES may be >255.
|
|
| movzx RB, PC_RB // So cannot compare with RDL!
|
|
|5:
|
|
| cmp RB, RD // More results expected?
|
|
| ja >6
|
|
break;
|
|
case BC_RET1:
|
|
| mov RB, [BASE+RA+4]
|
|
| mov [BASE-4], RB
|
|
| mov RB, [BASE+RA]
|
|
| mov [BASE-8], RB
|
|
/* fallthrough */
|
|
case BC_RET0:
|
|
|5:
|
|
| cmp PC_RB, RDL // More results expected?
|
|
| ja >6
|
|
default:
|
|
break;
|
|
}
|
|
| movzx RA, PC_RA
|
|
| not RAa // Note: ~RA = -(RA+1)
|
|
| lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
|
|
| mov LFUNC:KBASE, [BASE-8]
|
|
| mov PROTO:KBASE, LFUNC:KBASE->pt
|
|
| mov KBASE, PROTO:KBASE->k
|
|
| ins_next
|
|
|
|
|
|6: // Fill up results with nil.
|
|
if (op == BC_RET) {
|
|
| mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
|
|
| add KBASE, 8
|
|
} else {
|
|
| mov dword [BASE+RD*8-12], LJ_TNIL
|
|
}
|
|
| add RD, 1
|
|
| jmp <5
|
|
|
|
|
|7: // Non-standard return case.
|
|
| jnp ->vm_return
|
|
| // Return from vararg function: relocate BASE down and RA up.
|
|
| and PC, -8
|
|
| sub BASE, PC
|
|
if (op != BC_RET0) {
|
|
| add RA, PC
|
|
}
|
|
| jmp <1
|
|
break;
|
|
|
|
/* -- Loops and branches ------------------------------------------------ */
|
|
|
|
|.define FOR_IDX, qword [RA]; .define FOR_TIDX, dword [RA+4]
|
|
|.define FOR_STOP, qword [RA+8]; .define FOR_TSTOP, dword [RA+12]
|
|
|.define FOR_STEP, qword [RA+16]; .define FOR_TSTEP, dword [RA+20]
|
|
|.define FOR_EXT, qword [RA+24]; .define FOR_TEXT, dword [RA+28]
|
|
|
|
case BC_FORL:
|
|
#if LJ_HASJIT
|
|
| hotloop RB
|
|
#endif
|
|
| // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
|
|
break;
|
|
|
|
case BC_JFORI:
|
|
case BC_JFORL:
|
|
#if !LJ_HASJIT
|
|
break;
|
|
#endif
|
|
case BC_FORI:
|
|
case BC_IFORL:
|
|
vk = (op == BC_IFORL || op == BC_JFORL);
|
|
| ins_AJ // RA = base, RD = target (after end of loop or start of loop)
|
|
| lea RA, [BASE+RA*8]
|
|
if (!vk) {
|
|
| cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks
|
|
| cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for
|
|
}
|
|
| mov RB, FOR_TSTEP // Load type/hiword of for step.
|
|
if (!vk) {
|
|
| cmp RB, LJ_TISNUM; ja ->vmeta_for
|
|
}
|
|
if (sse) {
|
|
| movsd xmm0, FOR_IDX
|
|
| movsd xmm1, FOR_STOP
|
|
if (vk) {
|
|
| addsd xmm0, FOR_STEP
|
|
| movsd FOR_IDX, xmm0
|
|
| test RB, RB; js >3
|
|
} else {
|
|
| jl >3
|
|
}
|
|
| ucomisd xmm1, xmm0
|
|
|1:
|
|
| movsd FOR_EXT, xmm0
|
|
} else {
|
|
| fld FOR_STOP
|
|
| fld FOR_IDX
|
|
if (vk) {
|
|
| fadd FOR_STEP // nidx = idx + step
|
|
| fst FOR_IDX
|
|
| fst FOR_EXT
|
|
| test RB, RB; js >1
|
|
} else {
|
|
| fst FOR_EXT
|
|
| jl >1
|
|
}
|
|
| fxch // Swap lim/(n)idx if step non-negative.
|
|
|1:
|
|
| fcomparepp // eax (RD) modified if !cmov.
|
|
if (!cmov) {
|
|
| movzx RD, PC_RD // Need to reload RD.
|
|
}
|
|
}
|
|
if (op == BC_FORI) {
|
|
| jnb >2
|
|
| branchPC RD
|
|
} else if (op == BC_JFORI) {
|
|
| branchPC RD
|
|
| movzx RD, PC_RD
|
|
| jnb =>BC_JLOOP
|
|
} else if (op == BC_IFORL) {
|
|
| jb >2
|
|
| branchPC RD
|
|
} else {
|
|
| jnb =>BC_JLOOP
|
|
}
|
|
|2:
|
|
| ins_next
|
|
if (sse) {
|
|
|3: // Invert comparison if step is negative.
|
|
| ucomisd xmm0, xmm1
|
|
| jmp <1
|
|
}
|
|
break;
|
|
|
|
case BC_ITERL:
|
|
#if LJ_HASJIT
|
|
| hotloop RB
|
|
#endif
|
|
| // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
|
|
break;
|
|
|
|
case BC_JITERL:
|
|
#if !LJ_HASJIT
|
|
break;
|
|
#endif
|
|
case BC_IITERL:
|
|
| ins_AJ // RA = base, RD = target
|
|
| lea RA, [BASE+RA*8]
|
|
| mov RB, [RA+4]
|
|
| cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
|
|
if (op == BC_JITERL) {
|
|
| mov [RA-4], RB
|
|
| mov RB, [RA]
|
|
| mov [RA-8], RB
|
|
| jmp =>BC_JLOOP
|
|
} else {
|
|
| branchPC RD // Otherwise save control var + branch.
|
|
| mov RD, [RA]
|
|
| mov [RA-4], RB
|
|
| mov [RA-8], RD
|
|
}
|
|
|1:
|
|
| ins_next
|
|
break;
|
|
|
|
case BC_LOOP:
|
|
| ins_A // RA = base, RD = target (loop extent)
|
|
| // Note: RA/RD is only used by trace recorder to determine scope/extent
|
|
| // This opcode does NOT jump, it's only purpose is to detect a hot loop.
|
|
#if LJ_HASJIT
|
|
| hotloop RB
|
|
#endif
|
|
| // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
|
|
break;
|
|
|
|
case BC_ILOOP:
|
|
| ins_A // RA = base, RD = target (loop extent)
|
|
| ins_next
|
|
break;
|
|
|
|
case BC_JLOOP:
|
|
#if LJ_HASJIT
|
|
| ins_AD // RA = base (ignored), RD = traceno
|
|
| mov RA, [DISPATCH+DISPATCH_J(trace)]
|
|
| mov TRACE:RD, [RA+RD*4]
|
|
| mov RDa, TRACE:RD->mcode
|
|
| mov L:RB, SAVE_L
|
|
| mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
|
|
| mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
|
|
| jmp RDa
|
|
#endif
|
|
break;
|
|
|
|
case BC_JMP:
|
|
| ins_AJ // RA = unused, RD = target
|
|
| branchPC RD
|
|
| ins_next
|
|
break;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
default:
|
|
fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
|
|
exit(2);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Entry point of the build backend: emit the complete interpreter code.
** Returns the number of emitted bytecode instruction handlers (BC__MAX).
*/
static int build_backend(BuildCtx *ctx)
{
  int opno;
  /* CPU feature selection: CMOV is assumed available unless explicitly
  ** disabled; SSE2 is only used when requested or implied by x64.
  */
#ifdef LUAJIT_CPU_NOCMOV
  int cmov = 0;
#else
  int cmov = 1;
#endif
#if defined(LUAJIT_CPU_SSE2) || defined(LJ_TARGET_X64)
  int sse = 1;
#else
  int sse = 0;
#endif

  dasm_growpc(Dst, BC__MAX);  /* Reserve one dynamic label per bytecode op. */

  build_subroutines(ctx, cmov, sse);  /* Helper/fast-function code first. */

  |.code_op
  for (opno = 0; opno < BC__MAX; opno++) {
    build_ins(ctx, (BCOp)opno, opno, cmov, sse);  /* One handler per op. */
  }

  return BC__MAX;
}
|
|
|
|
/* Emit pseudo frame-info for all assembler functions. */
|
|
static void emit_asm_debug(BuildCtx *ctx)
{
  /* Pointer size and DWARF register numbers differ between x86 and x64:
  ** SZPTR  = sizeof(void *) as a string, BSZPTR = log2 of that (for .align),
  ** REG_SP = DWARF column of the stack pointer (rsp = 7, esp = 4),
  ** REG_RA = DWARF column used for the return address (x64 = 16, x86 = 8).
  */
#if LJ_64
#define SZPTR	"8"
#define BSZPTR	"3"
#define REG_SP	"0x7"
#define REG_RA	"0x10"
#else
#define SZPTR	"4"
#define BSZPTR	"2"
#define REG_SP	"0x4"
#define REG_RA	"0x8"
#endif
  switch (ctx->mode) {
  case BUILD_elfasm:
    /* ELF: emit a .debug_frame section (debugger-only unwind info). */
    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
    /* CIE 0: common frame info shared by the FDE below. CFA = SP + SZPTR
    ** on entry, return address saved at CFA-SZPTR (byte 0xc = def_cfa,
    ** 0x80+reg = DW_CFA_offset for the RA column).
    */
    fprintf(ctx->fp,
	".Lframe0:\n"
	"\t.long .LECIE0-.LSCIE0\n"
	".LSCIE0:\n"
	"\t.long 0xffffffff\n"		/* .debug_frame CIE marker. */
	"\t.byte 0x1\n"			/* CIE version 1. */
	"\t.string \"\"\n"		/* No augmentation. */
	"\t.uleb128 0x1\n"		/* Code alignment factor. */
	"\t.sleb128 -" SZPTR "\n"	/* Data alignment factor. */
	"\t.byte " REG_RA "\n"		/* Return address column. */
	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
	"\t.align " SZPTR "\n"
	".LECIE0:\n\n");
    /* FDE 0: covers the whole VM code range [.Lbegin, .Lbegin+codesz).
    ** After the interpreter prologue the CFA is SP + CFRAME_SIZE and the
    ** C callee-saved registers are spilled at fixed slots below the RA.
    */
    fprintf(ctx->fp,
	".LSFDE0:\n"
	"\t.long .LEFDE0-.LASFDE0\n"
	".LASFDE0:\n"
	"\t.long .Lframe0\n"
	"\t.long .Lbegin\n"
	"\t.long %d\n"
	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
#if LJ_64
	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
#else
	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
	"\t.byte 0x87\n\t.uleb128 0x3\n"	/* offset edi */
	"\t.byte 0x86\n\t.uleb128 0x4\n"	/* offset esi */
	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
#endif
	"\t.align " SZPTR "\n"
	".LEFDE0:\n\n", (int)ctx->codesz, CFRAME_SIZE);
    /* ELF: also emit an allocated .eh_frame section so the runtime
    ** unwinder can walk VM frames during error/exception handling.
    */
    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
    /* CIE 1: like CIE 0, but with "zPR" augmentation: a personality
    ** routine (lj_err_unwind_dwarf, pcrel|sdata4 encoded) and an FDE
    ** pointer encoding.
    */
    fprintf(ctx->fp,
	".Lframe1:\n"
	"\t.long .LECIE1-.LSCIE1\n"
	".LSCIE1:\n"
	"\t.long 0\n"			/* .eh_frame CIE marker. */
	"\t.byte 0x1\n"
	"\t.string \"zPR\"\n"
	"\t.uleb128 0x1\n"
	"\t.sleb128 -" SZPTR "\n"
	"\t.byte " REG_RA "\n"
	"\t.uleb128 6\n"			/* augmentation length */
	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
	"\t.long lj_err_unwind_dwarf-.\n"	/* Personality routine. */
	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
	"\t.align " SZPTR "\n"
	".LECIE1:\n\n");
    /* FDE 1: same CFA/register rules as FDE 0, but with the pc-relative
    ** references .eh_frame requires.
    */
    fprintf(ctx->fp,
	".LSFDE1:\n"
	"\t.long .LEFDE1-.LASFDE1\n"
	".LASFDE1:\n"
	"\t.long .LASFDE1-.Lframe1\n"
	"\t.long .Lbegin-.\n"
	"\t.long %d\n"
	"\t.uleb128 0\n"			/* augmentation length */
	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
#if LJ_64
	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
#else
	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
	"\t.byte 0x87\n\t.uleb128 0x3\n"	/* offset edi */
	"\t.byte 0x86\n\t.uleb128 0x4\n"	/* offset esi */
	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
#endif
	"\t.align " SZPTR "\n"
	".LEFDE1:\n\n", (int)ctx->codesz, CFRAME_SIZE);
    break;
  case BUILD_coffasm:
    /* COFF (MinGW): DWARF2 unwind info in .eh_frame; only works with a
    ** DWARF2-enabled GCC 4.x. C symbols carry a leading '_' on 32 bit.
    */
    fprintf(ctx->fp, "\t.section .eh_frame,\"dr\"\n");
    fprintf(ctx->fp,
	"\t.def %slj_err_unwind_dwarf; .scl 2; .type 32; .endef\n",
	LJ_32 ? "_" : "");
    /* CIE: augmentation "zP" only (personality, no FDE encoding byte);
    ** the personality pointer uses the absptr encoding (0x00).
    */
    fprintf(ctx->fp,
	"Lframe1:\n"
	"\t.long LECIE1-LSCIE1\n"
	"LSCIE1:\n"
	"\t.long 0\n"
	"\t.byte 0x1\n"
	"\t.string \"zP\"\n"
	"\t.uleb128 0x1\n"
	"\t.sleb128 -" SZPTR "\n"
	"\t.byte " REG_RA "\n"
	"\t.uleb128 5\n"			/* augmentation length */
	"\t.byte 0x00\n"			/* absptr */
	"\t.long %slj_err_unwind_dwarf\n"
	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
	"\t.align " SZPTR "\n"
	"LECIE1:\n\n", LJ_32 ? "_" : "");
    /* FDE: absolute reference to lj_vm_asm_begin; register rules as
    ** in the ELF variant.
    */
    fprintf(ctx->fp,
	"LSFDE1:\n"
	"\t.long LEFDE1-LASFDE1\n"
	"LASFDE1:\n"
	"\t.long LASFDE1-Lframe1\n"
	"\t.long %slj_vm_asm_begin\n"
	"\t.long %d\n"
	"\t.uleb128 0\n"			/* augmentation length */
	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
#if LJ_64
	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
#else
	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
	"\t.byte 0x87\n\t.uleb128 0x3\n"	/* offset edi */
	"\t.byte 0x86\n\t.uleb128 0x4\n"	/* offset esi */
	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
#endif
	"\t.align " SZPTR "\n"
	"LEFDE1:\n\n", LJ_32 ? "_" : "", (int)ctx->codesz, CFRAME_SIZE);
    break;
  case BUILD_machasm:
    /* Mach-O (OSX): coalesced __eh_frame section; note the different
    ** apple-as directives (.set/.ascii), the mandatory leading '_' on
    ** symbols, and indirect personality reference via a non-lazy pointer.
    */
    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
    fprintf(ctx->fp,
	"EH_frame1:\n"
	"\t.set L$set$0,LECIE1-LSCIE1\n"
	"\t.long L$set$0\n"
	"LSCIE1:\n"
	"\t.long 0\n"
	"\t.byte 0x1\n"
	"\t.ascii \"zPR\\0\"\n"
	"\t.byte 0x1\n"
	"\t.byte 128-" SZPTR "\n"	/* sleb128 -SZPTR, hand-encoded. */
	"\t.byte " REG_RA "\n"
	"\t.byte 6\n"				/* augmentation length */
	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
	"\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
#if LJ_64
	"\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
#else
	"\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n"	/* esp=5 on 32 bit MACH-O. */
#endif
	"\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
	"\t.align " BSZPTR "\n"
	"LECIE1:\n\n");
    /* FDE: the .eh-suffixed label associates this FDE with the function
    ** for the OSX linker/unwinder.
    */
    fprintf(ctx->fp,
	"_lj_vm_asm_begin.eh:\n"
	"LSFDE1:\n"
	"\t.set L$set$1,LEFDE1-LASFDE1\n"
	"\t.long L$set$1\n"
	"LASFDE1:\n"
	"\t.long LASFDE1-EH_frame1\n"
	"\t.long _lj_vm_asm_begin-.\n"
	"\t.long %d\n"
	"\t.byte 0\n"				/* augmentation length */
	"\t.byte 0xe\n\t.byte %d\n"		/* def_cfa_offset */
#if LJ_64
	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
#else
	"\t.byte 0x84\n\t.byte 0x2\n"		/* offset ebp (4 for MACH-O)*/
	"\t.byte 0x87\n\t.byte 0x3\n"		/* offset edi */
	"\t.byte 0x86\n\t.byte 0x4\n"		/* offset esi */
	"\t.byte 0x83\n\t.byte 0x5\n"		/* offset ebx */
#endif
	"\t.align " BSZPTR "\n"
	"LEFDE1:\n\n", (int)ctx->codesz, CFRAME_SIZE);
    /* Non-lazy pointer slot the CIE's indirect personality entry refers to. */
    fprintf(ctx->fp,
	"\t.non_lazy_symbol_pointer\n"
	"L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
	".indirect_symbol _lj_err_unwind_dwarf\n"
	".long 0\n");
    break;
  default:  /* Difficult for other modes. */
    break;
  }
}
|
|
|