diff --git a/src/lj_vm.h b/src/lj_vm.h index 9cc42613..77494472 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); #if LJ_TARGET_PPC void lj_vm_cachesync(void *start, void *end); #endif +#if LJ_TARGET_RISCV64 +void lj_vm_fence_rw_rw(); +#endif LJ_ASMF double lj_vm_foldarith(double x, double y, int op); #if LJ_HASJIT LJ_ASMF double lj_vm_foldfpm(double x, int op); diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 2c9b96cc..a31a4adf 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double y, int op) /* -- Helper functions for generated machine code ------------------------- */ -#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS +#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ + || LJ_TARGET_RISCV64 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index f8f338ce..c86f94bd 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -438,9 +438,11 @@ | auipc CFUNCADDR, extern %pcrel_hi(func) | jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func) |.endmacro -|.macro call_extern, func -| call extern func -| empty +|.macro call_extern, curfunc, func +|->curfunc .. _got_pcrel_ .. func: +| auipc CFUNCADDR, extern %got_pcrel_hi(func) +| ld CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _got_pcrel_ .. func)(CFUNCADDR) +| jalr CFUNCADDR |.endmacro | |// Set current VM state. Uses TMP0. @@ -516,3 +518,3579 @@ |.endmacro | |//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: TMP2 = previous base. + | andi TMP0, PC, FRAME_P + | + | // Return from pcall or xpcall fast func. + | mov_true TMP1 + | bxeqz TMP0, ->cont_dispatch + | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | mv BASE, TMP2 // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. + | sd TMP1, -8(RA) // Prepend true to results. + | addi RA, RA, -8 + | + |->vm_returnc: + | addiw RD, RD, 8 // RD = (nresults+1)*8. + | andi TMP0, PC, FRAME_TYPE + | li CRET1, LUA_YIELD + | bxeqz RD, ->vm_unwind_c_eh + | mv MULTRES, RD + | bxeqz TMP0, ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return + | // TMP0 = PC & FRAME_TYPE + | andi TMP2, PC, ~FRAME_TYPEP + | xori TMP0, TMP0, FRAME_C + | sub TMP2, BASE, TMP2 // TMP2 = previous base. + | bxnez TMP0, ->vm_returnp + | + | addiw TMP1, RD, -8 + | sd TMP2, L->base + | li_vmstate C + | lw TMP2, SAVE_NRES(sp) + | addi BASE, BASE, -16 + | st_vmstate + | slliw TMP2, TMP2, 3 + | beqz TMP1, >2 + |1: + | addiw TMP1, TMP1, -8 + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | sd CRET1, 0(BASE) + | addi BASE, BASE, 8 + | bnez TMP1, <1 + | + |2: + | bne TMP2, RD, >6 + |3: + | sd BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ld TMP0, SAVE_CFRAME(sp) // Restore previous C frame. + | mv CRET1, x0 // Ok return status for vm_pcall. + | sd TMP0, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | ld TMP1, L->maxstack + | blt TMP2, RD, >7 + | // More results wanted. Check stack size and fill up results with nil. + | bge BASE, TMP1, >9 + | sd TISNIL, 0(BASE) + | addiw RD, RD, 8 + | addi BASE, BASE, 8 + | j <2 + | + |7: // Less results wanted. + | subw TMP0, RD, TMP2 + | sub TMP0, BASE, TMP0 // Either keep top or shrink it. + | beqz TMP2, >8 + | mv BASE, TMP0 // LUA_MULTRET+1 case + |8: + | j <3 + | + |9: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | + | sd BASE, L->top // Save current top held in BASE (yes). + | mv MULTRES, RD + | srliw CARG2, TMP2, 3 + | mv CARG1, L + | call_intern vm_leave_unw, lj_state_growstack // (lua_State *L, int n) + | lw TMP2, SAVE_NRES(sp) + | ld BASE, L->top // Need the (realloced) L->top in BASE. + | mv RD, MULTRES + | slliw TMP2, TMP2, 3 + | j <2 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mv sp, CARG1 + | mv CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ld L, SAVE_L(sp) + | li TMP0, ~LJ_VMST_C + | ld GL, L->glref + | sw TMP0, GL->vmstate + | j ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | andi sp, CARG1, CFRAME_RAWMASK + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ld L, SAVE_L(sp) + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | ld BASE, L->base + | ld GL, L->glref // Setup pointer to global state. + | slli TMP3, TMP3, 32 + | mov_false TMP1 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | fmv.d.x TOBIT, TMP3 + | addi RA, BASE, -8 // Results start at BASE-8. + | addxi DISPATCH, GL, GG_G2DISP + | sd TMP1, -8(BASE) // Prepend false to error message. + | st_vmstate + | li RD, 16 // 2 results: false + error message. + | j ->vm_returnc + | + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | li CARG2, LUA_MINSTACK + | j >2 + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | add RC, BASE, RC + | sub RA, RA, BASE + | sd BASE, L->base + | addi PC, PC, 4 // Must point after first instruction. + | sd RC, L->top + | srliw CARG2, RA, 3 + |2: + | // L->base = new base, L->top = top + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | call_intern vm_growstack_l, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | ld RC, L->top + | ld LFUNC:RB, FRAME_FUNC(BASE) + | sub RC, RC, BASE + | cleartp LFUNC:RB + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mv L, CARG1 + | ld GL, L->glref // Setup pointer to global state. + | mv BASE, CARG2 + | lbu TMP1, L->status + | sd L, SAVE_L(sp) + | li PC, FRAME_CP + | addi TMP0, sp, CFRAME_RESUME + | addxi DISPATCH, GL, GG_G2DISP + | sw x0, SAVE_NRES(sp) + | sw x0, SAVE_ERRF(sp) + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sd x0, SAVE_CFRAME(sp) + | sd TMP0, L->cframe + | beqz TMP1, >3 + | + | // Resume after yield (like a return). + | sd L, GL->cur_L + | mv RA, BASE + | ld BASE, L->base + | ld TMP1, L->top + | ld PC, FRAME_PC(BASE) + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | sub RD, TMP1, BASE + | slli TMP3, TMP3, 32 + | sb x0, L->status + | fmv.d.x TOBIT, TMP3 + | li_vmstate INTERP + | addi RD, RD, 8 + | st_vmstate + | mv MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | bxeqz TMP0, ->BC_RET_Z + | j ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | sw CARG4, SAVE_ERRF(sp) + | li PC, FRAME_CP + | j >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | li PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ld TMP1, L:CARG1->cframe + | mv L, CARG1 + | sw CARG3, SAVE_NRES(sp) + | ld GL, L->glref // Setup pointer to global state. + | sd CARG1, SAVE_L(sp) + | mv BASE, CARG2 + | addxi DISPATCH, GL, GG_G2DISP + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sd TMP1, SAVE_CFRAME(sp) + | sd sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | sd L, GL->cur_L + | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | ld TMP1, L->top + | slli TMP3, TMP3, 32 + | add PC, PC, BASE + | sub NARGS8:RC, TMP1, BASE + | li TISNUM, LJ_TISNUM + | sub PC, PC, TMP2 // PC = frame delta + frame type + | fmv.d.x TOBIT, TMP3 + | li_vmstate INTERP + | li TISNIL, LJ_TNIL + | st_vmstate + | + |->vm_call_dispatch: + | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ld LFUNC:RB, FRAME_FUNC(BASE) + | checkfunc LFUNC:RB, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | mv L, CARG1 + | ld TMP0, L:CARG1->stack + | sd CARG1, SAVE_L(sp) + | ld TMP1, L->top + | ld GL, L->glref // Setup pointer to global state. + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). + | ld TMP1, L->cframe + | addxi DISPATCH, GL, GG_G2DISP + | sw TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame. + | sw x0, SAVE_ERRF(sp) // No error function. + | sd TMP1, SAVE_CFRAME(sp) + | sd sp, L->cframe // Add our C frame to cframe chain. + | sd L, GL->cur_L + | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | mv BASE, CRET1 + | li PC, FRAME_CP + | bnez CRET1, <3 // Else continue with the call. + | j ->vm_leave_cp // No base? Just remove C frame. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 + | ld TMP0, -32(BASE) // Continuation. + | mv RB, BASE + | mv BASE, TMP2 // Restore caller BASE. + | ld LFUNC:TMP1, FRAME_FUNC(TMP2) + | ld PC, -24(RB) // Restore PC from [cont|PC]. + | cleartp LFUNC:TMP1 + | add TMP2, RA, RD + | ld TMP1, LFUNC:TMP1->pc + | sd TISNIL, -8(TMP2) // Ensure one valid arg. + | // BASE = base, RA = resultptr, RB = meta base + | ld KBASE, PC2PROTO(k)(TMP1) + | jr TMP0 // Jump to continuation. + | + |->cont_cat: // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | addi CARG2, RB, -32 + | ld TMP0, 0(RA) + | decode_RB8 MULTRES, INS + | decode_RA8 RA, INS + | add TMP1, BASE, MULTRES + | sd BASE, L->base + | sub CARG3, CARG2, TMP1 + | sd TMP0, 0(CARG2) + | bxne TMP1, CARG2, ->BC_CAT_Z + | add RA, BASE, RA + | sd TMP0, 0(RA) + | j ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | addi CARG3, GL, offsetof(global_State, tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tgets: + | addi CARG2, GL, offsetof(global_State, tmptv) + | addi CARG3, GL, offsetof(global_State, tmptv2) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | settp STR:RC, TMP1 + | sd TAB:RB, 0(CARG2) + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tgetb: // TMP0 = index + | addi CARG3, GL, offsetof(global_State, tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tgetv: + |1: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, TValue *k) + | call_intern vmeta_tgetv, lj_meta_tget + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + | ld TMP0, 0(CRET1) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | addi TMP1, BASE, -FRAME_CONT + | li NARGS8:RC, 16 // 2 args for func(t, k). + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | sub PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | call_intern vmeta_tgetr, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | mv TMP1, TISNIL + | bxeqz CRET1, ->BC_TGETR_Z + | ld TMP1, 0(CRET1) + | j ->BC_TGETR_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | addi, CARG3, GL, offsetof(global_State, tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tsets: + | addi CARG2, GL, offsetof(global_State, tmptv) + | addi CARG3, GL, offsetof(global_State, tmptv2) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | settp STR:RC, TMP1 + | sd TAB:RB, 0(CARG2) + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tsetb: // TMP0 = index + | addi CARG3, GL, offsetof(global_State, tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tsetv: + |1: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, TValue *k) + | call_intern vmeta_tsetv, lj_meta_tset + | // Returns TValue * (finished) or NULL (metamethod). + | ld TMP2, 0(RA) + | beqz CRET1, >3 + | ins_next1 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | sd TMP2, 0(CRET1) + | ins_next2 + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | addi TMP1, BASE, -FRAME_CONT + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | sub PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | li NARGS8:RC, 24 // 3 args for func(t, k, v) + | cleartp LFUNC:RB + | sd TMP2, 16(BASE) // Copy value to third argument. + | j ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, GCtab *t, int32_t key) + | call_intern vmeta_tsetr, lj_tab_setinth + | // Returns TValue *. + | j ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | // RA/RD point to o1/o2. + | mv CARG2, RA + | mv CARG3, RD + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | decode_OP1 CARG4, INS + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o1, *o2, int op) + | call_intern vmeta_comp, lj_meta_comp + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu TMP1, CRET1, 2 + | bxeqz TMP1, ->vmeta_binop + | negw TMP2, CRET1 + |4: + | lhu RD, OFS_RD(PC) + | addi PC, PC, 4 + | lui TMP1, (-(BCBIAS_J*4 >> 12)) & 0xfffff + | slliw RD, RD, 2 + | addw RD, RD, TMP1 + | and RD, RD, TMP2 + | add PC, PC, RD + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | lbu TMP1, -4+OFS_RA(PC) + | ld TMP2, 0(RA) + | slliw TMP1, TMP1, 3 + | add TMP1, BASE, TMP1 + | sd TMP2, 0(TMP1) + | j ->cont_nop + | + |->cont_condt: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu TMP1, TMP0, LJ_TISTRUECOND + | negw TMP2, TMP1 // Branch if result is true. + | j <4 + | + |->cont_condf: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu TMP1, TMP0, LJ_TISTRUECOND + | addiw TMP2, TMP1, -1 // Branch if result is false. + | j <4 + | + |->vmeta_equal: + | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. + | cleartp LFUNC:CARG3, CARG2 + | cleartp LFUNC:CARG2, CARG1 + | mv CARG4, TMP0 + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, GCobj *o1, *o2, int ne) + | call_intern vmeta_equal, lj_meta_equal + | // Returns 0/1 or TValue * (metamethod). + | j <3 + | + |->vmeta_istype: + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | srliw CARG2, RA, 3 + | srliw CARG3, RD, 3 + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, BCReg tp) + | call_intern vmeta_istype, lj_meta_istype + | j ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_unm: + | mv RC, RB + | + |->vmeta_arith: + | mv CARG1, L + | sd BASE, L->base + | mv CARG2, RA + | sd PC, SAVE_PC(sp) + | mv CARG3, RB + | mv CARG4, RC + | decode_OP1 CARG5, INS + | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | call_intern vmeta_arith, lj_meta_arith + | // Returns NULL (finished) or TValue * (metamethod). + | bxeqz CRET1, ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | sub TMP1, CRET1, BASE + | sd PC, -24(CRET1) // [cont|PC] + | mv TMP2, BASE + | addi PC, TMP1, FRAME_CONT + | mv BASE, CRET1 + | li NARGS8:RC, 16 // 2 args for func(o1, o2). + | j ->vm_call_dispatch + | + |->vmeta_len: + | // CARG2 already set by BC_LEN. +#if LJ_52 + | mv MULTRES, CARG1 +#endif + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bxnez CRET1, ->vmeta_binop // Binop call for compatibility. + | mv CARG1, MULTRES + | j ->BC_LEN_Z +#else + | j ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // TMP2 = old base, BASE = new base, RC = nargs*8 + | mv CARG1, L + | sd TMP2, L->base // This is the callers base! + | addi CARG2, BASE, -16 + | sd PC, SAVE_PC(sp) + | add CARG3, BASE, RC + | mv MULTRES, NARGS8:RC + | // (lua_State *L, TValue *func, TValue *top) + | call_intern vmeta_call, lj_meta_call + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:RB + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | mv CARG1, L + | sd BASE, L->base + | addi CARG2, RA, -16 + | sd PC, SAVE_PC(sp) + | add CARG3, RA, RC + | mv MULTRES, NARGS8:RC + | // (lua_State *L, TValue *func, TValue *top) + | call_intern vmeta_callt, lj_meta_call + | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ld TMP1, FRAME_PC(BASE) + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:CARG3, RB + | j ->BC_CALLT_Z + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mv CARG1, L + | sd BASE, L->base + | mv CARG2, RA + | sd PC, SAVE_PC(sp) + | mv MULTRES, INS + | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base) + | decode_RA8 RA, MULTRES + | decode_RD8 RD, MULTRES + | j =>BC_FORI + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | bxeqz NARGS8:RC, ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | sltiu TMP0, NARGS8:RC, 16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | bxnez TMP0, ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) + | bxeqz NARGS8:RC, ->fff_fallback + | checknum CARG1, ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | sltiu TMP0, NARGS8:RC, 16 + | ld CARG2, 8(BASE) + | bxnez TMP0, ->fff_fallback + | gettp TMP1, CARG1 + | gettp TMP2, CARG2 + | sltiu TMP1, TMP1, LJ_TISNUM + | sltiu TMP2, TMP2, LJ_TISNUM + | fld FARG1, 0(BASE) + | and TMP1, TMP1, TMP2 + | fld FARG2, 8(BASE) + | bxeqz TMP1, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. + |.macro ffgccheck + | ld TMP0, GL->gc.total + | ld TMP1, GL->gc.threshold + | bltu TMP0, TMP1, >1 + | jal ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + |.ffunc_1 assert + | gettp TMP1, CARG1 + | sltiu TMP1, TMP1, LJ_TISTRUECOND + | addi RA, BASE, -16 + | bxeqz TMP1, ->fff_fallback + | ld PC, FRAME_PC(BASE) + | addiw RD, NARGS8:RC, 8 // Compute (nresults+1)*8. + | addi TMP1, BASE, 8 + | add TMP2, RA, RD + | sd CARG1, -16(BASE) + | bne BASE, TMP2, >1 + | j ->fff_res // Done if exactly 1 argument. + |1: + | ld TMP0, 0(TMP1) + | sd TMP0, -16(TMP1) + | mv TMP3, TMP1 + | addi TMP1, TMP1, 8 + | bne TMP3, TMP2, <1 + | j ->fff_res + | + |.ffunc_1 type + | gettp TMP0, CARG1 + | not TMP3, TMP0 + | bltu TISNUM, TMP0, >1 + | li TMP3, ~LJ_TISNUM + |1: + | slli TMP3, TMP3, 3 + | add TMP3, CFUNC:RB, TMP3 + | ld CARG1, CFUNC:TMP3->upvalue + | j ->fff_restv + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | gettp TMP2, CARG1 + | addi TMP0, TMP2, -LJ_TTAB + | addi TMP1, TMP2, -LJ_TUDATA + | snez TMP0, TMP0 + | neg TMP0, TMP0 + | and TMP0, TMP0, TMP1 + | cleartp TAB:CARG1 + | bnez TMP0, >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:RB, TAB:CARG1->metatable + |2: + | ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] + | li CARG1, LJ_TNIL + | bxeqz TAB:RB, ->fff_restv + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slli TMP0, TMP1, 5 + | slli TMP1, TMP1, 3 + | sub TMP1, TMP0, TMP1 + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | li CARG4, LJ_TSTR + | settp STR:RC, CARG4 // Tagged key to look for. + |3: // Rearranged logic, because we expect _not_ to find the key. + | ld TMP0, NODE:TMP2->key + | ld CARG1, NODE:TMP2->val + | ld NODE:TMP2, NODE:TMP2->next + | li TMP3, LJ_TTAB + | beq RC, TMP0, >5 + | bnez NODE:TMP2, <3 + |4: + | settp CARG1, RB, TMP3 + | j ->fff_restv // Not found, keep default result. + |5: + | bxne CARG1, TISNIL, ->fff_restv + | j <4 // Ditto for nil value. + | + |6: + | sltiu TMP3, TMP2, LJ_TISNUM + | neg TMP4, TMP3 + | xor TMP0, TMP2, TISNUM // TMP2 = TMP3 ? TISNUM : TMP2 + | and TMP0, TMP0, TMP4 + | xor TMP2, TMP0, TMP2 + | slli TMP2, TMP2, 3 + | sub TMP0, GL, TMP2 + | ld TAB:RB, (offsetof(global_State, gcroot[GCROOT_BASEMT])-8)(TMP0) + | j <2 + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. + | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | gettp TMP3, CARG2 + | ld TAB:TMP0, TAB:TMP1->metatable + | lbu TMP2, TAB:TMP1->marked + | addi TMP3, TMP3, -LJ_TTAB + | cleartp TAB:CARG2 + | or TMP3, TMP3, TAB:TMP0 + | bxnez TMP3, ->fff_fallback + | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table) + | sd TAB:CARG2, TAB:TMP1->metatable + | bxeqz TMP3, ->fff_restv + | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv + | + |.ffunc rawget + | ld CARG2, 0(BASE) + | sltiu TMP0, NARGS8:RC, 16 + | gettp TMP1, CARG2 + | cleartp CARG2 + | addi TMP1, TMP1, -LJ_TTAB + | or TMP0, TMP0, TMP1 + | addi CARG3, BASE, 8 + | bxnez TMP0, ->fff_fallback + | mv CARG1, L + | call_intern ff_rawget, lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | ld CARG1, 0(CRET1) + | j ->fff_restv + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ld CARG1, 0(BASE) + | xori TMP0, NARGS8:RC, 8 // Exactly one number argument. + | gettp TMP1, CARG1 + | sltu TMP1, TISNUM, TMP1 + | or TMP0, TMP0, TMP1 + | bxnez TMP0, ->fff_fallback // No args or CARG1 is not number + | j ->fff_restv + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | gettp TMP0, CARG1 + | addi TMP1, TMP0, -LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | bxeqz TMP1, ->fff_restv // String key? + | // Handle numbers inline, unless a number base metatable is present. + | ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM] + | sltu TMP0, TISNUM, TMP0 + | sd BASE, L->base // Add frame since C call can throw. + | or TMP0, TMP0, TMP1 + | bxnez TMP0, ->fff_fallback + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | ffgccheck + | mv CARG1, L + | mv CARG2, BASE + | call_intern ff_tostring, lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *. + | li TMP1, LJ_TSTR + | ld BASE, L->base + | settp CARG1, TMP1 + | j ->fff_restv + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | checktp CARG1, -LJ_TTAB, ->fff_fallback + | add TMP0, BASE, NARGS8:RC + | ld PC, FRAME_PC(BASE) + | sd TISNIL, 0(TMP0) // Set missing 2nd arg to nil. + | addi CARG2, BASE, 8 + | addi CARG3, BASE, -16 + | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | li RD, (2+1)*8 + | bxgtz CRET1, ->fff_res // Found key/value. + | mv TMP1, CRET1 + | mv CARG1, TISNIL + | bxeqz TMP1, ->fff_restv // End of traversal: return nil. + | ld CFUNC:RB, FRAME_FUNC(BASE) + | li RC, 2*8 + | cleartp CFUNC:RB + | j ->fff_fallback // Invalid key. + | + |.ffunc_1 pairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable + | ld TMP0, CFUNC:RB->upvalue[0] + | bxnez TAB:TMP2, ->fff_fallback +#else + | ld TMP0, CFUNC:RB->upvalue[0] +#endif + | sd TISNIL, 0(BASE) + | sd CARG1, -8(BASE) + | sd TMP0, -16(BASE) + | li RD, (3+1)*8 + | j ->fff_res + | + |.ffunc_2 ipairs_aux + | checktab CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | ld PC, FRAME_PC(BASE) + | sext.w TMP2, CARG2 + | addiw TMP2, TMP2, 1 + | sltu TMP3, TMP2, TMP0 + | zext.w TMP0, TMP2 + | settp_b TMP0, TISNUM + | sd TMP0, -16(BASE) + | beqz TMP3, >2 // Not in array part? + | slli TMP3, TMP2, 3 + | add TMP3, TMP1, TMP3 + | ld TMP1, 0(TMP3) + |1: + | li RD, (0+1)*8 + | bxeq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. + | sd TMP1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | lw TMP0, TAB:CARG1->hmask + | li RD, (0+1)*8 + | bxeqz TMP0, ->fff_res + | mv CARG2, TMP2 + | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | li RD, (0+1)*8 + | bxeqz CRET1, ->fff_res + | ld TMP1, 0(CRET1) + | j <1 + | + |.ffunc_1 ipairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable +#endif + | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] +#if LJ_52 + | bxnez TAB:TMP2, ->fff_fallback +#endif + | slli TMP1, TISNUM, 47 + | sd CARG1, -8(BASE) + | sd TMP1, 0(BASE) + | sd CFUNC:TMP0, -16(BASE) + | li RD, (3+1)*8 + | j ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | ld TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC + | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:TMP0, NARGS8:RC, -8 + | lbu TMP3, GL->hookmask + | mv TMP2, BASE + | bxltz NARGS8:TMP0, ->fff_fallback + | mv NARGS8:RC, NARGS8:TMP0 + | addi BASE, BASE, 16 + | // Remember active hook before pcall. + | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT + | andi TMP3, TMP3, 1 + | addi PC, TMP3, 16+FRAME_PCALL + | bxeqz NARGS8:RC, ->vm_call_dispatch + |1: + | add TMP0, BASE, NARGS8:RC + |2: + | ld TMP1, -16(TMP0) + | sd TMP1, -8(TMP0) + | addi TMP0, TMP0, -8 + | bne TMP0, BASE, <2 + | j ->vm_call_dispatch + | + |.ffunc xpcall + | ld TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC + | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:TMP0, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | lbu TMP1, GL->hookmask + | bxltz NARGS8:TMP0, ->fff_fallback + | gettp TMP2, CARG2 + | addi TMP2, TMP2, -LJ_TFUNC + | bxnez TMP2, ->fff_fallback // Traceback must be a function. + | mv TMP2, BASE + | mv NARGS8:RC, NARGS8:TMP0 + | addi BASE, BASE, 24 + | // Remember active hook before pcall. + | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT + | sd CARG2, 0(TMP2) // Swap function and traceback. + | andi TMP3, TMP3, 1 + | sd CARG1, 8(TMP2) + | addi PC, TMP3, 24+FRAME_PCALL + | bnez NARGS8:RC, <1 + | j ->vm_call_dispatch + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback + |.else + |.ffunc coroutine_wrap_aux + | ld L:CARG1, CFUNC:RB->upvalue[0].gcr + | cleartp L:CARG1 + |.endif + | lbu TMP0, L:CARG1->status + | ld TMP1, L:CARG1->cframe + | ld CARG2, L:CARG1->top + | ld TMP2, L:CARG1->base + | addiw CARG4, TMP0, -LUA_YIELD + | add CARG3, CARG2, TMP0 + | addi TMP3, CARG2, 8 + | seqz TMP4, CARG4 + | neg TMP4, TMP4 + | xor CARG2, CARG2, TMP3 // CARG2 = TMP4 ? CARG2 : TMP3 + | and CARG2, CARG2, TMP4 + | xor CARG2, TMP3, CARG2 + | bxgtz CARG4, ->fff_fallback // st > LUA_YIELD? + | xor TMP2, TMP2, CARG3 + | or CARG4, TMP2, TMP0 + | bxnez TMP1, ->fff_fallback // cframe != 0? + | ld TMP0, L:CARG1->maxstack + | ld PC, FRAME_PC(BASE) + | bxeqz CARG4, ->fff_fallback // base == top && st == 0? + | add TMP2, CARG2, NARGS8:RC + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | bxltu TMP0, TMP2, ->fff_fallback // Stack overflow? + |1: + |.if resume + | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. + | addi NARGS8:RC, NARGS8:RC, -8 + | addi TMP2, TMP2, -8 + |.endif + | sd TMP2, L:CARG1->top + | sd BASE, L->top + | add TMP1, BASE, NARGS8:RC + | mv CARG3, CARG2 + |2: // Move args to coroutine. + | ld TMP0, 0(BASE) + | sltu TMP3, BASE, TMP1 + | addi BASE, BASE, 8 + | beqz TMP3, >3 + | sd TMP0, 0(CARG3) + | addi CARG3, CARG3, 8 + | j <2 + |3: + | mv L:RA, L:CARG1 + | jal ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | // Returns thread status. + |4: + | ld TMP2, L:RA->base + | sltiu TMP1, CRET1, LUA_YIELD+1 + | ld TMP3, L:RA->top + | li_vmstate INTERP + | ld BASE, L->base + | sd L, GL->cur_L + | st_vmstate + | sub RD, TMP3, TMP2 + | beqz TMP1, >8 + | ld TMP0, L->maxstack + | add TMP1, BASE, RD + | beqz RD, >6 // No results? + | add TMP3, TMP2, RD + | bltu TMP0, TMP1, >9 // Need to grow stack? + | sd TMP2, L:RA->top // Clear coroutine stack. + | mv TMP1, BASE + |5: // Move results from coroutine. + | ld TMP0, 0(TMP2) + | addi TMP2, TMP2, 8 + | sd TMP0, 0(TMP1) + | addi TMP1, TMP1, 8 + | bltu TMP2, TMP3, <5 + |6: + |.if resume + | mov_true TMP1 + | addi RD, RD, 16 + |7: + | sd TMP1, -8(BASE) // Prepend true/false to results. + | addi RA, BASE, -8 + |.else + | mv RA, BASE + | addi RD, RD, 8 + |.endif + | andi TMP0, PC, FRAME_TYPE + | sd PC, SAVE_PC(sp) + | mv MULTRES, RD + |// bxeqz TMP0, ->BC_RET_Z // Local label 9 in use + | bnez TMP0, >6 + | j ->BC_RET_Z + |6: + | j ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | addi TMP3, TMP3, -8 + | mov_false TMP1 + | li RD, (2+1)*8 + | ld TMP0, 0(TMP3) + | sd TMP3, L:RA->top // Remove error from coroutine stack. + | sd TMP0, 0(BASE) // Copy error message. + | j <7 + |.else + | mv CARG1, L + | mv CARG2, L:RA + | // (lua_State *L, lua_State *co) + | call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err + |.endif + | + |9: // Handle stack expansion on return from yield. + | mv CARG1, L + | srliw CARG2, RD, 3 + | // (lua_State *L, int n) + |.if resume + | call_intern ff_coroutine_resume, lj_state_growstack + |.else + | call_intern ff_coroutine_wrap_aux, lj_state_growstack + |.endif + | mv CRET1, x0 + | j <4 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ld TMP0, L->cframe + | add TMP1, BASE, NARGS8:RC + | li CRET1, LUA_YIELD + | sd BASE, L->base + | andi TMP0, TMP0, CFRAME_RESUME + | sd TMP1, L->top + | bxeqz TMP0, ->fff_fallback + | sd x0, L->cframe + | sb CRET1, L->status + | j ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.macro math_round, func, rm + |->ff_math_ .. func: + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | bxeqz NARGS8:RC, ->fff_fallback + | fmv.d.x FARG1, CARG1 + | bxeq TMP0, TISNUM, ->fff_restv + | srli TMP1, CARG1, 52 // Extract exponent (and sign). + | bxgeu TMP0, TISNUM, ->fff_fallback + | andi TMP1, TMP1, 0x7ff // Extract exponent. + | slti TMP2, TMP1, 1023 + 52 + 1 // 1023: Bias, 52: Max fraction + | bxeqz TMP2, ->fff_resn // Less than 2^52 / Not NaN? + | fcvt.l.d TMP3, FARG1, rm + | fcvt.d.l FTMP1, TMP3 + | fsgnj.d FRET1, FTMP1, FARG1 + | j ->fff_resn + |.endmacro + | + | math_round floor, rdn + | math_round ceil, rup + | + |.ffunc_1 math_abs + | gettp CARG2, CARG1 + | addi TMP2, CARG2, -LJ_TISNUM + | sext.w TMP1, CARG1 + | bnez TMP2, >1 + | sraiw TMP0, TMP1, 31 // Extract sign. int + | xor TMP1, TMP1, TMP0 + | sub CARG1, TMP1, TMP0 + | slli TMP3, CARG1, 32 + | settp CARG1, TISNUM + | bxgez TMP3, ->fff_restv + | lui CARG1, 0x41e00 // 2^31 as a double. + | slli CARG1, CARG1, 32 + | j ->fff_restv + |1: + | sltiu TMP2, CARG2, LJ_TISNUM + | slli CARG1, CARG1, 1 + | srli CARG1, CARG1, 1 + | bxeqz TMP2, ->fff_fallback // int + |// fallthrough + | + |->fff_restv: + | // CARG1 = TValue result. + | ld PC, FRAME_PC(BASE) + | sd CARG1, -16(BASE) + |->fff_res1: + | // RA = results, PC = return. + | li RD, (1+1)*8 + |->fff_res: + | // RA = results, RD = (nresults+1)*8, PC = return. + | andi TMP0, PC, FRAME_TYPE + | mv MULTRES, RD + | addi RA, BASE, -16 + | bxnez TMP0, ->vm_return + | lw INS, -4(PC) + | decode_RB8 RB, INS + |5: + | bltu RD, RB, >6 // More results expected? + | decode_RA8a TMP0, INS + | ins_next1 + | decode_RA8b TMP0 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | sub BASE, RA, TMP0 + | ins_next2 + | + |6: // Fill up results with nil. + | add TMP1, RA, RD + | addi RD, RD, 8 + | sd TISNIL, -8(TMP1) + | j <5 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | call_extern ff_math_extern, func + | j ->fff_resn + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | call_extern ff_math_extern2, func + | j ->fff_resn + |.endmacro + | + |.ffunc_n math_sqrt + | fsqrt.d FRET1, FARG1 + |->fff_resn: + | ld PC, FRAME_PC(BASE) + | fsd FRET1, -16(BASE) + | j ->fff_res1 + | + |.ffunc math_log + | li TMP1, 8 + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) + | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. + | checknum CARG1, ->fff_fallback + | call_extern ff_math_log, log + | j ->fff_resn + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | fld FARG1, 0(BASE) + | lw CARG1, 8(BASE) + | call_extern ff_math_ldexp, ldexp // (double x, int exp) + | j ->fff_resn + | + |.ffunc_n math_frexp + | ld PC, FRAME_PC(BASE) + | addi CARG1, GL, offsetof(global_State, tmptv) + | call_extern ff_math_frexp, frexp + | lw TMP1, GL->tmptv + | fcvt.d.w FARG2, TMP1 + | fsd FRET1, -16(BASE) + | fsd FARG2, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + | + |.ffunc_n math_modf + | addi CARG1, BASE, -16 + | ld PC, FRAME_PC(BASE) + | call_extern ff_math_modf, modf + | fsd FRET1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + | + |.macro math_minmax, name, ismax + | .ffunc_1 name + | add RB, BASE, NARGS8:RC + | addi RA, BASE, 8 + | checkint CARG1, >4 + |1: // Handle integers. + | ld CARG2, 0(RA) + | bxeq RA, RB, ->fff_restv + | sext.w CARG1, CARG1 + | checkint CARG2, >3 + | sext.w CARG2, CARG2 + | slt TMP0, CARG1, CARG2 + |.if ismax + | addi TMP1, TMP0, -1 + |.else + | neg TMP1, TMP0 + |.endif + | xor TMP2, CARG1, CARG2 // CARG1 = TMP1 ? CARG1 : CARG2 + | and TMP2, TMP2, TMP1 + | xor CARG1, CARG2, TMP2 + | addi RA, RA, 8 + | zext.w CARG1, CARG1 + | settp_b CARG1, TISNUM + | j <1 + |3: // Convert intermediate result to number and continue below. + | fcvt.d.w FARG1, CARG1 + | checknum CARG2, ->fff_fallback + | fld FARG2, 0(RA) + | j >6 + | + |4: + | fld FARG1, 0(BASE) + | checknum CARG1, ->fff_fallback + |5: // Handle numbers. + | ld CARG2, 0(RA) + | fld FARG2, 0(RA) + | bxgeu RA, RB, ->fff_resn + | checknum CARG2, >7 + |6: + |.if ismax + | flt.d TMP0, FARG2, FARG1 + |.else // min + | flt.d TMP0, FARG1, FARG2 + |.endif + | bnez TMP0, >8 // skip swap + | fmv.d FARG1, FARG2 + |8: + | addi RA, RA, 8 + | j <5 + |7: // Convert integer to number and continue above. + | checkint CARG2, ->fff_fallback + | fcvt.d.w FARG2, CARG2 + | j <6 + |.endmacro + | + | math_minmax math_min, 0 + | math_minmax math_max, 1 + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori TMP1, NARGS8:RC, 8 + | addi TMP0, TMP0, -LJ_TSTR + | or TMP1, TMP1, TMP0 + | cleartp STR:CARG1 + | bxnez TMP1, ->fff_fallback // Need exactly 1 string argument. + | lw TMP0, STR:CARG1->len + | ld PC, FRAME_PC(BASE) + | snez RD, TMP0 + | lbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). + | addiw RD, RD, 1 + | slliw RD, RD, 3 // RD = ((str->len != 0)+1)*8 + | settp_b TMP2, TISNUM + | sd TMP2, -16(BASE) + | j ->fff_res + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument. + | addi TMP0, TMP0, -LJ_TISNUM // Integer. + | li TMP2, 255 + | sext.w CARG1, CARG1 + | or TMP1, TMP1, TMP0 + | sltu TMP2, TMP2, CARG1 // !(255 < n). + | or TMP1, TMP1, TMP2 + | li CARG3, 1 + | bxnez TMP1, ->fff_fallback + | addi CARG2, sp, TMPD_OFS + | sb CARG1, TMPD(sp) + |->fff_newstr: + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | // (lua_State *L, const char *str, size_t l) + | call_intern fff_newstr, lj_str_new + | // Returns GCstr *. + | ld BASE, L->base + |->fff_resstr: + | li TMP1, LJ_TSTR + | settp CRET1, TMP1 + | j ->fff_restv + | + |.ffunc string_sub + | ffgccheck + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | ld CARG3, 16(BASE) + | addi TMP0, NARGS8:RC, -16 + | gettp TMP1, CARG1 + | bxltz TMP0, ->fff_fallback + | cleartp STR:CARG1, CARG1 + | li CARG4, -1 + | beqz TMP0, >1 + | sext.w CARG4, CARG3 + | checkint CARG3, ->fff_fallback + |1: + | checkint CARG2, ->fff_fallback + | addi TMP0, TMP1, -LJ_TSTR + | sext.w CARG3, CARG2 + | bxnez TMP0, ->fff_fallback + | lw CARG2, STR:CARG1->len + | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end + | addiw TMP0, CARG2, 1 + | bgez CARG4, >2 + | addw CARG4, CARG4, TMP0 // if (end < 0) end += len+1 + |2: + | bgez CARG3, >3 + | addw CARG3, CARG3, TMP0 // if (start < 0) start += len+1 + |3: + | bgez CARG4, >4 + | mv CARG4, x0 // if (end < 0) end = 0 + |4: + | bgtz CARG3, >5 + | li CARG3, 1 // if (start < 1) start = 1 + |5: + | ble CARG4, CARG2, >6 + | mv CARG4, CARG2 // if (end > len) end = len + |6: + | add CARG2, STR:CARG1, CARG3 + | sub CARG3, CARG4, CARG3 // len = end - start + | addi CARG2, CARG2, sizeof(GCstr)-1 + | addiw CARG3, CARG3, 1 // len += 1 + | bxgez CARG3, ->fff_newstr + |->fff_emptystr: // Return empty string. + | li TMP1, LJ_TSTR + | addi STR:CARG1, GL, offsetof(global_State, strempty) + | settp CARG1, TMP1 + | j ->fff_restv + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + | ld CARG2, 0(BASE) + | bxeqz NARGS8:RC, ->fff_fallback + | checkstr STR:CARG2, ->fff_fallback + | addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf) + | ld TMP0, SBUF:CARG1->b + | sd L, SBUF:CARG1->L + | sd BASE, L->base + | sd TMP0, SBUF:CARG1->w + | sd PC, SAVE_PC(sp) + | call_intern ff_string_ .. name, lj_buf_putstr_ .. name + | call_intern ff_string_ .. name, lj_buf_tostr // CARG1 = CRET1 + | ld BASE, L->base + | j ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |->vm_tobit_fb: + | fld FARG1, 0(BASE) + | bxeqz TMP1, ->fff_fallback + | fadd.d FARG1, FARG1, TOBIT + | fmv.x.w CRET1, FARG1 + | zext.w CRET1, CRET1 + | ret + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | gettp TMP0, CARG1 + | zext.w CRET1, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |1: + |.endmacro + | + |.macro .ffunc_bit_op, name, bins + | .ffunc_bit name + | addi TMP2, BASE, 8 + | add TMP3, BASE, NARGS8:RC + |1: + | ld TMP1, 0(TMP2) + | bxeq TMP2, TMP3, ->fff_resi + | gettp TMP0, TMP1 + | addi TMP2, TMP2, 8 + | bne TMP0, TISNUM, >2 + | zext.w TMP1, TMP1 + | bins CRET1, CRET1, TMP1 + | j <1 + |2: + | fld FARG1, -8(TMP2) + | sltiu TMP0, TMP0, LJ_TISNUM + | fadd.d FARG1, FARG1, TOBIT + | bxeqz TMP0, ->fff_fallback + | fmv.x.w TMP1, FARG1 + | zext.w TMP1, TMP1 + | bins CRET1, CRET1, TMP1 + | j <1 + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, or + |.ffunc_bit_op bxor, xor + | + |.ffunc_bit bswap + | srliw CARG2, CARG1, 8 + | lui CARG3, 16 + | addiw CARG3, CARG3, -256 + | and CARG2, CARG2, CARG3 + | srliw CARG3, CARG1, 24 + | or CARG2, CARG2, CARG3 + | slli CARG3, CARG1, 8 + | lui CARG4, 0x00ff0 + | and CARG3, CARG3, CARG4 + | slli CARG1, CARG1, 24 + | or CARG1, CARG1, CARG3 + | or CARG1, CARG1, CARG2 + | slli CARG1, CARG1, 32 + | srli CARG1, CARG1, 32 + | j ->fff_resi + | + |.ffunc_bit tobit + |->fff_resi: + | settp CARG1, TISNUM // CARG1 = CRET1 + | j ->fff_restv + | + |.ffunc_bit bnot + | not CRET1, CRET1 + | zext.w CRET1, CRET1 + | j ->fff_resi + | + |.macro .ffunc_bit_sh, name, shins + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |// mv CARG1, CRET1 // CARG1 = CRET1 + |1: + | gettp TMP0, CARG2 + | zext.w CARG2, CARG2 + | bxne TMP0, TISNUM, ->fff_fallback + | sext.w CARG1, CARG1 + | shins CRET1, CARG1, CARG2 + | zext.w CRET1, CRET1 + | j ->fff_resi + |.endmacro + | + |.ffunc_bit_sh lshift, sllw + |.ffunc_bit_sh rshift, srlw + |.ffunc_bit_sh arshift, sraw + | + |.macro .ffunc_bit_rot, name, rotinsa, rotinsb + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |// mv CARG1, CRET1 // CARG1 = CRET1 + |1: + | gettp TMP0, CARG2 + | zext.w CARG2, CARG2 + | bxne TMP0, TISNUM, ->fff_fallback + | sext.w CARG1, CARG1 + | neg TMP2, CARG2 + | rotinsa TMP1, CARG1, CARG2 + | rotinsb TMP0, CARG1, TMP2 + | or CRET1, TMP0, TMP1 + | zext.w CRET1, CRET1 + | j ->fff_resi + |.endmacro + | + |.ffunc_bit_rot rol, sllw, srlw + |.ffunc_bit_rot ror, srlw, sllw + | + |//----------------------------------------------------------------------- + | + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RB = CFUNC, RC = nargs*8 + | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. + | ld CARG3, CFUNC:RB->f + | add TMP1, BASE, NARGS8:RC + | sd BASE, L->base + | addi TMP0, TMP1, 8*LUA_MINSTACK + | ld TMP2, L->maxstack + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | sd TMP1, L->top + | mv CARG1, L + | bltu TMP2, TMP0, >5 // Need to grow stack. + | jalr CARG3 // (lua_State *L) + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ld BASE, L->base + | slliw RD, CRET1, 3 + | bxgtz CRET1, ->fff_res // Returned nresults+1? + |1: // Returned 0 or -1: retry fast path. + | ld LFUNC:RB, FRAME_FUNC(BASE) + | ld TMP0, L->top + | sub NARGS8:RC, TMP0, BASE + | cleartp LFUNC:RB + | bxnez CRET1, ->vm_call_tail // Returned -1? + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | andi TMP0, PC, FRAME_TYPE + | andi TMP1, PC, ~FRAME_TYPEP // TODO + | bnez TMP0, >3 + | lbu TMP1, OFS_RA(PC) + | slliw TMP1, TMP1, 3 + | addiw TMP1, TMP1, 16 + |3: + | sub TMP2, BASE, TMP1 + | j ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | li CARG2, LUA_MINSTACK + | mv CARG1, L + | call_intern vm_call_tail, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | mv CRET1, x0 // Set zero-flag to force retry. + | j <1 + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | mv MULTRES, ra + | add TMP0, BASE, NARGS8:RC // Calculate L->top. + | sd BASE, L->base + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | mv CARG1, L + | sd TMP0, L->top + | call_intern fff_gc_step, lj_gc_step // (lua_State *L) + | ld BASE, L->base + | mv ra, MULTRES // Help return address predictor. + | ld TMP0, L->top + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | sub NARGS8:RC, TMP0, BASE + | ret + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + | + |->vm_rethook: // Dispatch target for return hooks. + | lbu TMP3, GL->hookmask + | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? + | beqz TMP1, >1 + |5: // Re-dispatch to static ins. + | ld TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. + | jr TMP1 + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | lbu TMP3, GL->hookmask + | lw TMP2, GL->hookcount + | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? + | bnez TMP1, <5 + | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | addiw TMP2, TMP2, -1 + | beqz TMP1, <5 + | sw TMP2, GL->hookcount + | beqz TMP2, >1 + | andi TMP1, TMP3, LUA_MASKLINE + | beqz TMP1, <5 + |1: + | sw MULTRES, TMPD(sp) + | mv CARG2, PC + | sd BASE, L->base + | mv CARG1, L + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | call_intern vm_inshook, lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ld BASE, L->base + |4: // Re-dispatch to static ins. + | lw INS, -4(PC) + | decode_OP8 TMP1, INS + | add TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld TMP1, GG_DISP2STATIC(TMP0) + | decode_RA8 RA, INS + | decode_RD8b RD + | jr TMP1 + | + |->cont_hook: // Continue from hook yield. + | addi PC, PC, 4 + | lw MULTRES, -24(RB) // Restore MULTRES for *M ins. + | j <4 + | + | + |->vm_callhook: // Dispatch target for call hooks. + | mv CARG2, PC + | + |->cont_stitch: // Trace stitching. + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mv CARG1, L + | mv CARG2, PC + | sd BASE, L->base + | sw MULTRES, TMPD(sp) + | // (lua_State *L, const BCIns *pc) + | call_intern vm_profhook, lj_dispatch_profile + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | addi PC, PC, -4 + | ld BASE, L->base + | j ->cont_nop +#endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + | + |// Hard-float round to integer. + |// Modifies TMP0, FARG1, FARG5 + |.macro vm_round, rm + | fmv.x.d TMP0, FARG1 + | srli TMP0, TMP0, 52 // Extract exponent (and sign). + | andi TMP0, TMP0, 0x7ff // Extract exponent. + | addi TMP0, TMP0, -1075 + | bgtz TMP0, >1 // Less than 2^52 / Not NaN? + | fcvt.l.d TMP0, FARG1, rm + | fcvt.d.l FARG5, TMP0 + | fsgnj.d FRET1, FARG5, FARG1 + |1: + | ret + |.endmacro + | + | + |->vm_floor: + | vm_round rdn + |->vm_ceil: + | vm_round rup + | + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |// void lj_vm_fence_rw_rw() + |->vm_fence_rw_rw: + |.if JIT or FFI + | .long 0x0330000f + | ret + |.endif + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + | add RA, BASE, RA + | add RD, BASE, RD + if (op == BC_ISLT || op == BC_ISGE) { + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + } else { + | ld CARG2, 0(RA) + | ld CARG1, 0(RD) + | gettp CARG3, CARG2 + | gettp CARG4, CARG1 + } + | lhu TMP2, OFS_RD(PC) // TMP2=jump + | addi PC, PC, 4 + | bne CARG3, TISNUM, >2 + | decode_BC4b TMP2 + | bne CARG4, TISNUM, >5 + | sext.w CARG1, CARG1 + | sext.w CARG2, CARG2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | slt TMP1, CARG1, CARG2 + | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 + if (op == BC_ISLT || op == BC_ISGT) { + | neg TMP1, TMP1 + } else { + | addi TMP1, TMP1, -1 + } + | and TMP2, TMP2, TMP1 + |1: + | add PC, PC, TMP2 + | ins_next + | + |2: // RA is not an integer. + | sltiu TMP1, CARG3, LJ_TISNUM + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | bxeqz TMP1, ->vmeta_comp + | sltiu TMP1, CARG4, LJ_TISNUM + | decode_BC4b TMP2 + | beqz TMP1, >4 + | fmv.d.x FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + |3: // RA and RD are both numbers. + | addw TMP2, TMP2, TMP3 + if (op == BC_ISLT) { + | flt.d TMP3, FTMP0, FTMP2 + | neg TMP3, TMP3 + } else if (op == BC_ISGE) { + | flt.d TMP3, FTMP0, FTMP2 + | addi TMP3, TMP3, -1 + } else if (op == BC_ISLE) { + | fle.d TMP3, FTMP2, FTMP0 + | neg TMP3, TMP3 + } else if (op == BC_ISGT) { + | fle.d TMP3, FTMP2, FTMP0 + | addi TMP3, TMP3, -1 + } + | and TMP2, TMP2, TMP3 + | j <1 + | + |4: // RA is a number, RD is not a number. + | // RA is a number, RD is an integer. Convert RD to a number. + | bxne CARG4, TISNUM, ->vmeta_comp + if (op == BC_ISLT || op == BC_ISGE) { + | fcvt.d.w FTMP2, CARG2 + | fmv.d.x FTMP0, CARG1 + } else { + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + } + | j <3 + | + |5: // RA is an integer, RD is not an integer + | sltiu TMP1, CARG4, LJ_TISNUM + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | bxeqz TMP1, ->vmeta_comp + | // RA is an integer, RD is a number. Convert RA to a number. + if (op == BC_ISLT || op == BC_ISGE) { + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + } else { + | fcvt.d.w FTMP2, CARG2 + | fmv.d.x FTMP0, CARG1 + } + | j <3 + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + | add RA, BASE, RA + | add RD, BASE, RD + | addi PC, PC, 4 + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, -4+OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | sltu TMP0, TISNUM, CARG3 + | sltu TMP1, TISNUM, CARG4 + | or TMP0, TMP0, TMP1 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + if (vk) { + | beqz TMP0, ->BC_ISEQN_Z + } else { + | beqz TMP0, ->BC_ISNEN_Z + } + |// Either or both types are not numbers. + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | decode_BC4b TMP2 + | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 + | bne CARG1, CARG2, >2 + | // Tag and value are equal. + if (vk) { + |->BC_ISEQV_Z: + | add PC, PC, TMP2 + } + |1: + | ins_next + | + |2: // Check if the tags are the same and it's a table or userdata. + | xor TMP3, CARG3, CARG4 // Same type? + | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1 + | beqz TMP3, >3 + | mv TMP0, x0 // TMP0=0: not same type, or same type table/userdata + |3: + | cleartp TAB:TMP1, CARG1 + if (vk) { + | beqz TMP0, <1 + } else { + | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. + } + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:TMP3, TAB:TMP1->metatable + if (vk) { + | beqz TAB:TMP3, <1 // No metatable? + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<BC_ISEQV_Z // No metatable? + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? + } + | j ->vmeta_equal // Handle __eq metamethod. + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target + | add RA, BASE, RA + | addi PC, PC, 4 + | ld CARG1, 0(RA) + | sub RD, KBASE, RD + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, -8(RD) // KBASE-8-str_const*8 + | li TMP0, LJ_TSTR + | decode_BC4b TMP2 + | settp CARG2, TMP0 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D + | addw TMP2, TMP2, TMP3 + if (vk) { + | seqz TMP4, TMP0 + } else { + | snez TMP4, TMP0 + } + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + | ins_next + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + | add RA, BASE, RA + | add RD, KBASE, RD + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | addi PC, PC, 4 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | decode_BC4b TMP2 + | bne CARG3, TISNUM, >4 + | addw TMP2, TMP2, TMP3 + | bne CARG4, TISNUM, >6 + | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D + |1: + if (vk) { + | seqz TMP4, TMP0 + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + |2: + } else { + | snez TMP4, TMP0 + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + |2: + | add PC, PC, TMP2 + } + |3: + | ins_next + | + |4: // RA is not an integer. + | addw TMP2, TMP2, TMP3 + | bgeu CARG3, TISNUM, <2 + | fmv.d.x FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + | bne CARG4, TISNUM, >5 + |// RA is a number, RD is an integer. + | fcvt.d.w FTMP2, CARG2 + | + |5: // RA and RD are both numbers. + | feq.d TMP0, FTMP0, FTMP2 + | seqz TMP0, TMP0 + | j <1 + | + |6: // RA is an integer, RD is a number. + | bgeu CARG4, TISNUM, <2 + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + | j <5 + | + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target + | add RA, BASE, RA + | srliw TMP0, RD, 3 + | ld TMP1, 0(RA) + | not TMP0, TMP0 // ~TMP0: ~0 ~1 ~2 + | lhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target + | gettp TMP1, TMP1 + | addi PC, PC, 4 + | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D + | decode_BC4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 + if (vk) { + | seqz TMP4, TMP0 + } else { + | snez TMP4, TMP0 + } + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, RD = src*8, JMP with RD = target + | add RD, BASE, RD + | lhu TMP2, OFS_RD(PC) + | ld TMP0, 0(RD) + | addi PC, PC, 4 + | gettp TMP0, TMP0 + | add RA, BASE, RA + | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false + | decode_BC4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | ld CRET1, 0(RD) + | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 + if (op == BC_IST || op == BC_ISTC) { + | beqz TMP0, >1 + if (op == BC_ISTC) { + | sd CRET1, 0(RA) + } + } else { + | bnez TMP0, >1 + if (op == BC_ISFC) { + | sd CRET1, 0(RA) + } + } + | add PC, PC, TMP2 + |1: + | ins_next + break; + + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | add TMP0, BASE, RA + | srliw TMP1, RD, 3 + | ld TMP0, 0(TMP0) + | gettp TMP0, TMP0 + | add TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0 + | bxnez TMP0, ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | add TMP0, BASE, RA + | ld TMP0, 0(TMP0) + | checknum TMP0, ->vmeta_istype + | ins_next + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA = dst*8, RD = src*8 + | add RD, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_NOT: + | // RA = dst*8, RD = src*8 + | add RD, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | li TMP1, LJ_TTRUE + | ins_next1 + | gettp TMP0, TMP0 + | sltu TMP0, TMP1, TMP0 + | addiw TMP0, TMP0, 1 + | slli TMP0, TMP0, 47 + | not TMP0, TMP0 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_UNM: + | // RA = dst*8, RD = src*8 + | add RB, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RB) + | lui TMP1, 0x80000 + | gettp CARG3, TMP0 + | bne CARG3, TISNUM, >1 + | negw TMP0, TMP0 + | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + | zext.w TMP0, TMP0 + | settp_b TMP0, TISNUM + | j >2 + |1: + | sltiu TMP3, CARG3, LJ_TISNUM + | slli TMP1, TMP1, 32 + | bxeqz TMP3, ->vmeta_unm + | xor TMP0, TMP0, TMP1 // sign => ~sign + |2: + | sd TMP0, 0(RA) + | ins_next + break; + case BC_LEN: + | // RA = dst*8, RD = src*8 + | add CARG2, BASE, RD + | ld TMP0, 0(CARG2) + | add RA, BASE, RA + | gettp TMP1, TMP0 + | addi TMP2, TMP1, -LJ_TSTR + | cleartp STR:CARG1, TMP0 + | bnez TMP2, >2 + | lwu CARG1, STR:CARG1->len + |1: + | settp_b CARG1, TISNUM + | sd CARG1, 0(RA) + | ins_next + |2: + | addi TMP2, TMP1, -LJ_TTAB + | bxnez TMP2, ->vmeta_len +#if LJ_52 + | ld TAB:TMP2, TAB:CARG1->metatable + | bnez TAB:TMP2, >9 + |3: +#endif + |->BC_LEN_Z: + | call_intern BC_LEN, lj_tab_len // (GCtab *t) + | // Returns uint32_t (but less than 2^31). + | j <1 +#if LJ_52 + |9: + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_len +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro fpmod, a, b, c + | fdiv.d FARG1, b, c + | jal ->vm_floor // floor(b/c) + | fmul.d a, FRET1, c + | fsub.d a, b, a // b - floor(b/c)*c + |.endmacro + | + |.macro ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||if (vk == 1) { + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | decode_RB8 RC, INS + | decode_RDtoRC8 RB, RD + ||} else { + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + ||} + ||switch (vk) { + ||case 0: // suffix is VN + | add RB, BASE, RB + | add RC, KBASE, RC + || break; + ||case 1: // suffix is NV + | add RC, BASE, RC + | add RB, KBASE, RB + || break; + ||default: // CAT or suffix is VV + | add RB, BASE, RB + | add RC, BASE, RC + || break; + ||} + |.endmacro + | + |.macro ins_arithfp, fpins, itype1, itype2 + | fld FTMP0, 0(RB) + | sltu itype1, itype1, TISNUM + | sltu itype2, itype2, TISNUM + | fld FTMP2, 0(RC) + | and itype1, itype1, itype2 + | add RA, BASE, RA + | bxeqz itype1, ->vmeta_arith + | fpins FRET1, FTMP0, FTMP2 + | ins_next1 + | fsd FRET1, 0(RA) + | ins_next2 + |.endmacro + | + |.macro ins_arithead, itype1, itype2, tval1, tval2 + | ld tval1, 0(RB) + | ld tval2, 0(RC) + | // Check for two integers. + | gettp itype1, tval1 + | gettp itype2, tval2 + |.endmacro + | + |.macro ins_arithdn, intins, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | bne TMP0, TISNUM, >1 + | bne TMP1, TISNUM, >1 + | sext.w CARG3, CARG1 + | sext.w CARG4, CARG2 + |.if "intins" == "addw" + | intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. + | xor TMP2, CRET1, CARG4 + | and TMP1, TMP1, TMP2 + | add RA, BASE, RA + | bxltz TMP1, ->vmeta_arith + |.elif "intins" == "subw" + | intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. + | xor TMP2, CARG3, CARG4 + | and TMP1, TMP1, TMP2 + | add RA, BASE, RA + | bxltz TMP1, ->vmeta_arith + |.elif "intins" == "mulw" + | mul TMP2, CARG3, CARG4 + | add RA, BASE, RA + | sext.w CRET1, TMP2 + | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow. + |.endif + | zext.w CRET1, CRET1 + | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | + |.macro ins_arithdiv, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | + |.macro ins_arithmod, fpins, BC + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | bne TMP0, TISNUM, >1 + | bne TMP1, TISNUM, >1 + | sext.w CARG1, CARG1 + | sext.w CARG2, CARG2 + | add RA, BASE, RA + | bxeqz CARG2, ->vmeta_arith + | call_intern BC, lj_vm_modi + | zext.w CRET1, CRET1 + | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn addw, fadd.d + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arithdn subw, fsub.d + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithdn mulw, fmul.d + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithdiv fdiv.d + break; + case BC_MODVN: + | ins_arithmod fpmod, BC_MODVN + break; + case BC_MODNV: + | ins_arithmod fpmod, BC_MODNV + break; + case BC_MODVV: + | ins_arithmod fpmod, BC_MODVV + break; + case BC_POW: + | ins_arithpre + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | add RA, BASE, RA + | bxeqz TMP0, ->vmeta_arith + | fld FARG1, 0(RB) + | fld FARG2, 0(RC) + | call_extern BC_POW, pow + | ins_next1 + | fsd FRET1, 0(RA) + | ins_next2 + break; + + case BC_CAT: + | // RA = dst*8, RB = src_start*8, RC = src_end*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | sub CARG3, RC, RB + | sd BASE, L->base + | add CARG2, BASE, RC + | mv MULTRES, RB + |->BC_CAT_Z: + | srliw CARG3, CARG3, 3 + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). + | ld BASE, L->base + | bxnez CRET1, ->vmeta_binop + | add RB, BASE, MULTRES + | ld TMP0, 0(RB) + | add RA, BASE, RA + | sd TMP0, 0(RA) + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RD = str_const*8 (~) + | sub TMP1, KBASE, RD + | li TMP2, LJ_TSTR + | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 + | add RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next + break; + case BC_KCDATA: + break; + case BC_KSHORT: + | // RA = dst*8, RD = int16_literal*8 + | sraiw RD, INS, 16 + | add RA, BASE, RA + | zext.w RD, RD + | ins_next1 + | settp_b RD, TISNUM + | sd RD, 0(RA) + | ins_next2 + break; + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | add RD, KBASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KPRI: + | // RA = dst*8, RD = primitive_type*8 (~) + | add RA, BASE, RA + | slli TMP0, RD, 44 // 44+3 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KNIL: + | // RA = base*8, RD = end*8 + | add RA, BASE, RA + | sd TISNIL, 0(RA) + | addi RA, RA, 8 + | add RD, BASE, RD + |1: + | sd TISNIL, 0(RA) + | slt TMP0, RA, RD + | addi RA, RA, 8 + | bnez TMP0, <1 + | ins_next + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RD = uvnum*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RA, BASE, RA + | cleartp LFUNC:TMP0 + | add RD, RD, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RD->uvptr + | ld TMP1, UPVAL:TMP0->v + | ld TMP2, 0(TMP1) + | ins_next1 + | sd TMP2, 0(RA) + | ins_next2 + break; + case BC_USETV: + | // RA = uvnum*8, RD = src*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RD, BASE, RD + | cleartp LFUNC:TMP0 + | add RA, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | lbu TMP3, UPVAL:TMP0->marked + | ld CARG2, UPVAL:TMP0->v + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbu TMP0, UPVAL:TMP0->closed + | gettp TMP2, CRET1 + | sd CRET1, 0(CARG2) + | or TMP3, TMP3, TMP0 + | li TMP0, LJ_GC_BLACK|1 + | addi TMP2, TMP2, -(LJ_TNUMX+1) + | beq TMP3, TMP0, >2 // Upvalue is closed and black? + |1: + | ins_next + | + |2: // Check if new value is collectable. + | sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1) + | cleartp GCOBJ:CRET1, CRET1 + | beqz TMP0, <1 // tvisgcv(v) + | lbu TMP3, GCOBJ:CRET1->gch.marked + | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) + | beqz TMP3, <1 + | // Crossed a write barrier. Move the barrier forward. + | mv CARG1, GL + | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETS: + | // RA = uvnum*8, RD = str_const*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | sub TMP1, KBASE, RD + | cleartp LFUNC:TMP0 + | add RA, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RA->uvptr + | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 + | lbu TMP2, UPVAL:TMP0->marked + | ld CARG2, UPVAL:TMP0->v + | lbu TMP3, STR:TMP1->marked + | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv) + | lbu TMP2, UPVAL:TMP0->closed + | li TMP0, LJ_TSTR + | settp TMP1, TMP0 + | sd TMP1, 0(CARG2) + | bnez TMP4, >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | beqz TMP2, <1 + | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str) + | beqz TMP0, <1 + | // Crossed a write barrier. Move the barrier forward. + | mv CARG1, GL + | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETN: + | // RA = uvnum*8, RD = num_const*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RD, KBASE, RD + | cleartp LFUNC:TMP0 + | add TMP0, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:TMP0->uvptr + | ld TMP1, 0(RD) + | ld TMP0, UPVAL:TMP0->v + | sd TMP1, 0(TMP0) + | ins_next + break; + case BC_USETP: + | // RA = uvnum*8, RD = primitive_type*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | slli TMP2, RD, 44 + | cleartp LFUNC:TMP0 + | add TMP0, RA, LFUNC:TMP0 + | not TMP2, TMP2 + | ld UPVAL:TMP0, LFUNC:TMP0->uvptr + | ld TMP1, UPVAL:TMP0->v + | sd TMP2, 0(TMP1) + | ins_next + break; + + case BC_UCLO: + | // RA = level*8, RD = target + | ld TMP2, L->openupval + | branch_RD // Do this first since RD is not saved. + | sd BASE, L->base + | mv CARG1, L + | beqz TMP2, >1 + | add CARG2, BASE, RA + | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level) + | ld BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) + | sub TMP1, KBASE, RD + | ld CARG3, FRAME_FUNC(BASE) + | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | cleartp CARG3 + | mv CARG1, L + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call_intern BC_FNEW, lj_func_newL_gc + | // Returns GCfuncL *. + | li TMP0, LJ_TFUNC + | ld BASE, L->base + | settp CRET1, TMP0 + | add RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) + | ld TMP0, GL->gc.total + | ld TMP1, GL->gc.threshold + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | bgeu TMP0, TMP1, >5 + |1: + if (op == BC_TNEW) { + | srliw CARG2, RD, 3 + | andi CARG2, CARG2, 0x7ff + | lzi TMP0, 0x801 + | addiw TMP2, CARG2, -0x7ff + | srliw CARG3, RD, 14 + | seqz TMP3, TMP2 + | neg TMP4, TMP3 + | xor CARG1, TMP0, CARG2 // CARG2 = TMP3 ? TMP0 : CARG2 + | and CARG1, CARG1, TMP4 + | xor CARG2, CARG2, CARG1 + | mv CARG1, L + | // (lua_State *L, int32_t asize, uint32_t hbits) + | call_intern BC_TNEW, lj_tab_new + | // Returns Table *. + } else { + | sub TMP1, KBASE, RD + | mv CARG1, L + | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 + | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt) + | // Returns Table *. + } + | li TMP0, LJ_TTAB + | ld BASE, L->base + | ins_next1 + | settp CRET1, TMP0 + | add RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next2 + |5: + | mv MULTRES, RD + | mv CARG1, L + if (op == BC_TNEW) { + | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L) + } else { + | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L) + } + | mv RD, MULTRES + | j <1 + break; + + case BC_GGET: + | // RA = dst*8, RD = str_const*8 (~) + case BC_GSET: + | // RA = src*8, RD = str_const*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | sub TMP1, KBASE, RD + | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 + | cleartp LFUNC:TMP0 + | ld TAB:RB, LFUNC:TMP0->env + | add RA, BASE, RA + if (op == BC_GGET) { + | j ->BC_TGETS_Z + } else { + | j ->BC_TSETS_Z + } + break; + + case BC_TGETV: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | add RA, BASE, RA + | checktab TAB:RB, ->vmeta_tgetv + | gettp TMP3, TMP2 + | lw TMP0, TAB:RB->asize + | bne TMP3, TISNUM, >5 // Integer key? + | sext.w TMP2, TMP2 + | ld TMP1, TAB:RB->array + | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? + | slliw TMP2, TMP2, 3 + | add TMP2, TMP1, TMP2 + | ld CRET1, 0(TMP2) + | beq CRET1, TISNIL, >2 + |1: + | sd CRET1, 0(RA) + | ins_next + | + |2: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_tgetv + | + |5: + | li TMP0, LJ_TSTR + | cleartp RC, TMP2 + | bxne TMP3, TMP0, ->vmeta_tgetv // String key? + | j ->BC_TGETS_Z + break; + case BC_TGETS: + | // RA = dst*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | sub CARG3, KBASE, RC + | ld TAB:RB, 0(CARG2) + | add RA, BASE, RA + | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 + | checktab TAB:RB, ->vmeta_tgets1 + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slliw TMP0, TMP1, 5 + | slliw TMP1, TMP1, 3 + | subw TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |1: + | ld CARG1, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | ld TAB:TMP3, TAB:RB->metatable + | bne CARG1, RC, >4 + | beq CARG2, TISNIL, >5 // Key found, but nil value? + |3: + | sd CARG2, 0(RA) + | ins_next + | + |4: // Follow hash chain. + | mv NODE:TMP2, NODE:TMP1 + | bnez NODE:TMP1, <1 + | // End of hash chain: key not found, nil result. + | + |5: // Check for __index if table value is nil. + | mv CARG2, TISNIL + | beqz TAB:TMP3, <3 // No metatable: done. + | lbu TMP0, TAB:TMP3->nomm + | andi TMP0, TMP0, 1<vmeta_tgets + break; + case BC_TGETB: + | // RA = dst*8, RB = table*8, RC = index*8 + | decode_RB8 RB, INS + | add CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | add RA, BASE, RA + | srliw TMP0, RC, 3 + | checktab TAB:RB, ->vmeta_tgetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | bxgeu TMP0, TMP1, ->vmeta_tgetb + | add RC, TMP2, RC + | ld CRET1, 0(RC) + | beq CRET1, TISNIL, >5 + |1: + | sd CRET1, 0(RA) + | ins_next + | + |5: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! + break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add RB, BASE, RB + | add RC, BASE, RC + | ld TAB:CARG1, 0(RB) + | lw CARG2, 0(RC) + | add RA, BASE, RA + | cleartp TAB:CARG1 + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | bxgeu CARG2, TMP0, ->vmeta_tgetr // In array part? + | slliw TMP2, CARG2, 3 + | add TMP3, TMP1, TMP2 + | ld TMP1, 0(TMP3) + |->BC_TGETR_Z: + | ins_next1 + | sd TMP1, 0(RA) + | ins_next2 + break; + + case BC_TSETV: + | // RA = src*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | add RA, BASE, RA + | checktab TAB:RB, ->vmeta_tsetv + | sext.w RC, TMP2 + | checkint TMP2, >5 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | bxgeu RC, TMP0, ->vmeta_tsetv // Integer key and in array part? + | slliw TMP2, RC, 3 + | add TMP1, TMP1, TMP2 + | lbu TMP3, TAB:RB->marked + | ld TMP0, 0(TMP1) + | ld CRET1, 0(RA) + | beq TMP0, TISNIL, >3 + |1: + | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) + | sd CRET1, 0(TMP1) + | bnez TMP2, >7 + |2: + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP2, TAB:TMP2->nomm + | andi TMP2, TMP2, 1<vmeta_tsetv + |5: + | gettp TMP0, TMP2 + | addi TMP0, TMP0, -LJ_TSTR + | bxnez TMP0, ->vmeta_tsetv + | cleartp STR:RC, TMP2 + | j ->BC_TSETS_Z // String key? + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETS: + | // RA = src*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | sub CARG3, KBASE, RC + | ld TAB:RB, 0(CARG2) + | ld RC, -8(CARG3) // KBASE-8-str_const*8 + | add RA, BASE, RA + | cleartp STR:RC + | checktab TAB:RB, ->vmeta_tsets1 + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | sb x0, TAB:RB->nomm // Clear metamethod cache. + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slliw TMP0, TMP1, 5 + | slliw TMP1, TMP1, 3 + | subw TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + | fld FTMP0, 0(RA) + |1: + | ld TMP0, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | lbu TMP3, TAB:RB->marked + | bne TMP0, RC, >5 + | ld TAB:TMP0, TAB:RB->metatable + | beq CARG2, TISNIL, >4 // Key found, but nil value? + |2: + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table) + | fsd FTMP0, NODE:TMP2->val + | bnez TMP3, >7 + |3: + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | beqz TAB:TMP0, <2 // No metatable: done. + | lbu TMP0, TAB:TMP0->nomm + | andi TMP0, TMP0, 1<vmeta_tsets + | + |5: // Follow hash chain. + | mv NODE:TMP2, NODE:TMP1 + | bnez NODE:TMP1, <1 + | // End of hash chain: key not found, add a new one + | + | // But check for __newindex first. + | ld TAB:TMP2, TAB:RB->metatable + | addi CARG3, GL, offsetof(global_State, tmptv) + | beqz TAB:TMP2, >6 // No metatable: continue. + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | sd RC, 0(CARG3) + | sd BASE, L->base + | mv CARG2, TAB:RB + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | // (lua_State *L, GCtab *t, TValue *k) + | call_intern BC_TSETS, lj_tab_newkey + | // Returns TValue *. + | ld BASE, L->base + | fsd FTMP0, 0(CRET1) + | j <3 // No 2nd write barrier needed. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <3 + break; + case BC_TSETB: + | // RA = src*8, RB = table*8, RC = index*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add RA, BASE, RA + | ld TAB:RB, 0(CARG2) + | srliw TMP0, RC, 3 + | checktab RB, ->vmeta_tsetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | bxgeu TMP0, TMP1, ->vmeta_tsetb + | add RC, TMP2, RC + | ld TMP1, 0(RC) + | lbu TMP3, TAB:RB->marked + | beq TMP1, TISNIL, >5 + |1: + | ld CRET1, 0(RA) + | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table) + | sd CRET1, 0(RC) + | bnez TMP1, >7 + |2: + | ins_next + | + |5: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG1, BASE, RB + | add CARG3, BASE, RC + | ld TAB:CARG2, 0(CARG1) + | lw CARG3, 0(CARG3) + | cleartp TAB:CARG2 + | lbu TMP3, TAB:CARG2->marked + | lw TMP0, TAB:CARG2->asize + | ld TMP1, TAB:CARG2->array + | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) + | add RA, BASE, RA + | bnez TMP2, >7 + |2: + | bxgeu CARG3, TMP0, ->vmeta_tsetr // In array part? + | slliw TMP2, CARG3, 3 + | add CRET1, TMP1, TMP2 + |->BC_TSETR_Z: + | ld TMP1, 0(RA) + | ins_next1 + | sd TMP1, 0(CRET1) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, CRET1, <2 + break; + + case BC_TSETM: + | // RA = base*8 (table at base-1), RD = num_const*8 (start index) + | add RA, BASE, RA + |1: + | add TMP3, KBASE, RD + | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. + | addiw TMP0, MULTRES, -8 + | lw TMP3, 0(TMP3) // Integer constant is in lo-word. + | srliw CARG3, TMP0, 3 + | beqz TMP0, >4 // Nothing to copy? + | cleartp TAB:CARG2 + | addw CARG3, CARG3, TMP3 + | lw TMP2, TAB:CARG2->asize + | slliw TMP1, TMP3, 3 + | lbu TMP3, TAB:CARG2->marked + | ld CARG1, TAB:CARG2->array + | bltu TMP2, CARG3, >5 + | add TMP2, RA, TMP0 + | add TMP1, TMP1, CARG1 + | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |3: // Copy result slots to table. + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | sd CRET1, 0(TMP1) + | addi TMP1, TMP1, 8 + | bltu RA, TMP2, <3 + | bnez TMP0, >7 + |4: + | ins_next + | + |5: // Need to resize array part. + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | mv BASE, RD + | mv CARG1, L + | // (lua_State *L, GCtab *t, int nasize) + | call_intern BC_TSETM, lj_tab_reasize + | // Must not reallocate the stack. + | mv RD, BASE + | ld BASE, L->base // Reload BASE for lack of a saved register. + | j <1 + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, TMP0, <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 + | decode_RDtoRC8 NARGS8:RC, RD + | addw NARGS8:RC, NARGS8:RC, MULTRES + | j ->BC_CALL_Z + break; + case BC_CALL: + | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 + | decode_RDtoRC8 NARGS8:RC, RD + |->BC_CALL_Z: + | mv TMP2, BASE + | add BASE, BASE, RA + | ld LFUNC:RB, 0(BASE) + | addi BASE, BASE, 16 + | addiw NARGS8:RC, NARGS8:RC, -8 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs*8 + | addw NARGS8:RD, NARGS8:RD, MULTRES + | j ->BC_CALLT_Z1 + break; + case BC_CALLT: + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + |->BC_CALLT_Z1: + | add RA, BASE, RA + | ld LFUNC:RB, 0(RA) + | mv NARGS8:RC, RD + | ld TMP1, FRAME_PC(BASE) + | addi RA, RA, 16 + | addiw NARGS8:RC, NARGS8:RC, -8 + | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt + |->BC_CALLT_Z: + | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. + | lbu TMP3, LFUNC:CARG3->ffid + | xori TMP2, TMP1, FRAME_VARG + | bnez TMP0, >7 + |1: + | sd LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. + | sltiu CARG4, TMP3, 2 // (> FF_C) Calling a fast function? + | mv TMP2, BASE + | mv RB, CARG3 + | mv TMP3, NARGS8:RC + | beqz NARGS8:RC, >3 + |2: + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | addiw TMP3, TMP3, -8 + | sd CRET1, 0(TMP2) + | addi TMP2, TMP2, 8 + | bnez TMP3, <2 + |3: + | or TMP0, TMP0, CARG4 + | beqz TMP0, >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | lw INS, -4(TMP1) + | decode_RA8 RA, INS + | sub TMP1, BASE, RA + | ld TMP1, -32(TMP1) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. + | j <4 + | + |7: // Tailcall from a vararg function. + | andi CARG4, TMP2, FRAME_TYPEP + | sub TMP2, BASE, TMP2 // Relocate BASE down. + | bnez CARG4, <1 // Vararg frame below? + | mv BASE, TMP2 + | ld TMP1, FRAME_PC(TMP2) + | andi TMP0, TMP1, FRAME_TYPE + | j <1 + break; + + case BC_ITERC: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) + | mv TMP2, BASE // Save old BASE for vmeta_call. + | add BASE, BASE, RA + | ld RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. + | ld CARG1, -16(BASE) + | ld CARG2, -8(BASE) + | li NARGS8:RC, 16 // Iterators get 2 arguments. + | sd RB, 0(BASE) // Copy callable. + | sd CARG1, 16(BASE) // Copy state. + | sd CARG2, 24(BASE) // Copy control var. + | addi BASE, BASE, 16 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + |->vm_IITERN: + | add RA, BASE, RA + | ld TAB:RB, -16(RA) + | lw RC, -8(RA) // Get index from control var. + | cleartp TAB:RB + | addi PC, PC, 4 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | slli CARG3, TISNUM, 47 + |1: // Traverse array part. + | bleu TMP0, RC, >5 // Index points after array part? + | slliw TMP3, RC, 3 + | add TMP3, TMP1, TMP3 + | ld CARG1, 0(TMP3) + | lhu RD, -4+OFS_RD(PC) // ITERL RD + | or TMP2, RC, CARG3 + | addiw RC, RC, 1 + | beq CARG1, TISNIL, <1 // Skip holes in array part. + | sd TMP2, 0(RA) + | sd CARG1, 8(RA) + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | decode_BC4b RD + | add RD, RD, TMP3 + | sw RC, -8(RA) // Update control var. + | add PC, PC, RD + |3: + | ins_next + | + |5: // Traverse hash part. + | lw TMP1, TAB:RB->hmask + | subw RC, RC, TMP0 + | ld TMP2, TAB:RB->node + |6: + | bltu TMP1, RC, <3 // End of iteration? Branch to ITERL+1. + | slliw TMP3, RC, 5 + | slliw RB, RC, 3 + | subw TMP3, TMP3, RB + | add NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8) + | ld CARG1, 0(NODE:TMP3) + | lhu RD, -4+OFS_RD(PC) // ITERL RD + | addiw RC, RC, 1 + | beq CARG1, TISNIL, <6 // Skip holes in hash part. + | ld CARG2, NODE:TMP3->key + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | sd CARG1, 8(RA) + | addw RC, RC, TMP0 + | decode_BC4b RD + | addw RD, RD, TMP3 + | sd CARG2, 0(RA) + | add PC, PC, RD + | sw RC, -8(RA) // Update control var. + | j <3 + break; + + case BC_ISNEXT: + | // RA = base*8, RD = target (points to ITERN) + | add RA, BASE, RA + | srliw TMP0, RD, 1 + | ld CFUNC:CARG1, -24(RA) + | add TMP0, PC, TMP0 + | ld CARG2, -16(RA) + | ld CARG3, -8(RA) + | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | checkfunc CFUNC:CARG1, >5 + | gettp CARG2, CARG2 + | addi CARG2, CARG2, -LJ_TTAB + | lbu TMP1, CFUNC:CARG1->ffid + | addi CARG3, CARG3, -LJ_TNIL + | or TMP3, CARG2, CARG3 + | addi TMP1, TMP1, -FF_next_N + | or TMP3, TMP3, TMP1 + | lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff + | bnez TMP3, >5 + | add PC, TMP0, TMP2 + | addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800) + | slli TMP1, TMP1, 32 + | sd TMP1, -8(RA) + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | li TMP3, BC_JMP + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) + | add PC, TMP0, TMP2 + | sb TMP1, OFS_OP(PC) + | j <1 + break; + + case BC_VARG: + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ld TMP0, FRAME_PC(BASE) + | decode_RDtoRC8 RC, RD + | decode_RB8 RB, INS + | add RC, BASE, RC + | add RA, BASE, RA + | addi RC, RC, FRAME_VARG + | add TMP2, RA, RB + | addi TMP3, BASE, -16 // TMP3 = vtop + | sub RC, RC, TMP0 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | sub TMP1, TMP3, RC + | beqz RB, >5 // Copy all varargs? + | addi TMP2, TMP2, -16 + |1: // Copy vararg slots to destination slots. + | ld CARG1, 0(RC) + | sltu TMP0, RC, TMP3 + | addi RC, RC, 8 + | bnez TMP0, >2 + | mv CARG1, TISNIL + |2: + | sd CARG1, 0(RA) + | sltu TMP0, RA, TMP2 + | addi RA, RA, 8 + | bnez TMP0, <1 + |3: + | ins_next + | + |5: // Copy all varargs. + | ld TMP0, L->maxstack + | li MULTRES, 8 // MULTRES = (0+1)*8 + | blez TMP1, <3 // No vararg slots? + | add TMP2, RA, TMP1 + | addi MULTRES, TMP1, 8 + | bltu TMP0, TMP2, >7 + |6: + | ld CRET1, 0(RC) + | addi RC, RC, 8 + | sd CRET1, 0(RA) + | addi RA, RA, 8 + | bltu RC, TMP3, <6 // More vararg slots? + | j <3 + | + |7: // Grow stack for varargs. + | sd RA, L->top + | sub RA, RA, BASE + | sd BASE, L->base + | sub BASE, RC, BASE // Need delta, because BASE may change. + | sd PC, SAVE_PC(sp) + | srliw CARG2, TMP1, 3 + | mv CARG1, L + | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n) + | mv RC, BASE + | ld BASE, L->base + | add RA, BASE, RA + | add RC, BASE, RC + | addi TMP3, BASE, -16 + | j <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RD = extra_nresults*8 + | addw RD, RD, MULTRES + | j ->BC_RET_Z1 + break; + + case BC_RET: + | // RA = results*8, RD = (nresults+1)*8 + |->BC_RET_Z1: + | ld PC, FRAME_PC(BASE) + | add RA, BASE, RA + | mv MULTRES, RD + |1: + | andi TMP0, PC, FRAME_TYPE + | xori TMP1, PC, FRAME_VARG + | bnez TMP0, ->BC_RETV_Z + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return + | lw INS, -4(PC) + | addi TMP2, BASE, -16 + | addi RC, RD, -8 + | decode_RA8 TMP0, INS + | decode_RB8 RB, INS + | sub BASE, TMP2, TMP0 + | add TMP3, TMP2, RB + | beqz RC, >3 + |2: + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | addi RC, RC, -8 + | sd CRET1, 0(TMP2) + | addi TMP2, TMP2, 8 + | bnez RC, <2 + |3: + | addi TMP3, TMP3, -8 + |5: + | bltu TMP2, TMP3, >6 + | ld LFUNC:TMP1, FRAME_FUNC(BASE) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next + | + |6: // Fill up results with nil. + | sd TISNIL, 0(TMP2) + | addi TMP2, TMP2, 8 + | j <5 + | + |->BC_RETV_Z: // Non-standard return case. + | andi TMP2, TMP1, FRAME_TYPEP + | bxnez TMP2, ->vm_return + | // Return from vararg function: relocate BASE down. + | sub BASE, BASE, TMP1 + | ld PC, FRAME_PC(BASE) + | j <1 + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | add RA, BASE, RA + | mv MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | xori TMP1, PC, FRAME_VARG + | bnez TMP0, ->BC_RETV_Z + | lw INS, -4(PC) + | addi TMP2, BASE, -16 + if (op == BC_RET1) { + | ld CRET1, 0(RA) + } + | decode_RB8 RB, INS + | decode_RA8 RA, INS + | sub BASE, TMP2, RA + if (op == BC_RET1) { + | sd CRET1, 0(TMP2) + } + |5: + | bltu RD, RB, >6 + | ld TMP1, FRAME_FUNC(BASE) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ins_next1 + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | addi TMP2, TMP2, 8 + | addi RD, RD, 8 + if (op == BC_RET1) { + | sd TISNIL, 0(TMP2) + } else { + | sd TISNIL, -8(TMP2) + } + | j <5 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORL: + | // Fall through. Assumes BC_IFORL follows. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RD = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | add RA, BASE, RA + | ld CARG1, FORL_IDX*8(RA) // CARG1 = IDX + | ld CARG2, FORL_STEP*8(RA) // CARG2 = STEP + | ld CARG3, FORL_STOP*8(RA) // CARG3 = STOP + | gettp CARG4, CARG1 + | gettp CARG5, CARG2 + | gettp CARG6, CARG3 + if (op != BC_JFORL) { + | srliw RD, RD, 1 + | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J<<2 + | add TMP2, RD, TMP2 + } + | bne CARG4, TISNUM, >3 + | sext.w CARG4, CARG1 // start + | sext.w CARG3, CARG3 // stop + if (!vk) { // init + | bxne CARG6, TISNUM, ->vmeta_for + | bxne CARG5, TISNUM, ->vmeta_for + | bfextri TMP0, CARG2, 31, 31 // sign + | slt CARG2, CARG3, CARG4 + | slt TMP1, CARG4, CARG3 + | neg TMP4, TMP0 + | xor TMP0, TMP1, CARG2 // CARG2 = TMP0 ? TMP1 : CARG2 + | and TMP0, TMP0, TMP4 + | xor CARG2, CARG2, TMP0 // CARG2=0: +,start <= stop or -,start >= stop + } else { + | sext.w CARG5, CARG2 // step + | addw CARG1, CARG4, CARG5 // start + step + | xor TMP3, CARG1, CARG4 // y^a + | xor TMP1, CARG1, CARG5 // y^b + | and TMP3, TMP3, TMP1 + | slt TMP1, CARG1, CARG3 // start+step < stop ? + | slt CARG3, CARG3, CARG1 // stop < start+step ? + | sltz TMP0, CARG5 // step < 0 ? + | sltz TMP3, TMP3 // ((y^a) & (y^b)) < 0: overflow. + | neg TMP4, TMP0 + | xor TMP1, TMP1, CARG3 // CARG3 = TMP0 ? TMP1 : CARG3 + | and TMP1, TMP1, TMP4 + | xor CARG3, CARG3, TMP1 + | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue + | zext.w CARG1, CARG1 + | settp_b CARG1, TISNUM + | sd CARG1, FORL_IDX*8(RA) + } + |1: + if (op == BC_FORI) { + | neg TMP4, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + } else if (op == BC_JFORI) { + | add PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + | addi TMP4, CARG2, -1 // CARG2!=0: next INS; CARG2==0: jump back + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + } + | ins_next1 + | sd CARG1, FORL_EXT*8(RA) + |2: + if (op == BC_JFORI) { + | decode_RD8b RD + | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop + } else if (op == BC_JFORL) { + | beqz CARG2, =>BC_JLOOP + } + | ins_next2 + | + |3: // FP loop. + | fld FTMP0, FORL_IDX*8(RA) // start + | fld FTMP1, FORL_STOP*8(RA) // stop + | ld TMP0, FORL_STEP*8(RA) // step + | sltz CARG2, TMP0 // step < 0 ? + | neg CARG2, CARG2 + if (!vk) { + | sltiu TMP3, CARG4, LJ_TISNUM // start is number ? + | sltiu TMP0, CARG5, LJ_TISNUM // step is number ? + | sltiu TMP1, CARG6, LJ_TISNUM // stop is number ? + | and TMP3, TMP3, TMP1 + | and TMP0, TMP0, TMP3 + | bxeqz TMP0, ->vmeta_for // if start or step or stop isn't number + | flt.d TMP3, FTMP0, FTMP1 // start < stop ? + | flt.d TMP4, FTMP1, FTMP0 // stop < start ? + | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4 + | and TMP0, TMP0, CARG2 + | xor CARG2, TMP4, TMP0 // CARG2=0:+,startstop + | j <1 + } else { + | fld FTMP3, FORL_STEP*8(RA) + | fadd.d FTMP0, FTMP0, FTMP3 // start + step + | flt.d TMP3, FTMP0, FTMP1 // start + step < stop ? + | flt.d TMP4, FTMP1, FTMP0 + | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4 + | and TMP0, TMP0, CARG2 + | xor CARG2, TMP4, TMP0 + if (op == BC_IFORL) { + | addi TMP3, CARG2, -1 + | and TMP2, TMP2, TMP3 + | add PC, PC, TMP2 + } + | fsd FTMP0, FORL_IDX*8(RA) + | ins_next1 + | fsd FTMP0, FORL_EXT*8(RA) + | j <2 + } + break; + + case BC_ITERL: + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base*8, RD = target + | add RA, BASE, RA + | ld TMP1, 0(RA) + | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { + | sd TMP1,-8(RA) + | j =>BC_JLOOP + } else { + | branch_RD // Otherwise save control var + branch. + | sd TMP1, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | // Fall through. Assumes BC_ILOOP follows. + break; + + case BC_ILOOP: + | // RA = base*8, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + break; + + case BC_JMP: + | // RA = base*8 (only used by trace recorder), RD = target + | branch_RD // PC + (jump - 0x8000)<<2 + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | ld TMP2, L->maxstack + | lbu TMP1, -4+PC2PROTO(numparams)(PC) + | ld KBASE, -4+PC2PROTO(k)(PC) + | bxltu TMP2, RA, ->vm_growstack_l + | slliw TMP1, TMP1, 3 // numparams*8 + |2: + | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters. + if (op == BC_JFUNCF) { + | decode_RD8 RD, INS + | j =>BC_JLOOP + } else { + | ins_next + } + | + |3: // Clear missing parameters. + | add TMP0, BASE, NARGS8:RC + | sd TISNIL, 0(TMP0) + | addiw NARGS8:RC, NARGS8:RC, 8 + | j <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | NYI // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | li TMP0, LJ_TFUNC + | add TMP1, BASE, RC + | ld TMP2, L->maxstack + | settp LFUNC:RB, TMP0 + | add TMP0, RA, RC + | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. + | addi TMP2, TMP2, -8 + | addi TMP3, RC, 16+FRAME_VARG + | ld KBASE, -4+PC2PROTO(k)(PC) + | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. + | bxgeu TMP0, TMP2, ->vm_growstack_l + | lbu TMP2, -4+PC2PROTO(numparams)(PC) + | mv RA, BASE + | mv RC, TMP1 + | ins_next1 + | addi BASE, TMP1, 16 + | beqz TMP2, >2 + |1: + | ld TMP0, 0(RA) + | sltu CARG2, RA, RC // Less args than parameters? + | addi RA, RA, 8 + | addi TMP1, TMP1, 8 + | addiw TMP2, TMP2, -1 + | beqz CARG2, >3 + | neg TMP4, CARG2 // Clear old fixarg slot (help the GC). + | xor TMP3, TISNIL, TMP0 // CARG1 = CARG2 ? TISNIL : TMP0 + | and TMP3, TMP3, TMP4 + | xor CARG1, TMP0, TMP3 + | sd CARG1, -8(RA) + | sd TMP0, 8(TMP1) + | bnez TMP2, <1 + |2: + | ins_next2 + |3: + | neg TMP4, CARG2 // Clear missing fixargs. + | xor TMP3, TMP0, TISNIL // TMP0 = CARG2 ? TMP0 : TISNIL + | and TMP3, TMP3, TMP4 + | xor TMP0, TISNIL, TMP3 + | sd TMP0, 8(TMP1) + | bnez TMP2, <1 + | j <2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { + | ld CARG4, CFUNC:RB->f + } else { + | ld CARG4, GL->wrapf + } + | add TMP1, RA, NARGS8:RC + | ld TMP2, L->maxstack + | add RC, BASE, NARGS8:RC + | sd BASE, L->base // base of currently excuting function + | sd RC, L->top + | bxgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. + | li_vmstate C // li TMP0, ~LJ_VMST_C + if (op == BC_FUNCCW) { + | ld CARG2, CFUNC:RB->f + } + | mv CARG1, L + | st_vmstate // sw TMP0, GL->vmstate + | jalr CARG4 // (lua_State *L [, lua_CFunction f]) + | // Returns nresults. + | ld BASE, L->base + | ld TMP1, L->top + | sd L, GL->cur_L + | slliw RD, CRET1, 3 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of caller. + | sub RA, TMP1, RD // RA = L->top - nresults*8 + | st_vmstate + | j ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + + dasm_growpc(Dst, BC__MAX); + + build_subroutines(ctx); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + +}