mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-07 15:14:08 +00:00
PPC: Tune and reschedule interpreter for PPC/e300.
This commit is contained in:
parent
f333dfd17d
commit
28b98acd75
@ -183,15 +183,15 @@
|
||||
| lwz INS, 0(PC)
|
||||
| addi PC, PC, 4
|
||||
|.endmacro
|
||||
|// Instruction decode+dispatch.
|
||||
|// Instruction decode+dispatch. Note: optimized for e300!
|
||||
|.macro ins_NEXT2
|
||||
| decode_OP4 TMP1, INS
|
||||
| lwzx TMP0, DISPATCH, TMP1
|
||||
| mtctr TMP0
|
||||
| decode_RB8 RB, INS
|
||||
| decode_RD8 RD, INS
|
||||
| lwzx TMP0, DISPATCH, TMP1
|
||||
| decode_RA8 RA, INS
|
||||
| decode_RC8 RC, INS
|
||||
| mtctr TMP0
|
||||
| bctr
|
||||
|.endmacro
|
||||
|.macro ins_NEXT
|
||||
@ -255,8 +255,8 @@
|
||||
|
|
||||
|.macro branch_RD
|
||||
| srwi TMP0, RD, 1
|
||||
| add PC, PC, TMP0
|
||||
| addis PC, PC, -(BCBIAS_J*4 >> 16)
|
||||
| add PC, PC, TMP0
|
||||
|.endmacro
|
||||
|
|
||||
|// Assumes DISPATCH is relative to GL.
|
||||
@ -2983,14 +2983,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
|.endmacro
|
||||
|
|
||||
|.macro intmod, a, b, c
|
||||
|->BC_MODVNI_Z:
|
||||
| bl ->vm_modi
|
||||
|.endmacro
|
||||
|
|
||||
|.macro fpmod, a, b, c
|
||||
||if (!LJ_DUALNUM) {
|
||||
|->BC_MODVNI_Z:
|
||||
||}
|
||||
|->BC_MODVN_Z:
|
||||
| fdiv FARG1, b, c
|
||||
| // NYI: Use internal implementation of floor.
|
||||
@ -3038,11 +3034,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
|| break;
|
||||
||}
|
||||
| checknum cr1, TMP2
|
||||
| crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
|
||||
| bne >5
|
||||
|.if "intins" == "intmod_"
|
||||
| b ->BC_MODVNI_Z // Avoid 3 copies. It's slow anyway.
|
||||
|.else
|
||||
| bne cr1, >5
|
||||
| intins CARG1, CARG1, CARG2
|
||||
| bso >4
|
||||
|1:
|
||||
@ -3054,7 +3047,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
|4: // Overflow.
|
||||
| mcrxr cr0; ble <1 // Ignore unrelated overflow.
|
||||
| ins_arithfallback b
|
||||
|.endif
|
||||
|5: // FP variant.
|
||||
||if (vk == 1) {
|
||||
| lfd f15, 0(RB)
|
||||
@ -3100,7 +3092,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| ins_arith intmod, fpmod
|
||||
break;
|
||||
case BC_MODNV: case BC_MODVV:
|
||||
| ins_arith intmod_, fpmod_
|
||||
| ins_arith intmod, fpmod_
|
||||
break;
|
||||
case BC_POW:
|
||||
| // NYI: (partial) integer arithmetic.
|
||||
@ -3113,8 +3105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
||||
| bge ->vmeta_arith_vv
|
||||
| bl extern pow
|
||||
| ins_next1
|
||||
| stfdx FARG1, BASE, RA
|
||||
| ins_next
|
||||
| ins_next2
|
||||
break;
|
||||
|
||||
case BC_CAT:
|
||||
@ -3132,9 +3125,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| cmplwi CRET1, 0
|
||||
| lwz BASE, L->base
|
||||
| bne ->vmeta_binop
|
||||
| ins_next1
|
||||
| lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
|
||||
| stfdx f0, BASE, RA
|
||||
| ins_next
|
||||
| ins_next2
|
||||
break;
|
||||
|
||||
/* -- Constant ops ------------------------------------------------------ */
|
||||
@ -3143,9 +3137,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| // RA = dst*8, RD = str_const*8 (~)
|
||||
| srwi TMP1, RD, 1
|
||||
| subfic TMP1, TMP1, -4
|
||||
| ins_next1
|
||||
| lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
|
||||
| li TMP2, LJ_TSTR
|
||||
| ins_next1
|
||||
| stwux TMP2, RA, BASE
|
||||
| stw TMP0, 4(RA)
|
||||
| ins_next2
|
||||
@ -3155,9 +3149,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| // RA = dst*8, RD = cdata_const*8 (~)
|
||||
| srwi TMP1, RD, 1
|
||||
| subfic TMP1, TMP1, -4
|
||||
| ins_next1
|
||||
| lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
|
||||
| li TMP2, LJ_TCDATA
|
||||
| ins_next1
|
||||
| stwux TMP2, RA, BASE
|
||||
| stw TMP0, 4(RA)
|
||||
| ins_next2
|
||||
@ -3173,21 +3167,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| stw RD, 4(RA)
|
||||
| ins_next2
|
||||
} else {
|
||||
| // NYI: which approach is faster?
|
||||
|.if 1
|
||||
| slwi RD, RD, 13
|
||||
| srawi RD, RD, 16
|
||||
| tonum_i f0, RD
|
||||
| ins_next1
|
||||
| stfdx f0, BASE, RA
|
||||
| ins_next2
|
||||
|.else
|
||||
| // The soft-float approach is faster.
|
||||
| slwi RD, RD, 13
|
||||
| srawi TMP1, RD, 31
|
||||
| xor TMP2, TMP1, RD
|
||||
| sub TMP2, TMP2, TMP1 // TMP2 = abs(x)
|
||||
| cntlzw TMP3, TMP2
|
||||
| subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
|
||||
| subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
|
||||
| slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa
|
||||
| subfic TMP3, RD, 0
|
||||
| slwi TMP1, TMP1, 20
|
||||
@ -3199,13 +3185,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| stwux RD, RA, BASE
|
||||
| stw ZERO, 4(RA)
|
||||
| ins_next2
|
||||
|.endif
|
||||
}
|
||||
break;
|
||||
case BC_KNUM:
|
||||
| // RA = dst*8, RD = num_const*8
|
||||
| lfdx f0, KBASE, RD
|
||||
| ins_next1
|
||||
| lfdx f0, KBASE, RD
|
||||
| stfdx f0, BASE, RA
|
||||
| ins_next2
|
||||
break;
|
||||
@ -3233,11 +3218,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
|
||||
case BC_UGET:
|
||||
| // RA = dst*8, RD = uvnum*8
|
||||
| ins_next1
|
||||
| lwz LFUNC:RB, FRAME_FUNC(BASE)
|
||||
| srwi RD, RD, 1
|
||||
| addi RD, RD, offsetof(GCfuncL, uvptr)
|
||||
| lwzx UPVAL:RB, LFUNC:RB, RD
|
||||
| ins_next1
|
||||
| lwz TMP1, UPVAL:RB->v
|
||||
| lfd f0, 0(TMP1)
|
||||
| stfdx f0, BASE, RA
|
||||
@ -3250,6 +3235,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| addi RA, RA, offsetof(GCfuncL, uvptr)
|
||||
| lfdux f0, RD, BASE
|
||||
| lwzx UPVAL:RB, LFUNC:RB, RA
|
||||
| ins_next1
|
||||
| lbz TMP3, UPVAL:RB->marked
|
||||
| lwz CARG2, UPVAL:RB->v
|
||||
| andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
|
||||
@ -3262,7 +3248,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| subi TMP2, TMP2, (LJ_TISNUM+1)
|
||||
| bne >2 // Upvalue is closed and black?
|
||||
|1:
|
||||
| ins_next
|
||||
| ins_next2
|
||||
|
|
||||
|2: // Check if new value is collectable.
|
||||
| cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1)
|
||||
@ -3277,7 +3263,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
break;
|
||||
case BC_USETS:
|
||||
| // RA = uvnum*8, RD = str_const*8 (~)
|
||||
| ins_next1
|
||||
| lwz LFUNC:RB, FRAME_FUNC(BASE)
|
||||
| srwi TMP1, RD, 1
|
||||
| srwi RA, RA, 1
|
||||
@ -3285,6 +3270,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| addi RA, RA, offsetof(GCfuncL, uvptr)
|
||||
| lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4
|
||||
| lwzx UPVAL:RB, LFUNC:RB, RA
|
||||
| ins_next1
|
||||
| lbz TMP3, UPVAL:RB->marked
|
||||
| lwz CARG2, UPVAL:RB->v
|
||||
| andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
|
||||
@ -3309,25 +3295,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
break;
|
||||
case BC_USETN:
|
||||
| // RA = uvnum*8, RD = num_const*8
|
||||
| ins_next1
|
||||
| lwz LFUNC:RB, FRAME_FUNC(BASE)
|
||||
| srwi RA, RA, 1
|
||||
| addi RA, RA, offsetof(GCfuncL, uvptr)
|
||||
| lfdx f0, KBASE, RD
|
||||
| lwzx UPVAL:RB, LFUNC:RB, RA
|
||||
| ins_next1
|
||||
| lwz TMP1, UPVAL:RB->v
|
||||
| stfd f0, 0(TMP1)
|
||||
| ins_next2
|
||||
break;
|
||||
case BC_USETP:
|
||||
| // RA = uvnum*8, RD = primitive_type*8 (~)
|
||||
| ins_next1
|
||||
| lwz LFUNC:RB, FRAME_FUNC(BASE)
|
||||
| srwi RA, RA, 1
|
||||
| addi RA, RA, offsetof(GCfuncL, uvptr)
|
||||
| srwi TMP0, RD, 3
|
||||
| lwzx UPVAL:RB, LFUNC:RB, RA
|
||||
| addi RA, RA, offsetof(GCfuncL, uvptr)
|
||||
| not TMP0, TMP0
|
||||
| lwzx UPVAL:RB, LFUNC:RB, RA
|
||||
| ins_next1
|
||||
| lwz TMP1, UPVAL:RB->v
|
||||
| stw TMP0, 0(TMP1)
|
||||
| ins_next2
|
||||
@ -3538,8 +3524,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
case BC_TGETB:
|
||||
| // RA = dst*8, RB = table*8, RC = index*8
|
||||
| lwzux CARG1, RB, BASE
|
||||
| lwz TAB:RB, 4(RB)
|
||||
| srwi TMP0, RC, 3
|
||||
| lwz TAB:RB, 4(RB)
|
||||
| checktab CARG1; bne ->vmeta_tgetb
|
||||
| lwz TMP1, TAB:RB->asize
|
||||
| lwz TMP2, TAB:RB->array
|
||||
@ -3717,8 +3703,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
case BC_TSETB:
|
||||
| // RA = src*8, RB = table*8, RC = index*8
|
||||
| lwzux CARG1, RB, BASE
|
||||
| lwz TAB:RB, 4(RB)
|
||||
| srwi TMP0, RC, 3
|
||||
| lwz TAB:RB, 4(RB)
|
||||
| checktab CARG1; bne ->vmeta_tsetb
|
||||
| lwz TMP1, TAB:RB->asize
|
||||
| lwz TMP2, TAB:RB->array
|
||||
@ -4470,9 +4456,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| st_vmstate
|
||||
| bctrl // (lua_State *L [, lua_CFunction f])
|
||||
| // Returns nresults.
|
||||
| lwz TMP1, L->top
|
||||
| slwi RD, CRET1, 3
|
||||
| lwz BASE, L->base
|
||||
| slwi RD, CRET1, 3
|
||||
| lwz TMP1, L->top
|
||||
| li_vmstate INTERP
|
||||
| lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
|
||||
| sub RA, TMP1, RD // RA = L->top - nresults*8
|
||||
|
Loading…
Reference in New Issue
Block a user