PPC: More interpreter tuning. Use y-bit for branch predictions.

This commit is contained in:
Mike Pall 2011-09-05 18:30:36 +02:00
parent d9fe562ccc
commit 690ff909f5

View File

@@ -326,7 +326,7 @@ static void build_subroutines(BuildCtx *ctx)
| rlwinm TMP2, PC, 0, 0, 28 | rlwinm TMP2, PC, 0, 0, 28
| li_vmstate C | li_vmstate C
| sub TMP2, BASE, TMP2 // TMP2 = previous base. | sub TMP2, BASE, TMP2 // TMP2 = previous base.
| bne ->vm_returnp | bney ->vm_returnp
| |
| addic. TMP1, RD, -8 | addic. TMP1, RD, -8
| stw TMP2, L->base | stw TMP2, L->base
@@ -341,7 +341,7 @@ static void build_subroutines(BuildCtx *ctx)
| addi RA, RA, 8 | addi RA, RA, 8
| stfd f0, 0(BASE) | stfd f0, 0(BASE)
| addi BASE, BASE, 8 | addi BASE, BASE, 8
| bne <1 | bney <1
| |
|2: |2:
| cmpw TMP2, RD // More/less results wanted? | cmpw TMP2, RD // More/less results wanted?
@@ -627,7 +627,7 @@ static void build_subroutines(BuildCtx *ctx)
| sub CARG3, CARG2, TMP1 | sub CARG3, CARG2, TMP1
| decode_RA8 RA, INS | decode_RA8 RA, INS
| stfd f0, 0(CARG2) | stfd f0, 0(CARG2)
| bne ->BC_CAT_Z | bney ->BC_CAT_Z
| stfdx f0, BASE, RA | stfdx f0, BASE, RA
| b ->cont_nop | b ->cont_nop
| |
@@ -962,7 +962,7 @@ static void build_subroutines(BuildCtx *ctx)
#endif #endif
| decode_RD8 RD, SAVE0 | decode_RD8 RD, SAVE0
#if LJ_HASJIT #if LJ_HASJIT
| beq =>BC_JFORI | beqy =>BC_JFORI
#endif #endif
| b =>BC_FORI | b =>BC_FORI
| |
@@ -1040,7 +1040,7 @@ static void build_subroutines(BuildCtx *ctx)
| lfdx f0, BASE, TMP1 | lfdx f0, BASE, TMP1
| stfdx f0, RA, TMP1 | stfdx f0, RA, TMP1
| addi TMP1, TMP1, 8 | addi TMP1, TMP1, 8
| bne <1 | bney <1
| b ->fff_res | b ->fff_res
| |
|.ffunc type |.ffunc type
@@ -1513,7 +1513,7 @@ static void build_subroutines(BuildCtx *ctx)
| // RA = results, RD = (nresults+1)*8, PC = return. | // RA = results, RD = (nresults+1)*8, PC = return.
| andi. TMP0, PC, FRAME_TYPE | andi. TMP0, PC, FRAME_TYPE
| mr MULTRES, RD | mr MULTRES, RD
| bne ->vm_return | bney ->vm_return
| lwz INS, -4(PC) | lwz INS, -4(PC)
| decode_RB8 RB, INS | decode_RB8 RB, INS
|5: |5:
@@ -1545,7 +1545,7 @@ static void build_subroutines(BuildCtx *ctx)
| |
|.macro math_round, func |.macro math_round, func
| .ffunc_1 math_ .. func | .ffunc_1 math_ .. func
| checknum CARG3; beq ->fff_restv | checknum CARG3; beqy ->fff_restv
| rlwinm TMP2, CARG3, 12, 21, 31 | rlwinm TMP2, CARG3, 12, 21, 31
| bge ->fff_fallback | bge ->fff_fallback
| addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
@@ -1580,7 +1580,7 @@ static void build_subroutines(BuildCtx *ctx)
| sub CARG1, CARG1, TMP2 | sub CARG1, CARG1, TMP2
| bns ->fff_resi | bns ->fff_resi
| // Potential overflow. | // Potential overflow.
| mcrxr cr0; ble ->fff_resi // Ignore unrelated overflow. | mcrxr cr0; bley ->fff_resi // Ignore unrelated overflow.
| lus CARG3, 0x41e0 // 2^31. | lus CARG3, 0x41e0 // 2^31.
| li CARG1, 0 | li CARG1, 0
| b ->fff_restv | b ->fff_restv
@@ -1608,7 +1608,7 @@ static void build_subroutines(BuildCtx *ctx)
| orc. TMP1, TMP1, TMP2 | orc. TMP1, TMP1, TMP2
| crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
| lus CARG1, 0x8000 // -(2^31). | lus CARG1, 0x8000 // -(2^31).
| beq ->fff_resi | beqy ->fff_resi
|5: |5:
| lfd FARG1, 0(BASE) | lfd FARG1, 0(BASE)
| bl extern func | bl extern func
@@ -1792,7 +1792,7 @@ static void build_subroutines(BuildCtx *ctx)
| lwz PC, FRAME_PC(BASE) | lwz PC, FRAME_PC(BASE)
| cmplwi TMP0, 0 | cmplwi TMP0, 0
| la RA, -8(BASE) | la RA, -8(BASE)
| beq ->fff_res | beqy ->fff_res
| b ->fff_resi | b ->fff_resi
} else { } else {
| lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
@@ -1965,7 +1965,7 @@ static void build_subroutines(BuildCtx *ctx)
|1: // Reverse string copy. |1: // Reverse string copy.
| cmpwi TMP3, 0 | cmpwi TMP3, 0
| lbzx TMP1, CARG1, TMP2 | lbzx TMP1, CARG1, TMP2
| blt ->fff_newstr | blty ->fff_newstr
| stbx TMP1, CARG2, TMP3 | stbx TMP1, CARG2, TMP3
| subi TMP3, TMP3, 1 | subi TMP3, TMP3, 1
| addi TMP2, TMP2, 1 | addi TMP2, TMP2, 1
@@ -1990,7 +1990,7 @@ static void build_subroutines(BuildCtx *ctx)
|1: // ASCII case conversion. |1: // ASCII case conversion.
| cmplw TMP2, CARG3 | cmplw TMP2, CARG3
| lbzx TMP1, CARG1, TMP2 | lbzx TMP1, CARG1, TMP2
| bge ->fff_newstr | bgey ->fff_newstr
| subi TMP0, TMP1, lo | subi TMP0, TMP1, lo
| xori TMP3, TMP1, 0x20 | xori TMP3, TMP1, 0x20
| addic TMP0, TMP0, -26 | addic TMP0, TMP0, -26
@@ -2039,7 +2039,7 @@ static void build_subroutines(BuildCtx *ctx)
||} else { ||} else {
| lfd FARG1, 0(TMP1) | lfd FARG1, 0(TMP1)
||} ||}
| bge cr1, ->fff_resi | bgey cr1, ->fff_resi
| checknum CARG4 | checknum CARG4
||if (LJ_DUALNUM) { ||if (LJ_DUALNUM) {
| bnel ->fff_bitop_fb | bnel ->fff_bitop_fb
@@ -2400,7 +2400,7 @@ static void build_subroutines(BuildCtx *ctx)
| li TMP3, 0 | li TMP3, 0
| la TMP1, CCSTATE->stack | la TMP1, CCSTATE->stack
| slwi CARG2, CARG2, 2 | slwi CARG2, CARG2, 2
| blt >2 | blty >2
| la TMP2, 8(sp) | la TMP2, 8(sp)
|1: |1:
| lwzx TMP0, TMP1, CARG2 | lwzx TMP0, TMP1, CARG2
@@ -2408,7 +2408,7 @@ static void build_subroutines(BuildCtx *ctx)
| addic. CARG2, CARG2, -4 | addic. CARG2, CARG2, -4
| bge <1 | bge <1
|2: |2:
| bne cr1, >3 | bney cr1, >3
| lfd f1, CCSTATE->fpr[0] | lfd f1, CCSTATE->fpr[0]
| lfd f2, CCSTATE->fpr[1] | lfd f2, CCSTATE->fpr[1]
| lfd f3, CCSTATE->fpr[2] | lfd f3, CCSTATE->fpr[2]
@@ -2863,7 +2863,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|3: |3:
| ins_next2 | ins_next2
|4: // Potential overflow. |4: // Potential overflow.
| mcrxr cr0; ble <1 // Ignore unrelated overflow. | mcrxr cr0; bley <1 // Ignore unrelated overflow.
| lus TMP1, 0x41e0 // 2^31. | lus TMP1, 0x41e0 // 2^31.
| li TMP0, 0 | li TMP0, 0
| b >7 | b >7
@@ -3047,7 +3047,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|2: |2:
| ins_next2 | ins_next2
|4: // Overflow. |4: // Overflow.
| mcrxr cr0; ble <1 // Ignore unrelated overflow. | mcrxr cr0; bley <1 // Ignore unrelated overflow.
| ins_arithfallback b | ins_arithfallback b
|5: // FP variant. |5: // FP variant.
||if (vk == 1) { ||if (vk == 1) {
@@ -3924,7 +3924,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|6: |6:
| cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1.
| slwi TMP3, RC, 5 | slwi TMP3, RC, 5
| bgt <3 | bgty <3
| slwi RB, RC, 3 | slwi RB, RC, 3
| sub TMP3, TMP3, RB | sub TMP3, TMP3, RB
| lwzx RB, TMP2, TMP3 | lwzx RB, TMP2, TMP3
@@ -4015,7 +4015,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Copy all varargs. |5: // Copy all varargs.
| lwz TMP0, L->maxstack | lwz TMP0, L->maxstack
| li MULTRES, 8 // MULTRES = (0+1)*8 | li MULTRES, 8 // MULTRES = (0+1)*8
| ble <3 // No vararg slots? | bley <3 // No vararg slots?
| add TMP2, RA, TMP1 | add TMP2, RA, TMP1
| cmplw TMP2, TMP0 | cmplw TMP2, TMP0
| addi MULTRES, TMP1, 8 | addi MULTRES, TMP1, 8
@@ -4117,7 +4117,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mr MULTRES, RD | mr MULTRES, RD
| andi. TMP0, PC, FRAME_TYPE | andi. TMP0, PC, FRAME_TYPE
| xori TMP1, PC, FRAME_VARG | xori TMP1, PC, FRAME_VARG
| bne ->BC_RETV_Z | bney ->BC_RETV_Z
| |
| lwz INS, -4(PC) | lwz INS, -4(PC)
| subi TMP2, BASE, 8 | subi TMP2, BASE, 8
@@ -4217,7 +4217,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| b <1 | b <1
if (vk) { if (vk) {
|6: // Potential overflow. |6: // Potential overflow.
| mcrxr cr0; ble <4 // Ignore unrelated overflow. | mcrxr cr0; bley <4 // Ignore unrelated overflow.
| b <2 | b <2
} }
} }
@@ -4268,7 +4268,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bgt >3 | bgt >3
} else if (op == BC_IFORL) { } else if (op == BC_IFORL) {
if (LJ_DUALNUM) { if (LJ_DUALNUM) {
| bgt <2 | bgty <2
} else { } else {
| bgt >2 | bgt >2
} }
@@ -4289,7 +4289,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|3: // Used by integer loop, too. |3: // Used by integer loop, too.
| addis PC, RD, -(BCBIAS_J*4 >> 16) | addis PC, RD, -(BCBIAS_J*4 >> 16)
} else if (op == BC_IFORL) { } else if (op == BC_IFORL) {
| bge <1 | bgey <1
} else { } else {
| bge =>BC_JLOOP | bge =>BC_JLOOP
} }