From 179fe9e5d8a512cabcd9f09d4c02029d4fc244e9 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 25 Oct 2011 19:50:44 +0200 Subject: [PATCH] PPC: Fuse BSWAP with XLOAD/XSTORE to lwbrx/stwbrx. --- src/lj_asm_ppc.h | 55 ++++++++++++++++++++++++++++++++++----------- src/lj_target_ppc.h | 3 +++ 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 72e4c956..b2cf9f65 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -205,6 +205,22 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, emit_fai(as, pi, rt, base, ofs); } +/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */ +static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, + RegSet allow) +{ + IRIns *ira = IR(ref); + Reg right, left; + if (mayfuse(as, ref) && ira->o == IR_ADD && ra_noreg(ira->r)) { + left = ra_alloc2(as, ira, allow); + right = (left >> 8); left &= 255; + } else { + right = ra_alloc1(as, ref, allow); + left = RID_R0; + } + emit_tab(as, pi, rt, left, right); +} + /* Fuse to multiply-add/sub instruction. */ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) { @@ -886,10 +902,17 @@ static void asm_xload(ASMState *as, IRIns *ir) static void asm_xstore(ASMState *as, IRIns *ir) { - // NYI: fuse with bswap to stwbrx. - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src)); + IRIns *irb; + if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && + ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { + /* Fuse BSWAP with XSTORE to stwbrx. */ + Reg src = ra_alloc1(as, irb->op1, RSET_GPR); + asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); + } else { + Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src)); + } } static void asm_ahuvload(ASMState *as, IRIns *ir) @@ -1410,17 +1433,23 @@ nofuse: static void asm_bitswap(ASMState *as, IRIns *ir) { - // NYI: fuse with XLOAD to lwbrx. Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg tmp = dest; - if (tmp == left) { - tmp = RID_TMP; - emit_mr(as, dest, RID_TMP); + IRIns *irx; + if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD && + ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) { + /* Fuse BSWAP with XLOAD to lwbrx. */ + asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR); + } else { + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + Reg tmp = dest; + if (tmp == left) { + tmp = RID_TMP; + emit_mr(as, dest, RID_TMP); + } + emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); + emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); + emit_rotlwi(as, tmp, left, 8); } - emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); - emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); - emit_rotlwi(as, tmp, left, 8); } static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index d0b3f4d0..8abc38fd 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -240,6 +240,9 @@ typedef enum PPCIns { PPCI_LHAX = 0x7c0002ae, PPCI_STHX = 0x7c00032e, + PPCI_LWBRX = 0x7c00042c, + PPCI_STWBRX = 0x7c00052c, + PPCI_LFSX = 0x7c00042e, PPCI_LFDX = 0x7c0004ae, PPCI_STFSX = 0x7c00052e,