diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index c202bc82..25016f4a 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -327,6 +327,35 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
   emit_lso(as, ai, (rd & 31), base, ofs);
 }
 
+/* Fuse FP multiply-add/sub.
+**
+** Tries to combine the FP add/sub in 'ir' with an IR_MUL operand into one
+** instruction. 'ai' is used if the multiply is the left operand (op1),
+** 'air' if it is the right operand (op2); the other operand is the addend.
+** Fusion needs distinct operands, a multiply accepted by mayfuse() and
+** ra_noreg() for the multiply's register.
+** Returns 1 if the fused instruction was emitted, 0 if nothing was emitted.
+*/
+static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+       ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+       (rref = lref, ai = air, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+    Reg left = ra_alloc2(as, irm,
+                         rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    Reg right = (left >> 8); left &= 255;  /* Split the ra_alloc2 result. */
+    emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
+    return 1;
+  }
+  return 0;
+}
+
 /* -- Calls --------------------------------------------------------------- */
 
 /* Generate a call to a C function. */
@@ -1308,7 +1337,8 @@ static void asm_intmul(ASMState *as, IRIns *ir)
 static void asm_add(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
-    asm_fparith(as, ir, A64I_FADDd);
+    if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
+      asm_fparith(as, ir, A64I_FADDd);
     return;
   }
   asm_intop_s(as, ir, A64I_ADDw);
@@ -1317,7 +1347,8 @@ static void asm_add(ASMState *as, IRIns *ir)
 static void asm_sub(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
-    asm_fparith(as, ir, A64I_FSUBd);
+    if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
+      asm_fparith(as, ir, A64I_FSUBd);
     return;
   }
   asm_intop_s(as, ir, A64I_SUBw);
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 6686802b..e0f43689 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -74,6 +74,12 @@ static uint32_t emit_isfpk64(uint64_t n)
 
 /* -- Emit basic instructions --------------------------------------------- */
 
+/* Emit an instruction word built from 'ai' and the D, N, M and A fields. */
+static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra)
+{
+  *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra);
+}
+
 static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
 {
   *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);