mirror of https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-07 23:24:09 +00:00
ARM: Drop hard-fp variants of floor/ceil/trunc.
Soft-fp variants are faster on a Cortex-A9. Duh.
parent 894d2d6ef4
commit 2621617a92
src/lj_asm_arm.h
@@ -1574,15 +1574,23 @@ static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
 static void asm_callround(ASMState *as, IRIns *ir, int id)
 {
   /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_D0)|RID2RSET(RID_D1)|RID2RSET(RID_D2)|
-                RID2RSET(RID_R0)|RID2RSET(RID_R1);
-  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+                RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
   ra_evictset(as, drop);
-  ra_destreg(as, ir, RID_FPRET);
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_hf :
-                id == IRFPM_CEIL ? (void *)lj_vm_ceil_hf :
-                                   (void *)lj_vm_trunc_hf);
-  ra_leftov(as, RID_D0, ir->op1);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+                id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+                                   (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
 }
 #endif
src/lj_vm.h
@@ -56,8 +56,8 @@ LJ_ASMF void lj_vm_exit_interp(void);
 LJ_ASMF double lj_vm_floor(double);
 LJ_ASMF double lj_vm_ceil(double);
 #if LJ_TARGET_ARM
-LJ_ASMF double lj_vm_floor_hf(double);
-LJ_ASMF double lj_vm_ceil_hf(double);
+LJ_ASMF double lj_vm_floor_sf(double);
+LJ_ASMF double lj_vm_ceil_sf(double);
 #endif
 #endif
 #if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64
@@ -81,7 +81,7 @@ LJ_ASMF void lj_vm_powi_sse(void);
 #else
 LJ_ASMF double lj_vm_trunc(double);
 #if LJ_TARGET_ARM
-LJ_ASMF double lj_vm_trunc_hf(double);
+LJ_ASMF double lj_vm_trunc_sf(double);
 #endif
 #endif
 LJ_ASMF double lj_vm_powi(double, int32_t);
src/vm_arm.dasc
@@ -1368,14 +1368,8 @@ static void build_subroutines(BuildCtx *ctx)
 | movmi CARG1, #0x80000000
 | bmi <1
 |4:
-|.if HFABI
-| vmov d0, CARG1, CARG2
-| bl ->vm_..func.._hf
-| b ->fff_resd
-|.else
-| bl ->vm_..func
+| bl ->vm_..func.._sf
 | b ->fff_restv
-|.endif
 |.endmacro
 |
 | math_round floor
@@ -2221,52 +2215,9 @@ static void build_subroutines(BuildCtx *ctx)
 |//
 |// double lj_vm_floor/ceil/trunc(double x);
 |.macro vm_round, func, hf
-|.if FPU
-|.if hf == 0
-| vmov d0, CARG1, CARG2
-| vldr d2, <8  // 2^52
-|.else
-| vldr d2, <8  // 2^52
+|.if hf == 1
+| vmov CARG1, CARG2, d0
 |.endif
-| vabs.f64 d1, d0
-| vcmp.f64 d1, d2  // |x| >= 2^52 or NaN?
-| vmrs
-|.if "func" == "trunc"
-| bxpl lr  // Return argument unchanged.
-| vadd.f64 d0, d1, d2
-| vsub.f64 d0, d0, d2  // (|x| + 2^52) - 2^52
-| vldr d2, <9  // +1.0
-| vcmp.f64 d1, d0  // |x| < result: subtract +1.0
-| vmrs
-| vsubmi.f64 d0, d0, d2
-| cmp CARG2, #0
-| vnegmi.f64 d0, d0  // Merge sign bit back in.
-|.else
-| vadd.f64 d1, d1, d2
-| bxpl lr  // Return argument unchanged.
-| cmp CARG2, #0
-| vsub.f64 d1, d1, d2  // (|x| + 2^52) - 2^52
-| vldr d2, <9  // +1.0
-| vnegmi.f64 d1, d1  // Merge sign bit back in.
-|.if "func" == "floor"
-| vcmp.f64 d0, d1  // x < result: subtract +1.0.
-| vmrs
-| vsubmi.f64 d0, d1, d2
-|.else
-| vcmp.f64 d1, d0  // x > result: add +1.0.
-| vmrs
-| vaddmi.f64 d0, d1, d2
-|.endif
-| vmovpl.f64 d0, d1
-|.endif
-|.if hf == 0
-| vmov CARG1, CARG2, d0
-|.endif
-| bx lr
-|
-|.else
-|
 | lsl CARG3, CARG2, #1
 | adds RB, CARG3, #0x00200000
 | bpl >2  // |x| < 1?
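The deleted FPU path is the classic bias trick spelled out in its comments: doubles at or above 2^52 have no fraction bits, so (|x| + 2^52) - 2^52 rounds |x| to an integer in the FPU's round-to-nearest mode, and floor/ceil then correct by +-1.0. A C sketch of the floor case (an illustration, not LuaJIT code; assumes round-to-nearest, and unlike the asm it does not preserve the sign of -0.0):

#include <math.h>

static double floor_via_bias(double x)
{
  const double two52 = 4503599627370496.0;  /* 2^52 */
  double ax = fabs(x), r;
  if (!(ax < two52)) return x;  /* |x| >= 2^52 or NaN: already integral. */
  r = (ax + two52) - two52;     /* |x| rounded to the nearest integer. */
  if (x < 0.0) r = -r;          /* Merge sign bit back in (vnegmi). */
  if (x < r) r -= 1.0;          /* x < result: subtract +1.0 (vsubmi). */
  return r;
}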
@@ -2286,6 +2237,9 @@ static void build_subroutines(BuildCtx *ctx)
 |.else
 | bics CARG3, CARG3, CARG2, asr #31  // iszero = ((ztest & ~signmask) == 0)
 |.endif
+|.if hf == 1
+| vmoveq d0, CARG1, CARG2
+|.endif
 | bxeq lr  // iszero: done.
 | mvn CARG4, #1
 | cmp RB, #0
@@ -2294,6 +2248,9 @@ static void build_subroutines(BuildCtx *ctx)
 | add RB, RB, #32
 | subs CARG1, CARG1, CARG4, lsl RB  // lo = lo-lomask
 | sbc CARG2, CARG2, CARG3  // hi = hi-himask+carry
+|.if hf == 1
+| vmov d0, CARG1, CARG2
+|.endif
 | bx lr
 |
 |2: // |x| < 1:
@@ -2308,45 +2265,41 @@ static void build_subroutines(BuildCtx *ctx)
 | and CARG2, CARG2, #0x80000000
 | ldrne CARG4, <9  // hi = sign(x) | (iszero ? 0.0 : 1.0)
 | orrne CARG2, CARG2, CARG4
-| bx lr
+|.if hf == 1
+| vmov d0, CARG1, CARG2
+|.endif
+| bx lr
 |.endmacro
 |
-|.if FPU
-|.align 8
-|9:
-| .long 0, 0x3ff00000  // +1.0
-|8:
-| .long 0, 0x43300000  // 2^52
-|.else
 |9:
 | .long 0x3ff00000  // hiword(+1.0)
-|.endif
 |
 |->vm_floor:
-|.if not HFABI
-| vm_round floor, 0
-|.endif
-|->vm_floor_hf:
-|.if FPU
+|.if HFABI
 | vm_round floor, 1
 |.endif
+|->vm_floor_sf:
+| vm_round floor, 0
 |
 |->vm_ceil:
-|.if not HFABI
-| vm_round ceil, 0
-|.endif
-|->vm_ceil_hf:
-|.if FPU
+|.if HFABI
 | vm_round ceil, 1
 |.endif
+|->vm_ceil_sf:
+| vm_round ceil, 0
 |
-|->vm_trunc:
-|.if JIT and not HFABI
+|.macro vm_trunc, hf
+|.if JIT
+|.if hf == 1
+| vmov CARG1, CARG2, d0
+|.endif
 | lsl CARG3, CARG2, #1
 | adds RB, CARG3, #0x00200000
 | andpl CARG2, CARG2, #0x80000000  // |x| < 1? hi = sign(x), lo = 0.
 | movpl CARG1, #0
+|.if hf == 1
+| vmovpl d0, CARG1, CARG2
+|.endif
 | bxpl lr
 | mvn CARG4, #0x3cc
 | subs RB, CARG4, RB, asr #21  // 2^0: RB = 51, 2^51: RB = 0.
@@ -2355,13 +2308,19 @@ static void build_subroutines(BuildCtx *ctx)
 | and CARG1, CARG1, CARG4, lsl RB  // lo &= lomask
 | subs RB, RB, #32
 | andpl CARG2, CARG2, CARG4, lsl RB  // |x| <= 2^20: hi &= himask
+|.if hf == 1
+| vmov d0, CARG1, CARG2
+|.endif
 | bx lr
 |.endif
+|.endmacro
 |
-|->vm_trunc_hf:
-|.if JIT and FPU
-| vm_round trunc, 1
+|->vm_trunc:
+|.if HFABI
+| vm_trunc 1
 |.endif
+|->vm_trunc_sf:
+| vm_trunc 0
 |
 | // double lj_vm_mod(double dividend, double divisor);
 |->vm_mod:
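The soft-fp vm_trunc works directly on the IEEE-754 bit pattern: the lomask/himask steps clear the fraction bits selected by the exponent. The same logic on a 64-bit view of the double, as a hedged sketch (illustrative only; the asm operates on the split CARG1/CARG2 word pair instead):

#include <stdint.h>
#include <string.h>

static double trunc_via_mask(double x)
{
  uint64_t bits;
  int e;
  memcpy(&bits, &x, sizeof(bits));
  e = (int)((bits >> 52) & 0x7ff) - 1023;  /* Unbiased exponent. */
  if (e < 0)
    bits &= (uint64_t)1 << 63;  /* |x| < 1: keep only the sign bit. */
  else if (e < 52)
    bits &= ~(((uint64_t)1 << (52 - e)) - 1);  /* Clear fraction bits. */
  /* e >= 52 (incl. inf/NaN): already integral, leave unchanged. */
  memcpy(&x, &bits, sizeof(x));
  return x;
}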
@@ -2369,7 +2328,9 @@ static void build_subroutines(BuildCtx *ctx)
 | // Special calling convention. Also, RC (r11) is not preserved.
 | vdiv.f64 d0, d6, d7
 | mov RC, lr
-| bl ->vm_floor_hf
+| vmov CARG1, CARG2, d0
+| bl ->vm_floor_sf
+| vmov d0, CARG1, CARG2
 | vmul.f64 d0, d0, d7
 | mov lr, RC
 | vsub.f64 d6, d6, d0
@@ -2377,7 +2338,7 @@ static void build_subroutines(BuildCtx *ctx)
 |.else
 | push {r0, r1, r2, r3, r4, lr}
 | bl extern __aeabi_ddiv
-| bl ->vm_floor
+| bl ->vm_floor_sf
 | ldrd CARG34, [sp, #8]
 | bl extern __aeabi_dmul
 | ldrd CARG34, [sp]
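Both branches of ->vm_mod compute Lua's floored modulo; the change merely routes the floor through the soft-fp entry point. In C terms (a sketch for reference, not a LuaJIT function):

#include <math.h>

/* Lua's a % b is a - floor(a/b)*b: unlike C's fmod(), the result takes
** the sign of the divisor.  ->vm_mod computes exactly this via
** vdiv/floor/vmul/vsub (FPU) or the __aeabi_* helpers (soft-fp).
*/
static double lua_mod(double a, double b)
{
  return a - floor(a / b) * b;
}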