Mirror of https://github.com/LuaJIT/LuaJIT.git (synced 2025-02-08 07:34:07 +00:00)
ARM: Drop hard-fp variants of floor/ceil/trunc.
Soft-fp variants are faster on a Cortex-A9. Duh.
This commit is contained in:
parent 894d2d6ef4
commit 2621617a92
@ -1574,15 +1574,23 @@ static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
|
|||||||
static void asm_callround(ASMState *as, IRIns *ir, int id)
|
static void asm_callround(ASMState *as, IRIns *ir, int id)
|
||||||
{
|
{
|
||||||
/* The modified regs must match with the *.dasc implementation. */
|
/* The modified regs must match with the *.dasc implementation. */
|
||||||
RegSet drop = RID2RSET(RID_D0)|RID2RSET(RID_D1)|RID2RSET(RID_D2)|
|
RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
|
||||||
RID2RSET(RID_R0)|RID2RSET(RID_R1);
|
RID2RSET(RID_R3)|RID2RSET(RID_R12);
|
||||||
if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
|
RegSet of;
|
||||||
|
Reg dest, src;
|
||||||
ra_evictset(as, drop);
|
ra_evictset(as, drop);
|
||||||
ra_destreg(as, ir, RID_FPRET);
|
dest = ra_dest(as, ir, RSET_FPR);
|
||||||
emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_hf :
|
emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
|
||||||
id == IRFPM_CEIL ? (void *)lj_vm_ceil_hf :
|
emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
|
||||||
(void *)lj_vm_trunc_hf);
|
id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
|
||||||
ra_leftov(as, RID_D0, ir->op1);
|
(void *)lj_vm_trunc_sf);
|
||||||
|
/* Workaround to protect argument GPRs from being used for remat. */
|
||||||
|
of = as->freeset;
|
||||||
|
as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
|
||||||
|
as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
|
||||||
|
src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
|
||||||
|
as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
|
||||||
|
emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -56,8 +56,8 @@ LJ_ASMF void lj_vm_exit_interp(void);
|
|||||||
LJ_ASMF double lj_vm_floor(double);
|
LJ_ASMF double lj_vm_floor(double);
|
||||||
LJ_ASMF double lj_vm_ceil(double);
|
LJ_ASMF double lj_vm_ceil(double);
|
||||||
#if LJ_TARGET_ARM
|
#if LJ_TARGET_ARM
|
||||||
LJ_ASMF double lj_vm_floor_hf(double);
|
LJ_ASMF double lj_vm_floor_sf(double);
|
||||||
LJ_ASMF double lj_vm_ceil_hf(double);
|
LJ_ASMF double lj_vm_ceil_sf(double);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64
|
#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64
|
||||||
@ -81,7 +81,7 @@ LJ_ASMF void lj_vm_powi_sse(void);
|
|||||||
#else
|
#else
|
||||||
LJ_ASMF double lj_vm_trunc(double);
|
LJ_ASMF double lj_vm_trunc(double);
|
||||||
#if LJ_TARGET_ARM
|
#if LJ_TARGET_ARM
|
||||||
LJ_ASMF double lj_vm_trunc_hf(double);
|
LJ_ASMF double lj_vm_trunc_sf(double);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
LJ_ASMF double lj_vm_powi(double, int32_t);
|
LJ_ASMF double lj_vm_powi(double, int32_t);
|
||||||
|
115
src/vm_arm.dasc
115
src/vm_arm.dasc
@ -1368,14 +1368,8 @@ static void build_subroutines(BuildCtx *ctx)
|
|||||||
| movmi CARG1, #0x80000000
|
| movmi CARG1, #0x80000000
|
||||||
| bmi <1
|
| bmi <1
|
||||||
|4:
|
|4:
|
||||||
|.if HFABI
|
| bl ->vm_..func.._sf
|
||||||
| vmov d0, CARG1, CARG2
|
|
||||||
| bl ->vm_..func.._hf
|
|
||||||
| b ->fff_resd
|
|
||||||
|.else
|
|
||||||
| bl ->vm_..func
|
|
||||||
| b ->fff_restv
|
| b ->fff_restv
|
||||||
|.endif
|
|
||||||
|.endmacro
|
|.endmacro
|
||||||
|
|
|
|
||||||
| math_round floor
|
| math_round floor
|
||||||
@ -2221,52 +2215,9 @@ static void build_subroutines(BuildCtx *ctx)
|
|||||||
|//
|
|//
|
||||||
|// double lj_vm_floor/ceil/trunc(double x);
|
|// double lj_vm_floor/ceil/trunc(double x);
|
||||||
|.macro vm_round, func, hf
|
|.macro vm_round, func, hf
|
||||||
|.if FPU
|
|.if hf == 1
|
||||||
|.if hf == 0
|
|
||||||
| vmov d0, CARG1, CARG2
|
|
||||||
| vldr d2, <8 // 2^52
|
|
||||||
|.else
|
|
||||||
| vldr d2, <8 // 2^52
|
|
||||||
| vmov CARG1, CARG2, d0
|
| vmov CARG1, CARG2, d0
|
||||||
|.endif
|
|.endif
|
||||||
| vabs.f64 d1, d0
|
|
||||||
| vcmp.f64 d1, d2 // |x| >= 2^52 or NaN?
|
|
||||||
| vmrs
|
|
||||||
|.if "func" == "trunc"
|
|
||||||
| bxpl lr // Return argument unchanged.
|
|
||||||
| vadd.f64 d0, d1, d2
|
|
||||||
| vsub.f64 d0, d0, d2 // (|x| + 2^52) - 2^52
|
|
||||||
| vldr d2, <9 // +1.0
|
|
||||||
| vcmp.f64 d1, d0 // |x| < result: subtract +1.0
|
|
||||||
| vmrs
|
|
||||||
| vsubmi.f64 d0, d0, d2
|
|
||||||
| cmp CARG2, #0
|
|
||||||
| vnegmi.f64 d0, d0 // Merge sign bit back in.
|
|
||||||
|.else
|
|
||||||
| vadd.f64 d1, d1, d2
|
|
||||||
| bxpl lr // Return argument unchanged.
|
|
||||||
| cmp CARG2, #0
|
|
||||||
| vsub.f64 d1, d1, d2 // (|x| + 2^52) - 2^52
|
|
||||||
| vldr d2, <9 // +1.0
|
|
||||||
| vnegmi.f64 d1, d1 // Merge sign bit back in.
|
|
||||||
|.if "func" == "floor"
|
|
||||||
| vcmp.f64 d0, d1 // x < result: subtract +1.0.
|
|
||||||
| vmrs
|
|
||||||
| vsubmi.f64 d0, d1, d2
|
|
||||||
|.else
|
|
||||||
| vcmp.f64 d1, d0 // x > result: add +1.0.
|
|
||||||
| vmrs
|
|
||||||
| vaddmi.f64 d0, d1, d2
|
|
||||||
|.endif
|
|
||||||
| vmovpl.f64 d0, d1
|
|
||||||
|.endif
|
|
||||||
|.if hf == 0
|
|
||||||
| vmov CARG1, CARG2, d0
|
|
||||||
|.endif
|
|
||||||
| bx lr
|
|
||||||
|
|
|
||||||
|.else
|
|
||||||
|
|
|
||||||
| lsl CARG3, CARG2, #1
|
| lsl CARG3, CARG2, #1
|
||||||
| adds RB, CARG3, #0x00200000
|
| adds RB, CARG3, #0x00200000
|
||||||
| bpl >2 // |x| < 1?
|
| bpl >2 // |x| < 1?
|
||||||
@ -2286,6 +2237,9 @@ static void build_subroutines(BuildCtx *ctx)
|
|||||||
|.else
|
|.else
|
||||||
| bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
|
| bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
|
||||||
|.endif
|
|.endif
|
||||||
|
|.if hf == 1
|
||||||
|
| vmoveq d0, CARG1, CARG2
|
||||||
|
|.endif
|
||||||
| bxeq lr // iszero: done.
|
| bxeq lr // iszero: done.
|
||||||
| mvn CARG4, #1
|
| mvn CARG4, #1
|
||||||
| cmp RB, #0
|
| cmp RB, #0
|
||||||
@ -2294,6 +2248,9 @@ static void build_subroutines(BuildCtx *ctx)
|
|||||||
| add RB, RB, #32
|
| add RB, RB, #32
|
||||||
| subs CARG1, CARG1, CARG4, lsl RB // lo = lo-lomask
|
| subs CARG1, CARG1, CARG4, lsl RB // lo = lo-lomask
|
||||||
| sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry
|
| sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry
|
||||||
|
|.if hf == 1
|
||||||
|
| vmov d0, CARG1, CARG2
|
||||||
|
|.endif
|
||||||
| bx lr
|
| bx lr
|
||||||
|
|
|
|
||||||
|2: // |x| < 1:
|
|2: // |x| < 1:
|
||||||
@ -2308,45 +2265,41 @@ static void build_subroutines(BuildCtx *ctx)
|
|||||||
| and CARG2, CARG2, #0x80000000
|
| and CARG2, CARG2, #0x80000000
|
||||||
| ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)
|
| ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)
|
||||||
| orrne CARG2, CARG2, CARG4
|
| orrne CARG2, CARG2, CARG4
|
||||||
| bx lr
|
|.if hf == 1
|
||||||
|
| vmov d0, CARG1, CARG2
|
||||||
|.endif
|
|.endif
|
||||||
|
| bx lr
|
||||||
|.endmacro
|
|.endmacro
|
||||||
|
|
|
|
||||||
|.if FPU
|
|
||||||
|.align 8
|
|
||||||
|9:
|
|
||||||
| .long 0, 0x3ff00000 // +1.0
|
|
||||||
|8:
|
|
||||||
| .long 0, 0x43300000 // 2^52
|
|
||||||
|.else
|
|
||||||
|9:
|
|9:
|
||||||
| .long 0x3ff00000 // hiword(+1.0)
|
| .long 0x3ff00000 // hiword(+1.0)
|
||||||
|.endif
|
|
||||||
|
|
|
|
||||||
|->vm_floor:
|
|->vm_floor:
|
||||||
|.if not HFABI
|
|.if HFABI
|
||||||
| vm_round floor, 0
|
|
||||||
|.endif
|
|
||||||
|->vm_floor_hf:
|
|
||||||
|.if FPU
|
|
||||||
| vm_round floor, 1
|
| vm_round floor, 1
|
||||||
|.endif
|
|.endif
|
||||||
|
|->vm_floor_sf:
|
||||||
|
| vm_round floor, 0
|
||||||
|
|
|
|
||||||
|->vm_ceil:
|
|->vm_ceil:
|
||||||
|.if not HFABI
|
|.if HFABI
|
||||||
| vm_round ceil, 0
|
|
||||||
|.endif
|
|
||||||
|->vm_ceil_hf:
|
|
||||||
|.if FPU
|
|
||||||
| vm_round ceil, 1
|
| vm_round ceil, 1
|
||||||
|.endif
|
|.endif
|
||||||
|
|->vm_ceil_sf:
|
||||||
|
| vm_round ceil, 0
|
||||||
|
|
|
|
||||||
|->vm_trunc:
|
|.macro vm_trunc, hf
|
||||||
|.if JIT and not HFABI
|
|.if JIT
|
||||||
|
|.if hf == 1
|
||||||
|
| vmov CARG1, CARG2, d0
|
||||||
|
|.endif
|
||||||
| lsl CARG3, CARG2, #1
|
| lsl CARG3, CARG2, #1
|
||||||
| adds RB, CARG3, #0x00200000
|
| adds RB, CARG3, #0x00200000
|
||||||
| andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.
|
| andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.
|
||||||
| movpl CARG1, #0
|
| movpl CARG1, #0
|
||||||
|
|.if hf == 1
|
||||||
|
| vmovpl d0, CARG1, CARG2
|
||||||
|
|.endif
|
||||||
| bxpl lr
|
| bxpl lr
|
||||||
| mvn CARG4, #0x3cc
|
| mvn CARG4, #0x3cc
|
||||||
| subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0.
|
| subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0.
|
||||||
@ -2355,13 +2308,19 @@ static void build_subroutines(BuildCtx *ctx)
|
|||||||
| and CARG1, CARG1, CARG4, lsl RB // lo &= lomask
|
| and CARG1, CARG1, CARG4, lsl RB // lo &= lomask
|
||||||
| subs RB, RB, #32
|
| subs RB, RB, #32
|
||||||
| andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask
|
| andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask
|
||||||
|
|.if hf == 1
|
||||||
|
| vmov d0, CARG1, CARG2
|
||||||
|
|.endif
|
||||||
| bx lr
|
| bx lr
|
||||||
|.endif
|
|.endif
|
||||||
|
|.endmacro
|
||||||
|
|
|
|
||||||
|->vm_trunc_hf:
|
|->vm_trunc:
|
||||||
|.if JIT and FPU
|
|.if HFABI
|
||||||
| vm_round trunc, 1
|
| vm_trunc 1
|
||||||
|.endif
|
|.endif
|
||||||
|
|->vm_trunc_sf:
|
||||||
|
| vm_trunc 0
|
||||||
|
|
|
|
||||||
| // double lj_vm_mod(double dividend, double divisor);
|
| // double lj_vm_mod(double dividend, double divisor);
|
||||||
|->vm_mod:
|
|->vm_mod:
|
||||||
@ -2369,7 +2328,9 @@ static void build_subroutines(BuildCtx *ctx)
|
|||||||
| // Special calling convention. Also, RC (r11) is not preserved.
|
| // Special calling convention. Also, RC (r11) is not preserved.
|
||||||
| vdiv.f64 d0, d6, d7
|
| vdiv.f64 d0, d6, d7
|
||||||
| mov RC, lr
|
| mov RC, lr
|
||||||
| bl ->vm_floor_hf
|
| vmov CARG1, CARG2, d0
|
||||||
|
| bl ->vm_floor_sf
|
||||||
|
| vmov d0, CARG1, CARG2
|
||||||
| vmul.f64 d0, d0, d7
|
| vmul.f64 d0, d0, d7
|
||||||
| mov lr, RC
|
| mov lr, RC
|
||||||
| vsub.f64 d6, d6, d0
|
| vsub.f64 d6, d6, d0
|
||||||
@ -2377,7 +2338,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|||||||
|.else
|
|.else
|
||||||
| push {r0, r1, r2, r3, r4, lr}
|
| push {r0, r1, r2, r3, r4, lr}
|
||||||
| bl extern __aeabi_ddiv
|
| bl extern __aeabi_ddiv
|
||||||
| bl ->vm_floor
|
| bl ->vm_floor_sf
|
||||||
| ldrd CARG34, [sp, #8]
|
| ldrd CARG34, [sp, #8]
|
||||||
| bl extern __aeabi_dmul
|
| bl extern __aeabi_dmul
|
||||||
| ldrd CARG34, [sp]
|
| ldrd CARG34, [sp]
|
||||||
|
Loading…
Reference in New Issue
Block a user