From a373fddbd3b129f3f95474533e74f0a52744ff8c Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 30 Jul 2012 18:59:13 +0200 Subject: [PATCH] ARM: Add VFP and hard-float ABI variants to interpreter. --- src/lj_frame.h | 4 + src/lj_target_arm.h | 11 +- src/vm_arm.dasc | 447 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 434 insertions(+), 28 deletions(-) diff --git a/src/lj_frame.h b/src/lj_frame.h index b8429c2a..b8af2349 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -97,7 +97,11 @@ enum { #define CFRAME_OFS_L 12 #define CFRAME_OFS_PC 8 #define CFRAME_OFS_MULTRES 4 +#if LJ_ARCH_HASFPU +#define CFRAME_SIZE 128 +#else #define CFRAME_SIZE 64 +#endif #define CFRAME_SHIFT_MULTRES 3 #elif LJ_TARGET_PPC #if LJ_ARCH_PPC64 diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index a24fc819..20e8ad36 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h @@ -14,7 +14,9 @@ #if LJ_SOFTFP #define FPRDEF(_) #else -#error "NYI: hard-float support for ARM" +#define FPRDEF(_) \ + _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ + _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) #endif #define VRIDDEF(_) @@ -45,7 +47,7 @@ enum { #if LJ_SOFTFP RID_MAX_FPR = RID_MIN_FPR, #else -#error "NYI: VFP support for ARM" + RID_MAX_FPR = RID_D15+1, #endif RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR @@ -68,7 +70,8 @@ enum { #define RSET_FPR 0 #define RSET_ALL RSET_GPR #else -#error "NYI: VFP support for ARM" +#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) +#define RSET_ALL (RSET_GPR|RSET_FPR) #endif #define RSET_INIT RSET_ALL @@ -82,7 +85,7 @@ enum { #if LJ_SOFTFP #define RSET_SCRATCH_FPR 0 #else -#error "NYI: VFP support for ARM" +#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) #endif #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) #define REGARG_FIRSTGPR RID_R0 diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 8ddce49e..26f97aa3 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -46,6 +46,7 @@ |.define CRET2, r1 | |// Stack layout while in interpreter. Must match with lj_frame.h. +|.define SAVE_R4, [sp, #28] |.define CFRAME_SPACE, #28 |.define SAVE_ERRF, [sp, #24] |.define SAVE_NRES, [sp, #20] @@ -60,6 +61,20 @@ |.define TMPD, [sp] |.define TMPDp, sp | +|.if FPU +|.macro saveregs +| push {r5, r6, r7, r8, r9, r10, r11, lr} +| vpush {d8-d15} +| sub sp, sp, CFRAME_SPACE+4 +| str r4, SAVE_R4 +|.endmacro +|.macro restoreregs_ret +| ldr r4, SAVE_R4 +| add sp, sp, CFRAME_SPACE+4 +| vpop {d8-d15} +| pop {r5, r6, r7, r8, r9, r10, r11, pc} +|.endmacro +|.else |.macro saveregs | push {r4, r5, r6, r7, r8, r9, r10, r11, lr} | sub sp, sp, CFRAME_SPACE @@ -68,6 +83,7 @@ | add sp, sp, CFRAME_SPACE | pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} |.endmacro +|.endif | |// Type definitions. Some of these are only used for documentation. |.type L, lua_State, LREG @@ -875,6 +891,29 @@ static void build_subroutines(BuildCtx *ctx) | bhs ->fff_fallback |.endmacro | + |.macro .ffunc_d, name + | .ffunc name + | ldr CARG2, [BASE, #4] + | cmp NARGS8:RC, #8 + | vldr d0, [BASE] + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |.macro .ffunc_dd, name + | .ffunc name + | ldr CARG2, [BASE, #4] + | ldr CARG4, [BASE, #12] + | cmp NARGS8:RC, #16 + | vldr d0, [BASE] + | vldr d1, [BASE, #8] + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | cmnlo CARG4, #-LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. |.macro ffgccheck | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] @@ -1327,8 +1366,14 @@ static void build_subroutines(BuildCtx *ctx) | movmi CARG1, #0x80000000 | bmi <1 |4: + |.if HFABI + | vmov d0, CARG1, CARG2 + | bl ->vm_..func.._hf + | b ->fff_resd + |.else | bl ->vm_..func | b ->fff_restv + |.endif |.endmacro | | math_round floor @@ -1381,22 +1426,48 @@ static void build_subroutines(BuildCtx *ctx) | b <5 | |.macro math_extern, func + |.if HFABI + | .ffunc_d math_ .. func + |.else | .ffunc_n math_ .. func + |.endif | .IOS mov RA, BASE | bl extern func | .IOS mov BASE, RA + |.if HFABI + | b ->fff_resd + |.else | b ->fff_restv + |.endif |.endmacro | |.macro math_extern2, func + |.if HFABI + | .ffunc_dd math_ .. func + |.else | .ffunc_nn math_ .. func + |.endif | .IOS mov RA, BASE | bl extern func | .IOS mov BASE, RA + |.if HFABI + | b ->fff_resd + |.else | b ->fff_restv + |.endif |.endmacro | + |.if FPU + | .ffunc_d math_sqrt + | vsqrt.f64 d0, d0 + |->fff_resd: + | ldr PC, [BASE, FRAME_PC] + | vstr d0, [BASE, #-8] + | b ->fff_res1 + |.else | math_extern sqrt + |.endif + | | math_extern log | math_extern log10 | math_extern exp @@ -1414,11 +1485,34 @@ static void build_subroutines(BuildCtx *ctx) | math_extern2 fmod | |->ff_math_deg: - |.ffunc_n math_rad + |.if FPU + | .ffunc_d math_rad + | vldr d1, CFUNC:CARG3->upvalue[0] + | vmul.f64 d0, d0, d1 + | b ->fff_resd + |.else + | .ffunc_n math_rad | ldrd CARG34, CFUNC:CARG3->upvalue[0] | bl extern __aeabi_dmul | b ->fff_restv + |.endif | + |.if HFABI + | .ffunc math_ldexp + | ldr CARG4, [BASE, #4] + | ldrd CARG12, [BASE, #8] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + | vldr d0, [BASE] + | checktp CARG4, LJ_TISNUM + | bhs ->fff_fallback + | checktp CARG2, LJ_TISNUM + | bne ->fff_fallback + | .IOS mov RA, BASE + | bl extern ldexp // (double x, int exp) + | .IOS mov BASE, RA + | b ->fff_resd + |.else |.ffunc_2 math_ldexp | checktp CARG2, LJ_TISNUM | bhs ->fff_fallback @@ -1428,7 +1522,22 @@ static void build_subroutines(BuildCtx *ctx) | bl extern ldexp // (double x, int exp) | .IOS mov BASE, RA | b ->fff_restv + |.endif | + |.if HFABI + |.ffunc_d math_frexp + | mov CARG1, sp + | .IOS mov RA, BASE + | bl extern frexp + | .IOS mov BASE, RA + | ldr CARG3, [sp] + | mvn CARG4, #~LJ_TISNUM + | ldr PC, [BASE, FRAME_PC] + | vstr d0, [BASE, #-8] + | mov RC, #(2+1)*8 + | strd CARG34, [BASE] + | b ->fff_res + |.else |.ffunc_n math_frexp | mov CARG3, sp | .IOS mov RA, BASE @@ -1441,7 +1550,19 @@ static void build_subroutines(BuildCtx *ctx) | mov RC, #(2+1)*8 | strd CARG34, [BASE] | b ->fff_res + |.endif | + |.if HFABI + |.ffunc_d math_modf + | sub CARG1, BASE, #8 + | ldr PC, [BASE, FRAME_PC] + | .IOS mov RA, BASE + | bl extern modf + | .IOS mov BASE, RA + | mov RC, #(2+1)*8 + | vstr d0, [BASE] + | b ->fff_res + |.else |.ffunc_n math_modf | sub CARG3, BASE, #8 | ldr PC, [BASE, FRAME_PC] @@ -1451,8 +1572,56 @@ static void build_subroutines(BuildCtx *ctx) | mov RC, #(2+1)*8 | strd CARG12, [BASE] | b ->fff_res + |.endif | |.macro math_minmax, name, cond, fcond + |.if FPU + | .ffunc_1 name + | add RB, BASE, RC + | checktp CARG2, LJ_TISNUM + | add RA, BASE, #8 + | bne >4 + |1: // Handle integers. + | ldrd CARG34, [RA] + | cmp RA, RB + | bhs ->fff_restv + | checktp CARG4, LJ_TISNUM + | bne >3 + | cmp CARG1, CARG3 + | add RA, RA, #8 + | mov..cond CARG1, CARG3 + | b <1 + |3: // Convert intermediate result to number and continue below. + | vmov s4, CARG1 + | bhi ->fff_fallback + | vldr d1, [RA] + | vcvt.f64.s32 d0, s4 + | b >6 + | + |4: + | vldr d0, [BASE] + | bhi ->fff_fallback + |5: // Handle numbers. + | ldrd CARG34, [RA] + | vldr d1, [RA] + | cmp RA, RB + | bhs ->fff_resd + | checktp CARG4, LJ_TISNUM + | bhs >7 + |6: + | vcmp.f64 d0, d1 + | vmrs + | add RA, RA, #8 + | vmov..fcond.f64 d0, d1 + | b <5 + |7: // Convert integer to number and continue above. + | vmov s4, CARG3 + | bhi ->fff_fallback + | vcvt.f64.s32 d1, s4 + | b <6 + | + |.else + | | .ffunc_1 name | checktp CARG2, LJ_TISNUM | mov RA, #8 @@ -1467,9 +1636,8 @@ static void build_subroutines(BuildCtx *ctx) | add RA, RA, #8 | mov..cond CARG1, CARG3 | b <1 - |3: + |3: // Convert intermediate result to number and continue below. | bhi ->fff_fallback - | // Convert intermediate result to number and continue below. | bl extern __aeabi_i2d | ldrd CARG34, [BASE, RA] | b >6 @@ -1495,6 +1663,7 @@ static void build_subroutines(BuildCtx *ctx) | bl extern __aeabi_i2d | ldrd CARG34, TMPD | b <6 + |.endif |.endmacro | | math_minmax math_min, gt, hi @@ -1959,6 +2128,9 @@ static void build_subroutines(BuildCtx *ctx) | ldr CARG2, [CARG1, #-4]! // Get exit instruction. | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. | str CARG1, [sp, #60] + |.if FPU + | vpush {d0-d15} + |.endif | lsl CARG2, CARG2, #8 | add CARG1, CARG1, CARG2, asr #6 | ldr CARG2, [lr, #4] // Load exit stub group offset. @@ -2025,8 +2197,53 @@ static void build_subroutines(BuildCtx *ctx) |// FP value rounding. Called from JIT code. |// |// double lj_vm_floor/ceil/trunc(double x); - |.macro vm_round, func - |->vm_ .. func: + |.macro vm_round, func, hf + |.if FPU + |.if hf == 0 + | vmov d0, CARG1, CARG2 + | vldr d2, <8 // 2^52 + |.else + | vldr d2, <8 // 2^52 + | vmov CARG1, CARG2, d0 + |.endif + | vabs.f64 d1, d0 + | vcmp.f64 d1, d2 // |x| >= 2^52 or NaN? + | vmrs + |.if "func" == "trunc" + | vadd.f64 d0, d1, d2 + | bxpl lr // Return argument unchanged. + | vsub.f64 d0, d0, d2 // (|x| + 2^52) - 2^52 + | vldr d2, <9 // +1.0 + | vcmp.f64 d1, d0 // |x| < result: subtract +1.0 + | vmrs + | vsubmi.f64 d0, d1, d2 + | cmp CARG2, #0 + | vnegmi.f64 d0, d0 // Merge sign bit back in. + |.else + | vadd.f64 d1, d1, d2 + | bxpl lr // Return argument unchanged. + | cmp CARG2, #0 + | vsub.f64 d1, d1, d2 // (|x| + 2^52) - 2^52 + | vldr d2, <9 // +1.0 + | vnegmi.f64 d1, d1 // Merge sign bit back in. + |.if "func" == "floor" + | vcmp.f64 d0, d1 // x < result: subtract +1.0. + | vmrs + | vsubmi.f64 d0, d1, d2 + |.else + | vcmp.f64 d1, d0 // x > result: add +1.0. + | vmrs + | vaddmi.f64 d0, d1, d2 + |.endif + | vmovpl.f64 d0, d1 + |.endif + |.if hf == 0 + | vmov CARG1, CARG2, d0 + |.endif + | bx lr + | + |.else + | | lsl CARG3, CARG2, #1 | adds RB, CARG3, #0x00200000 | bpl >2 // |x| < 1? @@ -2069,15 +2286,40 @@ static void build_subroutines(BuildCtx *ctx) | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) | orrne CARG2, CARG2, CARG4 | bx lr + |.endif |.endmacro | + |.if FPU + |.align 8 |9: - | .long 0x3ff00000 // hiword(1.0) - | vm_round floor - | vm_round ceil + | .long 0, 0x3ff00000 // +1.0 + |8: + | .long 0, 0x43300000 // 2^52 + |.else + |9: + | .long 0x3ff00000 // hiword(+1.0) + |.endif + | + |->vm_floor: + |.if not HFABI + | vm_round floor, 0 + |.endif + |->vm_floor_hf: + |.if FPU + | vm_round floor, 1 + |.endif + | + |->vm_ceil: + |.if not HFABI + | vm_round ceil, 0 + |.endif + |->vm_ceil_hf: + |.if FPU + | vm_round ceil, 1 + |.endif | |->vm_trunc: - |.if JIT + |.if JIT and not HFABI | lsl CARG3, CARG2, #1 | adds RB, CARG3, #0x00200000 | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. @@ -2093,8 +2335,23 @@ static void build_subroutines(BuildCtx *ctx) | bx lr |.endif | + |->vm_trunc_hf: + |.if JIT and FPU + | vm_round trunc, 1 + |.endif + | | // double lj_vm_mod(double dividend, double divisor); |->vm_mod: + |.if FPU + | // Special calling convention. Also, RC (r11) is not preserved. + | vdiv.f64 d0, d6, d7 + | mov RC, lr + | bl ->vm_floor_hf + | vmul.f64 d0, d0, d7 + | mov lr, RC + | vsub.f64 d6, d6, d0 + | bx lr + |.else | push {r0, r1, r2, r3, r4, lr} | bl extern __aeabi_ddiv | bl ->vm_floor @@ -2105,6 +2362,7 @@ static void build_subroutines(BuildCtx *ctx) | bl extern __aeabi_dadd | add sp, sp, #20 | pop {pc} + |.endif | | // int lj_vm_modi(int dividend, int divisor); |->vm_modi: @@ -2266,6 +2524,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next | |3: // CARG12 is not an integer. + |.if FPU + | vldr d0, [RA] + | bhi ->vmeta_comp + | // d0 is a number. + | checktp CARG4, LJ_TISNUM + | vldr d1, [RC] + | blo >5 + | // d0 is a number, CARG3 is an integer. + | vmov s4, CARG3 + | vcvt.f64.s32 d1, s4 + | b >5 + |4: // CARG1 is an integer, CARG34 is not an integer. + | vldr d1, [RC] + | bhi ->vmeta_comp + | // CARG1 is an integer, d1 is a number. + | vmov s4, CARG1 + | vcvt.f64.s32 d0, s4 + |5: // d0 and d1 are numbers. + | vcmp.f64 d0, d1 + | vmrs + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + if (op == BC_ISLT) { + | sublo PC, RB, #0x20000 + } else if (op == BC_ISGE) { + | subhs PC, RB, #0x20000 + } else if (op == BC_ISLE) { + | subls PC, RB, #0x20000 + } else { + | subhi PC, RB, #0x20000 + } + | b <1 + |.else | bhi ->vmeta_comp | // CARG12 is a number. | checktp CARG4, LJ_TISNUM @@ -2282,7 +2572,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | b >5 |4: // CARG1 is an integer, CARG34 is not an integer. | bhi ->vmeta_comp - | // CARG1 is an integer, CARG34 is a number + | // CARG1 is an integer, CARG34 is a number. | mov RA, RB // Save RB. | bl extern __aeabi_i2d | ldrd CARG34, [RC] // Restore second operand. @@ -2299,6 +2589,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | subhi PC, RA, #0x20000 } | b <1 + |.endif break; case BC_ISEQV: case BC_ISNEV: @@ -2439,6 +2730,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | bhi <2 |.endif + |.if FPU + | checktp CARG4, LJ_TISNUM + | vmov s4, CARG3 + | vldr d0, [RA] + | vldrlo d1, [RC] + | vcvths.f64.s32 d1, s4 + | b >5 + |4: // CARG1 is an integer, d1 is a number. + | vmov s4, CARG1 + | vldr d1, [RC] + | vcvt.f64.s32 d0, s4 + |5: // d0 and d1 are numbers. + | vcmp.f64 d0, d1 + | vmrs + if (vk) { + | subeq PC, RB, #0x20000 + } else { + | subne PC, RB, #0x20000 + } + | b <2 + |.else | // CARG12 is a number. | checktp CARG4, LJ_TISNUM | movlo RA, RB // Save RB. @@ -2458,6 +2770,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | subne PC, RA, #0x20000 } | b <2 + |.endif | |.if FFI |7: @@ -2617,20 +2930,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: + | .if FPU + | ldrd CARG12, [RB, BASE]! + | ldrd CARG34, [RC, KBASE]! + | .else | ldrd CARG12, [BASE, RB] | ldrd CARG34, [KBASE, RC] + | .endif || break; ||case 1: + | .if FPU + | ldrd CARG34, [RB, BASE]! + | ldrd CARG12, [RC, KBASE]! + | .else | ldrd CARG34, [BASE, RB] | ldrd CARG12, [KBASE, RC] + | .endif || break; ||default: + | .if FPU + | ldrd CARG12, [RB, BASE]! + | ldrd CARG34, [RC, BASE]! + | .else | ldrd CARG12, [BASE, RB] | ldrd CARG34, [BASE, RC] + | .endif || break; ||} |.endmacro | + |.macro ins_arithpre_fpu, reg1, reg2 + |.if FPU + ||if (vk == 1) { + | vldr reg2, [RB] + | vldr reg1, [RC] + ||} else { + | vldr reg1, [RB] + | vldr reg2, [RC] + ||} + |.endif + |.endmacro + | + |.macro ins_arithpost_fpu, reg + | ins_next1 + | add RA, BASE, RA + | ins_next2 + | vstr reg, [RA] + | ins_next3 + |.endmacro + | |.macro ins_arithfallback, ins ||switch (vk) { ||case 0: @@ -2645,9 +2993,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) ||} |.endmacro | - |.macro ins_arithdn, intins, fpcall + |.macro ins_arithdn, intins, fpins, fpcall | ins_arithpre - |.if "intins" ~= "vm_modi" + |.if "intins" ~= "vm_modi" and not FPU | ins_next1 |.endif | ins_arithcheck_int >5 @@ -2665,57 +3013,74 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_arithfallback bvs |.endif |4: - |.if "intins" == "vm_modi" + |.if "intins" == "vm_modi" or FPU | ins_next1 |.endif | ins_next2 | strd CARG12, [BASE, RA] | ins_next3 |5: // FP variant. + | ins_arithpre_fpu d6, d7 | ins_arithfallback ins_arithcheck_num + |.if FPU |.if "intins" == "vm_modi" | bl fpcall |.else + | fpins d6, d6, d7 + |.endif + | ins_arithpost_fpu d6 + |.else | bl fpcall - | ins_next1 + |.if "intins" ~= "vm_modi" + | ins_next1 |.endif | b <4 + |.endif |.endmacro | - |.macro ins_arithfp, fpcall + |.macro ins_arithfp, fpins, fpcall | ins_arithpre + |.if "fpins" ~= "extern" or HFABI + | ins_arithpre_fpu d0, d1 + |.endif | ins_arithfallback ins_arithcheck_num - |.if "fpcall" == "extern pow" + |.if "fpins" == "extern" | .IOS mov RC, BASE | bl fpcall | .IOS mov BASE, RC + |.elif FPU + | fpins d0, d0, d1 |.else | bl fpcall |.endif + |.if ("fpins" ~= "extern" or HFABI) and FPU + | ins_arithpost_fpu d0 + |.else | ins_next1 | ins_next2 | strd CARG12, [BASE, RA] | ins_next3 + |.endif |.endmacro case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arithdn adds, extern __aeabi_dadd + | ins_arithdn adds, vadd.f64, extern __aeabi_dadd break; case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arithdn subs, extern __aeabi_dsub + | ins_arithdn subs, vsub.f64, extern __aeabi_dsub break; case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arithdn smull, extern __aeabi_dmul + | ins_arithdn smull, vmul.f64, extern __aeabi_dmul break; case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp extern __aeabi_ddiv + | ins_arithfp vdiv.f64, extern __aeabi_ddiv break; case BC_MODVN: case BC_MODNV: case BC_MODVV: - | ins_arithdn vm_modi, ->vm_mod + | ins_arithdn vm_modi, vm_mod, ->vm_mod break; case BC_POW: | // NYI: (partial) integer arithmetic. - | ins_arithfp extern pow + | ins_arithfp extern, extern pow break; case BC_CAT: @@ -3775,20 +4140,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmnlo CARG4, #-LJ_TISNUM | cmnlo RB, #-LJ_TISNUM | bhs ->vmeta_for + |.if FPU + | vldr d0, FOR_IDX + | vldr d1, FOR_STOP + | cmp RB, #0 + | vstr d0, FOR_EXT + |.else | cmp RB, #0 - | strd CARG12, FOR_IDX | strd CARG12, FOR_EXT | blt >8 + |.endif } else { + |.if FPU + | vldr d0, FOR_IDX + | vldr d2, FOR_STEP + | vldr d1, FOR_STOP + | cmp CARG4, #0 + | vadd.f64 d0, d0, d2 + |.else | cmp CARG4, #0 | blt >8 | bl extern __aeabi_dadd | strd CARG12, FOR_IDX | ldrd CARG34, FOR_STOP | strd CARG12, FOR_EXT + |.endif } |6: + |.if FPU + | vcmpge.f64 d0, d1 + | vcmplt.f64 d1, d0 + | vmrs + |.else | bl extern __aeabi_cdcmple + |.endif + if (vk) { + |.if FPU + | vstr d0, FOR_IDX + | vstr d0, FOR_EXT + |.endif + } if (op == BC_FORI) { | subhi PC, RC, #0x20000 } else if (op == BC_JFORI) { @@ -3804,6 +4195,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next2 | b <3 | + |.if not FPU |8: // Invert check for negative step. if (vk) { | bl extern __aeabi_dadd @@ -3814,6 +4206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov CARG4, CARG2 | ldrd CARG12, FOR_STOP | b <6 + |.endif break; case BC_ITERL: @@ -4048,8 +4441,14 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ fcofs, CFRAME_SIZE); - for (i = 11; i >= 4; i--) /* offset r4-r11 */ + for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); +#if LJ_ARCH_HASFPU + for (i = 15; i >= 8; i--) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", + 64+2*i, 10+2*(15-i)); + fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ +#endif fprintf(ctx->fp, "\t.align 2\n" ".LEFDE0:\n\n");