ARM: Add VFP and hard-float ABI variants to interpreter.

This commit is contained in:
Mike Pall 2012-07-30 18:59:13 +02:00
parent 23abbd9ef3
commit a373fddbd3
3 changed files with 434 additions and 28 deletions

View File

@@ -97,7 +97,11 @@ enum {
#define CFRAME_OFS_L 12 #define CFRAME_OFS_L 12
#define CFRAME_OFS_PC 8 #define CFRAME_OFS_PC 8
#define CFRAME_OFS_MULTRES 4 #define CFRAME_OFS_MULTRES 4
#if LJ_ARCH_HASFPU
#define CFRAME_SIZE 128
#else
#define CFRAME_SIZE 64 #define CFRAME_SIZE 64
#endif
#define CFRAME_SHIFT_MULTRES 3 #define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_PPC #elif LJ_TARGET_PPC
#if LJ_ARCH_PPC64 #if LJ_ARCH_PPC64

View File

@@ -14,7 +14,9 @@
#if LJ_SOFTFP #if LJ_SOFTFP
#define FPRDEF(_) #define FPRDEF(_)
#else #else
#error "NYI: hard-float support for ARM" #define FPRDEF(_) \
_(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
_(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15)
#endif #endif
#define VRIDDEF(_) #define VRIDDEF(_)
@@ -45,7 +47,7 @@ enum {
#if LJ_SOFTFP #if LJ_SOFTFP
RID_MAX_FPR = RID_MIN_FPR, RID_MAX_FPR = RID_MIN_FPR,
#else #else
#error "NYI: VFP support for ARM" RID_MAX_FPR = RID_D15+1,
#endif #endif
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
@@ -68,7 +70,8 @@ enum {
#define RSET_FPR 0 #define RSET_FPR 0
#define RSET_ALL RSET_GPR #define RSET_ALL RSET_GPR
#else #else
#error "NYI: VFP support for ARM" #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#define RSET_ALL (RSET_GPR|RSET_FPR)
#endif #endif
#define RSET_INIT RSET_ALL #define RSET_INIT RSET_ALL
@@ -82,7 +85,7 @@ enum {
#if LJ_SOFTFP #if LJ_SOFTFP
#define RSET_SCRATCH_FPR 0 #define RSET_SCRATCH_FPR 0
#else #else
#error "NYI: VFP support for ARM" #define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
#endif #endif
#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
#define REGARG_FIRSTGPR RID_R0 #define REGARG_FIRSTGPR RID_R0

View File

@@ -46,6 +46,7 @@
|.define CRET2, r1 |.define CRET2, r1
| |
|// Stack layout while in interpreter. Must match with lj_frame.h. |// Stack layout while in interpreter. Must match with lj_frame.h.
|.define SAVE_R4, [sp, #28]
|.define CFRAME_SPACE, #28 |.define CFRAME_SPACE, #28
|.define SAVE_ERRF, [sp, #24] |.define SAVE_ERRF, [sp, #24]
|.define SAVE_NRES, [sp, #20] |.define SAVE_NRES, [sp, #20]
@@ -60,6 +61,20 @@
|.define TMPD, [sp] |.define TMPD, [sp]
|.define TMPDp, sp |.define TMPDp, sp
| |
|.if FPU
|.macro saveregs
| push {r5, r6, r7, r8, r9, r10, r11, lr}
| vpush {d8-d15}
| sub sp, sp, CFRAME_SPACE+4
| str r4, SAVE_R4
|.endmacro
|.macro restoreregs_ret
| ldr r4, SAVE_R4
| add sp, sp, CFRAME_SPACE+4
| vpop {d8-d15}
| pop {r5, r6, r7, r8, r9, r10, r11, pc}
|.endmacro
|.else
|.macro saveregs |.macro saveregs
| push {r4, r5, r6, r7, r8, r9, r10, r11, lr} | push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
| sub sp, sp, CFRAME_SPACE | sub sp, sp, CFRAME_SPACE
@@ -68,6 +83,7 @@
| add sp, sp, CFRAME_SPACE | add sp, sp, CFRAME_SPACE
| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} | pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|.endmacro |.endmacro
|.endif
| |
|// Type definitions. Some of these are only used for documentation. |// Type definitions. Some of these are only used for documentation.
|.type L, lua_State, LREG |.type L, lua_State, LREG
@@ -875,6 +891,29 @@ static void build_subroutines(BuildCtx *ctx)
| bhs ->fff_fallback | bhs ->fff_fallback
|.endmacro |.endmacro
| |
|.macro .ffunc_d, name
| .ffunc name
| ldr CARG2, [BASE, #4]
| cmp NARGS8:RC, #8
| vldr d0, [BASE]
| blo ->fff_fallback
| checktp CARG2, LJ_TISNUM
| bhs ->fff_fallback
|.endmacro
|
|.macro .ffunc_dd, name
| .ffunc name
| ldr CARG2, [BASE, #4]
| ldr CARG4, [BASE, #12]
| cmp NARGS8:RC, #16
| vldr d0, [BASE]
| vldr d1, [BASE, #8]
| blo ->fff_fallback
| checktp CARG2, LJ_TISNUM
| cmnlo CARG4, #-LJ_TISNUM
| bhs ->fff_fallback
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
|.macro ffgccheck |.macro ffgccheck
| ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)]
@@ -1327,8 +1366,14 @@ static void build_subroutines(BuildCtx *ctx)
| movmi CARG1, #0x80000000 | movmi CARG1, #0x80000000
| bmi <1 | bmi <1
|4: |4:
|.if HFABI
| vmov d0, CARG1, CARG2
| bl ->vm_..func.._hf
| b ->fff_resd
|.else
| bl ->vm_..func | bl ->vm_..func
| b ->fff_restv | b ->fff_restv
|.endif
|.endmacro |.endmacro
| |
| math_round floor | math_round floor
@@ -1381,22 +1426,48 @@ static void build_subroutines(BuildCtx *ctx)
| b <5 | b <5
| |
|.macro math_extern, func |.macro math_extern, func
|.if HFABI
| .ffunc_d math_ .. func
|.else
| .ffunc_n math_ .. func | .ffunc_n math_ .. func
|.endif
| .IOS mov RA, BASE | .IOS mov RA, BASE
| bl extern func | bl extern func
| .IOS mov BASE, RA | .IOS mov BASE, RA
|.if HFABI
| b ->fff_resd
|.else
| b ->fff_restv | b ->fff_restv
|.endif
|.endmacro |.endmacro
| |
|.macro math_extern2, func |.macro math_extern2, func
|.if HFABI
| .ffunc_dd math_ .. func
|.else
| .ffunc_nn math_ .. func | .ffunc_nn math_ .. func
|.endif
| .IOS mov RA, BASE | .IOS mov RA, BASE
| bl extern func | bl extern func
| .IOS mov BASE, RA | .IOS mov BASE, RA
|.if HFABI
| b ->fff_resd
|.else
| b ->fff_restv | b ->fff_restv
|.endif
|.endmacro |.endmacro
| |
|.if FPU
| .ffunc_d math_sqrt
| vsqrt.f64 d0, d0
|->fff_resd:
| ldr PC, [BASE, FRAME_PC]
| vstr d0, [BASE, #-8]
| b ->fff_res1
|.else
| math_extern sqrt | math_extern sqrt
|.endif
|
| math_extern log | math_extern log
| math_extern log10 | math_extern log10
| math_extern exp | math_extern exp
@@ -1414,11 +1485,34 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern2 fmod | math_extern2 fmod
| |
|->ff_math_deg: |->ff_math_deg:
|.if FPU
| .ffunc_d math_rad
| vldr d1, CFUNC:CARG3->upvalue[0]
| vmul.f64 d0, d0, d1
| b ->fff_resd
|.else
| .ffunc_n math_rad | .ffunc_n math_rad
| ldrd CARG34, CFUNC:CARG3->upvalue[0] | ldrd CARG34, CFUNC:CARG3->upvalue[0]
| bl extern __aeabi_dmul | bl extern __aeabi_dmul
| b ->fff_restv | b ->fff_restv
|.endif
| |
|.if HFABI
| .ffunc math_ldexp
| ldr CARG4, [BASE, #4]
| ldrd CARG12, [BASE, #8]
| cmp NARGS8:RC, #16
| blo ->fff_fallback
| vldr d0, [BASE]
| checktp CARG4, LJ_TISNUM
| bhs ->fff_fallback
| checktp CARG2, LJ_TISNUM
| bne ->fff_fallback
| .IOS mov RA, BASE
| bl extern ldexp // (double x, int exp)
| .IOS mov BASE, RA
| b ->fff_resd
|.else
|.ffunc_2 math_ldexp |.ffunc_2 math_ldexp
| checktp CARG2, LJ_TISNUM | checktp CARG2, LJ_TISNUM
| bhs ->fff_fallback | bhs ->fff_fallback
@@ -1428,7 +1522,22 @@ static void build_subroutines(BuildCtx *ctx)
| bl extern ldexp // (double x, int exp) | bl extern ldexp // (double x, int exp)
| .IOS mov BASE, RA | .IOS mov BASE, RA
| b ->fff_restv | b ->fff_restv
|.endif
| |
|.if HFABI
|.ffunc_d math_frexp
| mov CARG1, sp
| .IOS mov RA, BASE
| bl extern frexp
| .IOS mov BASE, RA
| ldr CARG3, [sp]
| mvn CARG4, #~LJ_TISNUM
| ldr PC, [BASE, FRAME_PC]
| vstr d0, [BASE, #-8]
| mov RC, #(2+1)*8
| strd CARG34, [BASE]
| b ->fff_res
|.else
|.ffunc_n math_frexp |.ffunc_n math_frexp
| mov CARG3, sp | mov CARG3, sp
| .IOS mov RA, BASE | .IOS mov RA, BASE
@@ -1441,7 +1550,19 @@ static void build_subroutines(BuildCtx *ctx)
| mov RC, #(2+1)*8 | mov RC, #(2+1)*8
| strd CARG34, [BASE] | strd CARG34, [BASE]
| b ->fff_res | b ->fff_res
|.endif
| |
|.if HFABI
|.ffunc_d math_modf
| sub CARG1, BASE, #8
| ldr PC, [BASE, FRAME_PC]
| .IOS mov RA, BASE
| bl extern modf
| .IOS mov BASE, RA
| mov RC, #(2+1)*8
| vstr d0, [BASE]
| b ->fff_res
|.else
|.ffunc_n math_modf |.ffunc_n math_modf
| sub CARG3, BASE, #8 | sub CARG3, BASE, #8
| ldr PC, [BASE, FRAME_PC] | ldr PC, [BASE, FRAME_PC]
@@ -1451,8 +1572,56 @@ static void build_subroutines(BuildCtx *ctx)
| mov RC, #(2+1)*8 | mov RC, #(2+1)*8
| strd CARG12, [BASE] | strd CARG12, [BASE]
| b ->fff_res | b ->fff_res
|.endif
| |
|.macro math_minmax, name, cond, fcond |.macro math_minmax, name, cond, fcond
|.if FPU
| .ffunc_1 name
| add RB, BASE, RC
| checktp CARG2, LJ_TISNUM
| add RA, BASE, #8
| bne >4
|1: // Handle integers.
| ldrd CARG34, [RA]
| cmp RA, RB
| bhs ->fff_restv
| checktp CARG4, LJ_TISNUM
| bne >3
| cmp CARG1, CARG3
| add RA, RA, #8
| mov..cond CARG1, CARG3
| b <1
|3: // Convert intermediate result to number and continue below.
| vmov s4, CARG1
| bhi ->fff_fallback
| vldr d1, [RA]
| vcvt.f64.s32 d0, s4
| b >6
|
|4:
| vldr d0, [BASE]
| bhi ->fff_fallback
|5: // Handle numbers.
| ldrd CARG34, [RA]
| vldr d1, [RA]
| cmp RA, RB
| bhs ->fff_resd
| checktp CARG4, LJ_TISNUM
| bhs >7
|6:
| vcmp.f64 d0, d1
| vmrs
| add RA, RA, #8
| vmov..fcond.f64 d0, d1
| b <5
|7: // Convert integer to number and continue above.
| vmov s4, CARG3
| bhi ->fff_fallback
| vcvt.f64.s32 d1, s4
| b <6
|
|.else
|
| .ffunc_1 name | .ffunc_1 name
| checktp CARG2, LJ_TISNUM | checktp CARG2, LJ_TISNUM
| mov RA, #8 | mov RA, #8
@@ -1467,9 +1636,8 @@ static void build_subroutines(BuildCtx *ctx)
| add RA, RA, #8 | add RA, RA, #8
| mov..cond CARG1, CARG3 | mov..cond CARG1, CARG3
| b <1 | b <1
|3: |3: // Convert intermediate result to number and continue below.
| bhi ->fff_fallback | bhi ->fff_fallback
| // Convert intermediate result to number and continue below.
| bl extern __aeabi_i2d | bl extern __aeabi_i2d
| ldrd CARG34, [BASE, RA] | ldrd CARG34, [BASE, RA]
| b >6 | b >6
@@ -1495,6 +1663,7 @@ static void build_subroutines(BuildCtx *ctx)
| bl extern __aeabi_i2d | bl extern __aeabi_i2d
| ldrd CARG34, TMPD | ldrd CARG34, TMPD
| b <6 | b <6
|.endif
|.endmacro |.endmacro
| |
| math_minmax math_min, gt, hi | math_minmax math_min, gt, hi
@@ -1959,6 +2128,9 @@ static void build_subroutines(BuildCtx *ctx)
| ldr CARG2, [CARG1, #-4]! // Get exit instruction. | ldr CARG2, [CARG1, #-4]! // Get exit instruction.
| str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC.
| str CARG1, [sp, #60] | str CARG1, [sp, #60]
|.if FPU
| vpush {d0-d15}
|.endif
| lsl CARG2, CARG2, #8 | lsl CARG2, CARG2, #8
| add CARG1, CARG1, CARG2, asr #6 | add CARG1, CARG1, CARG2, asr #6
| ldr CARG2, [lr, #4] // Load exit stub group offset. | ldr CARG2, [lr, #4] // Load exit stub group offset.
@@ -2025,8 +2197,53 @@ static void build_subroutines(BuildCtx *ctx)
|// FP value rounding. Called from JIT code. |// FP value rounding. Called from JIT code.
|// |//
|// double lj_vm_floor/ceil/trunc(double x); |// double lj_vm_floor/ceil/trunc(double x);
|.macro vm_round, func |.macro vm_round, func, hf
|->vm_ .. func: |.if FPU
|.if hf == 0
| vmov d0, CARG1, CARG2
| vldr d2, <8 // 2^52
|.else
| vldr d2, <8 // 2^52
| vmov CARG1, CARG2, d0
|.endif
| vabs.f64 d1, d0
| vcmp.f64 d1, d2 // |x| >= 2^52 or NaN?
| vmrs
|.if "func" == "trunc"
| vadd.f64 d0, d1, d2
| bxpl lr // Return argument unchanged.
| vsub.f64 d0, d0, d2 // (|x| + 2^52) - 2^52
| vldr d2, <9 // +1.0
| vcmp.f64 d1, d0 // |x| < result: subtract +1.0
| vmrs
| vsubmi.f64 d0, d1, d2
| cmp CARG2, #0
| vnegmi.f64 d0, d0 // Merge sign bit back in.
|.else
| vadd.f64 d1, d1, d2
| bxpl lr // Return argument unchanged.
| cmp CARG2, #0
| vsub.f64 d1, d1, d2 // (|x| + 2^52) - 2^52
| vldr d2, <9 // +1.0
| vnegmi.f64 d1, d1 // Merge sign bit back in.
|.if "func" == "floor"
| vcmp.f64 d0, d1 // x < result: subtract +1.0.
| vmrs
| vsubmi.f64 d0, d1, d2
|.else
| vcmp.f64 d1, d0 // x > result: add +1.0.
| vmrs
| vaddmi.f64 d0, d1, d2
|.endif
| vmovpl.f64 d0, d1
|.endif
|.if hf == 0
| vmov CARG1, CARG2, d0
|.endif
| bx lr
|
|.else
|
| lsl CARG3, CARG2, #1 | lsl CARG3, CARG2, #1
| adds RB, CARG3, #0x00200000 | adds RB, CARG3, #0x00200000
| bpl >2 // |x| < 1? | bpl >2 // |x| < 1?
@@ -2069,15 +2286,40 @@ static void build_subroutines(BuildCtx *ctx)
| ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)
| orrne CARG2, CARG2, CARG4 | orrne CARG2, CARG2, CARG4
| bx lr | bx lr
|.endif
|.endmacro |.endmacro
| |
|.if FPU
|.align 8
|9: |9:
| .long 0x3ff00000 // hiword(1.0) | .long 0, 0x3ff00000 // +1.0
| vm_round floor |8:
| vm_round ceil | .long 0, 0x43300000 // 2^52
|.else
|9:
| .long 0x3ff00000 // hiword(+1.0)
|.endif
|
|->vm_floor:
|.if not HFABI
| vm_round floor, 0
|.endif
|->vm_floor_hf:
|.if FPU
| vm_round floor, 1
|.endif
|
|->vm_ceil:
|.if not HFABI
| vm_round ceil, 0
|.endif
|->vm_ceil_hf:
|.if FPU
| vm_round ceil, 1
|.endif
| |
|->vm_trunc: |->vm_trunc:
|.if JIT |.if JIT and not HFABI
| lsl CARG3, CARG2, #1 | lsl CARG3, CARG2, #1
| adds RB, CARG3, #0x00200000 | adds RB, CARG3, #0x00200000
| andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.
@@ -2093,8 +2335,23 @@ static void build_subroutines(BuildCtx *ctx)
| bx lr | bx lr
|.endif |.endif
| |
|->vm_trunc_hf:
|.if JIT and FPU
| vm_round trunc, 1
|.endif
|
| // double lj_vm_mod(double dividend, double divisor); | // double lj_vm_mod(double dividend, double divisor);
|->vm_mod: |->vm_mod:
|.if FPU
| // Special calling convention. Also, RC (r11) is not preserved.
| vdiv.f64 d0, d6, d7
| mov RC, lr
| bl ->vm_floor_hf
| vmul.f64 d0, d0, d7
| mov lr, RC
| vsub.f64 d6, d6, d0
| bx lr
|.else
| push {r0, r1, r2, r3, r4, lr} | push {r0, r1, r2, r3, r4, lr}
| bl extern __aeabi_ddiv | bl extern __aeabi_ddiv
| bl ->vm_floor | bl ->vm_floor
@ -2105,6 +2362,7 @@ static void build_subroutines(BuildCtx *ctx)
| bl extern __aeabi_dadd | bl extern __aeabi_dadd
| add sp, sp, #20 | add sp, sp, #20
| pop {pc} | pop {pc}
|.endif
| |
| // int lj_vm_modi(int dividend, int divisor); | // int lj_vm_modi(int dividend, int divisor);
|->vm_modi: |->vm_modi:
@@ -2266,6 +2524,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next | ins_next
| |
|3: // CARG12 is not an integer. |3: // CARG12 is not an integer.
|.if FPU
| vldr d0, [RA]
| bhi ->vmeta_comp
| // d0 is a number.
| checktp CARG4, LJ_TISNUM
| vldr d1, [RC]
| blo >5
| // d0 is a number, CARG3 is an integer.
| vmov s4, CARG3
| vcvt.f64.s32 d1, s4
| b >5
|4: // CARG1 is an integer, CARG34 is not an integer.
| vldr d1, [RC]
| bhi ->vmeta_comp
| // CARG1 is an integer, d1 is a number.
| vmov s4, CARG1
| vcvt.f64.s32 d0, s4
|5: // d0 and d1 are numbers.
| vcmp.f64 d0, d1
| vmrs
| // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
if (op == BC_ISLT) {
| sublo PC, RB, #0x20000
} else if (op == BC_ISGE) {
| subhs PC, RB, #0x20000
} else if (op == BC_ISLE) {
| subls PC, RB, #0x20000
} else {
| subhi PC, RB, #0x20000
}
| b <1
|.else
| bhi ->vmeta_comp | bhi ->vmeta_comp
| // CARG12 is a number. | // CARG12 is a number.
| checktp CARG4, LJ_TISNUM | checktp CARG4, LJ_TISNUM
@@ -2282,7 +2572,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| b >5 | b >5
|4: // CARG1 is an integer, CARG34 is not an integer. |4: // CARG1 is an integer, CARG34 is not an integer.
| bhi ->vmeta_comp | bhi ->vmeta_comp
| // CARG1 is an integer, CARG34 is a number | // CARG1 is an integer, CARG34 is a number.
| mov RA, RB // Save RB. | mov RA, RB // Save RB.
| bl extern __aeabi_i2d | bl extern __aeabi_i2d
| ldrd CARG34, [RC] // Restore second operand. | ldrd CARG34, [RC] // Restore second operand.
@@ -2299,6 +2589,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subhi PC, RA, #0x20000 | subhi PC, RA, #0x20000
} }
| b <1 | b <1
|.endif
break; break;
case BC_ISEQV: case BC_ISNEV: case BC_ISEQV: case BC_ISNEV:
@@ -2439,6 +2730,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
} }
| bhi <2 | bhi <2
|.endif |.endif
|.if FPU
| checktp CARG4, LJ_TISNUM
| vmov s4, CARG3
| vldr d0, [RA]
| vldrlo d1, [RC]
| vcvths.f64.s32 d1, s4
| b >5
|4: // CARG1 is an integer, d1 is a number.
| vmov s4, CARG1
| vldr d1, [RC]
| vcvt.f64.s32 d0, s4
|5: // d0 and d1 are numbers.
| vcmp.f64 d0, d1
| vmrs
if (vk) {
| subeq PC, RB, #0x20000
} else {
| subne PC, RB, #0x20000
}
| b <2
|.else
| // CARG12 is a number. | // CARG12 is a number.
| checktp CARG4, LJ_TISNUM | checktp CARG4, LJ_TISNUM
| movlo RA, RB // Save RB. | movlo RA, RB // Save RB.
@@ -2458,6 +2770,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subne PC, RA, #0x20000 | subne PC, RA, #0x20000
} }
| b <2 | b <2
|.endif
| |
|.if FFI |.if FFI
|7: |7:
@@ -2617,20 +2930,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) { ||switch (vk) {
||case 0: ||case 0:
| .if FPU
| ldrd CARG12, [RB, BASE]!
| ldrd CARG34, [RC, KBASE]!
| .else
| ldrd CARG12, [BASE, RB] | ldrd CARG12, [BASE, RB]
| ldrd CARG34, [KBASE, RC] | ldrd CARG34, [KBASE, RC]
| .endif
|| break; || break;
||case 1: ||case 1:
| .if FPU
| ldrd CARG34, [RB, BASE]!
| ldrd CARG12, [RC, KBASE]!
| .else
| ldrd CARG34, [BASE, RB] | ldrd CARG34, [BASE, RB]
| ldrd CARG12, [KBASE, RC] | ldrd CARG12, [KBASE, RC]
| .endif
|| break; || break;
||default: ||default:
| .if FPU
| ldrd CARG12, [RB, BASE]!
| ldrd CARG34, [RC, BASE]!
| .else
| ldrd CARG12, [BASE, RB] | ldrd CARG12, [BASE, RB]
| ldrd CARG34, [BASE, RC] | ldrd CARG34, [BASE, RC]
| .endif
|| break; || break;
||} ||}
|.endmacro |.endmacro
| |
|.macro ins_arithpre_fpu, reg1, reg2
|.if FPU
||if (vk == 1) {
| vldr reg2, [RB]
| vldr reg1, [RC]
||} else {
| vldr reg1, [RB]
| vldr reg2, [RC]
||}
|.endif
|.endmacro
|
|.macro ins_arithpost_fpu, reg
| ins_next1
| add RA, BASE, RA
| ins_next2
| vstr reg, [RA]
| ins_next3
|.endmacro
|
|.macro ins_arithfallback, ins |.macro ins_arithfallback, ins
||switch (vk) { ||switch (vk) {
||case 0: ||case 0:
@@ -2645,9 +2993,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
||} ||}
|.endmacro |.endmacro
| |
|.macro ins_arithdn, intins, fpcall |.macro ins_arithdn, intins, fpins, fpcall
| ins_arithpre | ins_arithpre
|.if "intins" ~= "vm_modi" |.if "intins" ~= "vm_modi" and not FPU
| ins_next1 | ins_next1
|.endif |.endif
| ins_arithcheck_int >5 | ins_arithcheck_int >5
@@ -2665,57 +3013,74 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_arithfallback bvs | ins_arithfallback bvs
|.endif |.endif
|4: |4:
|.if "intins" == "vm_modi" |.if "intins" == "vm_modi" or FPU
| ins_next1 | ins_next1
|.endif |.endif
| ins_next2 | ins_next2
| strd CARG12, [BASE, RA] | strd CARG12, [BASE, RA]
| ins_next3 | ins_next3
|5: // FP variant. |5: // FP variant.
| ins_arithpre_fpu d6, d7
| ins_arithfallback ins_arithcheck_num | ins_arithfallback ins_arithcheck_num
|.if FPU
|.if "intins" == "vm_modi" |.if "intins" == "vm_modi"
| bl fpcall | bl fpcall
|.else |.else
| fpins d6, d6, d7
|.endif
| ins_arithpost_fpu d6
|.else
| bl fpcall | bl fpcall
|.if "intins" ~= "vm_modi"
| ins_next1 | ins_next1
|.endif |.endif
| b <4 | b <4
|.endif
|.endmacro |.endmacro
| |
|.macro ins_arithfp, fpcall |.macro ins_arithfp, fpins, fpcall
| ins_arithpre | ins_arithpre
|.if "fpins" ~= "extern" or HFABI
| ins_arithpre_fpu d0, d1
|.endif
| ins_arithfallback ins_arithcheck_num | ins_arithfallback ins_arithcheck_num
|.if "fpcall" == "extern pow" |.if "fpins" == "extern"
| .IOS mov RC, BASE | .IOS mov RC, BASE
| bl fpcall | bl fpcall
| .IOS mov BASE, RC | .IOS mov BASE, RC
|.elif FPU
| fpins d0, d0, d1
|.else |.else
| bl fpcall | bl fpcall
|.endif |.endif
|.if ("fpins" ~= "extern" or HFABI) and FPU
| ins_arithpost_fpu d0
|.else
| ins_next1 | ins_next1
| ins_next2 | ins_next2
| strd CARG12, [BASE, RA] | strd CARG12, [BASE, RA]
| ins_next3 | ins_next3
|.endif
|.endmacro |.endmacro
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
| ins_arithdn adds, extern __aeabi_dadd | ins_arithdn adds, vadd.f64, extern __aeabi_dadd
break; break;
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
| ins_arithdn subs, extern __aeabi_dsub | ins_arithdn subs, vsub.f64, extern __aeabi_dsub
break; break;
case BC_MULVN: case BC_MULNV: case BC_MULVV: case BC_MULVN: case BC_MULNV: case BC_MULVV:
| ins_arithdn smull, extern __aeabi_dmul | ins_arithdn smull, vmul.f64, extern __aeabi_dmul
break; break;
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
| ins_arithfp extern __aeabi_ddiv | ins_arithfp vdiv.f64, extern __aeabi_ddiv
break; break;
case BC_MODVN: case BC_MODNV: case BC_MODVV: case BC_MODVN: case BC_MODNV: case BC_MODVV:
| ins_arithdn vm_modi, ->vm_mod | ins_arithdn vm_modi, vm_mod, ->vm_mod
break; break;
case BC_POW: case BC_POW:
| // NYI: (partial) integer arithmetic. | // NYI: (partial) integer arithmetic.
| ins_arithfp extern pow | ins_arithfp extern, extern pow
break; break;
case BC_CAT: case BC_CAT:
@@ -3775,20 +4140,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| cmnlo CARG4, #-LJ_TISNUM | cmnlo CARG4, #-LJ_TISNUM
| cmnlo RB, #-LJ_TISNUM | cmnlo RB, #-LJ_TISNUM
| bhs ->vmeta_for | bhs ->vmeta_for
|.if FPU
| vldr d0, FOR_IDX
| vldr d1, FOR_STOP
| cmp RB, #0
| vstr d0, FOR_EXT
|.else
| cmp RB, #0 | cmp RB, #0
| strd CARG12, FOR_IDX
| strd CARG12, FOR_EXT | strd CARG12, FOR_EXT
| blt >8 | blt >8
|.endif
} else { } else {
|.if FPU
| vldr d0, FOR_IDX
| vldr d2, FOR_STEP
| vldr d1, FOR_STOP
| cmp CARG4, #0
| vadd.f64 d0, d0, d2
|.else
| cmp CARG4, #0 | cmp CARG4, #0
| blt >8 | blt >8
| bl extern __aeabi_dadd | bl extern __aeabi_dadd
| strd CARG12, FOR_IDX | strd CARG12, FOR_IDX
| ldrd CARG34, FOR_STOP | ldrd CARG34, FOR_STOP
| strd CARG12, FOR_EXT | strd CARG12, FOR_EXT
|.endif
} }
|6: |6:
|.if FPU
| vcmpge.f64 d0, d1
| vcmplt.f64 d1, d0
| vmrs
|.else
| bl extern __aeabi_cdcmple | bl extern __aeabi_cdcmple
|.endif
if (vk) {
|.if FPU
| vstr d0, FOR_IDX
| vstr d0, FOR_EXT
|.endif
}
if (op == BC_FORI) { if (op == BC_FORI) {
| subhi PC, RC, #0x20000 | subhi PC, RC, #0x20000
} else if (op == BC_JFORI) { } else if (op == BC_JFORI) {
@@ -3804,6 +4195,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next2 | ins_next2
| b <3 | b <3
| |
|.if not FPU
|8: // Invert check for negative step. |8: // Invert check for negative step.
if (vk) { if (vk) {
| bl extern __aeabi_dadd | bl extern __aeabi_dadd
@ -3814,6 +4206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov CARG4, CARG2 | mov CARG4, CARG2
| ldrd CARG12, FOR_STOP | ldrd CARG12, FOR_STOP
| b <6 | b <6
|.endif
break; break;
case BC_ITERL: case BC_ITERL:
@@ -4048,8 +4441,14 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
"\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */
fcofs, CFRAME_SIZE); fcofs, CFRAME_SIZE);
for (i = 11; i >= 4; i--) /* offset r4-r11 */ for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));
#if LJ_ARCH_HASFPU
for (i = 15; i >= 8; i--) /* offset d8-d15 */
fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n",
64+2*i, 10+2*(15-i));
fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */
#endif
fprintf(ctx->fp, fprintf(ctx->fp,
"\t.align 2\n" "\t.align 2\n"
".LEFDE0:\n\n"); ".LEFDE0:\n\n");