Use execute rather than loop for mvc and avoid jumps in fast path.

Not sure if this works; the tests don't exercise the stack code.
This commit is contained in:
Michael Munday 2017-01-10 14:12:06 -05:00
parent 660ddd1db2
commit e933353feb
2 changed files with 31 additions and 29 deletions

View File

@ -56,7 +56,7 @@ CCOPT_mips=
#
CCDEBUG=
# Uncomment the next line to generate debug information:
#CCDEBUG= -g
CCDEBUG= -g
#
CCWARN= -Wall
# Uncomment the next line to enable more warnings:

View File

@ -2142,47 +2142,26 @@ static void build_subroutines(BuildCtx *ctx)
|->vm_ffi_call: // Call C function via FFI.
| // Caveat: needs special frame unwinding, see below.
|.if FFI
| .type CCSTATE, CCallState, r10
| stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store?
| .type CCSTATE, CCallState, r8
| stmg r6, r15, 48(sp)
| lgr CCSTATE, CARG1
| lg r7, CCSTATE->func // TODO: move further up?
|
| // Readjust stack.
| sgf sp, CCSTATE->spadj
|
| // Copy stack slots.
| llgc r0, CCSTATE->nsp
| cghi r0, 0
| jle >3
| lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
| lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
| llgc r1, CCSTATE->nsp
| chi r1, 0
| jh >2
|1:
| cghi r0, 256
| jl >2
| mvc 0(256, r11), 0(r1)
| aghi r1, 256*8
| aghi r11, 256*8
| aghi r0, -256
| j <1
|2:
| cghi r0, 0
| je >3
| // TODO: exrl mvc rather than loop.
| mvc 0(8, r11), 0(r1)
| aghi r1, 8
| aghi r11, 8
| aghi r0, -1
| j <2
|3:
|
| lmg CARG1, CARG5, CCSTATE->gpr[0]
| // TODO: conditionally load FPRs?
| ld FARG1, CCSTATE->fpr[0]
| ld FARG2, CCSTATE->fpr[1]
| ld FARG3, CCSTATE->fpr[2]
| ld FARG4, CCSTATE->fpr[3]
|5:
| lg r1, CCSTATE->func // TODO: move further up?
| basr r14, r1
| basr r14, r7
|
| stg CRET1, CCSTATE->gpr[0]
| stg f0, CCSTATE->fpr[0]
@ -2190,6 +2169,29 @@ static void build_subroutines(BuildCtx *ctx)
| agf sp, CCSTATE->spadj
| lmg r6, r15, 48(sp)
| br r14
|
|// Out-of-line copy of the outgoing stack slots. Entered with the slot
|// count (CCSTATE->nsp, known to be > 0) in r1; every exit branches back
|// to local label 1 on the fast path.
|2:
| // mvc and ex both take byte lengths, so convert slots to bytes once
| // here (8 bytes per slot) and count in bytes from now on.
| sllg r1, r1, 3
| lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
| lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
|3:
| // Copy full 256-byte chunks while at least 256 bytes remain.
| chi r1, 256
| jl >4
| mvc 0(256, r11), 0(r10)
| la r10, 256(r10)
| la r11, 256(r11)
| ahi r1, -256
| j <3
|
|4:
| // Copy the remaining 0..255 bytes. The mvc length field encodes
| // (bytes - 1), so decrement first; a negative result means nothing
| // is left to copy.
| ahi r1, -1
| jl <1
| larl r9, >5
| // ex ORs the low byte of r1 into the length field of the mvc at 5.
| ex r1, 0(r9) // TODO: exrl is faster but needs z10.
| j <1
|
|5:
| // Target of the ex above; never reached by fall-through. Its length
| // field is zero (1 byte) so that the OR yields exactly r1.
| mvc 0(1, r11), 0(r10)
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
|