Use execute rather than loop for mvc and avoid jumps in fast path.

Not sure if this works; the tests don't exercise the stack code.
This commit is contained in:
Michael Munday 2017-01-10 14:12:06 -05:00
parent 660ddd1db2
commit e933353feb
2 changed files with 31 additions and 29 deletions

View File

@ -56,7 +56,7 @@ CCOPT_mips=
# #
CCDEBUG= CCDEBUG=
# Uncomment the next line to generate debug information: # Uncomment the next line to generate debug information:
#CCDEBUG= -g CCDEBUG= -g
# #
CCWARN= -Wall CCWARN= -Wall
# Uncomment the next line to enable more warnings: # Uncomment the next line to enable more warnings:

View File

@ -2142,47 +2142,26 @@ static void build_subroutines(BuildCtx *ctx)
|->vm_ffi_call: // Call C function via FFI. |->vm_ffi_call: // Call C function via FFI.
| // Caveat: needs special frame unwinding, see below. | // Caveat: needs special frame unwinding, see below.
|.if FFI |.if FFI
| .type CCSTATE, CCallState, r10 | .type CCSTATE, CCallState, r8
| stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store? | stmg r6, r15, 48(sp)
| lgr CCSTATE, CARG1 | lgr CCSTATE, CARG1
| lg r7, CCSTATE->func // TODO: move further up?
| |
| // Readjust stack. | // Readjust stack.
| sgf sp, CCSTATE->spadj | sgf sp, CCSTATE->spadj
| |
| // Copy stack slots. | // Copy stack slots.
| llgc r0, CCSTATE->nsp | llgc r1, CCSTATE->nsp
| cghi r0, 0 | chi r1, 0
| jle >3 | jh >2
| lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
| lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
|1: |1:
| cghi r0, 256
| jl >2
| mvc 0(256, r11), 0(r1)
| aghi r1, 256*8
| aghi r11, 256*8
| aghi r0, -256
| j <1
|2:
| cghi r0, 0
| je >3
| // TODO: exrl mvc rather than loop.
| mvc 0(8, r11), 0(r1)
| aghi r1, 8
| aghi r11, 8
| aghi r0, -1
| j <2
|3:
|
| lmg CARG1, CARG5, CCSTATE->gpr[0] | lmg CARG1, CARG5, CCSTATE->gpr[0]
| // TODO: conditionally load FPRs? | // TODO: conditionally load FPRs?
| ld FARG1, CCSTATE->fpr[0] | ld FARG1, CCSTATE->fpr[0]
| ld FARG2, CCSTATE->fpr[1] | ld FARG2, CCSTATE->fpr[1]
| ld FARG3, CCSTATE->fpr[2] | ld FARG3, CCSTATE->fpr[2]
| ld FARG4, CCSTATE->fpr[3] | ld FARG4, CCSTATE->fpr[3]
|5: | basr r14, r7
| lg r1, CCSTATE->func // TODO: move further up?
| basr r14, r1
| |
| stg CRET1, CCSTATE->gpr[0] | stg CRET1, CCSTATE->gpr[0]
| stg f0, CCSTATE->fpr[0] | stg f0, CCSTATE->fpr[0]
@ -2190,6 +2169,29 @@ static void build_subroutines(BuildCtx *ctx)
| agf sp, CCSTATE->spadj | agf sp, CCSTATE->spadj
| lmg r6, r15, 48(sp) | lmg r6, r15, 48(sp)
| br r14 | br r14
|
|2:
| lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
| lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
|3:
| chi r1, 256
| jl >4
| mvc 0(256, r11), 0(r10)
| la r10, 256*8(r10)
| la r11, 256*8(r11)
| ahi r1, -256
| j <3
|
|4:
| ahi r1, -1
| jl <1
| larl r9, >5
| ex r1, 0(r9) // TODO: exrl is faster but needs z10.
| j <1
|
|5:
| // exrl target
| mvc 0(1, r11), 0(r10)
|.endif |.endif
|// Note: vm_ffi_call must be the last function in this object file! |// Note: vm_ffi_call must be the last function in this object file!
| |