From e933353feb13b26b437f7fbe5e02e7ccc7147be9 Mon Sep 17 00:00:00 2001
From: Michael Munday
Date: Tue, 10 Jan 2017 14:12:06 -0500
Subject: [PATCH] Use execute rather than loop for mvc and avoid jumps in fast path.

Not sure if this works, the tests don't exercise the stack code.

The slot count read from CCSTATE->nsp is scaled to a byte count before
copying, so that the mvc lengths, the pointer increments and the length
passed to ex all use the same unit (bytes). Each stack slot is 8 bytes.
---
 src/Makefile      |  2 +-
 src/vm_s390x.dasc | 59 +++++++++++++++++++++++++-----------------------
 2 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index 1450adc0..a6723e23 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -56,7 +56,7 @@ CCOPT_mips=
 #
 CCDEBUG=
 # Uncomment the next line to generate debug information:
-#CCDEBUG= -g
+CCDEBUG= -g
 #
 CCWARN= -Wall
 # Uncomment the next line to enable more warnings:
diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
index bb53757f..f8be2847 100644
--- a/src/vm_s390x.dasc
+++ b/src/vm_s390x.dasc
@@ -2142,47 +2142,26 @@ static void build_subroutines(BuildCtx *ctx)
   |->vm_ffi_call: // Call C function via FFI.
   |  // Caveat: needs special frame unwinding, see below.
   |.if FFI
-  |  .type CCSTATE, CCallState, r10
-  |  stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store?
+  |  .type CCSTATE, CCallState, r8
+  |  stmg r6, r15, 48(sp)
   |  lgr CCSTATE, CARG1
+  |  lg r7, CCSTATE->func // TODO: move further up?
   |
   |  // Readjust stack.
   |  sgf sp, CCSTATE->spadj
   |
   |  // Copy stack slots.
-  |  llgc r0, CCSTATE->nsp
-  |  cghi r0, 0
-  |  jle >3
-  |  lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
-  |  lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
+  |  llgc r1, CCSTATE->nsp // Number of 8-byte stack slots (one byte, so <= 255).
+  |  chi r1, 0
+  |  jh >2 // Stack slots present: copy them out of line, then resume at 1.
   |1:
-  |  cghi r0, 256
-  |  jl >2
-  |  mvc 0(256, r11), 0(r1)
-  |  aghi r1, 256*8
-  |  aghi r11, 256*8
-  |  aghi r0, -256
-  |  j <1
-  |2:
-  |  cghi r0, 0
-  |  je >3
-  |  // TODO: exrl mvc rather than loop.
-  |  mvc 0(8, r11), 0(r1)
-  |  aghi r1, 8
-  |  aghi r11, 8
-  |  aghi r0, -1
-  |  j <2
-  |3:
-  |
   |  lmg CARG1, CARG5, CCSTATE->gpr[0]
   |  // TODO: conditionally load FPRs?
   |  ld FARG1, CCSTATE->fpr[0]
   |  ld FARG2, CCSTATE->fpr[1]
   |  ld FARG3, CCSTATE->fpr[2]
   |  ld FARG4, CCSTATE->fpr[3]
-  |5:
-  |  lg r1, CCSTATE->func // TODO: move further up?
-  |  basr r14, r1
+  |  basr r14, r7
   |
   |  stg CRET1, CCSTATE->gpr[0]
   |  stg f0, CCSTATE->fpr[0]
@@ -2190,6 +2169,30 @@ static void build_subroutines(BuildCtx *ctx)
   |  agf sp, CCSTATE->spadj
   |  lmg r6, r15, 48(sp)
   |  br r14
+  |
+  |2:  // Slow path: copy the stack slots, then rejoin the fast path at 1.
+  |  sllg r1, r1, 3 // Slot count -> byte count; everything below counts bytes.
+  |  lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
+  |  lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
+  |3:
+  |  chi r1, 256
+  |  jl >4
+  |  mvc 0(256, r11), 0(r10) // Copy one full 256-byte chunk.
+  |  la r10, 256(r10)
+  |  la r11, 256(r11)
+  |  ahi r1, -256
+  |  j <3
+  |
+  |4:
+  |  ahi r1, -1 // ex supplies the mvc length field, which holds (bytes - 1).
+  |  jl <1 // No bytes left to copy.
+  |  larl r9, >5
+  |  ex r1, 0(r9) // TODO: exrl is faster but needs z10.
+  |  j <1
+  |
+  |5:
+  |  // exrl target
+  |  mvc 0(1, r11), 0(r10)
   |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
   |