Add assembly for decoding instructions.

Still guessing at this point. This code will need to be changed.
This commit is contained in:
Michael Munday 2016-11-23 18:02:00 -05:00
parent dbf789536c
commit 5887962b0e

View File

@ -1,4 +1,4 @@
|// Low-level VM code for IBM z/Architecture (s390x) CPUs.
|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
|
@ -32,7 +32,7 @@
|.define BASE, r7 // Base of current Lua stack frame.
|.define KBASE, r8 // Constants of current Lua function.
|.define PC, r9 // Next PC.
|.define GLREG, r10 // Global state.
|.define DISPATCH, r10 // Opcode dispatch table.
|.define LREG, r11 // Register holding lua_State (also in SAVE_L).
|
|// The following temporaries are not saved across C calls, except for RD.
@ -56,6 +56,8 @@
|.define CRET1, r2
|
|.define SP, r15
|.define OP, r2 // Opcode byte of the current instruction. Shares r2 with CRET1.
|.define TMP1, r3 // Scratch. ins_NEXT uses it for the OP*8 dispatch offset.
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
@ -139,9 +141,24 @@
|.macro ins_A_C; .endmacro
|.macro ins_AND; .endmacro
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|// Instruction decode+dispatch.
| // TODO: tune this. Right now we always decode RA, RB, RC and RD, even if they aren't used.
|.macro ins_NEXT
| // Fetch the 32-bit instruction at PC, split it into its fields, advance PC
| // and jump to the handler whose address is stored in slot OP of DISPATCH.
| // Writes RA, RB, RC, RD, OP and TMP1.
| // NOTE(review): the 32-bit forms used for decoding only write the low half
| // of each register; confirm nothing indexes with the full 64-bit value.
| l RD, (PC)
| // 32 63
| // [ B | C | A | OP ]
| // [ D | A | OP ]
| llhr RA, RD // RA = low halfword (A and OP).
| srl RA, #8 // RA = A.
| llcr OP, RD // OP = low byte.
| srl RD, #16 // RD = D (B and C).
| lr RB, RD
| srl RB, #8 // RB = B.
| llcr RC, RD // RC = C.
| la PC, 4(PC) // PC now addresses the following instruction.
| llgfr TMP1, OP // Zero-extend to 64 bits before using it as an index.
| sll TMP1, #3 // TMP1=OP*8
| // Slots are 8 bytes wide and hold handler addresses, so the entry has to
| // be loaded first. Branching to the slot address itself would run the
| // table contents as code.
| lg TMP1, 0(TMP1, DISPATCH)
| br TMP1
|.endmacro
|
|// Instruction footer.
@ -151,8 +168,6 @@
| .define ins_next_, ins_NEXT
|.else
| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
| // Affects only certain kinds of benchmarks (and only with -j off).
| // Around 10%-30% slower on Core2, a lot more slower on P4.
| .macro ins_next
| // Jump to the single shared decode+dispatch sequence at ->ins_next.
| // Uses the z/Architecture relative jump; jmp is an x86 mnemonic.
| j ->ins_next
| .endmacro