MIPS: Integrate and enable JIT compiler.

This commit is contained in:
Mike Pall 2012-03-30 01:36:55 +02:00
parent 2225c9aafc
commit bcd459aa0e
10 changed files with 2308 additions and 25 deletions

View File

@ -231,7 +231,7 @@ local map_gpr = {
[0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "gp", "sp", "r30", "ra",
"r24", "r25", "r26", "r27", "r28", "sp", "r30", "ra",
}
------------------------------------------------------------------------------

View File

@ -84,6 +84,10 @@ local nexitsym = 0
local function fillsymtab_tr(tr, nexit)
local t = {}
symtabmt.__index = t
if jit.arch == "mips" or jit.arch == "mipsel" then
t[traceexitstub(tr, 0)] = "exit"
return
end
for i=0,nexit-1 do
local addr = traceexitstub(tr, i)
t[addr] = tostring(i)
@ -604,9 +608,16 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
if i % 8 == 0 then out:write("\n") end
end
end
for i=1,nfpr do
out:write(format(" %+17.14g", regs[ngpr+i]))
if i % 4 == 0 then out:write("\n") end
if jit.arch == "mips" or jit.arch == "mipsel" then
for i=1,nfpr,2 do
out:write(format(" %+17.14g", regs[ngpr+i]))
if i % 8 == 7 then out:write("\n") end
end
else
for i=1,nfpr do
out:write(format(" %+17.14g", regs[ngpr+i]))
if i % 4 == 0 then out:write("\n") end
end
end
end
end

View File

@ -204,7 +204,6 @@
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
#define LJ_ARCH_NOJIT 1
#else
#error "No target architecture defined"

View File

@ -155,6 +155,8 @@ IRFLDEF(FLOFS)
#include "lj_emit_arm.h"
#elif LJ_TARGET_PPC
#include "lj_emit_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_emit_mips.h"
#else
#error "Missing instruction emitter for target CPU"
#endif
@ -441,12 +443,22 @@ static Reg ra_scratch(ASMState *as, RegSet allow)
/* Evict all registers from a set (if not free). */
static void ra_evictset(ASMState *as, RegSet drop)
{
RegSet work;
as->modset |= drop;
drop &= ~as->freeset;
while (drop) {
Reg r = rset_pickbot(drop);
#if !LJ_SOFTFP
work = (drop & ~as->freeset) & RSET_FPR;
while (work) {
Reg r = rset_pickbot(work);
ra_restore(as, regcost_ref(as->cost[r]));
rset_clear(drop, r);
rset_clear(work, r);
checkmclim(as);
}
#endif
work = (drop & ~as->freeset) & RSET_GPR;
while (work) {
Reg r = rset_pickbot(work);
ra_restore(as, regcost_ref(as->cost[r]));
rset_clear(work, r);
checkmclim(as);
}
}
@ -1153,6 +1165,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_arm.h"
#elif LJ_TARGET_PPC
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_asm_mips.h"
#else
#error "Missing assembler for target CPU"
#endif
@ -1530,9 +1544,7 @@ static void asm_setup_regsp(ASMState *as)
#if LJ_SOFTFP
case IR_MIN: case IR_MAX:
#endif
#if LJ_BE
(ir-1)->prev = REGSP_HINT(RID_RETLO);
#endif
ir->prev = REGSP_HINT(RID_RETHI);
continue;
default:

1949
src/lj_asm_mips.h Normal file

File diff suppressed because it is too large Load Diff

205
src/lj_emit_mips.h Normal file
View File

@ -0,0 +1,205 @@
/*
** MIPS instruction emitter.
** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Emit basic instructions --------------------------------------------- */
/* Emit an instruction with rd, rs and rt register fields.
** The code pointer is moved down one slot before the store, so every
** emitted instruction lands at a lower address than the previous one.
*/
static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt)
{
  as->mcp--;
  *as->mcp = mi | MIPSF_D(rd) | MIPSF_S(rs) | MIPSF_T(rt);
}
/* Emit an instruction with rd and rt register fields plus an 'a' field
** (used in this file for constant shift amounts).
** The code pointer is moved down one slot before the store.
*/
static void emit_dta(ASMState *as, MIPSIns mi, Reg rd, Reg rt, uint32_t a)
{
  as->mcp--;
  *as->mcp = mi | MIPSF_D(rd) | MIPSF_T(rt) | MIPSF_A(a);
}
/* Two-register form: same as emit_dst with the rt field set to 0. */
#define emit_ds(as, mi, rd, rs) emit_dst(as, (mi), (rd), (rs), 0)
/* Form with rt plus a second register rg: rg is masked to its low 5 bits
** and placed in the rd field, the rs field is 0.
** NOTE(review): the mask presumably maps FPR ids to FPR numbers -- confirm.
*/
#define emit_tg(as, mi, rt, rg) emit_dst(as, (mi), (rg)&31, 0, (rt))
/* Emit an instruction with rt and rs register fields and a 16 bit
** immediate. Only the low 16 bits of i are encoded.
** The code pointer is moved down one slot before the store.
*/
static void emit_tsi(ASMState *as, MIPSIns mi, Reg rt, Reg rs, int32_t i)
{
  as->mcp--;
  *as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | (i & 0xffff);
}

/* Immediate form without a source register: the rs field is 0. */
#define emit_ti(as, mi, rt, i) emit_tsi(as, (mi), (rt), 0, (i))
/* Like emit_tsi, but the first register is masked to its low 5 bits. */
#define emit_hsi(as, mi, rh, rs, i) emit_tsi(as, (mi), (rh) & 31, (rs), (i))
/* Emit an instruction with f, g and h register fields.
** Each register id is masked to its low 5 bits before being encoded.
** The code pointer is moved down one slot before the store.
*/
static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
{
  as->mcp--;
  *as->mcp = mi | MIPSF_F(rf&31) | MIPSF_G(rg&31) | MIPSF_H(rh&31);
}

/* Two-register form: same as emit_fgh with the h register set to 0. */
#define emit_fg(as, mi, rf, rg) emit_fgh(as, (mi), (rf), (rg), 0)
/* Emit a rotate right of src by a constant shift, result in dest.
** With JIT_F_MIPS32R2 set this is a single ROTR. Otherwise it is built from
** two shifts and an OR, using tmp as a scratch register.
*/
static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
{
  if ((as->flags & JIT_F_MIPS32R2)) {
    emit_dta(as, MIPSI_ROTR, dest, src, shift);
    return;
  }
  /* The emitters write at decreasing addresses, so the calls below appear
  ** in the reverse of execution order. Executed sequence:
  **   srl tmp, src, shift
  **   sll dest, src, (-shift)&31
  **   or  dest, dest, tmp
  */
  emit_dst(as, MIPSI_OR, dest, dest, tmp);
  emit_dta(as, MIPSI_SLL, dest, src, (-shift)&31);
  emit_dta(as, MIPSI_SRL, tmp, src, shift);
}
/* -- Emit loads/stores --------------------------------------------------- */
/* Prefer rematerialization of BASE/L from global_State over spills.
** Evaluates to true for every ref at or below REF_BASE.
*/
#define emit_canremat(ref) ((ref) <= REF_BASE)
/* Try to derive constant i from a value already held in a GPR.
** Walks the allocated (non-free) GPRs from the highest register id down.
** Only registers whose ref is below ASMREF_L are considered. If the
** difference between i and such a register's value passes checki16, a
** single ADDIU t, r, delta is emitted.
** Returns 1 if an instruction was emitted, 0 otherwise.
*/
static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
{
  RegSet candidates = ~as->freeset & RSET_GPR;
  for (; candidates; ) {
    Reg r = rset_picktop(candidates);
    IRRef ref = regcost_ref(as->cost[r]);
    lua_assert(r != t);
    rset_clear(candidates, r);  /* This register has been visited. */
    if (ref >= ASMREF_L)
      continue;
    {
      int32_t k = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
      int32_t delta = i - k;
      if (checki16(delta)) {
        emit_tsi(as, MIPSI_ADDIU, t, r, delta);
        return 1;
      }
    }
  }
  return 0;  /* No suitable register found. */
}
/* Load a 32 bit constant into a GPR.
** Picks the shortest sequence that applies, in this order:
**   1. i passes checki16: a single MIPSI_LI.
**   2. Low halfword non-zero and i lies within 64K of the J2G(as->J)
**      address: a single ADDIU relative to RID_JGL.
**   3. Low halfword non-zero and emit_kdelta1() finds a register to
**      derive i from: a single ADDIU.
**   4. Upper halfword zero: a single ORI from RID_ZERO.
**   5. Otherwise LUI, followed by ORI if the low halfword is non-zero.
** Instructions are written at decreasing addresses, so in case 5 the ORI
** is emitted first and the LUI last; the LUI executes first.
*/
static void emit_loadi(ASMState *as, Reg r, int32_t i)
{
  if (checki16(i)) {
    emit_ti(as, MIPSI_LI, r, i);
  } else {
    if ((i & 0xffff)) {
      int32_t jgl = i32ptr(J2G(as->J));
      /* Unsigned compare: true only for 0 <= i-jgl < 65536. */
      if ((uint32_t)(i-jgl) < 65536) {
        /* NOTE(review): the -32768 bias suggests RID_JGL holds the
        ** J2G address plus 32768 -- confirm against the VM setup.
        */
        emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768);
        return;
      } else if (emit_kdelta1(as, r, i)) {
        return;
      } else if ((i >> 16) == 0) {
        /* Fits in 16 bits unsigned: OR the immediate into zero. */
        emit_tsi(as, MIPSI_ORI, r, RID_ZERO, i);
        return;
      }
      /* Executes after the LUI below and fills in the low halfword. */
      emit_tsi(as, MIPSI_ORI, r, r, i);
    }
    /* Upper halfword. Reached directly when the low halfword is zero. */
    emit_ti(as, MIPSI_LUI, r, (i >> 16));
  }
}
/* Load an address as a 32 bit constant. */
#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
static Reg ra_allock(ASMState *as, int32_t k, RegSet allow);
static void ra_allockreg(ASMState *as, int32_t k, Reg r);
/* Emit load/store instruction mi for register r at constant address p.
** If p lies within 64K of the J2G(as->J) address, the access is made
** relative to RID_JGL. Otherwise a base register holding p minus its
** sign-extended low halfword is allocated from 'allow', and the low
** halfword of p is used as the displacement.
*/
static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
{
  int32_t jgl = i32ptr(J2G(as->J));
  int32_t addr = i32ptr(p);
  if ((uint32_t)(addr-jgl) < 65536) {
    emit_tsi(as, mi, r, RID_JGL, addr-jgl-32768);
  } else {
    /* The base register is allocated before the access is emitted. */
    Reg base = ra_allock(as, addr-(int16_t)addr, allow);
    emit_tsi(as, mi, r, base, addr);
  }
}

/* Load a number from the constant address tv into an FPR using LDC1. */
#define emit_loadn(as, r, tv) \
emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR)
/* Emit load/store instruction mi for register r at offset ofs,
** addressed relative to RID_JGL with a displacement of ofs-32768.
*/
static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
{
  int32_t disp = ofs-32768;
  emit_tsi(as, mi, r, RID_JGL, disp);
}

/* Load a global_State field into a register with LW. */
#define emit_getgl(as, r, field) \
emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field))
/* Store a register to a global_State field with SW. */
#define emit_setgl(as, r, field) \
emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field))
/* Trace number is determined from per-trace exit stubs.
** No code is emitted here; the argument is only marked as unused.
*/
#define emit_setvmstate(as, i) UNUSED(i)
/* -- Emit control-flow instructions -------------------------------------- */
/* Label for internal jumps. A label is a plain machine code pointer. */
typedef MCode *MCLabel;
/* Return label pointing to current PC.
** The emitters pre-decrement as->mcp, so this is the address of the most
** recently emitted instruction.
*/
#define emit_label(as) ((as)->mcp)
/* Emit a branch instruction mi with rs/rt register fields to target.
** The displacement is counted in MCode units from the slot directly above
** the branch (the code pointer before it is moved down). It is asserted to
** fit in a signed 16 bit field and encoded in the low 16 bits.
*/
static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target)
{
  ptrdiff_t delta = target - as->mcp;
  lua_assert(((delta + 0x8000) >> 16) == 0);
  as->mcp--;
  *as->mcp = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu);
}
/* Emit a call to target.
** Instructions are written at decreasing addresses, so the NOP emitted
** first ends up directly after the jump (the jump's delay slot).
** A direct JAL is used when target and the slot after the jump share the
** same upper 4 address bits; otherwise an indirect JALR through
** RID_CFUNCADDR is emitted.
*/
static void emit_call(ASMState *as, void *target)
{
  MCode *p = as->mcp;
  *--p = MIPSI_NOP;
  /* p now addresses the NOP slot; compare its upper 4 bits with target. */
  if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0)
    *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
  else /* Target out of range: need indirect call. */
    *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
  as->mcp = p;
  /* The target address is set up in RID_CFUNCADDR for both call forms.
  ** NOTE(review): presumably required by the callee's calling convention
  ** even for the direct JAL case -- confirm.
  */
  ra_allockreg(as, i32ptr(target), RID_CFUNCADDR);
}
/* -- Emit generic operations --------------------------------------------- */
/* Move between two GPRs. */
#define emit_move(as, dst, src) \
emit_ds(as, MIPSI_MOVE, (dst), (src))
/* Generic register-to-register move.
** Register ids below RID_MAX_GPR use a GPR move. Anything else uses an FP
** move: MOV_D if the IR type is a number, MOV_S otherwise.
*/
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  if (dst >= RID_MAX_GPR) {
    MIPSIns mi = irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S;
    emit_fg(as, mi, dst, src);
  } else {
    emit_move(as, dst, src);
  }
}
/* Generic reload of a register from the stack slot at RID_SP+ofs.
** GPRs use LW. Other registers use LDC1 if the IR type is a number and
** LWC1 otherwise, with the register id masked to its low 5 bits.
*/
static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
{
  MIPSIns mi = MIPSI_LW;
  if (r >= RID_MAX_GPR) {
    mi = irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1;
    r &= 31;
  }
  emit_tsi(as, mi, r, RID_SP, ofs);
}
/* Generic spill of a register to the stack slot at RID_SP+ofs.
** GPRs use SW. Other registers use SDC1 if the IR type is a number and
** SWC1 otherwise, with the register id masked to its low 5 bits.
*/
static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
{
  MIPSIns mi = MIPSI_SW;
  if (r >= RID_MAX_GPR) {
    mi = irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1;
    r &= 31;
  }
  emit_tsi(as, mi, r, RID_SP, ofs);
}
/* Add a constant offset to a pointer register in place.
** Nothing is emitted for a zero offset. A non-zero offset is asserted to
** pass checki16 and becomes a single ADDIU.
*/
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (!ofs)
    return;
  lua_assert(checki16(ofs));
  emit_tsi(as, MIPSI_ADDIU, r, r, ofs);
}

/* Subtract ofs from the stack pointer. */
#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))

View File

@ -66,7 +66,7 @@ typedef struct CCallInfo {
#define IRCALLCOND_SOFTFP_FFI(x) NULL
#endif
#define LJ_NEED_FP64 LJ_TARGET_PPC
#define LJ_NEED_FP64 (LJ_TARGET_PPC || LJ_TARGET_MIPS)
#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
#define IRCALLCOND_FP64_FFI(x) x
@ -157,8 +157,8 @@ typedef struct CCallInfo {
_(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \
_(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \
_(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \
_(FP64_FFI, fp64_d2l, 2, N, I64, 0) \
_(FP64_FFI, fp64_d2ul, 2, N, U64, 0) \
_(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \
_(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \
_(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
_(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
_(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \

View File

@ -203,7 +203,13 @@ static void *mcode_alloc(jit_State *J, size_t sz)
/* Target an address in the static assembler code (64K aligned).
** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB.
*/
#if LJ_TARGET_MIPS
/* Use the middle of the 256MB-aligned region. */
uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) +
0x08000000u;
#else
uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
#endif
const uintptr_t range = (1u << LJ_TARGET_JUMPRANGE) - (1u << 21);
/* First try a contiguous area below the last one. */
uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0;

View File

@ -53,7 +53,7 @@ typedef uint32_t RegSP;
/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
#if LJ_TARGET_PPC
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
typedef uint64_t RegSet;
#else
typedef uint32_t RegSet;
@ -63,11 +63,11 @@ typedef uint32_t RegSet;
#define RSET_EMPTY ((RegSet)0)
#define RSET_RANGE(lo, hi) ((RID2RSET((hi)-(lo))-1) << (lo))
#define rset_test(rs, r) (((rs) >> (r)) & 1)
#define rset_test(rs, r) ((int)((rs) >> (r)) & 1)
#define rset_set(rs, r) (rs |= RID2RSET(r))
#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
#if LJ_TARGET_PPC
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63))
#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs))
#else

View File

@ -12,7 +12,7 @@
_(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
_(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \
_(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \
_(R24) _(R25) _(SYS1) _(SYS2) _(GP) _(SP) _(R30) _(RA)
_(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA)
#define FPRDEF(_) \
_(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
_(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
@ -26,6 +26,7 @@ enum {
GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
RID_ZERO = RID_R0,
RID_TMP = RID_RA,
/* Calling conventions. */
@ -38,6 +39,7 @@ enum {
RID_RETLO = RID_R3,
#endif
RID_FPRET = RID_F0,
RID_CFUNCADDR = RID_R25,
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_R16, /* Interpreter BASE. */
@ -52,7 +54,7 @@ enum {
RID_MIN_FPR = RID_F0,
RID_MAX_FPR = RID_F31+1,
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
RID_NUM_FPR = (RID_MAX_FPR - RID_MIN_FPR)/2
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */
};
#define RID_NUM_KREF RID_NUM_GPR
@ -60,10 +62,10 @@ enum {
/* -- Register sets ------------------------------------------------------- */
/* Make use of all registers, except TMP, SP, SYS1, SYS2 and JGL. */
/* Make use of all registers, except ZERO, TMP, SP, SYS1, SYS2 and JGL. */
#define RSET_FIXED \
(RID2RSET(RID_TMP)|RID2RSET(RID_SP)|RID2RSET(RID_SYS1)|\
RID2RSET(RID_SYS2)|RID2RSET(RID_JGL))
(RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL))
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
#define RSET_FPR \
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
@ -75,7 +77,7 @@ enum {
#define RSET_SCRATCH_GPR \
(RSET_RANGE(RID_R1, RID_R15+1)|\
RID2RSET(RID_R24)|RID2RSET(RID_R25)|RID2RSET(RID_GP)|RID2RSET(RID_RA))
RID2RSET(RID_R24)|RID2RSET(RID_R25)|RID2RSET(RID_R28))
#define RSET_SCRATCH_FPR \
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
@ -115,8 +117,15 @@ typedef struct {
/* Highest exit + 1 indicates stack check. */
#define EXITSTATE_CHECKEXIT 1
#define EXITSTUB_SPACING 8
#define EXITSTUBS_PER_GROUP 16
/* Return the address of a per-trace exit stub. */
/* Advance p past any all-zero words (MIPSI_NOP) and return the address of
** the first non-zero word. The scan has no upper bound, so the caller must
** guarantee that a non-zero word follows.
*/
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
{
  for (; *p == 0x00000000; p++)
    ;  /* Skip MIPSI_NOP. */
  return p;
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
#define exitstub_trace_addr(T, exitno) \
exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode))
/* -- Instructions -------------------------------------------------------- */
@ -140,14 +149,106 @@ typedef enum MIPSIns {
MIPSI_LU = 0x34000000,
MIPSI_LUI = 0x3c000000,
MIPSI_ADDIU = 0x24000000,
MIPSI_ANDI = 0x30000000,
MIPSI_ORI = 0x34000000,
MIPSI_XORI = 0x38000000,
MIPSI_SLTI = 0x28000000,
MIPSI_SLTIU = 0x2c000000,
MIPSI_ADDU = 0x00000021,
MIPSI_SUBU = 0x00000023,
MIPSI_MUL = 0x70000002,
MIPSI_AND = 0x00000024,
MIPSI_OR = 0x00000025,
MIPSI_XOR = 0x00000026,
MIPSI_NOR = 0x00000027,
MIPSI_SLT = 0x0000002a,
MIPSI_SLTU = 0x0000002b,
MIPSI_MOVZ = 0x0000000a,
MIPSI_MOVN = 0x0000000b,
MIPSI_SLL = 0x00000000,
MIPSI_SRL = 0x00000002,
MIPSI_SRA = 0x00000003,
MIPSI_ROTR = 0x00200002, /* MIPS32R2 */
MIPSI_SLLV = 0x00000004,
MIPSI_SRLV = 0x00000006,
MIPSI_SRAV = 0x00000007,
MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */
MIPSI_SEB = 0x7c000420, /* MIPS32R2 */
MIPSI_SEH = 0x7c000620, /* MIPS32R2 */
MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */
MIPSI_B = 0x10000000,
MIPSI_J = 0x08000000,
MIPSI_JAL = 0x0c000000,
MIPSI_JR = 0x00000008,
MIPSI_JALR = 0x0000f809,
MIPSI_BEQ = 0x10000000,
MIPSI_BNE = 0x14000000,
MIPSI_BLEZ = 0x18000000,
MIPSI_BGTZ = 0x1c000000,
MIPSI_BLTZ = 0x04000000,
MIPSI_BGEZ = 0x04010000,
/* Load/store instructions. */
MIPSI_LW = 0x8c000000,
MIPSI_SW = 0xac000000,
MIPSI_LB = 0x80000000,
MIPSI_SB = 0xa0000000,
MIPSI_LH = 0x84000000,
MIPSI_SH = 0xa4000000,
MIPSI_LBU = 0x90000000,
MIPSI_LHU = 0x94000000,
MIPSI_LWC1 = 0xc4000000,
MIPSI_SWC1 = 0xe4000000,
MIPSI_LDC1 = 0xd4000000,
MIPSI_SDC1 = 0xf4000000,
/* FP instructions. */
MIPSI_MOV_S = 0x46000006,
MIPSI_MOV_D = 0x46200006,
MIPSI_MOVT_D = 0x46210011,
MIPSI_MOVF_D = 0x46200011,
MIPSI_ABS_D = 0x46200005,
MIPSI_NEG_D = 0x46200007,
MIPSI_ADD_D = 0x46200000,
MIPSI_SUB_D = 0x46200001,
MIPSI_MUL_D = 0x46200002,
MIPSI_DIV_D = 0x46200003,
MIPSI_ADD_S = 0x46000000,
MIPSI_SUB_S = 0x46000001,
MIPSI_CVT_D_S = 0x46000021,
MIPSI_CVT_W_S = 0x46000024,
MIPSI_CVT_S_D = 0x46200020,
MIPSI_CVT_W_D = 0x46200024,
MIPSI_CVT_S_W = 0x46800020,
MIPSI_CVT_D_W = 0x46800021,
MIPSI_TRUNC_W_S = 0x4600000d,
MIPSI_TRUNC_W_D = 0x4620000d,
MIPSI_FLOOR_W_S = 0x4600000f,
MIPSI_FLOOR_W_D = 0x4620000f,
MIPSI_MFC1 = 0x44000000,
MIPSI_MTC1 = 0x44800000,
MIPSI_BC1F = 0x45000000,
MIPSI_BC1T = 0x45010000,
MIPSI_C_EQ_D = 0x46200032,
MIPSI_C_OLT_D = 0x46200034,
MIPSI_C_ULT_D = 0x46200035,
MIPSI_C_OLE_D = 0x46200036,
MIPSI_C_ULE_D = 0x46200037,
} MIPSIns;
#endif