Compare commits

...

6 Commits

Author SHA1 Message Date
Ilya Leoshkevich
ea4f07f642
Merge 035f133798 into eec7a8016c 2025-04-12 07:47:31 +08:00
Mike Pall
eec7a8016c Prevent Clang UB 'optimization' which breaks integerness checks.
Thanks to Kacper Michajłow. #1351 #1355
2025-04-10 22:53:50 +02:00
Mike Pall
51d4c26ec7 ARM: Fix soft-float math.min()/math.max().
Reported by Dong Jianqiang. #1356
2025-04-10 22:45:38 +02:00
Mike Pall
c262976486 ARM64: Fix pass-by-value struct calling conventions.
Reported by AnthonyK213. #1357
2025-04-10 22:06:47 +02:00
Ilya Leoshkevich
035f133798 Add s390x architecture support
This is a cumulative patch that adds the s390x LuaJIT implementation
by @ketank-new, @mundaym and @niravthakkar and others. It contains all
their contributions squashed together, plus minor stylistic cleanups.
It passes all the tests from LuaJIT-test-cleanup, except for
contents.lua, which fails on x86_64 as well.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
2024-11-14 12:26:21 +01:00
Ilya Leoshkevich
dee73f516f Add s390x architecture support to DynASM
s390x (IBM Z) is an architecture of server computers produced by IBM.
It is supported by a number of open source code generators, such as
GCC, LLVM, OpenJDK, eBPF, QEMU, Valgrind and Cranelift. One of the
missing pieces in the ecosystem support is LuaJIT.

The s390x support for LuaJIT was initially developed by @ketank-new,
@mundaym and @niravthakkar. It found its way into moonjit and luajit2
forks, as well as Fedora distro (as a patch). There were also smaller
contributions by @preetikhorjuvenkar, @Bisht13, @velemas,
@AlekseiNikiforovIBM, and @iii-i.

This is a cumulative patch of the DynASM changes from this work. It
contains all the contributions squashed together, plus minor stylistic
cleanups.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
2024-11-14 12:25:58 +01:00
20 changed files with 6725 additions and 8 deletions

546
dynasm/dasm_s390x.h Normal file
View File

@ -0,0 +1,546 @@
/*
** DynASM s390x encoding engine.
** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "s390x"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions. */
enum {
DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
/* The following actions need a buffer position. */
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
/* The following actions also have an argument. */
DASM_REL_PC, DASM_LABEL_PC,
DASM_DISP12, DASM_DISP20,
DASM_IMM8, DASM_IMM16, DASM_IMM32,
DASM_LEN8R,DASM_LEN4HR,DASM_LEN4LR,
DASM__MAX
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_REL 0x15000000
#define DASM_S_UNDEF_LG 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned short *dasm_ActList;
/* Per-section structure. */
typedef struct dasm_Section {
int *rbuf; /* Biased buffer pointer (negative section bias). */
int *buf; /* True buffer pointer. */
size_t bsize; /* Buffer size in bytes. */
int pos; /* Biased buffer position. */
int epos; /* End of biased buffer position - max single put. */
int ofs; /* Byte offset into section. */
} dasm_Section;
/* Core structure holding the DynASM encoding state. */
struct dasm_State {
size_t psize; /* Allocated size of this structure. */
dasm_ActList actionlist; /* Current actionlist pointer. */
int *lglabels; /* Local/global chain/pos ptrs. */
size_t lgsize;
int *pclabels; /* PC label chains/pos ptrs. */
size_t pcsize;
void **globals; /* Array of globals (bias -10). */
dasm_Section *section; /* Pointer to active section. */
size_t codesize; /* Total size of all code sections. */
int maxsection; /* 0 <= sectionidx < maxsection. */
int status; /* Status code. */
dasm_Section sections[1]; /* All sections. Alloc-extended. */
};
/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/* Initialize DynASM state. */
void dasm_init(Dst_DECL, int maxsection)
{
dasm_State *D;
size_t psz = 0;
int i;
Dst_REF = NULL;
DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
D = Dst_REF;
D->psize = psz;
D->lglabels = NULL;
D->lgsize = 0;
D->pclabels = NULL;
D->pcsize = 0;
D->globals = NULL;
D->maxsection = maxsection;
for (i = 0; i < maxsection; i++) {
D->sections[i].buf = NULL; /* Need this for pass3. */
D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
D->sections[i].bsize = 0;
D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
}
}
/* Free DynASM state. */
void dasm_free(Dst_DECL)
{
dasm_State *D = Dst_REF;
int i;
for (i = 0; i < D->maxsection; i++)
if (D->sections[i].buf)
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
if (D->pclabels)
DASM_M_FREE(Dst, D->pclabels, D->pcsize);
if (D->lglabels)
DASM_M_FREE(Dst, D->lglabels, D->lgsize);
DASM_M_FREE(Dst, D, D->psize);
}
/* Setup global label array. Must be called before dasm_setup(). */
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
dasm_State *D = Dst_REF;
D->globals = gl - 10; /* Negative bias to compensate for locals. */
DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int));
}
/* Grow PC label array. Can be called after dasm_setup(), too. */
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
dasm_State *D = Dst_REF;
size_t osz = D->pcsize;
DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int));
memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz);
}
/* Setup encoder. */
void dasm_setup(Dst_DECL, const void *actionlist)
{
dasm_State *D = Dst_REF;
int i;
D->actionlist = (dasm_ActList) actionlist;
D->status = DASM_S_OK;
D->section = &D->sections[0];
memset((void *)D->lglabels, 0, D->lgsize);
if (D->pclabels)
memset((void *)D->pclabels, 0, D->pcsize);
for (i = 0; i < D->maxsection; i++) {
D->sections[i].pos = DASM_SEC2POS(i);
D->sections[i].ofs = 0;
}
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
void dasm_put(Dst_DECL, int start, ...)
{
va_list ap;
dasm_State *D = Dst_REF;
dasm_ActList p = D->actionlist + start;
dasm_Section *sec = D->section;
int pos = sec->pos, ofs = sec->ofs;
int *b;
if (pos >= sec->epos) {
DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
sec->epos =
(int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
}
b = sec->rbuf;
b[pos++] = start;
va_start(ap, start);
while (1) {
unsigned short ins = *p++;
unsigned short action = ins;
if (action >= DASM__MAX) {
ofs += 2;
continue;
}
int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
switch (action) {
case DASM_STOP:
goto stop;
case DASM_SECTION:
n = *p++ & 255;
CK(n < D->maxsection, RANGE_SEC);
D->section = &D->sections[n];
goto stop;
case DASM_ESC:
p++;
ofs += 2;
break;
case DASM_REL_EXT:
p++;
ofs += 4;
break;
case DASM_ALIGN:
ofs += *p++;
b[pos++] = ofs;
break;
case DASM_REL_LG:
if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
ofs += 2;
}
n = *p++ - 10;
pl = D->lglabels + n;
/* Bkwd rel or global. */
if (n >= 0) {
CK(n >= 10 || *pl < 0, RANGE_LG);
CKPL(lg, LG);
goto putrel;
}
pl += 10;
n = *pl;
if (n < 0)
n = 0; /* Start new chain for fwd rel if label exists. */
goto linkrel;
case DASM_REL_PC:
if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
ofs += 2;
}
pl = D->pclabels + n;
CKPL(pc, PC);
putrel:
n = *pl;
if (n < 0) { /* Label exists. Get label pos and store it. */
b[pos] = -n;
} else {
linkrel:
b[pos] = n; /* Else link to rel chain, anchored at label. */
*pl = pos;
}
ofs += 2;
pos++;
break;
case DASM_LABEL_LG:
pl = D->lglabels + *p++ - 10;
CKPL(lg, LG);
goto putlabel;
case DASM_LABEL_PC:
pl = D->pclabels + n;
CKPL(pc, PC);
putlabel:
n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
while (n > 0) {
int *pb = DASM_POS2PTR(D, n);
n = *pb;
*pb = pos;
}
*pl = -pos; /* Label exists now. */
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
case DASM_IMM8:
b[pos++] = n;
break;
case DASM_IMM16:
CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I); /* TODO: is this the right way to handle unsigned immediates? */
ofs += 2;
b[pos++] = n;
break;
case DASM_IMM32:
ofs += 4;
b[pos++] = n;
break;
case DASM_DISP20:
CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
b[pos++] = n;
break;
case DASM_DISP12:
CK((n >> 12) == 0, RANGE_I);
b[pos++] = n;
break;
case DASM_LEN8R:
CK(n >= 1 && n <= 256, RANGE_I);
b[pos++] = n;
break;
case DASM_LEN4HR:
case DASM_LEN4LR:
CK(n >= 1 && n <= 128, RANGE_I);
b[pos++] = n;
break;
}
}
stop:
va_end(ap);
sec->pos = pos;
sec->ofs = ofs;
}
#undef CK
/* Pass 2: Link sections, shrink aligns, fix label offsets. */
int dasm_link(Dst_DECL, size_t * szp)
{
dasm_State *D = Dst_REF;
int secnum;
int ofs = 0;
#ifdef DASM_CHECKS
*szp = 0;
if (D->status != DASM_S_OK)
return D->status;
{
int pc;
for (pc = 0; pc * sizeof(int) < D->pcsize; pc++)
if (D->pclabels[pc] > 0)
return DASM_S_UNDEF_PC | pc;
}
#endif
{ /* Handle globals not defined in this translation unit. */
int idx;
for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) {
int n = D->lglabels[idx];
/* Undefined label: Collapse rel chain and replace with marker (< 0). */
while (n > 0) {
int *pb = DASM_POS2PTR(D, n);
n = *pb;
*pb = -idx;
}
}
}
/* Combine all code sections. No support for data sections (yet). */
for (secnum = 0; secnum < D->maxsection; secnum++) {
dasm_Section *sec = D->sections + secnum;
int *b = sec->rbuf;
int pos = DASM_SEC2POS(secnum);
int lastpos = sec->pos;
while (pos != lastpos) {
dasm_ActList p = D->actionlist + b[pos++];
while (1) {
unsigned short ins = *p++;
unsigned short action = ins;
switch (action) {
case DASM_STOP:
case DASM_SECTION:
goto stop;
case DASM_ESC:
p++;
break;
case DASM_REL_EXT:
p++;
break;
case DASM_ALIGN:
ofs -= (b[pos++] + ofs) & *p++;
break;
case DASM_REL_LG:
case DASM_REL_PC:
p++;
pos++;
break;
case DASM_LABEL_LG:
case DASM_LABEL_PC:
p++;
b[pos++] += ofs;
break;
case DASM_IMM8:
case DASM_IMM16:
case DASM_IMM32:
case DASM_DISP20:
case DASM_DISP12:
case DASM_LEN8R:
case DASM_LEN4HR:
case DASM_LEN4LR:
pos++;
break;
}
}
stop:(void)0;
}
ofs += sec->ofs; /* Next section starts right after current section. */
}
D->codesize = ofs; /* Total size of all code sections */
*szp = ofs;
return DASM_S_OK;
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
/* Pass 3: Encode sections. */
int dasm_encode(Dst_DECL, void *buffer)
{
dasm_State *D = Dst_REF;
char *base = (char *)buffer;
unsigned short *cp = (unsigned short *)buffer;
int secnum;
/* Encode all code sections. No support for data sections (yet). */
for (secnum = 0; secnum < D->maxsection; secnum++) {
dasm_Section *sec = D->sections + secnum;
int *b = sec->buf;
int *endb = sec->rbuf + sec->pos;
while (b != endb) {
dasm_ActList p = D->actionlist + *b++;
while (1) {
unsigned short ins = *p++;
unsigned short action = ins;
int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
switch (action) {
case DASM_STOP:
case DASM_SECTION:
goto stop;
case DASM_ESC:
*cp++ = *p++;
break;
case DASM_REL_EXT:
n = DASM_EXTERN(Dst, (unsigned char *)cp, *p++, 1) - 4;
goto patchrel;
case DASM_ALIGN:
ins = *p++;
/* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
while ((((char *)cp - base) & ins))
*cp++ = 0x0700; /* nop */
break;
case DASM_REL_LG:
CK(n >= 0, UNDEF_LG);
case DASM_REL_PC:
CK(n >= 0, UNDEF_PC);
n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
p++; /* skip argument */
patchrel:
/* Offsets are halfword aligned (so need to be halved). */
n += 2; /* Offset is relative to start of instruction. */
if (cp[-1] >> 12 == 0xc) {
*cp++ = n >> 17;
} else {
CK(-(1 << 16) <= n && n < (1 << 16) && (n & 1) == 0, RANGE_LG);
}
*cp++ = n >> 1;
break;
case DASM_LABEL_LG:
ins = *p++;
if (ins >= 20)
D->globals[ins - 10] = (void *)(base + n);
break;
case DASM_LABEL_PC:
break;
case DASM_IMM8:
cp[-1] |= n & 0xff;
break;
case DASM_IMM16:
*cp++ = n;
break;
case DASM_IMM32:
*cp++ = n >> 16;
*cp++ = n;
break;
case DASM_DISP20:
cp[-2] |= n & 0xfff;
cp[-1] |= (n >> 4) & 0xff00;
break;
case DASM_DISP12:
cp[-1] |= n & 0xfff;
break;
case DASM_LEN8R:
cp[-1] |= (n - 1) & 0xff;
break;
case DASM_LEN4HR:
cp[-1] |= ((n - 1) << 4) & 0xf0;
break;
case DASM_LEN4LR:
cp[-1] |= (n - 1) & 0x0f;
break;
default:
*cp++ = ins;
break;
}
}
stop:(void)0;
}
}
if (base + D->codesize != (char *)cp) /* Check for phase errors. */
return DASM_S_PHASE;
return DASM_S_OK;
}
#undef CK
/* Get PC label offset. */
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
dasm_State *D = Dst_REF;
if (pc * sizeof(int) < D->pcsize) {
int pos = D->pclabels[pc];
if (pos < 0)
return *DASM_POS2PTR(D, -pos);
if (pos > 0)
return -1; /* Undefined. */
}
return -2; /* Unused or out of range. */
}
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps. */
int dasm_checkstep(Dst_DECL, int secmatch)
{
dasm_State *D = Dst_REF;
if (D->status == DASM_S_OK) {
int i;
for (i = 1; i <= 9; i++) {
if (D->lglabels[i] > 0) {
D->status = DASM_S_UNDEF_LG | i;
break;
}
D->lglabels[i] = 0;
}
}
if (D->status == DASM_S_OK && secmatch >= 0 &&
D->section != &D->sections[secmatch])
D->status = DASM_S_MATCH_SEC | (D->section - D->sections);
return D->status;
}
#endif

1634
dynasm/dasm_s390x.lua Normal file

File diff suppressed because it is too large Load Diff

View File

@ -243,6 +243,9 @@ else
ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm
else
ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
TARGET_LJARCH= s390x
else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__AARCH64EB__=1
@ -274,6 +277,7 @@ endif
endif
endif
endif
endif
ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
TARGET_SYS= PS3
@ -302,6 +306,9 @@ endif
ifneq (,$(INSTALL_LJLIBD))
TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\"
endif
ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1))
TARGET_XCFLAGS+= -fno-strict-float-cast-overflow
endif
##############################################################################
# Target system detection.

View File

@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#include "../dynasm/dasm_ppc.h"
#elif LJ_TARGET_MIPS
#include "../dynasm/dasm_mips.h"
#elif LJ_TARGET_S390X
#include "../dynasm/dasm_s390x.h"
#else
#error "No support for this architecture (yet)"
#endif

View File

@ -87,6 +87,54 @@ err:
}
fprintf(ctx->fp, "\t%s %s\n", opname, sym);
}
#elif LJ_TARGET_S390X
/* Emit halfwords piecewise as assembler text. */
static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n)
{
uint16_t *cp = (uint16_t*)p;
n /= 2;
int i;
for (i = 0; i < n; i++) {
if ((i & 7) == 0)
fprintf(ctx->fp, "\t.hword 0x%hx", cp[i]);
else
fprintf(ctx->fp, ",0x%hx", cp[i]);
if ((i & 7) == 7) putc('\n', ctx->fp);
}
if ((n & 7) != 0) putc('\n', ctx->fp);
}
/* Emit s390x text relocations. */
static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
const char *sym)
{
if (n & 1 || n < 2) {
fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n);
exit(1);
}
n -= 2;
const char *opname = NULL;
const char *argt = ""; /* Inserted before argument. */
int opcode = *(uint16_t*)(&cp[n]);
int arg = (opcode>>4) & 0xf;
switch (opcode & 0xff0f) {
case 0xa705: opname = "bras"; argt = "%r"; break;
case 0xc005: opname = "brasl"; argt = "%r"; break;
case 0xa704: opname = "brc"; break;
case 0xc004: opname = "brcl"; break;
default:
fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
sym);
exit(1);
}
emit_asm_halfwords(ctx, cp, n);
if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
/* Various fixups for external symbols outside of our binary. */
fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym);
return;
}
fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym);
}
#else
/* Emit words piecewise as assembler text. */
static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
@ -302,6 +350,9 @@ void emit_asm(BuildCtx *ctx)
emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
}
ofs += n+4;
#elif LJ_TARGET_S390X
emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
ofs += n+4;
#else
emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
ofs += n;
@ -310,6 +361,8 @@ void emit_asm(BuildCtx *ctx)
}
#if LJ_TARGET_X86ORX64
emit_asm_bytes(ctx, ctx->code+ofs, next-ofs);
#elif LJ_TARGET_S390X
emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs);
#else
emit_asm_words(ctx, ctx->code+ofs, next-ofs);
#endif

View File

@ -101,6 +101,7 @@ local map_arch = {
mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
s390x = { e = "be", b = 64, m = 22, },
}
local map_os = {

1
src/jit/dis_s390x.lua Normal file
View File

@ -0,0 +1 @@
-- Not yet implemented.

View File

@ -702,6 +702,8 @@ static uint32_t jit_cpudetect(void)
}
#endif
#elif LJ_TARGET_S390X
/* No optional CPU features to detect (for now). */
#else
#error "Missing CPU detection for this architecture"
#endif

View File

@ -31,6 +31,8 @@
#define LUAJIT_ARCH_mips32 6
#define LUAJIT_ARCH_MIPS64 7
#define LUAJIT_ARCH_mips64 7
#define LUAJIT_ARCH_S390X 8
#define LUAJIT_ARCH_s390x 8
/* Target OS. */
#define LUAJIT_OS_OTHER 0
@ -59,6 +61,8 @@
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__) || defined(_M_ARM64)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__s390x__) || defined(__s390x)
#define LUAJIT_TARGET LUAJIT_ARCH_S390X
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
@ -439,6 +443,21 @@
#define LJ_ARCH_VERSION 10
#endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X
#define LJ_ARCH_NAME "s390x"
#define LJ_ARCH_BITS 64
#define LJ_ARCH_ENDIAN LUAJIT_BE
#define LJ_TARGET_S390X 1
#define LJ_TARGET_EHRETREG 0xe
#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNALIGNED 1
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#define LJ_TARGET_GC64 1
#define LJ_ARCH_NOJIT 1 /* NYI */
#else
#error "No target architecture defined"
#endif

View File

@ -1708,6 +1708,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_asm_mips.h"
#elif LJ_TARGET_S390X
#include "lj_asm_s390x.h"
#else
#error "Missing assembler for target CPU"
#endif

View File

@ -1927,7 +1927,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
} else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
as->curins--; /* Always skip the loword min/max. */
if (uselo || usehi)
asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS);
return;
#elif LJ_HASFFI
} else if ((ir-1)->o == IR_CONV) {

View File

@ -575,6 +575,40 @@
goto done; \
}
#elif LJ_TARGET_S390X
/* -- POSIX/s390x calling conventions --------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
#define CCALL_HANDLE_COMPLEXRET \
cc->retref = 1; /* Return all complex values by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
#define CCALL_HANDLE_COMPLEXRET2 \
UNUSED(dp); /* Nothing to do. */
#define CCALL_HANDLE_STRUCTARG \
/* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \
}
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex numbers by reference. */ \
/* TODO: not sure why this is different to structs. */ \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; \
#define CCALL_HANDLE_REGARG \
if (isfp) { \
if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
} else { \
if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif
@ -781,17 +815,24 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{
CTSize sz = ct->size;
unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
while (ct->sib) {
while (ct->sib && n <= 4) {
unsigned int m = 1;
CType *sct;
ct = ctype_get(cts, ct->sib);
if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct);
if (ctype_isarray(sct->info)) {
CType *cct = ctype_rawchild(cts, sct);
if (!cct->size) continue;
m = sct->size / cct->size;
sct = cct;
}
if (ctype_isfp(sct->info)) {
r |= sct->size;
if (!isu) n++; else if (n == 0) n = 1;
if (!isu) n += m; else if (n < m) n = m;
} else if (ctype_iscomplex(sct->info)) {
r |= (sct->size >> 1);
if (!isu) n += 2; else if (n < 2) n = 2;
if (!isu) n += 2*m; else if (n < 2*m) n = 2*m;
} else if (ctype_isstruct(sct->info)) {
goto substruct;
} else {
@ -803,10 +844,11 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
sct = ctype_rawchild(cts, ct);
substruct:
if (sct->size > 0) {
unsigned int s = ccall_classify_struct(cts, sct);
unsigned int s = ccall_classify_struct(cts, sct), sn;
if (s <= 1) goto noth;
r |= (s & 255);
if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
sn = (s >> 8) * m;
if (!isu) n += sn; else if (n < sn) n = sn;
}
}
}
@ -999,6 +1041,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CType *d;
CTSize sz;
MSize n, isfp = 0, isva = 0;
#if LJ_TARGET_S390X
MSize onstack = 0;
#endif
void *dp, *rp = NULL;
if (fid) { /* Get argument type from field. */
@ -1037,6 +1082,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */
#if LJ_TARGET_S390X
onstack = 1;
#endif
if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
MSize align = (1u << ctype_align(d->info)) - 1;
if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1))
@ -1086,6 +1134,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
}
#endif
#if LJ_TARGET_S390X
/* Arguments need to be sign-/zero-extended to 64-bits. */
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
(isfp && onstack)) && d->size <= 4) {
if (d->info & CTF_UNSIGNED || isfp)
*(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
else
*(int64_t *)dp = (int64_t)*(int32_t *)dp;
}
#endif
#if LJ_TARGET_X64 && LJ_ABI_WIN
if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
if (nfpr == ngpr)

View File

@ -129,6 +129,21 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#elif LJ_TARGET_S390X
#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */
#define CCALL_NARG_FPR 4 /* FPR 0,2,4,8 */
#define CCALL_NRET_GPR 1 /* GPR 2 */
#define CCALL_NRET_FPR 1 /* FPR 0 */
#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */
#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
typedef union FPRArg {
double d;
float f;
} FPRArg;
#else
#error "Missing calling convention definitions for this architecture"
#endif

View File

@ -531,6 +531,15 @@ void lj_ccallback_mcode_free(CTState *cts)
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
#elif LJ_TARGET_S390X
#define CALLBACK_HANDLE_REGARG \
if (isfp) { \
if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
} else { \
if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif

View File

@ -442,6 +442,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
if (version != 1)
return _URC_FATAL_PHASE1_ERROR;
cf = (void *)_Unwind_GetCFA(ctx);
#ifdef LJ_TARGET_S390X
cf -= 160; /* CFA points 160 bytes above r15. */
#endif
L = cframe_L(cf);
if ((actions & _UA_SEARCH_PHASE)) {
#if LJ_UNWIND_EXT

View File

@ -264,6 +264,20 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#endif
#define CFRAME_OFS_MULTRES 0
#define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_S390X
#define CFRAME_OFS_ERRF 280
#define CFRAME_OFS_NRES 272
#define CFRAME_OFS_PREV 264
#define CFRAME_OFS_L 256
#define CFRAME_OFS_PC 168
#define CFRAME_OFS_MULTRES 160
#define CFRAME_SIZE 240
/*
** TODO: it would be good if we always decoded param*8 like
** the RISC architectures do. If so then SHIFT_MULTRES will
** need to change to 3.
*/
#define CFRAME_SHIFT_MULTRES 0
#else
#error "Missing CFRAME_* definitions for this architecture"
#endif

View File

@ -143,6 +143,8 @@ typedef uint32_t RegCost;
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_target_mips.h"
#elif LJ_TARGET_S390X
#include "lj_target_s390x.h"
#else
#error "Missing include for target CPU"
#endif

80
src/lj_target_s390x.h Normal file
View File

@ -0,0 +1,80 @@
/*
** Definitions for IBM z/Architecture (s390x) CPUs.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_S390X_H
#define _LJ_TARGET_S390X_H
/* -- Registers IDs ------------------------------------------------------- */
#define GPRDEF(_) \
_(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
_(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
#define FPRDEF(_) \
_(F0) _(F1) _(F2) _(F3) \
_(F4) _(F5) _(F6) _(F7) \
_(F8) _(F9) _(F10) _(F11) \
_(F12) _(F13) _(F14) _(F15)
// TODO: VREG?
#define RIDENUM(name) RID_##name,
enum {
GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
/* Calling conventions. */
RID_SP = RID_R15,
RID_RET = RID_R2,
RID_FPRET = RID_F0,
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_R7, /* Interpreter BASE. */
RID_LPC = RID_R9, /* Interpreter PC. */
RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */
/* Register ranges [min, max) and number of registers. */
RID_MIN_GPR = RID_R0,
RID_MIN_FPR = RID_F0,
RID_MAX_GPR = RID_MIN_FPR,
RID_MAX_FPR = RID_MAX,
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
};
/* -- Register sets ------------------------------------------------------- */
/* -- Spill slots --------------------------------------------------------- */
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
**
** SPS_FIXED: Available fixed spill slots in interpreter frame.
** This definition must match with the *.dasc file(s).
**
** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
*/
#define SPS_FIXED 2
#define SPS_FIRST 2
#define SPOFS_TMP 0
#define sps_scale(slot) (4 * (int32_t)(slot))
#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
/* -- Exit state ---------------------------------------------------------- */
/* This definition must match with the *.dasc file(s). */
typedef struct {
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
#define EXITSTUB_SPACING 4
#define EXITSTUBS_PER_GROUP 32
/* -- Instructions -------------------------------------------------------- */
#endif

View File

@ -1717,8 +1717,8 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|.endmacro
|
| math_minmax math_min, gt, pl
| math_minmax math_max, lt, le
| math_minmax math_min, gt, hs
| math_minmax math_max, lt, ls
|
|//-- String library -----------------------------------------------------
|

4269
src/vm_s390x.dasc Normal file

File diff suppressed because it is too large Load Diff