This commit is contained in:
gns 2025-04-07 10:53:21 +02:00 committed by GitHub
commit 2fbe36c976
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 10911 additions and 7 deletions

View File

@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
dis_mips64.lua dis_mips64el.lua \
dis_mips64r6.lua dis_mips64r6el.lua \
dis_riscv.lua dis_riscv64.lua \
vmdef.lua
ifeq (,$(findstring Windows,$(OS)))

433
dynasm/dasm_riscv.h Normal file
View File

@ -0,0 +1,433 @@
/*
** DynASM RISC-V encoding engine.
** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "riscv"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions. */
/*
** Numeric action codes stored in bits 20 and up of an action word.
** The order must match `action_names` in dasm_riscv.lua.
*/
enum {
  /* Actions without a buffer position. */
  DASM_STOP = 0,
  DASM_SECTION,
  DASM_ESC,
  DASM_REL_EXT,
  /* The following actions need a buffer position. */
  DASM_ALIGN,
  DASM_REL_LG,
  DASM_LABEL_LG,
  /* The following actions also have an argument. */
  DASM_REL_PC,
  DASM_LABEL_PC,
  DASM_IMM,
  DASM_IMMS,
  /* Number of actions; anything >= this is a plain instruction word. */
  DASM__MAX
};
/*
** Maximum number of section buffer positions for a single dasm_put() call.
** dasm_put() grows the section buffer in steps of twice this amount and
** keeps this many slots in reserve (see sec->epos).
*/
#define DASM_MAXSECPOS 25
/*
** DynASM encoder status codes. Action list offset or number are or'ed in.
** The code itself occupies the top byte; the low 24 bits are left free for
** the or'ed-in detail (see the CK()/CKPL() macros and dasm_link()).
*/
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
/* Range errors. */
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_REL 0x15000000
/* Undefined label errors. */
#define DASM_S_UNDEF_LG 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/*
** Macros to convert positions (8 bit section + 24 bit index).
** A position is a single int: section number in the top 8 bits, slot index
** within that section's buffer in the low 24 bits.
*/
/* Slot index within the section. */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
/* Section part of a position, still shifted into the top byte. */
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
/* First position of section number `sec`. */
#define DASM_SEC2POS(sec) ((sec)<<24)
/* Section number of a position. */
#define DASM_POS2SEC(pos) ((pos)>>24)
/* Pointer to the buffer slot for `pos`; relies on the biased rbuf pointer. */
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type: an array of 32 bit words (instructions and actions). */
typedef const unsigned int *dasm_ActList;
/*
** Per-section structure.
** Each section owns one int buffer that pass 1 (dasm_put) fills with action
** list offsets, label links and immediate arguments.
*/
typedef struct dasm_Section {
int *rbuf; /* Biased buffer pointer (negative section bias). */
int *buf; /* True buffer pointer. */
size_t bsize; /* Buffer size in bytes. */
int pos; /* Biased buffer position. */
int epos; /* End of biased buffer position - max single put. */
int ofs; /* Byte offset into section. */
} dasm_Section;
/*
** Core structure holding the DynASM encoding state.
** Allocated by dasm_init() with room for `maxsection` trailing sections.
*/
struct dasm_State {
size_t psize; /* Allocated size of this structure. */
dasm_ActList actionlist; /* Current actionlist pointer. */
int *lglabels; /* Local/global chain/pos ptrs. */
size_t lgsize; /* Allocated size of lglabels in bytes. */
int *pclabels; /* PC label chains/pos ptrs. */
size_t pcsize; /* Allocated size of pclabels in bytes. */
void **globals; /* Array of globals. */
dasm_Section *section; /* Pointer to active section. */
size_t codesize; /* Total size of all code sections. */
int maxsection; /* 0 <= sectionidx < maxsection. */
int status; /* Status code. */
dasm_Section sections[1]; /* All sections. Alloc-extended. */
};
/*
** The size of the core structure depends on the max. number of sections.
** One section is already part of sizeof(dasm_State), hence the (ms-1).
*/
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/*
** Initialize DynASM state.
** Allocates the state block (sized for `maxsection` sections) through
** DASM_M_GROW, stores it in Dst_REF and clears the label arrays, the
** globals pointer and all section descriptors.
*/
void dasm_init(Dst_DECL, int maxsection)
{
  size_t allocsz = 0;
  dasm_State *state;

  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, allocsz, DASM_PSZ(maxsection));
  state = Dst_REF;

  /* No section owns a buffer yet. */
  memset((void *)state->sections, 0, maxsection * sizeof(dasm_Section));
  state->maxsection = maxsection;

  /* Label arrays and globals are set up later. */
  state->globals = NULL;
  state->pclabels = NULL;
  state->pcsize = 0;
  state->lglabels = NULL;
  state->lgsize = 0;

  state->psize = allocsz;
}
/* Free DynASM state. */
void dasm_free(Dst_DECL)
{
dasm_State *D = Dst_REF;
int i;
for (i = 0; i < D->maxsection; i++)
if (D->sections[i].buf)
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
DASM_M_FREE(Dst, D, D->psize);
}
/*
** Setup global label array. Must be called before dasm_setup().
** Remembers the caller's `gl` array and sizes the local/global label table
** for 10 local slots plus `maxgl` globals.
*/
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *state = Dst_REF;
  size_t need = (10+maxgl)*sizeof(int);

  state->globals = gl;
  DASM_M_GROW(Dst, int, state->lglabels, state->lgsize, need);
}
/*
** Grow PC label array. Can be called after dasm_setup(), too.
** Ensures room for `maxpc` PC labels and zeroes only the newly added tail,
** so labels recorded so far are preserved.
*/
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *state = Dst_REF;
  const size_t oldsz = state->pcsize;
  unsigned char *tail;

  DASM_M_GROW(Dst, int, state->pclabels, state->pcsize, maxpc*sizeof(int));
  tail = ((unsigned char *)state->pclabels) + oldsz;
  memset((void *)tail, 0, state->pcsize - oldsz);
}
/*
** Setup encoder.
** Binds the action list, resets the status, selects section 0, clears all
** label tables and rewinds every section to its first (biased) position.
*/
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *state = Dst_REF;
  int secidx;

  state->actionlist = (dasm_ActList)actionlist;
  state->status = DASM_S_OK;
  state->section = &state->sections[0];

  memset((void *)state->lglabels, 0, state->lgsize);
  if (state->pclabels) {
    memset((void *)state->pclabels, 0, state->pcsize);
  }

  for (secidx = 0; secidx < state->maxsection; secidx++) {
    dasm_Section *sec = &state->sections[secidx];
    sec->pos = DASM_SEC2POS(secidx);
    sec->rbuf = sec->buf - sec->pos;  /* Bias so rbuf[pos] hits buf[idx]. */
    sec->ofs = 0;
  }
}
/*
** Consistency check helpers for pass 1 (dasm_put).
** Both expect locals named `D` (state), `p` (action list cursor, already
** advanced past the current word) and, for CKPL, `pl` (label slot pointer).
** On failure they store a status code or'ed with the offending action list
** offset in D->status and return from the enclosing void function.
**   CK(x, st)       fails with DASM_S_<st> unless x holds.
**   CKPL(kind, st)  fails with DASM_S_RANGE_<st> if pl lies outside the
**                   D-><kind>labels array (kind is `lg` or `pc`).
** Without DASM_CHECKS both expand to no-ops.
*/
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/*
** Range filter for a signed 12 bit immediate.
** Returns n unchanged when -2048 <= n < 2048, otherwise the out-of-range
** marker 4096 (which callers compare against).
*/
static int dasm_imms(int n)
{
  if (n < -2048 || n >= 2048)
    return 4096;
  return n;
}
/*
** Pass 1: Store actions and args, link branches/labels, estimate offsets.
**
** `start` is the offset of the first word to process in the action list.
** One int vararg is consumed for every action >= DASM_REL_PC encountered
** (PC label number or immediate value).
**
** The walk ends at DASM_STOP or DASM_SECTION. Results are appended to the
** active section's int buffer; the section's position and byte offset
** estimate are written back at the end.
**
** With DASM_CHECKS, a failed check stores the error in D->status and
** returns immediately.
** NOTE(review): that early return skips both va_end() and the write-back
** of sec->pos/sec->ofs.
*/
void dasm_put(Dst_DECL, int start, ...)
{
va_list ap;
dasm_State *D = Dst_REF;
dasm_ActList p = D->actionlist + start;
dasm_Section *sec = D->section;
int pos = sec->pos, ofs = sec->ofs;
int *b;
/* Grow the section buffer when fewer than DASM_MAXSECPOS slots may remain. */
if (pos >= sec->epos) {
DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
}
b = sec->rbuf;
/* Every put starts with the action list offset it was called with. */
b[pos++] = start;
va_start(ap, start);
while (1) {
unsigned int ins = *p++;
unsigned int action = (ins >> 20);
/* A non-zero low nibble or an unknown action code marks an instruction. */
if (action >= DASM__MAX || (ins & 0xf)) {
ofs += 4;
} else {
/* Action word: drop the low nibble, the action's value is in the rest. */
ins >>= 4;
int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
switch (action) {
case DASM_STOP: goto stop;
case DASM_SECTION:
n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
D->section = &D->sections[n]; goto stop;
/* Escaped word: the next action list word is a literal instruction. */
case DASM_ESC: p++; ofs += 4; break;
case DASM_REL_EXT: break;
/* Assume worst-case padding for now; dasm_link() shrinks it again. */
case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
case DASM_REL_LG:
n = (ins & 2047) - 10; pl = D->lglabels + n;
/* Bkwd rel or global. */
if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
pl += 10; n = *pl;
if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
goto linkrel;
case DASM_REL_PC:
pl = D->pclabels + n; CKPL(pc, PC);
putrel:
n = *pl;
if (n < 0) { /* Label exists. Get label pos and store it. */
b[pos] = -n;
} else {
linkrel:
b[pos] = n; /* Else link to rel chain, anchored at label. */
*pl = pos;
}
pos++;
break;
case DASM_LABEL_LG:
pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
case DASM_LABEL_PC:
pl = D->pclabels + n; CKPL(pc, PC);
putlabel:
n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
}
*pl = -pos; /* Label exists now. */
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
case DASM_IMM:
/* Value layout: bit 15 = signed, bits 10-14 = scale, 5-9 = width. */
#ifdef DASM_CHECKS
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
#endif
n >>= ((ins>>10)&31);
#ifdef DASM_CHECKS
if (ins & 0x8000)
CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
else
CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
b[pos++] = n;
break;
case DASM_IMMS:
/* Store-type immediate: must fit a signed 12 bit field. */
#ifdef DASM_CHECKS
CK(dasm_imms(n) != 4096, RANGE_I);
#endif
b[pos++] = n;
break;
}
}
}
stop:
va_end(ap);
sec->pos = pos;
sec->ofs = ofs;
}
#undef CK
/*
** Pass 2: Link sections, shrink aligns, fix label offsets.
**
** Replays the recorded puts of every section in order, turning the pass 1
** per-section offset estimates stored at labels into final offsets relative
** to the start of the combined code. The total code size is stored in
** D->codesize and returned through *szp.
**
** Returns DASM_S_OK. With DASM_CHECKS it instead returns a pending error
** status from pass 1, or DASM_S_UNDEF_PC or'ed with the label number for a
** PC label that is referenced but never defined (*szp is 0 in both cases).
*/
int dasm_link(Dst_DECL, size_t *szp)
{
dasm_State *D = Dst_REF;
int secnum;
int ofs = 0;
#ifdef DASM_CHECKS
*szp = 0;
if (D->status != DASM_S_OK) return D->status;
{
int pc;
/* A positive entry is a still-open reference chain: label never defined. */
for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
}
#endif
{ /* Handle globals not defined in this translation unit. */
int idx;
for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
int n = D->lglabels[idx];
/* Undefined label: Collapse rel chain and replace with marker (< 0). */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
}
}
/* Combine all code sections. No support for data sections (yet). */
for (secnum = 0; secnum < D->maxsection; secnum++) {
dasm_Section *sec = D->sections + secnum;
int *b = sec->rbuf;
int pos = DASM_SEC2POS(secnum);
int lastpos = sec->pos;
while (pos != lastpos) {
/* Each put begins with its action list start offset. */
dasm_ActList p = D->actionlist + b[pos++];
while (1) {
unsigned int ins = *p++;
unsigned int action = (ins >> 20);
/* Instruction words need no fixup here; only actions are replayed. */
if (ins & 0xf) continue; else ins >>= 4;
switch (action) {
case DASM_STOP: case DASM_SECTION: goto stop;
case DASM_ESC: p++; break;
case DASM_REL_EXT: break;
/* Take back the padding that is not needed at the now-known offset. */
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
case DASM_REL_LG: case DASM_REL_PC: pos++; break;
/* ofs is the running correction from section estimate to final offset. */
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
case DASM_IMM: case DASM_IMMS: pos++; break;
}
}
stop: (void)0;
}
ofs += sec->ofs; /* Next section starts right after current section. */
}
D->codesize = ofs; /* Total size of all code sections */
*szp = ofs;
return DASM_S_OK;
}
/*
** Consistency check helper for pass 3 (dasm_encode).
** Expects locals `D` and `p`; on failure returns DASM_S_<st> or'ed with the
** offending action list offset from the enclosing function.
*/
#ifdef DASM_CHECKS
#define CK(x, st) \
  do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
/*
** Pass 3: Encode sections.
**
** Replays all recorded puts and writes the final machine code to `buffer`,
** which must hold at least the size reported by dasm_link(). Instruction
** words are copied verbatim; actions patch the most recently written
** instruction (cp[-1]) or emit alignment padding.
**
** Returns DASM_S_OK on success, DASM_S_PHASE if the amount of code written
** differs from D->codesize, or (with DASM_CHECKS) a range/undefined-label
** status or'ed with the action list offset.
*/
int dasm_encode(Dst_DECL, void *buffer)
{
  /* Canonical RISC-V NOP (addi x0, x0, 0), used as alignment padding. */
  const unsigned int riscv_nop = 0x00000013;
  dasm_State *D = Dst_REF;
  char *base = (char *)buffer;
  unsigned int *cp = (unsigned int *)buffer;
  int secnum;

  /* Encode all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->buf;
    int *endb = sec->rbuf + sec->pos;

    while (b != endb) {
      /* Each put begins with its action list start offset. */
      dasm_ActList p = D->actionlist + *b++;
      while (1) {
        unsigned int ins = *p++;
        /* A non-zero low nibble marks a plain instruction word. */
        if (ins & 0xf) { *cp++ = ins; continue; }
        unsigned int action = (ins >> 20);
        unsigned int val = (ins >> 4);
        /* Actions from DASM_ALIGN on own one recorded buffer slot. */
        int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: *cp++ = *p++; break;
        case DASM_REL_EXT:
          n = DASM_EXTERN(Dst, (unsigned char *)cp, (val & 2047), 1);
          goto patchrel;
        case DASM_ALIGN:
          /*
          ** Pad with real NOPs. The previous filler 0x60000000 is the
          ** PowerPC nop and is not a valid RISC-V instruction.
          */
          val &= 255;
          while ((((char *)cp - base) & val)) *cp++ = riscv_nop;
          break;
        case DASM_REL_LG:
          if (n < 0) {
            /* Marker from dasm_link(): global defined outside this unit. */
            n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4);
            goto patchrel;
          }
          /* fallthrough */
        case DASM_REL_PC:
          CK(n >= 0, UNDEF_PC);
          /* cp is one word past the branch, hence the +4. */
          n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
        patchrel:
          if (val & 2048) {  /* B */
            CK((n & 1) == 0 && ((n + 0x1000) >> 13) == 0, RANGE_REL);
            cp[-1] |= ((n << 19) & 0x80000000) | ((n << 20) & 0x7e000000)
                    | ((n << 7) & 0x00000f00) | ((n >> 4) & 0x00000080);
          } else {  /* J */
            CK((n & 1) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
            cp[-1] |= ((n << 11) & 0x80000000) | ((n << 20) & 0x7fe00000)
                    | ((n << 9) & 0x00100000) | (n & 0x000ff000);
          }
          break;
        case DASM_LABEL_LG:
          /* Label numbers from 20 up are globals: publish their address. */
          val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n);
          break;
        case DASM_LABEL_PC: break;
        case DASM_IMM:
          /* val: bits 5-9 = field width, bits 0-4 = shift into the word. */
          cp[-1] |= (n & ((1<<((val>>5)&31))-1)) << (val&31);
          break;
        case DASM_IMMS:
          /* Split 12 bit store immediate: bits 5-11 -> 25-31, 0-4 -> 7-11. */
          cp[-1] |= (((n << 20) & 0xfe000000) | ((n << 7) & 0x00000f80));
          break;
        default: *cp++ = ins; break;
        }
      }
      stop: (void)0;
    }
  }

  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
    return DASM_S_PHASE;
  return DASM_S_OK;
}
#undef CK
/*
** Get PC label offset.
** Returns the offset stored for PC label `pc` if it has been defined,
** -1 if the label is referenced but still undefined, and -2 if it is unused
** or `pc` lies outside the PC label array.
*/
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *state = Dst_REF;
  int pos;

  if (pc*sizeof(int) >= state->pcsize)
    return -2;  /* Out of range. */

  pos = state->pclabels[pc];
  if (pos < 0)
    return *DASM_POS2PTR(state, -pos);  /* Defined: negated position. */
  if (pos > 0)
    return -1;  /* Undefined. */
  return -2;  /* Unused. */
}
#ifdef DASM_CHECKS
/*
** Optional sanity checker to call between isolated encoding steps.
** Reports DASM_S_UNDEF_LG for a local label (1-9) that still has open
** forward references and resets the others; if `secmatch` is >= 0 it also
** reports DASM_S_MATCH_SEC when the active section is a different one.
** Returns the resulting status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *state = Dst_REF;

  if (state->status == DASM_S_OK) {
    int lbl = 1;
    while (lbl <= 9) {
      if (state->lglabels[lbl] > 0) {
        state->status = DASM_S_UNDEF_LG|lbl;
        break;
      }
      state->lglabels[lbl] = 0;
      lbl++;
    }
  }

  if (state->status == DASM_S_OK && secmatch >= 0) {
    if (state->section != &state->sections[secmatch])
      state->status = DASM_S_MATCH_SEC|(int)(state->section-state->sections);
  }
  return state->status;
}
#endif

979
dynasm/dasm_riscv.lua Normal file
View File

@ -0,0 +1,979 @@
------------------------------------------------------------------------------
-- DynASM RISC-V module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
--
-- Contributed by gns from PLCT Lab, ISCAS.
------------------------------------------------------------------------------
-- Target selectors, captured from the global environment.
-- NOTE(review): presumably set by a per-target wrapper module before this
-- file is loaded -- confirm against the loaders.
local riscv32 = riscv32
local riscv64 = riscv64
-- Module information:
local _info = {
-- Evaluates to nil/false when neither selector is set.
arch = riscv32 and "riscv32" or riscv64 and "riscv64",
description = "DynASM RISC-V module",
version = "1.5.0",
vernum = 10500,
release = "2022-07-12",
author = "Mike Pall",
license = "MIT",
}
-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }
-- Cache library functions.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, setmetatable = assert, setmetatable
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local match, gmatch = _s.match, _s.gmatch
local concat, sort = table.concat, table.sort
local bit = bit or require("bit")
local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
local tohex = bit.tohex
-- Collect all keys of `t` into a new array and return it sorted ascending.
local function __orderedIndexGen(t)
  local keys, count = {}, 0
  for key in pairs(t) do
    count = count + 1
    keys[count] = key
  end
  table.sort(keys)
  return keys
end
-- Stateless iterator step for opairs(): returns the key after `state` in
-- sorted key order together with its value, or nothing when done.
-- The sorted key list is cached in t.__orderedIndex for the duration of the
-- traversal and removed again once it is exhausted.
local function __orderedNext(t, state)
  local key
  if state == nil then
    -- First step: build the sorted key list and start at its head.
    local index = __orderedIndexGen(t)
    t.__orderedIndex = index
    key = index[1]
  else
    -- Later steps: locate the previous key and take its successor.
    local index = t.__orderedIndex
    for i, k in ipairs(index) do
      if k == state then
        key = index[i+1]
      end
    end
  end
  if key then
    return key, t[key]
  end
  t.__orderedIndex = nil
  return
end
-- Like pairs(), but visits the keys of `t` in sorted order.
local function opairs(t)
  local iterator, subject, start = __orderedNext, t, nil
  return iterator, subject, start
end
-- Inherited tables and callbacks.
-- Declared here only; NOTE(review): presumably assigned by a setup routine
-- later in the file (not visible in this chunk).
local g_opt, g_arch
local wline, werror, wfatal, wwarn
-- Action name list.
-- CHECK: Keep this in sync with the C code!
-- The list position determines the action number (first entry is 0).
local action_names = {
"STOP", "SECTION", "ESC", "REL_EXT",
"ALIGN", "REL_LG", "LABEL_LG",
"REL_PC", "LABEL_PC", "IMM", "IMMS",
}
-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
-- Action name -> action number.
local map_action = {}
for n,name in ipairs(action_names) do
map_action[name] = n-1
end
-- Action list buffer.
-- Holds numeric words; wpos() temporarily stores "" as a placeholder.
local actlist = {}
-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }
-- Current number of section buffer positions for dasm_put().
local secpos = 1
------------------------------------------------------------------------------
-- Dump action names and numbers.
-- Writes one line per action to `out`: name, number in hex, number in decimal.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for i = 1, #action_names do
    local name = action_names[i]
    local code = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, code, code))
  end
  out:write("\n")
end
-- Write action list buffer as a huge static C array.
-- `out` is the output stream, `name` the C identifier of the array.
-- An empty action list is emitted as a one-element array holding a STOP
-- action. The fallback lives in a local list: the previous code stored it in
-- actlist[0] but then read actlist[1], which is nil and made tohex() fail.
local function writeactions(out, name)
  local list = actlist
  local nn = #list
  if nn == 0 then
    list = { map_action.STOP }
    nn = 1
  end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(list[i]), ",\n"))
  end
  -- The last element carries no trailing comma.
  assert(out:write("0x", tohex(list[nn]), "\n};\n\n"))
end
------------------------------------------------------------------------------
-- Add word to action list.
-- `n` must be an integer in the unsigned 32 bit range.
local function wputxw(n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  actlist[#actlist+1] = n
end
-- Add action to list with optional arg. Advance buffer pos, too.
-- The action word is the action number shifted to bit 20 plus `val` shifted
-- left by 4. `a`, if given, becomes an extra dasm_put() argument; `num`
-- overrides the number of buffer positions accounted for (default 1).
local function waction(action, val, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  wputxw(code * 0x100000 + (val or 0) * 16)
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
-- Flush action list (intervening C code or buffer pos overflow).
-- Emits the pending dasm_put() call, terminated by a STOP action unless
-- `term` is set, and starts a fresh argument list at the current offset.
local function wflush(term)
  if #actlist ~= actargs[1] then  -- Something to flush?
    if not term then waction("STOP") end  -- Terminate action list.
    local args = concat(actargs, ", ")
    wline(format("dasm_put(Dst, %s);", args), true)
    actargs = { #actlist }  -- Actionlist offset is 1st arg to next dasm_put().
    secpos = 1  -- The actionlist offset occupies a buffer position, too.
  end
end
-- Put escaped word.
-- A word whose low nibble is zero would be mistaken for an action, so it is
-- preceded by an ESC action.
local function wputw(n)
  local looks_like_action = band(n, 0xf) == 0
  if looks_like_action then waction("ESC") end
  wputxw(n)
end
-- Reserve position for word.
-- Appends a placeholder to the action list and returns its index, to be
-- filled in later with wputpos().
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end
-- Store word to reserved position.
-- Accepts integers from -0x80000000 up to 0xffffffff, i.e. both the signed
-- and the unsigned view of a 32 bit word.
local function wputpos(pos, n)
  local in_range = n >= -0x80000000 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  actlist[pos] = n
end
------------------------------------------------------------------------------
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbers start at 20 and may not exceed 2047.
local next_global = 20
local map_global = setmetatable({}, {
  __index = function(labels, name)
    if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
    local num = next_global
    if num > 2047 then werror("too many global labels") end
    next_global = num + 1
    labels[name] = num
    return num
  end
})
-- Dump global labels.
-- Lists all assigned global label names on `out`, in numbering order.
local function dumpglobals(out, lvl)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("Global labels:\n")
  local num = 20
  while num <= next_global-1 do
    out:write(format(" %s\n", names[num]))
    num = num + 1
  end
  out:write("\n")
end
-- Write global label enum.
-- Emits a C enum with one `prefix`ed member per global label, in numbering
-- order, closed by `prefix`_MAX.
local function writeglobals(out, prefix)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("enum {\n")
  local num = 20
  while num <= next_global-1 do
    out:write(" ", prefix, names[num], ",\n")
    num = num + 1
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
-- Write global label names.
-- Emits a NULL-terminated C string array called `name` that holds the
-- global label names in numbering order.
local function writeglobalnames(out, name)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  local num = 20
  while num <= next_global-1 do
    out:write(" \"", names[num], "\",\n")
    num = num + 1
  end
  out:write(" (const char *)0\n};\n")
end
------------------------------------------------------------------------------
-- Extern label name -> extern label number. With auto assignment on 1st use.
-- map_extern_ is the reverse mapping (number -> name), numbers start at 0.
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, {
  __index = function(externs, name)
    -- No restrictions on the name for now.
    local num = next_extern
    if num > 2047 then werror("too many extern labels") end
    next_extern = num + 1
    externs[name] = num
    map_extern_[num] = name
    return num
  end
})
-- Dump extern labels.
-- Lists all assigned extern label names on `out`, in numbering order.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local num = 0
  while num <= next_extern-1 do
    out:write(format(" %s\n", map_extern_[num]))
    num = num + 1
  end
  out:write("\n")
end
-- Write extern label names.
-- Emits a NULL-terminated C string array called `name` that holds the
-- extern label names in numbering order.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local num = 0
  while num <= next_extern-1 do
    out:write(" \"", map_extern_[num], "\",\n")
    num = num + 1
  end
  out:write(" (const char *)0\n};\n")
end
------------------------------------------------------------------------------
-- Arch-specific maps.
-- Only the two ABI aliases ra (x1) and sp (x2) are predefined here.
local map_archdef = {
ra = "x1", sp = "x2",
} -- Ext. register name -> int. name.
-- Entries are read as tp.reg and tp.ctypefmt by the operand parsers.
local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).
-- Reverse defines for registers.
-- Maps the internal names x1/x2 back to ra/sp; anything else is returned
-- unchanged.
function _M.revdef(s)
  local aliases = { x1 = "ra", x2 = "sp" }
  return aliases[s] or s
end
------------------------------------------------------------------------------
-- Template strings for RISC-V instructions.
--
-- Each template is 8 hex digits (the base instruction word) followed by one
-- letter per operand, decoded by map_op[".template__"]:
--   D/R/r    gpr rd / rs1 / rs2          F/G/g/H  fpr rd / rs1 / rs2 / rs3
--   Y        fpr used as both rs1, rs2   C        csr number
--   M        rounding mode               I        signed imm12
--   i/j      shamt5 / shamt6             u        5 bit uimm (rs1 field)
--   U        imm20                       L/S      load / store displacement
--   B/J      branch / jump label         A        auipc label
-- The `_N` key suffix appears to be the operand count (the lookup by
-- mnemonic happens outside this chunk).
local map_op = {}
local map_op_rv32imafd = {
-- RV32I
lui_2 = "00000037DU",
auipc_2 = "00000017DA",
jal_2 = "0000006fDJ",
jalr_3 = "00000067DRJ",
-- pseudo-instrs
j_1 = "0000006fJ",
jal_1 = "000000efJ",
jr_1 = "00000067R",
jalr_1 = "000000e7R",
jalr_2 = "000000e7RJ",
beq_3 = "00000063RrB",
bne_3 = "00001063RrB",
blt_3 = "00004063RrB",
bge_3 = "00005063RrB",
bltu_3 = "00006063RrB",
bgeu_3 = "00007063RrB",
-- pseudo-instrs
-- (comparisons against zero leave the other register field as x0; the
-- "greater/less-or-equal" forms swap the operands of blt/bge/bltu/bgeu)
bnez_2 = "00001063RB",
beqz_2 = "00000063RB",
blez_2 = "00005063rB",
bgez_2 = "00005063RB",
bltz_2 = "00004063RB",
bgtz_2 = "00004063rB",
bgt_3 = "00004063rRB",
ble_3 = "00005063rRB",
bgtu_3 = "00006063rRB",
bleu_3 = "00007063rRB",
lb_2 = "00000003DL",
lh_2 = "00001003DL",
lw_2 = "00002003DL",
lbu_2 = "00004003DL",
lhu_2 = "00005003DL",
sb_2 = "00000023rS",
sh_2 = "00001023rS",
sw_2 = "00002023rS",
addi_3 = "00000013DRI",
slti_3 = "00002013DRI",
sltiu_3 = "00003013DRI",
xori_3 = "00004013DRI",
ori_3 = "00006013DRI",
andi_3 = "00007013DRI",
slli_3 = "00001013DRi",
srli_3 = "00005013DRi",
srai_3 = "40005013DRi",
-- pseudo-instrs
seqz_2 = "00103013DR",
["zext.b_2"] = "0ff07013DR",
add_3 = "00000033DRr",
sub_3 = "40000033DRr",
sll_3 = "00001033DRr",
slt_3 = "00002033DRr",
sltu_3 = "00003033DRr",
xor_3 = "00004033DRr",
srl_3 = "00005033DRr",
sra_3 = "40005033DRr",
or_3 = "00006033DRr",
and_3 = "00007033DRr",
-- pseudo-instrs
snez_2 = "00003033Dr",
sltz_2 = "00002033DR",
sgtz_2 = "00002033Dr",
ecall_0 = "00000073",
ebreak_0 = "00100073",
nop_0 = "00000013",
li_2 = "00000013DI",
mv_2 = "00000013DR",
not_2 = "fff04013DR",
neg_2 = "40000033Dr",
ret_0 = "00008067",
-- RV32M
mul_3 = "02000033DRr",
mulh_3 = "02001033DRr",
mulhsu_3 = "02002033DRr",
mulhu_3 = "02003033DRr",
div_3 = "02004033DRr",
divu_3 = "02005033DRr",
rem_3 = "02006033DRr",
remu_3 = "02007033DRr",
-- RV32A
-- NOTE(review): these templates use the OP-FP major opcode (0x53) and an
-- fpr destination (`F`); "c0000053" is the same base word as fcvt.w.s
-- further down. The RISC-V ISA manual places LR/SC/AMO under the AMO major
-- opcode (0x2f) with integer registers. `sc.w_2` also lists three operand
-- letters under a `_2` key. These entries look like placeholders -- verify
-- against the spec before relying on them.
["lr.w_2"] = "c0000053FR",
["sc.w_2"] = "c0001053FRr",
["amoswap.w_3"] = "c0002053FRr",
["amoadd.w_3"] = "c0003053FRr",
["amoxor.w_3"] = "c0004053FRr",
["amoor.w_3"] = "c0005053FRr",
["amoand.w_3"] = "c0006053FRr",
["amomin.w_3"] = "c0007053FRr",
["amomax.w_3"] = "c0008053FRr",
["amominu.w_3"] = "c0009053FRr",
["amomaxu.w_3"] = "c000a053FRr",
-- RV32F
-- (forms without a trailing `M` leave the rounding mode field at 0)
["flw_2"] = "00002007FL",
["fsw_2"] = "00002027gS",
["fmadd.s_4"] = "00000043FGgH",
["fmsub.s_4"] = "00000047FGgH",
["fnmsub.s_4"] = "0000004bFGgH",
["fnmadd.s_4"] = "0000004fFGgH",
["fmadd.s_5"] = "00000043FGgHM",
["fmsub.s_5"] = "00000047FGgHM",
["fnmsub.s_5"] = "0000004bFGgHM",
["fnmadd.s_5"] = "0000004fFGgHM",
["fadd.s_3"] = "00000053FGg",
["fsub.s_3"] = "08000053FGg",
["fmul.s_3"] = "10000053FGg",
["fdiv.s_3"] = "18000053FGg",
["fsqrt.s_2"] = "58000053FG",
["fadd.s_4"] = "00000053FGgM",
["fsub.s_4"] = "08000053FGgM",
["fmul.s_4"] = "10000053FGgM",
["fdiv.s_4"] = "18000053FGgM",
["fsqrt.s_3"] = "58000053FGM",
["fsgnj.s_3"] = "20000053FGg",
["fsgnjn.s_3"] = "20001053FGg",
["fsgnjx.s_3"] = "20002053FGg",
["fmin.s_3"] = "28000053FGg",
["fmax.s_3"] = "28001053FGg",
["fcvt.w.s_2"] = "c0000053DG",
["fcvt.wu.s_2"] = "c0100053DG",
["fcvt.w.s_3"] = "c0000053DGM",
["fcvt.wu.s_3"] = "c0100053DGM",
["fmv.x.w_2"] = "e0000053DG",
["feq.s_3"] = "a0002053DGg",
["flt.s_3"] = "a0001053DGg",
["fle.s_3"] = "a0000053DGg",
["fclass.s_2"] = "e0001053DG",
["fcvt.s.w_2"] = "d0000053FR",
["fcvt.s.wu_2"] = "d0100053FR",
["fcvt.s.w_3"] = "d0000053FRM",
["fcvt.s.wu_3"] = "d0100053FRM",
["fmv.w.x_2"] = "f0000053FR",
-- RV32D
["fld_2"] = "00003007FL",
["fsd_2"] = "00003027gS",
["fmadd.d_4"] = "02000043FGgH",
["fmsub.d_4"] = "02000047FGgH",
["fnmsub.d_4"] = "0200004bFGgH",
["fnmadd.d_4"] = "0200004fFGgH",
["fmadd.d_5"] = "02000043FGgHM",
["fmsub.d_5"] = "02000047FGgHM",
["fnmsub.d_5"] = "0200004bFGgHM",
["fnmadd.d_5"] = "0200004fFGgHM",
["fadd.d_3"] = "02000053FGg",
["fsub.d_3"] = "0a000053FGg",
["fmul.d_3"] = "12000053FGg",
["fdiv.d_3"] = "1a000053FGg",
["fsqrt.d_2"] = "5a000053FG",
["fadd.d_4"] = "02000053FGgM",
["fsub.d_4"] = "0a000053FGgM",
["fmul.d_4"] = "12000053FGgM",
["fdiv.d_4"] = "1a000053FGgM",
["fsqrt.d_3"] = "5a000053FGM",
["fsgnj.d_3"] = "22000053FGg",
["fsgnjn.d_3"] = "22001053FGg",
["fsgnjx.d_3"] = "22002053FGg",
["fmin.d_3"] = "2a000053FGg",
["fmax.d_3"] = "2a001053FGg",
["fcvt.s.d_2"] = "40100053FG",
["fcvt.d.s_2"] = "42000053FG",
["feq.d_3"] = "a2002053DGg",
["flt.d_3"] = "a2001053DGg",
["fle.d_3"] = "a2000053DGg",
["fclass.d_2"] = "e2001053DG",
["fcvt.w.d_2"] = "c2000053DG",
["fcvt.wu.d_2"] = "c2100053DG",
["fcvt.d.w_2"] = "d2000053FR",
["fcvt.d.wu_2"] = "d2100053FR",
["fcvt.w.d_3"] = "c2000053DGM",
["fcvt.wu.d_3"] = "c2100053DGM",
["fcvt.d.w_3"] = "d2000053FRM",
["fcvt.d.wu_3"] = "d2100053FRM",
-- pseudo-instrs built on fsgnj/fsgnjn/fsgnjx with rs1 == rs2 (`Y`)
["fmv.d_2"] = "22000053FY",
["fneg.d_2"] = "22001053FY",
["fabs.d_2"] = "22002053FY",
}
-- RV64-only additions. Merged after map_op_rv32imafd for the riscv64
-- target, so entries with the same key (slli_3, srli_3, srai_3) replace the
-- RV32 ones; here they use the 6 bit shift amount (`j`).
local map_op_rv64imafd = {
-- RV64I
lwu_2 = "00006003DL",
ld_2 = "00003003DL",
sd_2 = "00003023rS",
slli_3 = "00001013DRj",
srli_3 = "00005013DRj",
srai_3 = "40005013DRj",
addiw_3 = "0000001bDRI",
slliw_3 = "0000101bDRi",
srliw_3 = "0000501bDRi",
sraiw_3 = "4000501bDRi",
addw_3 = "0000003bDRr",
subw_3 = "4000003bDRr",
sllw_3 = "0000103bDRr",
srlw_3 = "0000503bDRr",
sraw_3 = "4000503bDRr",
negw_2 = "4000003bDr",
["sext.w_2"] = "0000001bDR",
-- RV64M
mulw_3 = "0200003bDRr",
divw_3 = "0200403bDRr",
divuw_3 = "0200503bDRr",
remw_3 = "0200603bDRr",
remuw_3 = "0200703bDRr",
-- RV64A
-- NOTE(review): as with the RV32A entries, these templates use the OP-FP
-- major opcode (0x53) and an fpr destination (`F`); "c2000053" is the same
-- base word as fcvt.w.d. The RISC-V ISA manual places LR/SC/AMO under the
-- AMO major opcode (0x2f) with integer registers. `sc.d_2` also lists three
-- operand letters under a `_2` key. Verify against the spec before use.
["lr.d_2"] = "c2000053FR",
["sc.d_2"] = "c2001053FRr",
["amoswap.d_3"] = "c2002053FRr",
["amoadd.d_3"] = "c2003053FRr",
["amoxor.d_3"] = "c2004053FRr",
["amoor.d_3"] = "c2005053FRr",
["amoand.d_3"] = "c2006053FRr",
["amomin.d_3"] = "c2007053FRr",
["amomax.d_3"] = "c2008053FRr",
["amominu.d_3"] = "c2009053FRr",
["amomaxu.d_3"] = "c200a053FRr",
-- RV64F
["fcvt.l.s_2"] = "c0200053DG",
["fcvt.lu.s_2"] = "c0300053DG",
["fcvt.l.s_3"] = "c0200053DGM",
["fcvt.lu.s_3"] = "c0300053DGM",
["fcvt.s.l_2"] = "d0200053FR",
["fcvt.s.lu_2"] = "d0300053FR",
["fcvt.s.l_3"] = "d0200053FRM",
["fcvt.s.lu_3"] = "d0300053FRM",
-- RV64D
["fcvt.l.d_2"] = "c2200053DG",
["fcvt.lu.d_2"] = "c2300053DG",
["fcvt.l.d_3"] = "c2200053DGM",
["fcvt.lu.d_3"] = "c2300053DGM",
["fmv.x.d_2"] = "e2000053DG",
["fcvt.d.l_2"] = "d2200053FR",
["fcvt.d.lu_2"] = "d2300053FR",
["fcvt.d.l_3"] = "d2200053FRM",
["fcvt.d.lu_3"] = "d2300053FRM",
["fmv.d.x_2"] = "f2000053FR",
}
-- Zicsr: CSR access instructions. `C` takes the CSR number (parse_csr
-- accepts decimal 0..4095), `u` the 5 bit immediate in the rs1 field.
local map_op_zicsr = {
csrrw_3 = "00001073DCR",
csrrs_3 = "00002073DCR",
csrrc_3 = "00003073DCR",
csrrwi_3 = "00005073DCu",
csrrsi_3 = "00006073DCu",
csrrci_3 = "00007073DCu",
-- pseudo-ops
-- NOTE(review): csrrw_2 takes (rd, csr) while the other two-operand forms
-- below take (csr, rs1) or (csr, uimm) -- confirm that this is intended.
csrrw_2 = "00001073DC",
csrrs_2 = "00002073CR",
csrrc_2 = "00003073CR",
csrrwi_2 = "00005073Cu",
csrrsi_2 = "00006073Cu",
csrrci_2 = "00007073Cu",
-- Counter reads: the CSR number is baked into the top 12 bits.
rdinstret_1 = "C0202073D",
rdcycle_1 = "C0002073D",
rdtime_1 = "C0102073D",
rdinstreth_1 = "C8202073D",
rdcycleh_1 = "C8002073D",
rdtimeh_1 = "C8102073D",
-- Floating-point control/status helpers (CSR numbers 3, 2 and 1).
frcsr_1 = "00302073D",
fscsr_2 = "00301073DR",
fscsr_1 = "00301073R",
frrm_1 = "00202073D",
fsrm_2 = "00201073DR",
fsrm_1 = "00201073R",
fsrmi_2 = "00205073Du",
fsrmi_1 = "00205073u",
frflags_1 = "00102073D",
fsflags_2 = "00101073DR",
fsflagsi_2 = "00105073Du",
fsflagsi_1 = "00105073u",
}
-- Zifencei.
local map_op_zifencei = { ["fence.i_3"] = "0000100fDRI" }

-- Opcode tables per target. They are merged into map_op in sorted key order
-- (a, b, c, ...), so a later table overrides same-named entries of an
-- earlier one.
local list_map_op_rv32 = {
  ['a'] = map_op_rv32imafd,
  ['b'] = map_op_zifencei,
  ['c'] = map_op_zicsr,
}
local list_map_op_rv64 = {
  ['a'] = map_op_rv32imafd,
  ['b'] = map_op_rv64imafd,
  ['c'] = map_op_zifencei,
  ['d'] = map_op_zicsr,
}

if riscv32 then
  for _, optable in opairs(list_map_op_rv32) do
    for opname, template in pairs(optable) do
      map_op[opname] = template
    end
  end
end

if riscv64 then
  for _, optable in opairs(list_map_op_rv64) do
    for opname, template in pairs(optable) do
      map_op[opname] = template
    end
  end
end
------------------------------------------------------------------------------
-- Parse a general purpose register operand.
-- Accepts `xN` (N = 0..31), a type name from map_type that carries a default
-- register, or `type:xN` with an explicit register override.
-- Returns the register number and the type entry (nil for a plain register).
local function parse_gpr(expr)
  local typename, override = match(expr, "^([%w_]+):(x[1-3]?[0-9])$")
  local typedesc = map_type[typename or expr]
  if typedesc then
    local reg = override or typedesc.reg
    if not reg then
      werror("type `"..(typename or expr).."' needs a register override")
    end
    expr = reg
  end
  local digits = match(expr, "^x([1-3]?[0-9])$")
  local regno = digits and tonumber(digits)
  if regno and regno <= 31 then
    return regno, typedesc
  end
  werror("bad register name `"..expr.."'")
end
-- Parse a floating-point register operand `fN` (N = 0..31) and return N.
local function parse_fpr(expr)
  local digits = match(expr, "^f([1-3]?[0-9])$")
  local regno = digits and tonumber(digits)
  if regno and regno <= 31 then
    return regno
  end
  werror("bad register name `"..expr.."'")
end
-- Parse an immediate operand for a `bits` wide field located at bit `shift`.
-- A numeric literal must be a multiple of 2^scale and fit the (optionally
-- signed) field; its encoded field value is returned. Any other expression
-- is deferred to runtime via an IMM action (or `action`, if given) and 0 is
-- returned. Register-looking operands are rejected.
local function parse_imm(imm, bits, shift, scale, signed, action)
  local value = tonumber(imm)
  if not value then
    if match(imm, "^[xf]([1-3]?[0-9])$") or
       match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
      werror("expected immediate operand, got register")
      return
    end
    -- Action value: signed flag, scale, field width and shift.
    local spec = (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift
    waction(action or "IMM", spec, imm)
    return 0
  end

  local scaled = sar(value, scale)
  if shl(scaled, scale) == value then
    if signed then
      local top = sar(scaled, bits-1)
      if top == 0 then
        return shl(scaled, shift)
      end
      if top == -1 then
        -- Negative: add 2^bits before shifting into place.
        return shl(scaled + shl(1, bits), shift)
      end
    elseif sar(scaled, bits) == 0 then
      return shl(scaled, shift)
    end
  end
  werror("out of range immediate `"..imm.."'")
end
-- Parse a CSR operand given as a decimal number in the range 0..4095.
local function parse_csr(expr)
  local digits = match(expr, "^([1-4]?[0-9]?[0-9]?[0-9])$")
  local csrno = digits and tonumber(digits)
  if csrno and csrno <= 4095 then
    return csrno
  end
  werror("bad register name `"..expr.."'")
end
-- Parse a store-type (split) signed 12 bit immediate.
-- A numeric literal in -2048..2047 is returned already split into its two
-- instruction fields: low 5 bits at bit 7, upper 7 bits at bit 25. Any other
-- expression is deferred to runtime via an IMMS action and 0 is returned.
-- Register-looking operands are rejected.
local function parse_imms(imm)
  local value = tonumber(imm)
  if not value then
    if match(imm, "^[xf]([1-3]?[0-9])$") or
       match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
      werror("expected immediate operand, got register")
      return
    end
    waction("IMMS", 0, imm)
    return 0
  end

  if value >= -2048 and value < 2048 then
    local low5 = band(value, 0x1f)
    local high7 = shr(band(value, 0xfe0), 5)
    return shl(low5, 7) + shl(high7, 25)
  end
  werror("out of range immediate `"..imm.."'")
end
-- Parse a floating-point rounding mode name and return its 3 bit encoding.
local function parse_rm(mode)
  local encoding = ({
    rne = 0, rtz = 1, rdn = 2, rup = 3, rmm = 4, dyn = 7
  })[mode]
  if encoding then
    return encoding
  end
  werror("bad rounding mode `"..mode.."'")
end
-- Parse a memory operand for a load (`mode` == "load") or store ("store").
-- Two syntaxes are accepted:
--   imm(reg)         displacement plus base register; `extern name` as the
--                    displacement emits a REL_EXT action instead
--   type-reg tail    a typed register followed by a field expression, which
--                    is formatted with the type's ctypefmt and resolved at
--                    runtime
-- Returns the instruction bits contributed by the operand (base register in
-- the rs1 field plus any literal displacement).
local function parse_disp(disp, mode)
  local offset, basename = match(disp, "^(.*)%(([%w_:]+)%)$")
  if offset then
    local basebits = shl((parse_gpr(basename)), 15)
    local extname = match(offset, "^extern%s+(%S+)$")
    if extname then
      waction("REL_EXT", map_extern[extname], nil, 1)
      return basebits
    end
    if mode == "load" then
      return basebits + parse_imm(offset, 12, 20, 0, true)
    elseif mode == "store" then
      return basebits + parse_imms(offset)
    else
      werror("bad displacement mode '"..mode.."'")
    end
  end

  local typedreg, field = match(disp, "^([%w_:]+)%s*(.*)$")
  if typedreg and field ~= "" then
    local regno, typedesc = parse_gpr(typedreg)
    if typedesc then
      if mode == "load" then
        -- Signed 12 bit immediate at bit 20.
        waction("IMM", 32768+12*32+20, format(typedesc.ctypefmt, field))
      elseif mode == "store" then
        waction("IMMS", 0, format(typedesc.ctypefmt, field))
      else
        werror("bad displacement mode '"..mode.."'")
      end
      return shl(regno, 15)
    end
  end
  werror("bad displacement `"..disp.."'")
end
-- Parse a label operand. `def` is true for a label definition, false for a
-- reference. Returns the label kind ("PC", "LG" or "EXT"), its number and,
-- for PC labels, the expression text that yields the label number.
local function parse_label(label, def)
  local prefix = sub(label, 1, 2)
  if prefix == "=>" then
    -- =>label (pc label reference)
    return "PC", 0, sub(label, 3)
  elseif prefix == "->" then
    -- ->name (global label reference)
    return "LG", map_global[sub(label, 3)]
  end

  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference)
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then -- Fwd: 1-9, Bkwd: 11-19.
      local bias = (dir == ">") and 0 or 10
      return "LG", lnum + bias
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end
------------------------------------------------------------------------------
-- Handle opcodes defined with template strings.
-- `template` is 8 hex digits of base instruction word followed by one letter
-- per operand; `params` holds the operand strings in source order.
-- Called without params, it returns just the operand letters.
-- The instruction word is reserved in the action list first; actions that
-- the operands emit follow it and are applied to that word by the C encoder.
map_op[".template__"] = function(params, template, nparams)
if not params then return sub(template, 9) end
local op = tonumber(sub(template, 1, 8), 16)
local n = 1
-- Limit number of section buffer positions used by a single dasm_put().
-- A single opcode needs a maximum of 2 positions (ins/ext).
if secpos+2 > maxsecpos then wflush() end
local pos = wpos()
-- Process each character.
for p in gmatch(sub(template, 9), ".") do
if p == "D" then -- gpr rd
op = op + shl(parse_gpr(params[n]), 7); n = n + 1
elseif p == "R" then -- gpr rs1
op = op + shl(parse_gpr(params[n]), 15); n = n + 1
elseif p == "r" then -- gpr rs2
op = op + shl(parse_gpr(params[n]), 20); n = n + 1
elseif p == "F" then -- fpr rd
op = op + shl(parse_fpr(params[n]), 7); n = n + 1
elseif p == "G" then -- fpr rs1
op = op + shl(parse_fpr(params[n]), 15); n = n + 1
elseif p == "g" then -- fpr rs2
op = op + shl(parse_fpr(params[n]), 20); n = n + 1
elseif p == "H" then -- fpr rs3
op = op + shl(parse_fpr(params[n]), 27); n = n + 1
elseif p == "C" then -- csr
op = op + shl(parse_csr(params[n]), 20); n = n + 1
elseif p == "M" then -- fpr rounding mode
op = op + shl(parse_rm(params[n]), 12); n = n + 1
elseif p == "Y" then -- fpr pseudo-op: same register in rs1 and rs2
local r = parse_fpr(params[n])
op = op + shl(r, 15) + shl(r, 20); n = n + 1
elseif p == "I" then -- I-type imm12
op = op + parse_imm(params[n], 12, 20, 0, true); n = n + 1
elseif p == "i" then -- I-type shamt5
op = op + parse_imm(params[n], 5, 20, 0, false); n = n + 1
elseif p == "j" then -- I-type shamt6
op = op + parse_imm(params[n], 6, 20, 0, false); n = n + 1
elseif p == "u" then -- I-type uimm
op = op + parse_imm(params[n], 5, 15, 0, false); n = n + 1
elseif p == "U" then -- U-type imm20
op = op + parse_imm(params[n], 20, 12, 0, false); n = n + 1
elseif p == "L" then -- load
op = op + parse_disp(params[n], "load"); n = n + 1
elseif p == "S" then -- store
op = op + parse_disp(params[n], "store"); n = n + 1
elseif p == "B" or p == "J" then -- control flow
local mode, m, s = parse_label(params[n], false)
-- Bit 2048 of the action value selects the B-type encoding in the C encoder.
if p == "B" then m = m + 2048 end
waction("REL_"..mode, m, s, 1); n = n + 1
elseif p == "A" then -- AUIPC
local mode, m, s = parse_label(params[n], false)
waction("REL_"..mode, m, s, 1); n = n + 1
else
assert(false)
end
end
-- Fill the reserved slot with the finished instruction word.
wputpos(pos, op)
end
------------------------------------------------------------------------------
-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- `.actionlist cvar`: defer emission of the action list under the given
-- C variable name. Without params, returns the parameter description.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeactions(out, cvar) end)
  else
    return "cvar"
  end
end
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-- `.globals prefix`: defer emission of the global label enum using the given
-- prefix. Without params, returns the parameter description.
map_op[".globals_1"] = function(params)
  if params then
    local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobals(out, enumprefix) end)
  else
    return "prefix"
  end
end
-- Pseudo-opcode to mark the position where the global names are to be emitted.
-- `.globalnames cvar`: defer emission of the global label names under the
-- given C variable name. Without params, returns the parameter description.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobalnames(out, cvar) end)
  else
    return "cvar"
  end
end
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
-- `.externnames cvar`: defer emission of the extern names under the given
-- C variable name. Without params, returns the parameter description.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeexternnames(out, cvar) end)
  else
    return "cvar"
  end
end
------------------------------------------------------------------------------
-- Label pseudo-opcode (converted from trailing colon form).
-- Label definition: parses the label in definition mode and emits the
-- matching LABEL_* action. Extern labels cannot be defined.
map_op[".label_1"] = function(params)
  if params then
    -- A label needs one section buffer position.
    if secpos+1 > maxsecpos then wflush() end
    local kind, arg, expr = parse_label(params[1], true)
    if kind == "EXT" then werror("bad label definition") end
    waction("LABEL_"..kind, arg, expr, 1)
  else
    return "[1-9] | ->global | =>pcexpr"
  end
end
------------------------------------------------------------------------------
-- Pseudo-opcodes for data storage.
-- `.long imm...`: emit each numeric parameter as one 32 bit word.
-- Negative values are wrapped into the unsigned 32 bit range first.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _, text in ipairs(params) do
    local word = tonumber(text) or werror("bad immediate `"..text.."'")
    wputw(word < 0 and word + 2^32 or word)
    -- Flush once the section position budget is nearly used up.
    if secpos+2 > maxsecpos then wflush() end
  end
end
-- Alignment pseudo-opcode.
-- `.align numpow2`: emit an ALIGN action for a power-of-two alignment.
-- Anything that is not a power of 2 in 2..256 is reported as an error.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    -- Halve up to 8 times; hitting exactly 1 proves a power of 2 in (2 ... 256).
    local rest = align
    for _ = 1, 8 do
      rest = rest / 2
      if rest == 1 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end
------------------------------------------------------------------------------
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- `.type name, ctype [, reg]`: register a C type under a DynASM name.
-- Adds a `#name` define expanding to sizeof(ctype), records the type with a
-- numbered DtN(...) offset formatter and emits the matching C #define.
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  local entry = { ctype = ctype, reg = reg }
  entry.ctypefmt = format("Dt%X(%%s)", num)
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  -- Only commit the new type number once everything above succeeded.
  ctypenum = num
end
-- The two-parameter form shares the same handler (reg is then nil).
map_op[".type_2"] = map_op[".type_3"]
-- Dump type definitions.
-- Write all registered type definitions to `out`, sorted by name.
-- `lvl` is accepted for interface symmetry but not used here.
local function dumptypes(out, lvl)
  local names = {}
  for name in pairs(map_type) do names[#names+1] = name end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local name = names[i]
    local tp = map_type[name]
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
  end
  out:write("\n")
end
------------------------------------------------------------------------------
-- Set the current section.
-- Switch to section `num`: emit the SECTION action and flush immediately.
_M.section = function(num)
  waction("SECTION", num)
  wflush(true) -- SECTION is a terminal action.
end
------------------------------------------------------------------------------
-- Dump architecture description.
-- Write the module banner followed by the action list dump to `out`.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpactions(out)
end
-- Dump all user defined elements.
-- Dump user-defined types, globals and externs (in that order) to `out`.
function _M.dumpdef(out, lvl)
  local dumpers = { dumptypes, dumpglobals, dumpexterns }
  for i = 1, 3 do
    dumpers[i](out, lvl)
  end
end
------------------------------------------------------------------------------
-- Pass callbacks from/to the DynASM core.
-- Install the core's line/error/fatal/warning callbacks and hand back this
-- module's flush function.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
-- Setup the arch-specific module.
-- Remember the architecture name and option table passed in by the core.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
-- Merge the core maps and the arch-specific maps.
-- Chain the maps via __index: arch opcodes fall back to the core opcodes,
-- core defines fall back to the arch defines. Returns both merged maps.
function _M.mergemaps(map_coreop, map_def)
  local op_fallback = { __index = map_coreop }
  local def_fallback = { __index = map_archdef }
  setmetatable(map_op, op_fallback)
  setmetatable(map_def, def_fallback)
  return map_op, map_def
end
return _M
------------------------------------------------------------------------------

12
dynasm/dasm_riscv32.lua Normal file
View File

@ -0,0 +1,12 @@
------------------------------------------------------------------------------
-- DynASM RISC-V 32 module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 32 bit mode for the combined RISC-V module.
-- All the interesting stuff is there.
------------------------------------------------------------------------------
riscv32 = true -- Using a global is an ugly, but effective solution.
return require("dasm_riscv")

12
dynasm/dasm_riscv64.lua Normal file
View File

@ -0,0 +1,12 @@
------------------------------------------------------------------------------
-- DynASM RISC-V 64 module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined RISC-V module.
-- All the interesting stuff is there.
------------------------------------------------------------------------------
riscv64 = true -- Using a global is an ugly, but effective solution.
return require("dasm_riscv")

View File

@ -52,6 +52,7 @@ CCOPT_arm=
CCOPT_arm64=
CCOPT_ppc=
CCOPT_mips=
CCOPT_riscv64=
#
CCDEBUG=
# Uncomment the next line to generate debug information:
@ -266,6 +267,9 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
else
TARGET_LJARCH= mips
endif
else
ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= riscv64
else
$(error Unsupported target architecture)
endif
@ -274,6 +278,7 @@ endif
endif
endif
endif
endif
ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
TARGET_SYS= PS3
@ -471,6 +476,9 @@ ifeq (ppc,$(TARGET_LJARCH))
DASM_AFLAGS+= -D PPE -D TOC
endif
endif
ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D RISCV64
endif
endif
endif

View File

@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#include "../dynasm/dasm_ppc.h"
#elif LJ_TARGET_MIPS
#include "../dynasm/dasm_mips.h"
#elif LJ_TARGET_RISCV64
#include "../dynasm/dasm_riscv.h"
#else
#error "No support for this architecture (yet)"
#endif

View File

@ -156,6 +156,34 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
"Error: unsupported opcode %08x for %s symbol relocation.\n",
ins, sym);
exit(1);
#elif LJ_TARGET_RISCV64
if ((ins & 0x7f) == 0x17u) {
fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym);
} else if ((ins & 0x7f) == 0x67u) {
fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym);
} else if ((ins & 0x7f) == 0x6fu) {
fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym);
} else if ((ins & 0x7f) == 0x03u) {
uint8_t funct3 = (ins >> 12) & 7;
uint8_t rd = (ins >> 7) & 31, rs1 = (ins >> 15) & 31;
switch (funct3) {
case 0: fprintf(ctx->fp, "\tlb"); break;
case 1: fprintf(ctx->fp, "\tlh"); break;
case 2: fprintf(ctx->fp, "\tlw"); break;
case 3: fprintf(ctx->fp, "\tld"); break;
case 4: fprintf(ctx->fp, "\tlbu"); break;
case 5: fprintf(ctx->fp, "\tlhu"); break;
case 6: fprintf(ctx->fp, "\tlwu"); break;
default: goto rv_reloc_err;
}
fprintf(ctx->fp, " x%d, %s(x%d)\n", rd, sym, rs1);
} else {
rv_reloc_err:
fprintf(stderr,
"Error: unsupported opcode %08x for %s symbol relocation.\n",
ins, sym);
exit(1);
}
#else
#error "missing relocation support for this architecture"
#endif
@ -248,6 +276,9 @@ void emit_asm(BuildCtx *ctx)
#endif
#if LJ_TARGET_MIPS
fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n");
#endif
#if LJ_TARGET_RISCV64
fprintf(ctx->fp, ".option arch, -c\n.option norelax\n");
#endif
emit_asm_align(ctx, 4);

View File

@ -101,6 +101,7 @@ local map_arch = {
mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, },
}
local map_os = {

979
src/jit/dis_riscv.lua Normal file
View File

@ -0,0 +1,979 @@
------------------------------------------------------------------------------
-- LuaJIT RISC-V disassembler module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
--
-- Contributed by Milos Poletanovic from Syrmia.com.
-- Contributed by gns from PLCT Lab, ISCAS.
------------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.
--
-- It disassembles most standard RISC-V instructions.
-- Mode is little-endian
------------------------------------------------------------------------------
local type = type
local byte, format = string.byte, string.format
local match, gmatch = string.match, string.gmatch
local concat = table.concat
local bit = require("bit")
local band, bor, tohex = bit.band, bit.bor, bit.tohex
local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
local jit = require("jit")
local jstat = { jit.status() }
-- Return true if `opt` occurs among the values captured from jit.status().
local function is_opt_enabled(opt)
  for i = 1, #jstat do
    if jstat[i] == opt then return true end
  end
  return false
end
local xthead = is_opt_enabled("XThead")
------------------------------------------------------------------------------
-- Opcode maps
------------------------------------------------------------------------------
--RVC32 extension
local map_quad0 = {
shift = 13, mask = 7,
[0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn",
false, "c.fsdNMh", "c.swZMn", "c.fswNMn"
}
local map_sub2quad1 = {
shift = 5, mask = 3,
[0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ"
}
local map_sub1quad1 = {
shift = 10, mask = 3,
[0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1
}
local map_quad1 = {
shift = 13, mask = 7,
[0] = {
shift = 7, mask = 31,
[0] = "c.nop", _ = "c.addiDx"
},
[1] = "c.jalT", [2] = "c.liDx",
[3] = {
shift = 7, mask = 31,
[0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX",
_ = "c.luiDK"
},
[4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq"
}
local map_sub1quad2 = {
shift = 12, mask = 1,
[0] = {
shift = 2, mask = 31,
[0] = "c.jrD", _ = "c.mvDE"
},
[1] = {
shift = 2, mask = 31,
[0] = {
shift = 7, mask = 31,
[0] = "c.ebreak", _ = "c.jalrD"
},
_ = "c.addDE"
}
}
local map_quad2 = {
shift = 13, mask = 7,
[0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY",
[4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu"
}
local map_compr = {
[0] = map_quad0, map_quad1, map_quad2
}
--RV32M
local map_mext = {
shift = 12, mask = 7,
[0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr",
"divDRr", "divuDRr", "remDRr", "remuDRr"
}
--RV64M
local map_mext64 = {
shift = 12, mask = 7,
[0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr",
[7] = "remuwDRr"
}
--RV32F, RV64F, RV32D, RV64D
local map_fload = {
shift = 12, mask = 7,
[2] = "flwFL", [3] = "fldFL"
}
local map_fstore = {
shift = 12, mask = 7,
[2] = "fswSg", [3] = "fsdSg"
}
local map_fmadd = {
shift = 25, mask = 3,
[0] = "fmadd.sFGgHo", "fmadd.dFGgHo"
}
local map_fmsub = {
shift = 25, mask = 3,
[0] = "fmsub.sFGgHo", "fmsub.dFGgHo"
}
local map_fnmsub = {
shift = 25, mask = 3,
[0] = "fnmsub.sFGgHo", "fnmsub.dFGgHo"
}
local map_fnmadd = {
shift = 25, mask = 3,
[0] = "fnmadd.sFGgHo", "fnmadd.dFGgHo"
}
local map_fsgnjs = {
shift = 12, mask = 7,
[0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6"
}
local map_fsgnjd = {
shift = 12, mask = 7,
[0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6"
}
local map_fms = {
shift = 12, mask = 7,
[0] = "fmin.sFGg", "fmax.sFGg", "fminm.sFGg", "fmaxm.sFGg"
}
local map_fmd = {
shift = 12, mask = 7,
[0] = "fmin.dFGg", "fmax.dFGg", "fminm.dFGg", "fmaxm.dFGg"
}
-- Single-precision FP compares, selected by funct3 (bits 14:12).
-- All indices are explicit: a positional entry written after `[4] = ...`
-- would continue the positional count (index 3), not follow index 4.
local map_fcomps = {
  shift = 12, mask = 7,
  [0] = "fle.sDGg", [1] = "flt.sDGg", [2] = "feq.sDGg",
  [4] = "fleq.sDGg", [5] = "fltq.sDGg"
}
-- Double-precision FP compares, selected by funct3 (bits 14:12).
-- All indices are explicit: a positional entry written after `[4] = ...`
-- would continue the positional count (index 3), not follow index 4.
local map_fcompd = {
  shift = 12, mask = 7,
  [0] = "fle.dDGg", [1] = "flt.dDGg", [2] = "feq.dDGg",
  [4] = "fleq.dDGg", [5] = "fltq.dDGg"
}
local map_fcvtwls = {
shift = 20, mask = 31,
[0] = "fcvt.w.sDGo", "fcvt.wu.sDGo", "fcvt.l.sDGo", "fcvt.lu.sDGo"
}
local map_fcvtwld = {
shift = 20, mask = 31,
[0] = "fcvt.w.dDGo", "fcvt.wu.dDGo", "fcvt.l.dDGo", "fcvt.lu.dDGo",
[8] = {
shift = 12, mask = 7,
[1] = "fcvtmodw.dDG"
}
}
local map_fcvts = {
shift = 20, mask = 31,
[0] = "fcvt.s.wFRo", "fcvt.s.wuFRo", "fcvt.s.lFRo", "fcvt.s.luFRo"
}
local map_fcvtd = {
shift = 20, mask = 31,
[0] = "fcvt.d.wFRo", "fcvt.d.wuFRo", "fcvt.d.lFRo", "fcvt.d.luFRo"
}
local map_fcvtsd = {
shift = 20, mask = 31,
[0] = "fcvt.s.dFGo",
[4] = "fround.sFGo", [5] = "froundnx.sFGo"
}
local map_fcvtds = {
shift = 20, mask = 31,
[0] = "fcvt.d.sFGo",
[4] = "fround.dFGo", [5] = "froundnx.dFGo"
}
local map_fmvwx = {
shift = 20, mask = 31,
[0] = "fmv.w.xFR", [1] = "fli.sFy"
}
local map_fmvdx = {
shift = 20, mask = 31,
[0] = "fmv.d.xFR", [1] = "fli.dFy"
}
local map_fext = {
shift = 25, mask = 127,
[0] = "fadd.sFGgo", [1] = "fadd.dFGgo", [4] = "fsub.sFGgo", [5] = "fsub.dFGgo",
[8] = "fmul.sFGgo", [9] = "fmul.dFGgo", [12] = "fdiv.sFGgo", [13] = "fdiv.dFGgo",
[16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd,
[32] = map_fcvtsd, [33] = map_fcvtds,[44] = "fsqrt.sFGo", [45] = "fsqrt.dFGo",
[80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld,
[104] = map_fcvts, [105] = map_fcvtd,
[112] = {
shift = 12, mask = 7,
[0] = "fmv.x.wDG", "fclass.sDG"
},
[113] = {
shift = 12, mask = 7,
[0] = "fmv.x.dDG", "fclass.dDG"
},
[120] = map_fmvwx, [121] = map_fmvdx
}
--RV32A, RV64A
local map_aext = {
shift = 27, mask = 31,
[0] = {
shift = 12, mask = 7,
[2] = "amoadd.wDrO", [3] = "amoadd.dDrO"
},
{
shift = 12, mask = 7,
[2] = "amoswap.wDrO", [3] = "amoswap.dDrO"
},
{
shift = 12, mask = 7,
[2] = "lr.wDO", [3] = "lr.dDO"
},
{
shift = 12, mask = 7,
[2] = "sc.wDrO", [3] = "sc.dDrO"
},
{
shift = 12, mask = 7,
[2] = "amoxor.wDrO", [3] = "amoxor.dDrO"
},
[8] = {
shift = 12, mask = 7,
[2] = "amoor.wDrO", [3] = "amoor.dDrO"
},
[12] = {
shift = 12, mask = 7,
[2] = "amoand.wDrO", [3] = "amoand.dDrO"
},
[16] = {
shift = 12, mask = 7,
[2] = "amomin.wDrO", [3] = "amomin.dDrO"
},
[20] = {
shift = 12, mask = 7,
[2] = "amomax.wDrO", [3] = "amomax.dDrO"
},
[24] = {
shift = 12, mask = 7,
[2] = "amominu.wDrO", [3] = "amominu.dDrO"
},
[28] = {
shift = 12, mask = 7,
[2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO"
},
}
-- RV32I, RV64I
local map_load = {
shift = 12, mask = 7,
[0] = "lbDL", "lhDL", "lwDL", "ldDL",
"lbuDL", "lhuDL", "lwuDL"
}
local map_opimm = {
shift = 12, mask = 7,
[0] = {
shift = 7, mask = 0x1ffffff,
[0] = "nop", _ = "addi|li|mvDR0I2"
},
{
shift = 25, mask = 127,
[48] = {
shift = 20, mask = 31,
[4] = "sext.bDR", [5] = "sext.hDR"
},
_ = "slliDRi",
}, "sltiDRI", "sltiu|seqzDRI5",
"xori|notDRI4",
{
shift = 26, mask = 63,
[0] = "srliDRi", [16] = "sraiDRi", [24] = "roriDRi",
[26] = {
shift = 20, mask = 63,
[56] = "rev8DR"
}
},
"oriDRI", "andiDRI"
}
local map_branch = {
shift = 12, mask = 7,
[0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false,
"blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB"
}
local map_store = {
shift = 12, mask = 7,
[0] = "sbSr", "shSr", "swSr", "sdSr"
}
-- OP major opcode (0x33): first level selects on funct7 (bits 31:25), the
-- nested tables select on funct3 (bits 14:12).
-- Every nested table must carry shift/mask: the decoder indexes with them
-- unconditionally, so an empty placeholder table would raise instead of
-- falling back to the unknown-opcode output. Unsupported funct7 values are
-- simply left out.
local map_op = {
  shift = 25, mask = 127,
  [0] = {
    shift = 12, mask = 7,
    [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r",
    "xorDRr", "srlDRr", "orDRr", "andDRr"
  },
  [1] = map_mext,
  [5] = { -- Zbb
    shift = 12, mask = 7,
    [4] = "minDRr", [5] = "minuDRr", [6] = "maxDRr", [7] = "maxuDRr"
  },
  [7] = { -- Zicond
    shift = 12, mask = 7,
    [5] = "czero.eqzDRr", [7] = "czero.nezDRr"
  },
  [16] = { -- Zba
    shift = 12, mask = 7,
    [2] = "sh1addDRr", [4] = "sh2addDRr", [6] = "sh3addDRr"
  },
  [32] = { -- sub/sra plus Zbb
    shift = 12, mask = 7,
    [0] = "sub|negDR0r", [4] = "xnorDRr", [5] = "sraDRr", [6] = "ornDRr", [7] = "andnDRr"
  },
  [48] = { -- Zbb
    shift = 12, mask = 7,
    [1] = "rolDRr", [5] = "rorDRr"
  }
}
--- 64I
-- OP-IMM-32 major opcode (0x1b), selected by funct3 (bits 14:12).
-- slliw and slli.uw (Zba) share funct3 = 1 and differ in bits 31:26
-- (0 vs. 2); slli.uw has a 6 bit shift amount, so bit 25 is not part of the
-- selector. Keys above 7 cannot be reached with mask = 7 and are not used.
local map_opimm32 = {
  shift = 12, mask = 7,
  [0] = "addiw|sext.wDRI0",
  [1] = {
    shift = 26, mask = 63,
    [0] = "slliwDRi",
    [2] = "slli.uwDRi" -- Zba
  },
  [5] = { -- 64I, roriw is Zbb
    shift = 25, mask = 127,
    [0] = "srliwDRi", [32] = "sraiwDRi", [48] = "roriwDRi"
  }
}
-- OP-32 major opcode (0x3b): first level selects on funct7 (bits 31:25), the
-- nested tables select on funct3 (bits 14:12).
-- Each pattern is a mnemonic followed by operand letters; without the
-- letters the instruction would be printed with no operands at all.
local map_op32 = {
  shift = 25, mask = 127,
  [0] = { -- 64I
    shift = 12, mask = 7,
    [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr"
  },
  [1] = map_mext64,
  [4] = { -- Zba & Zbb
    shift = 12, mask = 7,
    [0] = "add.uw|zext.w|DRr0", [4] = "zext.hDRr"
  },
  [16] = { -- Zba
    shift = 12, mask = 7,
    [2] = "sh1add.uwDRr", [4] = "sh2add.uwDRr", [6] = "sh3add.uwDRr"
  },
  [32] = { -- 64I
    shift = 12, mask = 7,
    [0] = "subw|negwDR0r", [5] = "srawDRr"
  },
  [48] = { -- Zbb
    shift = 12, mask = 7,
    [1] = "rolwDRr", [5] = "rorwDRr"
  }
}
local map_ecabre = {
shift = 12, mask = 7,
[0] = {
shift = 20, mask = 4095,
[0] = "ecall", "ebreak"
}
}
local map_fence = {
shift = 12, mask = 1,
[0] = "fence", --"fence.i" ZIFENCEI EXTENSION
}
local map_jalr = {
shift = 7, mask = 0x1ffffff,
_ = "jalr|jrDRI7", [256] = "ret"
}
-- XThead* vendor extensions in the custom-0 major opcode (0x0b), selected by
-- funct3 (bits 14:12).
-- All funct3 indices are explicit. Positional entries in a Lua constructor
-- are numbered from 1 regardless of preceding `[k] =` keys, so positional
-- MemLoad/MemStore tables would land on 1/2 (clobbering Arithmetic and
-- th.ext) instead of 4/5.
-- XTheadMemPair: bits 31:29 are 111, bits 28:27 pick the operation and
-- bits 26:25 hold imm2 (printed by the "P" operand), so the selector shifts
-- by 27, not 25.
local map_xthead_custom0 = {
  shift = 12, mask = 7,
  [1] = { -- Arithmetic
    shift = 27, mask = 31,
    [0] = "th.addslDRrv",
    [2] = {
      shift = 26, mask = 63,
      [4] = "th.srriDRi",
      [5] = {
        shift = 25, mask = 127,
        [10] = "th.srriwDRi"
      }
    },
    [4] = { -- XTheadMac
      shift = 25, mask = 3,
      [0] = "th.mulaDRr", "th.mulsDRr", "th.mulawDRr", "th.mulswDRr"
    },
    [5] = { -- XTheadMac
      shift = 25, mask = 3,
      [0] = "th.mulahDRr", "th.mulshDRr"
    },
    [8] = { -- XTheadCondMov
      shift = 25, mask = 3,
      [0] = "th.mveqzDRr", "th.mvnezDRr"
    },
    [16] = { -- XTheadBb
      shift = 20, mask = 31,
      [0] = {
        shift = 25, mask = 3,
        [0] = "th.tstnbzDRi", "th.revDR", "th.ff0DR", "th.ff1DR"
      }
    },
    [17] = { -- XTheadBb
      shift = 26, mask = 1,
      [0] = "th.tstDRi"
    },
    [18] = { -- XTheadBb
      shift = 20, mask = 31,
      [0] = {
        shift = 25, mask = 3,
        [0] = "th.revwDR"
      }
    }
  },
  [2] = "th.extDRji", [3] = "th.extuDRji",
  [4] = { -- MemLoad
    shift = 29, mask = 7,
    [7] = { -- XTheadMemPair
      shift = 27, mask = 3,
      [0] = "th.lwdDrP", [2] = "th.lwudDrP", [3] = "th.lddDrP"
    }
  },
  [5] = { -- MemStore
    shift = 29, mask = 7,
    [7] = { -- XTheadMemPair
      shift = 27, mask = 3,
      [0] = "th.swdDrP", [3] = "th.sddDrP"
    }
  }
}
local map_custom0 = xthead and map_xthead_custom0 or nil
local map_pri = {
[3] = map_load, [7] = map_fload, [11] = map_custom0, [15] = map_fence, [19] = map_opimm,
[23] = "auipcDA", [27] = map_opimm32,
[35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_op,
[55] = "luiDU", [59] = map_op32, [67] = map_fmadd, [71] = map_fmsub,
[75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext,
[103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre
}
------------------------------------------------------------------------------
local map_gpr = {
[0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
"x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31",
}
local map_fgpr = {
[0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
}
local map_rm = {
[0] = "rne", "rtz", "rdn", "rup", "rmm", [7] = "dyn"
}
local map_fli = {
[0] = "-1.0",
"min",
"0x1p-16", "0x1p-15", "0x1p-8", "0x1p-7",
"0.0625", "0.125",
"0.25", "0.3125", "0.375", "0.4375",
"0.5", "0.625", "0.75", "0.875",
"1.0", "1.25", "1.5", "1.75",
"2.0", "2.5", "3.0",
"4.0", "8.0", "16.0", "128.0", "256.0",
"32768.0", "65536.0", "inf", "nan"
}
------------------------------------------------------------------------------
-- Output a nicely formatted line with an opcode and operands.
-- Output one formatted line (address, optional hex dump, mnemonic, operands)
-- and advance ctx.pos past the instruction just printed.
--   ctx      disassembler context (uses pos, addr, op, rel, symtab, hexdump,
--            out, code)
--   text     mnemonic
--   operands array of operand strings/numbers
-- ctx.out is a plain function taking a string (it defaults to io.write), so
-- it is called directly in both branches; a method call on it would raise.
local function putop(ctx, text, operands)
  local pos = ctx.pos
  local extra = ""
  if ctx.rel then
    local sym = ctx.symtab[ctx.rel]
    if sym then extra = "\t->"..sym end
  end
  if ctx.hexdump > 0 then
    ctx.out(format("%08x %s %-7s %s%s\n",
      ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra))
  else
    ctx.out(format("%08x %-7s %s%s\n",
      ctx.addr+pos, text, concat(operands, ", "), extra))
  end
  -- The two low bits of the first byte give the length of this instruction:
  -- anything but 11 is a 16 bit (compressed) encoding.
  if band(byte(ctx.code, pos+1), 3) < 3 then
    ctx.pos = pos + 2
  else
    ctx.pos = pos + 4
  end
end
-- Fallback for unknown opcodes.
-- Fallback for undecodable opcodes: print the raw word as a `.long`.
local function unknown(ctx)
  local raw = "0x"..tohex(ctx.op)
  return putop(ctx, ".long", { raw })
end
-- Fetch the little-endian instruction at ctx.pos from ctx.code.
-- Returns a 16 bit value when the two low bits of the first byte are not
-- both set, a 32 bit value otherwise.
local function get_le(ctx)
  local code, first = ctx.code, ctx.pos + 1
  local b0 = byte(code, first)
  if band(b0, 3) ~= 3 then
    -- 16 bit encoding.
    local b1 = byte(code, first + 1)
    return bor(lshift(b1, 8), b0)
  end
  local b1, b2, b3 = byte(code, first + 1, first + 3)
  return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
end
-- Decode the unsigned immediate for the "W" operand (used by c.addi4spn).
-- Each field is masked at its destination position:
--   bits 10:7 -> imm[9:6], bits 12:11 -> imm[5:4], bit 5 -> imm[3],
--   bit 6 -> imm[2].
local function parse_W(opcode)
  local hi  = band(rshift(opcode, 1), 0x3c0) -- imm[9:6]
  local mid = band(rshift(opcode, 7), 0x030) -- imm[5:4]
  local b3  = band(rshift(opcode, 2), 0x008) -- imm[3]
  local b2  = band(rshift(opcode, 4), 0x004) -- imm[2]
  return bor(hi, mid, b3, b2)
end
-- Decode the signed 6 bit immediate for the "x" operand:
-- bit 12 -> imm[5], bits 6:2 -> imm[4:0], then sign-extend from bit 5.
local function parse_x(opcode)
  local imm6 = bor(band(rshift(opcode, 7), 0x20), band(rshift(opcode, 2), 0x1f))
  return arshift(lshift(imm6, 26), 26)
end
-- Decode the signed immediate for the "X" operand (used by c.addi16sp):
-- bit 12 -> imm[9], bits 4:3 -> imm[8:7], bit 5 -> imm[6], bit 2 -> imm[5],
-- bit 6 -> imm[4]; the result is sign-extended from bit 9.
local function parse_X(opcode)
  local imm10 = bor(
    band(rshift(opcode, 3), 0x200), -- imm[9]
    band(lshift(opcode, 4), 0x180), -- imm[8:7]
    band(lshift(opcode, 1), 0x040), -- imm[6]
    band(lshift(opcode, 3), 0x020), -- imm[5]
    band(rshift(opcode, 2), 0x010)) -- imm[4]
  return arshift(lshift(imm10, 22), 22)
end
-- Decode the signed 12 bit S-type (store) immediate:
-- bits 31:25 -> imm[11:5], bits 11:7 -> imm[4:0], sign-extended from bit 11.
local function parse_S(opcode)
  local imm12 = bor(band(rshift(opcode, 20), 0xfe0), band(rshift(opcode, 7), 0x1f))
  return arshift(lshift(imm12, 20), 20)
end
-- Decode the signed 13 bit B-type (conditional branch) offset.
-- Field layout: bit 31 -> imm[12], bit 7 -> imm[11], bits 30:25 -> imm[10:5],
-- bits 11:8 -> imm[4:1]; imm[0] is always zero.
-- The sign bit is imm[12] (instruction bit 31), not imm[11]. Using imm[11]
-- only gives the right answer for offsets within -2048..2046.
local function parse_B(opcode)
  local imm13 = bor(
    band(rshift(opcode, 19), 0x1000), -- imm[12]
    band(lshift(opcode, 4), 0x0800),  -- imm[11]
    band(rshift(opcode, 20), 0x07e0), -- imm[10:5]
    band(rshift(opcode, 7), 0x001e))  -- imm[4:1]
  return arshift(lshift(imm13, 19), 19)
end
-- Decode the signed 9 bit offset for the "q" operand (c.beqz/c.bnez):
-- bit 12 -> imm[8], bits 6:5 -> imm[7:6], bit 2 -> imm[5],
-- bits 11:10 -> imm[4:3], bits 4:3 -> imm[2:1]; sign-extended from bit 8.
local function parse_q(opcode)
  local imm9 = bor(
    band(rshift(opcode, 4), 0x100), -- imm[8]
    band(lshift(opcode, 1), 0x0c0), -- imm[7:6]
    band(lshift(opcode, 3), 0x020), -- imm[5]
    band(rshift(opcode, 7), 0x018), -- imm[4:3]
    band(rshift(opcode, 2), 0x006)) -- imm[2:1]
  return arshift(lshift(imm9, 23), 23)
end
-- Decode the signed 21 bit J-type (jal) offset:
-- bit 31 -> imm[20], bits 19:12 -> imm[19:12], bit 20 -> imm[11],
-- bits 30:21 -> imm[10:1]; sign-extended from bit 20.
local function parse_J(opcode)
  local imm21 = bor(
    band(rshift(opcode, 11), 0x100000), -- imm[20]
    band(opcode, 0x0ff000),             -- imm[19:12]
    band(rshift(opcode, 9), 0x000800),  -- imm[11]
    band(rshift(opcode, 20), 0x0007fe)) -- imm[10:1]
  return arshift(lshift(imm21, 11), 11)
end
-- Decode the signed 12 bit offset for the "T" operand (c.j/c.jal):
-- bit 12 -> imm[11], bit 8 -> imm[10], bits 10:9 -> imm[9:8], bit 6 -> imm[7],
-- bit 7 -> imm[6], bit 2 -> imm[5], bit 11 -> imm[4], bits 5:3 -> imm[3:1];
-- sign-extended from bit 11.
local function parse_T(opcode)
  local imm12 = bor(
    band(rshift(opcode, 1), 0x800), -- imm[11]
    band(lshift(opcode, 2), 0x400), -- imm[10]
    band(rshift(opcode, 1), 0x300), -- imm[9:8]
    band(lshift(opcode, 1), 0x080), -- imm[7]
    band(rshift(opcode, 1), 0x040), -- imm[6]
    band(lshift(opcode, 3), 0x020), -- imm[5]
    band(rshift(opcode, 7), 0x010), -- imm[4]
    band(rshift(opcode, 2), 0x00e)) -- imm[3:1]
  return arshift(lshift(imm12, 20), 20)
end
-- Decode the immediate for the "K" operand (c.lui) as a 20 bit value:
-- bits 6:2 give the low five bits; when bit 12 is set the upper fifteen
-- bits are all ones.
local function parse_K(opcode)
  local low = band(rshift(opcode, 2), 31)
  if band(opcode, 0x1000) ~= 0 then
    return bor(0xfffe0, low)
  end
  return low
end
-- Disassemble a single instruction.
-- Disassemble a single instruction at ctx.pos and print it via putop().
--
-- The instruction word is looked up in the nested decode tables
-- (ctx.map_compr for 16 bit encodings, ctx.map_pri otherwise) until a
-- pattern string is found; anything else is printed as unknown.
-- A pattern is "name[|alias[|alias2]]" followed by operand letters. Letters
-- append operands; the digits 0/2/4/5/6/7 inspect the operands decoded so far
-- and switch to an alias mnemonic when they match a pseudo-instruction form.
local function disass_ins(ctx)
  local op = ctx:get()
  local operands = {}
  local last = nil
  ctx.op = op
  ctx.rel = nil

  -- Low two bits other than 11 mark a compressed instruction.
  local opat
  if band(op, 3) < 3 then
    opat = ctx.map_compr[band(op, 3)]
  else
    opat = ctx.map_pri[band(op, 127)]
  end
  while type(opat) ~= "string" do
    if not opat then return unknown(ctx) end
    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
  end

  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
  if altname then
    pat = pat2
  end

  -- Set once the first of two possible aliases was chosen, so that the
  -- second check ("2") is skipped.
  local alias_done = false

  for p in gmatch(pat, ".") do
    local x = nil
    if p == "D" then
      x = map_gpr[band(rshift(op, 7), 31)]
    elseif p == "F" then
      x = map_fgpr[band(rshift(op, 7), 31)]
    elseif p == "R" then
      x = map_gpr[band(rshift(op, 15), 31)]
    elseif p == "G" then
      x = map_fgpr[band(rshift(op, 15), 31)]
    elseif p == "r" then
      x = map_gpr[band(rshift(op, 20), 31)]
      if name == "sb" or name == "sh" or name == "sw" or name == "sd" then
        -- Stores list the address operand first in the pattern but the value
        -- register first in the output: swap with the previous operand.
        local temp = last
        operands[#operands] = x
        x = temp
      end
    elseif p == "g" then
      x = map_fgpr[band(rshift(op, 20), 31)]
      if name == "fsw" or name == "fsd" then
        -- Same operand swap as for the integer stores.
        local temp = last
        operands[#operands] = x
        x = temp
      end
    elseif p == "Z" then
      x = map_gpr[8 + band(rshift(op, 2), 7)]
    elseif p == "N" then
      x = map_fgpr[8 + band(rshift(op, 2), 7)]
    elseif p == "M" then
      x = map_gpr[8 + band(rshift(op, 7), 7)]
    elseif p == "E" then
      x = map_gpr[band(rshift(op, 2), 31)]
    elseif p == "W" then
      local uimm = parse_W(op)
      x = format("%s,%d", "sp", uimm)
    elseif p == "x" then
      x = parse_x(op)
    elseif p == "h" then
      local part1 = band(rshift(op, 5), 3) --7:6
      local part2 = band(rshift(op, 10), 7) --5:3
      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3))
      operands[#operands] = format("%d(%s)", uimm, last)
    elseif p == "X" then
      local imm = parse_X(op)
      x = format("%s,%d", "sp", imm)
    elseif p == "O" then
      x = format("(%s)", map_gpr[band(rshift(op, 15), 31)])
    elseif p == "H" then
      x = map_fgpr[band(rshift(op, 27), 31)]
    elseif p == "L" then
      local register = map_gpr[band(rshift(op, 15), 31)]
      local disp = arshift(op, 20)
      x = format("%d(%s)", disp, register)
    elseif p == "P" then -- XTheadMemPair
      local register = map_gpr[band(rshift(op, 15), 31)]
      local disp = band(arshift(op, 25), 3) -- imm2 at bits 26:25
      -- Bit 27 separates the word forms (th.lwd/th.lwud/th.swd, clear) from
      -- the doubleword forms (th.ldd/th.sdd, set). The trailing operand is
      -- 3 for word pairs and 4 for doubleword pairs.
      local isword = band(rshift(op, 27), 1) == 0
      x = format("(%s), %d, %d", register, disp, isword and 3 or 4)
    elseif p == "I" then
      x = arshift(op, 20)
      --different for jalr
      if name == "jalr" then
        local reg = map_gpr[band(rshift(op, 15), 31)]
        if ctx.reltab[reg] == nil then
          operands[#operands] = format("%d(%s)", x, last)
        else
          -- A preceding auipc recorded a base for this register: show the
          -- resolved target and use it for the symbol lookup.
          local target = ctx.reltab[reg] + x
          operands[#operands] = format("%d(%s) #0x%08x", x, last, target)
          ctx.rel = target
          ctx.reltab[reg] = nil --assume no reuses of the register
        end
        x = nil --not to add additional operand
      end
    elseif p == "i" then
      --both for RV32I AND RV64I
      local value = band(arshift(op, 20), 63)
      x = format("%d", value)
    elseif p == "j" then -- XThead imm1[31..26]
      local value = band(rshift(op, 26), 63)
      x = format("%d", value)
    elseif p == "v" then --XThead imm[2][26..25]
      local value = band(rshift(op, 25), 3)
      x = format("%d", value)
    elseif p == "S" then
      local register = map_gpr[band(rshift(op, 15), 31)] --register
      local imm = parse_S(op)
      x = format("%d(%s)", imm, register)
    elseif p == "n" then
      local part1 = band(rshift(op, 5), 1) --6
      local part2 = band(rshift(op, 10), 7) --5:3
      local part3 = band(rshift(op, 6), 1) --2
      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3),
        lshift(part3, 2))
      operands[#operands] = format("%d(%s)", uimm, last)
    elseif p == "A" then
      local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)]
      -- Remember the auipc result so a following jalr can show its target.
      ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12)
      x = format("0x%x", value)
    elseif p == "B" then
      x = ctx.addr + ctx.pos + parse_B(op)
      ctx.rel = x
      x = format("0x%08x", x)
    elseif p == "U" then
      local value = band(rshift(op, 12), 0xfffff)
      x = format("0x%x", value)
    elseif p == "Q" then
      local part1 = band(rshift(op, 2), 7) --8:6
      local part2 = band(rshift(op, 12), 1) --5
      local part3 = band(rshift(op, 5), 3) --4:3
      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
        lshift(part3, 3))
      x = format("%d(%s)", uimm, "sp")
    elseif p == "q" then
      x = ctx.addr + ctx.pos + parse_q(op)
      ctx.rel = x
      x = format("0x%08x", x)
    elseif p == "J" then
      x = ctx.addr + ctx.pos + parse_J(op)
      ctx.rel = x
      x = format("0x%08x", x)
    elseif p == "K" then
      local value = parse_K(op)
      x = format("0x%x", value)
    elseif p == "Y" then
      local part1 = band(rshift(op, 2), 3) --7:6
      local part2 = band(rshift(op, 12), 1) --5
      local part3 = band(rshift(op, 4), 7) --4:2
      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
        lshift(part3, 2))
      x = format("%d(%s)", uimm, "sp")
    elseif p == "o" then -- rounding mode
      x = map_rm[band(rshift(op, 12), 7)]
    elseif p == "y" then -- fli lut
      x = map_fli[band(rshift(op, 15), 31)]
    elseif p == "1" then
      local part1 = band(rshift(op, 12), 1) --5
      local part2 = band(rshift(op, 2), 31) --4:0
      local uimm = bor(lshift(0, 31), lshift(part1, 5), part2)
      x = format("0x%x", uimm)
    elseif p == "T" then
      x = ctx.addr + ctx.pos + parse_T(op)
      ctx.rel = x
      x = format("0x%08x", x)
    elseif p == "t" then
      local part1 = band(rshift(op, 7), 7) --8:6
      local part2 = band(rshift(op, 10), 7) --5:3
      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3))
      x = format("%d(%s)", uimm, "sp")
    elseif p == "u" then
      local part1 = band(rshift(op, 7), 3) --7:6
      local part2 = band(rshift(op, 9), 15) --5:2
      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2))
      x = format("%d(%s)", uimm, "sp")
    elseif p == "V" then
      x = map_fgpr[band(rshift(op, 2), 31)]
    elseif p == "0" then --PSEUDOINSTRUCTIONS
      -- Previous operand is x0/0: drop it and switch to the first alias.
      if last == "zero" or last == 0 then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if a1 then name, altname = a1, a2
        else name = altname end
        alias_done = true
      end
    elseif p == "4" then
      -- Immediate of -1: use the alias and drop the immediate.
      if last == -1 then
        name = altname
        operands[#operands] = nil
      end
    elseif p == "5" then
      -- Immediate of 1: use the alias and drop the immediate.
      if last == 1 then
        name = altname
        operands[#operands] = nil
      end
    elseif p == "6" then
      -- Both sources identical: use the alias with a single source.
      if last == operands[#operands - 1] then
        name = altname
        operands[#operands] = nil
      end
    elseif p == "7" then --jalr rs
      local value = string.sub(operands[#operands], 1, 1)
      local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1)
      if value == "0" and
         (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero") then
        if operands[#operands - 1] == "zero" then
          name = altname
        end
        -- Replace "rd, 0(rs)" by just "rs".
        operands[#operands] = nil
        operands[#operands] = reg
      end
    elseif p == "2" and alias_done == false then
      -- Previous operand is x0/0: drop it and switch to the second alias.
      if last == "zero" or last == 0 then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        name = a2
        operands[#operands] = nil
      end
    end
    if x then operands[#operands+1] = x; last = x end
  end
  return putop(ctx, name, operands)
end
------------------------------------------------------------------------------
-- Disassemble a block of code.
-- Disassemble ctx.code from offset `ofs` (default 0) for `len` bytes
-- (default: up to the end of the code). Both bounds are rounded down to an
-- even offset because instructions are 16 or 32 bits wide.
local function disass_block(ctx, ofs, len)
  ofs = ofs or 0
  local limit
  if len then
    limit = ofs + len
  else
    limit = #ctx.code
  end
  limit = limit - limit % 2
  ctx.pos = ofs - ofs % 2
  ctx.rel = nil
  while ctx.pos < limit do
    disass_ins(ctx)
  end
end
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-- Extended API: build a disassembler context for `code` (a string).
-- `addr` defaults to 0 and `out` to io.write. Call ctx:disass(ofs, len) on
-- the result.
local function create(code, addr, out)
  return {
    code = code,
    addr = addr or 0,
    out = out or io.write,
    symtab = {},
    disass = disass_block,
    hexdump = 8,
    get = get_le,
    map_pri = map_pri,
    map_compr = map_compr,
    reltab = {},
  }
end
-- Simple API: disassemble code (a string) at address and output via out.
-- Simple API: disassemble all of `code` (a string), printing addresses
-- relative to `addr`, and output via `out`.
-- `addr` is only the display base. It must not be passed on as the start
-- offset into `code`, or any nonzero address would skip (or empty) the
-- output.
local function disass(code, addr, out)
  create(code, addr, out):disass()
end
-- Return register name for RID.
-- Return the register name for register ID `r`: IDs below 32 are looked up
-- in map_gpr, the rest are named f0, f1, ... after subtracting 32.
local function regname(r)
  if r >= 32 then
    return "f"..(r-32)
  end
  return map_gpr[r]
end
-- Public module functions.
return {
create = create,
disass = disass,
regname = regname
}

16
src/jit/dis_riscv64.lua Normal file
View File

@ -0,0 +1,16 @@
----------------------------------------------------------------------------
-- LuaJIT RISC-V 64 disassembler wrapper module.
--
-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the default riscv little-endian functions from the
-- RISC-V disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------
-- Load "dis_riscv" relative to this module's own package prefix: `...` is the
-- name this chunk was required under, and the pattern keeps everything up to
-- and including the last dot (or the empty string if there is no dot).
local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv")
-- Re-export the generic RISC-V disassembler API unchanged.
return {
create = dis_riscv.create,
disass = dis_riscv.disass,
regname = dis_riscv.regname
}

View File

@ -631,6 +631,104 @@ JIT_PARAMDEF(JIT_PARAMINIT)
#include <sys/utsname.h>
#endif
#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX
#if LJ_TARGET_LINUX
#include <unistd.h>
/*
** State for the riscv_hwprobe based feature detection. These objects are
** only used inside this translation unit, so they get internal linkage to
** keep generic names like 'hwprobe_ret' out of the library's symbol table.
*/
/* Single key/value request asking for the IMA extension bitmask. */
static struct riscv_hwprobe hwprobe_requests[] = {
  {RISCV_HWPROBE_KEY_IMA_EXT_0}
};
/* Shortcut to the value slot of the request above (zero until probed). */
static const uint64_t *hwprobe_ext = &hwprobe_requests[0].value;
/* Result of the probe call; the feature helpers only trust the mask if 0. */
static int hwprobe_ret = 0;
#endif
/*
** Report whether the compressed instruction extension (C) is usable.
** Returns 1 if the compiler already targets it or (on Linux) the hwprobe
** result advertises it, otherwise 0. Declared with a (void) prototype so it
** matches the 'int (*)(void)' callback type taken by riscv_probe().
*/
static int riscv_compressed(void)
{
#if defined(__riscv_c) || defined(__riscv_compressed)
  /* Don't bother checking for RVC -- would crash before getting here. */
  return 1;
#elif LJ_TARGET_LINUX
  return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_IMA_C)) ? 1 : 0;
#else
  return 0;
#endif
}
/*
** Report whether the Zba extension is usable.
** Returns 1 if the compiler already targets it or (on Linux) the hwprobe
** result advertises it, otherwise 0. Declared with a (void) prototype so it
** matches the 'int (*)(void)' callback type taken by riscv_probe().
*/
static int riscv_zba(void)
{
#if defined(__riscv_b) || defined(__riscv_zba)
  /* Don't bother checking for Zba -- would crash before getting here. */
  return 1;
#elif LJ_TARGET_LINUX
  return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBA)) ? 1 : 0;
#else
  return 0;
#endif
}
/*
** Report whether the Zbb extension is usable.
** Returns 1 if the compiler already targets it or (on Linux) the hwprobe
** result advertises it, otherwise 0. Declared with a (void) prototype so it
** matches the 'int (*)(void)' callback type taken by riscv_probe().
*/
static int riscv_zbb(void)
{
#if defined(__riscv_b) || defined(__riscv_zbb)
  /* Don't bother checking for Zbb -- would crash before getting here. */
  return 1;
#elif LJ_TARGET_LINUX
  return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBB)) ? 1 : 0;
#else
  return 0;
#endif
}
/*
** Report whether the Zicond extension is usable.
** Returns 1 if the compiler already targets it or (on Linux) the hwprobe
** result advertises it, otherwise 0. Declared with a (void) prototype so it
** matches the 'int (*)(void)' callback type taken by riscv_probe().
*/
static int riscv_zicond(void)
{
#if defined(__riscv_zicond)
  /* Don't bother checking for Zicond -- would crash before getting here. */
  return 1;
#elif LJ_TARGET_LINUX
  return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZICOND)) ? 1 : 0;
#else
  return 0;
#endif
}
/*
** Report whether the Zfa extension is usable.
** Returns 1 if the compiler already targets it or (on Linux) the hwprobe
** result advertises it, otherwise 0. Declared with a (void) prototype so it
** matches the 'int (*)(void)' callback type taken by riscv_probe().
*/
static int riscv_zfa(void)
{
#if defined(__riscv_zfa)
  /* Don't bother checking for Zfa -- would crash before getting here. */
  return 1;
#elif LJ_TARGET_LINUX
  return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZFA)) ? 1 : 0;
#else
  return 0;
#endif
}
/*
** Report whether the XThead vendor extensions (ba, bb, condmov, mac) are
** usable. Only a compile-time decision is made: returns 1 when the compiler
** targets all four, otherwise 0. Declared with a (void) prototype so it
** matches the 'int (*)(void)' callback type taken by riscv_probe().
*/
static int riscv_xthead(void)
{
#if (defined(__riscv_xtheadba) \
  && defined(__riscv_xtheadbb) \
  && defined(__riscv_xtheadcondmov) \
  && defined(__riscv_xtheadmac))
  /* Don't bother checking for XThead -- would crash before getting here. */
  return 1;
#else
  /*
  ** Hardcoded as there's no easy way of detection:
  ** - SIGILL-based probing currently has trouble with libluajit.
  ** - Checking mvendorid looks good, but might not be reliable.
  */
  return 0;
#endif
}
/* Run a feature predicate and translate its result into a flag bit (or 0). */
static uint32_t riscv_probe(int (*func)(void), uint32_t flag)
{
  if (func())
    return flag;
  return 0;
}
#endif
/* Arch-dependent CPU feature detection. */
static uint32_t jit_cpudetect(void)
{
@ -702,6 +800,28 @@ static uint32_t jit_cpudetect(void)
}
#endif
#elif LJ_TARGET_RISCV64
#if LJ_HASJIT
#if LJ_TARGET_LINUX
/* HWPROBE-based detection of RVC, Zba, Zbb and Zicond. */
hwprobe_ret = syscall(__NR_riscv_hwprobe, &hwprobe_requests,
sizeof(hwprobe_requests) / sizeof(struct riscv_hwprobe), 0,
NULL, 0);
flags |= riscv_probe(riscv_compressed, JIT_F_RVC);
flags |= riscv_probe(riscv_zba, JIT_F_RVZba);
flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb);
flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond);
flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa);
flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead);
#endif
/* Detect V/P? */
/* V have no hardware available, P not ratified yet. */
#endif
#else
#error "Missing CPU detection for this architecture"
#endif

View File

@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
#define CALL_MREMAP_NOMOVE 0
#define CALL_MREMAP_MAYMOVE 1
#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64)
#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
#else
#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE

View File

@ -31,6 +31,8 @@
#define LUAJIT_ARCH_mips32 6
#define LUAJIT_ARCH_MIPS64 7
#define LUAJIT_ARCH_mips64 7
#define LUAJIT_ARCH_riscv64 8
#define LUAJIT_ARCH_RISCV64 8
/* Target OS. */
#define LUAJIT_OS_OTHER 0
@ -65,6 +67,8 @@
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64
#else
#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
#endif
@ -439,6 +443,21 @@
#define LJ_ARCH_VERSION 10
#endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64
/* Architecture description for 64 bit RISC-V (little-endian, GC64 only). */
#define LJ_ARCH_NAME "riscv64"
#define LJ_ARCH_BITS 64
#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */
#define LJ_TARGET_RISCV64 1
#define LJ_TARGET_GC64 1
/* Register numbers used by exception handling; presumably x10 (a0) for the
** return value and x1 (ra) for the return address -- confirm against the
** unwinder code.
*/
#define LJ_TARGET_EHRETREG 10
#define LJ_TARGET_EHRAREG 1
#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\
AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#else
#error "No target architecture defined"
#endif
@ -531,6 +550,10 @@
#error "Only n64 ABI supported for MIPS64"
#undef LJ_TARGET_MIPS
#endif
#elif LJ_TARGET_RISCV64
#if !defined(__riscv_float_abi_double)
#error "Only RISC-V 64 double float supported for now"
#endif
#endif
#endif

View File

@ -227,6 +227,8 @@ static Reg rset_pickrandom(ASMState *as, RegSet rs)
#include "lj_emit_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_emit_mips.h"
#elif LJ_TARGET_RISCV64
#include "lj_emit_riscv.h"
#else
#error "Missing instruction emitter for target CPU"
#endif
@ -1708,6 +1710,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_asm_mips.h"
#elif LJ_TARGET_RISCV64
#include "lj_asm_riscv64.h"
#else
#error "Missing assembler for target CPU"
#endif

2037
src/lj_asm_riscv64.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -575,6 +575,97 @@
goto done; \
}
#elif LJ_TARGET_RISCV64
/* -- RISC-V lp64d calling conventions ------------------------------------ */
/*
** Struct return, before the call: anything larger than 16 bytes is returned
** through a caller-supplied buffer whose address takes the next GPR slot.
*/
#define CCALL_HANDLE_STRUCTRET \
  cc->retref = (sz > 16); \
  if (cc->retref) \
    cc->gpr[ngpr++] = (GPRArg)dp;
/*
** Struct return, after the call: copy the returned aggregate to dp.
** ccall_classify_struct() yields (element size) + (element count << 8) for
** float/double-only aggregates, or 0/1 otherwise.
** - One or two floats: take the low 32 bits of each FPR separately.
** - Other FP aggregates with at most two elements: copy from the FPR area.
** - FP aggregates with more than two elements: copy from the GPR area.
** - Everything else: copy from wherever sp already points.
** Note: the last line must NOT end in a line continuation, otherwise the
** following source line would be spliced into this macro.
*/
#define CCALL_HANDLE_STRUCTRET2 \
  unsigned int cl = ccall_classify_struct(cts, ctr); \
  if ((cl & 4) && (cl >> 8) <= 2) { \
    CTSize i = (cl >> 8) - 1; \
    do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
  } else { \
    if (cl > 1) { \
      sp = (uint8_t *)&cc->fpr[0]; \
      if ((cl >> 8) > 2) \
	sp = (uint8_t *)&cc->gpr[0]; \
    } \
    memcpy(dp, sp, ctr->size); \
  }
/* Complex return, before the call: never returned by reference. */
#define CCALL_HANDLE_COMPLEXRET \
/* Complex values are returned in 1 or 2 FPRs. */ \
cc->retref = 0;
/*
** Complex return, after the call: gather real and imaginary parts from the
** first two FPR slots into dp, as floats or doubles depending on the size of
** the return type.
*/
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
((float *)dp)[0] = cc->fpr[0].f; \
((float *)dp)[1] = cc->fpr[1].f; \
} else { /* Copy complex double from FPRs. */ \
((double *)dp)[0] = cc->fpr[0].d; \
((double *)dp)[1] = cc->fpr[1].d; \
}
/*
** Complex argument handling.
** - long double complex: a fresh cdata is allocated and its pointer is passed
**   instead (sz becomes the pointer size).
** - float complex: marked with isfp = 2 and given two pointer-sized slots.
** - anything else (double complex): isfp = 1 and two pointer-sized slots.
*/
#define CCALL_HANDLE_COMPLEXARG \
/* Pass long double complex by reference. */ \
if (sz == 2*sizeof(long double)) { \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; \
} \
/* Pass complex in two FPRs or on stack. */ \
else if (sz == 2*sizeof(float)) { \
isfp = 2; \
sz = 2*CTSIZE_PTR; \
} else { \
isfp = 1; \
sz = 2*CTSIZE_PTR; \
}
/* Scalar return: a single-precision result is read from the float member of
** the first FPR slot instead of the default source.
*/
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
/*
** Struct argument handling.
** Classifies the struct, records its FP element count in nff, replaces
** structs larger than 16 bytes by a pointer to a fresh cdata copy, and marks
** float/double-only aggregates with isfp (2 for float elements, 1 otherwise).
** NOTE(review): nff is only assigned here; if the caller does not reset it
** per argument, a later non-struct FP argument would see the value left over
** from a previous struct -- confirm against the argument loop.
*/
#define CCALL_HANDLE_STRUCTARG \
/* Pass structs of size >16 by reference. */ \
unsigned int cl = ccall_classify_struct(cts, d); \
nff = cl >> 8; \
if (sz > 16) { \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; \
} \
/* Pass struct in FPRs. */ \
if (cl > 1) { \
isfp = (cl & 4) ? 2 : 1; \
}
/*
** Register assignment for one argument.
** Non-vararg FP arguments first try the FPR file (provided nff <= 2), using
** n2 slots: 1 for vectors, n for isfp == 1, 2 for isfp == 2. If that fails
** they fall back to n2 GPR slots. All other arguments try n GPR slots.
** On success dp points at the chosen slot and control jumps to 'done';
** otherwise execution falls through to the caller's code after the macro.
*/
#define CCALL_HANDLE_REGARG \
if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \
int n2 = ctype_isvector(d->info) ? 1 : \
isfp == 1 ? n : 2; \
if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
dp = &cc->fpr[nfpr]; \
nfpr += n2; \
goto done; \
} else { \
if (ngpr + n2 <= maxgpr) { \
dp = &cc->gpr[ngpr]; \
ngpr += n2; \
goto done; \
} \
} \
} else { /* Try to pass argument in GPRs. */ \
if (ngpr + n <= maxgpr) { \
dp = &cc->gpr[ngpr]; \
ngpr += n; \
goto done; \
} \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif
@ -891,6 +982,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
#endif
/* -- RISC-V ABI struct classification ---------------------------- */
#if LJ_TARGET_RISCV64
/*
** Classify a struct/union for FP register passing.
** Walks the sibling chain of ct and accumulates:
**   r: OR of the FP element sizes seen (a complex counts as its half size),
**   n: number of FP elements (for unions: the maximum over the members).
** Returns r + (n << 8) if all elements have the same size (4 or 8) and there
** are at most 4 of them. Otherwise returns 1 if the whole aggregate is at
** most 16 bytes, else 0. Any non-FP field or bitfield ends in the latter case.
*/
static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{
CTSize sz = ct->size;
unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
while (ct->sib) {
CType *sct;
ct = ctype_get(cts, ct->sib);
if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct);
if (ctype_isfp(sct->info)) {
r |= sct->size;
if (!isu) n++; else if (n == 0) n = 1;
} else if (ctype_iscomplex(sct->info)) {
/* A complex contributes two elements of half its size. */
r |= (sct->size >> 1);
if (!isu) n += 2; else if (n < 2) n = 2;
} else if (ctype_isstruct(sct->info)) {
goto substruct;
} else {
goto noth;
}
} else if (ctype_isbitfield(ct->info)) {
goto noth;
} else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
sct = ctype_rawchild(cts, ct);
substruct:
/* Nested aggregate: classify recursively and merge size/count. */
if (sct->size > 0) {
unsigned int s = ccall_classify_struct(cts, sct);
if (s <= 1) goto noth;
r |= (s & 255);
if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
}
}
}
/* r is exactly 4 or 8 only if every element had that same size. */
if ((r == 4 || r == 8) && n <= 4)
return r + (n << 8);
noth: /* Not a homogeneous float/double aggregate. */
return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
}
#endif
/* -- Common C call handling ---------------------------------------------- */
/* Infer the destination CTypeID for a vararg argument. */
@ -937,6 +1073,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#endif
#endif
#if LJ_TARGET_RISCV64
int nff = 0;
#endif
/* Clear unused regs to get some determinism in case of misdeclaration. */
memset(cc->gpr, 0, sizeof(cc->gpr));
#if CCALL_NUM_FPR
@ -1077,7 +1217,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
#if LJ_TARGET_RISCV64
if (isfp && d->size == sizeof(float))
((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64
|| (isfp && nsp == 0)
@ -1107,6 +1251,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CTSize i = (sz >> 2) - 1;
do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
}
#elif LJ_TARGET_RISCV64
if (isfp == 2 && nff <= 2) {
/* Split complex float into separate registers. */
CTSize i = (sz >> 2) - 1;
do {
((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i];
} while (i--);
}
#else
UNUSED(isfp);
#endif
@ -1116,7 +1268,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if ((int32_t)nsp < 0) nsp = 0;
#endif
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);

View File

@ -129,6 +129,21 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#elif LJ_TARGET_RISCV64
/* Number of GPR/FPR slots available for arguments and for return values. */
#define CCALL_NARG_GPR 8
#define CCALL_NARG_FPR 8
#define CCALL_NRET_GPR 2
#define CCALL_NRET_FPR 2
#define CCALL_SPS_EXTRA 3
#define CCALL_SPS_FREE 1
/* One GPR argument slot. */
typedef intptr_t GPRArg;
/* One FPR argument slot: a double, or two floats overlaid on the same
** storage with f in the low half and g in the high half (per
** LJ_ENDIAN_LOHI).
*/
typedef union FPRArg {
double d;
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#else
#error "Missing calling convention definitions for this architecture"
#endif
@ -175,7 +190,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
#elif LJ_TARGET_ARM64
void *retp; /* Aggregate return pointer in x8. */
#elif LJ_TARGET_PPC
#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64
uint8_t nfpr; /* Number of arguments in FPRs. */
#endif
#if LJ_32

View File

@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#define CALLBACK_MCODE_HEAD 52
#elif LJ_TARGET_RISCV64
/* Size in bytes of the common head emitted by callback_mcode_init() before
** the per-slot entries: 17 instructions of 4 bytes each.
*/
#define CALLBACK_MCODE_HEAD 68
#else
/* Missing support for this architecture. */
@ -238,6 +242,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
}
return p;
}
#elif LJ_TARGET_RISCV64
/*
** Fill a callback machine code page.
** The head (17 instructions) materializes two 64 bit constants without any
** sign-extension surprises: the address of lj_vm_ffi_callback in x6 and the
** global_State pointer in x7. Each is built from the upper 32 bits
** (LUI+ADDI) followed by three shift/add steps of 11, 11 and 10 bits, then
** control is transferred with an indirect jump through x6.
** After the head, one two-instruction entry per callback slot loads the slot
** number via LUI into x5 and jumps back to the start of the page.
** Returns the position just after the last emitted instruction.
*/
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
  uint32_t *p = page;
  uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
  uintptr_t ug = (uintptr_t)(void *)g;
  uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL;
  uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL;
  MSize slot;
  /* Upper 32 bits of both constants. */
  *p++ = RISCVI_LUI  | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi));
  *p++ = RISCVI_LUI  | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi));
  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi));
  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi));
  /* Bits 31..21 of the lower half. */
  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21);
  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21);
  /* Bits 20..10 of the lower half. */
  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff);
  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff);
  /* Bits 9..0 of the lower half. */
  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10);
  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10);
  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff);
  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff);
  /* JALR is an I-type instruction, so its (zero) offset uses the I-type
  ** immediate encoder, same as the JALR emitted by emit_call().
  */
  *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMI(0);
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
    *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot);
    /* PC-relative jump back to the head; the offset is taken from this JAL. */
    *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p));
    p++;
  }
  return p;
}
#else
/* Missing support for this architecture. */
#define callback_mcode_init(g, p) (p)
@ -527,6 +564,31 @@ void lj_ccallback_mcode_free(CTState *cts)
}
#endif
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
#elif LJ_TARGET_RISCV64
/*
** Locate the next callback argument in the saved register state.
** FP arguments are taken from the saved FPRs while n slots are left there,
** otherwise from the saved GPRs; all other arguments come from the saved
** GPRs. On success sp points at the slot and control jumps to 'done';
** otherwise execution falls through to the caller's code after the macro.
*/
#define CALLBACK_HANDLE_REGARG \
if (isfp) { \
if (nfpr + n <= CCALL_NARG_FPR) { \
sp = &cts->cb.fpr[nfpr]; \
nfpr += n; \
goto done; \
} else if (ngpr + n <= maxgpr) { \
sp = &cts->cb.gpr[ngpr]; \
ngpr += n; \
goto done; \
} \
} else { \
if (ngpr + n <= maxgpr) { \
sp = &cts->cb.gpr[ngpr]; \
ngpr += n; \
goto done; \
} \
}
/*
** Callback result fixup: a single-precision result is duplicated into the
** upper 32 bits of its 8 byte slot.
** NOTE(review): the outgoing call path fills the upper half of float
** arguments with 0xffffffff (NaN boxing) instead of duplicating the value --
** confirm that the VM's callback return path reads this slot as a 32 bit
** float, otherwise the upper half should be NaN-boxed here as well.
*/
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
@ -677,7 +739,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 &&
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))

574
src/lj_emit_riscv.h Normal file
View File

@ -0,0 +1,574 @@
/*
** RISC-V instruction emitter.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
**
** Contributed by gns from PLCT Lab, ISCAS.
*/
/*
** Fetch the value of a constant IR instruction as a 64 bit integer.
** Handles 64 bit integer, GC object and pointer constants; anything else is
** expected to be a 32 bit integer or null constant, whose value is returned
** sign-extended.
*/
static intptr_t get_k64val(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  switch (ir->o) {
  case IR_KINT64:
    return (intptr_t)ir_kint64(ir)->u64;
  case IR_KGC:
    return (intptr_t)ir_kgc(ir);
  case IR_KPTR:
  case IR_KKPTR:
    return (intptr_t)ir_kptr(ir);
  default:
    lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
	       "bad 64 bit const IR op %d", ir->o);
    return ir->i;  /* Sign-extended. */
  }
}
#define get_kval(as, ref) get_k64val(as, ref)
/* -- Emit basic instructions --------------------------------------------- */
/* Emit an R-type instruction (rd, rs1, rs2). Code is generated backwards:
** the instruction is stored just below the current machine code pointer.
*/
static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2)
{
  MCode ins = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2);
  as->mcp--;
  *as->mcp = ins;
}
#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0)
#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2)
#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2)
/* Emit an R4-type instruction (rd, rs1, rs2, rs3) below the current
** machine code pointer.
*/
static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3)
{
  MCode ins = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) |
	      RISCVF_S3(rs3);
  as->mcp--;
  *as->mcp = ins;
}
#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3)
/* Emit an I-type instruction (rd, rs1, imm). Only the low 12 bits of the
** immediate are encoded.
*/
static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i)
{
  uint32_t imm12 = (uint32_t)i & 0xfff;
  MCode ins = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI(imm12);
  as->mcp--;
  *as->mcp = ins;
}
/* I-type convenience wrappers: no source register / generic / shift amount. */
#define emit_di(as, riscvi, rd, i)	emit_i(as, riscvi, rd, 0, i)
#define emit_dsi(as, riscvi, rd, rs1, i)	emit_i(as, riscvi, rd, rs1, i)
/* The shift amount is limited to 6 bits. The argument is parenthesized so
** that an expression with a lower-precedence operator (e.g. a|b) is masked
** as a whole.
*/
#define emit_dsshamt(as, riscvi, rd, rs1, i) \
  emit_i(as, riscvi, rd, rs1, (i)&0x3f)
/* Emit an S-type instruction (rs1, rs2, imm). Only the low 12 bits of the
** immediate are encoded.
*/
static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i)
{
  uint32_t imm12 = (uint32_t)i & 0xfff;
  MCode ins = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS(imm12);
  as->mcp--;
  *as->mcp = ins;
}
#define emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i)
/*
static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i)
{
*--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe);
}
*/
/* Emit a U-type instruction (rd, imm). Only the low 20 bits of the
** immediate are encoded.
*/
static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i)
{
  uint32_t imm20 = i & 0xfffff;
  MCode ins = riscvi | RISCVF_D(rd) | RISCVF_IMMU(imm20);
  as->mcp--;
  *as->mcp = ins;
}
#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i)
/*
static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i)
{
*--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe);
}
*/
static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
static Reg ra_scratch(ASMState *as, RegSet allow);
/*
** Emit a load or store with base register and 12 bit signed offset.
** Loads are encoded as I-type (data is the destination), stores as S-type
** (data is the second source). Any other opcode is rejected by assertion.
*/
static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs)
{
  lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs);
  switch (riscvi) {
  /* Integer and FP loads. */
  case RISCVI_LB: case RISCVI_LBU:
  case RISCVI_LH: case RISCVI_LHU:
  case RISCVI_LW: case RISCVI_LWU:
  case RISCVI_LD:
  case RISCVI_FLW: case RISCVI_FLD:
    emit_i(as, riscvi, data, base, ofs);
    return;
  /* Integer and FP stores. */
  case RISCVI_SB: case RISCVI_SH: case RISCVI_SW: case RISCVI_SD:
  case RISCVI_FSW: case RISCVI_FSD:
    emit_s(as, riscvi, base, data, ofs);
    return;
  default:
    lj_assertA(0, "invalid lso");
    return;
  }
}
/*
** Emit a rotate-right by immediate (riscvi is RISCVI_RORI or RISCVI_RORIW).
** With Zbb the instruction is emitted as is; with only XThead it is mapped
** to the corresponding XThead rotate. Otherwise the rotate is synthesized
** from two shifts and an OR, using tmp as scratch register.
** Code is emitted backwards, so the statements below appear in reverse
** execution order.
*/
static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp,
int32_t shamt)
{
if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) {
/* Zbb takes precedence; substitute the XThead opcode only without it. */
if (!(as->flags & JIT_F_RVZbb)) switch (riscvi) {
case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break;
case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break;
default: lj_assertA(0, "invalid roti op"); break;
}
emit_dsshamt(as, riscvi, rd, rs1, shamt);
} else {
RISCVIns ai, bi;
int32_t shwid, shmsk;
switch (riscvi) {
case RISCVI_RORI:
ai = RISCVI_SRLI, bi = RISCVI_SLLI;
shwid = 64, shmsk = 63;
break;
case RISCVI_RORIW:
ai = RISCVI_SRLIW, bi = RISCVI_SLLIW;
shwid = 32, shmsk = 31;
break;
default:
lj_assertA(0, "invalid roti op");
return;
}
/* Executes as: tmp = rs1 >> shamt; rd = rs1 << (width-shamt); rd |= tmp. */
emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);
emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk);
emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk);
}
}
/*
** Emit a rotate by register (riscvi is one of ROL, ROR, ROLW, RORW).
** With Zbb the instruction is emitted as is. Otherwise it is synthesized as
**   tmp = -rs2; tmp = rs1 <opposite shift> tmp; rd = rs1 <shift> rs2;
**   rd |= tmp
** using tmp as scratch register (assumed to be distinct from rd/rs1/rs2).
** Code is emitted backwards, so the statements below appear in reverse
** execution order. In this order rd is written only after the last read of
** rs1 and rs2, so the sequence is correct for any aliasing of rd with rs1
** and/or rs2 (including rd == rs1 == rs2).
*/
static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp)
{
  if (as->flags & JIT_F_RVZbb) {
    emit_ds1s2(as, riscvi, rd, rs1, rs2);
  } else {
    RISCVIns sai, sbi;
    switch (riscvi) {
    case RISCVI_ROL:
      sai = RISCVI_SLL, sbi = RISCVI_SRL;
      break;
    case RISCVI_ROR:
      sai = RISCVI_SRL, sbi = RISCVI_SLL;
      break;
    case RISCVI_ROLW:
      sai = RISCVI_SLLW, sbi = RISCVI_SRLW;
      break;
    case RISCVI_RORW:
      sai = RISCVI_SRLW, sbi = RISCVI_SLLW;
      break;
    default:
      lj_assertA(0, "invalid rot op");
      return;
    }
    emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);   /* 4th: combine both halves. */
    emit_ds1s2(as, sai, rd, rs1, rs2);        /* 3rd: primary shift into rd. */
    emit_ds1s2(as, sbi, tmp, rs1, tmp);       /* 2nd: opposite shift by -rs2. */
    emit_ds2(as, RISCVI_NEG, tmp, rs2);       /* 1st: tmp = -rs2. */
  }
}
/*
** Emit a zero/sign extension (riscvi is one of ZEXT_B/H/W, SEXT_B/H/W).
** - If the matching extension is available (Zba for ZEXT_W, Zbb for the
**   others) the instruction is emitted as is.
** - Else with XThead, a bitfield extract from bit 0 up to bit 'hi' is used.
** - Else the value is shifted left and then right (logically for zero
**   extension, arithmetically for sign extension).
** ZEXT_B and SEXT_W are always emitted directly in the fallback paths.
** Code is emitted backwards, so the shift pair appears in reverse order.
*/
static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1)
{
if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) ||
(riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) {
emit_ds(as, riscvi, rd, rs1);
} else if (as->flags & JIT_F_RVXThead) {
uint32_t hi, sext;
switch (riscvi) {
case RISCVI_ZEXT_B:
case RISCVI_SEXT_W:
emit_ds(as, riscvi, rd, rs1);
return;
case RISCVI_ZEXT_H:
hi = 15, sext = 0;
break;
case RISCVI_ZEXT_W:
hi = 31, sext = 0;
break;
case RISCVI_SEXT_B:
hi = 7, sext = 1;
break;
case RISCVI_SEXT_H:
hi = 15, sext = 1;
break;
default:
lj_assertA(0, "invalid ext op");
return;
}
/* The upper bit index goes into bits 6 and up of the immediate field. */
emit_dsi(as, sext ? RISCVI_TH_EXT : RISCVI_TH_EXTU,
rd, rs1, hi << 6);
} else {
RISCVIns sli, sri;
int32_t shamt;
switch (riscvi) {
case RISCVI_ZEXT_B:
case RISCVI_SEXT_W:
emit_ds(as, riscvi, rd, rs1);
return;
case RISCVI_ZEXT_H:
sli = RISCVI_SLLI, sri = RISCVI_SRLI;
shamt = 48;
break;
case RISCVI_ZEXT_W:
sli = RISCVI_SLLI, sri = RISCVI_SRLI;
shamt = 32;
break;
case RISCVI_SEXT_B:
sli = RISCVI_SLLI, sri = RISCVI_SRAI;
shamt = 56;
break;
case RISCVI_SEXT_H:
sli = RISCVI_SLLI, sri = RISCVI_SRAI;
shamt = 48;
break;
default:
lj_assertA(0, "invalid ext op");
return;
}
/* Executes as: rd = rs1 << shamt; rd = rd >> shamt. */
emit_dsshamt(as, sri, rd, rd, shamt);
emit_dsshamt(as, sli, rd, rs1, shamt);
}
}
/*
** Clear the upper 17 bits of rs1 and put the result in rd (presumably
** stripping the type tag from a tagged value -- confirm against callers).
** With XThead a single unsigned extract of bits 46..0 is used, otherwise a
** shift-left/shift-right pair (emitted backwards: the SLLI executes first).
*/
static void emit_cleartp(ASMState *as, Reg rd, Reg rs1)
{
if (as->flags & JIT_F_RVXThead) {
emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6);
} else {
emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17);
emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17);
}
}
/*
static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp)
{
if (as->flags & JIT_F_RVZbb) {
emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2);
} else {
emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp);
emit_ds(as, RISCVI_NOT, tmp, rs2);
}
}
*/
/*
static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp)
{
if (as->flags & JIT_F_RVZbb) {
emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2);
} else {
emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp);
emit_ds(as, RISCVI_NOT, tmp, rs2);
}
}
*/
/*
** Emit rd = ~(rs1 ^ rs2). Uses XNOR with Zbb, otherwise XOR followed by NOT
** (emitted backwards: the XOR executes first).
*/
static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2)
{
if (as->flags & JIT_F_RVZbb) {
emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2);
} else {
emit_ds(as, RISCVI_NOT, rd, rd);
emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2);
}
}
/*
** Emit rd = rs1 + (rs2 << shamt).
** - Zba: SH1ADD/SH2ADD/SH3ADD with swapped source operands. Note that any
**   shamt other than 1..3 silently emits nothing in this path.
** - XThead: TH_ADDSL, with shamt placed above the rs2 field of the immediate.
** - Otherwise: shift rs2 into tmp, then add (emitted backwards: the SLLI
**   executes first). tmp is only used by this fallback.
*/
static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt)
{
if (as->flags & JIT_F_RVZba) {
switch (shamt) {
case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, rs2, rs1); break;
case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break;
case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break;
default: return;
}
} else if (as->flags & JIT_F_RVXThead) {
/* rs2 is passed as the 'immediate' so it lands in the rs2 bit positions. */
emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2);
} else {
emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp);
emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt);
}
}
#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1)
#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2)
#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3)
/* Load a constant that fits into 12 bits: ADDI with register 0 as source. */
static void emit_loadk12(ASMState *as, Reg rd, int32_t i)
{
  emit_i(as, RISCVI_ADDI, rd, 0, i);
}
/*
** Load a 32 bit constant into rd.
** Constants that fit into 12 bits use a single ADDI. Everything else uses
** LUI for the upper part plus a fixup for the lower 12 bits (emitted
** backwards: the LUI executes first).
** Corner case: for a positive constant whose rounded upper part becomes
** 0x80000, the fixup is done with XORI instead of ADDI -- presumably because
** LUI then yields a negative (sign-extended) value on RV64 and XOR with the
** sign-extended low part flips the upper bits back.
*/
static void emit_loadk32(ASMState *as, Reg rd, int32_t i)
{
if (checki12((int64_t)i)) {
emit_loadk12(as, rd, i);
} else {
if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0))
emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i));
else
emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i));
emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i));
}
}
/* -- Emit loads/stores --------------------------------------------------- */
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
/* Load a 32 bit constant into a GPR.
** No trailing semicolon in the expansion: the caller supplies it, which
** keeps the macro usable before 'else' and inside expressions.
*/
#define emit_loadi(as, r, i)	emit_loadk32(as, r, i)
/* Load a 64 bit constant into a GPR. */
/*
** Load a 64 bit constant into GPR r. Strategies, in order of preference:
** 1. The value fits a sign-extended 32 bit constant: emit_loadk32().
** 2. The value is within AUIPC range of the code being emitted: AUIPC+ADDI,
**    relative to the position the AUIPC will occupy (as->mcp - 2).
** 3. Otherwise load the upper 32 bits first, then shift in the lower 32 bits:
**    - lower half fits 11 unsigned bits: one shift by 32 plus optional ADDI;
**    - else build a SLLI/ADDI sequence that skips runs of zero bits, with
**      payloads of at most 11 bits each, and use it if it is shorter than
**      the fixed 6 instruction sequence (11+11+10 bits).
** Code is emitted backwards, so the upper half (emitted last) executes first.
*/
static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
{
int64_t u64_delta = (int64_t)((intptr_t)u64 - (intptr_t)(as->mcp - 2));
if (checki32((int64_t)u64)) {
emit_loadk32(as, r, (int32_t)u64);
} else if (checki32auipc(u64_delta)) {
emit_dsi(as, RISCVI_ADDI, r, r, RISCVF_LO(u64_delta));
emit_du(as, RISCVI_AUIPC, r, RISCVF_HI(u64_delta));
} else {
uint32_t lo32 = u64 & 0xfffffffful;
if (checku11(lo32)) {
if (lo32 > 0) emit_dsi(as, RISCVI_ADDI, r, r, lo32);
emit_dsshamt(as, RISCVI_SLLI, r, r, 32);
} else {
/* li_insn[] collects the sequence lowest chunk first, i.e. in emission
** (reverse execution) order.
*/
RISCVIns li_insn[7] = {0};
int shamt = 0, step = 0;
for(int bit = 0; bit < 32; bit++) {
if (lo32 & (1u << bit)) {
if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
/* Take up to 11 bits starting at this set bit, clipped at bit 31. */
int inc = bit+10 > 31 ? 31-bit : 10;
bit += inc, shamt = inc+1;
uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1);
uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0);
li_insn[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload);
} else shamt++;
}
if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
if (step < 6) {
for(int i = 0; i < step; i++)
*--as->mcp = li_insn[i];
} else {
/* Fixed sequence: shift in 11, 11 and 10 bits of the lower half. */
emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff);
emit_dsshamt(as, RISCVI_SLLI, r, r, 10);
emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff);
emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff);
emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
}
}
/* Upper 32 bits: a lone LUI suffices if their low 12 bits are zero. */
uint32_t hi32 = u64 >> 32;
if (hi32 & 0xfff) emit_loadk32(as, r, hi32);
else emit_du(as, RISCVI_LUI, r, hi32 >> 12);
}
}
#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
/* Get/set from constant pointer. */
/* Load from or store to a constant address: the address is put into a
** register picked from 'allow' and accessed with offset 0.
*/
static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow)
{
  Reg base = ra_allock(as, igcptr(p), allow);
  emit_lso(as, riscvi, r, base, 0);
}
/* Load 64 bit IR constant into register. */
/*
** Load a 64 bit IR constant into register r (GPR or FPR).
** For FPRs with Zfa available, constants whose lower 48 bits are zero are
** matched against riscv_fli_map_hi16[]: entry 0 is matched with the sign bit
** included, entries 1..31 with the sign bit masked off, adding an FNEG for
** negative values. A match is loaded with FLI_D. Everything else is built as
** an integer (in RID_TMP for FPR destinations) and moved over with FMV_D_X.
** Code is emitted backwards, so e.g. the FLI executes before the FNEG.
*/
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
const uint64_t *k = &ir_k64(ir)->u64;
Reg r64 = r;
if (rset_test(RSET_FPR, r)) {
if (as->flags & JIT_F_RVZfa) {
uint8_t sign = (*k >> 63) & 1;
uint16_t k_hi16 = (*k >> 48) & 0xffff;
uint64_t k_lo48 = *k & 0xffffffffffff;
uint16_t mk_hi16 = k_hi16 & 0x7fff;
if (!k_lo48) {
if (riscv_fli_map_hi16[0] == k_hi16) {
emit_ds(as, RISCVI_FLI_D, r, 0);
return;
}
for (int i = 1; i < 32; i++) {
if (riscv_fli_map_hi16[i] == mk_hi16) {
if (sign)
emit_ds1s2(as, RISCVI_FNEG_D, r, r, r);
emit_ds(as, RISCVI_FLI_D, r, i);
return;
}
}
}
}
/* No FLI match: go through an integer register. */
r64 = RID_TMP;
emit_ds(as, RISCVI_FMV_D_X, r, r64);
}
emit_loadu64(as, r64, *k);
}
/* Get/set global_State fields. */
/* Load from or store to a location at byte offset ofs from the register
** RID_GL (used by emit_getgl/emit_setgl with global_State field offsets).
*/
static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs)
{
emit_lso(as, riscvi, r, RID_GL, ofs);
}
#define emit_getgl(as, r, field) \
emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field))
#define emit_setgl(as, r, field) \
emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field))
/* Trace number is determined from per-trace exit stubs. */
#define emit_setvmstate(as, i) UNUSED(i)
/* -- Emit control-flow instructions -------------------------------------- */
/* Label for internal jumps. */
typedef MCode *MCLabel;
/* Return label pointing to current PC. */
#define emit_label(as) ((as)->mcp)
/*
** Emit a conditional branch to target.
** jump == -1: a single branch instruction; the target must be within the
**             B-type range.
** jump ==  0: two instruction slots; a NOP plus the branch if the target is
**             within B-type range, else an inverted branch over a JAL.
** jump ==  1: always the inverted branch over a JAL.
** Code is emitted backwards: the instruction stored first ends up last in
** memory. The offset is relative to the last slot (p - 1), which holds the
** instruction that actually transfers control to target.
*/
static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = (char *)target - (char *)(p - 1);
  switch (jump) {
  case -1:
    /* Same range check as used for the short form below. */
    lj_assertA(checki13(delta), "branch target out of range");  /* B */
    *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta);
    break;
  case 0: case 1:
    lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range");  /* ^B+J */
    if (checki13(delta) && !jump) {
      *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta);
      *--p = RISCVI_NOP;
    } else {
      *--p = RISCVI_JAL | RISCVF_IMMJ(delta);  /* Poorman's trampoline */
      /* Flip the branch condition and skip over the JAL (+8 bytes). */
      *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8);
    }
    break;
  default:
    lj_assertA(0, "invalid jump type");
    break;
  }
  as->mcp = p;
}
/*
** Emit an unconditional jump to target.
** jump == -1: a single JAL; the target must be within JAL range.
** jump ==  0: two instruction slots; JAL plus NOP if the target is within
**             JAL range, else AUIPC+JALR via RID_TMP.
** jump ==  1: always AUIPC+JALR via RID_TMP.
** Code is emitted backwards. For the two-slot forms the offset is relative
** to the first slot (p - 2), which holds the JAL or the AUIPC.
*/
static void emit_jump(ASMState *as, MCode *target, int jump)
{
MCode *p = as->mcp;
ptrdiff_t delta;
switch(jump) {
case -1:
delta = (char *)target - (char *)(p - 1);
lj_assertA(((delta + 0x100000) >> 21) == 0, "jump target out of range"); /* J */
*--p = RISCVI_JAL | RISCVF_IMMJ(delta);
break;
case 0: case 1:
delta = (char *)target - (char *)(p - 2);
lj_assertA(checki32auipc(delta), "jump target out of range"); /* AUIPC+JALR */
if (checki21(delta) && !jump) {
*--p = RISCVI_NOP;
*--p = RISCVI_JAL | RISCVF_IMMJ(delta);
} else {
*--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
*--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
}
break;
default:
lj_assertA(0, "invalid jump type");
break;
}
as->mcp = p;
}
#define emit_jmp(as, target) emit_jump(as, target, 0)
#define emit_mv(as, dst, src) \
emit_ds(as, RISCVI_MV, (dst), (src))
/*
** Emit a call to target, linking through RID_RA.
** - Within JAL range: JAL plus a NOP filler (two slots).
** - Within AUIPC range: AUIPC+JALR via RID_TMP. The range check is the same
**   AUIPC-safe check used by emit_jump() and emit_loadu64(), since a plain
**   32 bit check would also accept offsets that cannot be encoded by the
**   rounded AUIPC/JALR immediate pair.
** - Otherwise: indirect JALR through RID_CFUNCADDR, which is loaded with the
**   target address.
** needcfa > 1 requests that RID_CFUNCADDR holds the target address. The
** AUIPC path overrides it with 1, the indirect path with 2.
** Code is emitted backwards; the offset is relative to the first of the two
** slots (p - 2).
*/
static void emit_call(ASMState *as, void *target, int needcfa)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = (char *)target - (char *)(p - 2);
  if (checki21(delta)) {
    *--p = RISCVI_NOP;
    *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta);
  } else if (checki32auipc(delta)) {
    *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
    *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
    needcfa = 1;
  } else {
    *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0);
    needcfa = 2;
  }
  as->mcp = p;
  if (needcfa > 1)
    ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
}
/* -- Emit generic operations --------------------------------------------- */
/* Generic move between two regs. */
/*
** Generic register-to-register move. Register numbers below RID_MAX_GPR are
** GPRs, everything else is treated as an FPR. Moves involving an FPR pick
** the double or single precision form depending on the IR type.
*/
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  int src_gpr = (src < RID_MAX_GPR);
  int dst_gpr = (dst < RID_MAX_GPR);
  if (src_gpr && dst_gpr) {
    /* GPR -> GPR. */
    emit_mv(as, dst, src);
  } else if (src_gpr) {
    /* GPR -> FPR. */
    emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src);
  } else if (dst_gpr) {
    /* FPR -> GPR. */
    emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src);
  } else {
    /* FPR -> FPR: the source is passed for both source operands. */
    emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src);
  }
}
/* Emit an arithmetic operation with a constant operand. */
/*
** Emit an arithmetic/logic operation with a constant operand.
** riscvi is the immediate form (ADDI, XORI, ORI, ANDI). If k fits into 12
** bits it is used directly. Otherwise k is loaded into tmp and the register
** form of the operation is used (emitted backwards: the load executes first).
*/
static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src,
		     Reg tmp, intptr_t k)
{
  RISCVIns rop;
  if (checki12(k)) {
    emit_dsi(as, riscvi, dest, src, k);
    return;
  }
  switch (riscvi) {
  case RISCVI_ADDI: rop = RISCVI_ADD; break;
  case RISCVI_XORI: rop = RISCVI_XOR; break;
  case RISCVI_ORI: rop = RISCVI_OR; break;
  case RISCVI_ANDI: rop = RISCVI_AND; break;
  default: lj_assertA(0, "NYI arithmetic RISCVIns"); return;
  }
  emit_ds1s2(as, rop, dest, src, tmp);
  emit_loadu64(as, tmp, (uintptr_t)k);
}
/* Generic load of register with base and (small) offset address. */
/*
** Generic load of register r from base+ofs. GPRs use a 64 or 32 bit load
** depending on the IR type, FPRs a double or single precision load.
*/
static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  RISCVIns op;
  if (r < RID_MAX_GPR)
    op = irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW;
  else
    op = irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW;
  emit_lso(as, op, r, base, ofs);
}
/* Generic store of register with base and (small) offset address.
**
** GPR sources use SD for 64 bit IR types and SW otherwise. FPR sources use
** FSD for numbers and FSW otherwise.
*/
static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  RISCVIns st;
  if (r < RID_MAX_GPR)
    st = irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW;
  else
    st = irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW;
  emit_lso(as, st, r, base, ofs);
}
/* Add offset to pointer (in place). Nothing is emitted for a zero offset.
** RID_TMP serves as the scratch register for offsets that need one.
*/
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (ofs == 0)
    return;
  emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs);
}

/* Subtract ofs from the stack pointer. */
#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))

View File

@ -264,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#endif
#define CFRAME_OFS_MULTRES 0
#define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_RISCV64
#define CFRAME_OFS_ERRF 252
#define CFRAME_OFS_NRES 248
#define CFRAME_OFS_PREV 240
#define CFRAME_OFS_L 232
#define CFRAME_OFS_PC 224
#define CFRAME_OFS_MULTRES 0
#define CFRAME_SIZE 256
#define CFRAME_SHIFT_MULTRES 3
#else
#error "Missing CFRAME_* definitions for this architecture"
#endif

View File

@ -306,6 +306,9 @@ enum {
#elif LJ_TARGET_MIPS
DW_REG_SP = 29,
DW_REG_RA = 31,
#elif LJ_TARGET_RISCV64
DW_REG_SP = 2,
DW_REG_RA = 1,
#else
#error "Unsupported target architecture"
#endif
@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = {
.machine = 20,
#elif LJ_TARGET_MIPS
.machine = 8,
#elif LJ_TARGET_RISCV64
.machine = 243,
#else
#error "Unsupported target architecture"
#endif
@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); }
for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); }
}
#elif LJ_TARGET_RISCV64
{
int i;
for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); }
DB(DW_CFA_offset|9); DUV(17);
DB(DW_CFA_offset|8); DUV(18);
for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); }
DB(DW_CFA_offset|32|9); DUV(29);
DB(DW_CFA_offset|32|8); DUV(30);
}
#else
#error "Unsupported target architecture"
#endif

View File

@ -67,6 +67,46 @@
#endif
#endif
#elif LJ_TARGET_RISCV64
#define JIT_F_RVC (JIT_F_CPU << 0)
#define JIT_F_RVZba (JIT_F_CPU << 1)
#define JIT_F_RVZbb (JIT_F_CPU << 2)
#define JIT_F_RVZicond (JIT_F_CPU << 3)
#define JIT_F_RVZfa (JIT_F_CPU << 4)
#define JIT_F_RVXThead (JIT_F_CPU << 5)
#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead"
#if LJ_TARGET_LINUX
/* Local definitions for the riscv_hwprobe syscall, so that the build does
** not depend on system headers that already declare it.
*/
#include <sys/syscall.h>
/* Derive the syscall number if the system headers do not provide it. */
#ifndef __NR_riscv_hwprobe
#ifndef __NR_arch_specific_syscall
#define __NR_arch_specific_syscall 244
#endif
#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14)
#endif
/* One key/value probe entry.
** NOTE(review): presumably mirrors the kernel's struct riscv_hwprobe --
** confirm against the Linux hwprobe documentation.
*/
struct riscv_hwprobe {
int64_t key;
uint64_t value;
};
/* Probe keys. */
#define RISCV_HWPROBE_KEY_MVENDORID 0
#define RISCV_HWPROBE_KEY_MARCHID 1
#define RISCV_HWPROBE_KEY_MIMPID 2
#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
/* Extension bit masks; presumably tested against the value returned for
** RISCV_HWPROBE_KEY_IMA_EXT_0 -- TODO confirm. Bits >= 32 use 1ULL so the
** shift is done in 64 bit arithmetic.
*/
#define RISCV_HWPROBE_IMA_C (1 << 1)
#define RISCV_HWPROBE_EXT_ZBA (1 << 3)
#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32)
#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35)
#endif
#else
#define JIT_F_CPUSTRING ""

View File

@ -38,6 +38,12 @@
void sys_icache_invalidate(void *start, size_t len);
#endif
#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/cachectl.h>
#endif
/* Synchronize data/instruction cache. */
void lj_mcode_sync(void *start, void *end)
{
@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *end)
sys_icache_invalidate(start, (char *)end-(char *)start);
#elif LJ_TARGET_PPC
lj_vm_cachesync(start, end);
#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX
#if (defined(__GNUC__) || defined(__clang__))
__asm__ volatile("fence rw, rw");
#else
lj_vm_fence_rw_rw();
#endif
#ifdef __GLIBC__
__riscv_flush_icache(start, end, 0);
#else
syscall(__NR_riscv_flush_icache, start, end, 0UL);
#endif
#elif defined(__GNUC__) || defined(__clang__)
__clear_cache(start, end);
#else

View File

@ -55,7 +55,7 @@ typedef uint32_t RegSP;
/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64
typedef uint64_t RegSet;
#define RSET_BITS 6
#define rset_picktop_(rs) ((Reg)lj_fls64(rs))
@ -143,6 +143,8 @@ typedef uint32_t RegCost;
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_target_mips.h"
#elif LJ_TARGET_RISCV64
#include "lj_target_riscv.h"
#else
#error "Missing include for target CPU"
#endif

542
src/lj_target_riscv.h Normal file
View File

@ -0,0 +1,542 @@
/*
** Definitions for RISC-V CPUs.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_RISCV_H
#define _LJ_TARGET_RISCV_H
/* -- Registers IDs ------------------------------------------------------- */
/* X-macro lists of register names. The order defines the numeric register
** IDs: the 32 GPRs come first, followed by the 32 FPRs.
*/
#define GPRDEF(_) \
_(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \
_(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
_(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
_(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31)
#define FPRDEF(_) \
_(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
_(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
_(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
_(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
/* No virtual register IDs on this target. */
#define VRIDDEF(_)
/* Expands a register name to its RID_* enumerator. */
#define RIDENUM(name) RID_##name,
/* Register IDs. RA and SP take the slots of X1 and X2 in GPRDEF. */
enum {
GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
/* Aliases for registers with a fixed role. Note: TMP shares RA. */
RID_ZERO = RID_X0,
RID_TMP = RID_RA,
RID_GP = RID_X3,
RID_TP = RID_X4,
/* Calling conventions. */
RID_RET = RID_X10,
RID_RETLO = RID_X10,
RID_RETHI = RID_X11,
RID_FPRET = RID_F10,
RID_CFUNCADDR = RID_X5,
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_X18, /* Interpreter BASE. */
RID_LPC = RID_X20, /* Interpreter PC. */
RID_GL = RID_X21, /* Interpreter GL. */
RID_LREG = RID_X23, /* Interpreter L. */
/* Register ranges [min, max) and number of registers. */
RID_MIN_GPR = RID_X0,
RID_MAX_GPR = RID_X31+1,
RID_MIN_FPR = RID_MAX_GPR,
RID_MAX_FPR = RID_F31+1,
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Counts all of F0-F31 (RSET_FPR spans the full range). */
};
/* Range of registers usable for constant references: all GPR IDs. */
#define RID_NUM_KREF RID_NUM_GPR
#define RID_MIN_KREF RID_X0
/* -- Register sets ------------------------------------------------------- */
/* Make use of all registers, except ZERO, TMP, SP, GP, TP and GL.
** NOTE(review): CFUNCADDR (X5) is not part of RSET_FIXED; it is covered by
** RSET_SCRATCH_GPR and therefore remains allocatable.
*/
#define RSET_FIXED \
(RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL))
/* Allocatable GPRs and FPRs. */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL
/* Scratch registers: X5-X7, X10-X17, X28-X31 and F0-F7, F10-F17, F28-F31.
** NOTE(review): presumably the caller-saved registers of the standard
** RISC-V calling convention -- confirm against the psABI.
*/
#define RSET_SCRATCH_GPR \
(RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\
RSET_RANGE(RID_X10, RID_X17+1))
#define RSET_SCRATCH_FPR \
(RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\
RSET_RANGE(RID_F28, RID_F31+1))
#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
/* Argument registers: X10-X17 for GPR args, F10-F17 for FPR args. */
#define REGARG_FIRSTGPR RID_X10
#define REGARG_LASTGPR RID_X17
#define REGARG_NUMGPR 8
#define REGARG_FIRSTFPR RID_F10
#define REGARG_LASTFPR RID_F17
#define REGARG_NUMFPR 8
/* -- Spill slots --------------------------------------------------------- */
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
**
** SPS_FIXED: Available fixed spill slots in interpreter frame.
** This definition must match with the *.dasc file(s).
**
** SPS_FIRST: First spill slot for general use.
*/
#if LJ_32
#define SPS_FIXED 5
#else
#define SPS_FIXED 4
#endif
#define SPS_FIRST 4
/* Spill slot offset of the temporary slot. */
#define SPOFS_TMP 0
/* Convert a spill slot number to a byte offset (4 bytes per slot). */
#define sps_scale(slot) (4 * (int32_t)(slot))
/* Round the slot count beyond SPS_FIXED up to a multiple of 4 slots. */
#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
/* -- Exit state ---------------------------------------------------------- */
/* This definition must match with the *.dasc file(s).
** Register and spill slot contents captured for a trace exit. The field
** order and sizes are part of that contract, so do not reorder them.
*/
typedef struct {
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
/* Highest exit + 1 indicates stack check. */
#define EXITSTATE_CHECKEXIT 1

/* Return the address of a per-trace exit stub.
**
** p points at the words following a trace's machine code. Leading NOP words
** are skipped; the stub for exitno is then 4 + exitno words further on.
** NOTE(review): the 4 skipped words are presumably a shared stub header --
** confirm against the code that emits the exit stubs.
*/
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{
  for (; *p == 0x00000013; p++)
    ;  /* Skip RISCVI_NOP. */
  return p + 4 + exitno;
}

/* Avoid dependence on lj_jit.h if only including lj_target.h. */
#define exitstub_trace_addr(T, exitno) \
  exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
/* -- Instructions -------------------------------------------------------- */
/* Instruction fields.
** Each macro moves its argument to the bit position of the named field.
** Register and funct fields are masked to their width; the I-type immediate
** and shift amount are shifted only and must already be in range.
*/
#define RISCVF_D(d)	(((d)&31) << 7)
#define RISCVF_S1(r)	(((r)&31) << 15)
#define RISCVF_S2(r)	(((r)&31) << 20)
#define RISCVF_S3(r)	(((r)&31) << 27)
#define RISCVF_FUNCT2(f)	(((f)&3) << 25)
#define RISCVF_FUNCT3(f)	(((f)&7) << 12)
#define RISCVF_FUNCT7(f)	(((f)&127) << 25)
#define RISCVF_SHAMT(s)	((s) << 20)
#define RISCVF_RM(m)	(((m)&7) << 12)
#define RISCVF_IMMI(i)	((i) << 20)
/* The S/B/J-type immediates are scattered over several bit ranges. */
#define RISCVF_IMMS(i)	(((i)&0xfe0) << 20 | ((i)&0x1f) << 7)
#define RISCVF_IMMB(i)	(((i)&0x1000) << 19 | ((i)&0x800) >> 4 | ((i)&0x7e0) << 20 | ((i)&0x1e) << 7)
#define RISCVF_IMMU(i)	(((i)&0xfffff) << 12)
#define RISCVF_IMMJ(i)	(((i)&0x100000) << 11 | ((i)&0xff000) | ((i)&0x800) << 9 | ((i)&0x7fe) << 20)

/* Encode helpers.
** Split a value into an upper part and a low 12 bit part for a hi/lo
** instruction pair. The low part is sign-extended by the consuming
** instruction, so W_HI compensates by rounding up when bit 11 is set.
*/
#define RISCVF_W_HI(w)	((w) - ((((w)&0xfff)^0x800) - 0x800))
#define RISCVF_W_LO(w)	((w)&0xfff)
#define RISCVF_HI(i)	((RISCVF_W_HI(i) >> 12) & 0xfffff)
#define RISCVF_LO(i)	RISCVF_W_LO(i)

/* Check for valid field range.
** SIMM_OK: x fits a signed b bit field. UIMM_OK: x fits an unsigned b bit
** field. The bit count is fully parenthesized in every use, so that any
** expression may be passed for b.
*/
#define RISCVF_SIMM_OK(x, b)	((((x) + (1 << ((b)-1))) >> (b)) == 0)
#define RISCVF_UIMM_OK(x, b)	(((x) >> (b)) == 0)

#define checku11(i)	RISCVF_UIMM_OK(i, 11)
#define checki12(i)	RISCVF_SIMM_OK(i, 12)
#define checki13(i)	RISCVF_SIMM_OK(i, 13)
#define checki20(i)	RISCVF_SIMM_OK(i, 20)
#define checki21(i)	RISCVF_SIMM_OK(i, 21)
/* Signed 32 bit value that also stays below the given upper limit. */
#define checki32auipc(i)	(checki32(i) && (int32_t)(i) < 0x7ffff800)
/* Base encodings of the supported instructions.
** Each value holds the opcode and funct bits only; register and immediate
** fields are OR'ed in with the RISCVF_* macros. Pseudo-instructions (NOP,
** MV, NOT, NEG, RET, ZEXT_B, SEXT_W, ...) are the underlying instruction
** with their fixed operands already filled in.
*/
typedef enum RISCVIns {

  /* --- RVI --- */
  RISCVI_LUI = 0x00000037,
  RISCVI_AUIPC = 0x00000017,

  RISCVI_JAL = 0x0000006f,
  RISCVI_JALR = 0x00000067,

  RISCVI_ADDI = 0x00000013,
  RISCVI_SLTI = 0x00002013,
  RISCVI_SLTIU = 0x00003013,
  RISCVI_XORI = 0x00004013,
  RISCVI_ORI = 0x00006013,
  RISCVI_ANDI = 0x00007013,

  RISCVI_SLLI = 0x00001013,
  RISCVI_SRLI = 0x00005013,
  RISCVI_SRAI = 0x40005013,

  RISCVI_ADD = 0x00000033,
  RISCVI_SUB = 0x40000033,
  RISCVI_SLL = 0x00001033,
  RISCVI_SLT = 0x00002033,
  RISCVI_SLTU = 0x00003033,
  RISCVI_XOR = 0x00004033,
  RISCVI_SRL = 0x00005033,
  RISCVI_SRA = 0x40005033,
  RISCVI_OR = 0x00006033,
  RISCVI_AND = 0x00007033,

  RISCVI_LB = 0x00000003,
  RISCVI_LH = 0x00001003,
  RISCVI_LW = 0x00002003,
  RISCVI_LBU = 0x00004003,
  RISCVI_LHU = 0x00005003,
  RISCVI_SB = 0x00000023,
  RISCVI_SH = 0x00001023,
  RISCVI_SW = 0x00002023,

  RISCVI_BEQ = 0x00000063,
  RISCVI_BNE = 0x00001063,
  RISCVI_BLT = 0x00004063,
  RISCVI_BGE = 0x00005063,
  RISCVI_BLTU = 0x00006063,
  RISCVI_BGEU = 0x00007063,

  RISCVI_ECALL = 0x00000073,
  RISCVI_EBREAK = 0x00100073,

  RISCVI_NOP = 0x00000013,
  RISCVI_MV = 0x00000013,
  RISCVI_NOT = 0xfff04013,
  RISCVI_NEG = 0x40000033,
  RISCVI_RET = 0x00008067,
  RISCVI_ZEXT_B = 0x0ff07013,

#if LJ_TARGET_RISCV64
  RISCVI_LWU = 0x00006003,  /* LOAD opcode with funct3 = 110. */
  RISCVI_LD = 0x00003003,
  RISCVI_SD = 0x00003023,

  RISCVI_ADDIW = 0x0000001b,

  RISCVI_SLLIW = 0x0000101b,
  RISCVI_SRLIW = 0x0000501b,
  RISCVI_SRAIW = 0x4000501b,

  RISCVI_ADDW = 0x0000003b,
  RISCVI_SUBW = 0x4000003b,
  RISCVI_SLLW = 0x0000103b,
  RISCVI_SRLW = 0x0000503b,
  RISCVI_SRAW = 0x4000503b,

  RISCVI_NEGW = 0x4000003b,
  RISCVI_SEXT_W = 0x0000001b,
#endif

  /* --- RVM --- */
  RISCVI_MUL = 0x02000033,
  RISCVI_MULH = 0x02001033,
  RISCVI_MULHSU = 0x02002033,
  RISCVI_MULHU = 0x02003033,
  RISCVI_DIV = 0x02004033,
  RISCVI_DIVU = 0x02005033,
  RISCVI_REM = 0x02006033,
  RISCVI_REMU = 0x02007033,
#if LJ_TARGET_RISCV64
  RISCVI_MULW = 0x0200003b,
  RISCVI_DIVW = 0x0200403b,
  RISCVI_DIVUW = 0x0200503b,
  RISCVI_REMW = 0x0200603b,
  RISCVI_REMUW = 0x0200703b,
#endif

  /* --- RVF --- */
  RISCVI_FLW = 0x00002007,
  RISCVI_FSW = 0x00002027,

  RISCVI_FMADD_S = 0x00000043,
  RISCVI_FMSUB_S = 0x00000047,
  RISCVI_FNMSUB_S = 0x0000004b,
  RISCVI_FNMADD_S = 0x0000004f,

  RISCVI_FADD_S = 0x00000053,
  RISCVI_FSUB_S = 0x08000053,
  RISCVI_FMUL_S = 0x10000053,
  RISCVI_FDIV_S = 0x18000053,
  RISCVI_FSQRT_S = 0x58000053,

  RISCVI_FSGNJ_S = 0x20000053,
  RISCVI_FSGNJN_S = 0x20001053,
  RISCVI_FSGNJX_S = 0x20002053,

  RISCVI_FMIN_S = 0x28000053,
  RISCVI_FMAX_S = 0x28001053,

  RISCVI_FCVT_W_S = 0xc0000053,
  RISCVI_FCVT_WU_S = 0xc0100053,

  RISCVI_FMV_X_W = 0xe0000053,

  RISCVI_FEQ_S = 0xa0002053,
  RISCVI_FLT_S = 0xa0001053,
  RISCVI_FLE_S = 0xa0000053,

  RISCVI_FCLASS_S = 0xe0001053,

  RISCVI_FCVT_S_W = 0xd0000053,
  RISCVI_FCVT_S_WU = 0xd0100053,
  RISCVI_FMV_W_X = 0xf0000053,

  RISCVI_FMV_S = 0x20000053,
  RISCVI_FNEG_S = 0x20001053,
  RISCVI_FABS_S = 0x20002053,
#if LJ_TARGET_RISCV64
  RISCVI_FCVT_L_S = 0xc0200053,
  RISCVI_FCVT_LU_S = 0xc0300053,
  RISCVI_FCVT_S_L = 0xd0200053,
  RISCVI_FCVT_S_LU = 0xd0300053,
#endif

  /* --- RVD --- */
  RISCVI_FLD = 0x00003007,
  RISCVI_FSD = 0x00003027,

  RISCVI_FMADD_D = 0x02000043,
  RISCVI_FMSUB_D = 0x02000047,
  RISCVI_FNMSUB_D = 0x0200004b,
  RISCVI_FNMADD_D = 0x0200004f,

  RISCVI_FADD_D = 0x02000053,
  RISCVI_FSUB_D = 0x0a000053,
  RISCVI_FMUL_D = 0x12000053,
  RISCVI_FDIV_D = 0x1a000053,
  RISCVI_FSQRT_D = 0x5a000053,

  RISCVI_FSGNJ_D = 0x22000053,
  RISCVI_FSGNJN_D = 0x22001053,
  RISCVI_FSGNJX_D = 0x22002053,

  RISCVI_FMIN_D = 0x2a000053,
  RISCVI_FMAX_D = 0x2a001053,

  RISCVI_FCVT_S_D = 0x40100053,
  RISCVI_FCVT_D_S = 0x42000053,

  RISCVI_FEQ_D = 0xa2002053,
  RISCVI_FLT_D = 0xa2001053,
  RISCVI_FLE_D = 0xa2000053,

  RISCVI_FCLASS_D = 0xe2001053,

  RISCVI_FCVT_W_D = 0xc2000053,
  RISCVI_FCVT_WU_D = 0xc2100053,
  RISCVI_FCVT_D_W = 0xd2000053,
  RISCVI_FCVT_D_WU = 0xd2100053,

  RISCVI_FMV_D = 0x22000053,
  RISCVI_FNEG_D = 0x22001053,
  RISCVI_FABS_D = 0x22002053,
#if LJ_TARGET_RISCV64
  RISCVI_FCVT_L_D = 0xc2200053,
  RISCVI_FCVT_LU_D = 0xc2300053,
  RISCVI_FMV_X_D = 0xe2000053,
  RISCVI_FCVT_D_L = 0xd2200053,
  RISCVI_FCVT_D_LU = 0xd2300053,
  RISCVI_FMV_D_X = 0xf2000053,
#endif

  /* --- Zifencei --- */
  RISCVI_FENCE = 0x0000000f,
  RISCVI_FENCE_I = 0x0000100f,

  /* --- Zicsr --- */
  RISCVI_CSRRW = 0x00001073,
  RISCVI_CSRRS = 0x00002073,
  RISCVI_CSRRC = 0x00003073,
  RISCVI_CSRRWI = 0x00005073,
  RISCVI_CSRRSI = 0x00006073,
  RISCVI_CSRRCI = 0x00007073,

  /* --- RVB --- */
  /* Zba */
  RISCVI_SH1ADD = 0x20002033,
  RISCVI_SH2ADD = 0x20004033,
  RISCVI_SH3ADD = 0x20006033,
#if LJ_TARGET_RISCV64
  RISCVI_ADD_UW = 0x0800003b,

  RISCVI_SH1ADD_UW = 0x2000203b,
  RISCVI_SH2ADD_UW = 0x2000403b,
  RISCVI_SH3ADD_UW = 0x2000603b,

  RISCVI_SLLI_UW = 0x0800101b,

  RISCVI_ZEXT_W = 0x0800003b,
#endif
  /* Zbb */
  RISCVI_ANDN = 0x40007033,
  RISCVI_ORN = 0x40006033,
  RISCVI_XNOR = 0x40004033,

  RISCVI_CLZ = 0x60001013,
  RISCVI_CTZ = 0x60101013,

  RISCVI_CPOP = 0x60201013,

  RISCVI_MAX = 0x0a006033,
  RISCVI_MAXU = 0x0a007033,
  RISCVI_MIN = 0x0a004033,
  RISCVI_MINU = 0x0a005033,

  RISCVI_SEXT_B = 0x60401013,
  RISCVI_SEXT_H = 0x60501013,
#if LJ_TARGET_RISCV64
  RISCVI_ZEXT_H = 0x0800403b,
#endif

  RISCVI_ROL = 0x60001033,
  RISCVI_ROR = 0x60005033,
  RISCVI_RORI = 0x60005013,

  RISCVI_ORC_B = 0x28705013,

#if LJ_TARGET_RISCV64
  RISCVI_REV8 = 0x6b805013,

  RISCVI_CLZW = 0x6000101b,
  RISCVI_CTZW = 0x6010101b,

  RISCVI_CPOPW = 0x6020101b,

  RISCVI_ROLW = 0x6000103b,
  RISCVI_RORIW = 0x6000501b,
  RISCVI_RORW = 0x6000503b,
#endif
  /* NYI: Zbc, Zbs */

  /* --- Zicond --- */
  RISCVI_CZERO_EQZ = 0x0e005033,
  RISCVI_CZERO_NEZ = 0x0e007033,

  /* --- Zfa --- */
  RISCVI_FLI_S = 0xf0100053,
  RISCVI_FMINM_S = 0x28002053,
  RISCVI_FMAXM_S = 0x28003053,
  RISCVI_FROUND_S = 0x40400053,
  RISCVI_FROUNDNX_S = 0x40500053,
  RISCVI_FCVTMOD_W_D = 0xc2801053,
  RISCVI_FLEQ_S = 0xa0004053,
  RISCVI_FLTQ_S = 0xa0005053,

  RISCVI_FLI_D = 0xf2100053,
  RISCVI_FMINM_D = 0x2a002053,
  RISCVI_FMAXM_D = 0x2a003053,
  RISCVI_FROUND_D = 0x42400053,
  RISCVI_FROUNDNX_D = 0x42500053,
  RISCVI_FLEQ_D = 0xa2004053,
  RISCVI_FLTQ_D = 0xa2005053,

  /* FROUND/FROUNDNX with the rounding mode field already filled in. */
  RISCVI_FROUND_S_RTZ = 0x40401053,
  RISCVI_FROUND_S_RDN = 0x40402053,
  RISCVI_FROUND_S_RUP = 0x40403053,
  RISCVI_FROUNDNX_S_RTZ = 0x40501053,
  RISCVI_FROUNDNX_S_RDN = 0x40502053,
  RISCVI_FROUNDNX_S_RUP = 0x40503053,
  RISCVI_FROUND_D_RTZ = 0x42401053,
  RISCVI_FROUND_D_RDN = 0x42402053,
  RISCVI_FROUND_D_RUP = 0x42403053,
  RISCVI_FROUNDNX_D_RTZ = 0x42501053,
  RISCVI_FROUNDNX_D_RDN = 0x42502053,
  RISCVI_FROUNDNX_D_RUP = 0x42503053,

  /* TBD: RVV?, RVP?, RVJ? */

  /* --- XThead* --- */
  /* XTHeadBa */
  RISCVI_TH_ADDSL = 0x0000100b,

  /* XTHeadBb */
  RISCVI_TH_SRRI = 0x1000100b,
#if LJ_TARGET_RISCV64
  RISCVI_TH_SRRIW = 0x1400100b,
#endif
  RISCVI_TH_EXT = 0x0000200b,
  RISCVI_TH_EXTU = 0x0000300b,
  RISCVI_TH_FF0 = 0x8400100b,
  RISCVI_TH_FF1 = 0x8600100b,
  RISCVI_TH_REV = 0x8200100b,
#if LJ_TARGET_RISCV64
  RISCVI_TH_REVW = 0x9000100b,
#endif
  RISCVI_TH_TSTNBZ = 0x8000100b,

  /* XTHeadBs */
  RISCVI_TH_TST = 0x8800100b,

  /* XTHeadCondMov */
  RISCVI_TH_MVEQZ = 0x4000100b,
  RISCVI_TH_MVNEZ = 0x4200100b,

  /* XTHeadMac */
  RISCVI_TH_MULA = 0x2000100b,
  RISCVI_TH_MULAH = 0x2800100b,
#if LJ_TARGET_RISCV64
  RISCVI_TH_MULAW = 0x2400100b,
#endif
  RISCVI_TH_MULS = 0x2200100b,
  RISCVI_TH_MULSH = 0x2a00100b,
  RISCVI_TH_MULSW = 0x2600100b,

  /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */
} RISCVIns;
/* Values for the rounding mode (rm) instruction field, see RISCVF_RM(). */
typedef enum RISCVRM {
  RISCVRM_RNE = 0,  /* Round to nearest, ties to even. */
  RISCVRM_RTZ = 1,  /* Round towards zero. */
  RISCVRM_RDN = 2,  /* Round down. */
  RISCVRM_RUP = 3,  /* Round up. */
  RISCVRM_RMM = 4,  /* Round to nearest, ties to max magnitude. */
  RISCVRM_DYN = 7   /* Dynamic rounding mode. */
} RISCVRM;
/* Table of 32 constants, indexed 0..31. Each entry holds the upper 16 bits
** of the IEEE-754 double representation of the value named in its comment.
** NOTE(review): presumably ordered like the immediate table of the Zfa FLI
** instruction -- confirm against the Zfa specification.
*/
static const uint16_t riscv_fli_map_hi16[32] = {
  /*  0 */ 0xbff0u,  /* -1 */
  /*  1 */ 0x0010u,  /* min */
  /*  2 */ 0x3ef0u,  /* 2^-16 */
  /*  3 */ 0x3f00u,  /* 2^-15 */
  /*  4 */ 0x3f70u,  /* 2^-8 */
  /*  5 */ 0x3f80u,  /* 2^-7 */
  /*  6 */ 0x3fb0u,  /* 2^-4 */
  /*  7 */ 0x3fc0u,  /* 2^-3, 0.125 */
  /*  8 */ 0x3fd0u,  /* 2^-2, 0.25 */
  /*  9 */ 0x3fd4u,  /* 0.3125 */
  /* 10 */ 0x3fd8u,  /* 0.375 */
  /* 11 */ 0x3fdcu,  /* 0.4375 */
  /* 12 */ 0x3fe0u,  /* 0.5 */
  /* 13 */ 0x3fe4u,  /* 0.625 */
  /* 14 */ 0x3fe8u,  /* 0.75 */
  /* 15 */ 0x3fecu,  /* 0.875 */
  /* 16 */ 0x3ff0u,  /* 1 */
  /* 17 */ 0x3ff4u,  /* 1.25 */
  /* 18 */ 0x3ff8u,  /* 1.5 */
  /* 19 */ 0x3ffcu,  /* 1.75 */
  /* 20 */ 0x4000u,  /* 2 */
  /* 21 */ 0x4004u,  /* 2.5 */
  /* 22 */ 0x4008u,  /* 3 */
  /* 23 */ 0x4010u,  /* 4 */
  /* 24 */ 0x4020u,  /* 8 */
  /* 25 */ 0x4030u,  /* 16 */
  /* 26 */ 0x4060u,  /* 128 */
  /* 27 */ 0x4070u,  /* 256 */
  /* 28 */ 0x40e0u,  /* 2^15, 32768 */
  /* 29 */ 0x40f0u,  /* 2^16, 65536 */
  /* 30 */ 0x7ff0u,  /* inf */
  /* 31 */ 0x7ff8u   /* canonical nan */
};
#endif

View File

@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]);
#if LJ_TARGET_PPC
void lj_vm_cachesync(void *start, void *end);
#endif
#if LJ_TARGET_RISCV64
/* Out-of-line memory fence. lj_mcode_sync() calls this in place of the
** inline `fence rw, rw` asm statement when the compiler is neither GCC nor
** Clang. Declared with (void) so that it is a proper prototype.
*/
void lj_vm_fence_rw_rw(void);
#endif
LJ_ASMF double lj_vm_foldarith(double x, double y, int op);
#if LJ_HASJIT
LJ_ASMF double lj_vm_foldfpm(double x, int op);

View File

@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double y, int op)
/* -- Helper functions for generated machine code ------------------------- */
#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \
|| LJ_TARGET_RISCV64
int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
{
uint32_t y, ua, ub;

4814
src/vm_riscv64.dasc Normal file

File diff suppressed because it is too large Load Diff