mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-08 15:34:09 +00:00
Added support for 4 byte opcode intrinsics
This commit is contained in:
parent
7c697b0a5c
commit
57ff67552a
@ -669,6 +669,10 @@ static MCode* emit_intrins(ASMState *as, CIntrinsic *intrins, Reg r1,
|
||||
if (intrins->flags & INTRINSFLAG_IMMB) {
|
||||
*--as->mcp = intrins->immb;
|
||||
}
|
||||
/* Tell emit_op the opcode is 4 bytes long */
|
||||
if (intrins->flags & INTRINSFLAG_LARGEOP) {
|
||||
r2 |= OP4B;
|
||||
}
|
||||
|
||||
emit_mrm(as, intrins->opcode, (Reg)r2, r1);
|
||||
|
||||
|
@ -385,7 +385,7 @@ static void setopcode(lua_State *L, CIntrinsic *intrins, uint32_t opcode)
|
||||
if (len < 4) {
|
||||
opcode |= (uint8_t)(int8_t)-(len+1);
|
||||
} else {
|
||||
lj_err_callermsg(L, "bad opcode literal");
|
||||
intrins->flags |= INTRINSFLAG_LARGEOP;
|
||||
}
|
||||
|
||||
if (intrin_regmode(intrins) == DYNREG_OPEXT) {
|
||||
|
@ -57,6 +57,8 @@ typedef enum INTRINSFLAGS {
|
||||
INTRINSFLAG_PREFIX = 0x200,
|
||||
/* Opcode has an immediate byte that needs to be set at construction time */
|
||||
INTRINSFLAG_IMMB = 0x400,
|
||||
/* Opcode is larger than the emit system normally handles on x86/x64 (4 bytes) */
|
||||
INTRINSFLAG_LARGEOP = 0x800,
|
||||
|
||||
/* Opcode uses ymm registers */
|
||||
INTRINSFLAG_VEX256 = 0x4000,
|
||||
@ -89,7 +91,7 @@ typedef struct AsmHeader {
|
||||
#define intrin_setopextb(intrins, opext) \
|
||||
lua_assert((intrins)->outsz < 4); \
|
||||
((intrins)->out[3] = (opext))
|
||||
#define intrin_oplen(intrins) ((-(int8_t)(intrins)->opcode)-1)
|
||||
#define intrin_oplen(intrins) (((intrins)->flags & INTRINSFLAG_LARGEOP) ? 4 : (-(int8_t)(intrins)->opcode)-1)
|
||||
|
||||
/* odd numbered have a dynamic output */
|
||||
#define intrin_dynrout(intrins) (intrin_regmode(intrins) && reg_isdyn(intrins->out[0]))
|
||||
|
@ -899,8 +899,47 @@ it("shufps", function()
|
||||
assert_equal(vout[3], 1.5)
|
||||
end)
|
||||
|
||||
it("phaddd 4byte opcode", function()
  -- The opcode literal below (66 0F 38 02) is four bytes long, so declaring
  -- and calling this intrinsic exercises the 4 byte opcode support.
  ffi.cdef([[int4 phaddd(int4 v1, int4 v2) __mcode("660F3802rM");]])

  local phaddd = ffi.C.phaddd

  -- Horizontal sum of an int4: applying phaddd to a vector and itself twice
  -- is expected to leave the sum of all four lanes in lane 0 (see the
  -- assertions below). Declared `local` so the helper does not leak into the
  -- global environment shared by the other tests.
  local function hsum(v)
    local result = phaddd(v, v)
    result = phaddd(result, result)
    return result[0]
  end

  local v = ffi.new("int4", 1, 2, 3, 4)
  local vzero = ffi.new("int4", 0)

  assert_equal(hsum(v), 10)
  assert_equal(hsum(vzero), 0)
end)
|
||||
|
||||
context("mixed register type opcodes", function()
|
||||
|
||||
it("pcmpstr", function()
  -- pcmpistri takes two byte16 operands plus an immediate byte (0x2 here)
  -- and reports its result through the ecx register.
  ffi.cdef([[void pcmpistri(byte16 string, byte16 mask) __mcode("660F3A63rMU", 0x2) __reglist(out, int32_t ecx)]])

  local charlist = ffi.new("byte16", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
  -- Named `str` rather than `string` so the standard `string` library is not
  -- shadowed inside this test.
  local str = ffi.new("byte16", 2, 2, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 7, 7, 2, 2)

  -- Index 4 is the first position in `str` holding a byte equal to 1.
  local ecx = ffi.C.pcmpistri(charlist, str)
  assert_equal(ecx, 4)

  -- pcmpistrm returns its result in xmm0; pmovmskb then collapses that
  -- vector into an integer bit mask so it can be compared directly.
  ffi.cdef([[
    void pcmpistrm(byte16 string, byte16 mask) __mcode("660F3A62rMU", 0x40) __reglist(out, byte16 xmm0v);
    int32_t pmovmskb(byte16 mask) __mcode("660FD7rM");
  ]])

  local mask = ffi.C.pcmpistrm(charlist, str)
  mask = ffi.C.pmovmskb(mask)

  -- 48 == 0x30: bits 4 and 5, matching the two bytes equal to 1 in `str`.
  assert_equal(mask, 48)
end)
|
||||
|
||||
it("cvttsd2s", function()
|
||||
assert_cdef([[int cvttsd2s(double n) __mcode("F20F2CrM");]], "cvttsd2s")
|
||||
local cvttsd2s = ffi.C.cvttsd2s
|
||||
|
Loading…
Reference in New Issue
Block a user