Added support for 4 byte opcode intrinsics

This commit is contained in:
fsfod 2016-03-29 11:13:19 +01:00
parent 7c697b0a5c
commit 57ff67552a
4 changed files with 47 additions and 2 deletions

View File

@ -669,6 +669,10 @@ static MCode* emit_intrins(ASMState *as, CIntrinsic *intrins, Reg r1,
if (intrins->flags & INTRINSFLAG_IMMB) {
*--as->mcp = intrins->immb;
}
/* Tell emit_op the opcode is 4 bytes long */
if (intrins->flags & INTRINSFLAG_LARGEOP) {
r2 |= OP4B;
}
emit_mrm(as, intrins->opcode, (Reg)r2, r1);

View File

@ -385,7 +385,7 @@ static void setopcode(lua_State *L, CIntrinsic *intrins, uint32_t opcode)
if (len < 4) {
opcode |= (uint8_t)(int8_t)-(len+1);
} else {
lj_err_callermsg(L, "bad opcode literal");
intrins->flags |= INTRINSFLAG_LARGEOP;
}
if (intrin_regmode(intrins) == DYNREG_OPEXT) {

View File

@ -57,6 +57,8 @@ typedef enum INTRINSFLAGS {
INTRINSFLAG_PREFIX = 0x200,
/* Opcode has an immediate byte that needs to be set at construction time */
INTRINSFLAG_IMMB = 0x400,
/* Opcode is 4 bytes long, which is larger than the x86/x64 emit system normally handles */
INTRINSFLAG_LARGEOP = 0x800,
/* Opcode uses ymm registers */
INTRINSFLAG_VEX256 = 0x4000,
@ -89,7 +91,7 @@ typedef struct AsmHeader {
/*
** Store the opcode extension byte `opext` in out[3] of the intrinsic.
** Asserts first that outsz is below 4 (presumably so that slot 3 is not
** already holding an output register -- confirm against the callers).
**
** Written as one comma expression instead of two statements so the macro
** still behaves as a single statement when it is the body of an unbraced
** if/else; previously only the assert would have been guarded.
*/
#define intrin_setopextb(intrins, opext) \
  (lua_assert((intrins)->outsz < 4), \
   ((intrins)->out[3] = (opext)))
/*
** Opcode length in bytes of an intrinsic.
** This form negates the low byte of the opcode field, read as a signed 8 bit
** value, and subtracts one.
** NOTE(review): two definitions of intrin_oplen appear back to back here; the
** first looks like the pre-change form that the second one replaces --
** confirm that only the second is kept.
*/
#define intrin_oplen(intrins) ((-(int8_t)(intrins)->opcode)-1)
/*
** Same as above, except that intrinsics flagged with INTRINSFLAG_LARGEOP
** always report a length of 4.
*/
#define intrin_oplen(intrins) (((intrins)->flags & INTRINSFLAG_LARGEOP) ? 4 : (-(int8_t)(intrins)->opcode)-1)
/*
** odd numbered have a dynamic output
** Evaluates to non-zero when the intrinsic has a non-zero register mode and
** reg_isdyn() reports its first output register (out[0]) as dynamic.
** The macro argument is parenthesized so that any pointer expression can be
** passed safely.
*/
#define intrin_dynrout(intrins) \
  (intrin_regmode(intrins) && reg_isdyn((intrins)->out[0]))

View File

@ -899,8 +899,47 @@ it("shufps", function()
assert_equal(vout[3], 1.5)
end)
-- Exercises an intrinsic whose opcode literal is 4 bytes long (66 0F 38 02).
it("phaddd 4byte opcode", function()
  ffi.cdef([[int4 phaddd(int4 v1, int4 v2) __mcode("660F3802rM");]])

  local phaddd = ffi.C.phaddd

  -- Applies phaddd twice and returns element 0 of the result.
  -- Declared local so the helper does not leak into the global environment.
  local function hsum(v)
    local result = phaddd(v, v)
    result = phaddd(result, result)
    return result[0]
  end

  local v = ffi.new("int4", 1, 2, 3, 4)
  local vzero = ffi.new("int4", 0)

  assert_equal(hsum(v), 10)
  assert_equal(hsum(vzero), 0)
end)
context("mixed register type opcodes", function()
-- Checks intrinsics that take byte16 vector arguments and hand their result
-- back through a register named in __reglist (ecx for pcmpistri, xmm0v for
-- pcmpistrm).
it("pcmpstr", function()
  ffi.cdef([[void pcmpistri(byte16 string, byte16 mask) __mcode("660F3A63rMU", 0x2) __reglist(out, int32_t ecx)]])

  local charlist = ffi.new("byte16", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
  -- Named `str` rather than `string` so the standard string library is not
  -- shadowed for the rest of this function.
  local str = ffi.new("byte16", 2, 2, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 7, 7, 2, 2)

  local ecx = ffi.C.pcmpistri(charlist, str)
  assert_equal(ecx, 4)

  -- The long string is kept flush-left so its contents stay unchanged.
  ffi.cdef([[
void pcmpistrm(byte16 string, byte16 mask) __mcode("660F3A62rMU", 0x40) __reglist(out, byte16 xmm0v);
int32_t pmovmskb(byte16 mask) __mcode("660FD7rM");
]])

  -- pcmpistrm returns a byte16 mask; pmovmskb turns it into an integer.
  local mask = ffi.C.pcmpistrm(charlist, str)
  mask = ffi.C.pmovmskb(mask)
  assert_equal(mask, 48)
end)
it("cvttsd2s", function()
assert_cdef([[int cvttsd2s(double n) __mcode("F20F2CrM");]], "cvttsd2s")
local cvttsd2s = ffi.C.cvttsd2s