diff --git a/src/lib_jit.c b/src/lib_jit.c
index 1b69caa5..125b48ce 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -549,8 +549,6 @@ static uint32_t jit_cpudetect(lua_State *L)
         flags |= JIT_F_LEA_AGU;
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
       uint32_t fam = (features[0] & 0x0ff00f00);
-      if (fam == 0x00000f00)  /* K8. */
-        flags |= JIT_F_SPLIT_XMM;
       if (fam >= 0x00000f00)  /* K8, K10. */
         flags |= JIT_F_PREFER_IMUL;
     }
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index e9c53a09..5621b616 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -551,7 +551,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
       ra_modified(as, dest);
-      emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+      emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
                dest, RID_ESP, ofs);
     }
     if ((ci->flags & CCI_CASTU64)) {
@@ -662,8 +662,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
@@ -719,8 +718,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
               dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
@@ -824,8 +822,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-              dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
            irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -1262,7 +1259,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
     if (LJ_64 && irt_is64(ir->t))
@@ -1376,7 +1373,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
@@ -1442,7 +1439,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
   } else if (irt_islightud(t)) {
@@ -1461,10 +1458,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
       emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1696,7 +1691,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
       ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+      emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
     }
     emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
     switch (fpm) {  /* st0 = lj_vm_*(st0) */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index bd184a30..2454c899 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
 
 /* -- Emit loads/stores --------------------------------------------------- */
 
-/* Instruction selection for XMM moves. */
-#define XMM_MOVRR(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
-#define XMM_MOVRM(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
-
 /* mov [base+ofs], i */
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 {
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
   if (tvispzero(tv))  /* Use xor only for +0. */
     emit_rr(as, XO_XORPS, r, r);
   else
-    emit_rma(as, XMM_MOVRM(as), r, &tv->n);
+    emit_rma(as, XO_MOVSD, r, &tv->n);
 }
 
 /* -- Emit control-flow instructions -------------------------------------- */
@@ -427,7 +423,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
   if (dst < RID_MAX_GPR)
     emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
   else
-    emit_rr(as, XMM_MOVRR(as), dst, src);
+    emit_rr(as, XO_MOVAPS, dst, src);
 }
 
 /* Generic load of register from stack slot. */
@@ -436,7 +432,7 @@ static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
   if (r < RID_MAX_GPR)
     emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
   else
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, RID_ESP, ofs);
 }
 
 /* Generic store of register to stack slot. */
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 8b42dd4e..2683b462 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -18,12 +18,11 @@
 #define JIT_F_SSE3		0x00000020
 #define JIT_F_SSE4_1		0x00000040
 #define JIT_F_PREFER_IMUL	0x00000080
-#define JIT_F_SPLIT_XMM		0x00000100
-#define JIT_F_LEA_AGU		0x00000200
+#define JIT_F_LEA_AGU		0x00000100
 
 /* Names for the CPU-specific flags. Must match the order above. */
 #define JIT_F_CPU_FIRST		JIT_F_SSE2
-#define JIT_F_CPUSTRING		"\4SSE2\4SSE3\6SSE4.1\3AMD\2K8\4ATOM"
+#define JIT_F_CPUSTRING		"\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
 #elif LJ_TARGET_ARM
 #define JIT_F_ARMV6_		0x00000010
 #define JIT_F_ARMV6T2_		0x00000020
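
Note on the now-unconditional XORPS in asm_tointg/asm_conv: CVTSI2SD
writes only the low 64 bits of its destination and merges the upper
half, so without zeroing the register first the convert carries a false
dependency on its previous contents -- the partial register stall the
comments refer to. K8 cores split full-width XMM ops into two 64-bit
halves anyway, which is why the old JIT_F_SPLIT_XMM path skipped the
zeroing and picked MOVSD/MOVLPD over MOVAPS/MOVSD in the XMM_MOVRR/
XMM_MOVRM macros. A minimal sketch of what asm_tointg now always
generates for a checked number->int conversion; register assignments
are illustrative, and the emit_rr() calls above appear in reverse of
this order because the assembler emits code backwards:

  cvttsd2si eax, xmm0    ; truncate the double to an integer
  xorps     xmm1, xmm1   ; zero the scratch reg to break the false
                         ; dependency before the merging convert below
  cvtsi2sd  xmm1, eax    ; convert back; writes only the low 64 bits
  ucomisd   xmm0, xmm1   ; guard: side exit if the round trip was inexact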