From 1dab08b1ab2a89aa07f49d2f00b8ef3d05fa459f Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 30 Apr 2018 16:16:36 +0200 Subject: [PATCH] Fix x86-64 JIT conversion of negative floats to unsigned ints Thanks to Peter Cawley for advice, help on the test case, an initial patch, and suggested assembly. --- src/lj_asm_x86.h | 54 +++++++++++++++++++++++++-------------------- src/lj_jit.h | 9 +++----- src/lj_target_x86.h | 1 + src/lj_trace.c | 7 +++--- 4 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index af54dc7f..f8cee46d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -851,32 +851,38 @@ static void asm_conv(ASMState *as, IRIns *ir) asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); + Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : + ra_scratch(as, RSET_FPR); x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; - if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { - /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ - /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ - Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : - ra_scratch(as, RSET_FPR); - MCLabel l_end = emit_label(as); - if (LJ_32) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); - emit_rr(as, op, dest|REX_64, tmp); - if (st == IRT_NUM) - emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); - else - emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); - emit_sjcc(as, CC_NS, l_end); - emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ - emit_rr(as, op, dest|REX_64, tmp); - ra_left(as, tmp, lref); - } else { - if (LJ_64 && irt_isu32(ir->t)) - emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ - emit_mrm(as, op, - dest|((LJ_64 && - (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), - asm_fuseload(as, lref, RSET_FPR)); + Reg r64 = (LJ_64 && irt_is64 (ir->t)) ? 
REX_64 : 0; + if (LJ_64 && (irt_isu32(ir->t) || irt_isint(ir->t))) + emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ + if (irt_isu64(ir->t) || irt_isu32(ir->t)) { + /* The cvttsd2si family of instructions operates on signed integers, + producing INT_MIN on error. However we're converting to an unsigned + integer, so we want to accept the whole unsigned integer range. + Convert both the number and the number minus 2^32 (or 2^64 for + u64), choosing the first result if successful and the second otherwise. */ + Reg dest2 = ra_scratch(as, rset_exclude(RSET_GPR, dest)); + Reg tmp2 = ra_scratch(as, rset_exclude(RSET_FPR, tmp)); + x86Op sub_op; + void *krange; + if (st == IRT_NUM) { + sub_op = XO_SUBSD; + krange = &as->J->k64[irt_isu64(ir->t) ? LJ_K64_2P64 : LJ_K64_2P32]; + } else { + sub_op = XO_SUBSS; + krange = &as->J->k32[irt_isu64(ir->t) ? LJ_K32_2P64 : LJ_K32_2P32]; + } + emit_rr(as, XO_CMOV + (CC_O<<24), dest|r64, dest2|r64); + emit_i8(as, 1); + emit_rr(as, XO_ARITHi8, XOg_CMP|r64, dest); + emit_rr(as, op, dest2|r64, tmp2); + emit_rma(as, sub_op, tmp2, krange); + emit_rr(as, XO_MOVAPS, tmp2, tmp); } + emit_rr(as, op, dest|r64, tmp); + ra_left(as, tmp, lref); } } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. 
*/ Reg left, dest = ra_dest(as, ir, RSET_GPR); diff --git a/src/lj_jit.h b/src/lj_jit.h index f37e7927..692b88c2 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -332,14 +332,10 @@ enum { enum { #if LJ_TARGET_X86ORX64 + LJ_K64_2P32, /* 2^32 */ LJ_K64_TOBIT, /* 2^52 + 2^51 */ LJ_K64_2P64, /* 2^64 */ LJ_K64_M2P64, /* -2^64 */ -#if LJ_32 - LJ_K64_M2P64_31, /* -2^64 or -2^31 */ -#else - LJ_K64_M2P64_31 = LJ_K64_M2P64, -#endif #endif #if LJ_TARGET_MIPS LJ_K64_2P31, /* 2^31 */ @@ -353,7 +349,8 @@ enum { enum { #if LJ_TARGET_X86ORX64 - LJ_K32_M2P64_31, /* -2^64 or -2^31 */ + LJ_K32_2P32, /* 2^32 */ + LJ_K32_2P64, /* 2^64 */ #endif #if LJ_TARGET_PPC LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 356f7924..c0a5fd5e 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -305,6 +305,7 @@ typedef enum { XO_CVTSS2SD = XO_f30f(5a), XO_CVTSD2SS = XO_f20f(5a), XO_ADDSS = XO_f30f(58), + XO_SUBSS = XO_f30f(5c), XO_MOVD = XO_660f(6e), XO_MOVDto = XO_660f(7e), diff --git a/src/lj_trace.c b/src/lj_trace.c index d85b47f8..dee83456 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -318,12 +318,11 @@ void lj_trace_initstate(global_State *g) /* Initialize 32/64 bit constants. */ #if LJ_TARGET_X86ORX64 + J->k64[LJ_K64_2P32].u64 = U64x(41f00000,00000000); J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); -#if LJ_32 - J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); -#endif J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); - J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; + J->k32[LJ_K32_2P64] = 0x5f800000; + J->k32[LJ_K32_2P32] = 0x4f800000; #endif #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);