mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-07 15:14:08 +00:00
Fix pow() optimization inconsistencies.
This commit is contained in:
parent
c18acfe756
commit
9512d5c1ac
@ -1670,7 +1670,6 @@ static void asm_loop(ASMState *as)
|
||||
#if !LJ_SOFTFP32
|
||||
#if !LJ_TARGET_X86ORX64
|
||||
#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
|
||||
#define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
|
||||
#endif
|
||||
|
||||
static void asm_pow(ASMState *as, IRIns *ir)
|
||||
@ -1681,10 +1680,8 @@ static void asm_pow(ASMState *as, IRIns *ir)
|
||||
IRCALL_lj_carith_powu64);
|
||||
else
|
||||
#endif
|
||||
if (irt_isnum(IR(ir->op2)->t))
|
||||
asm_callid(as, ir, IRCALL_pow);
|
||||
else
|
||||
asm_fppowi(as, ir);
|
||||
asm_callid(as, ir, irt_isnum(IR(ir->op2)->t) ? IRCALL_lj_vm_pow :
|
||||
IRCALL_lj_vm_powi);
|
||||
}
|
||||
|
||||
static void asm_div(ASMState *as, IRIns *ir)
|
||||
|
@ -2017,19 +2017,6 @@ static void asm_ldexp(ASMState *as, IRIns *ir)
|
||||
asm_x87load(as, ir->op2);
|
||||
}
|
||||
|
||||
/*
** Emit a call to lj_vm_powi_sse for the instruction ir.
** The left operand (ir->op1) is placed in XMM0, the right operand (ir->op2)
** in EAX, and the result is taken from XMM0.
** XMM0, XMM1 and EAX are evicted around the call, except for the register
** already assigned to ir itself.
** NOTE(review): the statement order (destination, call, then operands)
** presumably reflects backwards code emission -- confirm before reordering.
*/
static void asm_fppowi(ASMState *as, IRIns *ir)
|
||||
{
|
||||
/* The modified regs must match with the *.dasc implementation. */
|
||||
RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
|
||||
if (ra_hasreg(ir->r))
|
||||
rset_clear(drop, ir->r); /* Dest reg handled below. */
|
||||
ra_evictset(as, drop);
|
||||
/* Result of the call is expected in XMM0. */
ra_destreg(as, ir, RID_XMM0);
|
||||
emit_call(as, lj_vm_powi_sse);
|
||||
/* Base goes to XMM0, integer exponent goes to EAX. */
ra_left(as, RID_XMM0, ir->op1);
|
||||
ra_left(as, RID_EAX, ir->op2);
|
||||
}
|
||||
|
||||
static int asm_swapops(ASMState *as, IRIns *ir)
|
||||
{
|
||||
IRIns *irl = IR(ir->op1);
|
||||
|
@ -44,7 +44,7 @@ extern double __divdf3(double a, double b);
|
||||
#define GOTDEF(_) \
|
||||
_(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
|
||||
_(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
|
||||
_(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
|
||||
_(lj_vm_pow) _(fmod) _(ldexp) _(lj_vm_modi) \
|
||||
_(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
|
||||
_(lj_dispatch_profile) _(lj_err_throw) \
|
||||
_(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
|
||||
|
@ -218,7 +218,7 @@ typedef struct CCallInfo {
|
||||
_(ANY, log, 1, N, NUM, XA_FP) \
|
||||
_(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
|
||||
_(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
|
||||
_(ANY, pow, 2, N, NUM, XA2_FP) \
|
||||
_(ANY, lj_vm_pow, 2, N, NUM, XA2_FP) \
|
||||
_(ANY, atan2, 2, N, NUM, XA2_FP) \
|
||||
_(ANY, ldexp, 2, N, NUM, XA_FP) \
|
||||
_(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
|
||||
|
@ -1143,33 +1143,6 @@ LJFOLDF(simplify_numpow_xkint)
|
||||
return ref;
|
||||
}
|
||||
|
||||
/* Fold rule for POW with a constant number exponent. */
LJFOLD(POW any KNUM)
LJFOLDF(simplify_numpow_xknum)
{
  /* Anything other than an exponent of exactly 0.5 is left to the next rule. */
  if (knumright != 0.5)
    return NEXTFOLD;
  /* x ^ 0.5 ==> sqrt(x) */
  return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
}
|
||||
|
||||
/*
** Fold rule for POW with a constant number on the left.
** Only 2.0 ^ i with an integer-typed right operand is handled: the
** instruction is rewritten in place into LDEXP(1.0, i) and folding is
** retried. Everything else falls through to the next rule.
*/
LJFOLD(POW KNUM any)
|
||||
LJFOLDF(simplify_numpow_kx)
|
||||
{
|
||||
lua_Number n = knumleft;
|
||||
if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */
|
||||
#if LJ_TARGET_X86ORX64
|
||||
/* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
|
||||
/* Temporarily turn fins into CONV(op2, IRCONV_NUM_INT) and fold that ... */
fins->o = IR_CONV;
|
||||
fins->op1 = fins->op2;
|
||||
fins->op2 = IRCONV_NUM_INT;
|
||||
/* ... then use the folded conversion as the new right operand. */
fins->op2 = (IRRef1)lj_opt_fold(J);
|
||||
#endif
|
||||
/* Left operand becomes the constant from lj_ir_knum_one(); opcode becomes LDEXP. */
fins->op1 = (IRRef1)lj_ir_knum_one(J);
|
||||
fins->o = IR_LDEXP;
|
||||
return RETRYFOLD;
|
||||
}
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
/* -- Simplify conversions ------------------------------------------------ */
|
||||
|
||||
LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */
|
||||
|
@ -590,20 +590,14 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
|
||||
rb = conv_str_tonum(J, rb, vb);
|
||||
rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
|
||||
rc = conv_str_tonum(J, rc, vc);
|
||||
/* Narrowing must be unconditional to preserve (-x)^i semantics. */
|
||||
if (tvisint(vc) || numisint(numV(vc))) {
|
||||
int checkrange = 0;
|
||||
/* pow() is faster for bigger exponents. But do this only for (+k)^i. */
|
||||
if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
|
||||
int32_t k = numberVint(vc);
|
||||
if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
|
||||
checkrange = 1;
|
||||
}
|
||||
int32_t k = numberVint(vc);
|
||||
if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
|
||||
if (!tref_isinteger(rc)) {
|
||||
/* Guarded conversion to integer! */
|
||||
rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
|
||||
}
|
||||
if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
|
||||
if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
|
||||
TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
|
||||
emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
|
||||
}
|
||||
|
@ -83,10 +83,6 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
|
||||
LJ_ASMF void lj_vm_floor_sse(void);
|
||||
LJ_ASMF void lj_vm_ceil_sse(void);
|
||||
LJ_ASMF void lj_vm_trunc_sse(void);
|
||||
LJ_ASMF void lj_vm_powi_sse(void);
|
||||
#define lj_vm_powi NULL
|
||||
#else
|
||||
LJ_ASMF double lj_vm_powi(double, int32_t);
|
||||
#endif
|
||||
#if LJ_TARGET_PPC || LJ_TARGET_ARM64
|
||||
#define lj_vm_trunc trunc
|
||||
@ -102,6 +98,9 @@ LJ_ASMF int lj_vm_errno(void);
|
||||
LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx);
|
||||
#endif
|
||||
|
||||
LJ_ASMF double lj_vm_powi(double, int32_t);
|
||||
LJ_ASMF double lj_vm_pow(double, double);
|
||||
|
||||
/* Continuations for metamethods. */
|
||||
LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */
|
||||
LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */
|
||||
|
102
src/lj_vmmath.c
102
src/lj_vmmath.c
@ -30,57 +30,11 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
|
||||
LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
|
||||
LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
|
||||
LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
|
||||
LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
|
||||
LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
|
||||
#endif
|
||||
|
||||
/* -- Helper functions for generated machine code ------------------------- */
|
||||
/* -- Helper functions ---------------------------------------------------- */
|
||||
|
||||
double lj_vm_foldarith(double x, double y, int op)
|
||||
{
|
||||
switch (op) {
|
||||
case IR_ADD - IR_ADD: return x+y; break;
|
||||
case IR_SUB - IR_ADD: return x-y; break;
|
||||
case IR_MUL - IR_ADD: return x*y; break;
|
||||
case IR_DIV - IR_ADD: return x/y; break;
|
||||
case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
|
||||
case IR_POW - IR_ADD: return pow(x, y); break;
|
||||
case IR_NEG - IR_ADD: return -x; break;
|
||||
case IR_ABS - IR_ADD: return fabs(x); break;
|
||||
#if LJ_HASJIT
|
||||
case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break;
|
||||
case IR_MIN - IR_ADD: return x < y ? x : y; break;
|
||||
case IR_MAX - IR_ADD: return x > y ? x : y; break;
|
||||
#endif
|
||||
default: return x;
|
||||
}
|
||||
}
|
||||
|
||||
#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
|
||||
/*
** Integer modulo where a non-zero result takes the sign of the divisor b
** (e.g. -5 % 3 == 1, 5 % -3 == -1).
** The caller must guarantee b != 0.
*/
int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
{
  uint32_t y, ua, ub;
  /* This must be checked before using this function. */
  lj_assertX(b != 0, "modulo with zero divisor");
  /*
  ** Take magnitudes with unsigned two's-complement negation: negating
  ** INT32_MIN as a signed int overflows, which is undefined behavior.
  */
  ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a;
  ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b;
  y = ua % ub;
  /* Operands of opposite sign: shift a non-zero remainder by the divisor. */
  if (y != 0 && (a^b) < 0) y = y - ub;
  /* Give the result the sign of b, again negating in unsigned arithmetic. */
  if (((int32_t)y^b) < 0) y = ~y+1u;
  return (int32_t)y;
}
|
||||
#endif
|
||||
|
||||
#if LJ_HASJIT
|
||||
|
||||
#ifdef LUAJIT_NO_LOG2
|
||||
double lj_vm_log2(double a)
|
||||
{
|
||||
return log(a) * 1.4426950408889634074;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !LJ_TARGET_X86ORX64
|
||||
/* Unsigned x^k. */
|
||||
static double lj_vm_powui(double x, uint32_t k)
|
||||
{
|
||||
@ -112,6 +66,60 @@ double lj_vm_powi(double x, int32_t k)
|
||||
else
|
||||
return 1.0 / lj_vm_powui(x, (uint32_t)-k);
|
||||
}
|
||||
|
||||
/*
** Compute x^y.
** If y is an integral value within [-65536, 65536], the power is computed
** by lj_vm_powi(); every other exponent (including NaN and infinities)
** is passed to pow().
*/
double lj_vm_pow(double x, double y)
{
  /*
  ** Check the range before converting: converting a NaN or an out-of-range
  ** double to int32_t is undefined behavior in C (assuming lj_num2int() is
  ** a plain conversion -- TODO confirm). NaN fails both comparisons.
  */
  if (y >= -65536.0 && y <= 65536.0) {
    int32_t k = lj_num2int(y);
    if (y == (double)k)  /* Exponent has no fractional part. */
      return lj_vm_powi(x, k);
  }
  return pow(x, y);
}
|
||||
|
||||
double lj_vm_foldarith(double x, double y, int op)
|
||||
{
|
||||
switch (op) {
|
||||
case IR_ADD - IR_ADD: return x+y; break;
|
||||
case IR_SUB - IR_ADD: return x-y; break;
|
||||
case IR_MUL - IR_ADD: return x*y; break;
|
||||
case IR_DIV - IR_ADD: return x/y; break;
|
||||
case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
|
||||
case IR_POW - IR_ADD: return lj_vm_pow(x, y); break;
|
||||
case IR_NEG - IR_ADD: return -x; break;
|
||||
case IR_ABS - IR_ADD: return fabs(x); break;
|
||||
#if LJ_HASJIT
|
||||
case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break;
|
||||
case IR_MIN - IR_ADD: return x < y ? x : y; break;
|
||||
case IR_MAX - IR_ADD: return x > y ? x : y; break;
|
||||
#endif
|
||||
default: return x;
|
||||
}
|
||||
}
|
||||
|
||||
/* -- Helper functions for generated machine code ------------------------- */
|
||||
|
||||
#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
|
||||
/*
** Integer modulo where a non-zero result takes the sign of the divisor b
** (e.g. -5 % 3 == 1, 5 % -3 == -1).
** The caller must guarantee b != 0.
*/
int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
{
  uint32_t y, ua, ub;
  /* This must be checked before using this function. */
  lj_assertX(b != 0, "modulo with zero divisor");
  /*
  ** Take magnitudes with unsigned two's-complement negation: negating
  ** INT32_MIN as a signed int overflows, which is undefined behavior.
  */
  ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a;
  ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b;
  y = ua % ub;
  /* Operands of opposite sign: shift a non-zero remainder by the divisor. */
  if (y != 0 && (a^b) < 0) y = y - ub;
  /* Give the result the sign of b, again negating in unsigned arithmetic. */
  if (((int32_t)y^b) < 0) y = ~y+1u;
  return (int32_t)y;
}
|
||||
#endif
|
||||
|
||||
#if LJ_HASJIT
|
||||
|
||||
#ifdef LUAJIT_NO_LOG2
|
||||
double lj_vm_log2(double a)
|
||||
{
|
||||
return log(a) * 1.4426950408889634074;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Computes fpm(x) for extended math functions. */
|
||||
|
@ -1477,11 +1477,11 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|.endif
|
||||
|.endmacro
|
||||
|
|
||||
|.macro math_extern2, func
|
||||
|.macro math_extern2, name, func
|
||||
|.if HFABI
|
||||
| .ffunc_dd math_ .. func
|
||||
| .ffunc_dd math_ .. name
|
||||
|.else
|
||||
| .ffunc_nn math_ .. func
|
||||
| .ffunc_nn math_ .. name
|
||||
|.endif
|
||||
| .IOS mov RA, BASE
|
||||
| bl extern func
|
||||
@ -1492,6 +1492,9 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| b ->fff_restv
|
||||
|.endif
|
||||
|.endmacro
|
||||
|.macro math_extern2, func
|
||||
| math_extern2 func, func
|
||||
|.endmacro
|
||||
|
|
||||
|.if FPU
|
||||
| .ffunc_d math_sqrt
|
||||
@ -1537,7 +1540,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| math_extern sinh
|
||||
| math_extern cosh
|
||||
| math_extern tanh
|
||||
| math_extern2 pow
|
||||
| math_extern2 pow, lj_vm_pow
|
||||
| math_extern2 atan2
|
||||
| math_extern2 fmod
|
||||
|
|
||||
@ -3203,7 +3206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
break;
|
||||
case BC_POW:
|
||||
| // NYI: (partial) integer arithmetic.
|
||||
| ins_arithfp extern, extern pow
|
||||
| ins_arithfp extern, extern lj_vm_pow
|
||||
break;
|
||||
|
||||
case BC_CAT:
|
||||
|
@ -1387,11 +1387,14 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| b ->fff_resn
|
||||
|.endmacro
|
||||
|
|
||||
|.macro math_extern2, func
|
||||
| .ffunc_nn math_ .. func
|
||||
|.macro math_extern2, name, func
|
||||
| .ffunc_nn math_ .. name
|
||||
| bl extern func
|
||||
| b ->fff_resn
|
||||
|.endmacro
|
||||
|.macro math_extern2, func
|
||||
| math_extern2 func, func
|
||||
|.endmacro
|
||||
|
|
||||
|.ffunc_n math_sqrt
|
||||
| fsqrt d0, d0
|
||||
@ -1420,7 +1423,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| math_extern sinh
|
||||
| math_extern cosh
|
||||
| math_extern tanh
|
||||
| math_extern2 pow
|
||||
| math_extern2 pow, lj_vm_pow
|
||||
| math_extern2 atan2
|
||||
| math_extern2 fmod
|
||||
|
|
||||
@ -2674,7 +2677,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| ins_arithload FARG1, FARG2
|
||||
| ins_arithfallback ins_arithcheck_num
|
||||
|.if "fpins" == "fpow"
|
||||
| bl extern pow
|
||||
| bl extern lj_vm_pow
|
||||
|.else
|
||||
| fpins FARG1, FARG1, FARG2
|
||||
|.endif
|
||||
|
@ -1623,14 +1623,17 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|. nop
|
||||
|.endmacro
|
||||
|
|
||||
|.macro math_extern2, func
|
||||
| .ffunc_nn math_ .. func
|
||||
|.macro math_extern2, name, func
|
||||
| .ffunc_nn math_ .. name
|
||||
|. load_got func
|
||||
| call_extern
|
||||
|. nop
|
||||
| b ->fff_resn
|
||||
|. nop
|
||||
|.endmacro
|
||||
|.macro math_extern2, func
|
||||
| math_extern2 func, func
|
||||
|.endmacro
|
||||
|
|
||||
|// TODO: Return integer type if result is integer (own sf implementation).
|
||||
|.macro math_round, func
|
||||
@ -1684,7 +1687,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| math_extern sinh
|
||||
| math_extern cosh
|
||||
| math_extern tanh
|
||||
| math_extern2 pow
|
||||
| math_extern2 pow, lj_vm_pow
|
||||
| math_extern2 atan2
|
||||
| math_extern2 fmod
|
||||
|
|
||||
@ -3689,7 +3692,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| sltiu AT, SFARG1HI, LJ_TISNUM
|
||||
| sltiu TMP0, SFARG2HI, LJ_TISNUM
|
||||
| and AT, AT, TMP0
|
||||
| load_got pow
|
||||
| load_got lj_vm_pow
|
||||
| beqz AT, ->vmeta_arith
|
||||
|. addu RA, BASE, RA
|
||||
|.if FPU
|
||||
|
@ -1667,14 +1667,17 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
|. nop
|
||||
|.endmacro
|
||||
|
|
||||
|.macro math_extern2, func
|
||||
| .ffunc_nn math_ .. func
|
||||
|.macro math_extern2, name, func
|
||||
| .ffunc_nn math_ .. name
|
||||
|. load_got func
|
||||
| call_extern
|
||||
|. nop
|
||||
| b ->fff_resn
|
||||
|. nop
|
||||
|.endmacro
|
||||
|.macro math_extern2, func
|
||||
| math_extern2 func, func
|
||||
|.endmacro
|
||||
|
|
||||
|// TODO: Return integer type if result is integer (own sf implementation).
|
||||
|.macro math_round, func
|
||||
@ -1728,7 +1731,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| math_extern sinh
|
||||
| math_extern cosh
|
||||
| math_extern tanh
|
||||
| math_extern2 pow
|
||||
| math_extern2 pow, lj_vm_pow
|
||||
| math_extern2 atan2
|
||||
| math_extern2 fmod
|
||||
|
|
||||
@ -3915,7 +3918,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| sltiu TMP0, TMP0, LJ_TISNUM
|
||||
| sltiu TMP1, TMP1, LJ_TISNUM
|
||||
| and AT, TMP0, TMP1
|
||||
| load_got pow
|
||||
| load_got lj_vm_pow
|
||||
| beqz AT, ->vmeta_arith
|
||||
|. daddu RA, BASE, RA
|
||||
|.if FPU
|
||||
|
@ -2012,11 +2012,14 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| b ->fff_resn
|
||||
|.endmacro
|
||||
|
|
||||
|.macro math_extern2, func
|
||||
| .ffunc_nn math_ .. func
|
||||
|.macro math_extern2, name, func
|
||||
| .ffunc_nn math_ .. name
|
||||
| blex func
|
||||
| b ->fff_resn
|
||||
|.endmacro
|
||||
|.macro math_extern2, func
|
||||
| math_extern2 func, func
|
||||
|.endmacro
|
||||
|
|
||||
|.macro math_round, func
|
||||
| .ffunc_1 math_ .. func
|
||||
@ -2141,7 +2144,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| math_extern sinh
|
||||
| math_extern cosh
|
||||
| math_extern tanh
|
||||
| math_extern2 pow
|
||||
| math_extern2 pow, lj_vm_pow
|
||||
| math_extern2 atan2
|
||||
| math_extern2 fmod
|
||||
|
|
||||
@ -4139,7 +4142,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| checknum cr1, CARG3
|
||||
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|
||||
| bge ->vmeta_arith_vv
|
||||
| blex pow
|
||||
| blex lj_vm_pow
|
||||
| ins_next1
|
||||
|.if FPU
|
||||
| stfdx FARG1, BASE, RA
|
||||
|
@ -1755,13 +1755,16 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| jmp ->fff_resxmm0
|
||||
|.endmacro
|
||||
|
|
||||
|.macro math_extern2, func
|
||||
| .ffunc_nn math_ .. func
|
||||
|.macro math_extern2, name, func
|
||||
| .ffunc_nn math_ .. name
|
||||
| mov RB, BASE
|
||||
| call extern func
|
||||
| mov BASE, RB
|
||||
| jmp ->fff_resxmm0
|
||||
|.endmacro
|
||||
|.macro math_extern2, func
|
||||
| math_extern2 func, func
|
||||
|.endmacro
|
||||
|
|
||||
| math_extern log10
|
||||
| math_extern exp
|
||||
@ -1774,7 +1777,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| math_extern sinh
|
||||
| math_extern cosh
|
||||
| math_extern tanh
|
||||
| math_extern2 pow
|
||||
| math_extern2 pow, lj_vm_pow
|
||||
| math_extern2 atan2
|
||||
| math_extern2 fmod
|
||||
|
|
||||
@ -2579,41 +2582,6 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| subsd xmm0, xmm1
|
||||
| ret
|
||||
|
|
||||
|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
|
||||
|->vm_powi_sse:
|
||||
| cmp eax, 1; jle >6 // i<=1?
|
||||
| // Now 1 < (unsigned)i <= 0x80000000.
|
||||
|1: // Handle leading zeros.
|
||||
| test eax, 1; jnz >2
|
||||
| mulsd xmm0, xmm0
|
||||
| shr eax, 1
|
||||
| jmp <1
|
||||
|2:
|
||||
| shr eax, 1; jz >5
|
||||
| movaps xmm1, xmm0
|
||||
|3: // Handle trailing bits.
|
||||
| mulsd xmm0, xmm0
|
||||
| shr eax, 1; jz >4
|
||||
| jnc <3
|
||||
| mulsd xmm1, xmm0
|
||||
| jmp <3
|
||||
|4:
|
||||
| mulsd xmm0, xmm1
|
||||
|5:
|
||||
| ret
|
||||
|6:
|
||||
| je <5 // x^1 ==> x
|
||||
| jb >7 // x^0 ==> 1
|
||||
| neg eax
|
||||
| call <1
|
||||
| sseconst_1 xmm1, RD
|
||||
| divsd xmm1, xmm0
|
||||
| movaps xmm0, xmm1
|
||||
| ret
|
||||
|7:
|
||||
| sseconst_1 xmm0, RD
|
||||
| ret
|
||||
|
|
||||
|//-----------------------------------------------------------------------
|
||||
|//-- Miscellaneous functions --------------------------------------------
|
||||
|//-----------------------------------------------------------------------
|
||||
|
@ -2138,8 +2138,8 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| jmp ->fff_resfp
|
||||
|.endmacro
|
||||
|
|
||||
|.macro math_extern2, func
|
||||
| .ffunc_nnsse math_ .. func
|
||||
|.macro math_extern2, name, func
|
||||
| .ffunc_nnsse math_ .. name
|
||||
|.if not X64
|
||||
| movsd FPARG1, xmm0
|
||||
| movsd FPARG3, xmm1
|
||||
@ -2149,6 +2149,9 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| mov BASE, RB
|
||||
| jmp ->fff_resfp
|
||||
|.endmacro
|
||||
|.macro math_extern2, func
|
||||
| math_extern2 func, func
|
||||
|.endmacro
|
||||
|
|
||||
| math_extern log10
|
||||
| math_extern exp
|
||||
@ -2161,7 +2164,7 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| math_extern sinh
|
||||
| math_extern cosh
|
||||
| math_extern tanh
|
||||
| math_extern2 pow
|
||||
| math_extern2 pow, lj_vm_pow
|
||||
| math_extern2 atan2
|
||||
| math_extern2 fmod
|
||||
|
|
||||
@ -3038,41 +3041,6 @@ static void build_subroutines(BuildCtx *ctx)
|
||||
| subsd xmm0, xmm1
|
||||
| ret
|
||||
|
|
||||
|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
|
||||
|->vm_powi_sse:
|
||||
| cmp eax, 1; jle >6 // i<=1?
|
||||
| // Now 1 < (unsigned)i <= 0x80000000.
|
||||
|1: // Handle leading zeros.
|
||||
| test eax, 1; jnz >2
|
||||
| mulsd xmm0, xmm0
|
||||
| shr eax, 1
|
||||
| jmp <1
|
||||
|2:
|
||||
| shr eax, 1; jz >5
|
||||
| movaps xmm1, xmm0
|
||||
|3: // Handle trailing bits.
|
||||
| mulsd xmm0, xmm0
|
||||
| shr eax, 1; jz >4
|
||||
| jnc <3
|
||||
| mulsd xmm1, xmm0
|
||||
| jmp <3
|
||||
|4:
|
||||
| mulsd xmm0, xmm1
|
||||
|5:
|
||||
| ret
|
||||
|6:
|
||||
| je <5 // x^1 ==> x
|
||||
| jb >7 // x^0 ==> 1
|
||||
| neg eax
|
||||
| call <1
|
||||
| sseconst_1 xmm1, RDa
|
||||
| divsd xmm1, xmm0
|
||||
| movaps xmm0, xmm1
|
||||
| ret
|
||||
|7:
|
||||
| sseconst_1 xmm0, RDa
|
||||
| ret
|
||||
|
|
||||
|//-----------------------------------------------------------------------
|
||||
|//-- Miscellaneous functions --------------------------------------------
|
||||
|//-----------------------------------------------------------------------
|
||||
@ -3954,7 +3922,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
||||
| movsd FPARG1, xmm0
|
||||
| movsd FPARG3, xmm1
|
||||
|.endif
|
||||
| call extern pow
|
||||
| call extern lj_vm_pow
|
||||
| movzx RA, PC_RA
|
||||
| mov BASE, RB
|
||||
|.if X64
|
||||
|
Loading…
Reference in New Issue
Block a user