Mirror of https://github.com/LuaJIT/LuaJIT.git
commit ad03eba715
parent e03df1e339

    x86/x64: Drop internal x87 math functions. Use libm functions.
@@ -426,11 +426,11 @@
 #define LJ_TARGET_UNALIGNED 0
 #endif
 
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #endif
-#if defined(__symbian__)
+#if defined(__symbian__) || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_EXP2
 #endif
 
@@ -1262,9 +1262,6 @@ static void asm_call(ASMState *as, IRIns *ir)
 }
 
 #if !LJ_SOFTFP
-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref);
-
-#if !LJ_TARGET_X86ORX64
 static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
@@ -1274,7 +1271,6 @@ static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
   asm_setupresult(as, ir, ci);
   asm_gencall(as, ci, args);
 }
-#endif
 
 static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
 {
@@ -1593,26 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-  IRIns *irx;
-  if (ra_hasreg(ir->r))
-    rset_clear(drop, ir->r);  /* Dest reg handled below. */
-  ra_evictset(as, drop);
-  ra_destreg(as, ir, RID_XMM0);
-  emit_call(as, lj_vm_pow_sse);
-  irx = IR(lref);
-  if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-    irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-  ra_left(as, RID_XMM0, lref);
-  ra_left(as, RID_XMM1, rref);
-}
-
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
   if (fpm == IRFPM_SQRT) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1645,7 +1628,15 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
     }
   } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
-  } else {  /* Handle x87 ops. */
+  } else {
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+  }
+}
+
+#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir)
+{
   int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
   Reg dest = ir->r;
   if (ra_hasreg(dest)) {
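
Editor's note: the rewritten else-branch relies on the IRFPM_* opcode indices lining up with the block of FP math entries in the IRCALL_* enum (see the CCallInfo table hunk further down), so IRCALL_lj_vm_floor + fpm selects the matching C/libm helper. A minimal standalone sketch of that indexing idea follows; the enum names mirror LuaJIT's, but the types and exact values are assumptions for illustration, not the real definitions.

#include <stdio.h>

/* Illustrative only: the point is the parallel ordering, not the values. */
typedef enum {
  IRFPM_FLOOR, IRFPM_CEIL, IRFPM_TRUNC, IRFPM_SQRT, IRFPM_EXP, IRFPM_EXP2,
  IRFPM_LOG, IRFPM_LOG2, IRFPM_LOG10, IRFPM_SIN, IRFPM_COS, IRFPM_TAN
} SketchFPMathOp;

typedef enum {
  IRCALL_lj_vm_floor, IRCALL_lj_vm_ceil, IRCALL_lj_vm_trunc, IRCALL_sqrt,
  IRCALL_exp, IRCALL_lj_vm_exp2, IRCALL_log, IRCALL_lj_vm_log2, IRCALL_log10,
  IRCALL_sin, IRCALL_cos, IRCALL_tan
} SketchCallID;

int main(void)
{
  SketchFPMathOp fpm = IRFPM_SIN;
  /* Adding the FP math opcode to the first FP math call id picks sin(). */
  printf("IRCALL_lj_vm_floor + IRFPM_SIN = %d, IRCALL_sin = %d\n",
         (int)(IRCALL_lj_vm_floor + fpm), (int)IRCALL_sin);
  return 0;
}
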
@@ -1654,44 +1645,11 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
     emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
   }
   emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
-  switch (fpm) {  /* st0 = lj_vm_*(st0) */
-  case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
-  case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
-  case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
-  case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
-  case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
-  case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
-    /* Note: the use of fyl2xp1 would be pointless here. When computing
-    ** log(1.0+eps) the precision is already lost after 1.0 is added.
-    ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
-    */
-    emit_x87op(as, XI_FYL2X); break;
-  case IRFPM_OTHER:
-    switch (ir->o) {
-    case IR_ATAN2:
-      emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
-    case IR_LDEXP:
-      emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
-    default: lua_assert(0); break;
-    }
-    break;
-  default: lua_assert(0); break;
-  }
+  emit_x87op(as, XI_FPOP1);
+  emit_x87op(as, XI_FSCALE);
   asm_x87load(as, ir->op1);
-  switch (fpm) {
-  case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
-  case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
-  case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
-  case IRFPM_OTHER:
-    if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
-    break;
-  default: break;
-  }
+  asm_x87load(as, ir->op2);
 }
-  }
-}
-
-#define asm_atan2(as, ir) asm_fpmath(as, ir)
-#define asm_ldexp(as, ir) asm_fpmath(as, ir)
 
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
@@ -169,18 +169,18 @@ typedef struct CCallInfo {
   _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
   _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
   _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
-  _(FPMATH, exp, 1, N, NUM, XA_FP) \
-  _(FPMATH, lj_vm_exp2, 1, N, NUM, XA_FP) \
-  _(FPMATH, log, 1, N, NUM, XA_FP) \
-  _(FPMATH, lj_vm_log2, 1, N, NUM, XA_FP) \
-  _(FPMATH, log10, 1, N, NUM, XA_FP) \
-  _(FPMATH, sin, 1, N, NUM, XA_FP) \
-  _(FPMATH, cos, 1, N, NUM, XA_FP) \
-  _(FPMATH, tan, 1, N, NUM, XA_FP) \
-  _(FPMATH, lj_vm_powi, 2, N, NUM, XA_FP) \
-  _(FPMATH, pow, 2, N, NUM, XA2_FP) \
-  _(FPMATH, atan2, 2, N, NUM, XA2_FP) \
-  _(FPMATH, ldexp, 2, N, NUM, XA_FP) \
+  _(ANY, exp, 1, N, NUM, XA_FP) \
+  _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \
+  _(ANY, log, 1, N, NUM, XA_FP) \
+  _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
+  _(ANY, log10, 1, N, NUM, XA_FP) \
+  _(ANY, sin, 1, N, NUM, XA_FP) \
+  _(ANY, cos, 1, N, NUM, XA_FP) \
+  _(ANY, tan, 1, N, NUM, XA_FP) \
+  _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
+  _(ANY, pow, 2, N, NUM, XA2_FP) \
+  _(ANY, atan2, 2, N, NUM, XA2_FP) \
+  _(ANY, ldexp, 2, N, NUM, XA_FP) \
   _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
   _(SOFTFP, softfp_add, 4, N, NUM, 0) \
   _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
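
Editor's note: these entries flip from the FPMATH condition to ANY, so the C functions are registered on x86/x64 as well, which now emit calls to them instead of inline x87 code (see the assembler hunks above). For readers unfamiliar with the X-macro style of this table, here is a minimal sketch of how a `_(...)` list of this shape is typically expanded into a call-info array; the struct layout and the expansion macro are assumptions for illustration, not the real CCallInfo/lj_ir_callinfo definitions.

#include <math.h>
#include <stdio.h>

/* Hypothetical table and struct, only to show the expansion pattern. */
#define SKETCH_CALLDEF(_) \
  _(ANY, pow,   2) \
  _(ANY, atan2, 2) \
  _(ANY, fmod,  2)

typedef struct SketchCallInfo {
  void (*func)(void);  /* The C/libm routine, stored type-erased. */
  int nargs;           /* Number of (double) arguments. */
} SketchCallInfo;

#define SKETCH_ENTRY(cond, name, nargs) { (void (*)(void))name, nargs },
static const SketchCallInfo sketch_callinfo[] = {
  SKETCH_CALLDEF(SKETCH_ENTRY)
};
#undef SKETCH_ENTRY

int main(void)
{
  printf("%d call entries; entry 0 takes %d args\n",
         (int)(sizeof(sketch_callinfo)/sizeof(sketch_callinfo[0])),
         sketch_callinfo[0].nargs);
  return 0;
}
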
src/lj_vm.h
@@ -55,15 +55,13 @@ LJ_ASMF void lj_vm_exit_interp(void);
 #define lj_vm_ceil ceil
 #else
 LJ_ASMF double lj_vm_floor(double);
-#if !LJ_TARGET_X86ORX64
 LJ_ASMF double lj_vm_ceil(double);
-#endif
 #if LJ_TARGET_ARM
 LJ_ASMF double lj_vm_floor_sf(double);
 LJ_ASMF double lj_vm_ceil_sf(double);
 #endif
 #endif
-#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64
+#ifdef LUAJIT_NO_LOG2
 LJ_ASMF double lj_vm_log2(double);
 #else
 #define lj_vm_log2 log2
@@ -74,11 +72,11 @@ LJ_ASMF double lj_vm_log2(double);
 LJ_ASMF void lj_vm_floor_sse(void);
 LJ_ASMF void lj_vm_ceil_sse(void);
 LJ_ASMF void lj_vm_trunc_sse(void);
-LJ_ASMF void lj_vm_exp_x87(void);
-LJ_ASMF void lj_vm_exp2_x87(void);
-LJ_ASMF void lj_vm_pow_sse(void);
 LJ_ASMF void lj_vm_powi_sse(void);
+#define lj_vm_powi NULL
 #else
+LJ_ASMF double lj_vm_powi(double, int32_t);
+#endif
 #if LJ_TARGET_PPC
 #define lj_vm_trunc trunc
 #else
@@ -87,13 +85,11 @@ LJ_ASMF double lj_vm_trunc(double);
 LJ_ASMF double lj_vm_trunc_sf(double);
 #endif
 #endif
-LJ_ASMF double lj_vm_powi(double, int32_t);
 #ifdef LUAJIT_NO_EXP2
 LJ_ASMF double lj_vm_exp2(double);
 #else
 #define lj_vm_exp2 exp2
 #endif
-#endif
 LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
 #if LJ_HASFFI
 LJ_ASMF int lj_vm_errno(void);
@@ -17,14 +17,25 @@
 
 #if LJ_TARGET_X86 && __ELF__ && __PIC__
 /* Wrapper functions to deal with the ELF/x86 PIC disaster. */
+LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
+LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
+LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
+LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
+LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
+LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
+LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
+LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
+LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
 LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
 LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
 LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
+LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
+LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
+LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
 #endif
 
 /* -- Helper functions for generated machine code ------------------------- */
 
-#if !LJ_TARGET_X86ORX64
 double lj_vm_foldarith(double x, double y, int op)
 {
   switch (op) {
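
Editor's note: the new lj_wrap_* functions follow the pattern of the existing sinh/cosh/tanh wrappers, presumably for the reason the comment hints at: on i386 ELF builds compiled as PIC, a call through the PLT expects %ebx to hold the GOT pointer, which hand-written VM assembly and JIT-generated code do not maintain, so each libm routine gets a trivial C wrapper that can be called by plain address. A minimal sketch of the pattern with a hypothetical name (not LuaJIT source):

#include <math.h>

/* my_wrap_log is a hypothetical example.  The wrapper is ordinary C, so the
** compiler emits whatever PIC setup the call into libm needs; callers only
** need the wrapper's own address. */
double my_wrap_log(double x) { return log(x); }
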
@@ -45,7 +56,6 @@ double lj_vm_foldarith(double x, double y, int op)
   default: return x;
   }
 }
-#endif
 
 #if LJ_HASJIT
 
@@ -109,6 +119,7 @@ double lj_vm_powi(double x, int32_t k)
   else
     return 1.0 / lj_vm_powui(x, (uint32_t)-k);
 }
+#endif
 
 /* Computes fpm(x) for extended math functions. */
 double lj_vm_foldfpm(double x, int fpm)
@@ -130,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm)
   }
   return 0;
 }
-#endif
 
 #if LJ_HASFFI
 int lj_vm_errno(void)
src/vm_x86.dasc
@@ -373,7 +373,6 @@
 | fpop
 |.endmacro
 |
-|.macro fdup; fld st0; .endmacro
 |.macro fpop1; fstp st1; .endmacro
 |
 |// Synthesize SSE FP constants.
@@ -1329,19 +1328,6 @@ static void build_subroutines(BuildCtx *ctx)
 | cmp NARGS:RD, 2+1; jb ->fff_fallback
 |.endmacro
 |
-|.macro .ffunc_n, name
-| .ffunc_1 name
-| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-| fld qword [BASE]
-|.endmacro
-|
-|.macro .ffunc_n, name, op
-| .ffunc_1 name
-| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-| op
-| fld qword [BASE]
-|.endmacro
-|
 |.macro .ffunc_nsse, name, op
 | .ffunc_1 name
 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1352,14 +1338,6 @@ static void build_subroutines(BuildCtx *ctx)
 | .ffunc_nsse name, movsd
 |.endmacro
 |
-|.macro .ffunc_nn, name
-| .ffunc_2 name
-| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-| cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
-| fld qword [BASE]
-| fld qword [BASE+8]
-|.endmacro
-|
 |.macro .ffunc_nnsse, name
 | .ffunc_2 name
 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -2029,6 +2007,12 @@ static void build_subroutines(BuildCtx *ctx)
 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
 | jmp ->vm_return
 |
+|.if X64
+|.define fff_resfp, fff_resxmm0
+|.else
+|.define fff_resfp, fff_resn
+|.endif
+|
 |.macro math_round, func
 | .ffunc math_ .. func
 |.if DUALNUM
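
Editor's note: fff_resfp abstracts over how a floating-point fast-function result is handed back to the VM after a C call: on x64 it aliases fff_resxmm0 (result in xmm0, matching the C ABI), on x86 it aliases fff_resn (result on the x87 stack, as cdecl returns doubles in st0). A rough C analogue of that target-conditional alias, purely for illustration and with hypothetical names:

#include <stdio.h>

#if defined(__x86_64__) || defined(_M_X64)
#define FFF_RESFP "fff_resxmm0"  /* double result handed back in xmm0 */
#else
#define FFF_RESFP "fff_resn"     /* double result handed back in st(0) */
#endif

int main(void)
{
  printf("FP fast-function results on this build would go through %s\n",
         FFF_RESFP);
  return 0;
}
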
@@ -2061,22 +2045,14 @@ static void build_subroutines(BuildCtx *ctx)
 |.ffunc math_log
 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-| fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn
-|
-|.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
-|.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn
-|
-|.ffunc_n math_sin; fsin; jmp ->fff_resn
-|.ffunc_n math_cos; fcos; jmp ->fff_resn
-|.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn
-|
-|.ffunc_n math_asin
-| fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
-| jmp ->fff_resn
-|.ffunc_n math_acos
-| fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
-| jmp ->fff_resn
-|.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
+| movsd xmm0, qword [BASE]
+|.if not X64
+| movsd FPARG1, xmm0
+|.endif
+| mov RB, BASE
+| call extern log
+| mov BASE, RB
+| jmp ->fff_resfp
 |
 |.macro math_extern, func
 | .ffunc_nsse math_ .. func
@@ -2086,18 +2062,36 @@ static void build_subroutines(BuildCtx *ctx)
 | mov RB, BASE
 | call extern func
 | mov BASE, RB
-|.if X64
-| jmp ->fff_resxmm0
-|.else
-| jmp ->fff_resn
-|.endif
+| jmp ->fff_resfp
 |.endmacro
 |
+|.macro math_extern2, func
+| .ffunc_nnsse math_ .. func
+|.if not X64
+| movsd FPARG1, xmm0
+| movsd FPARG3, xmm1
+|.endif
+| mov RB, BASE
+| call extern func
+| mov BASE, RB
+| jmp ->fff_resfp
+|.endmacro
+|
+| math_extern log10
+| math_extern exp
+| math_extern sin
+| math_extern cos
+| math_extern tan
+| math_extern asin
+| math_extern acos
+| math_extern atan
 | math_extern sinh
 | math_extern cosh
 | math_extern tanh
+| math_extern2 pow
+| math_extern2 atan2
+| math_extern2 fmod
 |
-|.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
 |
 |.ffunc_1 math_frexp
@@ -2151,13 +2145,6 @@ static void build_subroutines(BuildCtx *ctx)
 |4:
 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
 |
-|.ffunc_nnr math_fmod
-|1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
-| fpop1
-| jmp ->fff_resn
-|
-|.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
-|
 |.macro math_minmax, name, cmovop, sseop
 | .ffunc name
 | mov RA, 2
@@ -2899,7 +2886,16 @@ static void build_subroutines(BuildCtx *ctx)
 |
 |// FP value rounding. Called by math.floor/math.ceil fast functions
 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-|.macro vm_round, name, mode
+|.macro vm_round, name, mode, cond
+|->name:
+|.if not X64 and cond
+| movsd xmm0, qword [esp+4]
+| call ->name .. _sse
+| movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
+| fld qword [esp+4]
+| ret
+|.endif
+|
 |->name .. _sse:
 | sseconst_abs xmm2, RDa
 | sseconst_2p52 xmm3, RDa
@@ -2936,18 +2932,9 @@ static void build_subroutines(BuildCtx *ctx)
 | ret
 |.endmacro
 |
-|->vm_floor:
-|.if not X64
-| movsd xmm0, qword [esp+4]
-| call ->vm_floor_sse
-| movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
-| fld qword [esp+4]
-| ret
-|.endif
-|
-| vm_round vm_floor, 0
-| vm_round vm_ceil, 1
-| vm_round vm_trunc, 2
+| vm_round vm_floor, 0, 1
+| vm_round vm_ceil, 1, JIT
+| vm_round vm_trunc, 2, JIT
 |
 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
 |->vm_mod:
@@ -2979,65 +2966,6 @@ static void build_subroutines(BuildCtx *ctx)
 | subsd xmm0, xmm1
 | ret
 |
-|// FP log2(x). Called by math.log(x, base).
-|->vm_log2:
-|.if X64WIN
-| movsd qword [rsp+8], xmm0 // Use scratch area.
-| fld1
-| fld qword [rsp+8]
-| fyl2x
-| fstp qword [rsp+8]
-| movsd xmm0, qword [rsp+8]
-|.elif X64
-| movsd qword [rsp-8], xmm0 // Use red zone.
-| fld1
-| fld qword [rsp-8]
-| fyl2x
-| fstp qword [rsp-8]
-| movsd xmm0, qword [rsp-8]
-|.else
-| fld1
-| fld qword [esp+4]
-| fyl2x
-|.endif
-| ret
-|
-|// FP exponentiation e^x and 2^x. Called by math.exp fast function and
-|// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
-|// Caveat: needs 3 slots on x87 stack!
-|->vm_exp_x87:
-| fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
-|->vm_exp2_x87:
-| .if X64WIN
-| .define expscratch, dword [rsp+8] // Use scratch area.
-| .elif X64
-| .define expscratch, dword [rsp-8] // Use red zone.
-| .else
-| .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
-| .endif
-| fst expscratch // Caveat: overwrites ARG1.
-| cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
-| cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
-|->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
-| fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
-| f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
-|1:
-| ret
-|2:
-| fpop; fldz; ret
-|
-|// Generic power function x^y. Called by BC_POW, math.pow fast function,
-|// and vm_arith.
-|// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
-|// Needs 16 byte scratch area for x86. Also called from JIT code.
-|->vm_pow_sse:
-| cvttsd2si eax, xmm1
-| cvtsi2sd xmm2, eax
-| ucomisd xmm1, xmm2
-| jnz >8 // Branch for FP exponents.
-| jp >9 // Branch for NaN exponent.
-| // Fallthrough.
-|
 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
 |->vm_powi_sse:
 | cmp eax, 1; jle >6 // i<=1?
@@ -3073,246 +3001,6 @@ static void build_subroutines(BuildCtx *ctx)
 | sseconst_1 xmm0, RDa
 | ret
 |
-|8: // FP/FP power function x^y.
-|.if X64
-| movd rax, xmm1; shl rax, 1
-| rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
-| movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
-| rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
-| .if X64WIN
-| movsd qword [rsp+16], xmm1 // Use scratch area.
-| movsd qword [rsp+8], xmm0
-| fld qword [rsp+16]
-| fld qword [rsp+8]
-| .else
-| movsd qword [rsp-16], xmm1 // Use red zone.
-| movsd qword [rsp-8], xmm0
-| fld qword [rsp-16]
-| fld qword [rsp-8]
-| .endif
-|.else
-| movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
-| movsd qword [esp+4], xmm0
-| cmp dword [esp+12], 0; jne >1
-| mov eax, [esp+16]; shl eax, 1
-| cmp eax, 0xffe00000; je >2 // x^+-Inf?
-|1:
-| cmp dword [esp+4], 0; jne >1
-| mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
-| cmp eax, 0xffe00000; je >5 // +-Inf^y?
-|1:
-| fld qword [esp+12]
-| fld qword [esp+4]
-|.endif
-| fyl2x // y*log2(x)
-| fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
-| f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
-|.if X64WIN
-| fstp qword [rsp+8] // Use scratch area.
-| movsd xmm0, qword [rsp+8]
-|.elif X64
-| fstp qword [rsp-8] // Use red zone.
-| movsd xmm0, qword [rsp-8]
-|.else
-| fstp qword [esp+4] // Needs 8 byte scratch area.
-| movsd xmm0, qword [esp+4]
-|.endif
-| ret
-|
-|9: // Handle x^NaN.
-| sseconst_1 xmm2, RDa
-| ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
-| movaps xmm0, xmm1 // x^NaN ==> NaN
-|1:
-| ret
-|
-|2: // Handle x^+-Inf.
-| sseconst_abs xmm2, RDa
-| andpd xmm0, xmm2 // |x|
-| sseconst_1 xmm2, RDa
-| ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
-| movmskpd eax, xmm1
-| xorps xmm0, xmm0
-| mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
-|3:
-| sseconst_hi xmm0, RDa, 7ff00000 // +Inf
-| ret
-|
-|4: // Handle +-0^y.
-| movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
-| xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
-| ret
-|
-|5: // Handle +-Inf^y.
-| movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
-| xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
-| ret
-|
-|// Callable from C: double lj_vm_foldfpm(double x, int fpm)
-|// Computes fpm(x) for extended math functions. ORDER FPM.
-|->vm_foldfpm:
-|.if JIT
-|.if X64
-| .if X64WIN
-| .define fpmop, CARG2d
-| .else
-| .define fpmop, CARG1d
-| .endif
-| cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
-| cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
-| sqrtsd xmm0, xmm0; ret
-|2:
-| .if X64WIN
-| movsd qword [rsp+8], xmm0 // Use scratch area.
-| fld qword [rsp+8]
-| .else
-| movsd qword [rsp-8], xmm0 // Use red zone.
-| fld qword [rsp-8]
-| .endif
-| cmp fpmop, 5; ja >2
-| .if X64WIN; pop rax; .endif
-| je >1
-| call ->vm_exp_x87
-| .if X64WIN; push rax; .endif
-| jmp >7
-|1:
-| call ->vm_exp2_x87
-| .if X64WIN; push rax; .endif
-| jmp >7
-|2: ; cmp fpmop, 7; je >1; ja >2
-| fldln2; fxch; fyl2x; jmp >7
-|1: ; fld1; fxch; fyl2x; jmp >7
-|2: ; cmp fpmop, 9; je >1; ja >2
-| fldlg2; fxch; fyl2x; jmp >7
-|1: ; fsin; jmp >7
-|2: ; cmp fpmop, 11; je >1; ja >9
-| fcos; jmp >7
-|1: ; fptan; fpop
-|7:
-| .if X64WIN
-| fstp qword [rsp+8] // Use scratch area.
-| movsd xmm0, qword [rsp+8]
-| .else
-| fstp qword [rsp-8] // Use red zone.
-| movsd xmm0, qword [rsp-8]
-| .endif
-| ret
-|.else // x86 calling convention.
-| .define fpmop, eax
-| mov fpmop, [esp+12]
-| movsd xmm0, qword [esp+4]
-| cmp fpmop, 1; je >1; ja >2
-| call ->vm_floor_sse; jmp >7
-|1: ; call ->vm_ceil_sse; jmp >7
-|2: ; cmp fpmop, 3; je >1; ja >2
-| call ->vm_trunc_sse; jmp >7
-|1:
-| sqrtsd xmm0, xmm0
-|7:
-| movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
-| fld qword [esp+4]
-| ret
-|2: ; fld qword [esp+4]
-| cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
-|2: ; cmp fpmop, 7; je >1; ja >2
-| fldln2; fxch; fyl2x; ret
-|1: ; fld1; fxch; fyl2x; ret
-|2: ; cmp fpmop, 9; je >1; ja >2
-| fldlg2; fxch; fyl2x; ret
-|1: ; fsin; ret
-|2: ; cmp fpmop, 11; je >1; ja >9
-| fcos; ret
-|1: ; fptan; fpop; ret
-|.endif
-|9: ; int3 // Bad fpm.
-|.endif
-|
-|// Callable from C: double lj_vm_foldarith(double x, double y, int op)
-|// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
-|// and basic math functions. ORDER ARITH
-|->vm_foldarith:
-|.if X64
-|
-| .if X64WIN
-| .define foldop, CARG3d
-| .else
-| .define foldop, CARG1d
-| .endif
-| cmp foldop, 1; je >1; ja >2
-| addsd xmm0, xmm1; ret
-|1: ; subsd xmm0, xmm1; ret
-|2: ; cmp foldop, 3; je >1; ja >2
-| mulsd xmm0, xmm1; ret
-|1: ; divsd xmm0, xmm1; ret
-|2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
-| cmp foldop, 7; je >1; ja >2
-| sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
-|1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
-|2: ; cmp foldop, 9; ja >2
-|.if X64WIN
-| movsd qword [rsp+8], xmm0 // Use scratch area.
-| movsd qword [rsp+16], xmm1
-| fld qword [rsp+8]
-| fld qword [rsp+16]
-|.else
-| movsd qword [rsp-8], xmm0 // Use red zone.
-| movsd qword [rsp-16], xmm1
-| fld qword [rsp-8]
-| fld qword [rsp-16]
-|.endif
-| je >1
-| fpatan
-|7:
-|.if X64WIN
-| fstp qword [rsp+8] // Use scratch area.
-| movsd xmm0, qword [rsp+8]
-|.else
-| fstp qword [rsp-8] // Use red zone.
-| movsd xmm0, qword [rsp-8]
-|.endif
-| ret
-|1: ; fxch; fscale; fpop1; jmp <7
-|2: ; cmp foldop, 11; je >1; ja >9
-| minsd xmm0, xmm1; ret
-|1: ; maxsd xmm0, xmm1; ret
-|9: ; int3 // Bad op.
-|
-|.else // x86 calling convention.
-|
-| .define foldop, eax
-| mov foldop, [esp+20]
-| movsd xmm0, qword [esp+4]
-| movsd xmm1, qword [esp+12]
-| cmp foldop, 1; je >1; ja >2
-| addsd xmm0, xmm1
-|7:
-| movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
-| fld qword [esp+4]
-| ret
-|1: ; subsd xmm0, xmm1; jmp <7
-|2: ; cmp foldop, 3; je >1; ja >2
-| mulsd xmm0, xmm1; jmp <7
-|1: ; divsd xmm0, xmm1; jmp <7
-|2: ; cmp foldop, 5
-| je >1; ja >2
-| call ->vm_mod; jmp <7
-|1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
-|2: ; cmp foldop, 7; je >1; ja >2
-| sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
-|1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
-|2: ; cmp foldop, 9; ja >2
-| fld qword [esp+4] // Reload from stack
-| fld qword [esp+12]
-| je >1
-| fpatan; ret
-|1: ; fxch; fscale; fpop1; ret
-|2: ; cmp foldop, 11; je >1; ja >9
-| minsd xmm0, xmm1; jmp <7
-|1: ; maxsd xmm0, xmm1; jmp <7
-|9: ; int3 // Bad op.
-|
-|.endif
-|
 |//-----------------------------------------------------------------------
 |//-- Miscellaneous functions --------------------------------------------
 |//-----------------------------------------------------------------------
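
Editor's note: with ->vm_pow_sse gone, x^y now goes through libm pow() (see the math_extern2 pow fast function above and the BC_POW change below). The special cases the removed code handled explicitly, per its own comments, are also what C99 specifies for pow(), so those edge cases remain covered by libm; general results may still differ in the last bits since a different algorithm computes them. A quick standalone check, assuming a C99-conforming libm (link with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
  printf("pow(0, -1)   = %g\n", pow(0.0, -1.0));       /* +-0^y, y < 0   -> +Inf */
  printf("pow(2, -Inf) = %g\n", pow(2.0, -INFINITY));  /* |x| > 1, x^-Inf -> 0   */
  printf("pow(1, NaN)  = %g\n", pow(1.0, NAN));        /* 1^NaN           -> 1   */
  printf("pow(Inf, -2) = %g\n", pow(INFINITY, -2.0));  /* +Inf^y, y < 0   -> 0   */
  return 0;
}
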
@@ -4107,8 +3795,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     break;
   case BC_POW:
     | ins_arithpre movsd, xmm1
-    | call ->vm_pow_sse
+    | mov RB, BASE
+    |.if not X64
+    | movsd FPARG1, xmm0
+    | movsd FPARG3, xmm1
+    |.endif
+    | call extern pow
+    | movzx RA, PC_RA
+    | mov BASE, RB
+    |.if X64
     | ins_arithpost
+    |.else
+    | fstp qword [BASE+RA*8]
+    |.endif
     | ins_next
     break;
 