From 690760aa3853e63331f46e40c8276d9f5939261d Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 25 Dec 2009 23:12:30 +0100 Subject: [PATCH] Add SSE variant of pow/powi to interpreter. Use SSE pow/powi helper functions from compiled code. Cleanup use of helper functions. Related cleanups of folding functions in x64 interpreter. --- src/buildvm_x86.dasc | 432 +++++++++++----- src/buildvm_x86.h | 1139 +++++++++++++++++++++--------------------- src/lj_asm.c | 111 ++-- src/lj_vm.h | 7 +- 4 files changed, 949 insertions(+), 740 deletions(-) diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 99842d08..9ce8ef16 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc @@ -96,10 +96,6 @@ |.type TRACE, Trace |.type EXITINFO, ExitInfo | -|// x86/x64 portability macros -|.macro push_eax; .if X64; push rax; .else; push eax; .endif; .endmacro -|.macro pop_eax; .if X64; pop rax; .else; pop eax; .endif; .endmacro -| |// Stack layout while in interpreter. Must match with lj_frame.h. |//----------------------------------------------------------------------- |.if not X64 // x86 stack layout. @@ -2072,10 +2068,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | fpop1 | jmp ->fff_resn | - if (0 && sse) { // NYI - |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 + if (sse) { + |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 } else { - |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn + |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn } | |.macro math_minmax, name, cmovop, nocmovop, sseop @@ -2091,6 +2087,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | add RB, 1 | jmp <1 ||} else { + |.if not X64 |.ffunc_n name | mov RB, 2 |1: @@ -2101,12 +2098,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) ||if (cmov) { | fucomi st1; cmovop st1; fpop1 ||} else { - | push_eax + | push eax | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop - | pop_eax + | pop eax ||} | add RB, 1 | jmp <1 + |.endif ||} |.endmacro | @@ -2842,19 +2840,29 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) |->vm_exp: | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) |->vm_exp2: - | fst dword [esp+4] // Caveat: overwrites ARG1. - | cmp dword [esp+4], 0x7f800000; je >1 // Special case: e^+Inf = +Inf - | cmp dword [esp+4], 0xff800000; je >2 // Special case: e^-Inf = 0 + | .if X64WIN + | .define expscratch, dword [rsp+8] // Use scratch area. + | .elif X64 + | .define expscratch, dword [rsp-8] // Use red zone. + | .else + | .define expscratch, dword [esp+4] // Needs 4 byte scratch area. + | .endif + | fst expscratch // Caveat: overwrites ARG1. + | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf + | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. - | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. + | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int |1: | ret |2: | fpop; fldz; ret | - |// Generic power function x^y. Called by BC_POW, math.pow fast function - |// and vm_arith. Args/ret on x87 stack (y on top). No int/xmm regs modified. + |// Generic power function x^y. Called by BC_POW, math.pow fast function, + |// and vm_arith. + if (!sse) { + |.if not X64 + |// Args/ret on x87 stack (y on top). RC (eax) modified. |// Caveat: needs 3 slots on x87 stack! |->vm_pow: | fist dword [esp+4] // Store/reload int before comparison. @@ -2862,18 +2870,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) ||if (cmov) { | fucomip st1 ||} else { - | push_eax; fucomp st1; fnstsw ax; sahf; pop_eax + | fucomp st1; fnstsw ax; sahf ||} | jnz >8 // Branch for FP exponents. | jp >9 // Branch for NaN exponent. | fpop // Pop y and fallthrough to vm_powi. | - |// FP/int power function x^i. Called from JIT code. Arg1/ret on x87 stack. - |// Arg2 (int) on C stack. No int/xmm regs modified. + |// FP/int power function x^i. Arg1/ret on x87 stack. + |// Arg2 (int) on C stack. RC (eax) modified. |// Caveat: needs 2 slots on x87 stack! - |->vm_powi: - | push_eax - | mov eax, [esp+8] + | mov eax, [esp+4] | cmp eax, 1; jle >6 // i<=1? | // Now 1 < (unsigned)i <= 0x80000000. |1: // Handle leading zeros. @@ -2893,7 +2899,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) |4: | fmulp st1 |5: - | pop_eax | ret |6: | je <5 // x^1 ==> x @@ -2904,19 +2909,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | jmp <1 // x^-i ==> (1/x)^i |7: | fpop; fld1 // x^0 ==> 1 - | pop_eax | ret | |8: // FP/FP power function x^y. - | push_eax - | fst dword [esp+8] + | fst dword [esp+4] | fxch - | fst dword [esp+12] - | mov eax, [esp+8]; shl eax, 1 + | fst dword [esp+8] + | mov eax, [esp+4]; shl eax, 1 | cmp eax, 0xff000000; je >2 // x^+-Inf? - | mov eax, [esp+12]; shl eax, 1; je >4 // +-0^y? + | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | cmp eax, 0xff000000; je >4 // +-Inf^y? - | pop_eax | fyl2x | jmp ->vm_exp2raw | @@ -2925,7 +2927,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) ||if (cmov) { | fucomip st2 ||} else { - | push_eax; fucomp st2; fnstsw ax; sahf; pop_eax + | fucomp st2; fnstsw ax; sahf ||} | je >1 // 1^NaN ==> 1 | fxch // x^NaN ==> NaN @@ -2943,41 +2945,205 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) ||} | je >3 // +-1^+-Inf ==> 1 | fpop; fabs; fldz; mov eax, 0; setc al - | ror eax, 1; xor eax, [esp+8]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 + | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | fxch |3: - | fpop1; fabs; pop_eax + | fpop1; fabs | ret | |4: // Handle +-0^y or +-Inf^y. - | cmp dword [esp+8], 0; jge <3 // y >= 0, x^y ==> |x| + | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x| | fpop; fpop - | test eax, eax; pop_eax; jz >5 // y < 0, +-0^y ==> +Inf + | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf | fldz // y < 0, +-Inf^y ==> 0 | ret |5: - | mov dword [esp+8], 0x7f800000 // Return +Inf. - | fld dword [esp+8] + | mov dword [esp+4], 0x7f800000 // Return +Inf. + | fld dword [esp+4] + | ret + |.endif + } else { + |->vm_pow: + } + | + |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. + |// Needs 16 byte scratch area for x86. Also called from JIT code. + |->vm_pow_sse: + | cvtsd2si eax, xmm1 + | cvtsi2sd xmm2, eax + | ucomisd xmm1, xmm2 + | jnz >8 // Branch for FP exponents. + | jp >9 // Branch for NaN exponent. + | // Fallthrough to vm_powi_sse. + | + |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. + |->vm_powi_sse: + | cmp eax, 1; jle >6 // i<=1? + | // Now 1 < (unsigned)i <= 0x80000000. + |1: // Handle leading zeros. + | test eax, 1; jnz >2 + | mulsd xmm0, xmm0 + | shr eax, 1 + | jmp <1 + |2: + | shr eax, 1; jz >5 + | movaps xmm1, xmm0 + |3: // Handle trailing bits. + | mulsd xmm0, xmm0 + | shr eax, 1; jz >4 + | jnc <3 + | mulsd xmm1, xmm0 + | jmp <3 + |4: + | mulsd xmm0, xmm1 + |5: + | ret + |6: + | je <5 // x^1 ==> x + | jb >7 + | push RDa + | sseconst_1 xmm1, RDa + | divsd xmm1, xmm0 + | pop RDa + | movaps xmm0, xmm1 + | neg eax + | cmp eax, 1; je <5 // x^-1 ==> 1/x + | jmp <1 // x^-i ==> (1/x)^i + |7: + | sseconst_1 xmm0, RDa + | ret + | + |8: // FP/FP power function x^y. + |.if X64 + | movd rax, xmm1; shl rax, 1 + | ror rax, 32; cmp rax, 0xffe00000; je >2 // x^+-Inf? + | movd rax, xmm0; shl rax, 1; je >4 // +-0^y? + | ror rax, 32; cmp rax, 0xffe00000; je >5 // +-Inf^y? + | .if X64WIN + | movsd qword [rsp+16], xmm1 // Use scratch area. + | movsd qword [rsp+8], xmm0 + | fld qword [rsp+16] + | fld qword [rsp+8] + | .else + | movsd qword [rsp-16], xmm1 // Use red zone. + | movsd qword [rsp-8], xmm0 + | fld qword [rsp-16] + | fld qword [rsp-8] + | .endif + |.else + | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area. + | movsd qword [esp+4], xmm0 + | cmp dword [esp+12], 0; jne >1 + | mov eax, [esp+16]; shl eax, 1 + | cmp eax, 0xffe00000; je >2 // x^+-Inf? + |1: + | cmp dword [esp+4], 0; jne >1 + | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? + | cmp eax, 0xffe00000; je >5 // +-Inf^y? + |1: + | fld qword [esp+12] + | fld qword [esp+4] + |.endif + | fyl2x // y*log2(x) + | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. + | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int + |.if X64WIN + | fstp qword [rsp+8] // Use scratch area. + | movsd xmm0, qword [rsp+8] + |.elif X64 + | fstp qword [rsp-8] // Use red zone. + | movsd xmm0, qword [rsp-8] + |.else + | fstp qword [esp+4] // Needs 8 byte scratch area. + | movsd xmm0, qword [esp+4] + |.endif + | ret + | + |9: // Handle x^NaN. + | sseconst_1 xmm2, RDa + | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1 + | movaps xmm0, xmm1 // x^NaN ==> NaN + |1: + | ret + | + |2: // Handle x^+-Inf. + | sseconst_abs xmm2, RDa + | andpd xmm0, xmm2 // |x| + | sseconst_1 xmm2, RDa + | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1 + | movmskpd eax, xmm1 + | xorps xmm0, xmm0 + | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0 + |3: + | sseconst_hi xmm0, RDa, 7ff00000 // +Inf + | ret + | + |4: // Handle +-0^y. + | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf + | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0 + | ret + | + |5: // Handle +-Inf^y. + | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf + | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0 | ret | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) |// Computes fpm(x) for extended math functions. ORDER FPM. |->vm_foldfpm: if (sse) { - |.if X64WIN - | .define fpmop, CARG2d - |.elif X64 - | .define fpmop, CARG1d - |.else - | .define fpmop, eax - | mov fpmop, [esp+12] - | movsd xmm0, qword [esp+4] - |.endif |.if X64 + | + | .if X64WIN + | .define fpmop, CARG2d + | .else + | .define fpmop, CARG1d + | .endif | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | cmp fpmop, 3; jb ->vm_trunc; ja >2 | sqrtsd xmm0, xmm0; ret - |.else + |2: + | .if X64WIN + | movsd qword [rsp+8], xmm0 // Use scratch area. + | fld qword [rsp+8] + | .else + | movsd qword [rsp-8], xmm0 // Use red zone. + | fld qword [rsp-8] + | .endif + | cmp fpmop, 5; ja >2 + | .if X64WIN; pop rax; .endif + | je >1 + | call ->vm_exp + | .if X64WIN; push rax; .endif + | jmp >7 + |1: + | call ->vm_exp2 + | .if X64WIN; push rax; .endif + | jmp >7 + |2: ; cmp fpmop, 7; je >1; ja >2 + | fldln2; fxch; fyl2x; jmp >7 + |1: ; fld1; fxch; fyl2x; jmp >7 + |2: ; cmp fpmop, 9; je >1; ja >2 + | fldlg2; fxch; fyl2x; jmp >7 + |1: ; fsin; jmp >7 + |2: ; cmp fpmop, 11; je >1; ja >9 + | fcos; jmp >7 + |1: ; fptan; fpop + |7: + | .if X64WIN + | fstp qword [rsp+8] // Use scratch area. + | movsd xmm0, qword [rsp+8] + | .else + | fstp qword [rsp-8] // Use red zone. + | movsd xmm0, qword [rsp-8] + | .endif + | ret + | + |.else // x86 calling convention. + | + | .define fpmop, eax + | mov fpmop, [esp+12] + | movsd xmm0, qword [esp+4] | cmp fpmop, 1; je >1; ja >2 | call ->vm_floor; jmp >7 |1: ; call ->vm_ceil; jmp >7 @@ -2989,27 +3155,36 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | fld qword [esp+4] | ret + |2: ; fld qword [esp+4] + | cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2 + |2: ; cmp fpmop, 7; je >1; ja >2 + | fldln2; fxch; fyl2x; ret + |1: ; fld1; fxch; fyl2x; ret + |2: ; cmp fpmop, 9; je >1; ja >2 + | fldlg2; fxch; fyl2x; ret + |1: ; fsin; ret + |2: ; cmp fpmop, 11; je >1; ja >9 + | fcos; ret + |1: ; fptan; fpop; ret + | |.endif - |2: - | fld qword [esp+4] } else { | mov fpmop, [esp+12] | fld qword [esp+4] | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | cmp fpmop, 3; jb ->vm_trunc; ja >2 | fsqrt; ret - |2: + |2: ; cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2 + | cmp fpmop, 7; je >1; ja >2 + | fldln2; fxch; fyl2x; ret + |1: ; fld1; fxch; fyl2x; ret + |2: ; cmp fpmop, 9; je >1; ja >2 + | fldlg2; fxch; fyl2x; ret + |1: ; fsin; ret + |2: ; cmp fpmop, 11; je >1; ja >9 + | fcos; ret + |1: ; fptan; fpop; ret } - | cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2 - | cmp fpmop, 7; je >1; ja >2 - | fldln2; fxch; fyl2x; ret - |1: ; fld1; fxch; fyl2x; ret - |2: ; cmp fpmop, 9; je >1; ja >2 - | fldlg2; fxch; fyl2x; ret - |1: ; fsin; ret - |2: ; cmp fpmop, 11; je >1; ja >9 - | fcos; ret - |1: ; fptan; fpop; ret |9: ; int3 // Bad fpm. | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) @@ -3017,72 +3192,87 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) |// and basic math functions. ORDER ARITH |->vm_foldarith: if (sse) { - |.macro retxmm0; .if X64; ret; .else; jmp >7; .endif; .endmacro - |.macro retst0; .if X64; jmp >7; .else; ret; .endif; .endmacro + |.if X64 | + | .if X64WIN + | .define foldop, CARG3d + | .else + | .define foldop, CARG1d + | .endif + | cmp foldop, 1; je >1; ja >2 + | addsd xmm0, xmm1; ret + |1: ; subsd xmm0, xmm1; ret + |2: ; cmp foldop, 3; je >1; ja >2 + | mulsd xmm0, xmm1; ret + |1: ; divsd xmm0, xmm1; ret + |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow + | cmp foldop, 7; je >1; ja >2 + | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret + |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret + |2: ; cmp foldop, 9; ja >2 |.if X64WIN - | .define foldop, CARG3d - |.elif X64 - | .define foldop, CARG1d + | movsd qword [rsp+8], xmm0 // Use scratch area. + | movsd qword [rsp+16], xmm1 + | fld qword [rsp+8] + | fld qword [rsp+16] |.else + | movsd qword [rsp-8], xmm0 // Use red zone. + | movsd qword [rsp-16], xmm1 + | fld qword [rsp-8] + | fld qword [rsp-16] + |.endif + | je >1 + | fpatan + |7: + |.if X64WIN + | fstp qword [rsp+8] // Use scratch area. + | movsd xmm0, qword [rsp+8] + |.else + | fstp qword [rsp-8] // Use red zone. + | movsd xmm0, qword [rsp-8] + |.endif + | ret + |1: ; fxch; fscale; fpop1; jmp <7 + |2: ; cmp foldop, 11; je >1; ja >9 + | minsd xmm0, xmm1; ret + |1: ; maxsd xmm0, xmm1; ret + |9: ; int3 // Bad op. + | + |.else // x86 calling convention. + | | .define foldop, eax | mov foldop, [esp+20] | movsd xmm0, qword [esp+4] | movsd xmm1, qword [esp+12] - |.endif | cmp foldop, 1; je >1; ja >2 - | addsd xmm0, xmm1; retxmm0 - |1: ; subsd xmm0, xmm1; retxmm0 - |2: ; cmp foldop, 3; je >1; ja >2 - | mulsd xmm0, xmm1; retxmm0 - |1: ; divsd xmm0, xmm1; retxmm0 - |2: ; cmp foldop, 5 - |.if X64 - | jb ->vm_mod; je ->vm_pow // NYI: broken without SSE vm_pow. - |.else - | je >1; ja >2 - | call ->vm_mod; retxmm0 - |1: ; fld qword [esp+4]; fld qword [esp+12]; jmp ->vm_pow // NYI - |2: - |.endif - | cmp foldop, 7; je >1; ja >2 - | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; retxmm0 - |1: - | sseconst_abs xmm1, RDa; andps xmm0, xmm1; retxmm0 - |2: ; cmp foldop, 9; ja >2 - |.if X64WIN - | movsd qword [esp+8], xmm0 // Use scratch area. - | movsd qword [esp+16], xmm1 - | fld qword [esp+8] - | fld qword [esp+16] - |.elif X64 - | movsd qword [esp-8], xmm0 // Use red zone. - | movsd qword [esp-16], xmm1 - | fld qword [esp-8] - | fld qword [esp-16] - |.else - | fld qword [esp+4] // Reload from stack - | fld qword [esp+12] - |.endif - | je >1 - | fpatan; retst0 - |1: ; fxch; fscale; fpop1; retst0 - |2: ; cmp foldop, 11; je >1; ja >9 - | minsd xmm0, xmm1; retxmm0 - |1: ; maxsd xmm0, xmm1; retxmm0 - |9: ; int3 // Bad op. - |7: // Move return value depending on calling convention. - |.if X64WIN - | fstp qword [esp+8] // Use scratch area. - | movsd xmm0, qword [esp+8] - |.elif X64 - | fstp qword [esp-8] // Use red zone. - | movsd xmm0, qword [esp-8] - |.else + | addsd xmm0, xmm1 + |7: | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | fld qword [esp+4] - |.endif | ret + |1: ; subsd xmm0, xmm1; jmp <7 + |2: ; cmp foldop, 3; je >1; ja >2 + | mulsd xmm0, xmm1; jmp <7 + |1: ; divsd xmm0, xmm1; jmp <7 + |2: ; cmp foldop, 5 + | je >1; ja >2 + | call ->vm_mod; jmp <7 + |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. + |2: ; cmp foldop, 7; je >1; ja >2 + | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 + |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 + |2: ; cmp foldop, 9; ja >2 + | fld qword [esp+4] // Reload from stack + | fld qword [esp+12] + | je >1 + | fpatan; ret + |1: ; fxch; fscale; fpop1; ret + |2: ; cmp foldop, 11; je >1; ja >9 + | minsd xmm0, xmm1; jmp <7 + |1: ; maxsd xmm0, xmm1; jmp <7 + |9: ; int3 // Bad op. + | + |.endif } else { | mov eax, [esp+20] | fld qword [esp+4] @@ -3483,17 +3673,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. break; case BC_POW: - if (sse) { - sse = 0; /* NYI: temporary workaround. */ - | ins_arithpre fld, movsd, xmm1 - | call ->vm_pow - | ins_arithpost - sse = 1; - } else { - | ins_arithpre fld, movsd, xmm1 - | call ->vm_pow - | ins_arithpost - } + | ins_arithpre fld, movsd, xmm1 + | call ->vm_pow + | ins_arithpost | ins_next break; diff --git a/src/buildvm_x86.h b/src/buildvm_x86.h index e2ba7c1e..f1f14e70 100644 --- a/src/buildvm_x86.h +++ b/src/buildvm_x86.h @@ -12,7 +12,7 @@ #define DASM_SECTION_CODE_OP 0 #define DASM_SECTION_CODE_SUB 1 #define DASM_MAXSECTION 2 -static const unsigned char build_actionlist[14716] = { +static const unsigned char build_actionlist[15226] = { 254,1,248,10,137,202,139,173,233,137,114,252,252,15,182,141,233,139,181,233, 139,189,233,139,108,36,48,141,12,202,141,68,194,252,252,59,141,233,15,135, 244,11,248,9,189,237,248,1,137,40,137,104,8,131,192,16,57,200,15,130,244, @@ -469,48 +469,72 @@ static const unsigned char build_actionlist[14716] = { 222,201,248,146,217,84,36,4,129,124,36,4,0,0,128,127,15,132,244,247,129,124, 36,4,0,0,128,252,255,15,132,244,248,248,147,217,192,217,252,252,220,252,233, 217,201,217,252,240,217,232,222,193,217,252,253,221,217,248,1,195,248,2,221, - 216,217,252,238,195,248,106,219,84,36,4,219,68,36,4,255,223,252,233,255,80, - 221,252,233,223,224,158,88,255,15,133,244,254,15,138,244,255,221,216,248, - 148,80,139,68,36,8,131,252,248,1,15,142,244,252,248,1,169,1,0,0,0,15,133, - 244,248,216,200,209,232,252,233,244,1,248,2,209,232,15,132,244,251,217,192, - 248,3,216,200,209,232,15,132,244,250,15,131,244,3,255,220,201,252,233,244, - 3,248,4,222,201,248,5,88,195,248,6,15,132,244,5,15,130,244,253,217,232,222, - 252,241,252,247,216,131,252,248,1,15,132,244,5,252,233,244,1,248,7,221,216, - 217,232,88,195,248,8,80,217,84,36,8,217,201,217,84,36,12,139,68,36,8,209, - 224,61,0,0,0,252,255,15,132,244,248,139,68,36,12,209,224,15,132,244,250,61, - 0,0,0,252,255,15,132,244,250,255,88,217,252,241,252,233,244,147,248,9,217, - 232,255,223,252,234,255,80,221,252,234,223,224,158,88,255,15,132,244,247, - 217,201,248,1,221,216,195,248,2,217,225,217,232,255,221,252,233,223,224,158, - 255,15,132,244,249,221,216,217,225,217,252,238,184,0,0,0,0,15,146,208,209, - 200,51,68,36,8,15,137,244,249,217,201,248,3,221,217,217,225,88,195,248,4, - 131,124,36,8,0,15,141,244,3,221,216,221,216,133,192,88,15,132,244,251,217, - 252,238,195,248,5,199,68,36,8,0,0,128,127,217,68,36,8,195,248,149,255,139, - 68,36,12,252,242,15,16,68,36,4,131,252,248,1,15,132,244,247,15,135,244,248, - 232,244,81,252,233,244,253,248,1,232,244,83,252,233,244,253,248,2,131,252, - 248,3,15,132,244,247,15,135,244,248,232,244,103,255,252,233,244,253,248,1, - 252,242,15,81,192,248,7,252,242,15,17,68,36,4,221,68,36,4,195,248,2,221,68, - 36,4,255,139,68,36,12,221,68,36,4,131,252,248,1,15,130,244,81,15,132,244, - 83,131,252,248,3,15,130,244,103,15,135,244,248,217,252,250,195,248,2,255, - 131,252,248,5,15,130,244,87,15,132,244,146,131,252,248,7,15,132,244,247,15, - 135,244,248,217,252,237,217,201,217,252,241,195,248,1,217,232,217,201,217, - 252,241,195,248,2,131,252,248,9,15,132,244,247,15,135,244,248,217,252,236, - 217,201,217,252,241,195,248,1,217,252,254,195,248,2,131,252,248,11,15,132, - 244,247,15,135,244,255,255,217,252,255,195,248,1,217,252,242,221,216,195, - 248,9,204,248,150,255,139,68,36,20,252,242,15,16,68,36,4,252,242,15,16,76, - 36,12,131,252,248,1,15,132,244,247,15,135,244,248,252,242,15,88,193,252,233, - 244,253,248,1,252,242,15,92,193,252,233,244,253,248,2,131,252,248,3,15,132, - 244,247,15,135,244,248,252,242,15,89,193,252,233,244,253,248,1,252,242,15, - 94,193,252,233,244,253,248,2,255,131,252,248,5,15,132,244,247,15,135,244, - 248,232,244,145,252,233,244,253,248,1,221,68,36,4,221,68,36,12,252,233,244, - 106,248,2,131,252,248,7,15,132,244,247,15,135,244,248,184,0,0,0,128,102,15, - 110,200,102,15,112,201,81,15,87,193,252,233,244,253,248,1,102,15,252,239, - 201,102,15,118,201,102,15,115,209,1,15,84,193,252,233,244,253,248,2,255,131, - 252,248,9,15,135,244,248,221,68,36,4,221,68,36,12,15,132,244,247,217,252, - 243,195,248,1,217,201,217,252,253,221,217,195,248,2,131,252,248,11,15,132, - 244,247,15,135,244,255,252,242,15,93,193,252,233,244,253,248,1,252,242,15, - 95,193,252,233,244,253,248,9,204,248,7,252,242,15,17,68,36,4,221,68,36,4, - 195,255,139,68,36,20,221,68,36,4,221,68,36,12,131,252,248,1,15,132,244,247, - 15,135,244,248,222,193,195,248,1,222,252,233,195,248,2,131,252,248,3,15,132, + 216,217,252,238,195,255,248,106,219,84,36,4,219,68,36,4,255,223,252,233,255, + 221,252,233,223,224,158,255,15,133,244,254,15,138,244,255,221,216,139,68, + 36,4,131,252,248,1,15,142,244,252,248,1,169,1,0,0,0,15,133,244,248,216,200, + 209,232,252,233,244,1,248,2,209,232,15,132,244,251,217,192,248,3,216,200, + 209,232,15,132,244,250,15,131,244,3,220,201,252,233,244,3,248,4,255,222,201, + 248,5,195,248,6,15,132,244,5,15,130,244,253,217,232,222,252,241,252,247,216, + 131,252,248,1,15,132,244,5,252,233,244,1,248,7,221,216,217,232,195,248,8, + 217,84,36,4,217,201,217,84,36,8,139,68,36,4,209,224,61,0,0,0,252,255,15,132, + 244,248,139,68,36,8,209,224,15,132,244,250,61,0,0,0,252,255,15,132,244,250, + 217,252,241,252,233,244,147,248,9,255,217,232,255,223,252,234,255,221,252, + 234,223,224,158,255,15,132,244,247,217,201,248,1,221,216,195,248,2,217,225, + 217,232,255,15,132,244,249,221,216,217,225,217,252,238,184,0,0,0,0,15,146, + 208,209,200,51,68,36,4,15,137,244,249,217,201,248,3,221,217,217,225,195,248, + 4,131,124,36,4,0,15,141,244,3,221,216,221,216,133,192,15,132,244,251,217, + 252,238,195,248,5,199,68,36,4,0,0,128,127,217,68,36,4,195,255,248,106,255, + 248,148,252,242,15,45,193,252,242,15,42,208,102,15,46,202,15,133,244,254, + 15,138,244,255,248,149,131,252,248,1,15,142,244,252,248,1,169,1,0,0,0,15, + 133,244,248,252,242,15,89,192,209,232,252,233,244,1,248,2,209,232,15,132, + 244,251,15,40,200,248,3,252,242,15,89,192,209,232,15,132,244,250,15,131,244, + 3,255,252,242,15,89,200,252,233,244,3,248,4,252,242,15,89,193,248,5,195,248, + 6,15,132,244,5,15,130,244,253,80,184,0,0,252,240,63,102,15,110,200,102,15, + 112,201,81,252,242,15,94,200,88,15,40,193,252,247,216,131,252,248,1,15,132, + 244,5,252,233,244,1,248,7,184,0,0,252,240,63,102,15,110,192,102,15,112,192, + 81,195,248,8,252,242,15,17,76,36,12,252,242,15,17,68,36,4,131,124,36,12,0, + 15,133,244,247,139,68,36,16,209,224,61,0,0,224,252,255,15,132,244,248,248, + 1,255,131,124,36,4,0,15,133,244,247,139,68,36,8,209,224,15,132,244,250,61, + 0,0,224,252,255,15,132,244,251,248,1,221,68,36,12,221,68,36,4,217,252,241, + 217,192,217,252,252,220,252,233,217,201,217,252,240,217,232,222,193,217,252, + 253,221,217,221,92,36,4,252,242,15,16,68,36,4,195,248,9,184,0,0,252,240,63, + 102,15,110,208,102,15,112,210,81,102,15,46,194,15,132,244,247,15,40,193,248, + 1,195,248,2,102,15,252,239,210,102,15,118,210,102,15,115,210,1,102,15,84, + 194,184,0,0,252,240,63,102,15,110,208,102,15,112,210,81,102,15,46,194,15, + 132,244,1,102,15,80,193,15,87,192,136,196,15,146,208,48,224,15,133,244,1, + 248,3,184,0,0,252,240,127,102,15,110,192,102,15,112,192,81,195,248,4,102, + 15,80,193,133,192,15,133,244,3,255,15,87,192,195,248,5,102,15,80,193,133, + 192,15,132,244,3,15,87,192,195,248,150,255,139,68,36,12,252,242,15,16,68, + 36,4,131,252,248,1,15,132,244,247,15,135,244,248,232,244,81,252,233,244,253, + 248,1,232,244,83,252,233,244,253,248,2,131,252,248,3,15,132,244,247,15,135, + 244,248,232,244,103,255,252,233,244,253,248,1,252,242,15,81,192,248,7,252, + 242,15,17,68,36,4,221,68,36,4,195,248,2,221,68,36,4,131,252,248,5,15,130, + 244,87,15,132,244,146,248,2,131,252,248,7,15,132,244,247,15,135,244,248,217, + 252,237,217,201,217,252,241,195,248,1,217,232,217,201,217,252,241,195,248, + 2,131,252,248,9,15,132,244,247,15,135,244,248,255,217,252,236,217,201,217, + 252,241,195,248,1,217,252,254,195,248,2,131,252,248,11,15,132,244,247,15, + 135,244,255,217,252,255,195,248,1,217,252,242,221,216,195,255,139,68,36,12, + 221,68,36,4,131,252,248,1,15,130,244,81,15,132,244,83,131,252,248,3,15,130, + 244,103,15,135,244,248,217,252,250,195,248,2,131,252,248,5,15,130,244,87, + 15,132,244,146,131,252,248,7,15,132,244,247,15,135,244,248,217,252,237,217, + 201,217,252,241,195,248,1,217,232,217,201,217,252,241,195,248,2,131,252,248, + 9,15,132,244,247,255,15,135,244,248,217,252,236,217,201,217,252,241,195,248, + 1,217,252,254,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,217, + 252,255,195,248,1,217,252,242,221,216,195,255,248,9,204,248,151,255,139,68, + 36,20,252,242,15,16,68,36,4,252,242,15,16,76,36,12,131,252,248,1,15,132,244, + 247,15,135,244,248,252,242,15,88,193,248,7,252,242,15,17,68,36,4,221,68,36, + 4,195,248,1,252,242,15,92,193,252,233,244,7,248,2,131,252,248,3,15,132,244, + 247,15,135,244,248,252,242,15,89,193,252,233,244,7,248,1,252,242,15,94,193, + 252,233,244,7,248,2,131,252,248,5,15,132,244,247,255,15,135,244,248,232,244, + 145,252,233,244,7,248,1,90,232,244,106,82,252,233,244,7,248,2,131,252,248, + 7,15,132,244,247,15,135,244,248,184,0,0,0,128,102,15,110,200,102,15,112,201, + 81,15,87,193,252,233,244,7,248,1,102,15,252,239,201,102,15,118,201,102,15, + 115,209,1,15,84,193,252,233,244,7,248,2,255,131,252,248,9,15,135,244,248, + 221,68,36,4,221,68,36,12,15,132,244,247,217,252,243,195,248,1,217,201,217, + 252,253,221,217,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,252, + 242,15,93,193,252,233,244,7,248,1,252,242,15,95,193,252,233,244,7,248,9,204, + 255,139,68,36,20,221,68,36,4,221,68,36,12,131,252,248,1,15,132,244,247,15, + 135,244,248,222,193,195,248,1,222,252,233,195,248,2,131,252,248,3,15,132, 244,247,15,135,244,248,222,201,195,248,1,222,252,249,195,248,2,131,252,248, 5,15,130,244,145,15,132,244,106,131,252,248,7,15,132,244,247,15,135,244,248, 255,221,216,217,224,195,248,1,221,216,217,225,195,248,2,131,252,248,9,15, @@ -518,208 +542,208 @@ static const unsigned char build_actionlist[14716] = { 217,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,255,219,252,233, 219,209,221,217,195,248,1,219,252,233,218,209,221,217,195,255,221,225,223, 224,252,246,196,1,15,132,244,248,217,201,248,2,221,216,195,248,1,221,225, - 223,224,252,246,196,1,15,133,244,248,217,201,248,2,221,216,195,255,248,9, - 204,255,248,151,156,90,137,209,129,252,242,0,0,32,0,82,157,156,90,49,192, - 57,209,15,132,244,247,139,68,36,4,87,83,15,162,139,124,36,16,137,7,137,95, - 4,137,79,8,137,87,12,91,95,248,1,195,255,249,255,129,124,253,202,4,239,15, - 135,244,41,129,124,253,194,4,239,15,135,244,41,255,252,242,15,16,4,194,131, - 198,4,102,15,46,4,202,255,221,4,202,221,4,194,131,198,4,255,223,252,233,221, - 216,255,218,252,233,223,224,158,255,15,134,244,248,255,15,131,244,248,255, - 248,1,15,183,70,252,254,141,180,253,134,233,248,2,139,6,15,182,204,15,182, - 232,131,198,4,193,232,16,252,255,36,171,255,139,108,194,4,131,198,4,129,252, - 253,239,15,135,244,251,129,124,253,202,4,239,15,135,244,251,255,252,242,15, - 16,4,194,102,15,46,4,202,255,221,4,202,221,4,194,255,15,138,244,248,15,133, - 244,248,255,15,138,244,248,15,132,244,247,255,248,1,15,183,70,252,254,141, - 180,253,134,233,248,2,255,248,2,15,183,70,252,254,141,180,253,134,233,248, - 1,255,248,5,57,108,202,4,15,133,244,2,129,252,253,239,15,131,244,1,139,12, - 202,139,4,194,57,193,15,132,244,1,129,252,253,239,15,135,244,2,139,169,233, - 133,252,237,15,132,244,2,252,246,133,233,235,15,133,244,2,255,49,252,237, - 255,189,1,0,0,0,255,252,233,244,45,255,252,247,208,131,198,4,129,124,253, - 202,4,239,15,133,244,248,139,12,202,59,12,135,255,131,198,4,129,124,253,202, - 4,239,15,135,244,248,255,252,242,15,16,4,199,102,15,46,4,202,255,221,4,202, - 221,4,199,255,252,247,208,131,198,4,57,68,202,4,255,139,108,194,4,131,198, - 4,129,252,253,239,255,15,131,244,247,255,15,130,244,247,255,137,108,202,4, - 139,44,194,137,44,202,255,15,183,70,252,254,141,180,253,134,233,248,1,139, - 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,139,108,194, - 4,139,4,194,137,108,202,4,137,4,202,139,6,15,182,204,15,182,232,131,198,4, - 193,232,16,252,255,36,171,255,49,252,237,129,124,253,194,4,239,129,213,239, - 137,108,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36, - 171,255,129,124,253,194,4,239,15,135,244,48,255,252,242,15,16,4,194,184,0, - 0,0,128,102,15,110,200,102,15,112,201,81,15,87,193,252,242,15,17,4,202,255, - 221,4,194,217,224,221,28,202,255,129,124,253,194,4,239,15,133,244,248,139, - 4,194,255,15,87,192,252,242,15,42,128,233,248,1,252,242,15,17,4,202,255,219, - 128,233,248,1,221,28,202,255,139,6,15,182,204,15,182,232,131,198,4,193,232, - 16,252,255,36,171,248,2,129,124,253,194,4,239,15,133,244,50,139,12,194,137, - 213,232,251,1,18,255,252,242,15,42,192,137,252,234,255,137,4,36,137,252,234, - 219,4,36,255,15,182,78,252,253,252,233,244,1,255,15,182,252,236,15,182,192, - 255,129,124,253,252,234,4,239,15,135,244,46,255,252,242,15,16,4,252,234,252, - 242,15,88,4,199,255,221,4,252,234,220,4,199,255,129,124,253,252,234,4,239, - 15,135,244,47,255,252,242,15,16,4,199,252,242,15,88,4,252,234,255,221,4,199, - 220,4,252,234,255,129,124,253,252,234,4,239,15,135,244,49,129,124,253,194, - 4,239,15,135,244,49,255,252,242,15,16,4,252,234,252,242,15,88,4,194,255,221, - 4,252,234,220,4,194,255,252,242,15,16,4,252,234,252,242,15,92,4,199,255,221, - 4,252,234,220,36,199,255,252,242,15,16,4,199,252,242,15,92,4,252,234,255, - 221,4,199,220,36,252,234,255,252,242,15,16,4,252,234,252,242,15,92,4,194, - 255,221,4,252,234,220,36,194,255,252,242,15,16,4,252,234,252,242,15,89,4, - 199,255,221,4,252,234,220,12,199,255,252,242,15,16,4,199,252,242,15,89,4, - 252,234,255,221,4,199,220,12,252,234,255,252,242,15,16,4,252,234,252,242, - 15,89,4,194,255,221,4,252,234,220,12,194,255,252,242,15,16,4,252,234,252, - 242,15,94,4,199,255,221,4,252,234,220,52,199,255,252,242,15,16,4,199,252, - 242,15,94,4,252,234,255,221,4,199,220,52,252,234,255,252,242,15,16,4,252, - 234,252,242,15,94,4,194,255,221,4,252,234,220,52,194,255,252,242,15,16,4, - 252,234,252,242,15,16,12,199,255,221,4,252,234,221,4,199,255,252,242,15,16, - 4,199,252,242,15,16,12,252,234,255,221,4,199,221,4,252,234,255,252,242,15, - 16,4,252,234,252,242,15,16,12,194,255,221,4,252,234,221,4,194,255,248,152, - 232,244,145,255,252,233,244,152,255,232,244,106,255,15,182,252,236,15,182, - 192,141,12,194,41,232,137,76,36,4,137,68,36,8,248,33,139,108,36,48,137,44, - 36,137,116,36,24,137,149,233,232,251,1,23,139,149,233,133,192,15,133,244, - 42,15,182,110,252,255,15,182,78,252,253,139,68,252,234,4,139,44,252,234,137, - 68,202,4,137,44,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252, - 255,36,171,255,252,247,208,139,4,135,199,68,202,4,237,137,4,202,139,6,15, - 182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,15,191,192,252, - 242,15,42,192,252,242,15,17,4,202,255,223,70,252,254,221,28,202,255,252,242, - 15,16,4,199,252,242,15,17,4,202,255,221,4,199,221,28,202,255,252,247,208, - 137,68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36, - 171,255,141,76,202,12,141,68,194,4,189,237,137,105,252,248,248,1,137,41,131, - 193,8,57,193,15,134,244,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16, - 252,255,36,171,255,139,106,252,248,139,172,253,133,233,139,173,233,139,69, - 4,139,109,0,137,68,202,4,137,44,202,139,6,15,182,204,15,182,232,131,198,4, - 193,232,16,252,255,36,171,255,139,106,252,248,139,172,253,141,233,128,189, - 233,0,139,173,233,139,12,194,139,68,194,4,137,77,0,137,69,4,15,132,244,247, - 252,246,133,233,235,15,133,244,248,248,1,139,6,15,182,204,15,182,232,131, - 198,4,193,232,16,252,255,36,171,248,2,129,232,239,129,252,248,239,15,134, - 244,1,252,246,129,233,235,15,132,244,1,135,213,141,139,233,255,232,251,1, - 24,137,252,234,252,233,244,1,255,252,247,208,139,106,252,248,139,172,253, - 141,233,139,12,135,139,133,233,137,8,199,64,4,237,252,246,133,233,235,15, - 133,244,248,248,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, - 36,171,248,2,252,246,129,233,235,15,132,244,1,128,189,233,0,15,132,244,1, - 137,213,137,194,141,139,233,232,251,1,24,137,252,234,252,233,244,1,255,139, - 106,252,248,255,252,242,15,16,4,199,255,139,172,253,141,233,139,141,233,255, - 252,247,208,139,106,252,248,139,172,253,141,233,139,141,233,137,65,4,139, - 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,141,180,253, - 134,233,139,108,36,48,131,189,233,0,15,132,244,247,141,12,202,137,76,36,4, - 137,44,36,137,149,233,232,251,1,25,139,149,233,248,1,139,6,15,182,204,15, - 182,232,131,198,4,193,232,16,252,255,36,171,255,252,247,208,139,74,252,248, - 139,4,135,139,108,36,48,137,76,36,8,137,68,36,4,137,116,36,24,137,44,36,137, - 149,233,232,251,1,26,139,149,233,15,182,78,252,253,137,4,202,199,68,202,4, - 237,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,137, - 197,37,252,255,7,0,0,193,252,237,11,61,252,255,7,0,0,15,148,209,137,108,36, - 8,1,200,139,108,36,48,1,200,137,68,36,4,137,116,36,24,139,139,233,137,44, - 36,59,139,233,137,149,233,15,131,244,248,248,1,232,251,1,27,139,149,233,15, - 182,78,252,253,137,4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131, - 198,4,193,232,16,252,255,36,171,248,2,137,252,233,232,251,1,28,252,233,244, - 1,255,252,247,208,139,108,36,48,139,139,233,137,116,36,24,59,139,233,137, - 149,233,15,131,244,249,248,2,139,20,135,137,252,233,232,251,1,29,139,149, - 233,15,182,78,252,253,137,4,202,199,68,202,4,237,139,6,15,182,204,15,182, - 232,131,198,4,193,232,16,252,255,36,171,248,3,137,252,233,232,251,1,28,15, - 183,70,252,254,252,247,208,252,233,244,2,255,252,247,208,139,106,252,248, - 139,173,233,139,4,135,252,233,244,153,255,252,247,208,139,106,252,248,139, - 173,233,139,4,135,252,233,244,154,255,15,182,252,236,15,182,192,129,124,253, - 252,234,4,239,15,133,244,36,139,44,252,234,129,124,253,194,4,239,15,135,244, - 251,255,252,242,15,16,4,194,252,242,15,45,192,252,242,15,42,200,102,15,46, - 193,255,221,4,194,219,20,36,219,4,36,255,139,4,36,255,15,133,244,36,59,133, - 233,15,131,244,36,193,224,3,3,133,233,129,120,253,4,239,15,132,244,248,248, - 1,139,40,139,64,4,137,44,202,137,68,202,4,139,6,15,182,204,15,182,232,131, - 198,4,193,232,16,252,255,36,171,248,2,131,189,233,0,15,132,244,1,139,141, - 233,252,246,129,233,235,15,132,244,36,15,182,78,252,253,252,233,244,1,248, - 5,255,129,124,253,194,4,239,15,133,244,36,139,4,194,252,233,244,153,255,15, - 182,252,236,15,182,192,252,247,208,139,4,135,129,124,253,252,234,4,239,15, - 133,244,34,139,44,252,234,248,153,139,141,233,35,136,233,105,201,239,3,141, - 233,248,1,129,185,233,239,15,133,244,250,57,129,233,15,133,244,250,129,121, - 253,4,239,15,132,244,251,15,182,70,252,253,139,41,139,73,4,137,44,194,248, - 2,255,137,76,194,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, - 36,171,248,3,15,182,70,252,253,185,237,252,233,244,2,248,4,139,137,233,133, - 201,15,133,244,1,248,5,139,141,233,133,201,15,132,244,3,252,246,129,233,235, - 15,133,244,3,252,233,244,34,255,15,182,252,236,15,182,192,129,124,253,252, - 234,4,239,15,133,244,35,139,44,252,234,59,133,233,15,131,244,35,193,224,3, - 3,133,233,129,120,253,4,239,15,132,244,248,248,1,139,40,139,64,4,137,44,202, - 137,68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36, - 171,248,2,131,189,233,0,15,132,244,1,139,141,233,252,246,129,233,235,15,132, - 244,35,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133,244, - 39,139,44,252,234,129,124,253,194,4,239,15,135,244,251,255,15,133,244,39, - 59,133,233,15,131,244,39,193,224,3,3,133,233,129,120,253,4,239,15,132,244, - 249,248,1,252,246,133,233,235,15,133,244,253,248,2,139,108,202,4,139,12,202, - 137,104,4,137,8,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, - 36,171,248,3,131,189,233,0,15,132,244,1,139,141,233,255,252,246,129,233,235, - 15,132,244,39,15,182,78,252,253,252,233,244,1,248,5,129,124,253,194,4,239, - 15,133,244,39,139,4,194,252,233,244,154,248,7,128,165,233,235,139,139,233, - 137,171,233,137,141,233,15,182,78,252,253,252,233,244,2,255,15,182,252,236, - 15,182,192,252,247,208,139,4,135,129,124,253,252,234,4,239,15,133,244,37, - 139,44,252,234,248,154,139,141,233,35,136,233,105,201,239,198,133,233,0,3, - 141,233,248,1,129,185,233,239,15,133,244,251,57,129,233,15,133,244,251,129, - 121,253,4,239,15,132,244,250,248,2,255,252,246,133,233,235,15,133,244,253, - 248,3,15,182,70,252,253,139,108,194,4,139,4,194,137,105,4,137,1,139,6,15, - 182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,4,131,189,233, - 0,15,132,244,2,137,76,36,16,139,141,233,252,246,129,233,235,15,132,244,37, - 139,76,36,16,252,233,244,2,248,5,139,137,233,133,201,15,133,244,1,255,139, - 141,233,133,201,15,132,244,252,252,246,129,233,235,15,132,244,37,248,6,137, - 68,36,16,199,68,36,20,237,141,68,36,16,137,108,36,12,137,108,36,4,139,108, - 36,48,137,68,36,8,137,44,36,137,116,36,24,137,149,233,232,251,1,30,139,149, - 233,139,108,36,12,137,193,252,233,244,2,248,7,128,165,233,235,139,131,233, - 137,171,233,137,133,233,252,233,244,3,255,15,182,252,236,15,182,192,129,124, - 253,252,234,4,239,15,133,244,38,139,44,252,234,59,133,233,15,131,244,38,193, - 224,3,3,133,233,129,120,253,4,239,15,132,244,249,248,1,252,246,133,233,235, - 15,133,244,253,248,2,139,108,202,4,139,12,202,137,104,4,137,8,139,6,15,182, - 204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,3,131,189,233,0,15, - 132,244,1,255,139,141,233,252,246,129,233,235,15,132,244,38,15,182,78,252, - 253,252,233,244,1,248,7,128,165,233,235,139,139,233,137,171,233,137,141,233, - 15,182,78,252,253,252,233,244,2,255,137,124,36,16,255,221,4,199,219,92,36, - 12,255,248,1,141,12,202,139,105,252,248,252,246,133,233,235,15,133,244,253, - 248,2,139,68,36,20,255,252,242,15,45,252,248,255,139,124,36,12,255,131,232, - 1,15,132,244,250,1,252,248,59,133,233,15,131,244,251,41,252,248,193,231,3, - 3,189,233,248,3,139,41,137,47,139,105,4,131,193,8,137,111,4,131,199,8,131, - 232,1,15,133,244,3,248,4,139,124,36,16,139,6,15,182,204,15,182,232,131,198, - 4,193,232,16,252,255,36,171,248,5,137,108,36,4,139,108,36,48,137,68,36,8, - 137,44,36,137,116,36,24,137,149,233,232,251,1,31,139,149,233,15,182,78,252, - 253,252,233,244,1,248,7,128,165,233,235,139,131,233,137,171,233,255,137,133, - 233,252,233,244,2,255,3,68,36,20,255,141,76,202,8,139,105,252,248,129,121, - 253,252,252,239,15,133,244,29,252,255,165,233,255,141,76,202,8,137,215,139, - 105,252,248,129,121,253,252,252,239,15,133,244,29,248,51,139,114,252,252, - 252,247,198,237,15,133,244,253,248,1,137,106,252,248,137,68,36,20,131,232, - 1,15,132,244,249,248,2,139,41,137,47,139,105,4,137,111,4,131,199,8,131,193, - 8,131,232,1,15,133,244,2,139,106,252,248,248,3,137,209,128,189,233,1,15,135, - 244,251,248,4,139,68,36,20,252,255,165,233,248,5,255,252,247,198,237,15,133, - 244,4,15,182,70,252,253,252,247,208,141,20,194,139,122,252,248,139,191,233, - 139,191,233,252,233,244,4,248,7,15,139,244,1,131,230,252,248,41,252,242,137, - 215,139,114,252,252,252,233,244,1,255,141,76,202,8,139,105,232,139,65,252, - 236,137,41,137,65,4,139,105,252,240,139,65,252,244,137,105,8,137,65,12,139, - 105,224,139,65,228,137,105,252,248,137,65,252,252,129,252,248,239,184,3,0, - 0,0,15,133,244,29,252,255,165,233,255,15,182,252,236,139,66,252,248,141,12, - 202,139,128,233,15,182,128,233,137,124,36,16,141,188,253,194,233,43,122,252, - 252,133,252,237,15,132,244,251,141,108,252,233,252,248,57,215,15,131,244, - 248,248,1,139,71,252,248,137,1,139,71,252,252,131,199,8,137,65,4,131,193, - 8,57,252,233,15,131,244,249,57,215,15,130,244,1,248,2,199,65,4,237,131,193, - 8,57,252,233,15,130,244,2,248,3,139,124,36,16,139,6,15,182,204,15,182,232, - 131,198,4,193,232,16,252,255,36,171,248,5,199,68,36,20,1,0,0,0,137,208,41, - 252,248,15,134,244,3,255,137,197,193,252,237,3,137,108,36,4,131,197,1,137, - 108,36,20,139,108,36,48,1,200,59,133,233,15,135,244,253,248,6,139,71,252, - 248,137,1,139,71,252,252,131,199,8,137,65,4,131,193,8,57,215,15,130,244,6, - 252,233,244,3,248,7,137,149,233,137,141,233,137,116,36,24,41,215,137,44,36, - 232,251,1,0,139,149,233,139,141,233,1,215,252,233,244,6,255,193,225,3,255, - 248,1,139,114,252,252,137,68,36,20,252,247,198,237,15,133,244,253,255,248, - 17,137,215,131,232,1,15,132,244,249,248,2,139,44,15,137,111,252,248,139,108, - 15,4,137,111,252,252,131,199,8,131,232,1,15,133,244,2,248,3,139,68,36,20, - 15,182,110,252,255,248,5,57,197,15,135,244,252,255,139,108,10,4,137,106,252, - 252,139,44,10,137,106,252,248,255,248,5,56,70,252,255,15,135,244,252,255, - 15,182,78,252,253,252,247,209,141,20,202,139,122,252,248,139,191,233,139, - 191,233,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248, - 6,255,199,71,252,252,237,131,199,8,255,199,68,194,252,244,237,255,131,192, - 1,252,233,244,5,248,7,15,139,244,18,131,230,252,248,41,252,242,255,1,252, - 241,255,137,252,245,209,252,237,129,229,239,102,131,172,253,43,233,1,15,132, - 244,138,255,141,12,202,255,129,121,253,4,239,15,135,244,52,129,121,253,12, - 239,15,135,244,52,255,139,105,20,255,129,252,253,239,15,135,244,52,255,252, - 242,15,16,1,252,242,15,16,73,8,255,252,242,15,88,65,16,252,242,15,17,1,133, - 252,237,15,136,244,249,255,15,140,244,249,255,102,15,46,200,248,1,252,242, - 15,17,65,24,255,221,65,8,221,1,255,220,65,16,221,17,221,81,24,133,252,237, - 15,136,244,247,255,221,81,24,15,140,244,247,255,217,201,248,1,255,15,183, - 70,252,254,255,15,131,244,248,141,180,253,134,233,255,141,180,253,134,233, - 15,183,70,252,254,15,131,245,255,15,130,244,248,141,180,253,134,233,255,248, - 3,102,15,46,193,252,233,244,1,255,141,12,202,139,105,4,129,252,253,239,15, - 132,244,247,255,137,105,252,252,139,41,137,105,252,248,252,233,245,255,141, - 180,253,134,233,139,1,137,105,252,252,137,65,252,248,255,139,139,233,139, - 4,129,139,128,233,139,108,36,48,137,147,233,137,171,233,252,255,224,255,141, - 180,253,134,233,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, - 36,171,255,254,0 + 223,224,252,246,196,1,15,133,244,248,217,201,248,2,221,216,195,255,248,152, + 156,90,137,209,129,252,242,0,0,32,0,82,157,156,90,49,192,57,209,15,132,244, + 247,139,68,36,4,87,83,15,162,139,124,36,16,137,7,137,95,4,137,79,8,137,87, + 12,91,95,248,1,195,255,249,255,129,124,253,202,4,239,15,135,244,41,129,124, + 253,194,4,239,15,135,244,41,255,252,242,15,16,4,194,131,198,4,102,15,46,4, + 202,255,221,4,202,221,4,194,131,198,4,255,223,252,233,221,216,255,218,252, + 233,223,224,158,255,15,134,244,248,255,15,131,244,248,255,248,1,15,183,70, + 252,254,141,180,253,134,233,248,2,139,6,15,182,204,15,182,232,131,198,4,193, + 232,16,252,255,36,171,255,139,108,194,4,131,198,4,129,252,253,239,15,135, + 244,251,129,124,253,202,4,239,15,135,244,251,255,252,242,15,16,4,194,102, + 15,46,4,202,255,221,4,202,221,4,194,255,15,138,244,248,15,133,244,248,255, + 15,138,244,248,15,132,244,247,255,248,1,15,183,70,252,254,141,180,253,134, + 233,248,2,255,248,2,15,183,70,252,254,141,180,253,134,233,248,1,255,248,5, + 57,108,202,4,15,133,244,2,129,252,253,239,15,131,244,1,139,12,202,139,4,194, + 57,193,15,132,244,1,129,252,253,239,15,135,244,2,139,169,233,133,252,237, + 15,132,244,2,252,246,133,233,235,15,133,244,2,255,49,252,237,255,189,1,0, + 0,0,255,252,233,244,45,255,252,247,208,131,198,4,129,124,253,202,4,239,15, + 133,244,248,139,12,202,59,12,135,255,131,198,4,129,124,253,202,4,239,15,135, + 244,248,255,252,242,15,16,4,199,102,15,46,4,202,255,221,4,202,221,4,199,255, + 252,247,208,131,198,4,57,68,202,4,255,139,108,194,4,131,198,4,129,252,253, + 239,255,15,131,244,247,255,15,130,244,247,255,137,108,202,4,139,44,194,137, + 44,202,255,15,183,70,252,254,141,180,253,134,233,248,1,139,6,15,182,204,15, + 182,232,131,198,4,193,232,16,252,255,36,171,255,139,108,194,4,139,4,194,137, + 108,202,4,137,4,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252, + 255,36,171,255,49,252,237,129,124,253,194,4,239,129,213,239,137,108,202,4, + 139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,129,124, + 253,194,4,239,15,135,244,48,255,252,242,15,16,4,194,184,0,0,0,128,102,15, + 110,200,102,15,112,201,81,15,87,193,252,242,15,17,4,202,255,221,4,194,217, + 224,221,28,202,255,129,124,253,194,4,239,15,133,244,248,139,4,194,255,15, + 87,192,252,242,15,42,128,233,248,1,252,242,15,17,4,202,255,219,128,233,248, + 1,221,28,202,255,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, + 36,171,248,2,129,124,253,194,4,239,15,133,244,50,139,12,194,137,213,232,251, + 1,18,255,252,242,15,42,192,137,252,234,255,137,4,36,137,252,234,219,4,36, + 255,15,182,78,252,253,252,233,244,1,255,15,182,252,236,15,182,192,255,129, + 124,253,252,234,4,239,15,135,244,46,255,252,242,15,16,4,252,234,252,242,15, + 88,4,199,255,221,4,252,234,220,4,199,255,129,124,253,252,234,4,239,15,135, + 244,47,255,252,242,15,16,4,199,252,242,15,88,4,252,234,255,221,4,199,220, + 4,252,234,255,129,124,253,252,234,4,239,15,135,244,49,129,124,253,194,4,239, + 15,135,244,49,255,252,242,15,16,4,252,234,252,242,15,88,4,194,255,221,4,252, + 234,220,4,194,255,252,242,15,16,4,252,234,252,242,15,92,4,199,255,221,4,252, + 234,220,36,199,255,252,242,15,16,4,199,252,242,15,92,4,252,234,255,221,4, + 199,220,36,252,234,255,252,242,15,16,4,252,234,252,242,15,92,4,194,255,221, + 4,252,234,220,36,194,255,252,242,15,16,4,252,234,252,242,15,89,4,199,255, + 221,4,252,234,220,12,199,255,252,242,15,16,4,199,252,242,15,89,4,252,234, + 255,221,4,199,220,12,252,234,255,252,242,15,16,4,252,234,252,242,15,89,4, + 194,255,221,4,252,234,220,12,194,255,252,242,15,16,4,252,234,252,242,15,94, + 4,199,255,221,4,252,234,220,52,199,255,252,242,15,16,4,199,252,242,15,94, + 4,252,234,255,221,4,199,220,52,252,234,255,252,242,15,16,4,252,234,252,242, + 15,94,4,194,255,221,4,252,234,220,52,194,255,252,242,15,16,4,252,234,252, + 242,15,16,12,199,255,221,4,252,234,221,4,199,255,252,242,15,16,4,199,252, + 242,15,16,12,252,234,255,221,4,199,221,4,252,234,255,252,242,15,16,4,252, + 234,252,242,15,16,12,194,255,221,4,252,234,221,4,194,255,248,153,232,244, + 145,255,252,233,244,153,255,232,244,106,255,15,182,252,236,15,182,192,141, + 12,194,41,232,137,76,36,4,137,68,36,8,248,33,139,108,36,48,137,44,36,137, + 116,36,24,137,149,233,232,251,1,23,139,149,233,133,192,15,133,244,42,15,182, + 110,252,255,15,182,78,252,253,139,68,252,234,4,139,44,252,234,137,68,202, + 4,137,44,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36, + 171,255,252,247,208,139,4,135,199,68,202,4,237,137,4,202,139,6,15,182,204, + 15,182,232,131,198,4,193,232,16,252,255,36,171,255,15,191,192,252,242,15, + 42,192,252,242,15,17,4,202,255,223,70,252,254,221,28,202,255,252,242,15,16, + 4,199,252,242,15,17,4,202,255,221,4,199,221,28,202,255,252,247,208,137,68, + 202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255, + 141,76,202,12,141,68,194,4,189,237,137,105,252,248,248,1,137,41,131,193,8, + 57,193,15,134,244,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252, + 255,36,171,255,139,106,252,248,139,172,253,133,233,139,173,233,139,69,4,139, + 109,0,137,68,202,4,137,44,202,139,6,15,182,204,15,182,232,131,198,4,193,232, + 16,252,255,36,171,255,139,106,252,248,139,172,253,141,233,128,189,233,0,139, + 173,233,139,12,194,139,68,194,4,137,77,0,137,69,4,15,132,244,247,252,246, + 133,233,235,15,133,244,248,248,1,139,6,15,182,204,15,182,232,131,198,4,193, + 232,16,252,255,36,171,248,2,129,232,239,129,252,248,239,15,134,244,1,252, + 246,129,233,235,15,132,244,1,135,213,141,139,233,255,232,251,1,24,137,252, + 234,252,233,244,1,255,252,247,208,139,106,252,248,139,172,253,141,233,139, + 12,135,139,133,233,137,8,199,64,4,237,252,246,133,233,235,15,133,244,248, + 248,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248, + 2,252,246,129,233,235,15,132,244,1,128,189,233,0,15,132,244,1,137,213,137, + 194,141,139,233,232,251,1,24,137,252,234,252,233,244,1,255,139,106,252,248, + 255,252,242,15,16,4,199,255,139,172,253,141,233,139,141,233,255,252,247,208, + 139,106,252,248,139,172,253,141,233,139,141,233,137,65,4,139,6,15,182,204, + 15,182,232,131,198,4,193,232,16,252,255,36,171,255,141,180,253,134,233,139, + 108,36,48,131,189,233,0,15,132,244,247,141,12,202,137,76,36,4,137,44,36,137, + 149,233,232,251,1,25,139,149,233,248,1,139,6,15,182,204,15,182,232,131,198, + 4,193,232,16,252,255,36,171,255,252,247,208,139,74,252,248,139,4,135,139, + 108,36,48,137,76,36,8,137,68,36,4,137,116,36,24,137,44,36,137,149,233,232, + 251,1,26,139,149,233,15,182,78,252,253,137,4,202,199,68,202,4,237,139,6,15, + 182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,137,197,37,252, + 255,7,0,0,193,252,237,11,61,252,255,7,0,0,15,148,209,137,108,36,8,1,200,139, + 108,36,48,1,200,137,68,36,4,137,116,36,24,139,139,233,137,44,36,59,139,233, + 137,149,233,15,131,244,248,248,1,232,251,1,27,139,149,233,15,182,78,252,253, + 137,4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131,198,4,193,232, + 16,252,255,36,171,248,2,137,252,233,232,251,1,28,252,233,244,1,255,252,247, + 208,139,108,36,48,139,139,233,137,116,36,24,59,139,233,137,149,233,15,131, + 244,249,248,2,139,20,135,137,252,233,232,251,1,29,139,149,233,15,182,78,252, + 253,137,4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131,198,4,193, + 232,16,252,255,36,171,248,3,137,252,233,232,251,1,28,15,183,70,252,254,252, + 247,208,252,233,244,2,255,252,247,208,139,106,252,248,139,173,233,139,4,135, + 252,233,244,154,255,252,247,208,139,106,252,248,139,173,233,139,4,135,252, + 233,244,155,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133, + 244,36,139,44,252,234,129,124,253,194,4,239,15,135,244,251,255,252,242,15, + 16,4,194,252,242,15,45,192,252,242,15,42,200,102,15,46,193,255,221,4,194, + 219,20,36,219,4,36,255,139,4,36,255,15,133,244,36,59,133,233,15,131,244,36, + 193,224,3,3,133,233,129,120,253,4,239,15,132,244,248,248,1,139,40,139,64, + 4,137,44,202,137,68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16, + 252,255,36,171,248,2,131,189,233,0,15,132,244,1,139,141,233,252,246,129,233, + 235,15,132,244,36,15,182,78,252,253,252,233,244,1,248,5,255,129,124,253,194, + 4,239,15,133,244,36,139,4,194,252,233,244,154,255,15,182,252,236,15,182,192, + 252,247,208,139,4,135,129,124,253,252,234,4,239,15,133,244,34,139,44,252, + 234,248,154,139,141,233,35,136,233,105,201,239,3,141,233,248,1,129,185,233, + 239,15,133,244,250,57,129,233,15,133,244,250,129,121,253,4,239,15,132,244, + 251,15,182,70,252,253,139,41,139,73,4,137,44,194,248,2,255,137,76,194,4,139, + 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,3,15,182, + 70,252,253,185,237,252,233,244,2,248,4,139,137,233,133,201,15,133,244,1,248, + 5,139,141,233,133,201,15,132,244,3,252,246,129,233,235,15,133,244,3,252,233, + 244,34,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133,244, + 35,139,44,252,234,59,133,233,15,131,244,35,193,224,3,3,133,233,129,120,253, + 4,239,15,132,244,248,248,1,139,40,139,64,4,137,44,202,137,68,202,4,139,6, + 15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,2,131,189,233, + 0,15,132,244,1,139,141,233,252,246,129,233,235,15,132,244,35,255,15,182,252, + 236,15,182,192,129,124,253,252,234,4,239,15,133,244,39,139,44,252,234,129, + 124,253,194,4,239,15,135,244,251,255,15,133,244,39,59,133,233,15,131,244, + 39,193,224,3,3,133,233,129,120,253,4,239,15,132,244,249,248,1,252,246,133, + 233,235,15,133,244,253,248,2,139,108,202,4,139,12,202,137,104,4,137,8,139, + 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,3,131,189, + 233,0,15,132,244,1,139,141,233,255,252,246,129,233,235,15,132,244,39,15,182, + 78,252,253,252,233,244,1,248,5,129,124,253,194,4,239,15,133,244,39,139,4, + 194,252,233,244,155,248,7,128,165,233,235,139,139,233,137,171,233,137,141, + 233,15,182,78,252,253,252,233,244,2,255,15,182,252,236,15,182,192,252,247, + 208,139,4,135,129,124,253,252,234,4,239,15,133,244,37,139,44,252,234,248, + 155,139,141,233,35,136,233,105,201,239,198,133,233,0,3,141,233,248,1,129, + 185,233,239,15,133,244,251,57,129,233,15,133,244,251,129,121,253,4,239,15, + 132,244,250,248,2,255,252,246,133,233,235,15,133,244,253,248,3,15,182,70, + 252,253,139,108,194,4,139,4,194,137,105,4,137,1,139,6,15,182,204,15,182,232, + 131,198,4,193,232,16,252,255,36,171,248,4,131,189,233,0,15,132,244,2,137, + 76,36,16,139,141,233,252,246,129,233,235,15,132,244,37,139,76,36,16,252,233, + 244,2,248,5,139,137,233,133,201,15,133,244,1,255,139,141,233,133,201,15,132, + 244,252,252,246,129,233,235,15,132,244,37,248,6,137,68,36,16,199,68,36,20, + 237,141,68,36,16,137,108,36,12,137,108,36,4,139,108,36,48,137,68,36,8,137, + 44,36,137,116,36,24,137,149,233,232,251,1,30,139,149,233,139,108,36,12,137, + 193,252,233,244,2,248,7,128,165,233,235,139,131,233,137,171,233,137,133,233, + 252,233,244,3,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15, + 133,244,38,139,44,252,234,59,133,233,15,131,244,38,193,224,3,3,133,233,129, + 120,253,4,239,15,132,244,249,248,1,252,246,133,233,235,15,133,244,253,248, + 2,139,108,202,4,139,12,202,137,104,4,137,8,139,6,15,182,204,15,182,232,131, + 198,4,193,232,16,252,255,36,171,248,3,131,189,233,0,15,132,244,1,255,139, + 141,233,252,246,129,233,235,15,132,244,38,15,182,78,252,253,252,233,244,1, + 248,7,128,165,233,235,139,139,233,137,171,233,137,141,233,15,182,78,252,253, + 252,233,244,2,255,137,124,36,16,255,221,4,199,219,92,36,12,255,248,1,141, + 12,202,139,105,252,248,252,246,133,233,235,15,133,244,253,248,2,139,68,36, + 20,255,252,242,15,45,252,248,255,139,124,36,12,255,131,232,1,15,132,244,250, + 1,252,248,59,133,233,15,131,244,251,41,252,248,193,231,3,3,189,233,248,3, + 139,41,137,47,139,105,4,131,193,8,137,111,4,131,199,8,131,232,1,15,133,244, + 3,248,4,139,124,36,16,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252, + 255,36,171,248,5,137,108,36,4,139,108,36,48,137,68,36,8,137,44,36,137,116, + 36,24,137,149,233,232,251,1,31,139,149,233,15,182,78,252,253,252,233,244, + 1,248,7,128,165,233,235,139,131,233,137,171,233,255,137,133,233,252,233,244, + 2,255,3,68,36,20,255,141,76,202,8,139,105,252,248,129,121,253,252,252,239, + 15,133,244,29,252,255,165,233,255,141,76,202,8,137,215,139,105,252,248,129, + 121,253,252,252,239,15,133,244,29,248,51,139,114,252,252,252,247,198,237, + 15,133,244,253,248,1,137,106,252,248,137,68,36,20,131,232,1,15,132,244,249, + 248,2,139,41,137,47,139,105,4,137,111,4,131,199,8,131,193,8,131,232,1,15, + 133,244,2,139,106,252,248,248,3,137,209,128,189,233,1,15,135,244,251,248, + 4,139,68,36,20,252,255,165,233,248,5,255,252,247,198,237,15,133,244,4,15, + 182,70,252,253,252,247,208,141,20,194,139,122,252,248,139,191,233,139,191, + 233,252,233,244,4,248,7,15,139,244,1,131,230,252,248,41,252,242,137,215,139, + 114,252,252,252,233,244,1,255,141,76,202,8,139,105,232,139,65,252,236,137, + 41,137,65,4,139,105,252,240,139,65,252,244,137,105,8,137,65,12,139,105,224, + 139,65,228,137,105,252,248,137,65,252,252,129,252,248,239,184,3,0,0,0,15, + 133,244,29,252,255,165,233,255,15,182,252,236,139,66,252,248,141,12,202,139, + 128,233,15,182,128,233,137,124,36,16,141,188,253,194,233,43,122,252,252,133, + 252,237,15,132,244,251,141,108,252,233,252,248,57,215,15,131,244,248,248, + 1,139,71,252,248,137,1,139,71,252,252,131,199,8,137,65,4,131,193,8,57,252, + 233,15,131,244,249,57,215,15,130,244,1,248,2,199,65,4,237,131,193,8,57,252, + 233,15,130,244,2,248,3,139,124,36,16,139,6,15,182,204,15,182,232,131,198, + 4,193,232,16,252,255,36,171,248,5,199,68,36,20,1,0,0,0,137,208,41,252,248, + 15,134,244,3,255,137,197,193,252,237,3,137,108,36,4,131,197,1,137,108,36, + 20,139,108,36,48,1,200,59,133,233,15,135,244,253,248,6,139,71,252,248,137, + 1,139,71,252,252,131,199,8,137,65,4,131,193,8,57,215,15,130,244,6,252,233, + 244,3,248,7,137,149,233,137,141,233,137,116,36,24,41,215,137,44,36,232,251, + 1,0,139,149,233,139,141,233,1,215,252,233,244,6,255,193,225,3,255,248,1,139, + 114,252,252,137,68,36,20,252,247,198,237,15,133,244,253,255,248,17,137,215, + 131,232,1,15,132,244,249,248,2,139,44,15,137,111,252,248,139,108,15,4,137, + 111,252,252,131,199,8,131,232,1,15,133,244,2,248,3,139,68,36,20,15,182,110, + 252,255,248,5,57,197,15,135,244,252,255,139,108,10,4,137,106,252,252,139, + 44,10,137,106,252,248,255,248,5,56,70,252,255,15,135,244,252,255,15,182,78, + 252,253,252,247,209,141,20,202,139,122,252,248,139,191,233,139,191,233,139, + 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,6,255,199, + 71,252,252,237,131,199,8,255,199,68,194,252,244,237,255,131,192,1,252,233, + 244,5,248,7,15,139,244,18,131,230,252,248,41,252,242,255,1,252,241,255,137, + 252,245,209,252,237,129,229,239,102,131,172,253,43,233,1,15,132,244,138,255, + 141,12,202,255,129,121,253,4,239,15,135,244,52,129,121,253,12,239,15,135, + 244,52,255,139,105,20,255,129,252,253,239,15,135,244,52,255,252,242,15,16, + 1,252,242,15,16,73,8,255,252,242,15,88,65,16,252,242,15,17,1,133,252,237, + 15,136,244,249,255,15,140,244,249,255,102,15,46,200,248,1,252,242,15,17,65, + 24,255,221,65,8,221,1,255,220,65,16,221,17,221,81,24,133,252,237,15,136,244, + 247,255,221,81,24,15,140,244,247,255,217,201,248,1,255,15,183,70,252,254, + 255,15,131,244,248,141,180,253,134,233,255,141,180,253,134,233,15,183,70, + 252,254,15,131,245,255,15,130,244,248,141,180,253,134,233,255,248,3,102,15, + 46,193,252,233,244,1,255,141,12,202,139,105,4,129,252,253,239,15,132,244, + 247,255,137,105,252,252,139,41,137,105,252,248,252,233,245,255,141,180,253, + 134,233,139,1,137,105,252,252,137,65,252,248,255,139,139,233,139,4,129,139, + 128,233,139,108,36,48,137,147,233,137,171,233,252,255,224,255,141,180,253, + 134,233,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255, + 254,0 }; enum { @@ -861,7 +885,8 @@ enum { GLOB_vm_mod, GLOB_vm_exp2, GLOB_vm_exp2raw, - GLOB_vm_powi, + GLOB_vm_pow_sse, + GLOB_vm_powi_sse, GLOB_vm_foldfpm, GLOB_vm_foldarith, GLOB_vm_cpuid, @@ -1009,7 +1034,8 @@ static const char *const globnames[] = { "vm_mod", "vm_exp2", "vm_exp2raw", - "vm_powi", + "vm_pow_sse", + "vm_powi_sse", "vm_foldfpm", "vm_foldarith", "vm_cpuid", @@ -1231,7 +1257,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) dasm_put(Dst, 5245); } dasm_put(Dst, 5255, 2+1, LJ_TISNUM, LJ_TISNUM); - if (0 && sse) { // NYI + if (sse) { dasm_put(Dst, 5307, 1+1, LJ_TISNUM, LJ_TISNUM); } else { dasm_put(Dst, 5354, 2+1, LJ_TISNUM, LJ_TISNUM); @@ -1456,56 +1482,66 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) dasm_put(Dst, 9407); } dasm_put(Dst, 9454); + if (!sse) { + dasm_put(Dst, 9528); if (cmov) { - dasm_put(Dst, 9538); + dasm_put(Dst, 9539); } else { - dasm_put(Dst, 9542); + dasm_put(Dst, 9543); } - dasm_put(Dst, 9551); - dasm_put(Dst, 9620); - dasm_put(Dst, 9722); + dasm_put(Dst, 9550); + dasm_put(Dst, 9624); + dasm_put(Dst, 9724); if (cmov) { - dasm_put(Dst, 9735); + dasm_put(Dst, 9727); } else { - dasm_put(Dst, 9739); + dasm_put(Dst, 9731); } - dasm_put(Dst, 9748); + dasm_put(Dst, 9738); if (cmov) { - dasm_put(Dst, 9538); + dasm_put(Dst, 9539); } else { - dasm_put(Dst, 9766); + dasm_put(Dst, 9543); } - dasm_put(Dst, 9773); + dasm_put(Dst, 9756); + } else { + dasm_put(Dst, 9835); + } + dasm_put(Dst, 9838); + dasm_put(Dst, 9923); + dasm_put(Dst, 10054); + dasm_put(Dst, 10253); if (sse) { - dasm_put(Dst, 9856); - dasm_put(Dst, 9913); + dasm_put(Dst, 10276); + dasm_put(Dst, 10333); + dasm_put(Dst, 10424); } else { - dasm_put(Dst, 9945); + dasm_put(Dst, 10466); + dasm_put(Dst, 10558); } - dasm_put(Dst, 9984); - dasm_put(Dst, 10071); + dasm_put(Dst, 10604); if (sse) { - dasm_put(Dst, 10089); - dasm_put(Dst, 10176); - dasm_put(Dst, 10270); + dasm_put(Dst, 10610); + dasm_put(Dst, 10715); + dasm_put(Dst, 10798); } else { - dasm_put(Dst, 10356); - dasm_put(Dst, 10439); + dasm_put(Dst, 10870); + dasm_put(Dst, 10953); if (cmov) { - dasm_put(Dst, 10494); + dasm_put(Dst, 11008); } else { - dasm_put(Dst, 10513); + dasm_put(Dst, 11027); } - dasm_put(Dst, 10554); + dasm_put(Dst, 10866); } - dasm_put(Dst, 10558); + dasm_put(Dst, 11068); } /* Generate the code for a single instruction. */ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) { int vk = 0; - dasm_put(Dst, 10612, defop); + dasm_put(Dst, 11122, defop); switch (op) { @@ -1514,495 +1550,456 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) /* Remember: all ops branch for a true comparison, fall through otherwise. */ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - dasm_put(Dst, 10614, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 11124, LJ_TISNUM, LJ_TISNUM); if (sse) { - dasm_put(Dst, 10635); + dasm_put(Dst, 11145); } else { - dasm_put(Dst, 10650); + dasm_put(Dst, 11160); if (cmov) { - dasm_put(Dst, 10660); + dasm_put(Dst, 11170); } else { - dasm_put(Dst, 10666); + dasm_put(Dst, 11176); } } switch (op) { case BC_ISLT: - dasm_put(Dst, 10673); + dasm_put(Dst, 11183); break; case BC_ISGE: - dasm_put(Dst, 10434); + dasm_put(Dst, 10419); break; case BC_ISLE: dasm_put(Dst, 6614); break; case BC_ISGT: - dasm_put(Dst, 10678); + dasm_put(Dst, 11188); break; default: break; /* Shut up GCC. */ } - dasm_put(Dst, 10683, -BCBIAS_J*4); + dasm_put(Dst, 11193, -BCBIAS_J*4); break; case BC_ISEQV: case BC_ISNEV: vk = op == BC_ISEQV; - dasm_put(Dst, 10716, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 11226, LJ_TISNUM, LJ_TISNUM); if (sse) { - dasm_put(Dst, 10742); + dasm_put(Dst, 11252); } else { - dasm_put(Dst, 10754); + dasm_put(Dst, 11264); if (cmov) { - dasm_put(Dst, 10660); + dasm_put(Dst, 11170); } else { - dasm_put(Dst, 10666); + dasm_put(Dst, 11176); } } iseqne_fp: if (vk) { - dasm_put(Dst, 10761); + dasm_put(Dst, 11271); } else { - dasm_put(Dst, 10770); + dasm_put(Dst, 11280); } iseqne_end: if (vk) { - dasm_put(Dst, 10779, -BCBIAS_J*4); + dasm_put(Dst, 11289, -BCBIAS_J*4); } else { - dasm_put(Dst, 10794, -BCBIAS_J*4); + dasm_put(Dst, 11304, -BCBIAS_J*4); } dasm_put(Dst, 8621); if (op == BC_ISEQV || op == BC_ISNEV) { - dasm_put(Dst, 10809, LJ_TISPRI, LJ_TISTABUD, Dt6(->metatable), Dt6(->nomm), 1<metatable), Dt6(->nomm), 1<len)); + dasm_put(Dst, 11655, Dt5(->len)); } else { - dasm_put(Dst, 11163, Dt5(->len)); + dasm_put(Dst, 11673, Dt5(->len)); } - dasm_put(Dst, 11172, LJ_TTAB); + dasm_put(Dst, 11682, LJ_TTAB); if (sse) { - dasm_put(Dst, 11212); + dasm_put(Dst, 11722); } else { - dasm_put(Dst, 11221); + dasm_put(Dst, 11731); } - dasm_put(Dst, 11231); + dasm_put(Dst, 11741); break; /* -- Binary ops -------------------------------------------------------- */ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - dasm_put(Dst, 11241); + dasm_put(Dst, 11751); vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); switch (vk) { case 0: - dasm_put(Dst, 11249, LJ_TISNUM); + dasm_put(Dst, 11759, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11261); + dasm_put(Dst, 11771); } else { - dasm_put(Dst, 11275); + dasm_put(Dst, 11785); } break; case 1: - dasm_put(Dst, 11283, LJ_TISNUM); + dasm_put(Dst, 11793, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11295); + dasm_put(Dst, 11805); } else { - dasm_put(Dst, 11309); + dasm_put(Dst, 11819); } break; default: - dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11339); + dasm_put(Dst, 11849); } else { - dasm_put(Dst, 11353); + dasm_put(Dst, 11863); } break; } if (sse) { - dasm_put(Dst, 11115); + dasm_put(Dst, 11625); } else { - dasm_put(Dst, 11127); + dasm_put(Dst, 11637); } dasm_put(Dst, 8621); break; case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - dasm_put(Dst, 11241); + dasm_put(Dst, 11751); vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); switch (vk) { case 0: - dasm_put(Dst, 11249, LJ_TISNUM); + dasm_put(Dst, 11759, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11361); + dasm_put(Dst, 11871); } else { - dasm_put(Dst, 11375); + dasm_put(Dst, 11885); } break; case 1: - dasm_put(Dst, 11283, LJ_TISNUM); + dasm_put(Dst, 11793, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11383); + dasm_put(Dst, 11893); } else { - dasm_put(Dst, 11397); + dasm_put(Dst, 11907); } break; default: - dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11405); + dasm_put(Dst, 11915); } else { - dasm_put(Dst, 11419); + dasm_put(Dst, 11929); } break; } if (sse) { - dasm_put(Dst, 11115); + dasm_put(Dst, 11625); } else { - dasm_put(Dst, 11127); + dasm_put(Dst, 11637); } dasm_put(Dst, 8621); break; case BC_MULVN: case BC_MULNV: case BC_MULVV: - dasm_put(Dst, 11241); + dasm_put(Dst, 11751); vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); switch (vk) { case 0: - dasm_put(Dst, 11249, LJ_TISNUM); + dasm_put(Dst, 11759, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11427); + dasm_put(Dst, 11937); } else { - dasm_put(Dst, 11441); + dasm_put(Dst, 11951); } break; case 1: - dasm_put(Dst, 11283, LJ_TISNUM); + dasm_put(Dst, 11793, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11449); + dasm_put(Dst, 11959); } else { - dasm_put(Dst, 11463); + dasm_put(Dst, 11973); } break; default: - dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11471); + dasm_put(Dst, 11981); } else { - dasm_put(Dst, 11485); + dasm_put(Dst, 11995); } break; } if (sse) { - dasm_put(Dst, 11115); + dasm_put(Dst, 11625); } else { - dasm_put(Dst, 11127); + dasm_put(Dst, 11637); } dasm_put(Dst, 8621); break; case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - dasm_put(Dst, 11241); + dasm_put(Dst, 11751); vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); switch (vk) { case 0: - dasm_put(Dst, 11249, LJ_TISNUM); + dasm_put(Dst, 11759, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11493); + dasm_put(Dst, 12003); } else { - dasm_put(Dst, 11507); + dasm_put(Dst, 12017); } break; case 1: - dasm_put(Dst, 11283, LJ_TISNUM); + dasm_put(Dst, 11793, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11515); + dasm_put(Dst, 12025); } else { - dasm_put(Dst, 11529); + dasm_put(Dst, 12039); } break; default: - dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11537); + dasm_put(Dst, 12047); } else { - dasm_put(Dst, 11551); + dasm_put(Dst, 12061); } break; } if (sse) { - dasm_put(Dst, 11115); + dasm_put(Dst, 11625); } else { - dasm_put(Dst, 11127); + dasm_put(Dst, 11637); } dasm_put(Dst, 8621); break; case BC_MODVN: - dasm_put(Dst, 11241); + dasm_put(Dst, 11751); vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); switch (vk) { case 0: - dasm_put(Dst, 11249, LJ_TISNUM); + dasm_put(Dst, 11759, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11559); + dasm_put(Dst, 12069); } else { - dasm_put(Dst, 11573); + dasm_put(Dst, 12083); } break; case 1: - dasm_put(Dst, 11283, LJ_TISNUM); + dasm_put(Dst, 11793, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11581); + dasm_put(Dst, 12091); } else { - dasm_put(Dst, 11595); + dasm_put(Dst, 12105); } break; default: - dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11603); + dasm_put(Dst, 12113); } else { - dasm_put(Dst, 11617); + dasm_put(Dst, 12127); } break; } - dasm_put(Dst, 11625); + dasm_put(Dst, 12135); if (sse) { - dasm_put(Dst, 11115); + dasm_put(Dst, 11625); } else { - dasm_put(Dst, 11127); + dasm_put(Dst, 11637); } dasm_put(Dst, 8621); break; case BC_MODNV: case BC_MODVV: - dasm_put(Dst, 11241); + dasm_put(Dst, 11751); vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); switch (vk) { case 0: - dasm_put(Dst, 11249, LJ_TISNUM); + dasm_put(Dst, 11759, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11559); + dasm_put(Dst, 12069); } else { - dasm_put(Dst, 11573); + dasm_put(Dst, 12083); } break; case 1: - dasm_put(Dst, 11283, LJ_TISNUM); + dasm_put(Dst, 11793, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11581); + dasm_put(Dst, 12091); } else { - dasm_put(Dst, 11595); + dasm_put(Dst, 12105); } break; default: - dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM); if (sse) { - dasm_put(Dst, 11603); + dasm_put(Dst, 12113); } else { - dasm_put(Dst, 11617); + dasm_put(Dst, 12127); } break; } - dasm_put(Dst, 11631); + dasm_put(Dst, 12141); break; case BC_POW: + dasm_put(Dst, 11751); + vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + switch (vk) { + case 0: + dasm_put(Dst, 11759, LJ_TISNUM); if (sse) { - sse = 0; /* NYI: temporary workaround. */ - dasm_put(Dst, 11241); - vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - switch (vk) { - case 0: - dasm_put(Dst, 11249, LJ_TISNUM); - if (sse) { - dasm_put(Dst, 11559); - } else { - dasm_put(Dst, 11573); - } - break; - case 1: - dasm_put(Dst, 11283, LJ_TISNUM); - if (sse) { - dasm_put(Dst, 11581); - } else { - dasm_put(Dst, 11595); - } - break; - default: - dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); - if (sse) { - dasm_put(Dst, 11603); - } else { - dasm_put(Dst, 11617); - } - break; - } - dasm_put(Dst, 11636); - if (sse) { - dasm_put(Dst, 11115); - } else { - dasm_put(Dst, 11127); - } - sse = 1; + dasm_put(Dst, 12069); } else { - dasm_put(Dst, 11241); - vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - switch (vk) { - case 0: - dasm_put(Dst, 11249, LJ_TISNUM); - if (sse) { - dasm_put(Dst, 11559); - } else { - dasm_put(Dst, 11573); - } - break; - case 1: - dasm_put(Dst, 11283, LJ_TISNUM); - if (sse) { - dasm_put(Dst, 11581); - } else { - dasm_put(Dst, 11595); - } - break; - default: - dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); - if (sse) { - dasm_put(Dst, 11603); - } else { - dasm_put(Dst, 11617); - } - break; - } - dasm_put(Dst, 11636); - if (sse) { - dasm_put(Dst, 11115); - } else { - dasm_put(Dst, 11127); - } + dasm_put(Dst, 12083); + } + break; + case 1: + dasm_put(Dst, 11793, LJ_TISNUM); + if (sse) { + dasm_put(Dst, 12091); + } else { + dasm_put(Dst, 12105); + } + break; + default: + dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM); + if (sse) { + dasm_put(Dst, 12113); + } else { + dasm_put(Dst, 12127); + } + break; + } + dasm_put(Dst, 12146); + if (sse) { + dasm_put(Dst, 11625); + } else { + dasm_put(Dst, 11637); } dasm_put(Dst, 8621); break; case BC_CAT: - dasm_put(Dst, 11640, Dt1(->base), Dt1(->base)); + dasm_put(Dst, 12150, Dt1(->base), Dt1(->base)); break; /* -- Constant ops ------------------------------------------------------ */ case BC_KSTR: - dasm_put(Dst, 11734, LJ_TSTR); + dasm_put(Dst, 12244, LJ_TSTR); break; case BC_KSHORT: if (sse) { - dasm_put(Dst, 11767); + dasm_put(Dst, 12277); } else { - dasm_put(Dst, 11782); + dasm_put(Dst, 12292); } dasm_put(Dst, 8621); break; case BC_KNUM: if (sse) { - dasm_put(Dst, 11790); + dasm_put(Dst, 12300); } else { - dasm_put(Dst, 11803); + dasm_put(Dst, 12313); } dasm_put(Dst, 8621); break; case BC_KPRI: - dasm_put(Dst, 11810); + dasm_put(Dst, 12320); break; case BC_KNIL: - dasm_put(Dst, 11836, LJ_TNIL); + dasm_put(Dst, 12346, LJ_TNIL); break; /* -- Upvalue and function ops ------------------------------------------ */ case BC_UGET: - dasm_put(Dst, 11882, offsetof(GCfuncL, uvptr), DtA(->v)); + dasm_put(Dst, 12392, offsetof(GCfuncL, uvptr), DtA(->v)); break; case BC_USETV: #define TV2MARKOFS \ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) - dasm_put(Dst, 11926, offsetof(GCfuncL, uvptr), DtA(->closed), DtA(->v), TV2MARKOFS, LJ_GC_BLACK, LJ_TISGCV, LJ_TISNUM - LJ_TISGCV, Dt4(->gch.marked), LJ_GC_WHITES, GG_DISP2G); - dasm_put(Dst, 12016); + dasm_put(Dst, 12436, offsetof(GCfuncL, uvptr), DtA(->closed), DtA(->v), TV2MARKOFS, LJ_GC_BLACK, LJ_TISGCV, LJ_TISNUM - LJ_TISGCV, Dt4(->gch.marked), LJ_GC_WHITES, GG_DISP2G); + dasm_put(Dst, 12526); break; #undef TV2MARKOFS case BC_USETS: - dasm_put(Dst, 12028, offsetof(GCfuncL, uvptr), DtA(->v), LJ_TSTR, DtA(->marked), LJ_GC_BLACK, Dt4(->gch.marked), LJ_GC_WHITES, DtA(->closed), GG_DISP2G); + dasm_put(Dst, 12538, offsetof(GCfuncL, uvptr), DtA(->v), LJ_TSTR, DtA(->marked), LJ_GC_BLACK, Dt4(->gch.marked), LJ_GC_WHITES, DtA(->closed), GG_DISP2G); break; case BC_USETN: - dasm_put(Dst, 12119); + dasm_put(Dst, 12629); if (sse) { - dasm_put(Dst, 12124); + dasm_put(Dst, 12634); } else { - dasm_put(Dst, 10934); + dasm_put(Dst, 11444); } - dasm_put(Dst, 12131, offsetof(GCfuncL, uvptr), DtA(->v)); + dasm_put(Dst, 12641, offsetof(GCfuncL, uvptr), DtA(->v)); if (sse) { dasm_put(Dst, 4988); } else { @@ -2011,159 +2008,159 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) dasm_put(Dst, 8621); break; case BC_USETP: - dasm_put(Dst, 12140, offsetof(GCfuncL, uvptr), DtA(->v)); + dasm_put(Dst, 12650, offsetof(GCfuncL, uvptr), DtA(->v)); break; case BC_UCLO: - dasm_put(Dst, 12177, -BCBIAS_J*4, Dt1(->openupval), Dt1(->base), Dt1(->base)); + dasm_put(Dst, 12687, -BCBIAS_J*4, Dt1(->openupval), Dt1(->base), Dt1(->base)); break; case BC_FNEW: - dasm_put(Dst, 12235, Dt1(->base), Dt1(->base), LJ_TFUNC); + dasm_put(Dst, 12745, Dt1(->base), Dt1(->base), LJ_TFUNC); break; /* -- Table ops --------------------------------------------------------- */ case BC_TNEW: - dasm_put(Dst, 12306, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB); + dasm_put(Dst, 12816, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB); break; case BC_TDUP: - dasm_put(Dst, 12417, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB); + dasm_put(Dst, 12927, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB); break; case BC_GGET: - dasm_put(Dst, 12509, Dt7(->env)); + dasm_put(Dst, 13019, Dt7(->env)); break; case BC_GSET: - dasm_put(Dst, 12527, Dt7(->env)); + dasm_put(Dst, 13037, Dt7(->env)); break; case BC_TGETV: - dasm_put(Dst, 12545, LJ_TTAB, LJ_TISNUM); + dasm_put(Dst, 13055, LJ_TTAB, LJ_TISNUM); if (sse) { - dasm_put(Dst, 12578); + dasm_put(Dst, 13088); } else { - dasm_put(Dst, 12599); + dasm_put(Dst, 13109); if (cmov) { - dasm_put(Dst, 10660); + dasm_put(Dst, 11170); } else { - dasm_put(Dst, 10666); + dasm_put(Dst, 11176); } - dasm_put(Dst, 12609); + dasm_put(Dst, 13119); } - dasm_put(Dst, 12613, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<hmask), Dt5(->hash), sizeof(Node), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL); - dasm_put(Dst, 12807, LJ_TNIL, DtB(->next), Dt6(->metatable), Dt6(->nomm), 1<hmask), Dt5(->hash), sizeof(Node), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL); + dasm_put(Dst, 13317, LJ_TNIL, DtB(->next), Dt6(->metatable), Dt6(->nomm), 1<asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable)); - dasm_put(Dst, 13093, Dt6(->nomm), 1<marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); + dasm_put(Dst, 13520, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable)); + dasm_put(Dst, 13603, Dt6(->nomm), 1<marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); break; case BC_TSETS: - dasm_put(Dst, 13155, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->nomm), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL); - dasm_put(Dst, 13230, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<next)); - dasm_put(Dst, 13322, Dt6(->metatable), Dt6(->nomm), 1<base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); + dasm_put(Dst, 13665, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->nomm), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL); + dasm_put(Dst, 13740, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<next)); + dasm_put(Dst, 13832, Dt6(->metatable), Dt6(->nomm), 1<base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); break; case BC_TSETB: - dasm_put(Dst, 13418, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable)); - dasm_put(Dst, 13516, Dt6(->metatable), Dt6(->nomm), 1<marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); + dasm_put(Dst, 13928, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable)); + dasm_put(Dst, 14026, Dt6(->metatable), Dt6(->nomm), 1<marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); break; case BC_TSETM: - dasm_put(Dst, 13562); + dasm_put(Dst, 14072); if (sse) { - dasm_put(Dst, 12124); + dasm_put(Dst, 12634); } else { - dasm_put(Dst, 13567); + dasm_put(Dst, 14077); } - dasm_put(Dst, 13575, Dt6(->marked), LJ_GC_BLACK); + dasm_put(Dst, 14085, Dt6(->marked), LJ_GC_BLACK); if (sse) { - dasm_put(Dst, 13600); + dasm_put(Dst, 14110); } else { - dasm_put(Dst, 13607); + dasm_put(Dst, 14117); } - dasm_put(Dst, 13612, Dt6(->asize), Dt6(->array), Dt1(->base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain)); - dasm_put(Dst, 13740, Dt6(->gclist)); + dasm_put(Dst, 14122, Dt6(->asize), Dt6(->array), Dt1(->base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain)); + dasm_put(Dst, 14250, Dt6(->gclist)); break; /* -- Calls and vararg handling ----------------------------------------- */ case BC_CALL: case BC_CALLM: - dasm_put(Dst, 11245); + dasm_put(Dst, 11755); if (op == BC_CALLM) { - dasm_put(Dst, 13748); + dasm_put(Dst, 14258); } - dasm_put(Dst, 13753, LJ_TFUNC, Dt7(->gate)); + dasm_put(Dst, 14263, LJ_TFUNC, Dt7(->gate)); break; case BC_CALLMT: - dasm_put(Dst, 13748); + dasm_put(Dst, 14258); break; case BC_CALLT: - dasm_put(Dst, 13776, LJ_TFUNC, FRAME_TYPE, Dt7(->ffid), Dt7(->gate)); - dasm_put(Dst, 13881, FRAME_TYPE, Dt7(->pt), Dt9(->k)); + dasm_put(Dst, 14286, LJ_TFUNC, FRAME_TYPE, Dt7(->ffid), Dt7(->gate)); + dasm_put(Dst, 14391, FRAME_TYPE, Dt7(->pt), Dt9(->k)); break; case BC_ITERC: - dasm_put(Dst, 13938, LJ_TFUNC, Dt7(->gate)); + dasm_put(Dst, 14448, LJ_TFUNC, Dt7(->gate)); break; case BC_VARG: - dasm_put(Dst, 14000, Dt7(->pt), Dt9(->numparams), (8+FRAME_VARG), LJ_TNIL); - dasm_put(Dst, 14144, Dt1(->maxstack), Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top)); + dasm_put(Dst, 14510, Dt7(->pt), Dt9(->numparams), (8+FRAME_VARG), LJ_TNIL); + dasm_put(Dst, 14654, Dt1(->maxstack), Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top)); break; /* -- Returns ----------------------------------------------------------- */ case BC_RETM: - dasm_put(Dst, 13748); + dasm_put(Dst, 14258); break; case BC_RET: case BC_RET0: case BC_RET1: if (op != BC_RET0) { - dasm_put(Dst, 14239); + dasm_put(Dst, 14749); } - dasm_put(Dst, 14243, FRAME_TYPE); + dasm_put(Dst, 14753, FRAME_TYPE); switch (op) { case BC_RET: - dasm_put(Dst, 14262); + dasm_put(Dst, 14772); break; case BC_RET1: - dasm_put(Dst, 14320); + dasm_put(Dst, 14830); /* fallthrough */ case BC_RET0: - dasm_put(Dst, 14336); + dasm_put(Dst, 14846); default: break; } - dasm_put(Dst, 14347, Dt7(->pt), Dt9(->k)); + dasm_put(Dst, 14857, Dt7(->pt), Dt9(->k)); if (op == BC_RET) { - dasm_put(Dst, 14389, LJ_TNIL); + dasm_put(Dst, 14899, LJ_TNIL); } else { - dasm_put(Dst, 14398, LJ_TNIL); + dasm_put(Dst, 14908, LJ_TNIL); } - dasm_put(Dst, 14405); + dasm_put(Dst, 14915); if (op != BC_RET0) { - dasm_put(Dst, 14426); + dasm_put(Dst, 14936); } dasm_put(Dst, 5084); break; @@ -2173,7 +2170,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) case BC_FORL: #if LJ_HASJIT - dasm_put(Dst, 14430, HOTCOUNT_PCMASK, GG_DISP2HOT); + dasm_put(Dst, 14940, HOTCOUNT_PCMASK, GG_DISP2HOT); #endif break; @@ -2185,57 +2182,57 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) case BC_FORI: case BC_IFORL: vk = (op == BC_IFORL || op == BC_JFORL); - dasm_put(Dst, 14451); + dasm_put(Dst, 14961); if (!vk) { - dasm_put(Dst, 14455, LJ_TISNUM, LJ_TISNUM); + dasm_put(Dst, 14965, LJ_TISNUM, LJ_TISNUM); } - dasm_put(Dst, 14474); + dasm_put(Dst, 14984); if (!vk) { - dasm_put(Dst, 14478, LJ_TISNUM); + dasm_put(Dst, 14988, LJ_TISNUM); } if (sse) { - dasm_put(Dst, 14487); + dasm_put(Dst, 14997); if (vk) { - dasm_put(Dst, 14499); + dasm_put(Dst, 15009); } else { - dasm_put(Dst, 14518); + dasm_put(Dst, 15028); } - dasm_put(Dst, 14523); + dasm_put(Dst, 15033); } else { - dasm_put(Dst, 14536); + dasm_put(Dst, 15046); if (vk) { - dasm_put(Dst, 14542); + dasm_put(Dst, 15052); } else { - dasm_put(Dst, 14558); + dasm_put(Dst, 15068); } - dasm_put(Dst, 14566); + dasm_put(Dst, 15076); if (cmov) { - dasm_put(Dst, 10660); + dasm_put(Dst, 11170); } else { - dasm_put(Dst, 10666); + dasm_put(Dst, 11176); } if (!cmov) { - dasm_put(Dst, 14571); + dasm_put(Dst, 15081); } } if (op == BC_FORI) { - dasm_put(Dst, 14577, -BCBIAS_J*4); + dasm_put(Dst, 15087, -BCBIAS_J*4); } else if (op == BC_JFORI) { - dasm_put(Dst, 14587, -BCBIAS_J*4, BC_JLOOP); + dasm_put(Dst, 15097, -BCBIAS_J*4, BC_JLOOP); } else if (op == BC_IFORL) { - dasm_put(Dst, 14601, -BCBIAS_J*4); + dasm_put(Dst, 15111, -BCBIAS_J*4); } else { - dasm_put(Dst, 14597, BC_JLOOP); + dasm_put(Dst, 15107, BC_JLOOP); } - dasm_put(Dst, 10695); + dasm_put(Dst, 11205); if (sse) { - dasm_put(Dst, 14611); + dasm_put(Dst, 15121); } break; case BC_ITERL: #if LJ_HASJIT - dasm_put(Dst, 14430, HOTCOUNT_PCMASK, GG_DISP2HOT); + dasm_put(Dst, 14940, HOTCOUNT_PCMASK, GG_DISP2HOT); #endif break; @@ -2244,18 +2241,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) break; #endif case BC_IITERL: - dasm_put(Dst, 14622, LJ_TNIL); + dasm_put(Dst, 15132, LJ_TNIL); if (op == BC_JITERL) { - dasm_put(Dst, 14637, BC_JLOOP); + dasm_put(Dst, 15147, BC_JLOOP); } else { - dasm_put(Dst, 14651, -BCBIAS_J*4); + dasm_put(Dst, 15161, -BCBIAS_J*4); } - dasm_put(Dst, 10992); + dasm_put(Dst, 11502); break; case BC_LOOP: #if LJ_HASJIT - dasm_put(Dst, 14430, HOTCOUNT_PCMASK, GG_DISP2HOT); + dasm_put(Dst, 14940, HOTCOUNT_PCMASK, GG_DISP2HOT); #endif break; @@ -2265,12 +2262,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) case BC_JLOOP: #if LJ_HASJIT - dasm_put(Dst, 14667, DISPATCH_J(trace), DtD(->mcode), DISPATCH_GL(jit_base), DISPATCH_GL(jit_L)); + dasm_put(Dst, 15177, DISPATCH_J(trace), DtD(->mcode), DISPATCH_GL(jit_base), DISPATCH_GL(jit_L)); #endif break; case BC_JMP: - dasm_put(Dst, 14690, -BCBIAS_J*4); + dasm_put(Dst, 15200, -BCBIAS_J*4); break; /* ---------------------------------------------------------------------- */ @@ -2298,7 +2295,7 @@ static int build_backend(BuildCtx *ctx) build_subroutines(ctx, cmov, sse); - dasm_put(Dst, 14714); + dasm_put(Dst, 15224); for (op = 0; op < BC__MAX; op++) build_ins(ctx, (BCOp)op, op, cmov, sse); diff --git a/src/lj_asm.c b/src/lj_asm.c index c2cc4342..eb14b0e5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1991,9 +1991,19 @@ static int fpmjoin_pow(ASMState *as, IRIns *ir) IRIns *irpp = IR(irp->op1); if (irpp == ir-2 && irpp->o == IR_FPMATH && irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { - emit_call(as, lj_vm_pow); /* st0 = lj_vm_pow(st1, st0) */ - asm_x87load(as, irp->op2); - asm_x87load(as, irpp->op1); + /* The modified regs must match with the *.dasc implementation. */ + RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); + IRIns *irx; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + ra_destreg(as, ir, RID_XMM0); + emit_call(as, lj_vm_pow_sse); + irx = IR(irpp->op1); + if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1) + irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */ + ra_left(as, RID_XMM0, irpp->op1); + ra_left(as, RID_XMM1, irp->op2); return 1; } } @@ -2007,30 +2017,35 @@ static void asm_fpmath(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_FPR); Reg left = asm_fuseload(as, ir->op1, RSET_FPR); emit_mrm(as, XO_SQRTSD, dest, left); - } else if ((as->flags & JIT_F_SSE4_1) && fpm <= IRFPM_TRUNC) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = asm_fuseload(as, ir->op1, RSET_FPR); - /* Round down/up/trunc == 1001/1010/1011. */ - emit_i8(as, 0x09 + fpm); - /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op. */ - emit_mrm(as, XO_ROUNDSD, dest, left); - /* Let's pretend it's a 3-byte opcode, and compensate afterwards. */ - /* This is atrocious, but the alternatives are much worse. */ - if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) { - as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ - } - *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ } else if (fpm <= IRFPM_TRUNC) { - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - ra_destreg(as, ir, RID_XMM0); - emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse : - fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); - ra_left(as, RID_XMM0, ir->op1); - } else { + if (as->flags & JIT_F_SSE4_1) { /* SSE4.1 has a rounding instruction. */ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = asm_fuseload(as, ir->op1, RSET_FPR); + /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op. + ** Let's pretend it's a 3-byte opcode, and compensate afterwards. + ** This is atrocious, but the alternatives are much worse. + */ + /* Round down/up/trunc == 1001/1010/1011. */ + emit_i8(as, 0x09 + fpm); + emit_mrm(as, XO_ROUNDSD, dest, left); + if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) { + as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ + } + *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ + } else { /* Call helper functions for SSE2 variant. */ + /* The modified regs must match with the *.dasc implementation. */ + RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + ra_destreg(as, ir, RID_XMM0); + emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse : + fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); + ra_left(as, RID_XMM0, ir->op1); + } + } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { + /* Rejoined to pow(). */ + } else { /* Handle x87 ops. */ int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ Reg dest = ir->r; if (ra_hasreg(dest)) { @@ -2040,14 +2055,8 @@ static void asm_fpmath(ASMState *as, IRIns *ir) } emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); switch (fpm) { /* st0 = lj_vm_*(st0) */ - case IRFPM_FLOOR: emit_call(as, lj_vm_floor); break; - case IRFPM_CEIL: emit_call(as, lj_vm_ceil); break; - case IRFPM_TRUNC: emit_call(as, lj_vm_trunc); break; case IRFPM_EXP: emit_call(as, lj_vm_exp); break; - case IRFPM_EXP2: - if (fpmjoin_pow(as, ir)) return; - emit_call(as, lj_vm_exp2); /* st0 = lj_vm_exp2(st0) */ - break; + case IRFPM_EXP2: emit_call(as, lj_vm_exp2); break; case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; case IRFPM_COS: emit_x87op(as, XI_FCOS); break; case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; @@ -2063,10 +2072,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir) emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; case IR_LDEXP: emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; - case IR_POWI: - emit_call(as, lj_vm_powi); /* st0 = lj_vm_powi(st0, [esp]) */ - emit_rmro(as, XO_MOVto, ra_alloc1(as, ir->op2, RSET_GPR), RID_ESP, 0); - break; default: lua_assert(0); break; } break; @@ -2085,6 +2090,19 @@ static void asm_fpmath(ASMState *as, IRIns *ir) } } +static void asm_powi(ASMState *as, IRIns *ir) +{ + /* The modified regs must match with the *.dasc implementation. */ + RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + ra_destreg(as, ir, RID_XMM0); + emit_call(as, lj_vm_powi_sse); + ra_left(as, RID_XMM0, ir->op1); + ra_left(as, RID_EAX, ir->op2); +} + /* Find out whether swapping operands might be beneficial. */ static int swapops(ASMState *as, IRIns *ir) { @@ -3132,9 +3150,10 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_MIN: asm_fparith(as, ir, XO_MINSD); break; case IR_MAX: asm_fparith(as, ir, XO_MAXSD); break; - case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: case IR_POWI: + case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: asm_fpmath(as, ir); break; + case IR_POWI: asm_powi(as, ir); break; /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; @@ -3285,8 +3304,22 @@ static void asm_setup_regsp(ASMState *as, Trace *T) if (inloop) as->modset = RSET_SCRATCH; break; + case IR_POWI: + ir->prev = REGSP_HINT(RID_XMM0); + if (inloop) + as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); + continue; case IR_FPMATH: - if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { + if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ + ir->prev = REGSP_HINT(RID_XMM0); +#if !LJ_64 + if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ + as->evenspill = 4; +#endif + if (inloop) + as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); + continue; + } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { ir->prev = REGSP_HINT(RID_XMM0); if (inloop) as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); diff --git a/src/lj_vm.h b/src/lj_vm.h index 07adc36d..ed375747 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -34,16 +34,13 @@ LJ_ASMF void lj_vm_exit_handler(void); LJ_ASMF void lj_vm_exit_interp(void); /* Handlers callable from compiled code. */ -LJ_ASMF void lj_vm_floor(void); -LJ_ASMF void lj_vm_ceil(void); -LJ_ASMF void lj_vm_trunc(void); LJ_ASMF void lj_vm_floor_sse(void); LJ_ASMF void lj_vm_ceil_sse(void); LJ_ASMF void lj_vm_trunc_sse(void); LJ_ASMF void lj_vm_exp(void); LJ_ASMF void lj_vm_exp2(void); -LJ_ASMF void lj_vm_pow(void); -LJ_ASMF void lj_vm_powi(void); +LJ_ASMF void lj_vm_pow_sse(void); +LJ_ASMF void lj_vm_powi_sse(void); /* Call gates for functions. */ LJ_ASMF void lj_gate_lf(void);