This is a list of changes between the released versions of LuaJIT.
-The current stable version is LuaJIT 2.0.4.
+The current stable version is LuaJIT 2.0.5.
Please check the @@ -74,6 +74,30 @@ to see whether newer versions are available.
LuaJIT 2.1.0-beta3 — 2017-05-01
+-
+
- Rewrite memory block allocator. +
- Add various extension from Lua 5.2/5.3. +
- Remove old Lua 5.0 compatibility defines. +
- Set arg table before evaluating LUA_INIT and -e chunks. +
- Fix FOLD rules for math.abs() and FP negation. +
- Fix soft-float math.abs() and negation. +
- Fix formatting of some small denormals at low precision. +
- LJ_GC64: Add JIT compiler support. +
- x64/LJ_GC64: Add JIT compiler backend. +
- x86/x64: Generate BMI2 shifts and rotates, if available. +
- Windows/x86: Add full exception interoperability. +
- ARM64: Add big-endian support. +
- ARM64: Add JIT compiler backend. +
- MIPS: Fix TSETR barrier. +
- MIPS: Support MIPS16 interlinking. +
- MIPS soft-float: Fix code generation for HREF. +
- MIPS64: Add MIPS64 hard-float JIT compiler backend. +
- MIPS64: Add MIPS64 hard-float/soft-float support to interpreter. +
- FFI: Compile bitfield loads/stores. +
- Various fixes common with the 2.0 branch. +
LuaJIT 2.1.0-beta2 — 2016-03-03
- Enable trace stitching. @@ -140,6 +164,48 @@ Please take a look at the commit history for more details.
LuaJIT 2.0.5 — 2017-05-01
+-
+
- Add workaround for MSVC 2015 stdio changes. +
- Limit mcode alloc probing, depending on the available pool size. +
- Fix overly restrictive range calculation in mcode allocation. +
- Fix out-of-scope goto handling in parser. +
- Remove internal __mode = "K" and replace with safe check. +
- Add "proto" field to jit.util.funcinfo(). +
- Fix GC step size calculation. +
- Initialize uv->immutable for upvalues of loaded chunks. +
- Fix for cdata vs. non-cdata arithmetics/comparisons. +
- Drop leftover regs in 'for' iterator assignment, too. +
- Fix PHI remarking in SINK pass. +
- Don't try to record outermost pcall() return to lower frame. +
- Add guard for obscure aliasing between open upvalues and SSA slots. +
- Remove assumption that lj_math_random_step() doesn't clobber FPRs. +
- Fix handling of non-numeric strings in arithmetic coercions. +
- Fix recording of select(n, ...) with off-trace varargs +
- Fix install for cross-builds. +
- Don't allocate unused 2nd result register in JIT compiler backend. +
- Drop marks from replayed instructions when sinking. +
- Fix unsinking check. +
- Properly handle OOM in trace_save(). +
- Limit number of arguments given to io.lines() and fp:lines(). +
- Fix narrowing of TOBIT. +
- OSX: Fix build with recent XCode. +
- x86/x64: Don't spill an explicit REF_BASE in the IR. +
- x86/x64: Fix instruction length decoder. +
- x86/x64: Search for exit jumps with instruction length decoder. +
- ARM: Fix BLX encoding for Thumb interworking calls. +
- MIPS: Don't use RID_GP as a scratch register. +
- MIPS: Fix emitted code for U32 to float conversion. +
- MIPS: Backport workaround for compact unwind tables. +
- MIPS: Fix cross-endian jit.bcsave. +
- MIPS: Fix BC_ISNEXT fallback path. +
- MIPS: Fix use of ffgccheck delay slots in interpreter. +
- FFI: Fix FOLD rules for int64_t comparisons. +
- FFI: Fix SPLIT pass for CONV i64.u64. +
- FFI: Fix ipairs() recording. +
- FFI: Don't propagate qualifiers into subtypes of complex. +
LuaJIT 2.0.4 — 2015-05-14
- Fix stack check in narrowing optimization. @@ -807,7 +873,7 @@ no point in listing differences over earlier versions.
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/contact.html b/doc/contact.html index 52623764..fe4751c0 100644 --- a/doc/contact.html +++ b/doc/contact.html @@ -4,7 +4,7 @@
+If you want to report bugs, propose fixes or suggest enhancements, +please use the +GitHub issue tracker. +
+Please send general questions to the » LuaJIT mailing list. +
+You can also send any questions you have directly to me:
@@ -86,7 +93,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")Copyright
All documentation is -Copyright © 2005-2016 Mike Pall. +Copyright © 2005-2017 Mike Pall.
@@ -94,7 +101,7 @@ Copyright © 2005-2016 Mike Pall.-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html index e1af3ed0..ad462c63 100644 --- a/doc/ext_c_api.html +++ b/doc/ext_c_api.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html index d814f4d0..5e1daaf5 100644 --- a/doc/ext_ffi.html +++ b/doc/ext_ffi.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html index 6d405960..91af2e1d 100644 --- a/doc/ext_ffi_api.html +++ b/doc/ext_ffi_api.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index 15843d7c..800b6b18 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -4,7 +4,7 @@
-
-
- Bitfield accesses and initializations.
- Vector operations.
- Table initializers.
- Initialization of nested struct/union types.
- Non-default initialization of VLA/VLS or large C types (> 128 bytes or > 16 array elements. -
- Conversions from lightuserdata to void *. +
- Bitfield initializations.
- Pointer differences for element sizes that are not a power of two.
- Calls to C functions with aggregates passed or returned by @@ -1253,7 +1252,7 @@ compiled.
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html index 01200c45..36500664 100644 --- a/doc/ext_ffi_tutorial.html +++ b/doc/ext_ffi_tutorial.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/ext_jit.html b/doc/ext_jit.html index 86098019..e4088bcb 100644 --- a/doc/ext_jit.html +++ b/doc/ext_jit.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html index 69ea25db..71b8c033 100644 --- a/doc/ext_profiler.html +++ b/doc/ext_profiler.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/extensions.html b/doc/extensions.html index 9d666293..d7cc9693 100644 --- a/doc/extensions.html +++ b/doc/extensions.html @@ -4,7 +4,7 @@
Note: LJ_GC64 mode requires a different frame layout, which implies a different, incompatible bytecode format for ports that use this mode (e.g. -ARM64). This may be rectified in the future. +ARM64 or MIPS64) or when explicitly enabled for x64. This may be rectified +in the future.
table.new(narray, nhash) allocates a pre-sized table
@@ -291,8 +292,8 @@ enabled:Note: this provides only partial compatibility with Lua 5.2 at the @@ -350,6 +373,12 @@ LuaJIT supports some extensions from Lua 5.3:
- Unicode escape '\u{XX...}' embeds the UTF-8 encoding in string literals.
- The argument table arg can be read (and modified) by LUA_INIT and -e chunks. +
- io.read() and file:read() accept formats with or without a leading *. +
- table.move(a1, f, e, t [,a2]). +
- coroutine.isyieldable(). +
- Lua/C API extensions: +lua_isyieldable() +
C++ Exception Interoperability
@@ -443,7 +472,7 @@ C++ destructors.-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/faq.html b/doc/faq.html index 605fdd0a..2c930743 100644 --- a/doc/faq.html +++ b/doc/faq.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/install.html b/doc/install.html index efeda33c..c491c601 100644 --- a/doc/install.html +++ b/doc/install.html @@ -4,7 +4,7 @@
+LuaJIT on x64 currently uses 32 bit GC objects by default. +LJ_GC64 mode may be explicitly enabled: +add XCFLAGS=-DLUAJIT_ENABLE_GC64 to the make command or run +msvcbuild gc64 for MSVC/WinSDK. Please check the note +about the bytecode format +differences, too. +
POSIX Systems (Linux, OSX, *BSD etc.)
Prerequisites
@@ -190,8 +198,8 @@ open a terminal window and change to this directory. Now unpack the archive and change to the newly created directory:-tar zxf LuaJIT-2.0.4.tar.gz -cd LuaJIT-2.0.4+tar zxf LuaJIT-2.0.5.tar.gz +cd LuaJIT-2.0.5
Building LuaJIT
The supplied Makefiles try to auto-detect the settings needed for your @@ -584,14 +592,11 @@ intend to load Lua/C modules at runtime.
-pagezero_size 10000 -image_base 100000000-Also, it's recommended to rebase all (self-compiled) shared libraries -which are loaded at runtime on OSX/x64 (e.g. C extension modules for Lua). -See: man rebase
Additional hints for initializing LuaJIT using the C API functions:
@@ -677,7 +682,7 @@ to me (the upstream) and not you (the package maintainer), anyway.-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/luajit.html b/doc/luajit.html index 44a7b8a1..ef5b824c 100644 --- a/doc/luajit.html +++ b/doc/luajit.html @@ -4,7 +4,7 @@
-LuaJIT is Copyright © 2005-2016 Mike Pall, released under the +LuaJIT is Copyright © 2005-2017 Mike Pall, released under the » MIT open source license.
@@ -226,7 +226,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/running.html b/doc/running.html index 28a73264..666b0abc 100644 --- a/doc/running.html +++ b/doc/running.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/doc/status.html b/doc/status.html index d10033b0..cad6ca65 100644 --- a/doc/status.html +++ b/doc/status.html @@ -4,7 +4,7 @@
-Copyright © 2005-2016 Mike Pall +Copyright © 2005-2017 Mike Pall · Contact diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h index 0fa69ac0..a43f7c66 100644 --- a/dynasm/dasm_arm.h +++ b/dynasm/dasm_arm.h @@ -1,6 +1,6 @@ /* ** DynASM ARM encoding engine. -** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua index e2ed922a..32f595af 100644 --- a/dynasm/dasm_arm.lua +++ b/dynasm/dasm_arm.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM ARM module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h index d64e60a3..47e1e074 100644 --- a/dynasm/dasm_arm64.h +++ b/dynasm/dasm_arm64.h @@ -1,6 +1,6 @@ /* ** DynASM ARM64 encoding engine. -** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua index 4a7d8dfe..8a5f735d 100644 --- a/dynasm/dasm_arm64.lua +++ b/dynasm/dasm_arm64.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM ARM64 module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h index f3b43211..4b49fd8c 100644 --- a/dynasm/dasm_mips.h +++ b/dynasm/dasm_mips.h @@ -1,6 +1,6 @@ /* ** DynASM MIPS encoding engine. -** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua index c8010561..78a4e34a 100644 --- a/dynasm/dasm_mips.lua +++ b/dynasm/dasm_mips.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM MIPS32/MIPS64 module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua index 94f21921..5636b23a 100644 --- a/dynasm/dasm_mips64.lua +++ b/dynasm/dasm_mips64.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM MIPS64 module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h index 3f267fb8..3a7ee9b0 100644 --- a/dynasm/dasm_ppc.h +++ b/dynasm/dasm_ppc.h @@ -1,6 +1,6 @@ /* ** DynASM PPC/PPC64 encoding engine. -** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua index ff144edb..4dc39da6 100644 --- a/dynasm/dasm_ppc.lua +++ b/dynasm/dasm_ppc.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM PPC/PPC64 module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. -- -- Support for various extensions contributed by Caio Souza Oliveira. diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h index 22a654cb..59d9e2b2 100644 --- a/dynasm/dasm_proto.h +++ b/dynasm/dasm_proto.h @@ -1,6 +1,6 @@ /* ** DynASM encoding engine prototypes. -** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/dynasm/dasm_x64.lua b/dynasm/dasm_x64.lua index 21333556..e8bdeb37 100644 --- a/dynasm/dasm_x64.lua +++ b/dynasm/dasm_x64.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM x64 module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ -- This module just sets 64 bit mode for the combined x86/x64 module. diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index bf5b7296..bc636357 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -1,6 +1,6 @@ /* ** DynASM x86 encoding engine. -** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index a5efd98f..4c031e2c 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM x86/x64 module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua index 1e5899a4..5ec21a79 100644 --- a/dynasm/dynasm.lua +++ b/dynasm/dynasm.lua @@ -2,7 +2,7 @@ -- DynASM. A dynamic assembler for code generation engines. -- Originally designed and implemented for LuaJIT. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See below for full copyright notice. ------------------------------------------------------------------------------ @@ -17,7 +17,7 @@ local _info = { url = "http://luajit.org/dynasm.html", license = "MIT", copyright = [[ -Copyright (C) 2005-2016 Mike Pall. All rights reserved. +Copyright (C) 2005-2017 Mike Pall. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/etc/luajit.1 b/etc/luajit.1 index 11e88c04..0d263db7 100644 --- a/etc/luajit.1 +++ b/etc/luajit.1 @@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end" Runs some nested loops and shows the resulting traces. .SH COPYRIGHT .PP -\fBLuaJIT\fR is Copyright \(co 2005-2016 Mike Pall. +\fBLuaJIT\fR is Copyright \(co 2005-2017 Mike Pall. .br \fBLuaJIT\fR is open source software, released under the MIT license. .SH SEE ALSO diff --git a/etc/luajit.pc b/etc/luajit.pc index 0fdd1efd..a78f1746 100644 --- a/etc/luajit.pc +++ b/etc/luajit.pc @@ -2,7 +2,7 @@ majver=2 minver=1 relver=0 -version=${majver}.${minver}.${relver}-beta2 +version=${majver}.${minver}.${relver}-beta3 abiver=5.1 prefix=/usr/local diff --git a/src/Makefile b/src/Makefile index 4e479ae5..f56465d1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -7,7 +7,7 @@ # Also works with MinGW and Cygwin on Windows. # Please check msvcbuild.bat for building with MSVC on Windows. # -# Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ############################################################################## MAJVER= 2 @@ -110,6 +110,9 @@ XCFLAGS= #XCFLAGS+= -DLUAJIT_NUMMODE=1 #XCFLAGS+= -DLUAJIT_NUMMODE=2 # +# Enable GC64 mode for x64. +#XCFLAGS+= -DLUAJIT_ENABLE_GC64 +# ############################################################################## ############################################################################## @@ -206,7 +209,7 @@ TARGET_CC= $(STATIC_CC) TARGET_STCC= $(STATIC_CC) TARGET_DYNCC= $(DYNAMIC_CC) TARGET_LD= $(CROSS)$(CC) -TARGET_AR= $(CROSS)ar rcus +TARGET_AR= $(CROSS)ar rcus 2>/dev/null TARGET_STRIP= $(CROSS)strip TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) @@ -239,6 +242,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) TARGET_LJARCH= arm else ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 + endif TARGET_LJARCH= arm64 else ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) @@ -310,7 +316,6 @@ ifeq (Darwin,$(TARGET_SYS)) export MACOSX_DEPLOYMENT_TARGET=10.4 endif TARGET_STRIP+= -x - TARGET_AR+= 2>/dev/null TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC TARGET_DYNXLDOPTS= TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) @@ -321,7 +326,6 @@ ifeq (Darwin,$(TARGET_SYS)) else ifeq (iOS,$(TARGET_SYS)) TARGET_STRIP+= -x - TARGET_AR+= 2>/dev/null TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC TARGET_DYNXLDOPTS= TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) diff --git a/src/Makefile.dep b/src/Makefile.dep index 4ef002e9..2b1cb5ef 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -3,8 +3,8 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ - lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ - lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ + lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ + lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ lj_strfmt.h lj_lib.h lj_libdef.h lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ diff --git a/src/host/buildvm.c b/src/host/buildvm.c index 57b4dc97..de23fabd 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** This is a tool to build the hand-tuned assembler code required for ** LuaJIT's bytecode interpreter. It supports a variety of output formats diff --git a/src/host/buildvm.h b/src/host/buildvm.h index 90d21b15..b90428dc 100644 --- a/src/host/buildvm.h +++ b/src/host/buildvm.h @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _BUILDVM_H diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index 00bedee7..259ab9b6 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: Assembler source code emitter. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" @@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) { int i; for (i = 0; i < n; i += 4) { + uint32_t ins = *(uint32_t *)(p+i); +#if LJ_TARGET_ARM64 && LJ_BE + ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */ +#endif if ((i & 15) == 0) - fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); + fprintf(ctx->fp, "\t.long 0x%08x", ins); else - fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); + fprintf(ctx->fp, ",0x%08x", ins); if ((i & 15) == 12) putc('\n', ctx->fp); } if ((n & 15) != 0) putc('\n', ctx->fp); @@ -222,7 +226,8 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc case BUILD_machasm: fprintf(ctx->fp, "\n\t.private_extern %s\n" - "%s:\n", name, name); + "\t.no_dead_strip %s\n" + "%s:\n", name, name, name); break; default: break; diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c index e87a0af3..d579f4d4 100644 --- a/src/host/buildvm_fold.c +++ b/src/host/buildvm_fold.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: IR folding hash table generator. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c index 1adc3cbd..2956fdb6 100644 --- a/src/host/buildvm_lib.c +++ b/src/host/buildvm_lib.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: library definition compiler. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h index 45f8f8cb..b2600bd5 100644 --- a/src/host/buildvm_libbc.h +++ b/src/host/buildvm_libbc.h @@ -15,7 +15,12 @@ static const uint8_t libbc_code[] = { 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, -2,0,76,3,2,0,75,0,1,0,0,2,0 +2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, +3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, +0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, +41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, +18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, +6,252,127,76,4,2,0,0 #else 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, @@ -28,7 +33,12 @@ static const uint8_t libbc_code[] = { 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, -2,0,76,3,2,0,75,0,1,0,0,2,0 +2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, +3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, +0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, +41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, +18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, +6,252,127,76,4,2,0,0 #endif }; @@ -40,6 +50,7 @@ static const struct { const char *name; int ofs; } libbc_map[] = { {"table_foreach",136}, {"table_getn",207}, {"table_remove",226}, -{NULL,355} +{"table_move",355}, +{NULL,502} }; diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c index 42f6ac84..2eb2bb7b 100644 --- a/src/host/buildvm_peobj.c +++ b/src/host/buildvm_peobj.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: PE object emitter. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Only used for building on Windows, since we cannot assume the presence ** of a suitable assembler. The host and target byte order must match. diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua index 7a17a3d0..6f5a05cc 100644 --- a/src/host/genlibbc.lua +++ b/src/host/genlibbc.lua @@ -2,7 +2,7 @@ -- Lua script to dump the bytecode of the library functions written in Lua. -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. ---------------------------------------------------------------------------- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- diff --git a/src/host/genminilua.lua b/src/host/genminilua.lua index 16a81a23..50feff01 100644 --- a/src/host/genminilua.lua +++ b/src/host/genminilua.lua @@ -2,7 +2,7 @@ -- Lua script to generate a customized, minified version of Lua. -- The resulting 'minilua' is used for the build process of LuaJIT. ---------------------------------------------------------------------------- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- diff --git a/src/jit/bc.lua b/src/jit/bc.lua index a8cb8496..193cf01f 100644 --- a/src/jit/bc.lua +++ b/src/jit/bc.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT bytecode listing module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 5117f714..c17c88e0 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT module to save/list bytecode. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -63,8 +63,8 @@ local map_type = { } local map_arch = { - x86 = true, x64 = true, arm = true, arm64 = true, ppc = true, - mips = true, mipsel = true, + x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, + ppc = true, mips = true, mipsel = true, } local map_os = { @@ -200,7 +200,7 @@ typedef struct { ]] local symname = LJBC_PREFIX..ctx.modname local is64, isbe = false, false - if ctx.arch == "x64" or ctx.arch == "arm64" then + if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then is64 = true elseif ctx.arch == "ppc" or ctx.arch == "mips" then isbe = true @@ -237,9 +237,9 @@ typedef struct { hdr.eendian = isbe and 2 or 1 hdr.eversion = 1 hdr.type = f16(1) - hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) + hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) if ctx.arch == "mips" or ctx.arch == "mipsel" then - hdr.flags = 0x50001006 + hdr.flags = f32(0x50001006) end hdr.version = f32(1) hdr.shofs = fofs(ffi.offsetof(o, "sect")) diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua index 5d2cdbda..c2dd7769 100644 --- a/src/jit/dis_arm.lua +++ b/src/jit/dis_arm.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT ARM disassembler module. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua new file mode 100644 index 00000000..a7173326 --- /dev/null +++ b/src/jit/dis_arm64.lua @@ -0,0 +1,1216 @@ +---------------------------------------------------------------------------- +-- LuaJIT ARM64 disassembler module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +-- +-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +-- Sponsored by Cisco Systems, Inc. +---------------------------------------------------------------------------- +-- This is a helper module used by the LuaJIT machine code dumper module. +-- +-- It disassembles most user-mode AArch64 instructions. +-- NYI: Advanced SIMD and VFP instructions. +------------------------------------------------------------------------------ + +local type = type +local sub, byte, format = string.sub, string.byte, string.format +local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local concat = table.concat +local bit = require("bit") +local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex +local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift +local ror = bit.ror + +------------------------------------------------------------------------------ +-- Opcode maps +------------------------------------------------------------------------------ + +local map_adr = { -- PC-relative addressing. + shift = 31, mask = 1, + [0] = "adrDBx", "adrpDBx" +} + +local map_addsubi = { -- Add/subtract immediate. + shift = 29, mask = 3, + [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg", +} + +local map_logi = { -- Logical immediate. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" + } +} + +local map_movwi = { -- Move wide immediate. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" + }, false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" + }, +} + +local map_bitf = { -- Bitfield. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w", + "bfm|bfi|bfxilDN13w", + "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w" + } + }, + { + shift = 22, mask = 1, + { + shift = 29, mask = 3, + [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x", + "bfm|bfi|bfxilDN13x", + "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x" + } + } +} + +local map_datai = { -- Data processing - immediate. + shift = 23, mask = 7, + [0] = map_adr, map_adr, map_addsubi, false, + map_logi, map_movwi, map_bitf, + { + shift = 15, mask = 0x1c0c1, + [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x", + [0x10081] = "extr|rorDNM4x" + } +} + +local map_logsr = { -- Logical, shifted register. + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = { + shift = 21, mask = 7, + [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", + "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + }, + { + shift = 21, mask = 7, + [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", + "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + }, + { + shift = 21, mask = 7, + [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", + "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + }, + { + shift = 21, mask = 7, + [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", + "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + } + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = { + shift = 21, mask = 7, + [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", + "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + }, + { + shift = 21, mask = 7, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", + "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + }, + { + shift = 21, mask = 7, + [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", + "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + }, + { + shift = 21, mask = 7, + [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", + "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + } + } +} + +local map_assh = { + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" + }, + { + shift = 22, mask = 3, + [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", + "adds|cmnD0NMSg", "adds|cmnD0NMg" + }, + { + shift = 22, mask = 3, + [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" + }, + { + shift = 22, mask = 3, + [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", + "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" + }, + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" + }, + { + shift = 22, mask = 3, + [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg", + "adds|cmnD0NMg" + }, + { + shift = 22, mask = 3, + [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" + }, + { + shift = 22, mask = 3, + [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", + "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" + } + } +} + +local map_addsubsh = { -- Add/subtract, shifted register. + shift = 22, mask = 3, + [0] = map_assh, map_assh, map_assh +} + +local map_addsubex = { -- Add/subtract, extended register. + shift = 22, mask = 3, + [0] = { + shift = 29, mask = 3, + [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg", + } +} + +local map_addsubc = { -- Add/subtract, with carry. + shift = 10, mask = 63, + [0] = { + shift = 29, mask = 3, + [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg", + } +} + +local map_ccomp = { + shift = 4, mask = 1, + [0] = { + shift = 10, mask = 3, + [0] = { -- Conditional compare register. + shift = 29, mask = 3, + "ccmnNMVCg", false, "ccmpNMVCg", + }, + [2] = { -- Conditional compare immediate. + shift = 29, mask = 3, + "ccmnN5VCg", false, "ccmpN5VCg", + } + } +} + +local map_csel = { -- Conditional select. + shift = 11, mask = 1, + [0] = { + shift = 10, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false, + }, + { + shift = 29, mask = 3, + [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false, + } + } +} + +local map_data1s = { -- Data processing, 1 source. + shift = 29, mask = 1, + [0] = { + shift = 31, mask = 1, + [0] = { + shift = 10, mask = 0x7ff, + [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg" + }, + { + shift = 10, mask = 0x7ff, + [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg" + } + } +} + +local map_data2s = { -- Data processing, 2 sources. + shift = 29, mask = 1, + [0] = { + shift = 10, mask = 63, + false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg", + "lsrDNMg", "asrDNMg", "rorDNMg" + } +} + +local map_data3s = { -- Data processing, 3 sources. + shift = 29, mask = 7, + [0] = { + shift = 21, mask = 7, + [0] = { + shift = 15, mask = 1, + [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g" + } + }, false, false, false, + { + shift = 15, mask = 1, + [0] = { + shift = 21, mask = 7, + [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false, + false, "umaddl|umullDxNMwA0x", "umulhDNMx" + }, + { + shift = 21, mask = 7, + [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false, + false, "umsubl|umneglDxNMwA0x" + } + } +} + +local map_datar = { -- Data processing, register. + shift = 28, mask = 1, + [0] = { + shift = 24, mask = 1, + [0] = map_logsr, + { + shift = 21, mask = 1, + [0] = map_addsubsh, map_addsubex + } + }, + { + shift = 21, mask = 15, + [0] = map_addsubc, false, map_ccomp, false, map_csel, false, + { + shift = 30, mask = 1, + [0] = map_data2s, map_data1s + }, + false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s, + map_data3s, map_data3s, map_data3s + } +} + +local map_lrl = { -- Load register, literal. + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = "ldrDwB", "ldrDxB", "ldrswDxB" + }, + { + shift = 30, mask = 3, + [0] = "ldrDsB", "ldrDdB" + } +} + +local map_lsriind = { -- Load/store register, immediate pre/post-indexed. + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strDwzL", "ldrDwzL", "ldrswDxzL" + }, + { + shift = 22, mask = 3, + [0] = "strDszL", "ldrDszL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strDxzL", "ldrDxzL" + }, + { + shift = 22, mask = 3, + [0] = "strDdzL", "ldrDdzL" + } + } +} + +local map_lsriro = { + shift = 21, mask = 1, + [0] = { -- Load/store register immediate. + shift = 10, mask = 3, + [0] = { -- Unscaled immediate. + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "sturbDwK", "ldurbDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturhDwK", "ldurhDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturDwK", "ldurDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturDxK", "ldurDxK" + } + } + }, map_lsriind, false, map_lsriind + }, + { -- Load/store register, register offset. + shift = 10, mask = 3, + [2] = { + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO" + }, + { + shift = 22, mask = 3, + [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO" + }, + { + shift = 22, mask = 3, + [0] = "strDwO", "ldrDwO", "ldrswDxO" + }, + { + shift = 22, mask = 3, + [0] = "strDxO", "ldrDxO" + } + }, + { + shift = 30, mask = 3, + [2] = { + shift = 22, mask = 3, + [0] = "strDsO", "ldrDsO" + }, + [3] = { + shift = 22, mask = 3, + [0] = "strDdO", "ldrDdO" + } + } + } + } +} + +local map_lsp = { -- Load/store register pair, offset. + shift = 22, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = "stpDzAzwP", "stpDzAzsP", + }, + { + shift = 26, mask = 1, + "stpDzAzdP" + }, + { + shift = 26, mask = 1, + [0] = "stpDzAzxP" + } + }, + { + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = "ldpDzAzwP", "ldpDzAzsP", + }, + { + shift = 26, mask = 1, + [0] = "ldpswDAxP", "ldpDzAzdP" + }, + { + shift = 26, mask = 1, + [0] = "ldpDzAzxP" + } + } +} + +local map_ls = { -- Loads and stores. + shift = 24, mask = 0x31, + [0x10] = map_lrl, [0x30] = map_lsriro, + [0x20] = { + shift = 23, mask = 3, + map_lsp, map_lsp, map_lsp + }, + [0x21] = { + shift = 23, mask = 3, + map_lsp, map_lsp, map_lsp + }, + [0x31] = { + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwzU", "ldrbDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strhDwzU", "ldrhDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strDwzU", "ldrDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strDxzU", "ldrDxzU" + } + }, + { + shift = 30, mask = 3, + [2] = { + shift = 22, mask = 3, + [0] = "strDszU", "ldrDszU" + }, + [3] = { + shift = 22, mask = 3, + [0] = "strDdzU", "ldrDdzU" + } + } + }, +} + +local map_datafp = { -- Data processing, SIMD and FP. + shift = 28, mask = 7, + { -- 001 + shift = 24, mask = 1, + [0] = { + shift = 21, mask = 1, + { + shift = 10, mask = 3, + [0] = { + shift = 12, mask = 1, + [0] = { + shift = 13, mask = 1, + [0] = { + shift = 14, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { -- FP/int conversion. + shift = 31, mask = 1, + [0] = { + shift = 16, mask = 0xff, + [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs", + [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw", + [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs", + [0x26] = "fmovDwNs", [0x27] = "fmovDsNw", + [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs", + [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs", + [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs", + [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd", + [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw", + [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd", + [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd", + [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd", + [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd" + }, + { + shift = 16, mask = 0xff, + [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs", + [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx", + [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs", + [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs", + [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs", + [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs", + [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd", + [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx", + [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd", + [0x66] = "fmovDxNd", [0x67] = "fmovDdNx", + [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd", + [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd", + [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd" + } + } + }, + { -- FP data-processing, 1 source. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = { + shift = 15, mask = 63, + [0] = "fmovDNf", "fabsDNf", "fnegDNf", + "fsqrtDNf", false, "fcvtDdNs", false, false, + "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", + "frintaDNf", false, "frintxDNf", "frintiDNf", + }, + { + shift = 15, mask = 63, + [0] = "fmovDNf", "fabsDNf", "fnegDNf", + "fsqrtDNf", "fcvtDsNd", false, false, false, + "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", + "frintaDNf", false, "frintxDNf", "frintiDNf", + } + } + } + }, + { -- FP compare. + shift = 31, mask = 1, + [0] = { + shift = 14, mask = 3, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 0, mask = 31, + [0] = "fcmpNMf", [8] = "fcmpNZf", + [16] = "fcmpeNMf", [24] = "fcmpeNZf", + } + } + } + } + }, + { -- FP immediate. + shift = 31, mask = 1, + [0] = { + shift = 5, mask = 31, + [0] = { + shift = 23, mask = 1, + [0] = "fmovDFf" + } + } + } + }, + { -- FP conditional compare. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 4, mask = 1, + [0] = "fccmpNMVCf", "fccmpeNMVCf" + } + } + }, + { -- FP data-processing, 2 sources. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 12, mask = 15, + [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf", + "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf", + "fnmulDNMf" + } + } + }, + { -- FP conditional select. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = "fcselDNMCf" + } + } + } + }, + { -- FP data-processing, 3 sources. + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 21, mask = 5, + [0] = "fmaddDNMAf", "fnmaddDNMAf" + }, + { + shift = 21, mask = 5, + [0] = "fmsubDNMAf", "fnmsubDNMAf" + } + } + } + } +} + +local map_br = { -- Branches, exception generating and system instructions. + shift = 29, mask = 7, + [0] = "bB", + { -- Compare & branch, immediate. + shift = 24, mask = 3, + [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw" + }, + { -- Conditional branch, immediate. + shift = 24, mask = 3, + [0] = { + shift = 4, mask = 1, + [0] = { + shift = 0, mask = 15, + [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB", + "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB" + } + } + }, false, "blB", + { -- Compare & branch, immediate. + shift = 24, mask = 3, + [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx" + }, + { + shift = 24, mask = 3, + [0] = { -- Exception generation. + shift = 0, mask = 0xe0001f, + [0x200000] = "brkW" + }, + { -- System instructions. + shift = 0, mask = 0x3fffff, + [0x03201f] = "nop" + }, + { -- Unconditional branch, register. + shift = 0, mask = 0xfffc1f, + [0x1f0000] = "brNx", [0x3f0000] = "blrNx", + [0x5f0000] = "retNx" + }, + } +} + +local map_init = { + shift = 25, mask = 15, + [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp, + map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp +} + +------------------------------------------------------------------------------ + +local map_regs = { x = {}, w = {}, d = {}, s = {} } + +for i=0,30 do + map_regs.x[i] = "x"..i + map_regs.w[i] = "w"..i + map_regs.d[i] = "d"..i + map_regs.s[i] = "s"..i +end +map_regs.x[31] = "sp" +map_regs.w[31] = "wsp" +map_regs.d[31] = "d31" +map_regs.s[31] = "s31" + +local map_cond = { + [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", +} + +local map_shift = { [0] = "lsl", "lsr", "asr", } + +local map_extend = { + [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", +} + +------------------------------------------------------------------------------ + +-- Output a nicely formatted line with an opcode and operands. +local function putop(ctx, text, operands) + local pos = ctx.pos + local extra = "" + if ctx.rel then + local sym = ctx.symtab[ctx.rel] + if sym then + extra = "\t->"..sym + end + end + if ctx.hexdump > 0 then + ctx.out(format("%08x %s %-5s %s%s\n", + ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) + else + ctx.out(format("%08x %-5s %s%s\n", + ctx.addr+pos, text, concat(operands, ", "), extra)) + end + ctx.pos = pos + 4 +end + +-- Fallback for unknown opcodes. +local function unknown(ctx) + return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) +end + +local function match_reg(p, pat, regnum) + return map_regs[match(pat, p.."%w-([xwds])")][regnum] +end + +local function fmt_hex32(x) + if x < 0 then + return tohex(x) + else + return format("%x", x) + end +end + +local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 } + +local function decode_imm13(op) + local imms = band(rshift(op, 10), 63) + local immr = band(rshift(op, 16), 63) + if band(op, 0x00400000) == 0 then + local len = 5 + if imms >= 56 then + if imms >= 60 then len = 1 else len = 2 end + elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end + local l = lshift(1, len)-1 + local s = band(imms, l) + local r = band(immr, l) + local imm = ror(rshift(-1, 31-s), r) + if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end + imm = imm * imm13_rep[len] + local ix = fmt_hex32(imm) + if rshift(op, 31) ~= 0 then + return ix..tohex(imm) + else + return ix + end + else + local lo, hi = -1, 0 + if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end + if immr ~= 0 then + lo, hi = ror(lo, immr), ror(hi, immr) + local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr)) + lo, hi = bxor(lo, x), bxor(hi, x) + if immr >= 32 then lo, hi = hi, lo end + end + if hi ~= 0 then + return fmt_hex32(hi)..tohex(lo) + else + return fmt_hex32(lo) + end + end +end + +local function parse_immpc(op, name) + if name == "b" or name == "bl" then + return arshift(lshift(op, 6), 4) + elseif name == "adr" or name == "adrp" then + local immlo = band(rshift(op, 29), 3) + local immhi = lshift(arshift(lshift(op, 8), 13), 2) + return bor(immhi, immlo) + elseif name == "tbz" or name == "tbnz" then + return lshift(arshift(lshift(op, 13), 18), 2) + else + return lshift(arshift(lshift(op, 8), 13), 2) + end +end + +local function parse_fpimm8(op) + local sign = band(op, 0x100000) == 0 and 1 or -1 + local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131 + local frac = 16+band(rshift(op, 13), 15) + return sign * frac * 2^exp +end + +local function prefer_bfx(sf, uns, imms, immr) + if imms < immr or imms == 31 or imms == 63 then + return false + end + if immr == 0 then + if sf == 0 and (imms == 7 or imms == 15) then + return false + end + if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then + return false + end + end + return true +end + +-- Disassemble a single instruction. +local function disass_ins(ctx) + local pos = ctx.pos + local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) + local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) + local operands = {} + local suffix = "" + local last, name, pat + local map_reg + ctx.op = op + ctx.rel = nil + last = nil + local opat + opat = map_init[band(rshift(op, 25), 15)] + while type(opat) ~= "string" do + if not opat then return unknown(ctx) end + opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + end + name, pat = match(opat, "^([a-z0-9]*)(.*)") + local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") + if altname then pat = pat2 end + if sub(pat, 1, 1) == "." then + local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)") + suffix = suffix..s2 + pat = p2 + end + + local rt = match(pat, "[gf]") + if rt then + if rt == "g" then + map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w + else + map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s + end + end + + local second0, immr + + for p in gmatch(pat, ".") do + local x = nil + if p == "D" then + local regnum = band(op, 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "N" then + local regnum = band(rshift(op, 5), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "M" then + local regnum = band(rshift(op, 16), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "A" then + local regnum = band(rshift(op, 10), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "B" then + local addr = ctx.addr + pos + parse_immpc(op, name) + ctx.rel = addr + x = "0x"..tohex(addr) + elseif p == "T" then + x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) + elseif p == "V" then + x = band(op, 15) + elseif p == "C" then + x = map_cond[band(rshift(op, 12), 15)] + elseif p == "c" then + local rn = band(rshift(op, 5), 31) + local rm = band(rshift(op, 16), 31) + local cond = band(rshift(op, 12), 15) + local invc = bxor(cond, 1) + x = map_cond[cond] + if altname and cond ~= 14 and cond ~= 15 then + local a1, a2 = match(altname, "([^|]*)|(.*)") + if rn == rm then + local n = #operands + operands[n] = nil + x = map_cond[invc] + if rn ~= 31 then + if a1 then name = a1 else name = altname end + else + operands[n-1] = nil + name = a2 + end + end + end + elseif p == "W" then + x = band(rshift(op, 5), 0xffff) + elseif p == "Y" then + x = band(rshift(op, 5), 0xffff) + local hw = band(rshift(op, 21), 3) + if altname and (hw == 0 or x ~= 0) then + name = altname + end + elseif p == "L" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local imm9 = arshift(lshift(op, 11), 23) + if band(op, 0x800) ~= 0 then + x = "["..rn..", #"..imm9.."]!" + else + x = "["..rn.."], #"..imm9 + end + elseif p == "U" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local sz = band(rshift(op, 30), 3) + local imm12 = lshift(arshift(lshift(op, 10), 20), sz) + if imm12 ~= 0 then + x = "["..rn..", #"..imm12.."]" + else + x = "["..rn.."]" + end + elseif p == "K" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local imm9 = arshift(lshift(op, 11), 23) + if imm9 ~= 0 then + x = "["..rn..", #"..imm9.."]" + else + x = "["..rn.."]" + end + elseif p == "O" then + local rn, rm = map_regs.x[band(rshift(op, 5), 31)] + local m = band(rshift(op, 13), 1) + if m == 0 then + rm = map_regs.w[band(rshift(op, 16), 31)] + else + rm = map_regs.x[band(rshift(op, 16), 31)] + end + x = "["..rn..", "..rm + local opt = band(rshift(op, 13), 7) + local s = band(rshift(op, 12), 1) + local sz = band(rshift(op, 30), 3) + -- extension to be applied + if opt == 3 then + if s == 0 then x = x.."]" + else x = x..", lsl #"..sz.."]" end + elseif opt == 2 or opt == 6 or opt == 7 then + if s == 0 then x = x..", "..map_extend[opt].."]" + else x = x..", "..map_extend[opt].." #"..sz.."]" end + else + x = x.."]" + end + elseif p == "P" then + local opcv, sh = rshift(op, 26), 2 + if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end + local imm7 = lshift(arshift(lshift(op, 10), 25), sh) + local rn = map_regs.x[band(rshift(op, 5), 31)] + local ind = band(rshift(op, 23), 3) + if ind == 1 then + x = "["..rn.."], #"..imm7 + elseif ind == 2 then + if imm7 == 0 then + x = "["..rn.."]" + else + x = "["..rn..", #"..imm7.."]" + end + elseif ind == 3 then + x = "["..rn..", #"..imm7.."]!" + end + elseif p == "I" then + local shf = band(rshift(op, 22), 3) + local imm12 = band(rshift(op, 10), 0x0fff) + local rn, rd = band(rshift(op, 5), 31), band(op, 31) + if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then + name = altname + x = nil + elseif shf == 0 then + x = imm12 + elseif shf == 1 then + x = imm12..", lsl #12" + end + elseif p == "i" then + x = "#0x"..decode_imm13(op) + elseif p == "1" then + immr = band(rshift(op, 16), 63) + x = immr + elseif p == "2" then + x = band(rshift(op, 10), 63) + if altname then + local a1, a2, a3, a4, a5, a6 = + match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)") + local sf = band(rshift(op, 26), 32) + local uns = band(rshift(op, 30), 1) + if prefer_bfx(sf, uns, x, immr) then + name = a2 + x = x - immr + 1 + elseif immr == 0 and x == 7 then + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + name = a6 + x = nil + elseif immr == 0 and x == 15 then + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + name = a5 + x = nil + elseif x == 31 or x == 63 then + if x == 31 and immr == 0 and name == "sbfm" then + name = a4 + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + else + name = a3 + end + x = nil + elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then + name = a4 + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = nil + elseif x < immr then + name = a1 + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = x + 1 + end + end + elseif p == "3" then + x = band(rshift(op, 10), 63) + if altname then + local a1, a2 = match(altname, "([^|]*)|(.*)") + if x < immr then + name = a1 + local sf = band(rshift(op, 26), 32) + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = x + 1 + elseif x >= immr then + name = a2 + x = x - immr + 1 + end + end + elseif p == "4" then + x = band(rshift(op, 10), 63) + local rn = band(rshift(op, 5), 31) + local rm = band(rshift(op, 16), 31) + if altname and rn == rm then + local n = #operands + operands[n] = nil + last = operands[n-1] + name = altname + end + elseif p == "5" then + x = band(rshift(op, 16), 31) + elseif p == "S" then + x = band(rshift(op, 10), 63) + if x == 0 then x = nil + else x = map_shift[band(rshift(op, 22), 3)].." #"..x end + elseif p == "X" then + local opt = band(rshift(op, 13), 7) + -- Width specifier
\n') end out:write("---- TRACE ", tr, " ", what) - if otr then out:write(" ", otr, "/", oex) end + if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end out:write(" ", fmtfunc(func, pc), "\n") elseif what == "stop" or what == "abort" then out:write("---- TRACE ", tr, " ", what) @@ -653,7 +655,8 @@ end local function dumpon(opt, outfile) if active then dumpoff() end - local colormode = os.getenv("COLORTERM") and "A" or "T" + local term = os.getenv("TERM") + local colormode = (term and term:match("color") or os.getenv("COLORTERM")) and "A" or "T" if opt then opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end) end diff --git a/src/jit/p.lua b/src/jit/p.lua index 09b3b9fe..7be10586 100644 --- a/src/jit/p.lua +++ b/src/jit/p.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT profiler. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -156,6 +156,7 @@ local function prof_annotate(count1, samples) ms = math.max(ms, v) if pct >= prof_min then local file, line = k:match("^(.*):(%d+)$") + if not file then file = k; line = 0 end local fl = files[file] if not fl then fl = {}; files[file] = fl; files[#files+1] = file end line = tonumber(line) diff --git a/src/jit/v.lua b/src/jit/v.lua index 60c8b05a..934de985 100644 --- a/src/jit/v.lua +++ b/src/jit/v.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- Verbose mode of the LuaJIT compiler. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -99,7 +99,7 @@ end local function dump_trace(what, tr, func, pc, otr, oex) if what == "start" then startloc = fmtfunc(func, pc) - startex = otr and "("..otr.."/"..oex..") " or "" + startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or "" else if what == "abort" then local loc = fmtfunc(func, pc) diff --git a/src/jit/zone.lua b/src/jit/zone.lua index f5f9656a..fa702c4e 100644 --- a/src/jit/zone.lua +++ b/src/jit/zone.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT profiler zones. -- --- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- diff --git a/src/lauxlib.h b/src/lauxlib.h index fed1491b..a44f0272 100644 --- a/src/lauxlib.h +++ b/src/lauxlib.h @@ -15,9 +15,6 @@ #include "lua.h" -#define luaL_getn(L,i) ((int)lua_objlen(L, i)) -#define luaL_setn(L,i,j) ((void)0) /* no op! */ - /* extra error code for `luaL_load' */ #define LUA_ERRFILE (LUA_ERRERR+1) @@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, const char *const lst[]); +/* pre-defined references */ +#define LUA_NOREF (-2) +#define LUA_REFNIL (-1) + LUALIB_API int (luaL_ref) (lua_State *L, int t); LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); @@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz, const char *name, const char *mode); LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, int level); +LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup); +LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname, + int sizehint); +LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname); +LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); /* @@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) +/* From Lua 5.2. */ +#define luaL_newlibtable(L, l) \ + lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1) +#define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0)) + /* ** {====================================================== ** Generic Buffer manipulation @@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); /* }====================================================== */ - -/* compatibility with ref system */ - -/* pre-defined references */ -#define LUA_NOREF (-2) -#define LUA_REFNIL (-1) - -#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \ - (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0)) - -#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref)) - -#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref)) - - -#define luaL_reg luaL_Reg - #endif diff --git a/src/lib_aux.c b/src/lib_aux.c index d6f56e30..c40565c3 100644 --- a/src/lib_aux.c +++ b/src/lib_aux.c @@ -1,6 +1,6 @@ /* ** Auxiliary library for the Lua/C API. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major parts taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx, static int libsize(const luaL_Reg *l) { int size = 0; - for (; l->name; l++) size++; + for (; l && l->name; l++) size++; return size; } +LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint) +{ + luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); + lua_getfield(L, -1, modname); + if (!lua_istable(L, -1)) { + lua_pop(L, 1); + if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL) + lj_err_callerv(L, LJ_ERR_BADMODN, modname); + lua_pushvalue(L, -1); + lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */ + } + lua_remove(L, -2); /* Remove _LOADED table. */ +} + LUALIB_API void luaL_openlib(lua_State *L, const char *libname, const luaL_Reg *l, int nup) { lj_lib_checkfpu(L); if (libname) { - int size = libsize(l); - /* check whether lib already exists */ - luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); - lua_getfield(L, -1, libname); /* get _LOADED[libname] */ - if (!lua_istable(L, -1)) { /* not found? */ - lua_pop(L, 1); /* remove previous result */ - /* try global variable (and create one if it does not exist) */ - if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL) - lj_err_callerv(L, LJ_ERR_BADMODN, libname); - lua_pushvalue(L, -1); - lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ - } - lua_remove(L, -2); /* remove _LOADED table */ - lua_insert(L, -(nup+1)); /* move library table to below upvalues */ + luaL_pushmodule(L, libname, libsize(l)); + lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */ } - for (; l->name; l++) { - int i; - for (i = 0; i < nup; i++) /* copy upvalues to the top */ - lua_pushvalue(L, -nup); - lua_pushcclosure(L, l->func, nup); - lua_setfield(L, -(nup+2), l->name); - } - lua_pop(L, nup); /* remove upvalues */ + if (l) + luaL_setfuncs(L, l, nup); + else + lua_pop(L, nup); /* Remove upvalues. */ } LUALIB_API void luaL_register(lua_State *L, const char *libname, @@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname, luaL_openlib(L, libname, l, 0); } +LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) +{ + luaL_checkstack(L, nup, "too many upvalues"); + for (; l->name; l++) { + int i; + for (i = 0; i < nup; i++) /* Copy upvalues to the top. */ + lua_pushvalue(L, -nup); + lua_pushcclosure(L, l->func, nup); + lua_setfield(L, -(nup + 2), l->name); + } + lua_pop(L, nup); /* Remove upvalues. */ +} + LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, const char *p, const char *r) { diff --git a/src/lib_base.c b/src/lib_base.c index 6107bde0..3a757870 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -1,6 +1,6 @@ /* ** Base and coroutine library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -23,6 +23,7 @@ #include "lj_tab.h" #include "lj_meta.h" #include "lj_state.h" +#include "lj_frame.h" #if LJ_HASFFI #include "lj_ctype.h" #include "lj_cconv.h" @@ -345,7 +346,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.) static int load_aux(lua_State *L, int status, int envarg) { - if (status == 0) { + if (status == LUA_OK) { if (tvistab(L->base+envarg-1)) { GCfunc *fn = funcV(L->top-1); GCtab *t = tabV(L->base+envarg-1); @@ -418,7 +419,7 @@ LJLIB_CF(dofile) GCstr *fname = lj_lib_optstr(L, 1); setnilV(L->top); L->top = L->base+1; - if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) + if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK) lua_error(L); lua_call(L, 0, LUA_MULTRET); return (int)(L->top - L->base) - 1; @@ -536,7 +537,7 @@ LJLIB_CF(coroutine_status) co = threadV(L->base); if (co == L) s = "running"; else if (co->status == LUA_YIELD) s = "suspended"; - else if (co->status != 0) s = "dead"; + else if (co->status != LUA_OK) s = "dead"; else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal"; else if (co->top == co->base) s = "dead"; else s = "suspended"; @@ -557,6 +558,12 @@ LJLIB_CF(coroutine_running) #endif } +LJLIB_CF(coroutine_isyieldable) +{ + setboolV(L->top++, cframe_canyield(L->cframe)); + return 1; +} + LJLIB_CF(coroutine_create) { lua_State *L1; @@ -576,7 +583,7 @@ LJLIB_ASM(coroutine_yield) static int ffh_resume(lua_State *L, lua_State *co, int wrap) { if (co->cframe != NULL || co->status > LUA_YIELD || - (co->status == 0 && co->top == co->base)) { + (co->status == LUA_OK && co->top == co->base)) { ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; if (wrap) lj_err_caller(L, em); setboolV(L->base-1-LJ_FR2, 0); diff --git a/src/lib_bit.c b/src/lib_bit.c index 1344d24d..c979a448 100644 --- a/src/lib_bit.c +++ b/src/lib_bit.c @@ -1,6 +1,6 @@ /* ** Bit manipulation library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lib_bit_c diff --git a/src/lib_debug.c b/src/lib_debug.c index cda7137e..f112b5bc 100644 --- a/src/lib_debug.c +++ b/src/lib_debug.c @@ -1,6 +1,6 @@ /* ** Debug library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue) /* ------------------------------------------------------------------------ */ -static const char KEY_HOOK = 'h'; +#define KEY_HOOK ((void *)0x3004) static void hookf(lua_State *L, lua_Debug *ar) { static const char *const hooknames[] = {"call", "return", "line", "count", "tail return"}; - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_pushlightuserdata(L, KEY_HOOK); lua_rawget(L, LUA_REGISTRYINDEX); if (lua_isfunction(L, -1)) { lua_pushstring(L, hooknames[(int)ar->event]); @@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook) count = luaL_optint(L, arg+3, 0); func = hookf; mask = makemask(smask, count); } - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_pushlightuserdata(L, KEY_HOOK); lua_pushvalue(L, arg+1); lua_rawset(L, LUA_REGISTRYINDEX); lua_sethook(L, func, mask, count); @@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook) if (hook != NULL && hook != hookf) { /* external hook? */ lua_pushliteral(L, "external hook"); } else { - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_pushlightuserdata(L, KEY_HOOK); lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ } lua_pushstring(L, unmakemask(mask, buff)); diff --git a/src/lib_ffi.c b/src/lib_ffi.c index 1feee215..136e98e8 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -1,6 +1,6 @@ /* ** FFI library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lib_ffi_c @@ -829,7 +829,7 @@ static GCtab *ffi_finalizer(lua_State *L) settabV(L, L->top++, t); setgcref(t->metatable, obj2gco(t)); setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")), - lj_str_newlit(L, "K")); + lj_str_newlit(L, "k")); t->nomm = (uint8_t)(~(1u<base+n)) { const char *p = strVdata(L->base+n); - if (p[0] != '*') - lj_err_arg(L, n+1, LJ_ERR_INVOPT); - if (p[1] == 'n') + if (p[0] == '*') p++; + if (p[0] == 'n') ok = io_file_readnum(L, fp); - else if ((p[1] & ~0x20) == 'L') - ok = io_file_readline(L, fp, (p[1] == 'l')); - else if (p[1] == 'a') + else if ((p[0] & ~0x20) == 'L') + ok = io_file_readline(L, fp, (p[0] == 'l')); + else if (p[0] == 'a') io_file_readall(L, fp); else lj_err_arg(L, n+1, LJ_ERR_INVFMT); diff --git a/src/lib_jit.c b/src/lib_jit.c index 592538bd..22ca0a1a 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -1,6 +1,6 @@ /* ** JIT library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lib_jit_c @@ -204,6 +204,7 @@ LJLIB_CF(jit_util_funcinfo) lua_setfield(L, -2, "source"); lj_debug_pushloc(L, pt, pc); lua_setfield(L, -2, "loc"); + setprotoV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "proto")), pt); } else { GCfunc *fn = funcV(L->base); GCtab *t; diff --git a/src/lib_math.c b/src/lib_math.c index f90dd331..ef9dda2d 100644 --- a/src/lib_math.c +++ b/src/lib_math.c @@ -1,6 +1,6 @@ /* ** Math library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include @@ -221,10 +221,6 @@ LUALIB_API int luaopen_math(lua_State *L) rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); rs->valid = 0; /* Use lazy initialization to save some time on startup. */ LJ_LIB_REG(L, LUA_MATHLIBNAME, math); -#if defined(LUA_COMPAT_MOD) && !LJ_52 - lua_getfield(L, -1, "fmod"); - lua_setfield(L, -2, "mod"); -#endif return 1; } diff --git a/src/lib_os.c b/src/lib_os.c index 941c2a5d..9e78d49a 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -1,6 +1,6 @@ /* ** OS library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h diff --git a/src/lib_package.c b/src/lib_package.c index 8c336b02..6fac43ec 100644 --- a/src/lib_package.c +++ b/src/lib_package.c @@ -1,6 +1,6 @@ /* ** Package library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -193,8 +193,7 @@ static void **ll_register(lua_State *L, const char *path) lua_pop(L, 1); plib = (void **)lua_newuserdata(L, sizeof(void *)); *plib = NULL; - luaL_getmetatable(L, "_LOADLIB"); - lua_setmetatable(L, -2); + luaL_setmetatable(L, "_LOADLIB"); lua_pushfstring(L, "LOADLIB: %s", path); lua_pushvalue(L, -2); lua_settable(L, LUA_REGISTRYINDEX); @@ -399,8 +398,7 @@ static int lj_cf_package_loader_preload(lua_State *L) /* ------------------------------------------------------------------------ */ -static const int sentinel_ = 0; -#define sentinel ((void *)&sentinel_) +#define sentinel ((void *)0x4004) static int lj_cf_package_require(lua_State *L) { @@ -490,29 +488,19 @@ static void modinit(lua_State *L, const char *modname) static int lj_cf_package_module(lua_State *L) { const char *modname = luaL_checkstring(L, 1); - int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ - lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); - lua_getfield(L, loaded, modname); /* get _LOADED[modname] */ - if (!lua_istable(L, -1)) { /* not found? */ - lua_pop(L, 1); /* remove previous result */ - /* try global variable (and create one if it does not exist) */ - if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL) - lj_err_callerv(L, LJ_ERR_BADMODN, modname); - lua_pushvalue(L, -1); - lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */ - } - /* check whether table already has a _NAME field */ + int lastarg = (int)(L->top - L->base); + luaL_pushmodule(L, modname, 1); lua_getfield(L, -1, "_NAME"); - if (!lua_isnil(L, -1)) { /* is table an initialized module? */ + if (!lua_isnil(L, -1)) { /* Module already initialized? */ lua_pop(L, 1); - } else { /* no; initialize it */ + } else { lua_pop(L, 1); modinit(L, modname); } lua_pushvalue(L, -1); setfenv(L); - dooptions(L, loaded - 1); - return 0; + dooptions(L, lastarg); + return LJ_52; } static int lj_cf_package_seeall(lua_State *L) @@ -583,13 +571,16 @@ LUALIB_API int luaopen_package(lua_State *L) lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); lua_setfield(L, -2, "__gc"); luaL_register(L, LUA_LOADLIBNAME, package_lib); - lua_pushvalue(L, -1); - lua_replace(L, LUA_ENVIRONINDEX); + lua_copy(L, -1, LUA_ENVIRONINDEX); lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); for (i = 0; package_loaders[i] != NULL; i++) { lj_lib_pushcf(L, package_loaders[i], 1); lua_rawseti(L, -2, i+1); } +#if LJ_52 + lua_pushvalue(L, -1); + lua_setfield(L, -3, "searchers"); +#endif lua_setfield(L, -2, "loaders"); lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); noenv = lua_toboolean(L, -1); diff --git a/src/lib_string.c b/src/lib_string.c index 46b54a5e..76b0730a 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -1,6 +1,6 @@ /* ** String library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -737,10 +737,6 @@ LUALIB_API int luaopen_string(lua_State *L) GCtab *mt; global_State *g; LJ_LIB_REG(L, LUA_STRLIBNAME, string); -#if defined(LUA_COMPAT_GFIND) && !LJ_52 - lua_getfield(L, -1, "gmatch"); - lua_setfield(L, -2, "gfind"); -#endif mt = lj_tab_new(L, 0, 1); /* NOBARRIER: basemt is a GC root. */ g = G(L); diff --git a/src/lib_table.c b/src/lib_table.c index f9a3693d..0450f1f6 100644 --- a/src/lib_table.c +++ b/src/lib_table.c @@ -1,6 +1,6 @@ /* ** Table library. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -129,6 +129,26 @@ LJLIB_LUA(table_remove) /* end */ +LJLIB_LUA(table_move) /* + function(a1, f, e, t, a2) + CHECK_tab(a1) + CHECK_int(f) + CHECK_int(e) + CHECK_int(t) + if a2 == nil then a2 = a1 end + CHECK_tab(a2) + if e >= f then + local d = t - f + if t > e or t <= f or a2 ~= a1 then + for i=f,e do a2[i+d] = a1[i] end + else + for i=e,f,-1 do a2[i+d] = a1[i] end + end + end + return a2 + end +*/ + LJLIB_CF(table_concat) LJLIB_REC(.) { GCtab *t = lj_lib_checktab(L, 1); diff --git a/src/lj_api.c b/src/lj_api.c index 0fbb9256..d17a5754 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -1,6 +1,6 @@ /* ** Public Lua/C API. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -112,6 +112,13 @@ LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) from->top = f; } +LUA_API const lua_Number *lua_version(lua_State *L) +{ + static const lua_Number version = LUA_VERSION_NUM; + UNUSED(L); + return &version; +} + /* -- Stack manipulation -------------------------------------------------- */ LUA_API int lua_gettop(lua_State *L) @@ -152,30 +159,40 @@ LUA_API void lua_insert(lua_State *L, int idx) copyTV(L, p, L->top); } -LUA_API void lua_replace(lua_State *L, int idx) +static void copy_slot(lua_State *L, TValue *f, int idx) { - api_checknelems(L, 1); if (idx == LUA_GLOBALSINDEX) { - api_check(L, tvistab(L->top-1)); + api_check(L, tvistab(f)); /* NOBARRIER: A thread (i.e. L) is never black. */ - setgcref(L->env, obj2gco(tabV(L->top-1))); + setgcref(L->env, obj2gco(tabV(f))); } else if (idx == LUA_ENVIRONINDEX) { GCfunc *fn = curr_func(L); if (fn->c.gct != ~LJ_TFUNC) lj_err_msg(L, LJ_ERR_NOENV); - api_check(L, tvistab(L->top-1)); - setgcref(fn->c.env, obj2gco(tabV(L->top-1))); - lj_gc_barrier(L, fn, L->top-1); + api_check(L, tvistab(f)); + setgcref(fn->c.env, obj2gco(tabV(f))); + lj_gc_barrier(L, fn, f); } else { TValue *o = index2adr(L, idx); api_checkvalidindex(L, o); - copyTV(L, o, L->top-1); + copyTV(L, o, f); if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ - lj_gc_barrier(L, curr_func(L), L->top-1); + lj_gc_barrier(L, curr_func(L), f); } +} + +LUA_API void lua_replace(lua_State *L, int idx) +{ + api_checknelems(L, 1); + copy_slot(L, L->top - 1, idx); L->top--; } +LUA_API void lua_copy(lua_State *L, int fromidx, int toidx) +{ + copy_slot(L, index2adr(L, fromidx), toidx); +} + LUA_API void lua_pushvalue(lua_State *L, int idx) { copyTV(L, L->top, index2adr(L, idx)); @@ -325,6 +342,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) return 0; } +LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + if (LJ_LIKELY(tvisnumber(o))) { + if (ok) *ok = 1; + return numberVnum(o); + } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) { + if (ok) *ok = 1; + return numV(&tmp); + } else { + if (ok) *ok = 0; + return 0; + } +} + LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -362,7 +395,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) return 0; if (tvisint(&tmp)) - return (lua_Integer)intV(&tmp); + return intV(&tmp); n = numV(&tmp); } #if LJ_64 @@ -372,6 +405,35 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) #endif } +LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + lua_Number n; + if (LJ_LIKELY(tvisint(o))) { + if (ok) *ok = 1; + return intV(o); + } else if (LJ_LIKELY(tvisnum(o))) { + n = numV(o); + } else { + if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) { + if (ok) *ok = 0; + return 0; + } + if (tvisint(&tmp)) { + if (ok) *ok = 1; + return intV(&tmp); + } + n = numV(&tmp); + } + if (ok) *ok = 1; +#if LJ_64 + return (lua_Integer)n; +#else + return lj_num2int(n); +#endif +} + LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -858,7 +920,7 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); } -LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname) { cTValue *o = index2adr(L, idx); if (tvisudata(o)) { @@ -867,8 +929,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) return uddata(ud); } - lj_err_argtype(L, idx, tname); - return NULL; /* unreachable */ + return NULL; /* value is not a userdata with a metatable */ +} + +LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +{ + void *p = luaL_testudata(L, idx, tname); + if (!p) lj_err_argtype(L, idx, tname); + return p; } /* -- Object setters ------------------------------------------------------ */ @@ -977,6 +1045,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) return 1; } +LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) +{ + lua_getfield(L, LUA_REGISTRYINDEX, tname); + lua_setmetatable(L, -2); +} + LUA_API int lua_setfenv(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -1032,7 +1106,7 @@ static TValue *api_call_base(lua_State *L, int nargs) LUA_API void lua_call(lua_State *L, int nargs, int nresults) { - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); api_checknelems(L, nargs+1); lj_vm_call(L, api_call_base(L, nargs), nresults+1); } @@ -1043,7 +1117,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) uint8_t oldh = hook_save(g); ptrdiff_t ef; int status; - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); api_checknelems(L, nargs+1); if (errfunc == 0) { ef = 0; @@ -1075,7 +1149,7 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) global_State *g = G(L); uint8_t oldh = hook_save(g); int status; - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); status = lj_vm_cpcall(L, func, ud, cpcall); if (status) hook_restore(g, oldh); return status; @@ -1096,6 +1170,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) /* -- Coroutine yield and resume ------------------------------------------ */ +LUA_API int lua_isyieldable(lua_State *L) +{ + return cframe_canyield(L->cframe); +} + LUA_API int lua_yield(lua_State *L, int nresults) { void *cf = L->cframe; @@ -1140,7 +1219,7 @@ LUA_API int lua_resume(lua_State *L, int nargs) { if (L->cframe == NULL && L->status <= LUA_YIELD) return lj_vm_resume(L, - L->status == 0 ? api_call_base(L, nargs) : L->top - nargs, + L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs, 0, 0); L->top = L->base; setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); diff --git a/src/lj_arch.h b/src/lj_arch.h index f775743c..c27ca6a5 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -1,6 +1,6 @@ /* ** Target architecture selection. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_ARCH_H @@ -74,7 +74,7 @@ defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__DragonFly__)) && !defined(__ORBIS__) #define LUAJIT_OS LUAJIT_OS_BSD -#elif (defined(__sun__) && defined(__svr4__)) +#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__) #define LUAJIT_OS LUAJIT_OS_POSIX #elif defined(__CYGWIN__) #define LJ_TARGET_CYGWIN 1 @@ -215,9 +215,14 @@ #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 -#define LJ_ARCH_NAME "arm64" #define LJ_ARCH_BITS 64 +#if defined(__AARCH64EB__) +#define LJ_ARCH_NAME "arm64be" +#define LJ_ARCH_ENDIAN LUAJIT_BE +#else +#define LJ_ARCH_NAME "arm64" #define LJ_ARCH_ENDIAN LUAJIT_LE +#endif #define LJ_TARGET_ARM64 1 #define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ @@ -226,7 +231,6 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_GC64 1 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#define LJ_ARCH_NOJIT 1 /* NYI */ #define LJ_ARCH_VERSION 80 @@ -333,10 +337,12 @@ #define LJ_ARCH_BITS 32 #define LJ_TARGET_MIPS32 1 #else +#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU +#define LJ_ARCH_NOJIT 1 /* NYI */ +#endif #define LJ_ARCH_BITS 64 #define LJ_TARGET_MIPS64 1 #define LJ_TARGET_GC64 1 -#define LJ_ARCH_NOJIT 1 /* NYI */ #endif #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 @@ -376,7 +382,7 @@ #endif #elif LJ_TARGET_ARM64 #if __clang__ -#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5) +#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__) #error "Need at least Clang 3.5 or newer" #endif #else @@ -408,9 +414,6 @@ #error "Only ARM EABI or iOS 3.0+ ABI is supported" #endif #elif LJ_TARGET_ARM64 -#if defined(__AARCH64EB__) -#error "No support for big-endian ARM64" -#endif #if defined(_ILP32) #error "No support for ILP32 model on ARM64" #endif diff --git a/src/lj_asm.c b/src/lj_asm.c index 7ce58924..c2cf5a95 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1,6 +1,6 @@ /* ** IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_asm_c @@ -91,7 +91,7 @@ typedef struct ASMState { MCode *realign; /* Realign loop if not NULL. */ #ifdef RID_NUM_KREF - int32_t krefk[RID_NUM_KREF]; + intptr_t krefk[RID_NUM_KREF]; #endif IRRef1 phireg[RID_MAX]; /* PHI register references. */ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ @@ -144,7 +144,7 @@ static LJ_AINLINE void checkmclim(ASMState *as) #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) #define ra_krefk(as, ref) (as->krefk[(ref)]) -static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) +static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k) { IRRef ref = (IRRef)(r - RID_MIN_KREF); as->krefk[ref] = k; @@ -171,6 +171,8 @@ IRFLDEF(FLOFS) #include "lj_emit_x86.h" #elif LJ_TARGET_ARM #include "lj_emit_arm.h" +#elif LJ_TARGET_ARM64 +#include "lj_emit_arm64.h" #elif LJ_TARGET_PPC #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS @@ -322,7 +324,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref) lua_assert(!rset_test(as->freeset, r)); ra_free(as, r); ra_modified(as, r); +#if LJ_64 + emit_loadu64(as, r, ra_krefk(as, ref)); +#else emit_loadi(as, r, ra_krefk(as, ref)); +#endif return r; } ir = IR(ref); @@ -524,7 +530,7 @@ static void ra_evictk(ASMState *as) #ifdef RID_NUM_KREF /* Allocate a register for a constant. */ -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) { /* First try to find a register which already holds the same constant. */ RegSet pick, work = ~as->freeset & RSET_GPR; @@ -533,9 +539,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) IRRef ref; r = rset_pickbot(work); ref = regcost_ref(as->cost[r]); +#if LJ_64 + if (ref < ASMREF_L) { + if (ra_iskref(ref)) { + if (k == ra_krefk(as, ref)) + return r; + } else { + IRIns *ir = IR(ref); + if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || +#if LJ_GC64 + (ir->o == IR_KINT && k == ir->i) || + (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || + ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && + k == (intptr_t)ir_kptr(ir)) +#else + (ir->o != IR_KINT64 && k == ir->i) +#endif + ) + return r; + } + } +#else if (ref < ASMREF_L && k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) return r; +#endif rset_clear(work, r); } pick = as->freeset & allow; @@ -555,7 +583,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) } /* Allocate a specific register for a constant. */ -static void ra_allockreg(ASMState *as, int32_t k, Reg r) +static void ra_allockreg(ASMState *as, intptr_t k, Reg r) { Reg kr = ra_allock(as, k, RID2RSET(r)); if (kr != r) { @@ -1563,6 +1591,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_x86.h" #elif LJ_TARGET_ARM #include "lj_asm_arm.h" +#elif LJ_TARGET_ARM64 +#include "lj_asm_arm64.h" #elif LJ_TARGET_PPC #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS @@ -2363,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) if (!as->loopref) asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); +#if LJ_TARGET_MCODE_FIXUP + asm_mcode_fixup(T->mcode, T->szmcode); +#endif lj_mcode_sync(T->mcode, origtop); } diff --git a/src/lj_asm.h b/src/lj_asm.h index 9f4654ec..2819481b 100644 --- a/src/lj_asm.h +++ b/src/lj_asm.h @@ -1,6 +1,6 @@ /* ** IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_ASM_H diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 23f42919..37bfa40f 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1,6 +1,6 @@ /* ** ARM IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h new file mode 100644 index 00000000..8fd92e76 --- /dev/null +++ b/src/lj_asm_arm64.h @@ -0,0 +1,2022 @@ +/* +** ARM64 IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +** Sponsored by Cisco Systems, Inc. +*/ + +/* -- Register allocator extensions --------------------------------------- */ + +/* Allocate a register with a hint. */ +static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!ra_hashint(r) && !iscrossref(as, ref)) + ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ + r = ra_allocref(as, ref, allow); + } + ra_noweak(as, r); + return r; +} + +/* Allocate two source registers for three-operand instructions. */ +static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + Reg left = irl->r, right = irr->r; + if (ra_hasreg(left)) { + ra_noweak(as, left); + if (ra_noreg(right)) + right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); + else + ra_noweak(as, right); + } else if (ra_hasreg(right)) { + ra_noweak(as, right); + left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); + } else if (ra_hashint(right)) { + right = ra_allocref(as, ir->op2, allow); + left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); + } else { + left = ra_allocref(as, ir->op1, allow); + right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); + } + return left | (right << 8); +} + +/* -- Guard handling ------------------------------------------------------ */ + +/* Setup all needed exit stubs. */ +static void asm_exitstub_setup(ASMState *as, ExitNo nexits) +{ + ExitNo i; + MCode *mxp = as->mctop; + if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + asm_mclimit(as); + /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ + for (i = nexits-1; (int32_t)i >= 0; i--) + *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu)); + *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno)); + mxp--; + *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu)); + *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP)); + as->mctop = mxp; +} + +static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) +{ + /* Keep this in-sync with exitstub_trace_addr(). */ + return as->mctop + exitno + 3; +} + +/* Emit conditional branch to exit for guard. */ +static void asm_guardcc(ASMState *as, A64CC cc) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | ((target-p) & 0x03ffffffu); + emit_cond_branch(as, cc^1, p-1); + return; + } + emit_cond_branch(as, cc, target); +} + +/* Emit test and branch instruction to exit for guard. */ +static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | ((target-p) & 0x03ffffffu); + emit_tnb(as, ai^0x01000000u, r, bit, p-1); + return; + } + emit_tnb(as, ai, r, bit, target); +} + +/* Emit compare and branch instruction to exit for guard. */ +static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | ((target-p) & 0x03ffffffu); + emit_cnb(as, ai^0x01000000u, r, p-1); + return; + } + emit_cnb(as, ai, r, target); +} + +/* -- Operand fusion ------------------------------------------------------ */ + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 31 + +static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) +{ + if (irref_isk(ref)) { + IRIns *ir = IR(ref); + if (ir->o == IR_KNULL || !irt_is64(ir->t)) { + *k = ir->i; + return 1; + } else if (checki32((int64_t)ir_k64(ir)->u64)) { + *k = (int32_t)ir_k64(ir)->u64; + return 1; + } + } + return 0; +} + +/* Check if there's no conflicting instruction between curins and ref. */ +static int noconflict(ASMState *as, IRRef ref, IROp conflict) +{ + IRIns *ir = as->ir; + IRRef i = as->curins; + if (i > ref + CONFLICT_SEARCH_LIM) + return 0; /* Give up, ref is too far away. */ + while (--i > ref) + if (ir[i].o == conflict) + return 0; /* Conflict found. */ + return 1; /* Ok, no conflict. */ +} + +/* Fuse the array base of colocated arrays. */ +static int32_t asm_fuseabase(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && + !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) + return (int32_t)sizeof(GCtab); + return 0; +} + +#define FUSE_REG 0x40000000 + +/* Fuse array/hash/upvalue reference into register+offset operand. */ +static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, + A64Ins ins) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + if (ir->o == IR_AREF) { + if (mayfuse(as, ref)) { + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (emit_checkofs(ins, ofs)) { + *ofsp = ofs; + return ra_alloc1(as, refa, allow); + } + } else { + Reg base = ra_alloc1(as, ir->op1, allow); + *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base)); + return base; + } + } + } else if (ir->o == IR_HREFK) { + if (mayfuse(as, ref)) { + int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + if (emit_checkofs(ins, ofs)) { + *ofsp = ofs; + return ra_alloc1(as, ir->op1, allow); + } + } + } else if (ir->o == IR_UREFC) { + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; + int64_t ofs = glofs(as, &uv->tv); + if (emit_checkofs(ins, ofs)) { + *ofsp = (int32_t)ofs; + return RID_GL; + } + } + } + } + *ofsp = 0; + return ra_alloc1(as, ref, allow); +} + +/* Fuse m operand into arithmetic/logic instructions. */ +static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) +{ + IRIns *ir = IR(ref); + if (ra_hasreg(ir->r)) { + ra_noweak(as, ir->r); + return A64F_M(ir->r); + } else if (irref_isk(ref)) { + uint32_t m; + int64_t k = get_k64val(ir); + if ((ai & 0x1f000000) == 0x0a000000) + m = emit_isk13(k, irt_is64(ir->t)); + else + m = emit_isk12(k); + if (m) + return m; + } else if (mayfuse(as, ref)) { + if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) || + (ir->o == IR_ADD && ir->op1 == ir->op2)) { + A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR : + ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL; + int shift = ir->o == IR_ADD ? 1 : + (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); + IRIns *irl = IR(ir->op1); + if (sh == A64SH_LSL && + irl->o == IR_CONV && + irl->op2 == ((IRT_I64< op1, allow); + return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); + } else { + Reg m = ra_alloc1(as, ir->op1, allow); + return A64F_M(m) | A64F_SH(sh, shift); + } + } else if (ir->o == IR_CONV && + ir->op2 == ((IRT_I64< op1, allow); + return A64F_M(m) | A64F_EX(A64EX_SXTW); + } + } + return A64F_M(ra_allocref(as, ref, allow)); +} + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ +static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, + RegSet allow) +{ + IRIns *ir = IR(ref); + Reg base; + int32_t ofs = 0; + if (ra_noreg(ir->r) && canfuse(as, ir)) { + if (ir->o == IR_ADD) { + if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { + ref = ir->op1; + } else { + Reg rn, rm; + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irl = IR(lref); + if (mayfuse(as, irl->op1)) { + unsigned int shift = 4; + if (irl->o == IR_BSHL && irref_isk(irl->op2)) { + shift = (IR(irl->op2)->i & 63); + } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { + shift = 1; + } + if ((ai >> 30) == shift) { + lref = irl->op1; + irl = IR(lref); + ai |= A64I_LS_SH; + } + } + if (irl->o == IR_CONV && + irl->op2 == ((IRT_I64< op1; + ai |= A64I_LS_SXTWx; + } else { + ai |= A64I_LS_LSLx; + } + rm = ra_alloc1(as, lref, allow); + rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); + emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm); + return; + } + } else if (ir->o == IR_STRREF) { + if (asm_isk32(as, ir->op2, &ofs)) { + ref = ir->op1; + } else if (asm_isk32(as, ir->op1, &ofs)) { + ref = ir->op2; + } else { + Reg rn = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + uint32_t m; + if (irr+1 == ir && !ra_used(irr) && + irr->o == IR_ADD && irref_isk(irr->op2)) { + ofs = sizeof(GCstr) + IR(irr->op2)->i; + if (emit_checkofs(ai, ofs)) { + Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); + m = A64F_M(rm) | A64F_EX(A64EX_SXTW); + goto skipopm; + } + } + m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); + ofs = sizeof(GCstr); + skipopm: + emit_lso(as, ai, rd, rd, ofs); + emit_dn(as, A64I_ADDx^m, rd, rn); + return; + } + ofs += sizeof(GCstr); + if (!emit_checkofs(ai, ofs)) { + Reg rn = ra_alloc1(as, ref, allow); + Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); + emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm); + return; + } + } + } + base = ra_alloc1(as, ref, allow); + emit_lso(as, ai, (rd & 31), base, ofs); +} + +/* Fuse FP multiply-add/sub. */ +static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, ai = air, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); + return 1; + } + return 0; +} + +/* Fuse BAND + BSHL/BSHR into UBFM. */ +static int asm_fuseandshift(ASMState *as, IRIns *ir) +{ + IRIns *irl = IR(ir->op1); + lua_assert(ir->o == IR_BAND); + if (canfuse(as, irl) && irref_isk(ir->op2)) { + uint64_t mask = get_k64val(IR(ir->op2)); + if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { + int32_t shmask = irt_is64(irl->t) ? 63 : 31; + int32_t shift = (IR(irl->op2)->i & shmask); + int32_t imms = shift; + if (irl->o == IR_BSHL) { + mask >>= shift; + shift = (shmask-shift+1) & shmask; + imms = 0; + } + if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, irl->op1, RSET_GPR); + A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; + imms += 63 - emit_clz64(mask); + if (imms > shmask) imms = shmask; + emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); + return 1; + } + } + } + return 0; +} + +/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */ +static int asm_fuseorshift(ASMState *as, IRIns *ir) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + lua_assert(ir->o == IR_BOR); + if (canfuse(as, irl) && canfuse(as, irr) && + ((irl->o == IR_BSHR && irr->o == IR_BSHL) || + (irl->o == IR_BSHL && irr->o == IR_BSHR))) { + if (irref_isk(irl->op2) && irref_isk(irr->op2)) { + IRRef lref = irl->op1, rref = irr->op1; + uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i; + if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */ + uint32_t tmp2; + IRRef tmp1 = lref; lref = rref; rref = tmp1; + tmp2 = lshift; lshift = rshift; rshift = tmp2; + } + if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { + A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); + emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right); + return 1; + } + } + } + return 0; +} + +/* -- Calls --------------------------------------------------------------- */ + +/* Generate a call to a C function. */ +static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) +{ + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 0; + Reg gpr, fpr = REGARG_FIRSTFPR; + if ((void *)ci->func) + emit_call(as, (void *)ci->func); + for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) + as->cost[gpr] = REGCOST(~0u, ASMREF_L); + gpr = REGARG_FIRSTGPR; + for (n = 0; n < nargs; n++) { /* Setup args. */ + IRRef ref = args[n]; + IRIns *ir = IR(ref); + if (ref) { + if (irt_isfp(ir->t)) { + if (fpr <= REGARG_LASTFPR) { + lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ + ra_leftov(as, fpr, ref); + fpr++; + } else { + Reg r = ra_alloc1(as, ref, RSET_FPR); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); + ofs += 8; + } + } else { + if (gpr <= REGARG_LASTGPR) { + lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + ra_leftov(as, gpr, ref); + gpr++; + } else { + Reg r = ra_alloc1(as, ref, RSET_GPR); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); + ofs += 8; + } + } + } + } +} + +/* Setup result reg/sp for call. Evict scratch regs. */ +static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { + lua_assert(!irt_ispri(ir->t)); + if (irt_isfp(ir->t)) { + if (ci->flags & CCI_CASTU64) { + Reg dest = ra_dest(as, ir, RSET_FPR) & 31; + emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R, + dest, RID_RET); + } else { + ra_destreg(as, ir, RID_FPRET); + } + } else { + ra_destreg(as, ir, RID_RET); + } + } + UNUSED(ci); +} + +static void asm_callx(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX*2]; + CCallInfo ci; + IRRef func; + IRIns *irf; + ci.flags = asm_callx_flags(as, ir); + asm_collectargs(as, ir, &ci, args); + asm_setupresult(as, ir, &ci); + func = ir->op2; irf = IR(func); + if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } + if (irref_isk(func)) { /* Call to constant address. */ + ci.func = (ASMFunction)(ir_k64(irf)->u64); + } else { /* Need a non-argument register for indirect calls. */ + Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); + emit_n(as, A64I_BLR, freg); + ci.func = (ASMFunction)(void *)0; + } + asm_gencall(as, &ci, args); +} + +/* -- Returns ------------------------------------------------------------- */ + +/* Return to lower frame. Guard that it goes to the right spot. */ +static void asm_retf(ASMState *as, IRIns *ir) +{ + Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); + void *pc = ir_kptr(IR(ir->op2)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); + as->topslot -= (BCReg)delta; + if ((int32_t)as->topslot < 0) as->topslot = 0; + irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ + /* Need to force a spill on REF_BASE now to update the stack slot. */ + emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); + emit_setgl(as, base, jit_base); + emit_addptr(as, base, -8*delta); + asm_guardcc(as, CC_NE); + emit_nm(as, A64I_CMPx, RID_TMP, + ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base))); + emit_lso(as, A64I_LDRx, RID_TMP, base, -8); +} + +/* -- Type conversions ---------------------------------------------------- */ + +static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_guardcc(as, CC_NE); + emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31)); + emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest); + emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_FPR; + Reg left = ra_alloc1(as, ir->op1, allow); + Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); + Reg tmp = ra_scratch(as, rset_clear(allow, right)); + Reg dest = ra_dest(as, ir, RSET_GPR); + emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31)); + emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31)); +} + +static void asm_conv(ASMState *as, IRIns *ir) +{ + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + IRRef lref = ir->op1; + lua_assert(irt_type(ir->t) != st); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32, + (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + A64Ins ai = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) : + (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) : + (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32)); + emit_dn(as, ai, (dest & 31), left); + } + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lua_assert(irt_isint(ir->t) && st == IRT_NUM); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_GPR); + A64Ins ai = irt_is64(ir->t) ? + (st == IRT_NUM ? + (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : + (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : + (st == IRT_NUM ? + (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : + (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); + emit_dn(as, ai, dest, (left & 31)); + } + } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + A64Ins ai = st == IRT_I8 ? A64I_SXTBw : + st == IRT_U8 ? A64I_UXTBw : + st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; + lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + emit_dn(as, ai, dest, left); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irt_is64(ir->t)) { + if (st64 || !(ir->op2 & IRCONV_SEXT)) { + /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } else { /* 32 to 64 bit sign extension. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_dn(as, A64I_SXTW, dest, left); + } + } else { + if (st64) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_dm(as, A64I_MOVw, dest, left); + } else { /* 32/32 bit no-op (cast). */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } + } + } +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + Reg dest = 0, tmp; + int destused = ra_used(ir); + int32_t ofs = 0; + ra_evictset(as, RSET_SCRATCH); + if (destused) { + if (ra_hasspill(ir->s)) { + ofs = sps_scale(ir->s); + destused = 0; + if (ra_hasreg(ir->r)) { + ra_free(as, ir->r); + ra_modified(as, ir->r); + emit_spload(as, ir, ir->r, ofs); + } + } else { + dest = ra_dest(as, ir, RSET_FPR); + } + } + if (destused) + emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); + asm_guardcnb(as, A64I_CBZ, RID_RET); + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + tmp = ra_releasetmp(as, ASMREF_TMP1); + emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR); +} + +/* -- Memory references --------------------------------------------------- */ + +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + rset_clear(allow, src); + if (irt_isinteger(ir->t)) { + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + emit_lso(as, A64I_STRx, RID_TMP, base, ofs); + emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); + } else { + Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + emit_lso(as, A64I_STRx, RID_TMP, base, ofs); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); + } + } +} + +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +{ + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, i64ptr(ir_knum(ir)), dest); + } else { + /* Otherwise force a spill and use the spill slot. */ + emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR); + } + } else { + /* Otherwise use g->tmptv to hold the TValue. */ + asm_tvstore64(as, dest, 0, ref); + ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest); + } +} + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx, base; + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i); + if (k) { + base = ra_alloc1(as, refa, RSET_GPR); + emit_dn(as, A64I_ADDx^k, dest, base); + return; + } + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); + emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir, IROp merge) +{ + RegSet allow = RSET_GPR; + int destused = ra_used(ir); + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = 0, tmp = RID_TMP; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(ir->op2); + IRType1 kt = irkey->t; + uint32_t k = 0; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + rset_clear(allow, tab); + + if (!isk) { + key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); + rset_clear(allow, key); + if (!irt_isstr(kt)) { + tmp = ra_scratch(as, allow); + rset_clear(allow, tmp); + } + } else if (irt_isnum(kt)) { + int64_t val = (int64_t)ir_knum(irkey)->u64; + if (!(k = emit_isk12(val))) { + key = ra_allock(as, val, allow); + rset_clear(allow, key); + } + } else if (!irt_ispri(kt)) { + if (!(k = emit_isk12(irkey->i))) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } + } + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + as->invmcp = NULL; + if (merge == IR_NE) + asm_guardcc(as, CC_AL); + else if (destused) + emit_loada(as, dest, niltvg(J2G(as->J))); + + /* Follow hash chain until the end. */ + l_loop = --as->mcp; + emit_n(as, A64I_CMPx^A64I_K12^0, dest); + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); + l_next = emit_label(as); + + /* Type and value comparison. */ + if (merge == IR_EQ) + asm_guardcc(as, CC_EQ); + else + emit_cond_branch(as, CC_EQ, l_end); + + if (irt_isnum(kt)) { + if (isk) { + /* Assumes -0.0 is already canonicalized to +0.0. */ + if (k) + emit_n(as, A64I_CMPx^k, tmp); + else + emit_nm(as, A64I_CMPx, key, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + } else { + Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow); + Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); + rset_clear(allow, tisnum); + emit_nm(as, A64I_FCMPd, key, ftmp); + emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); + emit_cond_branch(as, CC_LO, l_next); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); + } + } else if (irt_isaddr(kt)) { + Reg scr; + if (isk) { + int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + scr = ra_allock(as, kk, allow); + emit_nm(as, A64I_CMPx, scr, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + } else { + scr = ra_scratch(as, allow); + emit_nm(as, A64I_CMPx, tmp, scr); + emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); + } + rset_clear(allow, scr); + } else { + Reg type, scr; + lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + scr = ra_scratch(as, rset_clear(allow, type)); + rset_clear(allow, scr); + emit_nm(as, A64I_CMPw, scr, type); + emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); + } + + *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; + if (!isk && irt_isaddr(kt)) { + Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); + rset_clear(allow, type); + } + /* Load main position relative to tab->node into dest. */ + khash = isk ? ir_khash(irkey) : 1; + if (khash == 0) { + emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); + } else { + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest); + emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node)); + if (isk) { + Reg tmphash = ra_allock(as, khash, allow); + emit_dnm(as, A64I_ANDw, dest, dest, tmphash); + emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); + } else if (irt_isstr(kt)) { + /* Fetch of str->hash is cheaper than ra_allock. */ + emit_dnm(as, A64I_ANDw, dest, dest, tmp); + emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); + emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_dnm(as, A64I_ANDw, dest, dest, tmp); + emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); + emit_dnm(as, A64I_SUBw, dest, dest, tmp); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); + emit_dnm(as, A64I_EORw, dest, dest, tmp); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); + emit_dnm(as, A64I_SUBw, tmp, tmp, dest); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); + emit_dnm(as, A64I_EORw, tmp, tmp, dest); + if (irt_isnum(kt)) { + emit_dnm(as, A64I_ADDw, dest, dest, dest); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); + emit_dm(as, A64I_MOVw, tmp, dest); + emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); + } else { + checkmclim(as); + emit_dm(as, A64I_MOVw, tmp, key); + emit_dnm(as, A64I_EORw, dest, dest, + ra_allock(as, irt_toitype(kt) << 15, allow)); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); + emit_dm(as, A64I_MOVx, dest, key); + } + } + } +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + int bigofs = !emit_checkofs(A64I_LDRx, ofs); + RegSet allow = RSET_GPR; + Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, allow); + Reg key = ra_scratch(as, rset_clear(allow, node)); + Reg idx = node; + uint64_t k; + lua_assert(ofs % sizeof(Node) == 0); + rset_clear(allow, key); + if (bigofs) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } else if (ra_hasreg(dest)) { + emit_opk(as, A64I_ADDx, dest, node, ofs, allow); + } + asm_guardcc(as, CC_NE); + if (irt_ispri(irkey->t)) { + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = ir_knum(irkey)->u64; + } else { + k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); + } + emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); + emit_lso(as, A64I_LDRx, key, idx, kofs); + if (bigofs) + emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, A64I_LDRx, dest, v); + } else { + Reg uv = ra_scratch(as, RSET_GPR); + Reg func = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->o == IR_UREFC) { + asm_guardcc(as, CC_NE); + emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); + emit_opk(as, A64I_ADDx, dest, uv, + (int32_t)offsetof(GCupval, tv), RSET_GPR); + emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + } else { + emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); + } + emit_lso(as, A64I_LDRx, uv, func, + (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); + lua_assert(!ra_used(ir)); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + uint32_t m; + rset_clear(allow, base); + if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) { + emit_dn(as, A64I_ADDx^m, dest, base); + } else { + emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest); + emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow)); + } +} + +/* -- Loads and stores ---------------------------------------------------- */ + +static A64Ins asm_fxloadins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: return A64I_LDRB ^ A64I_LS_S; + case IRT_U8: return A64I_LDRB; + case IRT_I16: return A64I_LDRH ^ A64I_LS_S; + case IRT_U16: return A64I_LDRH; + case IRT_NUM: return A64I_LDRd; + case IRT_FLOAT: return A64I_LDRs; + default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; + } +} + +static A64Ins asm_fxstoreins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return A64I_STRB; + case IRT_I16: case IRT_U16: return A64I_STRH; + case IRT_NUM: return A64I_STRd; + case IRT_FLOAT: return A64I_STRs; + default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx; + A64Ins ai = asm_fxloadins(ir); + int32_t ofs; + if (ir->op1 == REF_NIL) { + idx = RID_GL; + ofs = (ir->op2 << 2) - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx); + return; + } + } + ofs = field_ofs[ir->op2]; + } + emit_lso(as, ai, (dest & 31), idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); +} + +static void asm_xstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src)); + } +} + +static void asm_ahuvload(ASMState *as, IRIns *ir) +{ + Reg idx, tmp, type; + int32_t ofs = 0; + RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; + lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + irt_isint(ir->t)); + if (ra_used(ir)) { + Reg dest = ra_dest(as, ir, allow); + tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; + if (irt_isaddr(ir->t)) { + emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); + } else if (irt_isnum(ir->t)) { + emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); + } else if (irt_isint(ir->t)) { + emit_dm(as, A64I_MOVw, dest, dest); + } + } else { + tmp = ra_scratch(as, gpr); + } + type = ra_scratch(as, rset_clear(gpr, tmp)); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); + /* Always do the type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); + if (irt_type(ir->t) >= IRT_NUM) { + lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); + } else if (irt_isaddr(ir->t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + } else if (irt_isnil(ir->t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); + } else { + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp); + } + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); + else + emit_lso(as, A64I_LDRx, tmp, idx, ofs); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE; + int32_t ofs = 0; + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd); + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31)); + else + emit_lso(as, A64I_STRd, (src & 31), idx, ofs); + } else { + if (!irt_ispri(ir->t)) { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + if (irt_isinteger(ir->t)) + type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow); + else + type = ra_allock(as, irt_toitype(ir->t), allow); + } else { + tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), + A64I_STRx); + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); + else + emit_lso(as, A64I_STRx, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src); + } else { + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type); + } + } + } + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + int32_t ofs = 8*((int32_t)ir->op1-2); + IRType1 t = ir->t; + Reg dest = RID_NONE, base; + RegSet allow = RSET_GPR; + lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ + lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else if (ra_used(ir)) { + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); + lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); + if (irt_isaddr(t)) { + emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); + } else if ((ir->op2 & IRSLOAD_CONVERT)) { + if (irt_isint(t)) { + emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31)); + /* If value is already loaded for type check, move it to FPR. */ + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest); + else + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp); + dest = tmp; + t.irt = IRT_INT; /* Check for original type. */ + } + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + emit_dm(as, A64I_MOVw, dest, dest); + } + goto dotypecheck; + } + base = ra_alloc1(as, REF_BASE, allow); +dotypecheck: + rset_clear(allow, base); + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + Reg tmp; + if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) { + tmp = dest; + } else { + tmp = ra_scratch(as, allow); + rset_clear(allow, tmp); + } + if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) + emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); + /* Need type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); + if (irt_type(t) >= IRT_NUM) { + lua_assert(irt_isinteger(t) || irt_isnum(t)); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, LJ_TISNUM << 15, allow), tmp); + } else if (irt_isnil(t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); + } else if (irt_ispri(t)) { + emit_nm(as, A64I_CMPx, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); + } else { + Reg type = ra_scratch(as, allow); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + } + emit_lso(as, A64I_LDRx, tmp, base, ofs); + return; + } + if (ra_hasreg(dest)) { + emit_lso(as, irt_isnum(t) ? A64I_LDRd : + (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, + ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0))); + } +} + +/* -- Allocations --------------------------------------------------------- */ + +#if LJ_HASFFI +static void asm_cnew(ASMState *as, IRIns *ir) +{ + CTState *cts = ctype_ctsG(J2G(as->J)); + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + + as->gcsteps++; + asm_setupresult(as, ir, ci); /* GCcdata * */ + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + int32_t ofs = sizeof(GCcdata); + Reg r = ra_alloc1(as, ir->op2, allow); + lua_assert(sz == 4 || sz == 8); + emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; + } + + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ + { + Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); + emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); + emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); + emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); + if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1); + } + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ + asm_gencall(as, ci, args); + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); +} +#else +#define asm_cnew(as, ir) ((void)0) +#endif + +/* -- Write barriers ------------------------------------------------------ */ + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + Reg gr = ra_allock(as, i64ptr(J2G(as->J)), + rset_exclude(rset_exclude(RSET_GPR, tab), link)); + Reg mark = RID_TMP; + MCLabel l_end = emit_label(as); + emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_lso(as, A64I_STRx, tab, gr, + (int32_t)offsetof(global_State, gc.grayagain)); + emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); + emit_lso(as, A64I_LDRx, link, gr, + (int32_t)offsetof(global_State, gc.grayagain)); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); + emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); +} + +static void asm_obar(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; + MCLabel l_end; + RegSet allow = RSET_GPR; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ + lua_assert(IR(ir->op1)->o == IR_UREFC); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); + ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) ); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(allow, obj)); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); + val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); + emit_lso(as, A64I_LDRB, tmp, obj, + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); + emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); +} + +/* -- Arithmetic and logic operations ------------------------------------- */ + +static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); +} + +static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + emit_dn(as, ai, (dest & 31), (left & 31)); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + IRFPMathOp fpm = (IRFPMathOp)ir->op2; + if (fpm == IRFPM_SQRT) { + asm_fpunary(as, ir, A64I_FSQRTd); + } else if (fpm <= IRFPM_TRUNC) { + asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : + fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); + } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { + return; + } else { + asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); + } +} + +static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) +{ + IRIns *ir; + if (irref_isk(rref)) + return 0; /* Don't swap constants to the left. */ + if (irref_isk(lref)) + return 1; /* But swap constants to the right. */ + ir = IR(rref); + if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + (ir->o == IR_ADD && ir->op1 == ir->op2) || + (ir->o == IR_CONV && ir->op2 == ((IRT_I64< o >= IR_BSHL && ir->o <= IR_BSAR) || + (ir->o == IR_ADD && ir->op1 == ir->op2) || + (ir->o == IR_CONV && ir->op2 == ((IRT_I64< op1, rref = ir->op2; + Reg left, dest = ra_dest(as, ir, RSET_GPR); + uint32_t m; + if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + } + left = ra_hintalloc(as, lref, dest, RSET_GPR); + if (irt_is64(ir->t)) ai |= A64I_X; + m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); + if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ + asm_guardcc(as, CC_VS); + ai |= A64I_S; + } + emit_dn(as, ai^m, dest, left); +} + +static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) +{ + if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ + as->flagmcp = NULL; + as->mcp++; + ai |= A64I_S; + } + asm_intop(as, ir, ai); +} + +static void asm_intneg(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); +} + +/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ +static void asm_intmul(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + if (irt_isguard(ir->t)) { /* IR_MULOV */ + asm_guardcc(as, CC_NE); + emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ + emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); + emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); + emit_dnm(as, A64I_SMULL, dest, right, left); + } else { + emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); + } +} + +static void asm_add(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) + asm_fparith(as, ir, A64I_FADDd); + return; + } + asm_intop_s(as, ir, A64I_ADDw); +} + +static void asm_sub(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) + asm_fparith(as, ir, A64I_FSUBd); + return; + } + asm_intop_s(as, ir, A64I_SUBw); +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fparith(as, ir, A64I_FMULd); + return; + } + asm_intmul(as, ir); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ +#if LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else +#endif + asm_fparith(as, ir, A64I_FDIVd); +} + +static void asm_pow(ASMState *as, IRIns *ir) +{ +#if LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_powi); +} + +#define asm_addov(as, ir) asm_add(as, ir) +#define asm_subov(as, ir) asm_sub(as, ir) +#define asm_mulov(as, ir) asm_mul(as, ir) + +#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) +#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) + +static void asm_mod(ASMState *as, IRIns *ir) +{ +#if LJ_HASFFI + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + +static void asm_neg(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, A64I_FNEGd); + return; + } + asm_intneg(as, ir); +} + +static void asm_band(ASMState *as, IRIns *ir) +{ + A64Ins ai = A64I_ANDw; + if (asm_fuseandshift(as, ir)) + return; + if (as->flagmcp == as->mcp) { + /* Try to drop cmp r, #0. */ + as->flagmcp = NULL; + as->mcp++; + ai = A64I_ANDSw; + } + asm_intop(as, ir, ai); +} + +static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irl = IR(lref), *irr = IR(rref); + if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) || + (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) { + Reg left, dest = ra_dest(as, ir, RSET_GPR); + uint32_t m; + if (irl->o == IR_BNOT) { + IRRef tmp = lref; lref = rref; rref = tmp; + } + left = ra_alloc1(as, lref, RSET_GPR); + ai |= A64I_ON; + if (irt_is64(ir->t)) ai |= A64I_X; + m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left)); + emit_dn(as, ai^m, dest, left); + } else { + asm_intop(as, ir, ai); + } +} + +static void asm_bor(ASMState *as, IRIns *ir) +{ + if (asm_fuseorshift(as, ir)) + return; + asm_borbxor(as, ir, A64I_ORRw); +} + +#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw) + +static void asm_bnot(ASMState *as, IRIns *ir) +{ + A64Ins ai = A64I_MVNw; + Reg dest = ra_dest(as, ir, RSET_GPR); + uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); + if (irt_is64(ir->t)) ai |= A64I_X; + emit_d(as, ai^m, dest); +} + +static void asm_bswap(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); +} + +static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) +{ + int32_t shmask = irt_is64(ir->t) ? 63 : 31; + if (irref_isk(ir->op2)) { /* Constant shifts. */ + Reg left, dest = ra_dest(as, ir, RSET_GPR); + int32_t shift = (IR(ir->op2)->i & shmask); + IRIns *irl = IR(ir->op1); + if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; + + /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ + if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) { + if (irl->o == IR_BSHL && irref_isk(irl->op2)) { + int32_t shift2 = (IR(irl->op2)->i & shmask); + shift = ((shift - shift2) & shmask); + shmask -= shift2; + ir = irl; + } + } + + left = ra_alloc1(as, ir->op1, RSET_GPR); + switch (sh) { + case A64SH_LSL: + emit_dn(as, ai | A64F_IMMS(shmask-shift) | + A64F_IMMR((shmask-shift+1)&shmask), dest, left); + break; + case A64SH_LSR: case A64SH_ASR: + emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); + break; + case A64SH_ROR: + emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); + break; + } + } else { /* Variable-length shifts. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); + } +} + +#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) +#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) +#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) +#define asm_brol(as, ir) lua_assert(0) + +static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); + emit_nm(as, A64I_CMPw, left, right); +} + +static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) +{ + Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = ((left >> 8) & 31); left &= 31; + emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); + emit_nm(as, A64I_FCMPd, left, right); +} + +static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) +{ + if (irt_isnum(ir->t)) + asm_fpmin_max(as, ir, fcc); + else + asm_intmin_max(as, ir, cc); +} + +#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) +#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) + +/* -- Comparisons --------------------------------------------------------- */ + +/* Map of comparisons to flags. ORDER IR. */ +static const uint8_t asm_compmap[IR_ABC+1] = { + /* op FP swp int cc FP cc */ + /* LT */ CC_GE + (CC_HS << 4), + /* GE x */ CC_LT + (CC_HI << 4), + /* LE */ CC_GT + (CC_HI << 4), + /* GT x */ CC_LE + (CC_HS << 4), + /* ULT x */ CC_HS + (CC_LS << 4), + /* UGE */ CC_LO + (CC_LO << 4), + /* ULE x */ CC_HI + (CC_LO << 4), + /* UGT */ CC_LS + (CC_LS << 4), + /* EQ */ CC_NE + (CC_NE << 4), + /* NE */ CC_EQ + (CC_EQ << 4), + /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ +}; + +/* FP comparisons. */ +static void asm_fpcomp(ASMState *as, IRIns *ir) +{ + Reg left, right; + A64Ins ai; + int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); + if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { + left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); + right = 0; + ai = A64I_FCMPZd; + } else { + left = ra_alloc2(as, ir, RSET_FPR); + if (swp) { + right = (left & 31); left = ((left >> 8) & 31); + } else { + right = ((left >> 8) & 31); left &= 31; + } + ai = A64I_FCMPd; + } + asm_guardcc(as, (asm_compmap[ir->o] >> 4)); + emit_nm(as, ai, left, right); +} + +/* Integer comparisons. */ +static void asm_intcomp(ASMState *as, IRIns *ir) +{ + A64CC oldcc, cc = (asm_compmap[ir->o] & 15); + A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; + IRRef lref = ir->op1, rref = ir->op2; + Reg left; + uint32_t m; + int cmpprev0 = 0; + lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || + irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); + if (asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ + else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ + } + oldcc = cc; + if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { + IRIns *irl = IR(lref); + if (cc == CC_GE) cc = CC_PL; + else if (cc == CC_LT) cc = CC_MI; + else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */ + cmpprev0 = (irl+1 == ir); + /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */ + if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { + IRRef blref = irl->op1, brref = irl->op2; + uint32_t m2 = 0; + Reg bleft; + if (asm_swapops(as, blref, brref)) { + Reg tmp = blref; blref = brref; brref = tmp; + } + if (irref_isk(brref)) { + uint64_t k = get_k64val(IR(brref)); + if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { + asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, + ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); + return; + } + m2 = emit_isk13(k, irt_is64(irl->t)); + } + bleft = ra_alloc1(as, blref, RSET_GPR); + ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); + if (!m2) + m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); + asm_guardcc(as, cc); + emit_n(as, ai^m2, bleft); + return; + } + if (cc == CC_EQ || cc == CC_NE) { + /* Combine cmp-bcc into cbz/cbnz. */ + ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ; + if (irt_is64(ir->t)) ai |= A64I_X; + asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR)); + return; + } + } +nocombine: + left = ra_alloc1(as, lref, RSET_GPR); + m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); + asm_guardcc(as, cc); + emit_n(as, ai^m, left); + /* Signed comparison with zero and referencing previous ins? */ + if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) + as->flagmcp = as->mcp; /* Allow elimination of the compare. */ +} + +static void asm_comp(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else + asm_intcomp(as, ir); +} + +#define asm_equal(as, ir) asm_comp(as, ir) + +/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ + +/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +static void asm_hiop(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + uint32_t k = emit_isk13(HOOK_PROFILE, 0); + lua_assert(k != 0); + UNUSED(ir); + asm_guardcc(as, CC_NE); + emit_n(as, A64I_TSTw^k, RID_TMP); + emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); +} + +/* -- Stack handling ------------------------------------------------------ */ + +/* Check Lua stack size for overflow. Use exit handler as fallback. */ +static void asm_stack_check(ASMState *as, BCReg topslot, + IRIns *irp, RegSet allow, ExitNo exitno) +{ + Reg pbase; + uint32_t k; + if (irp) { + if (!ra_hasspill(irp->s)) { + pbase = irp->r; + lua_assert(ra_hasreg(pbase)); + } else if (allow) { + pbase = rset_pickbot(allow); + } else { + pbase = RID_RET; + emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ + } + } else { + pbase = RID_BASE; + } + emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); + k = emit_isk12((8*topslot)); + lua_assert(k); + emit_n(as, A64I_CMPx^k, RID_TMP); + emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); + emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, + (int32_t)offsetof(lua_State, maxstack)); + if (irp) { /* Must not spill arbitrary registers in head of side trace. */ + if (ra_hasspill(irp->s)) + emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); + emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); + if (ra_hasspill(irp->s) && !allow) + emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ + } else { + emit_getgl(as, RID_TMP, cur_L); + } +} + +/* Restore Lua stack from on-trace state. */ +static void asm_stack_restore(ASMState *as, SnapShot *snap) +{ + SnapEntry *map = &as->T->snapmap[snap->mapofs]; +#ifdef LUA_USE_ASSERT + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif + MSize n, nent = snap->nent; + /* Store the value of all modified slots to the Lua stack. */ + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + BCReg s = snap_slot(sn); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); + IRRef ref = snap_ref(sn); + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); + } else { + asm_tvstore64(as, RID_BASE, ofs, ref); + } + checkmclim(as); + } + lua_assert(map + nent == flinks); +} + +/* -- GC handling --------------------------------------------------------- */ + +/* Check GC threshold and do one or more GC steps. */ +static void asm_gc_check(ASMState *as) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; + MCLabel l_end; + Reg tmp1, tmp2; + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); + tmp1 = ra_releasetmp(as, ASMREF_TMP1); + tmp2 = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp2, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. */ + emit_cond_branch(as, CC_LS, l_end); + emit_nm(as, A64I_CMPx, RID_TMP, tmp2); + emit_lso(as, A64I_LDRx, tmp2, tmp1, + (int32_t)offsetof(global_State, gc.threshold)); + emit_lso(as, A64I_LDRx, RID_TMP, tmp1, + (int32_t)offsetof(global_State, gc.total)); + ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); + as->gcsteps = 0; + checkmclim(as); +} + +/* -- Loop handling ------------------------------------------------------- */ + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + MCode *p = as->mctop; + MCode *target = as->mcp; + if (as->loopinv) { /* Inverted loop branch? */ + uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu; + ptrdiff_t delta = target - (p - 2); + /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */ + p[-2] |= ((uint32_t)delta & mask) << 5; + } else { + ptrdiff_t delta = target - (p - 1); + p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); + } +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Reload L register from g->cur_L. */ +static void asm_head_lreg(ASMState *as) +{ + IRIns *ir = IR(ASMREF_L); + if (ra_used(ir)) { + Reg r = ra_dest(as, ir, RSET_GPR); + emit_getgl(as, r, cur_L); + ra_evictk(as); + } +} + +/* Coalesce BASE register for a root trace. */ +static void asm_head_root_base(ASMState *as) +{ + IRIns *ir; + asm_head_lreg(as); + ir = IR(REF_BASE); + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) + ra_spill(as, ir); + ra_destreg(as, ir, RID_BASE); +} + +/* Coalesce BASE register for a side trace. */ +static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) +{ + IRIns *ir; + asm_head_lreg(as); + ir = IR(REF_BASE); + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) + ra_spill(as, ir); + if (ra_hasspill(irp->s)) { + rset_clear(allow, ra_dest(as, ir, allow)); + } else { + Reg r = irp->r; + lua_assert(ra_hasreg(r)); + rset_clear(allow, r); + if (r != ir->r && !rset_test(as->freeset, r)) + ra_restore(as, regcost_ref(as->cost[r])); + ra_destreg(as, ir, r); + } + return allow; +} + +/* -- Tail of trace ------------------------------------------------------- */ + +/* Fixup the tail code. */ +static void asm_tail_fixup(ASMState *as, TraceNo lnk) +{ + MCode *p = as->mctop; + MCode *target; + /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ + int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); + if (spadj == 0) { + *--p = A64I_LE(A64I_NOP); + as->mctop = p; + } else { + /* Patch stack adjustment. */ + uint32_t k = emit_isk12(spadj); + lua_assert(k); + p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); + } + /* Patch exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; + p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); +} + +/* Prepare tail of code. */ +static void asm_tail_prep(ASMState *as) +{ + MCode *p = as->mctop - 1; /* Leave room for exit branch. */ + if (as->loopref) { + as->invmcp = as->mcp = p; + } else { + as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + as->invmcp = NULL; + } + *p = 0; /* Prevent load/store merging. */ +} + +/* -- Trace setup --------------------------------------------------------- */ + +/* Ensure there are enough stack slots for call arguments. */ +static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + IRRef args[CCI_NARGS_MAX*2]; + uint32_t i, nargs = CCI_XNARGS(ci); + int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) { + if (args[i] && irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) nfpr--; else nslots += 2; + } else { + if (ngpr > 0) ngpr--; else nslots += 2; + } + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; + return REGSP_HINT(RID_RET); +} + +static void asm_setup_target(ASMState *as) +{ + /* May need extra exit for asm_stack_check on side traces. */ + asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); +} + +#if LJ_BE +/* ARM64 instructions are always little-endian. Swap for ARM64BE. */ +static void asm_mcode_fixup(MCode *mcode, MSize size) +{ + MCode *pe = (MCode *)((char *)mcode + size); + while (mcode < pe) { + MCode ins = *mcode; + *mcode++ = lj_bswap(ins); + } +} +#define LJ_TARGET_MCODE_FIXUP 1 +#endif + +/* -- Trace patching ------------------------------------------------------ */ + +/* Patch exit jumps of existing machine code to a new target. */ +void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) +{ + MCode *p = T->mcode; + MCode *pe = (MCode *)((char *)p + T->szmcode); + MCode *cstart = NULL, *cend = p; + MCode *mcarea = lj_mcode_patch(J, p, 0); + MCode *px = exitstub_trace_addr(T, exitno); + for (; p < pe; p++) { + /* Look for exitstub branch, replace with branch to target. */ + MCode ins = A64I_LE(*p); + if ((ins & 0xff000000u) == 0x54000000u && + ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { + /* Patch bcc exitstub. */ + *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u)); + cend = p+1; + if (!cstart) cstart = p; + } else if ((ins & 0xfc000000u) == 0x14000000u && + ((ins ^ (px-p)) & 0x03ffffffu) == 0) { + /* Patch b exitstub. */ + *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu)); + cend = p+1; + if (!cstart) cstart = p; + } else if ((ins & 0x7e000000u) == 0x34000000u && + ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { + /* Patch cbz/cbnz exitstub. */ + *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u)); + cend = p+1; + if (!cstart) cstart = p; + } else if ((ins & 0x7e000000u) == 0x36000000u && + ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { + /* Patch tbz/tbnz exitstub. */ + *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u)); + cend = p+1; + if (!cstart) cstart = p; + } + } + lua_assert(cstart != NULL); + lj_mcode_sync(cstart, cend); + lj_mcode_patch(J, mcarea, 1); +} + diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index cf446346..affe7d89 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -1,6 +1,6 @@ /* ** MIPS IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ @@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) { Reg r = IR(ref)->r; if (ra_noreg(r)) { - if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) + if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) return RID_ZERO; r = ra_allocref(as, ref, allow); } else { @@ -166,9 +166,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) } else if (ir->o == IR_UREFC) { if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - int32_t jgl = (intptr_t)J2G(as->J); - if ((uint32_t)(ofs-jgl) < 65536) { + intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; + intptr_t jgl = (intptr_t)J2G(as->J); + if ((uintptr_t)(ofs-jgl) < 65536) { *ofsp = ofs-jgl-32768; return RID_JGL; } else { @@ -190,20 +190,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, Reg base; if (ra_noreg(ir->r) && canfuse(as, ir)) { if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { + intptr_t ofs2; + if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), + checki16(ofs2))) { ref = ir->op1; - ofs = ofs2; + ofs = (int32_t)ofs2; } } else if (ir->o == IR_STRREF) { - int32_t ofs2 = 65536; + intptr_t ofs2 = 65536; lua_assert(ofs == 0); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { - ofs2 = ofs + IR(ir->op2)->i; + ofs2 = ofs + get_kval(IR(ir->op2)); ref = ir->op1; } else if (irref_isk(ir->op1)) { - ofs2 = ofs + IR(ir->op1)->i; + ofs2 = ofs + get_kval(IR(ir->op1)); ref = ir->op2; } if (!checki16(ofs2)) { @@ -211,7 +212,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, Reg right, left = ra_alloc2(as, ir, allow); right = (left >> 8); left &= 255; emit_hsi(as, mi, rt, RID_TMP, ofs); - emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); + emit_dst(as, MIPSI_AADDU, RID_TMP, left, right); return; } ofs = ofs2; @@ -227,7 +228,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 16; + int32_t ofs = LJ_32 ? 16 : 0; #if LJ_SOFTFP Reg gpr = REGARG_FIRSTGPR; #else @@ -249,15 +250,15 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) !(ci->flags & CCI_VARARG)) { lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ ra_leftov(as, fpr, ref); - fpr += 2; - gpr += irt_isnum(ir->t) ? 2 : 1; + fpr += LJ_32 ? 2 : 1; + gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1; } else #endif { -#if !LJ_SOFTFP +#if LJ_32 && !LJ_SOFTFP fpr = REGARG_LASTFPR+1; #endif - if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; + if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; if (gpr <= REGARG_LASTGPR) { lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ #if !LJ_SOFTFP @@ -269,35 +270,55 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) r = ra_alloc1(as, ref, RSET_FPR); as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); if (irt_isnum(ir->t)) { +#if LJ_32 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ gpr += 2; +#else + emit_tg(as, MIPSI_DMFC1, gpr, r); + gpr++; fpr++; +#endif } else if (irt_isfloat(ir->t)) { emit_tg(as, MIPSI_MFC1, gpr, r); gpr++; +#if LJ_64 + fpr++; +#endif } } else #endif { ra_leftov(as, gpr, ref); gpr++; +#if LJ_64 + fpr++; +#endif } } else { Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); +#if LJ_32 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; emit_spstore(as, ir, r, ofs); ofs += irt_isnum(ir->t) ? 8 : 4; +#else + emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); + ofs += 8; +#endif } } } else { #if !LJ_SOFTFP fpr = REGARG_LASTFPR+1; #endif - if (gpr <= REGARG_LASTGPR) + if (gpr <= REGARG_LASTGPR) { gpr++; - else - ofs += 4; +#if LJ_64 + fpr++; +#endif + } else { + ofs += LJ_32 ? 4 : 8; + } } checkmclim(as); } @@ -307,15 +328,19 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; +#if LJ_32 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); +#endif #if !LJ_SOFTFP if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; #endif if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ +#if LJ_32 if (hiop && ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ +#endif ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { lua_assert(!irt_ispri(ir->t)); @@ -326,18 +351,28 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) if (ra_hasreg(dest)) { ra_free(as, dest); ra_modified(as, dest); +#if LJ_32 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); +#else + emit_tg(as, MIPSI_DMTC1, RID_RET, dest); +#endif } if (ofs) { +#if LJ_32 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); +#else + emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs); +#endif } } else { ra_destreg(as, ir, RID_FPRET); } +#if LJ_32 } else if (hiop) { ra_destpair(as, ir); +#endif } else { ra_destreg(as, ir, RID_RET); } @@ -356,7 +391,7 @@ static void asm_callx(ASMState *as, IRIns *ir) func = ir->op2; irf = IR(func); if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(irf->i); + ci.func = (ASMFunction)(void *)get_kval(irf); } else { /* Need specific register for indirect calls. */ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); MCode *p = as->mcp; @@ -399,8 +434,8 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_tsi(as, MIPSI_LW, RID_TMP, base, -8); + ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); + emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); } /* -- Type conversions ---------------------------------------------------- */ @@ -434,11 +469,16 @@ static void asm_conv(ASMState *as, IRIns *ir) IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if !LJ_SOFTFP int stfp = (st == IRT_NUM || st == IRT_FLOAT); +#endif +#if LJ_64 + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); #endif IRRef lref = ir->op1; +#if LJ_32 lua_assert(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ -#if LJ_SOFTFP +#endif +#if LJ_32 && LJ_SOFTFP /* FP conversions are handled by SPLIT. */ lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ @@ -453,24 +493,50 @@ static void asm_conv(ASMState *as, IRIns *ir) /* y = (x ^ 0x8000000) + 2147483648.0 */ Reg left = ra_alloc1(as, lref, RSET_GPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - emit_fgh(as, irt_isfloat(ir->t) ? MIPSI_ADD_S : MIPSI_ADD_D, - dest, dest, tmp); - emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, - dest, dest); if (irt_isfloat(ir->t)) - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - else - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); + emit_fg(as, MIPSI_CVT_S_D, dest, dest); + /* Must perform arithmetic with doubles to keep the precision. */ + emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); + emit_fg(as, MIPSI_CVT_D_W, dest, dest); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); emit_tg(as, MIPSI_MTC1, RID_TMP, dest); emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); +#if LJ_64 + } else if(st == IRT_U64) { /* U64 to FP conversion. */ + /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); + MCLabel l_end = emit_label(as); + if (irt_isfloat(ir->t)) { + emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp); + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], + rset_exclude(RSET_GPR, left)); + emit_fg(as, MIPSI_CVT_S_L, dest, dest); + } else { + emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], + rset_exclude(RSET_GPR, left)); + emit_fg(as, MIPSI_CVT_D_L, dest, dest); + } + emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end); + emit_tg(as, MIPSI_DMTC1, RID_TMP, dest); + emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0); +#endif } else { /* Integer to FP conversion. */ Reg left = ra_alloc1(as, lref, RSET_GPR); +#if LJ_32 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, dest, dest); emit_tg(as, MIPSI_MTC1, left, dest); +#else + MIPSIns mi = irt_isfloat(ir->t) ? + (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) : + (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W); + emit_fg(as, mi, dest, dest); + emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest); +#endif } } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { @@ -481,7 +547,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, lref, RSET_FPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { + if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); @@ -496,10 +562,50 @@ static void asm_conv(ASMState *as, IRIns *ir) else emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); +#if LJ_64 + } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ + MCLabel l_end; + emit_tg(as, MIPSI_DMFC1, dest, tmp); + l_end = emit_label(as); + /* For inputs >= 2^63 add -2^64 and convert again. */ + if (st == IRT_NUM) { + emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); + emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_M2P64], + rset_exclude(RSET_GPR, dest)); + emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_2P63], + rset_exclude(RSET_GPR, dest)); + } else { + emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); + emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), + (void *)&as->J->k32[LJ_K32_M2P64], + rset_exclude(RSET_GPR, dest)); + emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), + (void *)&as->J->k32[LJ_K32_2P63], + rset_exclude(RSET_GPR, dest)); + } +#endif } else { +#if LJ_32 emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, tmp, left); +#else + MIPSIns mi = irt_is64(ir->t) ? + (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : + (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); + emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); + emit_fg(as, mi, left, left); +#endif } } } else @@ -510,7 +616,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg left = ra_alloc1(as, ir->op1, RSET_GPR); lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); if ((ir->op2 & IRCONV_SEXT)) { - if ((as->flags & JIT_F_MIPSXXR2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); } else { uint32_t shift = st == IRT_I8 ? 24 : 16; @@ -522,8 +628,35 @@ static void asm_conv(ASMState *as, IRIns *ir) (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); } } else { /* 32/64 bit integer conversions. */ +#if LJ_32 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ +#else + if (irt_is64(ir->t)) { + if (st64) { + /* 64/64 bit no-op (cast)*/ + ra_leftov(as, dest, lref); + } else { + Reg left = ra_alloc1(as, lref, RSET_GPR); + if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ + emit_dta(as, MIPSI_SLL, dest, left, 0); + } else { /* 32 to 64 bit zero extension. */ + emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); + } + } + } else { + if (st64) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); + } else { /* 32/32 bit no-op (cast). */ + /* Do nothing, but may need to move regs. */ + ra_leftov(as, dest, lref); + } + } +#endif } } } @@ -566,23 +699,50 @@ static void asm_strto(ASMState *as, IRIns *ir) args[1] = ASMREF_TMP1; /* TValue *n */ asm_gencall(as, ci, args); /* Store the result to the spill slot or temp slots. */ - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); } /* -- Memory references --------------------------------------------------- */ +#if LJ_64 +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, + rset_exclude(allow, src)); + emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs); + if (irt_isinteger(ir->t)) { + emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type); + emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0); + } else { + emit_dst(as, MIPSI_DADDU, RID_TMP, src, type); + } + } +} +#endif + /* Get pointer to TValue. */ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) { IRIns *ir = IR(ref); if (irt_isnum(ir->t)) { if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); + ra_allockreg(as, igcptr(ir_knum(ir)), dest); else /* Otherwise force a spill and use the spill slot. */ - emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); + emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); } else { /* Otherwise use g->tmptv to hold the TValue. */ +#if LJ_32 RegSet allow = rset_exclude(RSET_GPR, dest); Reg type; emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); @@ -595,6 +755,11 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) else type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); emit_setgl(as, type, tmptv.it); +#else + asm_tvstore64(as, dest, 0, ref); + emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, + (int32_t)(offsetof(global_State, tmptv)-32768)); +#endif } } @@ -609,13 +774,13 @@ static void asm_aref(ASMState *as, IRIns *ir) ofs += 8*IR(ir->op2)->i; if (checki16(ofs)) { base = ra_alloc1(as, refa, RSET_GPR); - emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); + emit_tsi(as, MIPSI_AADDIU, dest, base, ofs); return; } } base = ra_alloc1(as, ir->op1, RSET_GPR); idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); + emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); } @@ -636,13 +801,14 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t khash; MCLabel l_end, l_loop, l_next; rset_clear(allow, tab); -#if LJ_SOFTFP - if (!irref_isk(refkey)) { +#if LJ_32 && LJ_SOFTFP + if (!isk) { key = ra_alloc1(as, refkey, allow); rset_clear(allow, key); if (irkey[1].o == IR_HIOP) { @@ -667,8 +833,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) } else if (!irt_ispri(kt)) { key = ra_alloc1(as, refkey, allow); rset_clear(allow, key); +#if LJ_32 type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); rset_clear(allow, type); +#endif } #endif tmp2 = ra_scratch(as, allow); @@ -682,9 +850,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) else if (destused) emit_loada(as, dest, niltvg(J2G(as->J))); /* Follow hash chain until the end. */ - emit_move(as, dest, tmp2); + emit_move(as, dest, tmp1); l_loop = --as->mcp; - emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next)); + emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next)); l_next = emit_label(as); /* Type and value comparison. */ @@ -696,38 +864,66 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ - emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next); - emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM); + emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); + emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); +#if LJ_32 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); } else { if (irt_ispri(kt)) { - emit_branch(as, MIPSI_BEQ, tmp2, type, l_end); + emit_branch(as, MIPSI_BEQ, tmp1, type, l_end); } else { - emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); - emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr)); - emit_branch(as, MIPSI_BNE, tmp2, type, l_next); + emit_branch(as, MIPSI_BEQ, tmp2, key, l_end); + emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); + emit_branch(as, MIPSI_BNE, tmp1, type, l_next); } } - emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it)); - *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu); + emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); + *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); +#else + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); + emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + } else if (irt_isaddr(kt)) { + Reg refk = tmp2; + if (isk) { + int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + refk = ra_allock(as, k, allow); + rset_clear(allow, refk); + } + emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); + emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); + } else { + Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, pri); + lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); + emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); + } + *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); + if (!isk && irt_isaddr(kt)) { + type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); + emit_dst(as, MIPSI_DADDU, tmp2, key, type); + rset_clear(allow, type); + } +#endif /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(irkey) : 1; if (khash == 0) { - emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); } else { Reg tmphash = tmp1; - if (irref_isk(refkey)) + if (isk) tmphash = ra_allock(as, khash, allow); - emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); + emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); lua_assert(sizeof(Node) == 24); emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); - emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { + if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); @@ -737,6 +933,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); +#if LJ_32 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); if ((as->flags & JIT_F_MIPSXXR2)) { @@ -759,6 +956,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); } +#else + emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); + emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); + if (irt_isnum(kt)) { + emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); + emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); +#if !LJ_SOFTFP + emit_tg(as, MIPSI_DMFC1, tmp1, key); +#endif + } else { + checkmclim(as); + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); + emit_dta(as, MIPSI_SLL, tmp2, key, 0); + emit_dst(as, MIPSI_DADDU, tmp1, key, type); + } +#endif } } } @@ -771,17 +985,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int32_t kofs = ofs + (int32_t)offsetof(Node, key); Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; +#if LJ_32 + Reg key = RID_NONE, type = RID_TMP; int32_t lo, hi; +#else + Reg key = ra_scratch(as, allow); + int64_t k; +#endif lua_assert(ofs % sizeof(Node) == 0); if (ofs > 32736) { idx = dest; rset_clear(allow, dest); kofs = (int32_t)offsetof(Node, key); } else if (ra_hasreg(dest)) { - emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); + emit_tsi(as, MIPSI_AADDIU, dest, node, ofs); } +#if LJ_32 if (!irt_ispri(irkey->t)) { key = ra_scratch(as, allow); rset_clear(allow, key); @@ -800,8 +1021,20 @@ nolo: asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); +#else + if (irt_ispri(irkey->t)) { + lua_assert(!irt_isnil(irkey->t)); + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = (int64_t)ir_knum(irkey)->u64; + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); + } + asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow)); + emit_tsi(as, MIPSI_LD, key, idx, kofs); +#endif if (ofs > 32736) - emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); + emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow)); } static void asm_uref(ASMState *as, IRIns *ir) @@ -810,19 +1043,19 @@ static void asm_uref(ASMState *as, IRIns *ir) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); + emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); } else { Reg uv = ra_scratch(as, RSET_GPR); Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); + emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); } else { - emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); } - emit_tsi(as, MIPSI_LW, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } @@ -834,6 +1067,7 @@ static void asm_fref(ASMState *as, IRIns *ir) static void asm_strref(ASMState *as, IRIns *ir) { +#if LJ_32 Reg dest = ra_dest(as, ir, RSET_GPR); IRRef ref = ir->op2, refk = ir->op1; int32_t ofs = (int32_t)sizeof(GCstr); @@ -865,6 +1099,20 @@ static void asm_strref(ASMState *as, IRIns *ir) else emit_dst(as, MIPSI_ADDU, dest, r, ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); +#else + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + rset_clear(allow, base); + if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { + emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); + } else { + emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); + emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow)); + } +#endif } /* -- Loads and stores ---------------------------------------------------- */ @@ -878,7 +1126,7 @@ static MIPSIns asm_fxloadins(IRIns *ir) case IRT_U16: return MIPSI_LHU; case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; - default: return MIPSI_LW; + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; } } @@ -889,7 +1137,7 @@ static MIPSIns asm_fxstoreins(IRIns *ir) case IRT_I16: case IRT_U16: return MIPSI_SH; case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; - default: return MIPSI_SW; + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; } } @@ -901,13 +1149,13 @@ static void asm_fload(ASMState *as, IRIns *ir) int32_t ofs; if (ir->op1 == REF_NIL) { idx = RID_JGL; - ofs = ir->op2 - 32768; + ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); } else { idx = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->op2 == IRFL_TAB_ARRAY) { ofs = asm_fuseabase(as, ir->op1); if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); + emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs); return; } } @@ -952,36 +1200,59 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) static void asm_ahuvload(ASMState *as, IRIns *ir) { - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - IRType t = hiop ? IRT_NUM : irt_type(ir->t); + int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); Reg dest = RID_NONE, type = RID_TMP, idx; RegSet allow = RSET_GPR; int32_t ofs = 0; - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); + IRType1 t = ir->t; + if (hiop) { + t.irt = IRT_NUM; + if (ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } } if (ra_used(ir)) { lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); +#if LJ_64 + if (irt_isaddr(t)) + emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); + else if (irt_isint(t)) + emit_dta(as, MIPSI_SLL, dest, dest, 0); +#endif } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); rset_clear(allow, idx); - if (t == IRT_NUM) { + if (irt_isnum(t)) { asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); } else { - asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype_(t), allow)); + asm_guard(as, MIPSI_BNE, type, + ra_allock(as, (int32_t)irt_toitype(t), allow)); } +#if LJ_32 if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && t == IRT_NUM) + if (!LJ_SOFTFP && irt_isnum(t)) emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); else emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); } emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); +#else + if (ra_hasreg(dest)) { + if (!LJ_SOFTFP && irt_isnum(t)) { + emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); + dest = type; + } + } else { + dest = type; + } + emit_dta(as, MIPSI_DSRA32, type, dest, 15); + emit_tsi(as, MIPSI_LD, dest, idx, ofs); +#endif } static void asm_ahustore(ASMState *as, IRIns *ir) @@ -993,103 +1264,159 @@ static void asm_ahustore(ASMState *as, IRIns *ir) return; if (!LJ_SOFTFP && irt_isnum(ir->t)) { src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_hsi(as, MIPSI_SDC1, src, idx, ofs); } else { - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); +#if LJ_32 if (!irt_ispri(ir->t)) { src = ra_alloc1(as, ir->op2, allow); rset_clear(allow, src); } - if (hiop) + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) type = ra_alloc1(as, (ir+1)->op2, allow); else type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - emit_hsi(as, MIPSI_SDC1, src, idx, ofs); - } else { + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); if (ra_hasreg(src)) emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); +#else + Reg tmp = RID_TMP; + if (irt_ispri(ir->t)) { + tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, tmp); + } else { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_tsi(as, MIPSI_SD, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + emit_dst(as, MIPSI_DADDU, tmp, tmp, type); + emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0); + } else { + emit_dst(as, MIPSI_DADDU, tmp, src, type); + } + } +#endif } } static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - IRType t = hiop ? IRT_NUM : irt_type(ir->t); Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; + IRType1 t = ir->t; +#if LJ_32 + int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); + if (hiop) + t.irt = IRT_NUM; +#else + int32_t ofs = 8*((int32_t)ir->op1-2); +#endif lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -#if LJ_SOFTFP +#if LJ_32 && LJ_SOFTFP lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ if (hiop && ra_used(ir+1)) { type = ra_dest(as, ir+1, allow); rset_clear(allow, type); } #else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { dest = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, dest); - t = IRT_NUM; /* Continue with a regular number type check. */ + t.irt = IRT_NUM; /* Continue with a regular number type check. */ } else #endif if (ra_used(ir)) { lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { - if (t == IRT_INT) { + if (irt_isint(t)) { Reg tmp = ra_scratch(as, RSET_FPR); emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); dest = tmp; - t = IRT_NUM; /* Check for original type. */ + t.irt = IRT_NUM; /* Check for original type. */ } else { Reg tmp = ra_scratch(as, RSET_GPR); emit_fg(as, MIPSI_CVT_D_W, dest, dest); emit_tg(as, MIPSI_MTC1, tmp, dest); dest = tmp; - t = IRT_INT; /* Check for original type. */ + t.irt = IRT_INT; /* Check for original type. */ } } +#if LJ_64 + else if (irt_isaddr(t)) { + /* Clear type from pointers. */ + emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + /* Sign-extend integers. */ + emit_dta(as, MIPSI_SLL, dest, dest, 0); + } +#endif goto dotypecheck; } base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); dotypecheck: +#if LJ_32 if ((ir->op2 & IRSLOAD_TYPECHECK)) { - if (ra_noreg(type)) { - if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && - rset_test((as->freeset & allow), dest+1)) { - type = dest+1; - ra_modified(as, type); - } else { - type = RID_TMP; - } - } - if (t == IRT_NUM) { + if (ra_noreg(type)) + type = RID_TMP; + if (irt_isnum(t)) { asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); } else { - Reg ktype = ra_allock(as, irt_toitype_(t), allow); + Reg ktype = ra_allock(as, irt_toitype(t), allow); asm_guard(as, MIPSI_BNE, type, ktype); } } if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && t == IRT_NUM) + if (!LJ_SOFTFP && irt_isnum(t)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); else emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); } if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); +#else + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + type = dest < RID_MAX_GPR ? dest : RID_TMP; + if (irt_ispri(t)) { + asm_guard(as, MIPSI_BNE, type, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); + } else { + if (irt_isnum(t)) { + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); + if (ra_hasreg(dest)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + } else { + asm_guard(as, MIPSI_BNE, RID_TMP, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15); + } + emit_tsi(as, MIPSI_LD, type, base, ofs); + } else if (ra_hasreg(dest)) { + if (irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + else + emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, + ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0)); + } +#endif } /* -- Allocations --------------------------------------------------------- */ @@ -1116,8 +1443,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { RegSet allow = (RSET_GPR & ~RSET_SCRATCH); +#if LJ_32 int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); if (sz == 8) { ofs += 4; lua_assert((ir+1)->o == IR_HIOP); @@ -1130,6 +1457,11 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ofs == sizeof(GCcdata)) break; ofs -= 4; if (LJ_BE) ir++; else ir--; } +#else + emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), + RID_RET, sizeof(GCcdata)); +#endif + lua_assert(sz == 4 || sz == 8); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; args[0] = ASMREF_L; /* lua_State *L */ @@ -1164,7 +1496,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); Reg link = RID_TMP; MCLabel l_end = emit_label(as); - emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); emit_setgl(as, tab, gc.grayagain); emit_getgl(as, link, gc.grayagain); @@ -1187,7 +1519,7 @@ static void asm_obar(ASMState *as, IRIns *ir) args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ir->op1; /* TValue *tv */ asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); obj = IR(ir->op1)->r; tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); @@ -1233,8 +1565,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir) static void asm_add(ASMState *as, IRIns *ir) { + IRType1 t = ir->t; #if !LJ_SOFTFP - if (irt_isnum(ir->t)) { + if (irt_isnum(t)) { asm_fparith(as, ir, MIPSI_ADD_D); } else #endif @@ -1242,14 +1575,16 @@ static void asm_add(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(IR(ir->op2)); if (checki16(k)) { - emit_tsi(as, MIPSI_ADDIU, dest, left, k); + emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, + left, k); return; } } right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, MIPSI_ADDU, dest, left, right); + emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest, + left, right); } } @@ -1264,7 +1599,8 @@ static void asm_sub(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_SUBU, dest, left, right); + emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, + left, right); } } @@ -1279,13 +1615,49 @@ static void asm_mul(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_MUL, dest, left, right); + if (LJ_64 && irt_is64(ir->t)) { + emit_dst(as, MIPSI_MFLO, dest, 0, 0); + emit_dst(as, MIPSI_DMULT, 0, left, right); + } else { + emit_dst(as, MIPSI_MUL, dest, left, right); + } } } -#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D) -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) +static void asm_mod(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + +#if !LJ_SOFTFP +static void asm_pow(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_powi); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else +#endif + asm_fparith(as, ir, MIPSI_DIV_D); +} +#endif static void asm_neg(ASMState *as, IRIns *ir) { @@ -1297,7 +1669,8 @@ static void asm_neg(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); + emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, + RID_ZERO, left); } } @@ -1308,6 +1681,7 @@ static void asm_neg(ASMState *as, IRIns *ir) static void asm_arithov(ASMState *as, IRIns *ir) { Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); + lua_assert(!irt_is64(ir->t)); if (irref_isk(ir->op2)) { int k = IR(ir->op2)->i; if (ir->o == IR_SUBOV) k = -k; @@ -1355,7 +1729,7 @@ static void asm_mulov(ASMState *as, IRIns *ir) emit_dst(as, MIPSI_MULT, 0, left, right); } -#if LJ_HASFFI +#if LJ_32 && LJ_HASFFI static void asm_add64(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1457,6 +1831,7 @@ static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR); +#if LJ_32 if ((as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); @@ -1472,6 +1847,15 @@ static void asm_bswap(ASMState *as, IRIns *ir) emit_dta(as, MIPSI_SRL, tmp, left, 24); emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); } +#else + if (irt_is64(ir->t)) { + emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP); + emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left); + } else { + emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); + emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); + } +#endif } static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) @@ -1479,7 +1863,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(IR(ir->op2)); if (checku16(k)) { emit_tsi(as, mik, dest, left, k); return; @@ -1497,11 +1881,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) { Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); - emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); + uint32_t shift = (uint32_t)IR(ir->op2)->i; + if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D; + emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), + (shift & 31)); } else { Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; + if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV; emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ } } @@ -1513,7 +1900,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) static void asm_bror(ASMState *as, IRIns *ir) { - if ((as->flags & JIT_F_MIPSXXR2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1532,7 +1919,7 @@ static void asm_bror(ASMState *as, IRIns *ir) } } -#if LJ_SOFTFP +#if LJ_32 && LJ_SOFTFP static void asm_sfpmin_max(ASMState *as, IRIns *ir) { CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; @@ -1581,7 +1968,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) /* -- Comparisons --------------------------------------------------------- */ -#if LJ_SOFTFP +#if LJ_32 && LJ_SOFTFP /* SFP comparisons. */ static void asm_sfpcomp(ASMState *as, IRIns *ir) { @@ -1654,13 +2041,13 @@ static void asm_comp(ASMState *as, IRIns *ir) } else { Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); if (op == IR_ABC) op = IR_UGT; - if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { + if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); asm_guard(as, mi, left, 0); } else { if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(IR(ir->op2)); if ((op&2)) k++; if (checki16(k)) { asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); @@ -1679,7 +2066,8 @@ static void asm_comp(ASMState *as, IRIns *ir) static void asm_equal(ASMState *as, IRIns *ir) { - Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR); + Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? + RSET_FPR : RSET_GPR); right = (left >> 8); left &= 255; if (!LJ_SOFTFP && irt_isnum(ir->t)) { asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); @@ -1689,7 +2077,7 @@ static void asm_equal(ASMState *as, IRIns *ir) } } -#if LJ_HASFFI +#if LJ_32 && LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_comp64(ASMState *as, IRIns *ir) { @@ -1731,7 +2119,7 @@ static void asm_comp64eq(ASMState *as, IRIns *ir) /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_HASFFI || LJ_SOFTFP +#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; @@ -1835,36 +2223,42 @@ static void asm_stack_check(ASMState *as, BCReg topslot, Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; ExitNo oldsnap = as->snapno; rset_clear(allow, pbase); +#if LJ_32 tmp = allow ? rset_pickbot(allow) : (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); +#else + tmp = allow ? rset_pickbot(allow) : RID_RET; +#endif as->snapno = exitno; asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); as->snapno = oldsnap; if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); + emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0); else ra_modified(as, tmp); emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); - emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); - emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); + emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase); + emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack)); if (pbase == RID_TMP) emit_getgl(as, RID_TMP, jit_base); emit_getgl(as, tmp, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); + emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0); } /* Restore Lua stack from on-trace state. */ static void asm_stack_restore(ASMState *as, SnapShot *snap) { SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; +#if LJ_32 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif MSize n, nent = snap->nent; /* Store the value of all modified slots to the Lua stack. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) @@ -1884,8 +2278,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); #endif } else { - Reg type; +#if LJ_32 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + Reg type; lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); @@ -1903,6 +2298,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); +#else + asm_tvstore64(as, RID_BASE, ofs, ref); +#endif } checkmclim(as); } @@ -1926,7 +2324,7 @@ static void asm_gc_check(ASMState *as) args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); tmp = ra_releasetmp(as, ASMREF_TMP2); emit_loadi(as, tmp, as->gcsteps); /* Jump around GC step if GC total < GC threshold. */ @@ -2001,7 +2399,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; int32_t spadj = as->T->spadjust; MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; + *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); } @@ -2019,9 +2417,14 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) { IRRef args[CCI_NARGS_MAX*2]; uint32_t i, nargs = CCI_XNARGS(ci); +#if LJ_32 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; +#else + int nslots = 0, ngpr = REGARG_NUMGPR; +#endif asm_collectargs(as, ir, ci, args); for (i = 0; i < nargs; i++) { +#if LJ_32 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && nfpr > 0 && !(ci->flags & CCI_VARARG)) { nfpr--; @@ -2034,6 +2437,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) nfpr = 0; if (ngpr > 0) ngpr--; else nslots++; } +#else + if (ngpr > 0) ngpr--; else nslots += 2; +#endif } if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 46821515..6daa861b 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -1,6 +1,6 @@ /* ** PPC IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ @@ -809,7 +809,7 @@ static void asm_fload(ASMState *as, IRIns *ir) int32_t ofs; if (ir->op1 == REF_NIL) { idx = RID_JGL; - ofs = ir->op2 - 32768; + ofs = (ir->op2 << 2) - 32768; } else { idx = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->op2 == IRFL_TAB_ARRAY) { diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 50784daa..3e189b1d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1,6 +1,6 @@ /* ** x86/x64 IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Guard handling ------------------------------------------------------ */ @@ -234,10 +234,10 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) as->mrm.idx = RID_NONE; if (ir->op1 == REF_NIL) { #if LJ_GC64 - as->mrm.ofs = (int32_t)ir->op2 - GG_OFS(dispatch); + as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); as->mrm.base = RID_DISPATCH; #else - as->mrm.ofs = (int32_t)ir->op2 + ptr2addr(J2GG(as->J)); + as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J)); as->mrm.base = RID_NONE; #endif return; @@ -1065,7 +1065,8 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) emit_u32(as, irt_toitype(ir->t) << 15); emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); } else { - emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15) | 0x7fff); + /* Currently, no caller passes integers that might end up here. */ + emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); } emit_movtomro(as, REX_64IR(ir, src), dest, 0); } @@ -1246,7 +1247,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) #endif } else { emit_rr(as, XO_MOV, tmp, key); +#if LJ_GC64 + checkmclim(as); + emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 32); + emit_mrm(as, XV_RORX|VEX_64, dest, key); + } else { + emit_shifti(as, XOg_SHR|REX_64, dest, 32); + emit_rr(as, XO_MOV, dest|REX_64, key|REX_64); + } +#else emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); +#endif } } } @@ -1778,8 +1791,9 @@ static void asm_cnew(ASMState *as, IRIns *ir) Reg r64 = sz == 8 ? REX_64 : 0; if (irref_isk(ir->op2)) { IRIns *irk = IR(ir->op2); - uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : - (uint64_t)(uint32_t)irk->i; + uint64_t k = (irk->o == IR_KINT64 || + (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ? + ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i; if (sz == 4 || checki32((int64_t)k)) { emit_i32(as, (int32_t)k); emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); diff --git a/src/lj_bc.c b/src/lj_bc.c index f038769c..a597692c 100644 --- a/src/lj_bc.c +++ b/src/lj_bc.c @@ -1,6 +1,6 @@ /* ** Bytecode instruction modes. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bc_c diff --git a/src/lj_bc.h b/src/lj_bc.h index 78826068..69a45f28 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h @@ -1,6 +1,6 @@ /* ** Bytecode instruction format. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BC_H diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h index 3752f151..fdfc6ec0 100644 --- a/src/lj_bcdump.h +++ b/src/lj_bcdump.h @@ -1,6 +1,6 @@ /* ** Bytecode dump definitions. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BCDUMP_H diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 455c9d82..48c5e7c7 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -1,6 +1,6 @@ /* ** Bytecode reader. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bcread_c diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index 47283059..5e05caea 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -1,6 +1,6 @@ /* ** Bytecode writer. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bcwrite_c diff --git a/src/lj_buf.c b/src/lj_buf.c index 6251b075..0dfe7f98 100644 --- a/src/lj_buf.c +++ b/src/lj_buf.c @@ -1,6 +1,6 @@ /* ** Buffer handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_buf_c diff --git a/src/lj_buf.h b/src/lj_buf.h index fb632ee2..a4051694 100644 --- a/src/lj_buf.h +++ b/src/lj_buf.h @@ -1,6 +1,6 @@ /* ** Buffer handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BUF_H diff --git a/src/lj_carith.c b/src/lj_carith.c index 6102dcaa..218abd26 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c @@ -1,6 +1,6 @@ /* ** C data arithmetic. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" diff --git a/src/lj_carith.h b/src/lj_carith.h index b3bdff8a..67d976bf 100644 --- a/src/lj_carith.h +++ b/src/lj_carith.h @@ -1,6 +1,6 @@ /* ** C data arithmetic. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CARITH_H diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 96cb6582..e369e2ab 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -1,6 +1,6 @@ /* ** FFI C call handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -301,7 +301,7 @@ unsigned int cl = ccall_classify_struct(cts, ctr); \ if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ CTSize i = (cl >> 8) - 1; \ - do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ + do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \ } else { \ if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ memcpy(dp, sp, ctr->size); \ @@ -331,7 +331,7 @@ #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in separate (!) FPRs or on stack. */ \ - isfp = ctr->size == 2*sizeof(float) ? 2 : 1; + isfp = sz == 2*sizeof(float) ? 2 : 1; #define CCALL_HANDLE_REGARG \ if (LJ_TARGET_IOS && isva) { \ @@ -359,6 +359,13 @@ } \ } +#if LJ_BE +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; +#endif + + #elif LJ_TARGET_PPC /* -- PPC calling conventions --------------------------------------------- */ @@ -1070,9 +1077,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_ARM64 && LJ_BE + if (isfp && d->size == sizeof(float)) + ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ +#endif +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 - if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || - (isfp && nsp == 0)) && d->size <= 4) { + || (isfp && nsp == 0) +#endif + ) && d->size <= 4) { *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ } #endif diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 206b7b2b..a222e526 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -1,6 +1,6 @@ /* ** FFI C call handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCALL_H @@ -79,8 +79,8 @@ typedef union FPRArg { typedef intptr_t GPRArg; typedef union FPRArg { double d; - float f; - uint32_t u32; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; + struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; } FPRArg; #elif LJ_TARGET_PPC diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 892827aa..e44d077d 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -1,6 +1,6 @@ /* ** FFI C callback handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -177,16 +177,16 @@ static void callback_mcode_init(global_State *g, uint32_t *page) uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; MSize slot; - *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); - *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); - *p++ = A64I_BR | A64F_N(RID_X11); - *p++ = A64I_NOP; + *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); + *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); + *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); + *p++ = A64I_LE(A64I_NOP); ((void **)p)[0] = target; ((void **)p)[1] = g; p += 4; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); - *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); + *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); + *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); p++; } lua_assert(p - page <= CALLBACK_MCODE_SIZE); @@ -638,6 +638,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) #if CCALL_NUM_FPR if (ctype_isfp(ctr->info)) dp = (uint8_t *)&cts->cb.fpr[0]; +#endif +#if LJ_TARGET_ARM64 && LJ_BE + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) + dp = (uint8_t *)&cts->cb.fpr[0].f[1]; #endif lj_cconv_ct_tv(cts, ctr, dp, o, 0); #ifdef CALLBACK_HANDLE_RET @@ -652,7 +656,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } -#if LJ_TARGET_MIPS64 +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ if (ctr->size <= 4 && (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) diff --git a/src/lj_ccallback.h b/src/lj_ccallback.h index 99c778d1..a8cdad38 100644 --- a/src/lj_ccallback.h +++ b/src/lj_ccallback.h @@ -1,6 +1,6 @@ /* ** FFI C callback handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCALLBACK_H diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 81f3c871..13b8230d 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c @@ -1,6 +1,6 @@ /* ** C type conversions. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -448,8 +448,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) setintV(o, (int32_t)val); } } else { + uint32_t b = (val >> pos) & 1; lua_assert(bsz == 1); - setboolV(o, (val >> pos) & 1); + setboolV(o, b); + setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */ } return 0; /* No GC step needed. */ } diff --git a/src/lj_cconv.h b/src/lj_cconv.h index 35e72cbb..0a0b66c9 100644 --- a/src/lj_cconv.h +++ b/src/lj_cconv.h @@ -1,6 +1,6 @@ /* ** C type conversions. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCONV_H diff --git a/src/lj_cdata.c b/src/lj_cdata.c index e8ffdbcb..68e16d76 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -1,6 +1,6 @@ /* ** C data management. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" diff --git a/src/lj_cdata.h b/src/lj_cdata.h index ee5b740d..5bb0f5dc 100644 --- a/src/lj_cdata.h +++ b/src/lj_cdata.h @@ -1,6 +1,6 @@ /* ** C data management. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CDATA_H diff --git a/src/lj_clib.c b/src/lj_clib.c index 8398e803..61426590 100644 --- a/src/lj_clib.c +++ b/src/lj_clib.c @@ -1,6 +1,6 @@ /* ** FFI C library loader. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" diff --git a/src/lj_clib.h b/src/lj_clib.h index ce3ab85a..fcc9dac5 100644 --- a/src/lj_clib.h +++ b/src/lj_clib.h @@ -1,6 +1,6 @@ /* ** FFI C library loader. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CLIB_H diff --git a/src/lj_cparse.c b/src/lj_cparse.c index 16d2cb65..83cfd112 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c @@ -1,6 +1,6 @@ /* ** C declaration parser. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" diff --git a/src/lj_cparse.h b/src/lj_cparse.h index bfd1a9bb..bad1060b 100644 --- a/src/lj_cparse.h +++ b/src/lj_cparse.h @@ -1,6 +1,6 @@ /* ** C declaration parser. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CPARSE_H diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 4799031a..e32ae23e 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1,6 +1,6 @@ /* ** Trace recorder for C data operations. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_ffrecord_c @@ -751,6 +751,48 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, } } +/* Record bitfield load/store. */ +static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) +{ + IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0); + TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0); + CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz; + lua_assert(t <= IRT_U32); /* NYI: 64 bit bitfields. */ + if (rd->data == 0) { /* __index metamethod. */ + if ((info & CTF_BOOL)) { + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos)))); + /* Assume not equal to zero. Fixup and emit pending guard later. */ + lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); + J->postproc = LJ_POST_FIXGUARD; + tr = TREF_TRUE; + } else if (!(info & CTF_UNSIGNED)) { + tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos)); + tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift)); + } else { + lua_assert(bsz < 32); /* Full-size fields cannot end up here. */ + tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos)); + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1))); + /* We can omit the U32 to NUM conversion, since bsz < 32. */ + } + J->base[0] = tr; + } else { /* __newindex metamethod. */ + CTState *cts = ctype_ctsG(J2G(J)); + CType *ct = ctype_get(cts, + (info & CTF_BOOL) ? CTID_BOOL : + (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32); + int32_t mask = (int32_t)(((1u << bsz)-1) << pos); + TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]); + sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos)); + /* Use of the target type avoids forwarding conversions. */ + sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask)); + tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask)); + tr = emitir(IRT(IR_BOR, t), tr, sp); + emitir(IRT(IR_XSTORE, t), ptr, tr); + rd->nres = 0; + J->needsnap = 1; + } +} + void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) { TRef idx, ptr = J->base[0]; @@ -825,6 +867,7 @@ again: CType *fct; fct = lj_ctype_getfield(cts, ct, name, &fofs); if (fct) { + ofs += (ptrdiff_t)fofs; /* Always specialize to the field name. */ emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); if (ctype_isconstval(fct->info)) { @@ -836,12 +879,14 @@ again: J->base[0] = lj_ir_kint(J, (int32_t)fct->size); return; /* Interpreter will throw for newindex. */ } else if (ctype_isbitfield(fct->info)) { - lj_trace_err(J, LJ_TRERR_NYICONV); + if (ofs) + ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); + crec_index_bf(J, rd, ptr, fct->info); + return; } else { lua_assert(ctype_isfield(fct->info)); sid = ctype_cid(fct->info); } - ofs += (ptrdiff_t)fofs; } } else if (ctype_iscomplex(ct->info)) { if (name->len == 2 && diff --git a/src/lj_crecord.h b/src/lj_crecord.h index 7a058813..c165def4 100644 --- a/src/lj_crecord.h +++ b/src/lj_crecord.h @@ -1,6 +1,6 @@ /* ** Trace recorder for C data operations. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CRECORD_H diff --git a/src/lj_ctype.c b/src/lj_ctype.c index bc47cc5b..0ea89c74 100644 --- a/src/lj_ctype.c +++ b/src/lj_ctype.c @@ -1,6 +1,6 @@ /* ** C type management. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" diff --git a/src/lj_ctype.h b/src/lj_ctype.h index bae37d2d..ed974bb9 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -1,6 +1,6 @@ /* ** C type management. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CTYPE_H diff --git a/src/lj_debug.c b/src/lj_debug.c index 153035a9..959dc289 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -1,6 +1,6 @@ /* ** Debugging and introspection. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_debug_c diff --git a/src/lj_debug.h b/src/lj_debug.h index 1028688d..5917c00b 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h @@ -1,6 +1,6 @@ /* ** Debugging and introspection. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_DEBUG_H diff --git a/src/lj_def.h b/src/lj_def.h index 9413399d..2d8fff66 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -1,6 +1,6 @@ /* ** LuaJIT common internal definitions. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_DEF_H @@ -97,6 +97,7 @@ typedef unsigned int uintptr_t; #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) #define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) #define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) +#define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p)) #define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) @@ -105,14 +106,8 @@ typedef unsigned int uintptr_t; #define checki32(x) ((x) == (int32_t)(x)) #define checku32(x) ((x) == (uint32_t)(x)) #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) -#define checkptr47(x) (((uint64_t)(x) >> 47) == 0) -#if LJ_GC64 -#define checkptrGC(x) (checkptr47((x))) -#elif LJ_64 -#define checkptrGC(x) (checkptr32((x))) -#else -#define checkptrGC(x) 1 -#endif +#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) +#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1) /* Every half-decent C compiler transforms this into a rotate instruction. */ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index e5aa495d..5d6795f8 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -1,6 +1,6 @@ /* ** Instruction dispatch handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_dispatch_c @@ -267,7 +267,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) case LUAJIT_MODE_FUNC: case LUAJIT_MODE_ALLFUNC: case LUAJIT_MODE_ALLSUBFUNC: { - cTValue *tv = idx == 0 ? frame_prev(L->base-1) : + cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 : idx > 0 ? L->base + (idx-1) : L->top + idx; GCproto *pt; if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 82708077..5bda51a2 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -1,6 +1,6 @@ /* ** Instruction dispatch handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_DISPATCH_H @@ -107,6 +107,7 @@ typedef struct GG_State { #define J2G(J) (&J2GG(J)->g) #define G2J(gl) (&G2GG(gl)->J) #define L2J(L) (&L2GG(L)->J) +#define GG_G2J (GG_OFS(J) - GG_OFS(g)) #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index dff9fac4..dee8bdcc 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -1,6 +1,6 @@ /* ** ARM instruction emitter. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Constant encoding --------------------------------------------------- */ @@ -207,7 +207,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) @@ -274,7 +274,7 @@ static void emit_call(ASMState *as, void *target) ptrdiff_t delta = ((char *)target - (char *)p) - 8; if ((((delta>>2) + 0x00800000) >> 24) == 0) { if ((delta & 1)) - *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 27); + *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 23); else *p = ARMI_BL | ((uint32_t)(delta>>2) & 0x00ffffffu); } else { /* Target out of range: need indirect call. But don't use R0-R3. */ diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h new file mode 100644 index 00000000..6da4c7d4 --- /dev/null +++ b/src/lj_emit_arm64.h @@ -0,0 +1,419 @@ +/* +** ARM64 instruction emitter. +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +** Sponsored by Cisco Systems, Inc. +*/ + +/* -- Constant encoding --------------------------------------------------- */ + +static uint64_t get_k64val(IRIns *ir) +{ + if (ir->o == IR_KINT64) { + return ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (uint64_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (uint64_t)ir_kptr(ir); + } else { + lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); + return ir->i; /* Sign-extended. */ + } +} + +/* Encode constant in K12 format for data processing instructions. */ +static uint32_t emit_isk12(int64_t n) +{ + uint64_t k = (n < 0) ? -n : n; + uint32_t m = (n < 0) ? 0x40000000 : 0; + if (k < 0x1000) { + return A64I_K12|m|A64F_U12(k); + } else if ((k & 0xfff000) == k) { + return A64I_K12|m|0x400000|A64F_U12(k>>12); + } + return 0; +} + +#define emit_clz64(n) __builtin_clzll(n) +#define emit_ctz64(n) __builtin_ctzll(n) + +/* Encode constant in K13 format for logical data processing instructions. */ +static uint32_t emit_isk13(uint64_t n, int is64) +{ + int inv = 0, w = 128, lz, tz; + if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ + if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ + do { /* Find the repeat width. */ + if (is64 && (uint32_t)(n^(n>>32))) break; + n = (uint32_t)n; + if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ + w = 32; if ((n^(n>>16)) & 0xffff) break; + n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; + n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; + n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; + n = n & 0x3; w = 2; + } while (0); + lz = emit_clz64(n); + tz = emit_ctz64(n); + if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ + if (inv) + return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); + else + return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); +} + +static uint32_t emit_isfpk64(uint64_t n) +{ + uint64_t etop9 = ((n >> 54) & 0x1ff); + if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { + return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); + } + return ~0u; +} + +/* -- Emit basic instructions --------------------------------------------- */ + +static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); +} + +static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); +} + +static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); +} + +static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); +} + +static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) +{ + *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); +} + +static void emit_d(ASMState *as, A64Ins ai, Reg rd) +{ + *--as->mcp = ai | A64F_D(rd); +} + +static void emit_n(ASMState *as, A64Ins ai, Reg rn) +{ + *--as->mcp = ai | A64F_N(rn); +} + +static int emit_checkofs(A64Ins ai, int64_t ofs) +{ + int scale = (ai >> 30) & 3; + if (ofs < 0 || (ofs & ((1< = -256 && ofs <= 255) ? -1 : 0; + } else { + return (ofs < (4096< > 30) & 3; + lua_assert(ot); + /* Combine LDR/STR pairs to LDP/STP. */ + if ((sc == 2 || sc == 3) && + (!(ai & 0x400000) || rd != rn) && + as->mcp != as->mcloop) { + uint32_t prev = *as->mcp & ~A64F_D(31); + int ofsm = ofs - (1< >sc)) || + prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { + aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); + } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || + prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { + aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); + ofsm = ofs; + } else { + goto nopair; + } + if (ofsm >= (int)((unsigned int)-64< mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | + (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); + return; + } + } +nopair: + if (ot == 1) + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); + else + *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); +} + +/* -- Emit loads/stores --------------------------------------------------- */ + +/* Prefer rematerialization of BASE/L from global_State over spills. */ +#define emit_canremat(ref) ((ref) <= ASMREF_L) + +/* Try to find an N-step delta relative to other consts with N < lim. */ +static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) +{ + RegSet work = ~as->freeset & RSET_GPR; + if (lim <= 1) return 0; /* Can't beat that. */ + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); + lua_assert(r != rd); + if (ref < REF_TRUE) { + uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : + get_k64val(IR(ref)); + int64_t delta = (int64_t)(k - kx); + if (delta == 0) { + emit_dm(as, A64I_MOVx, rd, r); + return 1; + } else { + uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); + if (k12) { + emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); + return 1; + } + /* Do other ops or multi-step deltas pay off? Probably not. + ** E.g. XOR rarely helps with pointer consts. + */ + } + } + rset_clear(work, r); + } + return 0; /* Failed. */ +} + +static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) +{ + uint32_t k13 = emit_isk13(u64, is64); + if (k13) { /* Can the constant be represented as a bitmask immediate? */ + emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); + } else { + int i, zeros = 0, ones = 0, neg; + if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ + /* Count homogeneous 16 bit fragments. */ + for (i = 0; i < 4; i++) { + uint64_t frag = (u64 >> i*16) & 0xffff; + zeros += (frag == 0); + ones += (frag == 0xffff); + } + neg = ones > zeros; /* Use MOVN if it pays off. */ + if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { + int shift = 0, lshift = 0; + uint64_t n64 = neg ? ~u64 : u64; + if (n64 != 0) { + /* Find first/last fragment to be filled. */ + shift = (63-emit_clz64(n64)) & ~15; + lshift = emit_ctz64(n64) & ~15; + } + /* MOVK requires the original value (u64). */ + while (shift > lshift) { + uint32_t u16 = (u64 >> shift) & 0xffff; + /* Skip fragments that are correctly filled by MOVN/MOVZ. */ + if (u16 != (neg ? 0xffff : 0)) + emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); + shift -= 16; + } + /* But MOVN needs an inverted value (n64). */ + emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | + A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); + } + } +} + +/* Load a 32 bit constant into a GPR. */ +#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) + +/* Load a 64 bit constant into a GPR. */ +#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) + +#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) + +#define glofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) +#define checkmcpofs(as, k) \ + ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); + +/* Get/set from constant pointer. */ +static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) +{ + /* First, check if ip + offset is in range. */ + if ((ai & 0x00400000) && checkmcpofs(as, p)) { + emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); + } else { + Reg base = RID_GL; /* Next, try GL + offset. */ + int64_t ofs = glofs(as, p); + if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ + int64_t i64 = i64ptr(p); + base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); + ofs = i64 & 0x7fffull; + } + emit_lso(as, ai, r, base, ofs); + } +} + +/* Load 64 bit IR constant into register. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + int64_t ofs; + if (r >= RID_MAX_GPR) { + uint32_t fpk = emit_isfpk64(*k); + if (fpk != ~0u) { + emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); + return; + } + } + ofs = glofs(as, k); + if (emit_checkofs(A64I_LDRx, ofs)) { + emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, + (r & 31), RID_GL, ofs); + } else { + if (r >= RID_MAX_GPR) { + emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); + r = RID_TMP; + } + if (checkmcpofs(as, k)) + emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); + else + emit_loadu64(as, r, *k); + } +} + +/* Get/set global_State fields. */ +#define emit_getgl(as, r, field) \ + emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) +#define emit_setgl(as, r, field) \ + emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) + +/* Trace number is determined from pc of exit instruction. */ +#define emit_setvmstate(as, i) UNUSED(i) + +/* -- Emit control-flow instructions -------------------------------------- */ + +/* Label for internal jumps. */ +typedef MCode *MCLabel; + +/* Return label pointing to current PC. */ +#define emit_label(as) ((as)->mcp) + +static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lua_assert(((delta + 0x40000) >> 19) == 0); + *p = A64I_BCC | A64F_S19(delta) | cond; +} + +static void emit_branch(ASMState *as, A64Ins ai, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lua_assert(((delta + 0x02000000) >> 26) == 0); + *p = ai | ((uint32_t)delta & 0x03ffffffu); +} + +static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0); + if (bit > 31) ai |= A64I_X; + *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r; +} + +static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lua_assert(((delta + 0x40000) >> 19) == 0); + *p = ai | A64F_S19(delta) | r; +} + +#define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) + +static void emit_call(ASMState *as, void *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = (char *)target - (char *)p; + if ((((delta>>2) + 0x02000000) >> 26) == 0) { + *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); + } else { /* Target out of range: need indirect call. But don't use R0-R7. */ + Reg r = ra_allock(as, i64ptr(target), + RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); + *p = A64I_BLR | A64F_N(r); + } +} + +/* -- Emit generic operations --------------------------------------------- */ + +/* Generic move between two regs. */ +static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) +{ + if (dst >= RID_MAX_GPR) { + emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, + (dst & 31), (src & 31)); + return; + } + if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ + MCode ins = *as->mcp, swp = (src^dst); + if ((ins & 0xbf800000) == 0xb9000000) { + if (!((ins ^ (dst << 5)) & 0x000003e0)) + *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ + if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) + *as->mcp = ins ^ swp; /* Swap D in store. */ + } + } + emit_dm(as, A64I_MOVx, dst, src); +} + +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r >= RID_MAX_GPR) + emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); + else + emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); +} + +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r >= RID_MAX_GPR) + emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); + else + emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); +} + +/* Emit an arithmetic operation with a constant operand. */ +static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, + int32_t i, RegSet allow) +{ + uint32_t k = emit_isk12(i); + if (k) + emit_dn(as, ai^k, dest, src); + else + emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); +} + +/* Add offset to pointer. */ +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) + emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, + ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); +} + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) + diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index d35f830b..8a9ee24d 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -1,8 +1,30 @@ /* ** MIPS instruction emitter. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ +#if LJ_64 +static intptr_t get_k64val(IRIns *ir) +{ + if (ir->o == IR_KINT64) { + return (intptr_t)ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); + } else { + lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); + return ir->i; /* Sign-extended. */ + } +} +#endif + +#if LJ_64 +#define get_kval(ir) get_k64val(ir) +#else +#define get_kval(ir) ((ir)->i) +#endif + /* -- Emit basic instructions --------------------------------------------- */ static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) @@ -35,7 +57,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) { - if ((as->flags & JIT_F_MIPSXXR2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, src, shift); } else { emit_dst(as, MIPSI_OR, dest, dest, tmp); @@ -44,13 +66,21 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) } } +#if LJ_64 +static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, + uint32_t lsb) +{ + *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb); +} +#endif + /* -- Emit loads/stores --------------------------------------------------- */ /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) /* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) +static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { @@ -58,9 +88,10 @@ static int emit_kdelta1(ASMState *as, Reg t, int32_t i) IRRef ref = regcost_ref(as->cost[r]); lua_assert(r != t); if (ref < ASMREF_L) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); + intptr_t delta = (intptr_t)((uintptr_t)i - + (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); if (checki16(delta)) { - emit_tsi(as, MIPSI_ADDIU, t, r, delta); + emit_tsi(as, MIPSI_AADDIU, t, r, delta); return 1; } } @@ -76,8 +107,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) emit_ti(as, MIPSI_LI, r, i); } else { if ((i & 0xffff)) { - int32_t jgl = i32ptr(J2G(as->J)); - if ((uint32_t)(i-jgl) < 65536) { + intptr_t jgl = (intptr_t)(void *)J2G(as->J); + if ((uintptr_t)(i-jgl) < 65536) { emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); return; } else if (emit_kdelta1(as, r, i)) { @@ -92,16 +123,48 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) +#if LJ_64 +/* Load a 64 bit constant into a GPR. */ +static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) +{ + if (checki32((int64_t)u64)) { + emit_loadi(as, r, (int32_t)u64); + } else { + uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); + if (delta < 65536) { + emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); + } else if (emit_kdelta1(as, r, (intptr_t)u64)) { + return; + } else { + if ((u64 & 0xffff)) { + emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); + } + if (((u64 >> 16) & 0xffff)) { + emit_dta(as, MIPSI_DSLL, r, r, 16); + emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff); + emit_dta(as, MIPSI_DSLL, r, r, 16); + } else { + emit_dta(as, MIPSI_DSLL32, r, r, 0); + } + emit_loadi(as, r, (int32_t)(u64 >> 32)); + } + /* TODO: There are probably more optimization opportunities. */ + } +} -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); -static void ra_allockreg(ASMState *as, int32_t k, Reg r); +#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) +#else +#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) +#endif + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); +static void ra_allockreg(ASMState *as, intptr_t k, Reg r); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) { - int32_t jgl = i32ptr(J2G(as->J)); - int32_t i = i32ptr(p); + intptr_t jgl = (intptr_t)(J2G(as->J)); + intptr_t i = (intptr_t)(p); Reg base; if ((uint32_t)(i-jgl) < 65536) { i = i-jgl-32768; @@ -112,8 +175,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) emit_tsi(as, mi, r, base, i); } +#if LJ_64 +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + Reg r64 = r; + if (rset_test(RSET_FPR, r)) { + r64 = RID_TMP; + emit_tg(as, MIPSI_DMTC1, r64, r); + } + if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) + emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0); + else + emit_loadu64(as, r64, *k); +} +#else #define emit_loadk64(as, r, ir) \ emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) +#endif /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) @@ -122,9 +201,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) } #define emit_getgl(as, r, field) \ - emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) + emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field)) #define emit_setgl(as, r, field) \ - emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) + emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field)) /* Trace number is determined from per-trace exit stubs. */ #define emit_setvmstate(as, i) UNUSED(i) @@ -164,7 +243,7 @@ static void emit_call(ASMState *as, void *target, int needcfa) needcfa = 1; } as->mcp = p; - if (needcfa) ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); + if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); } /* -- Emit generic operations --------------------------------------------- */ @@ -185,7 +264,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_tsi(as, MIPSI_LW, r, base, ofs); + emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs); else emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, (r & 31), base, ofs); @@ -195,7 +274,7 @@ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_tsi(as, MIPSI_SW, r, base, ofs); + emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs); else emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, (r&31), base, ofs); @@ -206,7 +285,7 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { lua_assert(checki16(ofs)); - emit_tsi(as, MIPSI_ADDIU, r, r, ofs); + emit_tsi(as, MIPSI_AADDIU, r, r, ofs); } } diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index 5163012a..21c3c2ac 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -1,6 +1,6 @@ /* ** PPC instruction emitter. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Emit basic instructions --------------------------------------------- */ @@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index f0bca938..5207f9da 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -1,6 +1,6 @@ /* ** x86/x64 instruction emitter. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Emit basic instructions --------------------------------------------- */ @@ -392,6 +392,7 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; + lj_mcode_commitbot(as->J, as->mcbot); } emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); #else diff --git a/src/lj_err.c b/src/lj_err.c index 600e6ee6..b6be357e 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -1,6 +1,6 @@ /* ** Error handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_err_c @@ -509,7 +509,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) global_State *g = G(L); lj_trace_abort(g); setmref(g->jit_base, NULL); - L->status = 0; + L->status = LUA_OK; #if LJ_UNWIND_EXT err_raise_ext(errcode); /* diff --git a/src/lj_err.h b/src/lj_err.h index 6c757613..cba5fb71 100644 --- a/src/lj_err.h +++ b/src/lj_err.h @@ -1,6 +1,6 @@ /* ** Error handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_ERR_H diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 0ed8f4e7..060a9f89 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -1,6 +1,6 @@ /* ** VM error messages. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* This file may be included multiple times with different ERRDEF macros. */ diff --git a/src/lj_ff.h b/src/lj_ff.h index 9af3b12d..31d65a00 100644 --- a/src/lj_ff.h +++ b/src/lj_ff.h @@ -1,6 +1,6 @@ /* ** Fast function IDs. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FF_H diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 6d141a20..dfdee2db 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -1,6 +1,6 @@ /* ** Fast function call recorder. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_ffrecord_c diff --git a/src/lj_ffrecord.h b/src/lj_ffrecord.h index 7a188beb..3b407450 100644 --- a/src/lj_ffrecord.h +++ b/src/lj_ffrecord.h @@ -1,6 +1,6 @@ /* ** Fast function call recorder. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FFRECORD_H diff --git a/src/lj_frame.h b/src/lj_frame.h index 7ef5f7d7..939e3f88 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -1,6 +1,6 @@ /* ** Stack frames. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FRAME_H diff --git a/src/lj_func.c b/src/lj_func.c index 431a56d7..639dad87 100644 --- a/src/lj_func.c +++ b/src/lj_func.c @@ -1,6 +1,6 @@ /* ** Function handling (prototypes, functions and upvalues). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -140,7 +140,9 @@ GCfunc *lj_func_newL_empty(lua_State *L, GCproto *pt, GCtab *env) /* NOBARRIER: The GCfunc is new (marked white). */ for (i = 0; i < nuv; i++) { GCupval *uv = func_emptyuv(L); - uv->dhash = (uint32_t)(uintptr_t)pt ^ ((uint32_t)proto_uv(pt)[i] << 24); + int32_t v = proto_uv(pt)[i]; + uv->immutable = ((v / PROTO_UV_IMMUTABLE) & 1); + uv->dhash = (uint32_t)(uintptr_t)pt ^ (v << 24); setgcref(fn->l.uvptr[i], obj2gco(uv)); } fn->l.nupvalues = (uint8_t)nuv; diff --git a/src/lj_func.h b/src/lj_func.h index e0c3c555..901751b9 100644 --- a/src/lj_func.h +++ b/src/lj_func.h @@ -1,6 +1,6 @@ /* ** Function handling (prototypes, functions and upvalues). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FUNC_H diff --git a/src/lj_gc.c b/src/lj_gc.c index 7c707462..2aaf5b2c 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -1,6 +1,6 @@ /* ** Garbage collector. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -169,12 +169,19 @@ static int gc_traverse_tab(global_State *g, GCtab *t) while ((c = *modestr++)) { if (c == 'k') weak |= LJ_GC_WEAKKEY; else if (c == 'v') weak |= LJ_GC_WEAKVAL; - else if (c == 'K') weak = (int)(~0u & ~LJ_GC_WEAKVAL); } - if (weak > 0) { /* Weak tables are cleared in the atomic phase. */ - t->marked = (uint8_t)((t->marked & ~LJ_GC_WEAK) | weak); - setgcrefr(t->gclist, g->gc.weak); - setgcref(g->gc.weak, obj2gco(t)); + if (weak) { /* Weak tables are cleared in the atomic phase. */ +#if LJ_HASFFI + CTState *cts = ctype_ctsG(g); + if (cts && cts->finalizer == t) { + weak = (int)(~0u & ~LJ_GC_WEAKVAL); + } else +#endif + { + t->marked = (uint8_t)((t->marked & ~LJ_GC_WEAK) | weak); + setgcrefr(t->gclist, g->gc.weak); + setgcref(g->gc.weak, obj2gco(t)); + } } } if (weak == LJ_GC_WEAK) /* Nothing to mark if both keys/values are weak. */ @@ -310,7 +317,7 @@ static size_t propagatemark(global_State *g) if (gc_traverse_tab(g, t) > 0) black2gray(o); /* Keep weak tables gray. */ return sizeof(GCtab) + sizeof(TValue) * t->asize + - sizeof(Node) * (t->hmask + 1); + (t->hmask ? sizeof(Node) * (t->hmask + 1) : 0); } else if (LJ_LIKELY(gct == ~LJ_TFUNC)) { GCfunc *fn = gco2func(o); gc_traverse_func(g, fn); diff --git a/src/lj_gc.h b/src/lj_gc.h index eee09afb..669bbe92 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h @@ -1,6 +1,6 @@ /* ** Garbage collector. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_GC_H diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 8b72be7d..c219ffac 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -1,6 +1,6 @@ /* ** Client for the GDB JIT API. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_gdbjit_c @@ -296,6 +296,9 @@ enum { #elif LJ_TARGET_ARM DW_REG_SP = 13, DW_REG_RA = 14, +#elif LJ_TARGET_ARM64 + DW_REG_SP = 31, + DW_REG_RA = 30, #elif LJ_TARGET_PPC DW_REG_SP = 1, DW_REG_RA = 65, @@ -374,6 +377,8 @@ static const ELFheader elfhdr_template = { .machine = 62, #elif LJ_TARGET_ARM .machine = 40, +#elif LJ_TARGET_ARM64 + .machine = 183, #elif LJ_TARGET_PPC .machine = 20, #elif LJ_TARGET_MIPS @@ -563,6 +568,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) int i; for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } } +#elif LJ_TARGET_ARM64 + { + int i; + DB(DW_CFA_offset|31); DUV(2); + for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } + for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } + } #elif LJ_TARGET_PPC { int i; diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h index 1043bc69..bbaa1568 100644 --- a/src/lj_gdbjit.h +++ b/src/lj_gdbjit.h @@ -1,6 +1,6 @@ /* ** Client for the GDB JIT API. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_GDBJIT_H diff --git a/src/lj_ir.c b/src/lj_ir.c index 87fd0f4d..5baece67 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -1,6 +1,6 @@ /* ** SSA IR (Intermediate Representation) emitter. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_ir_c @@ -145,10 +145,12 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) return emitir(CCI_OPTYPE(ci), tr, id); } -/* Load field of type t from GG_State + offset. */ +/* Load field of type t from GG_State + offset. Must be 32 bit aligned. */ LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) { - lua_assert(ofs >= IRFL__MAX && ofs < REF_BIAS); + lua_assert((ofs & 3) == 0); + ofs >>= 2; + lua_assert(ofs >= IRFL__MAX && ofs <= 0x3ff); /* 10 bit FOLD key limit. */ lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); return lj_opt_fold(J); } diff --git a/src/lj_ir.h b/src/lj_ir.h index e77f7b99..34c27853 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -1,6 +1,6 @@ /* ** SSA IR (Intermediate Representation) format. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_IR_H diff --git a/src/lj_ircall.h b/src/lj_ircall.h index c7cd4257..973c36e6 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -1,6 +1,6 @@ /* ** IR CALL* instruction definitions. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_IRCALL_H diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 8b7a43de..73aef0ef 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -1,6 +1,6 @@ /* ** Common header for IR emitter and optimizations. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_IROPT_H diff --git a/src/lj_jit.h b/src/lj_jit.h index 3505c63f..92054e3d 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -1,6 +1,6 @@ /* ** Common definitions for the JIT compiler. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_JIT_H @@ -337,6 +337,10 @@ enum { #endif #if LJ_TARGET_MIPS LJ_K64_2P31, /* 2^31 */ +#if LJ_64 + LJ_K64_2P63, /* 2^63 */ + LJ_K64_M2P64, /* -2^64 */ +#endif #endif LJ_K64__MAX, }; @@ -351,6 +355,10 @@ enum { #endif #if LJ_TARGET_PPC || LJ_TARGET_MIPS LJ_K32_2P31, /* 2^31 */ +#endif +#if LJ_TARGET_MIPS64 + LJ_K32_2P63, /* 2^63 */ + LJ_K32_M2P64, /* -2^64 */ #endif LJ_K32__MAX }; diff --git a/src/lj_lex.c b/src/lj_lex.c index 0187c426..2d2f8194 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -1,6 +1,6 @@ /* ** Lexical analyzer. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h diff --git a/src/lj_lex.h b/src/lj_lex.h index 7fed1fd6..33fa8657 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h @@ -1,6 +1,6 @@ /* ** Lexical analyzer. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_LEX_H diff --git a/src/lj_lib.c b/src/lj_lib.c index 8bdf6912..b8638de6 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -1,6 +1,6 @@ /* ** Library function support. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_lib_c diff --git a/src/lj_lib.h b/src/lj_lib.h index dbc04055..37ec9d78 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h @@ -1,6 +1,6 @@ /* ** Library function support. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_LIB_H diff --git a/src/lj_load.c b/src/lj_load.c index d500d162..9a31d9a1 100644 --- a/src/lj_load.c +++ b/src/lj_load.c @@ -1,6 +1,6 @@ /* ** Load and dump code. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 3eaee054..77035bf7 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -1,6 +1,6 @@ /* ** Machine code management. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_mcode_c @@ -204,8 +204,8 @@ static void mcode_protect(jit_State *J, int prot) /* -- MCode area allocation ----------------------------------------------- */ -#if LJ_TARGET_X64 -#define mcode_validptr(p) ((p) && (uintptr_t)(p) < (uintptr_t)1<<47) +#if LJ_64 +#define mcode_validptr(p) (p) #else #define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000) #endif @@ -221,8 +221,8 @@ static void *mcode_alloc(jit_State *J, size_t sz) */ #if LJ_TARGET_MIPS /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + - 0x08000000u; + uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & + ~(uintptr_t)0x0fffffffu) + 0x08000000u; #else uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; #endif @@ -230,7 +230,8 @@ static void *mcode_alloc(jit_State *J, size_t sz) /* First try a contiguous area below the last one. */ uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; int i; - for (i = 0; i < 32; i++) { /* 32 attempts ought to be enough ... */ + /* Limit probing iterations, depending on the available pool size. */ + for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) { if (mcode_validptr(hint)) { void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN); @@ -239,11 +240,11 @@ static void *mcode_alloc(jit_State *J, size_t sz) return p; if (p) mcode_free(J, p, sz); /* Free badly placed area. */ } - /* Next try probing pseudo-random addresses. */ + /* Next try probing 64K-aligned pseudo-random addresses. */ do { - hint = (0x78fb ^ LJ_PRNG_BITS(J, 15)) << 16; /* 64K aligned. */ - } while (!(hint + sz < range)); - hint = target + hint - (range>>1); + hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; + } while (!(hint + sz < range+range)); + hint = target + hint - range; } lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */ return NULL; diff --git a/src/lj_mcode.h b/src/lj_mcode.h index 503e0019..f0847e93 100644 --- a/src/lj_mcode.h +++ b/src/lj_mcode.h @@ -1,6 +1,6 @@ /* ** Machine code management. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_MCODE_H diff --git a/src/lj_meta.c b/src/lj_meta.c index c7993ad6..0bd4d842 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -1,6 +1,6 @@ /* ** Metamethod handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h diff --git a/src/lj_meta.h b/src/lj_meta.h index 52e85cba..73b45724 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h @@ -1,6 +1,6 @@ /* ** Metamethod handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_META_H diff --git a/src/lj_obj.c b/src/lj_obj.c index 1daea817..ee33aeb3 100644 --- a/src/lj_obj.c +++ b/src/lj_obj.c @@ -1,6 +1,6 @@ /* ** Miscellaneous object handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_obj_c diff --git a/src/lj_obj.h b/src/lj_obj.h index 25da9455..52372c3e 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -1,6 +1,6 @@ /* ** LuaJIT VM tags, values and objects. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c index e5a096af..2417f324 100644 --- a/src/lj_opt_dce.c +++ b/src/lj_opt_dce.c @@ -1,6 +1,6 @@ /* ** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_dce_c diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 5f4b8810..acbf36a5 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2,7 +2,7 @@ ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. ** ABCelim: Array Bounds Check Elimination. ** CSE: Common-Subexpression Elimination. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_fold_c @@ -173,8 +173,6 @@ LJFOLD(ADD KNUM KNUM) LJFOLD(SUB KNUM KNUM) LJFOLD(MUL KNUM KNUM) LJFOLD(DIV KNUM KNUM) -LJFOLD(NEG KNUM KNUM) -LJFOLD(ABS KNUM KNUM) LJFOLD(ATAN2 KNUM KNUM) LJFOLD(LDEXP KNUM KNUM) LJFOLD(MIN KNUM KNUM) @@ -187,6 +185,15 @@ LJFOLDF(kfold_numarith) return lj_ir_knum(J, y); } +LJFOLD(NEG KNUM FLOAD) +LJFOLD(ABS KNUM FLOAD) +LJFOLDF(kfold_numabsneg) +{ + lua_Number a = knumleft; + lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD); + return lj_ir_knum(J, y); +} + LJFOLD(LDEXP KNUM KINT) LJFOLDF(kfold_ldexp) { @@ -347,6 +354,11 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) case IR_BAND: k1 &= k2; break; case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; + case IR_BSHL: k1 <<= (k2 & 63); break; + case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; + case IR_BSAR: k1 >>= (k2 & 63); break; + case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; + case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; #endif default: UNUSED(k2); lua_assert(0); break; } @@ -436,14 +448,14 @@ LJFOLDF(kfold_int64comp) #if LJ_HASFFI uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64; switch ((IROp)fins->o) { - case IR_LT: return CONDFOLD(a < b); - case IR_GE: return CONDFOLD(a >= b); - case IR_LE: return CONDFOLD(a <= b); - case IR_GT: return CONDFOLD(a > b); - case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b); - case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b); - case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b); - case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b); + case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b); + case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b); + case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b); + case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b); + case IR_ULT: return CONDFOLD(a < b); + case IR_UGE: return CONDFOLD(a >= b); + case IR_ULE: return CONDFOLD(a <= b); + case IR_UGT: return CONDFOLD(a > b); default: lua_assert(0); return FAILFOLD; } #else @@ -911,13 +923,13 @@ LJFOLDF(shortcut_round) return NEXTFOLD; } -LJFOLD(ABS ABS KNUM) +LJFOLD(ABS ABS FLOAD) LJFOLDF(shortcut_left) { return LEFTFOLD; /* f(g(x)) ==> g(x) */ } -LJFOLD(ABS NEG KNUM) +LJFOLD(ABS NEG FLOAD) LJFOLDF(shortcut_dropleft) { PHIBARRIER(fleft); @@ -1653,6 +1665,14 @@ LJFOLDF(simplify_shiftk_andk) fins->op2 = (IRRef1)lj_ir_kint(J, k); fins->ot = IRTI(IR_BAND); return RETRYFOLD; + } else if (irk->o == IR_KINT64) { + uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o); + IROpT ot = fleft->ot; + fins->op1 = fleft->op1; + fins->op1 = (IRRef1)lj_opt_fold(J); + fins->op2 = (IRRef1)lj_ir_kint64(J, k); + fins->ot = ot; + return RETRYFOLD; } return NEXTFOLD; } @@ -1668,6 +1688,47 @@ LJFOLDF(simplify_andk_shiftk) return NEXTFOLD; } +LJFOLD(BAND BOR KINT) +LJFOLD(BOR BAND KINT) +LJFOLDF(simplify_andor_k) +{ + IRIns *irk = IR(fleft->op2); + PHIBARRIER(fleft); + if (irk->o == IR_KINT) { + int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); + /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ + /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ + if (k == (fins->o == IR_BAND ? 0 : -1)) { + fins->op1 = fleft->op1; + return RETRYFOLD; + } + } + return NEXTFOLD; +} + +LJFOLD(BAND BOR KINT64) +LJFOLD(BOR BAND KINT64) +LJFOLDF(simplify_andor_k64) +{ +#if LJ_HASFFI + IRIns *irk = IR(fleft->op2); + PHIBARRIER(fleft); + if (irk->o == IR_KINT64) { + uint64_t k = kfold_int64arith(ir_k64(irk)->u64, + ir_k64(fright)->u64, (IROp)fins->o); + /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ + /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ + if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) { + fins->op1 = fleft->op1; + return RETRYFOLD; + } + } + return NEXTFOLD; +#else + UNUSED(J); lua_assert(0); return FAILFOLD; +#endif +} + /* -- Reassociation ------------------------------------------------------- */ LJFOLD(ADD ADD KINT) diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 7978f3ee..04c6d06d 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -1,6 +1,6 @@ /* ** LOOP: Loop Optimizations. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_loop_c diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 92ecbb48..cc177d39 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -3,7 +3,7 @@ ** AA: Alias Analysis using high-level semantic disambiguation. ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L). ** DSE: Dead-Store Elimination. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_mem_c diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index ca0a0f49..cd96ca4b 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -1,7 +1,7 @@ /* ** NARROW: Narrowing of numbers to integers (double to int32_t). ** STRIPOV: Stripping of overflow checks. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_narrow_c diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index af05ef46..929ccb61 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c @@ -1,6 +1,6 @@ /* ** SINK: Allocation Sinking and Store Sinking. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_sink_c diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 884285d2..fc935204 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -1,6 +1,6 @@ /* ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_split_c @@ -436,7 +436,8 @@ static void split_ir(jit_State *J) nir->o = IR_CONV; /* Pass through loword. */ nir->op2 = (IRT_INT << 5) | IRT_INT; hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), - hisubst[ir->op1], hisubst[ir->op2]); + hisubst[ir->op1], + lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG)))); break; case IR_SLOAD: if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ diff --git a/src/lj_parse.c b/src/lj_parse.c index 5df4c6ec..08f7cfa6 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1,6 +1,6 @@ /* ** Lua parser (source code -> bytecode). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -1282,12 +1282,14 @@ static void fscope_end(FuncState *fs) MSize idx = gola_new(ls, NAME_BREAK, VSTACK_LABEL, fs->pc); ls->vtop = idx; /* Drop break label immediately. */ gola_resolve(ls, bl, idx); + } else { /* Need the fixup step to propagate the breaks. */ + gola_fixup(ls, bl); return; - } /* else: need the fixup step to propagate the breaks. */ - } else if (!(bl->flags & FSCOPE_GOLA)) { - return; + } + } + if ((bl->flags & FSCOPE_GOLA)) { + gola_fixup(ls, bl); } - gola_fixup(ls, bl); } /* Mark scope as having an upvalue. */ diff --git a/src/lj_parse.h b/src/lj_parse.h index b383c897..ceeab699 100644 --- a/src/lj_parse.h +++ b/src/lj_parse.h @@ -1,6 +1,6 @@ /* ** Lua parser (source code -> bytecode). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_PARSE_H diff --git a/src/lj_profile.c b/src/lj_profile.c index 95b0a621..116998e1 100644 --- a/src/lj_profile.c +++ b/src/lj_profile.c @@ -1,6 +1,6 @@ /* ** Low-overhead profiling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_profile_c diff --git a/src/lj_profile.h b/src/lj_profile.h index 14c3fc08..0cccfd78 100644 --- a/src/lj_profile.h +++ b/src/lj_profile.h @@ -1,6 +1,6 @@ /* ** Low-overhead profiling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_PROFILE_H diff --git a/src/lj_record.c b/src/lj_record.c index 76699a9f..9d0469c4 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1,6 +1,6 @@ /* ** Trace recorder (bytecode -> SSA IR). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_record_c @@ -105,7 +105,7 @@ static void rec_check_slots(jit_State *J) lua_assert(tref_isfunc(tr)); #if LJ_FR2 } else if (s == 1) { - lua_assert(0); + lua_assert((tr & ~TREF_FRAME) == 0); #endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); @@ -747,7 +747,7 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) } /* Move func + args down. */ if (LJ_FR2 && J->baseslot == 2) - J->base[func+1] = 0; + J->base[func+1] = TREF_FRAME; memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ /* Tailcalls can form a loop, so count towards the loop unroll limit. */ @@ -1765,6 +1765,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) int32_t numparams = J->pt->numparams; ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; lua_assert(frame_isvarg(J->L->base-1)); + if (LJ_FR2 && dst > J->maxslot) + J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ ptrdiff_t i; if (nvararg < 0) nvararg = 0; @@ -2261,6 +2263,8 @@ void lj_record_ins(jit_State *J) rc = lj_ir_kint(J, (int32_t)(int16_t)rc); break; case BC_KNIL: + if (LJ_FR2 && ra > J->maxslot) + J->base[ra-1] = 0; while (ra <= rc) J->base[ra++] = TREF_NIL; if (rc >= J->maxslot) J->maxslot = rc+1; diff --git a/src/lj_record.h b/src/lj_record.h index cb184e8f..93d374d2 100644 --- a/src/lj_record.h +++ b/src/lj_record.h @@ -1,6 +1,6 @@ /* ** Trace recorder (bytecode -> SSA IR). -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_RECORD_H diff --git a/src/lj_snap.c b/src/lj_snap.c index 48259972..bb063c2b 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -1,6 +1,6 @@ /* ** Snapshot handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_snap_c @@ -69,9 +69,13 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); #if LJ_FR2 - if (s == 1) continue; + if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */ + if ((tr & TREF_FRAME)) + map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL); + continue; + } if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { - TValue *base = J->L->base - J->baseslot; + cTValue *base = J->L->base - J->baseslot; tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); ref = tref_ref(tr); } @@ -470,7 +474,11 @@ void lj_snap_replay(jit_State *J, GCtrace *T) goto setslot; bloomset(seen, ref); if (irref_isk(ref)) { - tr = snap_replay_const(J, ir); + /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */ + if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL))) + tr = 0; + else + tr = snap_replay_const(J, ir); } else if (!regsp_used(ir->prev)) { pass23 = 1; lua_assert(s != 0); @@ -484,7 +492,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) } setslot: J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ - J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); + J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); if ((sn & SNAP_FRAME)) J->baseslot = s+1; } @@ -715,8 +723,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, #else if (LJ_BE && sz == 4) src++; #endif - } + } else #endif + if (LJ_64 && LJ_BE && sz == 4) src++; } } lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); diff --git a/src/lj_snap.h b/src/lj_snap.h index 1d3379bc..2c9ae3d6 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h @@ -1,6 +1,6 @@ /* ** Snapshot handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_SNAP_H diff --git a/src/lj_state.c b/src/lj_state.c index a3bfc45e..632dd07e 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -1,6 +1,6 @@ /* ** State and stack handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -224,7 +224,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) close_state(L); return NULL; } - L->status = 0; + L->status = LUA_OK; return L; } @@ -256,10 +256,10 @@ LUA_API void lua_close(lua_State *L) #endif for (i = 0;;) { hook_enter(g); - L->status = 0; + L->status = LUA_OK; L->base = L->top = tvref(L->stack) + 1 + LJ_FR2; L->cframe = NULL; - if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) { + if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) { if (++i >= 10) break; lj_gc_separateudata(g, 1); /* Separate udata again. */ if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ @@ -274,7 +274,7 @@ lua_State *lj_state_new(lua_State *L) lua_State *L1 = lj_mem_newobj(L, lua_State); L1->gct = ~LJ_TTHREAD; L1->dummy_ffid = FF_C; - L1->status = 0; + L1->status = LUA_OK; L1->stacksize = 0; setmref(L1->stack, NULL); L1->cframe = NULL; diff --git a/src/lj_state.h b/src/lj_state.h index e128d321..02a0eafa 100644 --- a/src/lj_state.h +++ b/src/lj_state.h @@ -1,6 +1,6 @@ /* ** State and stack handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STATE_H @@ -28,7 +28,7 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) LJ_FUNC lua_State *lj_state_new(lua_State *L); LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); -#if LJ_64 +#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); #endif diff --git a/src/lj_str.c b/src/lj_str.c index a6a9364a..264dedc1 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -1,6 +1,6 @@ /* ** String handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_str_c diff --git a/src/lj_str.h b/src/lj_str.h index bb306c14..85c1e405 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -1,6 +1,6 @@ /* ** String handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STR_H diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index 04c71e88..d7893ce9 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -1,6 +1,6 @@ /* ** String formatting. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h index a38bc7c3..6e1d9017 100644 --- a/src/lj_strfmt.h +++ b/src/lj_strfmt.h @@ -1,6 +1,6 @@ /* ** String formatting. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STRFMT_H diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c index 04769258..9271f68a 100644 --- a/src/lj_strfmt_num.c +++ b/src/lj_strfmt_num.c @@ -1,6 +1,6 @@ /* ** String formatting for floating-point numbers. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** Contributed by Peter Cawley. */ @@ -138,7 +138,7 @@ static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k, } if (carry_in) { nd[++ndhi] = carry_in; carry_in = 0; - if(start++ == ndlo) ++ndlo; + if (start++ == ndlo) ++ndlo; } k -= ND_MUL2K_MAX_SHIFT; } diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 7c517132..f5f35c96 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c @@ -1,6 +1,6 @@ /* ** String scanning. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include diff --git a/src/lj_strscan.h b/src/lj_strscan.h index b3ee9e52..6fb0dda0 100644 --- a/src/lj_strscan.h +++ b/src/lj_strscan.h @@ -1,6 +1,6 @@ /* ** String scanning. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STRSCAN_H diff --git a/src/lj_tab.c b/src/lj_tab.c index 8011212f..47c0cfd3 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -1,6 +1,6 @@ /* ** Table handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -28,7 +28,6 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) -#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS) #if LJ_GC64 #define hashgcref(t, r) \ hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) diff --git a/src/lj_tab.h b/src/lj_tab.h index f06fcf61..71e34945 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h @@ -1,6 +1,6 @@ /* ** Table handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TAB_H diff --git a/src/lj_target.h b/src/lj_target.h index abea8d5b..8dcae957 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -1,6 +1,6 @@ /* ** Definitions for target CPU. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_H @@ -55,7 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. */ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 typedef uint64_t RegSet; #else typedef uint32_t RegSet; @@ -69,7 +69,7 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) #else diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index 36959dbc..5551b1f1 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h @@ -1,6 +1,6 @@ /* ** Definitions for ARM CPUs. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_ARM_H diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 57ab134f..520023ae 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -1,6 +1,6 @@ /* ** Definitions for ARM64 CPUs. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_ARM64_H @@ -55,7 +55,8 @@ enum { /* Make use of all registers, except for x18, fp, lr and sp. */ #define RSET_FIXED \ - (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)) + (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ + RID2RSET(RID_GL)) #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) #define RSET_ALL (RSET_GPR|RSET_FPR) @@ -73,25 +74,256 @@ enum { #define REGARG_LASTFPR RID_D7 #define REGARG_NUMFPR 8 +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the vm_arm64.dasc file. +** Pre-allocate some slots to avoid sp adjust in every root trace. +** +** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. +*/ +#define SPS_FIXED 4 +#define SPS_FIRST 2 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +/* Highest exit + 1 indicates stack check. */ +#define EXITSTATE_CHECKEXIT 1 + +/* Return the address of a per-trace exit stub. */ +static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) +{ + while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ + return p + 3 + exitno; +} +/* Avoid dependence on lj_jit.h if only including lj_target.h. */ +#define exitstub_trace_addr(T, exitno) \ + exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) + /* -- Instructions -------------------------------------------------------- */ +/* ARM64 instructions are always little-endian. Swap for ARM64BE. */ +#if LJ_BE +#define A64I_LE(x) (lj_bswap(x)) +#else +#define A64I_LE(x) (x) +#endif + /* Instruction fields. */ #define A64F_D(r) (r) -#define A64F_N(r) ((r) << 5) -#define A64F_A(r) ((r) << 10) -#define A64F_M(r) ((r) << 16) +#define A64F_N(r) ((r) << 5) +#define A64F_A(r) ((r) << 10) +#define A64F_M(r) ((r) << 16) +#define A64F_IMMS(x) ((x) << 10) +#define A64F_IMMR(x) ((x) << 16) #define A64F_U16(x) ((x) << 5) +#define A64F_U12(x) ((x) << 10) #define A64F_S26(x) (x) -#define A64F_S19(x) ((x) << 5) +#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) +#define A64F_S14(x) ((x) << 5) +#define A64F_S9(x) ((x) << 12) +#define A64F_BIT(x) ((x) << 19) +#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) +#define A64F_EX(ex) (A64I_EX | ((ex) << 13)) +#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) +#define A64F_FP8(x) ((x) << 13) +#define A64F_CC(cc) ((cc) << 12) +#define A64F_LSL16(x) (((x) / 16) << 21) +#define A64F_BSH(sh) ((sh) << 10) typedef enum A64Ins { + A64I_S = 0x20000000, + A64I_X = 0x80000000, + A64I_EX = 0x00200000, + A64I_ON = 0x00200000, + A64I_K12 = 0x1a000000, + A64I_K13 = 0x18000000, + A64I_LS_U = 0x01000000, + A64I_LS_S = 0x00800000, + A64I_LS_R = 0x01200800, + A64I_LS_SH = 0x00001000, + A64I_LS_UXTWx = 0x00004000, + A64I_LS_SXTWx = 0x0000c000, + A64I_LS_SXTXx = 0x0000e000, + A64I_LS_LSLx = 0x00006000, + + A64I_ADDw = 0x0b000000, + A64I_ADDx = 0x8b000000, + A64I_ADDSw = 0x2b000000, + A64I_ADDSx = 0xab000000, + A64I_NEGw = 0x4b0003e0, + A64I_NEGx = 0xcb0003e0, + A64I_SUBw = 0x4b000000, + A64I_SUBx = 0xcb000000, + A64I_SUBSw = 0x6b000000, + A64I_SUBSx = 0xeb000000, + + A64I_MULw = 0x1b007c00, + A64I_MULx = 0x9b007c00, + A64I_SMULL = 0x9b207c00, + + A64I_ANDw = 0x0a000000, + A64I_ANDx = 0x8a000000, + A64I_ANDSw = 0x6a000000, + A64I_ANDSx = 0xea000000, + A64I_EORw = 0x4a000000, + A64I_EORx = 0xca000000, + A64I_ORRw = 0x2a000000, + A64I_ORRx = 0xaa000000, + A64I_TSTw = 0x6a00001f, + A64I_TSTx = 0xea00001f, + + A64I_CMPw = 0x6b00001f, + A64I_CMPx = 0xeb00001f, + A64I_CMNw = 0x2b00001f, + A64I_CMNx = 0xab00001f, + A64I_CCMPw = 0x7a400000, + A64I_CCMPx = 0xfa400000, + A64I_CSELw = 0x1a800000, + A64I_CSELx = 0x9a800000, + + A64I_ASRw = 0x13007c00, + A64I_ASRx = 0x9340fc00, + A64I_LSLx = 0xd3400000, + A64I_LSRx = 0xd340fc00, + A64I_SHRw = 0x1ac02000, + A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ + A64I_REVw = 0x5ac00800, + A64I_REVx = 0xdac00c00, + + A64I_EXTRw = 0x13800000, + A64I_EXTRx = 0x93c00000, + A64I_SBFMw = 0x13000000, + A64I_SBFMx = 0x93400000, + A64I_SXTBw = 0x13001c00, + A64I_SXTHw = 0x13003c00, + A64I_SXTW = 0x93407c00, + A64I_UBFMw = 0x53000000, + A64I_UBFMx = 0xd3400000, + A64I_UXTBw = 0x53001c00, + A64I_UXTHw = 0x53003c00, + + A64I_MOVw = 0x2a0003e0, + A64I_MOVx = 0xaa0003e0, + A64I_MVNw = 0x2a2003e0, + A64I_MVNx = 0xaa2003e0, + A64I_MOVKw = 0x72800000, + A64I_MOVKx = 0xf2800000, A64I_MOVZw = 0x52800000, A64I_MOVZx = 0xd2800000, + A64I_MOVNw = 0x12800000, + A64I_MOVNx = 0x92800000, + + A64I_LDRB = 0x39400000, + A64I_LDRH = 0x79400000, + A64I_LDRw = 0xb9400000, + A64I_LDRx = 0xf9400000, A64I_LDRLw = 0x18000000, A64I_LDRLx = 0x58000000, - A64I_NOP = 0xd503201f, + A64I_STRB = 0x39000000, + A64I_STRH = 0x79000000, + A64I_STRw = 0xb9000000, + A64I_STRx = 0xf9000000, + A64I_STPw = 0x29000000, + A64I_STPx = 0xa9000000, + A64I_LDPw = 0x29400000, + A64I_LDPx = 0xa9400000, + A64I_B = 0x14000000, + A64I_BCC = 0x54000000, + A64I_BL = 0x94000000, A64I_BR = 0xd61f0000, + A64I_BLR = 0xd63f0000, + A64I_TBZ = 0x36000000, + A64I_TBNZ = 0x37000000, + A64I_CBZ = 0x34000000, + A64I_CBNZ = 0x35000000, + + A64I_NOP = 0xd503201f, + + /* FP */ + A64I_FADDd = 0x1e602800, + A64I_FSUBd = 0x1e603800, + A64I_FMADDd = 0x1f400000, + A64I_FMSUBd = 0x1f408000, + A64I_FNMADDd = 0x1f600000, + A64I_FNMSUBd = 0x1f608000, + A64I_FMULd = 0x1e600800, + A64I_FDIVd = 0x1e601800, + A64I_FNEGd = 0x1e614000, + A64I_FABS = 0x1e60c000, + A64I_FSQRTd = 0x1e61c000, + A64I_LDRs = 0xbd400000, + A64I_LDRd = 0xfd400000, + A64I_STRs = 0xbd000000, + A64I_STRd = 0xfd000000, + A64I_LDPs = 0x2d400000, + A64I_LDPd = 0x6d400000, + A64I_STPs = 0x2d000000, + A64I_STPd = 0x6d000000, + A64I_FCMPd = 0x1e602000, + A64I_FCMPZd = 0x1e602008, + A64I_FCSELd = 0x1e600c00, + A64I_FRINTMd = 0x1e654000, + A64I_FRINTPd = 0x1e64c000, + A64I_FRINTZd = 0x1e65c000, + + A64I_FCVT_F32_F64 = 0x1e624000, + A64I_FCVT_F64_F32 = 0x1e22c000, + A64I_FCVT_F32_S32 = 0x1e220000, + A64I_FCVT_F64_S32 = 0x1e620000, + A64I_FCVT_F32_U32 = 0x1e230000, + A64I_FCVT_F64_U32 = 0x1e630000, + A64I_FCVT_F32_S64 = 0x9e220000, + A64I_FCVT_F64_S64 = 0x9e620000, + A64I_FCVT_F32_U64 = 0x9e230000, + A64I_FCVT_F64_U64 = 0x9e630000, + A64I_FCVT_S32_F64 = 0x1e780000, + A64I_FCVT_S32_F32 = 0x1e380000, + A64I_FCVT_U32_F64 = 0x1e790000, + A64I_FCVT_U32_F32 = 0x1e390000, + A64I_FCVT_S64_F64 = 0x9e780000, + A64I_FCVT_S64_F32 = 0x9e380000, + A64I_FCVT_U64_F64 = 0x9e790000, + A64I_FCVT_U64_F32 = 0x9e390000, + + A64I_FMOV_S = 0x1e204000, + A64I_FMOV_D = 0x1e604000, + A64I_FMOV_R_S = 0x1e260000, + A64I_FMOV_S_R = 0x1e270000, + A64I_FMOV_R_D = 0x9e660000, + A64I_FMOV_D_R = 0x9e670000, + A64I_FMOV_DI = 0x1e601000, } A64Ins; +typedef enum A64Shift { + A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR +} A64Shift; + +typedef enum A64Extend { + A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, + A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, +} A64Extend; + +/* ARM condition codes. */ +typedef enum A64CC { + CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, + CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, + CC_HS = CC_CS, CC_LO = CC_CC +} A64CC; + #endif diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h index 6a7d4b50..740687b3 100644 --- a/src/lj_target_mips.h +++ b/src/lj_target_mips.h @@ -1,6 +1,6 @@ /* ** Definitions for MIPS CPUs. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_MIPS_H @@ -32,6 +32,7 @@ enum { RID_MAX, RID_ZERO = RID_R0, RID_TMP = RID_RA, + RID_GP = RID_R28, /* Calling conventions. */ RID_RET = RID_R2, @@ -74,13 +75,13 @@ enum { /* -- Register sets ------------------------------------------------------- */ -/* Make use of all registers, except ZERO, TMP, SP, SYS1, SYS2 and JGL. */ +/* Make use of all registers, except ZERO, TMP, SP, SYS1, SYS2, JGL and GP. */ #define RSET_FIXED \ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ - RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)) + RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) #if LJ_SOFTFP -#define RSET_FPR 0 +#define RSET_FPR 0 #else #if LJ_32 #define RSET_FPR \ @@ -89,15 +90,15 @@ enum { RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) #else -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) +#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) #endif #endif -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL +#define RSET_ALL (RSET_GPR|RSET_FPR) +#define RSET_INIT RSET_ALL #define RSET_SCRATCH_GPR \ (RSET_RANGE(RID_R1, RID_R15+1)|\ - RID2RSET(RID_R24)|RID2RSET(RID_R25)|RID2RSET(RID_R28)) + RID2RSET(RID_R24)|RID2RSET(RID_R25)) #if LJ_SOFTFP #define RSET_SCRATCH_FPR 0 #else @@ -191,8 +192,12 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) #define MIPSF_F(r) ((r) << 6) #define MIPSF_A(n) ((n) << 6) #define MIPSF_M(n) ((n) << 11) +#define MIPSF_L(n) ((n) << 6) typedef enum MIPSIns { + MIPSI_D = 0x38, + MIPSI_DV = 0x10, + MIPSI_D32 = 0x3c, /* Integer instructions. */ MIPSI_MOVE = 0x00000025, MIPSI_NOP = 0x00000000, @@ -201,22 +206,27 @@ typedef enum MIPSIns { MIPSI_LU = 0x34000000, MIPSI_LUI = 0x3c000000, - MIPSI_ADDIU = 0x24000000, + MIPSI_AND = 0x00000024, MIPSI_ANDI = 0x30000000, + MIPSI_OR = 0x00000025, MIPSI_ORI = 0x34000000, + MIPSI_XOR = 0x00000026, MIPSI_XORI = 0x38000000, + MIPSI_NOR = 0x00000027, + + MIPSI_SLT = 0x0000002a, + MIPSI_SLTU = 0x0000002b, MIPSI_SLTI = 0x28000000, MIPSI_SLTIU = 0x2c000000, MIPSI_ADDU = 0x00000021, + MIPSI_ADDIU = 0x24000000, + MIPSI_SUB = 0x00000022, MIPSI_SUBU = 0x00000023, MIPSI_MUL = 0x70000002, - MIPSI_AND = 0x00000024, - MIPSI_OR = 0x00000025, - MIPSI_XOR = 0x00000026, - MIPSI_NOR = 0x00000027, - MIPSI_SLT = 0x0000002a, - MIPSI_SLTU = 0x0000002b, + MIPSI_DIV = 0x0000001a, + MIPSI_DIVU = 0x0000001b, + MIPSI_MOVZ = 0x0000000a, MIPSI_MOVN = 0x0000000b, MIPSI_MFHI = 0x00000010, @@ -227,14 +237,18 @@ typedef enum MIPSIns { MIPSI_SRL = 0x00000002, MIPSI_SRA = 0x00000003, MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ + MIPSI_DROTR = 0x0020003a, + MIPSI_DROTR32 = 0x0020003e, MIPSI_SLLV = 0x00000004, MIPSI_SRLV = 0x00000006, MIPSI_SRAV = 0x00000007, MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ + MIPSI_DROTRV = 0x00000056, MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ + MIPSI_DSBH = 0x7c0000a4, MIPSI_B = 0x10000000, MIPSI_J = 0x08000000, @@ -252,7 +266,9 @@ typedef enum MIPSIns { /* Load/store instructions. */ MIPSI_LW = 0x8c000000, + MIPSI_LD = 0xdc000000, MIPSI_SW = 0xac000000, + MIPSI_SD = 0xfc000000, MIPSI_LB = 0x80000000, MIPSI_SB = 0xa0000000, MIPSI_LH = 0x84000000, @@ -265,13 +281,48 @@ typedef enum MIPSIns { MIPSI_SDC1 = 0xf4000000, /* MIPS64 instructions. */ - MIPSI_DSLL = 0x00000038, - MIPSI_LD = 0xdc000000, + MIPSI_DADD = 0x0000002c, + MIPSI_DADDI = 0x60000000, + MIPSI_DADDU = 0x0000002d, MIPSI_DADDIU = 0x64000000, - MIPSI_SD = 0xfc000000, - MIPSI_DMFC1 = 0x44200000, + MIPSI_DSUB = 0x0000002e, + MIPSI_DSUBU = 0x0000002f, + MIPSI_DDIV = 0x0000001e, + MIPSI_DDIVU = 0x0000001f, + MIPSI_DMULT = 0x0000001c, + MIPSI_DMULTU = 0x0000001d, + + MIPSI_DSLL = 0x00000038, + MIPSI_DSRL = 0x0000003a, + MIPSI_DSLLV = 0x00000014, + MIPSI_DSRLV = 0x00000016, + MIPSI_DSRA = 0x0000003b, + MIPSI_DSRAV = 0x00000017, MIPSI_DSRA32 = 0x0000003f, - MIPSI_MFHC1 = 0x44600000, + MIPSI_DSLL32 = 0x0000003c, + MIPSI_DSRL32 = 0x0000003e, + MIPSI_DSHD = 0x7c000164, + + MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU, + MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU, + MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, + MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, + MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, + + /* Extract/insert instructions. */ + MIPSI_DEXTM = 0x7c000001, + MIPSI_DEXTU = 0x7c000002, + MIPSI_DEXT = 0x7c000003, + MIPSI_DINSM = 0x7c000005, + MIPSI_DINSU = 0x7c000006, + MIPSI_DINS = 0x7c000007, + + MIPSI_RINT_D = 0x4620001a, + MIPSI_RINT_S = 0x4600001a, + MIPSI_RINT = 0x4400001a, + MIPSI_FLOOR_D = 0x4620000b, + MIPSI_CEIL_D = 0x4620000a, + MIPSI_ROUND_D = 0x46200008, /* FP instructions. */ MIPSI_MOV_S = 0x46000006, @@ -297,24 +348,30 @@ typedef enum MIPSIns { MIPSI_CVT_W_D = 0x46200024, MIPSI_CVT_S_W = 0x46800020, MIPSI_CVT_D_W = 0x46800021, + MIPSI_CVT_S_L = 0x46a00020, + MIPSI_CVT_D_L = 0x46a00021, MIPSI_TRUNC_W_S = 0x4600000d, MIPSI_TRUNC_W_D = 0x4620000d, + MIPSI_TRUNC_L_S = 0x46000009, + MIPSI_TRUNC_L_D = 0x46200009, MIPSI_FLOOR_W_S = 0x4600000f, MIPSI_FLOOR_W_D = 0x4620000f, MIPSI_MFC1 = 0x44000000, MIPSI_MTC1 = 0x44800000, + MIPSI_DMTC1 = 0x44a00000, + MIPSI_DMFC1 = 0x44200000, MIPSI_BC1F = 0x45000000, MIPSI_BC1T = 0x45010000, MIPSI_C_EQ_D = 0x46200032, + MIPSI_C_OLT_S = 0x46000034, MIPSI_C_OLT_D = 0x46200034, MIPSI_C_ULT_D = 0x46200035, MIPSI_C_OLE_D = 0x46200036, MIPSI_C_ULE_D = 0x46200037, - } MIPSIns; #endif diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index c6e0ef67..bb59d296 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -1,6 +1,6 @@ /* ** Definitions for PPC/PPC64 CPUs. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_PPC_H diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index d5429597..356f7924 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -1,6 +1,6 @@ /* ** Definitions for x86 and x64 CPUs. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_X86_H @@ -31,7 +31,7 @@ enum { FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ RID_MAX, RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ - RID_RIP = RID_MAX+1, /* Pseudo-id for RIP (x64 only). */ + RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */ /* Calling conventions. */ RID_SP = RID_ESP, diff --git a/src/lj_trace.c b/src/lj_trace.c index 87146832..d85b47f8 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -1,6 +1,6 @@ /* ** Trace management. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_trace_c @@ -319,13 +319,15 @@ void lj_trace_initstate(global_State *g) /* Initialize 32/64 bit constants. */ #if LJ_TARGET_X86ORX64 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); - J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); - J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); #if LJ_32 J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); #endif + J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; #endif +#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); +#endif #if LJ_TARGET_PPC J->k32[LJ_K32_2P52_2P31] = 0x59800004; J->k32[LJ_K32_2P52] = 0x59800000; @@ -335,6 +337,11 @@ void lj_trace_initstate(global_State *g) #endif #if LJ_TARGET_MIPS J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); +#if LJ_64 + J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); + J->k32[LJ_K32_2P63] = 0x5f000000; + J->k32[LJ_K32_M2P64] = 0xdf800000; +#endif #endif } @@ -446,6 +453,12 @@ static void trace_start(jit_State *J) if (J->parent) { setintV(L->top++, J->parent); setintV(L->top++, J->exitno); + } else { + BCOp op = bc_op(*J->pc); + if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { + setintV(L->top++, J->exitno); /* Parent of stitched trace. */ + setintV(L->top++, -1); + } } ); lj_record_setup(J); @@ -881,7 +894,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) ERRNO_RESTORE switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: - return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) + LJ_FR2); + return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2); case BC_RETM: return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); case BC_TSETM: diff --git a/src/lj_trace.h b/src/lj_trace.h index 5658d8a5..22cae741 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h @@ -1,6 +1,6 @@ /* ** Trace management. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TRACE_H diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h index 9c01ffb7..1363c4f3 100644 --- a/src/lj_traceerr.h +++ b/src/lj_traceerr.h @@ -1,6 +1,6 @@ /* ** Trace compiler error messages. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* This file may be included multiple times with different TREDEF macros. */ diff --git a/src/lj_udata.c b/src/lj_udata.c index 71697a42..bd0321b8 100644 --- a/src/lj_udata.c +++ b/src/lj_udata.c @@ -1,6 +1,6 @@ /* ** Userdata handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_udata_c diff --git a/src/lj_udata.h b/src/lj_udata.h index 81c15816..f271a42d 100644 --- a/src/lj_udata.h +++ b/src/lj_udata.h @@ -1,6 +1,6 @@ /* ** Userdata handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_UDATA_H diff --git a/src/lj_vm.h b/src/lj_vm.h index d605b143..1cc7eed7 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -1,6 +1,6 @@ /* ** Assembler VM interface definitions. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_VM_H diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c index 3acdd253..86640804 100644 --- a/src/lj_vmevent.c +++ b/src/lj_vmevent.c @@ -1,6 +1,6 @@ /* ** VM event handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h index b9495b4f..050fb4dd 100644 --- a/src/lj_vmevent.h +++ b/src/lj_vmevent.h @@ -1,6 +1,6 @@ /* ** VM event handling. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_VMEVENT_H diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index e6622650..b231d3e8 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -1,6 +1,6 @@ /* ** Math helper functions for assembler VM. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_vmmath_c diff --git a/src/ljamalg.c b/src/ljamalg.c index 4c44de27..f1f28623 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -1,6 +1,6 @@ /* ** LuaJIT core and libraries amalgamation. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* diff --git a/src/lua.h b/src/lua.h index 352d29f3..850bd796 100644 --- a/src/lua.h +++ b/src/lua.h @@ -39,7 +39,8 @@ #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) -/* thread status; 0 is OK */ +/* thread status */ +#define LUA_OK 0 #define LUA_YIELD 1 #define LUA_ERRRUN 2 #define LUA_ERRSYNTAX 3 @@ -347,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n); LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, const char *chunkname, const char *mode); +LUA_API const lua_Number *lua_version (lua_State *L); +LUA_API void lua_copy (lua_State *L, int fromidx, int toidx); +LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum); +LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum); + +/* From Lua 5.3. */ +LUA_API int lua_isyieldable (lua_State *L); struct lua_Debug { diff --git a/src/luaconf.h b/src/luaconf.h index 4c24bb1a..c2d29d94 100644 --- a/src/luaconf.h +++ b/src/luaconf.h @@ -1,6 +1,6 @@ /* ** Configuration header. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef luaconf_h @@ -37,7 +37,7 @@ #endif #define LUA_LROOT "/usr/local" #define LUA_LUADIR "/lua/5.1/" -#define LUA_LJDIR "/luajit-2.1.0-beta2/" +#define LUA_LJDIR "/luajit-2.1.0-beta3/" #ifdef LUA_ROOT #define LUA_JROOT LUA_ROOT @@ -79,7 +79,7 @@ #define LUA_IGMARK "-" #define LUA_PATH_CONFIG \ LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ - LUA_EXECDIR "\n" LUA_IGMARK + LUA_EXECDIR "\n" LUA_IGMARK "\n" /* Quoting in error messages. */ #define LUA_QL(x) "'" x "'" @@ -92,10 +92,6 @@ #define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ #define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ -/* Compatibility with older library function names. */ -#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */ -#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */ - /* Configuration for the frontend (the luajit executable). */ #if defined(luajit_c) #define LUA_PROGNAME "luajit" /* Fallback frontend name. */ diff --git a/src/luajit.c b/src/luajit.c index e582f469..1ca24301 100644 --- a/src/luajit.c +++ b/src/luajit.c @@ -1,6 +1,6 @@ /* ** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -124,7 +124,7 @@ static int docall(lua_State *L, int narg, int clear) #endif lua_remove(L, base); /* remove traceback function */ /* force a complete garbage collection in case of errors */ - if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); + if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0); return status; } @@ -249,9 +249,9 @@ static void dotty(lua_State *L) const char *oldprogname = progname; progname = NULL; while ((status = loadline(L)) != -1) { - if (status == 0) status = docall(L, 0, 0); + if (status == LUA_OK) status = docall(L, 0, 0); report(L, status); - if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ + if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */ lua_getglobal(L, "print"); lua_insert(L, 1); if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) @@ -273,7 +273,7 @@ static int handle_script(lua_State *L, char **argx) if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0) fname = NULL; /* stdin */ status = luaL_loadfile(L, fname); - if (status == 0) { + if (status == LUA_OK) { /* Fetch args from arg table. LUA_INIT or -e might have changed them. */ int narg = 0; lua_getglobal(L, "arg"); @@ -483,7 +483,7 @@ static int runargs(lua_State *L, char **argv, int argn) default: break; } } - return 0; + return LUA_OK; } static int handle_luainit(lua_State *L) @@ -494,7 +494,7 @@ static int handle_luainit(lua_State *L) const char *init = getenv(LUA_INIT); #endif if (init == NULL) - return 0; /* status OK */ + return LUA_OK; else if (init[0] == '@') return dofile(L, init+1); else @@ -539,17 +539,17 @@ static int pmain(lua_State *L) if (!(flags & FLAGS_NOENV)) { s->status = handle_luainit(L); - if (s->status != 0) return 0; + if (s->status != LUA_OK) return 0; } if ((flags & FLAGS_VERSION)) print_version(); s->status = runargs(L, argv, argn); - if (s->status != 0) return 0; + if (s->status != LUA_OK) return 0; if (s->argc > argn) { s->status = handle_script(L, argv + argn); - if (s->status != 0) return 0; + if (s->status != LUA_OK) return 0; } if ((flags & FLAGS_INTERACTIVE)) { diff --git a/src/luajit.h b/src/luajit.h index 1d0a558c..708a5a11 100644 --- a/src/luajit.h +++ b/src/luajit.h @@ -1,7 +1,7 @@ /* ** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ ** -** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** ** Permission is hereby granted, free of charge, to any person obtaining ** a copy of this software and associated documentation files (the @@ -30,10 +30,10 @@ #include "lua.h" -#define LUAJIT_VERSION "LuaJIT 2.1.0-beta2" +#define LUAJIT_VERSION "LuaJIT 2.1.0-beta3" #define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ -#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta2 -#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2016 Mike Pall" +#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3 +#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2017 Mike Pall" #define LUAJIT_URL "http://luajit.org/" /* Modes for luaJIT_setmode. */ diff --git a/src/lualib.h b/src/lualib.h index 3a549555..bfc130a1 100644 --- a/src/lualib.h +++ b/src/lualib.h @@ -1,6 +1,6 @@ /* ** Standard library header. -** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LUALIB_H diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index f977a249..71bde759 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -1,5 +1,5 @@ @rem Script to build LuaJIT with MSVC. -@rem Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +@rem Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h @rem @rem Either open a "Visual Studio .NET Command Prompt" @rem (Note that the Express Edition does not contain an x64 compiler) @@ -14,12 +14,13 @@ @if not defined INCLUDE goto :FAIL @setlocal -@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE +@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline @set LJLINK=link /nologo @set LJMT=mt /nologo @set LJLIB=lib /nologo /nodefaultlib @set DASMDIR=..\dynasm @set DASM=%DASMDIR%\dynasm.lua +@set DASC=vm_x86.dasc @set LJDLLNAME=lua51.dll @set LJLIBNAME=lua51.lib @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c @@ -39,7 +40,12 @@ if exist minilua.exe.manifest^ @set LJARCH=x86 @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 :X64 -minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc +@if "%1" neq "gc64" goto :NOGC64 +@shift +@set DASC=vm_x64.dasc +@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_GC64 +:NOGC64 +minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% @if errorlevel 1 goto :BAD %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c @@ -67,7 +73,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @if "%1" neq "debug" goto :NODEBUG @shift @set LJCOMPILE=%LJCOMPILE% /Zi -@set LJLINK=%LJLINK% /debug +@set LJLINK=%LJLINK% /debug /opt:ref /opt:icf /incremental:no :NODEBUG @if "%1"=="amalg" goto :AMALGDLL @if "%1"=="static" goto :STATIC diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 882884cd..780cc16e 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for ARM CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.arch arm |.section code_op, code_sub diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 7a881bdd..3eaf3763 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for ARM64 CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.arch arm64 |.section code_op, code_sub @@ -151,6 +151,21 @@ |.define FRAME_FUNC, #-16 |.define FRAME_PC, #-8 | +|// Endian-specific defines. +|.if ENDIAN_LE +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RB, 3 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RB, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif +| |.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro |.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro |.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro @@ -236,12 +251,17 @@ |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro | -#define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field)) +#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | |.macro hotcheck, delta -| NYI +| lsr CARG1, PC, #1 +| and CARG1, CARG1, #126 +| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT +| ldrh CARG2w, [GL, CARG1] +| subs CARG2, CARG2, #delta +| strh CARG2w, [GL, CARG1] |.endmacro | |.macro hotloop @@ -712,7 +732,7 @@ static void build_subroutines(BuildCtx *ctx) | cmp CRET1, #1 | bhi ->vmeta_binop |4: - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | add PC, PC, #4 | add RB, PC, RB, lsl #2 | sub RB, RB, #0x20000 @@ -869,7 +889,7 @@ static void build_subroutines(BuildCtx *ctx) | bl extern lj_meta_for // (lua_State *L, TValue *base) | ldr INSw, [PC, #-4] |.if JIT - | uxtb TMP0, INS + | uxtb TMP0w, INSw |.endif | decode_RA RA, INS | decode_RD RC, INS @@ -1495,7 +1515,12 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback | checkint CARG1, ->fff_fallback | mov CARG3, #1 - | mov CARG2, BASE // Points to stack. Little-endian. + | // Point to the char inside the integer in the stack slot. + |.if ENDIAN_LE + | mov CARG2, BASE + |.else + | add CARG2, BASE, #7 + |.endif |->fff_newstr: | // CARG2 = str, CARG3 = len. | str BASE, L->base @@ -1698,7 +1723,7 @@ static void build_subroutines(BuildCtx *ctx) | ands TMP0, PC, #FRAME_TYPE | and TMP1, PC, #~FRAME_TYPEP | bne >3 - | ldrb RAw, [PC, #-3] + | ldrb RAw, [PC, #-4+OFS_RA] | lsl RA, RA, #3 | add TMP1, RA, #16 |3: @@ -1732,7 +1757,20 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |->vm_record: // Dispatch target for recording phase. - | NYI + |.if JIT + | ldrb CARG1w, GL->hookmask + | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. + | bne >5 + | // Decrement the hookcount for consistency, but always do the call. + | ldr CARG2w, GL->hookcount + | tst CARG1, #HOOK_ACTIVE + | bne >1 + | sub CARG2w, CARG2w, #1 + | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT + | beq >1 + | str CARG2w, GL->hookcount + | b >1 + |.endif | |->vm_rethook: // Dispatch target for return hooks. | ldrb TMP2w, GL->hookmask @@ -1774,7 +1812,21 @@ static void build_subroutines(BuildCtx *ctx) | b <4 | |->vm_hotloop: // Hot loop counter underflow. - | NYI + |.if JIT + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). + | add CARG1, GL, #GG_G2DISP+GG_DISP2J + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | str PC, SAVE_PC + | ldr CARG3, LFUNC:CARG3->pc + | mov CARG2, PC + | str L, [GL, #GL_J(L)] + | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] + | str BASE, L->base + | add CARG3, BASE, CARG3, lsl #3 + | str CARG3, L->top + | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | b <3 + |.endif | |->vm_callhook: // Dispatch target for call hooks. | mov CARG2, PC @@ -1804,7 +1856,54 @@ static void build_subroutines(BuildCtx *ctx) | br CRET1 | |->cont_stitch: // Trace stitching. - | NYI + |.if JIT + | // RA = resultptr, CARG4 = meta base + | ldr RBw, SAVE_MULTRES + | ldr INSw, [PC, #-4] + | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. + | subs RB, RB, #8 + | decode_RA RC, INS // Call base. + | and CARG3, CARG3, #LJ_GCVMASK + | beq >2 + |1: // Move results down. + | ldr CARG1, [RA] + | add RA, RA, #8 + | subs RB, RB, #8 + | str CARG1, [BASE, RC, lsl #3] + | add RC, RC, #1 + | bne <1 + |2: + | decode_RA RA, INS + | decode_RB RB, INS + | add RA, RA, RB + |3: + | cmp RA, RC + | bhi >9 // More results wanted? + | + | ldrh RAw, TRACE:CARG3->traceno + | ldrh RCw, TRACE:CARG3->link + | cmp RCw, RAw + | beq ->cont_nop // Blacklisted. + | cmp RCw, #0 + | bne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | mov CARG1, #GL_J(exitno) + | str RAw, [GL, CARG1] + | mov CARG1, #GL_J(L) + | str L, [GL, CARG1] + | str BASE, L->base + | add CARG1, GL, #GG_G2J + | mov CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | ldr BASE, L->base + | b ->cont_nop + | + |9: // Fill up results with nil. + | str TISNIL, [BASE, RC, lsl #3] + | add RC, RC, #1 + | b <3 + |.endif | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE @@ -1822,10 +1921,122 @@ static void build_subroutines(BuildCtx *ctx) |//-- Trace exit handler ------------------------------------------------- |//----------------------------------------------------------------------- | + |.macro savex_, a, b + | stp d..a, d..b, [sp, #a*8] + | stp x..a, x..b, [sp, #32*8+a*8] + |.endmacro + | |->vm_exit_handler: - | NYI + |.if JIT + | sub sp, sp, #(64*8) + | savex_, 0, 1 + | savex_, 2, 3 + | savex_, 4, 5 + | savex_, 6, 7 + | savex_, 8, 9 + | savex_, 10, 11 + | savex_, 12, 13 + | savex_, 14, 15 + | savex_, 16, 17 + | savex_, 18, 19 + | savex_, 20, 21 + | savex_, 22, 23 + | savex_, 24, 25 + | savex_, 26, 27 + | savex_, 28, 29 + | stp d30, d31, [sp, #30*8] + | ldr CARG1, [sp, #64*8] // Load original value of lr. + | add CARG3, sp, #64*8 // Recompute original value of sp. + | mv_vmstate CARG4w, EXIT + | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. + | sub CARG1, CARG1, lr + | ldr L, GL->cur_L + | lsr CARG1, CARG1, #2 + | ldr BASE, GL->jit_base + | sub CARG1, CARG1, #2 + | ldr CARG2w, [lr] // Load trace number. + | st_vmstate CARG4w + |.if ENDIAN_BE + | rev32 CARG2, CARG2 + |.endif + | str BASE, L->base + | ubfx CARG2w, CARG2w, #5, #16 + | str CARG1w, [GL, #GL_J(exitno)] + | str CARG2w, [GL, #GL_J(parent)] + | str L, [GL, #GL_J(L)] + | str xzr, GL->jit_base + | add CARG1, GL, #GG_G2J + | mov CARG2, sp + | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ldr CARG2, L->cframe + | ldr BASE, L->base + | and sp, CARG2, #CFRAME_RAWMASK + | ldr PC, SAVE_PC // Get SAVE_PC. + | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). + | b >1 + |.endif + | |->vm_exit_interp: - | NYI + | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. + |.if JIT + | ldr L, SAVE_L + |1: + | cmp CARG1w, #0 + | blt >9 // Check for error from exit. + | lsl RC, CARG1, #3 + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | movn TISNIL, #0 + | and LFUNC:CARG2, CARG2, #LJ_GCVMASK + | str RCw, SAVE_MULTRES + | str BASE, L->base + | ldr CARG2, LFUNC:CARG2->pc + | str xzr, GL->jit_base + | mv_vmstate CARG4w, INTERP + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | // Modified copy of ins_next which handles function header dispatch, too. + | ldrb RBw, [PC, # OFS_OP] + | ldr INSw, [PC], #4 + | st_vmstate CARG4w + | cmp RBw, #BC_FUNCC+2 // Fast function? + | add TMP1, GL, INS, uxtb #3 + | bhs >4 + |2: + | cmp RBw, #BC_FUNCF // Function header? + | add TMP0, GL, RB, uxtb #3 + | ldr RB, [TMP0, #GG_G2DISP] + | decode_RA RA, INS + | lsr TMP0, INS, #16 + | csel RC, TMP0, RC, lo + | blo >5 + | ldr CARG3, [BASE, FRAME_FUNC] + | sub RC, RC, #8 + | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + |5: + | br RB + | + |4: // Check frame below fast function. + | ldr CARG1, [BASE, FRAME_PC] + | ands CARG2, CARG1, #FRAME_TYPE + | bne <2 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | ldr CARG3w, [CARG1, #-4] + | decode_RA CARG1, CARG3 + | sub CARG2, BASE, CARG1, lsl #3 + | ldr LFUNC:CARG3, [CARG2, #-32] + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | ldr CARG3, LFUNC:CARG3->pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | b <2 + | + |9: // Rethrow error from the right C frame. + | neg CARG2, CARG1 + | mov CARG1, L + | bl extern lj_err_throw // (lua_State *L, int errcode) + |.endif | |//----------------------------------------------------------------------- |//-- Math helper functions ---------------------------------------------- @@ -1965,7 +2176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1, RC = src2, JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG2, [BASE, RC, lsl #3] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2022,7 +2233,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src1, RC = src2, JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | add RC, BASE, RC, lsl #3 - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG3, [RC] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2083,7 +2294,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src, RC = str_const (~), JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | mvn RC, RC - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG2, [KBASE, RC, lsl #3] | add PC, PC, #4 | movn TMP0, #~LJ_TSTR @@ -2111,7 +2322,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src, RC = num_const (~), JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | add RC, KBASE, RC, lsl #3 - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG3, [RC] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2171,7 +2382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQP; | // RA = src, RC = primitive_type (~), JMP with RC = target | ldr TMP0, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | add PC, PC, #4 | add RC, RC, #1 | add RB, PC, RB, lsl #2 @@ -2196,7 +2407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | // RA = dst or unused, RC = src, JMP with RC = target - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr TMP0, [BASE, RC, lsl #3] | add PC, PC, #4 | mov_false TMP1 @@ -2443,7 +2654,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | str PC, SAVE_PC | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) | // Returns NULL (finished) or TValue * (metamethod). - | ldrb RBw, [PC, #-1] + | ldrb RBw, [PC, #-4+OFS_RB] | ldr BASE, L->base | cbnz CRET1, ->vmeta_binop | ldr TMP0, [BASE, RB, lsl #3] @@ -3074,7 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_callt | |5: // Tailcall to a fast function with a Lua frame below. - | ldrb RAw, [PC, #-3] + | ldrb RAw, [PC, #-4+OFS_RA] | sub CARG1, BASE, RA, lsl #3 | ldr LFUNC:CARG1, [CARG1, #-32] | and LFUNC:CARG1, CARG1, #LJ_GCVMASK @@ -3115,8 +3326,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | add RA, BASE, RA, lsl #3 | ldr TAB:RB, [RA, #-16] - | ldrh TMP3w, [PC, #2] - | ldr CARG1w, [RA, #-8] // Get index from control var. + | ldrh TMP3w, [PC, # OFS_RD] + | ldr CARG1w, [RA, #-8+LO] // Get index from control var. | add PC, PC, #4 | add TMP3, PC, TMP3, lsl #2 | and TAB:RB, RB, #LJ_GCVMASK @@ -3135,7 +3346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stp CARG1, TMP0, [RA] | add CARG1, CARG1, #1 |3: - | str CARG1w, [RA, #-8] // Update control var. + | str CARG1w, [RA, #-8+LO] // Update control var. | mov PC, TMP3 |4: | ins_next @@ -3181,8 +3392,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Despecialize bytecode if any of the checks fail. | mov TMP0, #BC_JMP | mov TMP1, #BC_ITERC - | strb TMP0w, [PC, #-4] - | strb TMP1w, [RC] + | strb TMP0w, [PC, #-4+OFS_OP] + | strb TMP1w, [RC, # OFS_OP] | b <1 break; @@ -3387,7 +3598,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (op == BC_FORI) { | csel PC, RC, PC, gt } else if (op == BC_JFORI) { - | ldrh RCw, [RC, #-2] + | mov PC, RC + | ldrh RCw, [RC, #-4+OFS_RD] } else if (op == BC_IFORL) { | csel PC, RC, PC, le } @@ -3428,7 +3640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (op == BC_FORI) { | csel PC, RC, PC, hi } else if (op == BC_JFORI) { - | ldrh RCw, [RC, #-2] + | ldrh RCw, [RC, #-4+OFS_RD] | bls =>BC_JLOOP } else if (op == BC_IFORL) { | csel PC, RC, PC, ls @@ -3488,7 +3700,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_JLOOP: |.if JIT - | NYI + | // RA = base (ignored), RC = traceno + | ldr CARG1, [GL, #GL_J(trace)] + | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. + | ldr TRACE:RC, [CARG1, RC, lsl #3] + | st_vmstate CARG2w + | ldr RA, TRACE:RC->mcode + | str BASE, GL->jit_base + | str L, GL->tmpbuf.L + | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. + | br RA |.endif break; @@ -3546,10 +3767,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_IFUNCV: | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | ldr CARG1, L->maxstack + | movn TMP0, #~LJ_TFUNC | add TMP2, BASE, RC + | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | add RA, RA, RC | add TMP0, RC, #16+FRAME_VARG - | str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC. + | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. | ldr KBASE, [PC, #-4+PC2PROTO(k)] | cmp RA, CARG1 | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. @@ -3736,8 +3959,8 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.uleb128 0x1\n" "\t.sleb128 -8\n" "\t.byte 30\n" /* Return address is in lr. */ - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ "\t.align 3\n" ".LECIE2:\n\n"); @@ -3748,7 +3971,7 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long .LASFDE3-.Lframe2\n" "\t.long lj_vm_ffi_call-.\n" "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ + "\t.uleb128 0\n" /* augmentation length */ "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 6f5a83dd..1afd6118 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for MIPS CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h |// |// MIPS soft-float support contributed by Djordje Kovacevic and |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. @@ -4317,7 +4317,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next2 | |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 + | barrierback TAB:CARG2, TMP3, TMP0, <2 break; case BC_TSETM: diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 52b56de4..c06270a0 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for MIPS64 CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h |// |// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. |// Sponsored by Cisco Systems, Inc. @@ -327,7 +327,13 @@ |.macro jmp_extern; jr CFUNCADDR; .endmacro | |.macro hotcheck, delta, target -| NYI +| dsrl TMP1, PC, 1 +| andi TMP1, TMP1, 126 +| daddu TMP1, TMP1, DISPATCH +| lhu TMP2, GG_DISP2HOT(TMP1) +| addiu TMP2, TMP2, -delta +| bltz TMP2, target +|. sh TMP2, GG_DISP2HOT(TMP1) |.endmacro | |.macro hotloop @@ -2150,7 +2156,21 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |->vm_record: // Dispatch target for recording phase. - | NYI + |.if JIT + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. + | bnez AT, >5 + | // Decrement the hookcount for consistency, but always do the call. + |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE + | bnez AT, >1 + |. addiu TMP2, TMP2, -1 + | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | beqz AT, >1 + |. nop + | b >1 + |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + |.endif | |->vm_rethook: // Dispatch target for return hooks. | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) @@ -2201,7 +2221,25 @@ static void build_subroutines(BuildCtx *ctx) |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. | |->vm_hotloop: // Hot loop counter underflow. - | NYI + |.if JIT + | ld LFUNC:TMP1, FRAME_FUNC(BASE) + | daddiu CARG1, DISPATCH, GG_DISP2J + | cleartp LFUNC:TMP1 + | sd PC, SAVE_PC + | ld TMP1, LFUNC:TMP1->pc + | move CARG2, PC + | sd L, DISPATCH_J(L)(DISPATCH) + | lbu TMP1, PC2PROTO(framesize)(TMP1) + | load_got lj_trace_hot + | sd BASE, L->base + | dsll TMP1, TMP1, 3 + | daddu TMP1, BASE, TMP1 + | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) + |. sd TMP1, L->top + | b <3 + |. nop + |.endif + | | |->vm_callhook: // Dispatch target for call hooks. |.if JIT @@ -2235,7 +2273,55 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_stitch: // Trace stitching. |.if JIT - | NYI + | // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | ld TRACE:TMP2, -40(RB) // Save previous trace. + | decode_RA8a RC, INS + | daddiu AT, MULTRES, -8 + | cleartp TRACE:TMP2 + | decode_RA8b RC + | beqz AT, >2 + |. daddu RC, BASE, RC // Call base. + |1: // Move results down. + | ld CARG1, 0(RA) + | daddiu AT, AT, -8 + | daddiu RA, RA, 8 + | sd CARG1, 0(RC) + | bnez AT, <1 + |. daddiu RC, RC, 8 + |2: + | decode_RA8a RA, INS + | decode_RB8a RB, INS + | decode_RA8b RA + | decode_RB8b RB + | daddu RA, RA, RB + | daddu RA, BASE, RA + |3: + | sltu AT, RC, RA + | bnez AT, >9 // More results wanted? + |. nop + | + | lhu TMP3, TRACE:TMP2->traceno + | lhu RD, TRACE:TMP2->link + | beq RD, TMP3, ->cont_nop // Blacklisted. + |. load_got lj_dispatch_stitch + | bnez RD, =>BC_JLOOP // Jump to stitched trace. + |. sll RD, RD, 3 + | + | // Stitch a new trace to the previous trace. + | sw TMP3, DISPATCH_J(exitno)(DISPATCH) + | sd L, DISPATCH_J(L)(DISPATCH) + | sd BASE, L->base + | daddiu CARG1, DISPATCH, GG_DISP2J + | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + |. move CARG2, PC + | b ->cont_nop + |. ld BASE, L->base + | + |9: + | sd TISNIL, 0(RC) + | b <3 + |. daddiu RC, RC, 8 |.endif | |->vm_profhook: // Dispatch target for profiler hook. @@ -2243,13 +2329,13 @@ static void build_subroutines(BuildCtx *ctx) | load_got lj_dispatch_profile | sw MULTRES, SAVE_MULTRES | move CARG2, PC - | sw BASE, L->base + | sd BASE, L->base | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) |. move CARG1, L | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | daddiu PC, PC, -4 | b ->cont_nop - |. lw BASE, L->base + |. ld BASE, L->base #endif | |//----------------------------------------------------------------------- @@ -2259,6 +2345,7 @@ static void build_subroutines(BuildCtx *ctx) |.macro savex_, a, b |.if FPU | sdc1 f..a, a*8(sp) + | sdc1 f..b, b*8(sp) | sd r..a, 32*8+a*8(sp) | sd r..b, 32*8+b*8(sp) |.else @@ -2269,11 +2356,124 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_exit_handler: |.if JIT - | NYI + |.if FPU + | daddiu sp, sp, -(32*8+32*8) + |.else + | daddiu sp, sp, -(32*8) + |.endif + | savex_ 0, 1 + | savex_ 2, 3 + | savex_ 4, 5 + | savex_ 6, 7 + | savex_ 8, 9 + | savex_ 10, 11 + | savex_ 12, 13 + | savex_ 14, 15 + | savex_ 16, 17 + | savex_ 18, 19 + | savex_ 20, 21 + | savex_ 22, 23 + | savex_ 24, 25 + | savex_ 26, 27 + | savex_ 28, 30 + |.if FPU + | sdc1 f29, 29*8(sp) + | sdc1 f31, 31*8(sp) + | sd r0, 32*8+31*8(sp) // Clear RID_TMP. + | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp. + | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP + |.else + | sd r0, 31*8(sp) // Clear RID_TMP. + | daddiu TMP2, sp, 32*8 // Recompute original value of sp. + | sd TMP2, 29*8(sp) // Store sp in RID_SP + |.endif + | li_vmstate EXIT + | daddiu DISPATCH, JGL, -GG_DISP2G-32768 + | lw TMP1, 0(TMP2) // Load exit number. + | st_vmstate + | ld L, DISPATCH_GL(cur_L)(DISPATCH) + | ld BASE, DISPATCH_GL(jit_base)(DISPATCH) + | load_got lj_trace_exit + | sd L, DISPATCH_J(L)(DISPATCH) + | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. + | sd BASE, L->base + | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. + | daddiu CARG1, DISPATCH, GG_DISP2J + | sd r0, DISPATCH_GL(jit_base)(DISPATCH) + | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) + |. move CARG2, sp + | // Returns MULTRES (unscaled) or negated error code. + | ld TMP1, L->cframe + | li AT, -4 + | ld BASE, L->base + | and sp, TMP1, AT + | ld PC, SAVE_PC // Get SAVE_PC. + | b >1 + |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield). |.endif |->vm_exit_interp: |.if JIT - | NYI + | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. + | ld L, SAVE_L + | daddiu DISPATCH, JGL, -GG_DISP2G-32768 + | sd BASE, L->base + |1: + | bltz CRET1, >9 // Check for error from exit. + |. ld LFUNC:RB, FRAME_FUNC(BASE) + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | dsll MULTRES, CRET1, 3 + | cleartp LFUNC:RB + | sw MULTRES, SAVE_MULTRES + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | .FPU mtc1 TMP3, TOBIT + | ld TMP1, LFUNC:RB->pc + | sd r0, DISPATCH_GL(jit_base)(DISPATCH) + | ld KBASE, PC2PROTO(k)(TMP1) + | .FPU cvt.d.s TOBIT, TOBIT + | // Modified copy of ins_next which handles function header dispatch, too. + | lw INS, 0(PC) + | daddiu PC, PC, 4 + | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 + | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | decode_OP8a TMP1, INS + | decode_OP8b TMP1 + | sltiu TMP2, TMP1, BC_FUNCF*8 + | daddu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld AT, 0(TMP0) + | decode_RA8a RA, INS + | beqz TMP2, >2 + |. decode_RA8b RA + | jr AT + |. decode_RD8b RD + |2: + | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? + | bnez TMP2, >3 + |. ld TMP1, FRAME_PC(BASE) + | // Check frame below fast function. + | andi TMP0, TMP1, FRAME_TYPE + | bnez TMP0, >3 // Trace stitching continuation? + |. nop + | // Otherwise set KBASE for Lua function below fast function. + | lw TMP2, -4(TMP1) + | decode_RA8a TMP0, TMP2 + | decode_RA8b TMP0 + | dsubu TMP1, BASE, TMP0 + | ld LFUNC:TMP2, -32(TMP1) + | cleartp LFUNC:TMP2 + | ld TMP1, LFUNC:TMP2->pc + | ld KBASE, PC2PROTO(k)(TMP1) + |3: + | daddiu RC, MULTRES, -8 + | jr AT + |. daddu RA, RA, BASE + | + |9: // Rethrow error from the right C frame. + | load_got lj_err_throw + | negu CARG2, CRET1 + | call_intern lj_err_throw // (lua_State *L, int errcode) + |. move CARG1, L |.endif | |//----------------------------------------------------------------------- @@ -3460,7 +3660,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ld CARG2, UPVAL:RB->v | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | lbu TMP0, UPVAL:RB->closed - | gettp TMP2, RD + | gettp TMP2, CRET1 | sd CRET1, 0(CARG2) | li AT, LJ_GC_BLACK|1 | or TMP3, TMP3, TMP0 @@ -3472,8 +3672,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: // Check if new value is collectable. | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) | beqz AT, <1 // tvisgcv(v) - |. cleartp GCOBJ:TMP1, RB - | lbu TMP3, GCOBJ:TMP1->gch.marked + |. cleartp GCOBJ:CRET1, CRET1 + | lbu TMP3, GCOBJ:CRET1->gch.marked | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) | beqz TMP3, <1 |. load_got lj_gc_barrieruv @@ -4013,7 +4213,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next2 | |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 + | barrierback TAB:CARG2, TMP3, TMP0, <2 break; case BC_TSETM: @@ -4632,7 +4832,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_JLOOP: |.if JIT - | NYI + | // RA = base*8 (ignored), RD = traceno*8 + | ld TMP1, DISPATCH_J(trace)(DISPATCH) + | li AT, 0 + | daddu TMP1, TMP1, RD + | // Traces on MIPS don't store the trace number, so use 0. + | sd AT, DISPATCH_GL(vmstate)(DISPATCH) + | ld TRACE:TMP2, 0(TMP1) + | sd BASE, DISPATCH_GL(jit_base)(DISPATCH) + | ld TMP2, TRACE:TMP2->mcode + | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH) + | jr TMP2 + |. daddiu JGL, DISPATCH, GG_DISP2G+32768 |.endif break; @@ -4694,10 +4905,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_IFUNCV: | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | li TMP0, LJ_TFUNC | daddu TMP1, BASE, RC | ld TMP2, L->maxstack + | settp LFUNC:RB, TMP0 | daddu TMP0, RA, RC - | sd LFUNC:RB, 0(TMP1) // Store (untagged) copy of LFUNC. + | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. | daddiu TMP3, RC, 16+FRAME_VARG | sltu AT, TMP0, TMP2 | ld KBASE, -4+PC2PROTO(k)(PC) diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index be4356e7..b4260ebc 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.arch ppc |.section code_op, code_sub diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 56154495..a003fb4f 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for x64 CPUs in LJ_GC64 mode. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.arch x64 |.section code_op, code_sub @@ -1469,7 +1469,7 @@ static void build_subroutines(BuildCtx *ctx) | mov [BASE-16], CFUNC:RD | mov [BASE-8], TMPR |.if DUALNUM - | mov64 RD, ((int64_t)LJ_TISNUM<<47) + | mov64 RD, ((uint64_t)LJ_TISNUM<<47) | mov [BASE], RD |.else | mov qword [BASE], 0 diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 39ccaa2e..211ae7b9 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for x86 CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.if P64 |.arch x64