Rewrite memory block allocator.

Use a mix of linear probing and pseudo-random probing.
Workaround for 1GB MAP_32BIT limit on Linux/x64. Now 2GB with !LJ_GC64.
Enforce 128TB LJ_GC64 limit for > 47 bit memory layouts (ARM64).
This commit is contained in:
Mike Pall 2016-04-18 10:57:49 +02:00
parent 101115ddd8
commit 0c6fdc1039

View File

@ -72,13 +72,56 @@
#define IS_DIRECT_BIT (SIZE_T_ONE) #define IS_DIRECT_BIT (SIZE_T_ONE)
/* Determine system-specific block allocation method. */
#if LJ_TARGET_WINDOWS #if LJ_TARGET_WINDOWS
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
#if LJ_64 && !LJ_GC64 #define LJ_ALLOC_VIRTUALALLOC 1
#if LJ_64 && !LJ_GC64
#define LJ_ALLOC_NTAVM 1
#endif
#else
#include <errno.h>
/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
#include <sys/mman.h>
#define LJ_ALLOC_MMAP 1
#if LJ_64
#define LJ_ALLOC_MMAP_PROBE 1
#if LJ_GC64
#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */
#elif LJ_TARGET_X64 && LJ_HASJIT
/* Due to limitations in the x64 compiler backend. */
#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */
#else
#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */
#endif
#endif
#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
#define LJ_ALLOC_MMAP32 1
#endif
#if LJ_TARGET_LINUX
#define LJ_ALLOC_MREMAP 1
#endif
#endif
#if LJ_ALLOC_VIRTUALALLOC
#if LJ_ALLOC_NTAVM
/* Undocumented, but hey, that's what we all love so much about Windows. */ /* Undocumented, but hey, that's what we all love so much about Windows. */
typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
size_t *size, ULONG alloctype, ULONG prot); size_t *size, ULONG alloctype, ULONG prot);
@ -89,14 +132,15 @@ static PNTAVM ntavm;
*/ */
#define NTAVM_ZEROBITS 1 #define NTAVM_ZEROBITS 1
static void INIT_MMAP(void) static void init_mmap(void)
{ {
ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
"NtAllocateVirtualMemory"); "NtAllocateVirtualMemory");
} }
#define INIT_MMAP() init_mmap()
/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
static LJ_AINLINE void *CALL_MMAP(size_t size) static void *CALL_MMAP(size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
void *ptr = NULL; void *ptr = NULL;
@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
} }
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static LJ_AINLINE void *DIRECT_MMAP(size_t size) static void *DIRECT_MMAP(size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
void *ptr = NULL; void *ptr = NULL;
@ -119,10 +163,8 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
#else #else
#define INIT_MMAP() ((void)0)
/* Win32 MMAP via VirtualAlloc */ /* Win32 MMAP via VirtualAlloc */
static LJ_AINLINE void *CALL_MMAP(size_t size) static void *CALL_MMAP(size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
@ -131,7 +173,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
} }
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static LJ_AINLINE void *DIRECT_MMAP(size_t size) static void *DIRECT_MMAP(size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
#endif #endif
/* This function supports releasing coalesced segments */ /* This function supports releasing coalesced segments */
static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) static int CALL_MUNMAP(void *ptr, size_t size)
{ {
DWORD olderr = GetLastError(); DWORD olderr = GetLastError();
MEMORY_BASIC_INFORMATION minfo; MEMORY_BASIC_INFORMATION minfo;
@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
return 0; return 0;
} }
#else #elif LJ_ALLOC_MMAP
#include <errno.h>
#include <sys/mman.h>
#define MMAP_PROT (PROT_READ|PROT_WRITE) #define MMAP_PROT (PROT_READ|PROT_WRITE)
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@ -174,107 +213,145 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
#endif #endif
#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
#if LJ_64 && !LJ_GC64 #if LJ_ALLOC_MMAP_PROBE
/* 64 bit mode with 32 bit pointers needs special support for allocating
** memory in the lower 2GB. #define LJ_ALLOC_MMAP_PROBE_MAX 30
#define LJ_ALLOC_MMAP_PROBE_LINEAR 5
#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
/* No point in a giant ifdef mess. Just try to open /dev/urandom.
** It doesn't really matter if this fails, since we get some ASLR bits from
** every unsuitable allocation, too. And we prefer linear allocation, anyway.
*/ */
#include <fcntl.h>
#include <unistd.h>
#if defined(MAP_32BIT) static uintptr_t mmap_probe_seed(void)
#if defined(__sun__)
#define MMAP_REGION_START ((uintptr_t)0x1000)
#else
/* Actually this only gives us max. 1GB in current Linux kernels. */
#define MMAP_REGION_START ((uintptr_t)0)
#endif
static LJ_AINLINE void *CALL_MMAP(size_t size)
{ {
int olderr = errno; uintptr_t val;
void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); int fd = open("/dev/urandom", O_RDONLY);
errno = olderr; if (fd != -1) {
return ptr; int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
(void)close(fd);
if (ok) return val;
}
return 1; /* Punt. */
} }
#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN static void *mmap_probe(size_t size)
/* OSX and FreeBSD mmap() use a naive first-fit linear search.
** That's perfect for us. Except that -pagezero_size must be set for OSX,
** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
** to be reduced to 250MB on FreeBSD.
*/
#if LJ_TARGET_OSX || defined(__DragonFly__)
#define MMAP_REGION_START ((uintptr_t)0x10000)
#elif LJ_TARGET_PS4
#define MMAP_REGION_START ((uintptr_t)0x4000)
#else
#define MMAP_REGION_START ((uintptr_t)0x10000000)
#endif
#define MMAP_REGION_END ((uintptr_t)0x80000000)
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
#include <sys/resource.h>
#endif
static LJ_AINLINE void *CALL_MMAP(size_t size)
{ {
int olderr = errno;
/* Hint for next allocation. Doesn't need to be thread-safe. */ /* Hint for next allocation. Doesn't need to be thread-safe. */
static uintptr_t alloc_hint = MMAP_REGION_START; static uintptr_t hint_addr = 0;
int retry = 0; static uintptr_t hint_prng = 0;
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 int olderr = errno;
static int rlimit_modified = 0; int retry;
if (LJ_UNLIKELY(rlimit_modified == 0)) { for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
struct rlimit rlim; void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; uintptr_t addr = (uintptr_t)p;
setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) {
rlimit_modified = 1; /* We got a suitable address. Bump the hint address. */
} hint_addr = addr + size;
#endif
for (;;) {
void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
if ((uintptr_t)p >= MMAP_REGION_START &&
(uintptr_t)p + size < MMAP_REGION_END) {
alloc_hint = (uintptr_t)p + size;
errno = olderr; errno = olderr;
return p; return p;
} }
if (p != CMFAIL) munmap(p, size); if (p != MFAIL) {
#if defined(__sun__) || defined(__DragonFly__) munmap(p, size);
alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */ } else if (errno == ENOMEM) {
if (alloc_hint + size < MMAP_REGION_END) continue; return MFAIL;
#endif }
if (retry) break; if (hint_addr) {
retry = 1; /* First, try linear probing. */
alloc_hint = MMAP_REGION_START; if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
hint_addr += 0x1000000;
if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
hint_addr = 0;
continue;
} else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
/* Next, try a no-hint probe to get back an ASLR address. */
hint_addr = 0;
continue;
}
}
/* Finally, try pseudo-random probing. */
if (LJ_UNLIKELY(hint_prng == 0)) {
hint_prng = mmap_probe_seed();
}
/* The unsuitable address we got has some ASLR PRNG bits. */
hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
do { /* The PRNG itself is very weak, but see above. */
hint_prng = hint_prng * 1103515245 + 12345;
hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
} while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
} }
errno = olderr; errno = olderr;
return CMFAIL; return MFAIL;
} }
#else
#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
#endif #endif
#else #if LJ_ALLOC_MMAP32
/* 32 bit mode and GC64 mode is easy. */ #if defined(__sun__)
static LJ_AINLINE void *CALL_MMAP(size_t size) #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
#else
#define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
#endif
/* Map 'size' bytes with mmap() using MAP_32BIT, hinted at
** LJ_ALLOC_MMAP32_START. The caller's errno is preserved across the mmap()
** call. When probing is compiled in (LJ_ALLOC_MMAP_PROBE), the first failed
** MAP_32BIT request switches this function over to mmap_probe() for this and
** all later requests.
** Returns the mapped address, or whatever failure value mmap()/mmap_probe()
** returned.
*/
static void *mmap_map32(size_t size)
{
  int saved_errno;
  void *mem;
#if LJ_ALLOC_MMAP_PROBE
  /* Sticky flag: set once a MAP_32BIT request has failed. */
  static int use_probe = 0;
  if (use_probe)
    return mmap_probe(size);
#endif
  saved_errno = errno;
  mem = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT,
	     MAP_32BIT|MMAP_FLAGS, -1, 0);
  errno = saved_errno;
#if LJ_ALLOC_MMAP_PROBE
  /* MAP_32BIT only allows 1GB on Linux. Fall back to probing to get 2GB. */
  if (mem == MFAIL) {
    use_probe = 1;
    return mmap_probe(size);
  }
#endif
  return mem;
}
#endif
#if LJ_ALLOC_MMAP32
#define CALL_MMAP(size) mmap_map32(size)
#elif LJ_ALLOC_MMAP_PROBE
#define CALL_MMAP(size) mmap_probe(size)
#else
static void *CALL_MMAP(size_t size)
{ {
int olderr = errno; int olderr = errno;
void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
errno = olderr; errno = olderr;
return ptr; return ptr;
} }
#endif
#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
#include <sys/resource.h>
/* One-time allocator setup for this platform: set both the soft and the
** hard RLIMIT_DATA limit of the process to 0x10000.
*/
static void init_mmap(void)
{
  struct rlimit data_limit;
  data_limit.rlim_max = 0x10000;
  data_limit.rlim_cur = data_limit.rlim_max;
  /* The result is deliberately ignored. Allocation may fail later instead. */
  setrlimit(RLIMIT_DATA, &data_limit);
}
#define INIT_MMAP() init_mmap()
#endif #endif
#define INIT_MMAP() ((void)0) static int CALL_MUNMAP(void *ptr, size_t size)
#define DIRECT_MMAP(s) CALL_MMAP(s)
static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
{ {
int olderr = errno; int olderr = errno;
int ret = munmap(ptr, size); int ret = munmap(ptr, size);
@ -282,10 +359,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
return ret; return ret;
} }
#if LJ_TARGET_LINUX #if LJ_ALLOC_MREMAP
/* Need to define _GNU_SOURCE to get the mremap prototype. */ /* Need to define _GNU_SOURCE to get the mremap prototype. */
static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
int flags)
{ {
int olderr = errno; int olderr = errno;
ptr = mremap(ptr, osz, nsz, flags); ptr = mremap(ptr, osz, nsz, flags);
@ -305,6 +381,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
#endif #endif
#ifndef INIT_MMAP
#define INIT_MMAP() ((void)0)
#endif
#ifndef DIRECT_MMAP
#define DIRECT_MMAP(s) CALL_MMAP(s)
#endif
#ifndef CALL_MREMAP #ifndef CALL_MREMAP
#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
#endif #endif