Skip to content

Commit

Permalink
Make global/static variables thread_local; use mmap() for managing me…
Browse files Browse the repository at this point in the history
…mory (#1161)

Memory allocation is simplified by using mmap() to allocate a single 1TB
block of addresses and relying on demand paging. Global and static
variables are made thread_local and setThreadLocal(true) is used on such
variables in the code generator.

---------

Co-authored-by: Steven Eker <[email protected]>
Co-authored-by: Dwight Guth <[email protected]>
  • Loading branch information
3 people authored Nov 26, 2024
1 parent f18f71c commit 0f72ec9
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 59 deletions.
2 changes: 1 addition & 1 deletion config/llvm_header.inc
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ declare void @write_configuration_to_proof_trace(ptr, ptr, i1)
@current_interval = thread_local global i64 0
@GC_THRESHOLD = thread_local global i64 @GC_THRESHOLD@
@gc_roots = global [256 x ptr] zeroinitializer
@gc_roots = thread_local global [256 x ptr] zeroinitializer
define i64 @get_gc_threshold() {
%threshold = load i64, ptr @GC_THRESHOLD
Expand Down
9 changes: 8 additions & 1 deletion include/runtime/arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,19 @@ using memory_block_header = struct {
// Macro to define a new arena with the given ID. Supports IDs ranging from 0 to
// 127.
#define REGISTER_ARENA(name, id) \
static struct arena name = {.allocation_semispace_id = (id)}
static thread_local struct arena name = {.allocation_semispace_id = (id)}

// Rounds (ptr - 1) down to a multiple of BLOCK_SIZE and yields the result as a
// char *. Because of the -1, a ptr that sits exactly on a block boundary maps
// to the start of the preceding block rather than to itself.
// NOTE(review): the mask only rounds correctly if BLOCK_SIZE is a power of
// two - confirm against its definition.
#define MEM_BLOCK_START(ptr) \
((char *)(((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)))

#ifdef __MACH__
//
// thread_local disabled for Apple
//
extern bool time_for_collection;
#else
extern thread_local bool time_for_collection;
#endif

size_t get_gc_threshold();

Expand Down
4 changes: 2 additions & 2 deletions include/runtime/collect.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ using set_node = set::iterator::node_t;
using set_impl = set::iterator::tree_t;

extern "C" {
extern size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
extern bool collect_old;
extern thread_local size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
extern thread_local bool collect_old;
size_t get_size(uint64_t, uint16_t);
void migrate_static_roots(void);
void migrate(block **block_ptr);
Expand Down
8 changes: 7 additions & 1 deletion include/runtime/header.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,14 @@ size_t hash_k(block *);
void k_hash(block *, void *);
bool hash_enter(void);
void hash_exit(void);

#ifdef __MACH__
//
// thread_local disabled for Apple
//
extern bool gc_enabled;
#else
extern thread_local bool gc_enabled;
#endif
}

class k_elem {
Expand Down
35 changes: 32 additions & 3 deletions lib/codegen/CreateTerm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,18 +782,47 @@ llvm::Value *create_term::disable_gc() {
llvm::Constant *global
= module_->getOrInsertGlobal("gc_enabled", llvm::Type::getInt1Ty(ctx_));
auto *global_var = llvm::cast<llvm::GlobalVariable>(global);
#ifdef __MACH__
//
// thread_local disabled for Apple
//
/*
global_var->setThreadLocal(true);
llvm::IRBuilder b(current_block_);
auto *global_var_address = b.CreateThreadLocalAddress(global_var);
*/
auto *global_var_address = global_var;
#else
global_var->setThreadLocal(true);
auto *global_var_address = global_var;
#endif
auto *old_val = new llvm::LoadInst(
llvm::Type::getInt1Ty(ctx_), global_var, "was_enabled", current_block_);
llvm::Type::getInt1Ty(ctx_), global_var_address, "was_enabled",
current_block_);
new llvm::StoreInst(
llvm::ConstantInt::getFalse(ctx_), global_var, current_block_);
llvm::ConstantInt::getFalse(ctx_), global_var_address, current_block_);
return old_val;
}

// Emits a store of was_enabled into the gc_enabled global, using
// current_block_ as the insertion block. The global is looked up (or declared
// on first use) in module_ as an i1.
void create_term::enable_gc(llvm::Value *was_enabled) {
llvm::Constant *global
= module_->getOrInsertGlobal("gc_enabled", llvm::Type::getInt1Ty(ctx_));
auto *global_var = llvm::cast<llvm::GlobalVariable>(global);
// NOTE(review): this store writes the same value to the same global as the
// store at the end of the function; it looks like the pre-change line kept by
// the diff rendering - confirm before relying on it.
new llvm::StoreInst(was_enabled, global_var, current_block_);
#ifdef __MACH__
//
// thread_local disabled for Apple
//
/*
global_var->setThreadLocal(true);
llvm::IRBuilder b(current_block_);
auto *global_var_address = b.CreateThreadLocalAddress(global_var);
*/
auto *global_var_address = global_var;
#else
// Mark the global thread-local, matching the thread_local definition of
// gc_enabled in the runtime for non-Apple builds.
global_var->setThreadLocal(true);
auto *global_var_address = global_var;
#endif
new llvm::StoreInst(was_enabled, global_var_address, current_block_);
}

// We use tailcc calling convention for apply_rule_* and eval_* functions to
Expand Down
21 changes: 19 additions & 2 deletions lib/codegen/Decision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "kllvm/codegen/ProofEvent.h"
#include "kllvm/codegen/Util.h"

#include "llvm/IR/IRBuilder.h"
#include <llvm/ADT/APInt.h>
#include <llvm/ADT/SmallString.h>
#include <llvm/ADT/StringMap.h>
Expand Down Expand Up @@ -1006,9 +1007,25 @@ std::pair<std::vector<llvm::Value *>, llvm::BasicBlock *> step_function_header(

auto *collection = module->getOrInsertGlobal(
"time_for_collection", llvm::Type::getInt1Ty(module->getContext()));

#ifdef __MACH__
//
// thread_local disabled for Apple
//
/*
llvm::cast<llvm::GlobalVariable>(collection)->setThreadLocal(true);
llvm::IRBuilder b(check_collect);
auto *collection_address = b.CreateThreadLocalAddress(collection);
*/
auto *collection_address = collection;
#else
llvm::cast<llvm::GlobalVariable>(collection)->setThreadLocal(true);
auto *collection_address = collection;
#endif

auto *is_collection = new llvm::LoadInst(
llvm::Type::getInt1Ty(module->getContext()), collection, "is_collection",
check_collect);
llvm::Type::getInt1Ty(module->getContext()), collection_address,
"is_collection", check_collect);
set_debug_loc(is_collection);
auto *collect = llvm::BasicBlock::Create(
module->getContext(), "isCollect", block->getParent());
Expand Down
100 changes: 64 additions & 36 deletions runtime/alloc/arena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <sys/mman.h>

#include "runtime/alloc.h"
#include "runtime/arena.h"
Expand Down Expand Up @@ -47,36 +49,68 @@ get_arena_semispace_id_of_object(void *ptr) {
return mem_block_header(ptr)->semispace;
}

static void *first_superblock_ptr = nullptr;
static void *superblock_ptr = nullptr;
static char **next_superblock_ptr = nullptr;
static unsigned blocks_left = 0;
//
// We will reserve enough address space for 1 million 1MB blocks. Might want to increase this on a > 1TB server.
//
size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
static thread_local void *hyperblock_ptr = nullptr; // only needed for munmap()

static void *megabyte_malloc() {
if (blocks_left == 0) {
blocks_left = 15;
if (int result
= posix_memalign(&superblock_ptr, BLOCK_SIZE, BLOCK_SIZE * 15)) {
errno = result;
perror("posix_memalign");
}
if (!first_superblock_ptr) {
first_superblock_ptr = superblock_ptr;
}
if (next_superblock_ptr) {
*next_superblock_ptr = (char *)superblock_ptr;
//
// Return pointer to a BLOCK_SIZE chunk of memory with BLOCK_SIZE alignment.
//
static thread_local char *currentblock_ptr
= nullptr; // char* rather than void* to permit pointer arithmetic
if (currentblock_ptr) {
//
// We expect a page fault (because physical memory cannot be mapped to this block) or the
// process to be killed by the OOM killer long before we run off the end of our address space.
//
currentblock_ptr += BLOCK_SIZE;
} else {
//
// First call - need to reserve the address space.
//
size_t request = HYPERBLOCK_SIZE;
void *addr = mmap(
nullptr, // let OS choose the address
request, // Linux and MacOS both allow up to 64TB
PROT_READ | PROT_WRITE, // read, write but not execute
MAP_ANONYMOUS | MAP_PRIVATE
| MAP_NORESERVE, // allocate address space only
-1, // no file backing
0); // no offset
if (addr == MAP_FAILED) {
perror("mmap()");
abort();
}
auto *hdr = (memory_block_header *)superblock_ptr;
next_superblock_ptr = &hdr->next_superblock;
hdr->next_superblock = nullptr;
hyperblock_ptr = addr;
//
// We ask for one block's worth of address space less than we allocated so alignment will always succeed.
// We don't worry about unused address space on either side of our aligned address space because there will be no
// memory mapped to it.
//
currentblock_ptr = reinterpret_cast<char *>(
std::align(BLOCK_SIZE, HYPERBLOCK_SIZE - BLOCK_SIZE, addr, request));
}
blocks_left--;
void *result = superblock_ptr;
superblock_ptr = (char *)superblock_ptr + BLOCK_SIZE;
return result;
return currentblock_ptr;
}

//
// Releases the calling thread's reserved hyperblock, and with it all memory
// that was demand paged into that address range.
//
// Does nothing if no hyperblock is recorded for this thread (hyperblock_ptr is
// still nullptr). Without that guard munmap() would be asked to unmap
// HYPERBLOCK_SIZE bytes starting at address 0, which can tear down unrelated
// mappings that happen to live in that range.
//
// NOTE(review): megabyte_malloc() keeps its own function-local cursor into the
// hyperblock, which this function cannot reset; allocating again on the same
// thread after this call looks unsafe - confirm callers never do that.
//
void free_all_memory() {
  if (hyperblock_ptr == nullptr) {
    return;
  }
  if (munmap(hyperblock_ptr, HYPERBLOCK_SIZE) != 0) {
    //
    // Report the failure but keep the pointer: the mapping is still in place.
    //
    perror("munmap()");
    return;
  }
  //
  // Forget the mapping so a repeated call cannot unmap an address range that
  // may have been reused for something else in the meantime.
  //
  hyperblock_ptr = nullptr;
}

#ifdef __MACH__
//
// thread_local disabled for Apple
//
bool time_for_collection;
#else
thread_local bool time_for_collection;
#endif

static void fresh_block(struct arena *arena) {
char *next_block = nullptr;
Expand Down Expand Up @@ -122,7 +156,14 @@ static void fresh_block(struct arena *arena) {
BLOCK_SIZE - sizeof(memory_block_header));
}

#ifdef __MACH__
//
// thread_local disabled for Apple
//
bool gc_enabled = true;
#else
thread_local bool gc_enabled = true;
#endif

__attribute__((noinline)) void *
do_alloc_slow(size_t requested, struct arena *arena) {
Expand Down Expand Up @@ -229,16 +270,3 @@ size_t arena_size(const struct arena *arena) {
: arena->num_collection_blocks)
* (BLOCK_SIZE - sizeof(memory_block_header));
}

// Walks the chain of superblocks starting at first_superblock_ptr, freeing
// each one, then puts the allocator bookkeeping variables back to their
// initial values (null pointers, zero blocks left).
void free_all_memory() {
  for (auto *cursor = (memory_block_header *)first_superblock_ptr;
       cursor != nullptr;) {
    // Read the link before free() invalidates the header it is stored in.
    auto *following = (memory_block_header *)cursor->next_superblock;
    free(cursor);
    cursor = following;
  }
  blocks_left = 0;
  next_superblock_ptr = nullptr;
  superblock_ptr = nullptr;
  first_superblock_ptr = nullptr;
}
2 changes: 1 addition & 1 deletion runtime/alloc/register_gc_roots_enum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "runtime/collect.h"
#include "runtime/header.h"

std::vector<BlockEnumerator> block_enumerators;
thread_local std::vector<BlockEnumerator> block_enumerators;

void register_gc_roots_enumerator(BlockEnumerator f) {
block_enumerators.push_back(f);
Expand Down
4 changes: 2 additions & 2 deletions runtime/arithmetic/int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,8 +373,8 @@ void int_hash(mpz_t i, void *hasher) {
}
}

gmp_randstate_t kllvm_rand_state;
bool kllvm_rand_state_initialized = false;
thread_local gmp_randstate_t kllvm_rand_state;
thread_local bool kllvm_rand_state_initialized = false;

SortK hook_INT_srand(SortInt seed) {
if (!kllvm_rand_state_initialized) {
Expand Down
10 changes: 5 additions & 5 deletions runtime/collect/collect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ char **old_alloc_ptr(void);
char *youngspace_ptr(void);
char *oldspace_ptr(void);

static bool is_gc = false;
bool collect_old = false;
static thread_local bool is_gc = false;
bool thread_local collect_old = false;
#ifndef GC_DBG
static uint8_t num_collection_only_young = 0;
static thread_local uint8_t num_collection_only_young = 0;
#else
static char *last_alloc_ptr;
static thread_local char *last_alloc_ptr;
#endif

size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
size_t thread_local numBytesLiveAtCollection[1 << AGE_WIDTH];

bool during_gc() {
return is_gc;
Expand Down
6 changes: 3 additions & 3 deletions runtime/collect/migrate_static_roots.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

#include "runtime/collect.h"

extern std::vector<BlockEnumerator> block_enumerators;
extern thread_local std::vector<BlockEnumerator> block_enumerators;

extern gmp_randstate_t kllvm_rand_state;
extern bool kllvm_rand_state_initialized;
extern thread_local gmp_randstate_t kllvm_rand_state;
extern thread_local bool kllvm_rand_state_initialized;

extern "C" {

Expand Down
4 changes: 2 additions & 2 deletions runtime/lto/alloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ static inline void *kore_alloc_collection(kllvm::sort_category cat) {
void *mem
= kore_alloc(sizeof(blockheader) + sizeof(collection) + sizeof(uint64_t));
auto *hdr = (blockheader *)mem;
static std::string name = get_raw_symbol_name(cat) + "{}";
static blockheader hdr_val
static thread_local std::string name = get_raw_symbol_name(cat) + "{}";
static thread_local blockheader hdr_val
= get_block_header_for_symbol(get_tag_for_symbol_name(name.c_str()));
*hdr = hdr_val;
auto *offset = (uint64_t *)(hdr + 1);
Expand Down
8 changes: 8 additions & 0 deletions unittests/runtime-collections/lists.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,15 @@ block D1 = {{1}};
block *DUMMY1 = &D1;
}

#ifdef __MACH__
//
// thread_local disabled for Apple
//
bool gc_enabled;
#else
thread_local bool gc_enabled;
#endif

// Stub used by this unit test: reports the largest size_t value as the GC
// threshold.
size_t get_gc_threshold() {
  constexpr size_t no_limit = SIZE_MAX;
  return no_limit;
}
Expand Down

0 comments on commit 0f72ec9

Please sign in to comment.