Skip to content

Commit

Permalink
X jit-max-code-size initial implementation
Browse files Browse the repository at this point in the history
Summary:
Initial go at implementing a limit on the amount of memory the JIT compiler will use. The implementation is reactive: it stops compiling once the limit has been breached, so the limit will always be exceeded by some amount. It is not proactive, in that it does not abort during JIT compilation.

This version only tests the cinder allocator and not the slab or non-huge-page ones. Maybe they are worth testing as a follow-up?

Reviewed By: alexmalyshev

Differential Revision: D55872896

fbshipit-source-id: 8e225ef65ec64a3c006e61b09749890e2271c610
  • Loading branch information
SonicField authored and facebook-github-bot committed Apr 10, 2024
1 parent 8e6058f commit 2d763aa
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 9 deletions.
1 change: 1 addition & 0 deletions cinderx/Jit/code_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ asmjit::Error MultipleSectionCodeAllocator::addCode(
*dst = nullptr;

size_t potential_code_size = code->codeSize();
used_bytes_ += potential_code_size;
// We fall back to the default size of code allocation if the
// code doesn't fit into either section, and we can make this check more
// granular by comparing sizes section-by-section.
Expand Down
13 changes: 8 additions & 5 deletions cinderx/Jit/code_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "cinderx/ThirdParty/asmjit/src/asmjit/asmjit.h"

#include <atomic>
#include <memory>
#include <vector>

Expand Down Expand Up @@ -42,18 +43,25 @@ class CodeAllocator {

static void freeGlobalCodeAllocator();

// Total number of code bytes charged to this allocator so far, i.e. the
// running sum accumulated in used_bytes_ by addCode().
size_t usedBytes() const {
  return used_bytes_.load();
}

// Returns the environment reported by the underlying asmjit runtime.
// Const-qualified because it only reads through runtime_, so it can also be
// called on a const CodeAllocator (matching usedBytes()).
const asmjit::Environment& asmJitEnvironment() const {
  return runtime_->environment();
}

// Default allocation path: forwards `dst` and `code` to runtime_->add() and
// returns its error code unchanged.
virtual asmjit::Error addCode(void** dst, asmjit::CodeHolder* code) noexcept {
  // The code size is charged to used_bytes_ before the runtime is asked to
  // place the code, so the counter grows regardless of the result of add().
  const size_t code_size = code->codeSize();
  used_bytes_.fetch_add(code_size);
  return runtime_->add(dst, code);
}

protected:
std::unique_ptr<asmjit::JitRuntime> runtime_{
std::make_unique<asmjit::JitRuntime>()};

std::atomic<size_t> used_bytes_{0};

private:
static CodeAllocator* s_global_code_allocator_;
};
Expand All @@ -65,10 +73,6 @@ class CodeAllocatorCinder : public CodeAllocator {

asmjit::Error addCode(void** dst, asmjit::CodeHolder* code) noexcept override;

// Returns the current value of used_bytes_.
size_t usedBytes() const {
return used_bytes_;
}

// Returns lost_bytes_: the total number of bytes lost when an allocation did
// not fit into the space remaining in a chunk and a new chunk was allocated.
size_t lostBytes() const {
return lost_bytes_;
}
Expand All @@ -90,7 +94,6 @@ class CodeAllocatorCinder : public CodeAllocator {
// Free space in the current chunk
size_t current_alloc_free_{0};

size_t used_bytes_{0};
// Number of bytes in total lost when allocations didn't fit neatly into
// the bytes remaining in a chunk so a new one was allocated.
size_t lost_bytes_{0};
Expand Down
2 changes: 2 additions & 0 deletions cinderx/Jit/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ struct Config {
// multiple code sections are enabled.
size_t cold_code_section_size{0};
size_t hot_code_section_size{0};
// Memory threshold after which we stop jitting.
size_t max_code_size{0};
// Size (in number of entries) of the LoadAttrCached and StoreAttrCached
// inline caches used by the JIT.
uint32_t attr_cache_size{1};
Expand Down
84 changes: 80 additions & 4 deletions cinderx/Jit/pyjit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "cinderx/Jit/type_profiler.h"

#include <atomic>
#include <charconv>
#include <chrono>
#include <climits>
#include <cstddef>
Expand Down Expand Up @@ -259,6 +260,49 @@ static void warnJITOff(const char* flag) {
JIT_LOG("Warning: JIT disabled; {} has no effect", flag);
}

// Parse a human-readable size such as "4096", "64k", " 16 M" or "1G" into a
// number of bytes.
//
// Whitespace anywhere in `val` is ignored. An optional, case-insensitive
// trailing suffix selects a binary multiplier: K = 1024, M = 1024^2,
// G = 1024^3. Without a suffix the number is taken as a byte count.
//
// Every validation step is a JIT_CHECK, so the following inputs fail the
// check rather than returning: an empty (or all-whitespace) string, a trailing
// character that is neither a digit nor a known suffix, a number that is not a
// plain unsigned decimal integer representable in size_t, and a scaled result
// that would overflow size_t.
static size_t parse_sized_argument(const std::string& val) {
  std::string parsed;
  parsed.reserve(val.size());
  // " 1024 k" should parse OK - so remove the space.
  // The <cctype> classifiers have undefined behaviour for negative arguments,
  // so each byte is converted to unsigned char before being classified; this
  // matters for non-ASCII bytes on platforms where plain char is signed.
  std::remove_copy_if(
      val.begin(), val.end(), std::back_inserter(parsed), [](unsigned char c) {
        return std::isspace(c) != 0;
      });
  JIT_CHECK(!parsed.empty(), "Input string is empty");

  size_t scale = 1;
  // "1024k" and "1024K" are the same - so upper case.
  const char lastChar = static_cast<char>(
      std::toupper(static_cast<unsigned char>(parsed.back())));
  switch (lastChar) {
    case 'K':
      scale = size_t{1024};
      parsed.pop_back();
      break;
    case 'M':
      scale = size_t{1024} * 1024;
      parsed.pop_back();
      break;
    case 'G':
      scale = size_t{1024} * 1024 * 1024;
      parsed.pop_back();
      break;
    default:
      JIT_CHECK(
          std::isdigit(static_cast<unsigned char>(lastChar)),
          "Invalid character in input string: {}",
          val);
  }

  // std::from_chars parses straight into size_t, accepts no sign, no leading
  // whitespace and no locale-specific digits, and reports out-of-range values
  // through `ec`. Requiring `ptr == p_last` rejects trailing junk ("1.1") and,
  // together with `ec`, a bare suffix such as "k".
  size_t ret_value{0};
  const char* p_last = parsed.data() + parsed.size();
  const auto int_ok = std::from_chars(parsed.data(), p_last, ret_value);
  JIT_CHECK(
      int_ok.ec == std::errc() && int_ok.ptr == p_last,
      "Invalid unsigned integer in input string: '{}'",
      val);
  // Guard the multiplication below: ret_value * scale must fit in size_t.
  JIT_CHECK(
      ret_value <= (std::numeric_limits<size_t>::max() / scale),
      "Unsigned Integer overflow in input string: '{}'",
      val);
  return ret_value * scale;
}

void initFlagProcessor() {
use_jit = 0;
read_profile_file = "";
Expand Down Expand Up @@ -743,6 +787,20 @@ void initFlagProcessor() {
"PERFTRAMPOLINEPREFORKCOMPILATION",
getMutableConfig().compile_perf_trampoline_prefork,
"Compile perf trampoline pre-fork");

xarg_flag_processor.addOption(
"jit-max-code-size",
"",
[](const std::string& val) {
if (use_jit) {
getMutableConfig().max_code_size = parse_sized_argument(val);
} else {
warnJITOff("jit-max-code-size");
}
},
"Set the maximum code size for JIT in bytes (no suffix). For kilobytes "
"use k or K as a suffix. "
"Megabytes is m or M and gigabytes is g or G. 0 implies no limit.");
}

xarg_flag_processor.setFlags(PySys_GetXOptions());
Expand Down Expand Up @@ -1454,20 +1512,32 @@ static PyObject* jit_suppress(PyObject*, PyObject* func_obj) {
}

static PyObject* get_allocator_stats(PyObject*, PyObject*) {
auto allocator = dynamic_cast<CodeAllocatorCinder*>(CodeAllocator::get());
if (allocator == nullptr) {
auto base_allocator = CodeAllocator::get();
if (base_allocator == nullptr) {
Py_RETURN_NONE;
}

auto stats = Ref<>::steal(PyDict_New());
if (stats == nullptr) {
return nullptr;
}

auto used_bytes = Ref<>::steal(PyLong_FromLong(allocator->usedBytes()));
auto used_bytes = Ref<>::steal(PyLong_FromLong(base_allocator->usedBytes()));
if (used_bytes == nullptr ||
PyDict_SetItemString(stats, "used_bytes", used_bytes) < 0) {
return nullptr;
}
auto max_bytes = Ref<>::steal(PyLong_FromLong(getConfig().max_code_size));
if (max_bytes == nullptr ||
PyDict_SetItemString(stats, "max_bytes", max_bytes) < 0) {
return nullptr;
}

auto allocator = dynamic_cast<CodeAllocatorCinder*>(base_allocator);
if (allocator == nullptr) {
return stats.release();
}

auto lost_bytes = Ref<>::steal(PyLong_FromLong(allocator->lostBytes()));
if (lost_bytes == nullptr ||
PyDict_SetItemString(stats, "lost_bytes", lost_bytes) < 0) {
Expand Down Expand Up @@ -2145,7 +2215,13 @@ int _PyJIT_RegisterFunction(PyFunctionObject* func) {
return 1;
}

if (!_PyJIT_IsEnabled()) {
bool skip = !_PyJIT_IsEnabled();
auto max_code_size = getConfig().max_code_size;
if ((!skip) && max_code_size) {
skip = CodeAllocator::get()->usedBytes() >= max_code_size;
}

if (skip) {
if (_PyPerfTrampoline_IsPreforkCompilationEnabled()) {
perf_trampoline_reg_units.emplace(reinterpret_cast<PyObject*>(func));
}
Expand Down
143 changes: 143 additions & 0 deletions cinderx/PythonLib/test_cinderx/test_cinderjit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4323,6 +4323,149 @@ def g():

self.assertEqual(cinderjit.get_num_inlined_functions(g), 1)

def test_max_code_size_slow(self):
# End-to-end check of the `-X jit-max-code-size` option: a generated script is
# run in a subprocess under `-X jit`, defines and calls 2000 small functions,
# and then prints the "max_bytes" and "used_bytes" entries returned by
# cinderjit.get_allocator_stats().
# NOTE(review): in the script `x` starts at 0, so `x *= junk{i}(i)` leaves it
# at 0; the calls appear to exist only to give the JIT functions to compile.
code = textwrap.dedent(
"""
import cinderjit
for i in range(2000):
exec(f'''
def junk{i}(j):
j = j + 1
s = f'dogs {i} ' + str(j)
if s == '23':
j += 2
return j*2+{i}
''')
x = 0
for i in range(2000):
exec(f'x *= junk{i}(i)')
max_bytes = cinderjit.get_allocator_stats()["max_bytes"]
used_bytes = cinderjit.get_allocator_stats()["used_bytes"]
print(f'max_size: {max_bytes}')
print(f'used_size: {used_bytes}')
"""
)
with tempfile.TemporaryDirectory() as tmp:
dirpath = Path(tmp)
codepath = dirpath / "mod.py"
codepath.write_text(code)

# Runs mod.py with `-X jit` plus the extra `params`, requires exit code 0 and
# hands the stripped stdout lines to `asserts_func`.
# The mutable default for `params` is only read (passed to extend), never
# modified.
def run_test(asserts_func, params=[]):
args = [sys.executable, "-X", "jit"]
args.extend(params)
args.append("mod.py")
proc = subprocess.run(
args, cwd=tmp, stdout=subprocess.PIPE, encoding=sys.stdout.encoding
)
self.assertEqual(proc.returncode, 0, proc)
actual_stdout = [x.strip() for x in proc.stdout.split("\n")]
asserts_func(actual_stdout)

# Expectations for a limit of 0: max_size is reported as 0 and some code was
# still compiled (used_size > 0).
def zero_asserts(actual_stdout):
expected_stdout = "max_size: 0"
self.assertEqual(actual_stdout[0], expected_stdout)
self.assertIn("used_size", actual_stdout[1])
used_size = int(actual_stdout[1].split(" ")[1])
self.assertGreater(used_size, 0)

# Expectations for a limit of 1024 bytes: the limit is reported back, usage
# ends up above the limit, but stays below 200K.
def onek_asserts(actual_stdout):
expected_stdout = "max_size: 1024"
self.assertEqual(actual_stdout[0], expected_stdout)
self.assertIn("used_size", actual_stdout[1])
used_size = int(actual_stdout[1].split(" ")[1])
self.assertGreater(used_size, 1024)
# This is a bit fragile because it depends on what the initial 'zeroth'
# allocation is; we assume < 200K.
self.assertLess(used_size, 1024 * 200)

run_test(zero_asserts, ["-X", f"jit-max-code-size=0"])
run_test(onek_asserts, ["-X", f"jit-max-code-size=1024"])

def test_max_code_size_fast(self):
# Checks how the `-X jit-max-code-size` value is parsed: a tiny script that
# only prints get_allocator_stats()["max_bytes"] is run in a subprocess with
# various option values, first valid ones (stdout is compared) and then
# invalid ones (exit status and stderr are compared).
code = textwrap.dedent(
"""
import cinderjit
max_bytes = cinderjit.get_allocator_stats()["max_bytes"]
print(f'max_size: {max_bytes}')
"""
)
with tempfile.TemporaryDirectory() as tmp:
dirpath = Path(tmp)
codepath = dirpath / "mod.py"
codepath.write_text(code)

# Success-path runner: reads `args` from the enclosing scope (it is rebound
# before every call), requires exit code 0 and returns the first stdout line.
def run_proc():
proc = subprocess.run(
args, cwd=tmp, stdout=subprocess.PIPE, encoding=sys.stdout.encoding
)
self.assertEqual(proc.returncode, 0, proc)
actual_stdout = [x.strip() for x in proc.stdout.split("\n")]
return actual_stdout[0]

# Without the option the limit is reported as 0; plain byte counts and the
# k/K, m/M, g/G suffixes scale by 1024, 1024**2 and 1024**3.
args = [sys.executable, "-X", "jit", "mod.py"]
self.assertEqual(run_proc(), "max_size: 0")
args = [
sys.executable,
"-X",
"jit",
"-X",
"jit-max-code-size=1234567",
"mod.py",
]
self.assertEqual(run_proc(), "max_size: 1234567")
args = [sys.executable, "-X", "jit", "-X", "jit-max-code-size=1k", "mod.py"]
self.assertEqual(run_proc(), "max_size: 1024")
args = [sys.executable, "-X", "jit", "-X", "jit-max-code-size=1K", "mod.py"]
self.assertEqual(run_proc(), "max_size: 1024")
args = [sys.executable, "-X", "jit", "-X", "jit-max-code-size=1m", "mod.py"]
self.assertEqual(run_proc(), "max_size: 1048576")
args = [sys.executable, "-X", "jit", "-X", "jit-max-code-size=1M", "mod.py"]
self.assertEqual(run_proc(), "max_size: 1048576")
args = [sys.executable, "-X", "jit", "-X", "jit-max-code-size=1g", "mod.py"]
self.assertEqual(run_proc(), "max_size: 1073741824")
args = [sys.executable, "-X", "jit", "-X", "jit-max-code-size=1G", "mod.py"]
self.assertEqual(run_proc(), "max_size: 1073741824")

# Failure-path runner (replaces the one above): captures stderr instead of
# stdout and requires return code -6.
# NOTE(review): -6 presumably means the child was killed by SIGABRT after a
# failed JIT_CHECK - confirm on the target platform.
def run_proc():
proc = subprocess.run(
args, cwd=tmp, stderr=subprocess.PIPE, encoding=sys.stdout.encoding
)
self.assertEqual(proc.returncode, -6, proc)
return proc.stderr

# Each invalid value must produce the matching parse_sized_argument message.
args = [sys.executable, "-X", "jit", "-X", "jit-max-code-size=-1", "mod.py"]
self.assertIn("Invalid unsigned integer in input string: '-1'", run_proc())
args = [
sys.executable,
"-X",
"jit",
"-X",
"jit-max-code-size=1.1",
"mod.py",
]
self.assertIn("Invalid unsigned integer in input string: '1.1'", run_proc())
args = [
sys.executable,
"-X",
"jit",
"-X",
"jit-max-code-size=dogs",
"mod.py",
]
self.assertIn("Invalid character in input string", run_proc())
# 1152921504606846976 is 2**60; scaling by g (2**30) cannot fit in 64 bits.
args = [
sys.executable,
"-X",
"jit",
"-X",
"jit-max-code-size=1152921504606846976g",
"mod.py",
]
self.assertIn(
"Unsigned Integer overflow in input string: '1152921504606846976g'",
run_proc(),
)


@cinder_support.failUnlessJITCompiled
def _outer(inner):
Expand Down

0 comments on commit 2d763aa

Please sign in to comment.