diff --git a/Documentation/devel/features.md b/Documentation/devel/features.md index cfe9873890..93f84e3b07 100644 --- a/Documentation/devel/features.md +++ b/Documentation/devel/features.md @@ -2963,7 +2963,8 @@ current process. The following resources are supported: - `RLIMIT_CORE` -- dummy, zero by default - `RLIMIT_RSS` -- dummy, no limit by default - `RLIMIT_NPROC` -- dummy, no limit by default -- `RLIMIT_NOFILE` -- implemented, default soft limit is 900, default hard limit is 65K +- `RLIMIT_NOFILE` -- implemented, equal to `sys.fds.limit` {ref}`manifest option <sys-fds-limit>` by + default - `RLIMIT_MEMLOCK` -- dummy, no limit by default - `RLIMIT_AS` -- dummy, no limit by default - `RLIMIT_LOCKS` -- dummy, no limit by default diff --git a/Documentation/manifest-syntax.rst b/Documentation/manifest-syntax.rst index cd320240c4..ce53210c4b 100644 --- a/Documentation/manifest-syntax.rst +++ b/Documentation/manifest-syntax.rst @@ -308,6 +308,21 @@ may improve performance for certain workloads but may also generate ``SIGSEGV/SIGBUS`` exceptions for some applications that specifically use invalid pointers (though this is not expected for most real-world applications). +.. _sys-fds-limit: + +Limit on open file descriptors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + sys.fds.limit = [NUM] + (default: 900) + +This specifies the maximum number of open file descriptors in the Gramine +process. More specifically, this option sets the ``RLIMIT_NOFILE`` resource +limit: it specifies a value one greater than the maximum file descriptor number +that can be opened by the Gramine process. + .. 
_stack-size: Stack size diff --git a/libos/src/bookkeep/libos_handle.c b/libos/src/bookkeep/libos_handle.c index 246eedb076..853d4a4c9e 100644 --- a/libos/src/bookkeep/libos_handle.c +++ b/libos/src/bookkeep/libos_handle.c @@ -146,10 +146,33 @@ int init_handle(void) { if (!create_lock(&handle_mgr_lock)) { return -ENOMEM; } + handle_mgr = create_mem_mgr(init_align_up(HANDLE_MGR_ALLOC)); if (!handle_mgr) { return -ENOMEM; } + + /* after fork, in the new child process, `libos_init` is run, hence this function too - but + * forked process will get its RLIMIT_NOFILE from the checkpoint */ + assert(g_pal_public_state); + if (g_pal_public_state->parent_process) + return 0; + + assert(g_manifest_root); + int64_t fds_limit_init64; + int ret = toml_int_in(g_manifest_root, "sys.fds.limit", + /*defaultval=*/get_rlimit_cur(RLIMIT_NOFILE), + &fds_limit_init64); + if (ret < 0) { + log_error("Cannot parse 'sys.fds.limit'"); + return -EINVAL; + } + if (fds_limit_init64 < 0) { + log_error("'sys.fds.limit' is negative (%ld)", fds_limit_init64); + return -EINVAL; + } + set_rlimit_cur(RLIMIT_NOFILE, (uint64_t)fds_limit_init64); + return 0; } diff --git a/libos/src/sys/libos_getrlimit.c b/libos/src/sys/libos_getrlimit.c index 69494a0972..0e1147569a 100644 --- a/libos/src/sys/libos_getrlimit.c +++ b/libos/src/sys/libos_getrlimit.c @@ -16,7 +16,17 @@ #include "linux_abi/sysinfo.h" /* - * TODO: implement actual limitation on each resource. + * The only resource limits that affect internal Gramine logic are: + * + * - RLIMIT_DATA: initially equal to `sys.brk.max_size` manifest option, affects `brk()` system + * call + * - RLIMIT_STACK: initially equal to `sys.stack.size` manifest option, but updating this limit + * does *not* affect the max stack size of the main thread for processes which + * are already running + * - RLIMIT_NOFILE: initially equal to `sys.fds.limit` manifest option, affects system calls that + * create new file descriptors, e.g. open(), dup(), pipe(), etc. 
+ * + * FIXME: implement actual limitation on each resource? * * The current behavor(i.e. sys_stack_size, brk_max_size) may be subject * to be fixed. diff --git a/libos/test/regression/meson.build b/libos/test/regression/meson.build index cbd141d51f..b6f201b24e 100644 --- a/libos/test/regression/meson.build +++ b/libos/test/regression/meson.build @@ -102,6 +102,7 @@ tests = { 'readdir': {}, 'rename_unlink': {}, 'rename_unlink_fchown': {}, + 'rlimit_nofile': {}, 'run_test': { 'include_directories': include_directories( # for `gramine_entry_api.h` diff --git a/libos/test/regression/rlimit_nofile.c b/libos/test/regression/rlimit_nofile.c new file mode 100644 index 0000000000..707fa40267 --- /dev/null +++ b/libos/test/regression/rlimit_nofile.c @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-3.0-or-later */ +/* Copyright (C) 2024 Intel Corporation */ + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/resource.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "common.h" + +int main(void) { + struct rlimit rlim; + + int dev_null_fd = CHECK(open("/dev/null", O_WRONLY, 0666)); + + CHECK(getrlimit(RLIMIT_NOFILE, &rlim)); + printf("old RLIMIT_NOFILE soft limit: %d\n", (int)rlim.rlim_cur); + int old_lim = (int)rlim.rlim_cur; + + /* make sure we can increase the current soft limit */ + if (old_lim <= 0 || old_lim >= (int)rlim.rlim_max) + CHECK(-1); + + int good_dup_fd = dup2(dev_null_fd, old_lim - 1); + CHECK(good_dup_fd); + printf("(before setrlimit) opened fd: %d\n", good_dup_fd); + CHECK(close(good_dup_fd)); + + int fail_dup_fd = dup2(dev_null_fd, old_lim); + if (fail_dup_fd != -1 || errno != EBADF) + CHECK(-1); + + rlim.rlim_cur++; + CHECK(setrlimit(RLIMIT_NOFILE, &rlim)); + printf("new RLIMIT_NOFILE soft limit: %d\n", (int)rlim.rlim_cur); + + fflush(stdout); + + int pid = CHECK(fork()); + if (pid == 0) { + /* verify that NOFILE limit is correctly migrated to the child process */ + good_dup_fd = dup2(dev_null_fd, old_lim); + CHECK(good_dup_fd); + printf("(in child, after setrlimit) 
opened fd: %d\n", good_dup_fd); + exit(0); + } else { + int status = 0; + CHECK(wait(&status)); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + errx(1, "child wait status: %#x", status); + } + + good_dup_fd = dup2(dev_null_fd, old_lim); + CHECK(good_dup_fd); + printf("(after setrlimit) opened fd: %d\n", good_dup_fd); + CHECK(close(good_dup_fd)); + + CHECK(close(dev_null_fd)); + puts("TEST OK"); + return 0; +} diff --git a/libos/test/regression/rlimit_nofile_4k.manifest.template b/libos/test/regression/rlimit_nofile_4k.manifest.template new file mode 100644 index 0000000000..6f6fe02396 --- /dev/null +++ b/libos/test/regression/rlimit_nofile_4k.manifest.template @@ -0,0 +1,22 @@ +{% set entrypoint = "rlimit_nofile" -%} + +libos.entrypoint = "{{ entrypoint }}" + +loader.env.LD_LIBRARY_PATH = "/lib" + +fs.mounts = [ + { path = "/lib", uri = "file:{{ gramine.runtimedir(libc) }}" }, + { path = "/{{ entrypoint }}", uri = "file:{{ binary_dir }}/{{ entrypoint }}" }, +] + +sys.fds.limit = 4096 + +sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '4' }} +sgx.debug = true +sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} +sgx.use_exinfo = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} + +sgx.trusted_files = [ + "file:{{ gramine.runtimedir(libc) }}/", + "file:{{ binary_dir }}/{{ entrypoint }}", +] diff --git a/libos/test/regression/test_libos.py b/libos/test/regression/test_libos.py index 55ed2c56be..57ab164f74 100644 --- a/libos/test/regression/test_libos.py +++ b/libos/test/regression/test_libos.py @@ -1064,6 +1064,26 @@ def test_150_itimer(self): stdout, _ = self.run_binary(['itimer']) self.assertIn("TEST OK", stdout) + def test_160_rlimit_nofile(self): + # uses manifest.template + stdout, _ = self.run_binary(['rlimit_nofile']) + self.assertIn("old RLIMIT_NOFILE soft limit: 900", stdout) + self.assertIn("(before setrlimit) opened fd: 899", stdout) + self.assertIn("new RLIMIT_NOFILE soft limit: 901", stdout) + 
self.assertIn("(in child, after setrlimit) opened fd: 900", stdout) + self.assertIn("(after setrlimit) opened fd: 900", stdout) + self.assertIn("TEST OK", stdout) + + def test_161_rlimit_nofile_4k(self): + # uses rlimit_nofile_4k.manifest.template + stdout, _ = self.run_binary(['rlimit_nofile_4k']) + self.assertIn("old RLIMIT_NOFILE soft limit: 4096", stdout) + self.assertIn("(before setrlimit) opened fd: 4095", stdout) + self.assertIn("new RLIMIT_NOFILE soft limit: 4097", stdout) + self.assertIn("(in child, after setrlimit) opened fd: 4096", stdout) + self.assertIn("(after setrlimit) opened fd: 4096", stdout) + self.assertIn("TEST OK", stdout) + class TC_31_Syscall(RegressionTestCase): def test_000_syscall_redirect(self): stdout, _ = self.run_binary(['syscall']) diff --git a/libos/test/regression/tests.toml b/libos/test/regression/tests.toml index 16033ebc79..23fa2fc5c6 100644 --- a/libos/test/regression/tests.toml +++ b/libos/test/regression/tests.toml @@ -101,6 +101,8 @@ manifests = [ "readdir", "rename_unlink", "rename_unlink_fchown", + "rlimit_nofile", + "rlimit_nofile_4k", "run_test", "rwlock", "sched", diff --git a/libos/test/regression/tests_musl.toml b/libos/test/regression/tests_musl.toml index 2c5de8d5ee..7a3acc3743 100644 --- a/libos/test/regression/tests_musl.toml +++ b/libos/test/regression/tests_musl.toml @@ -103,6 +103,8 @@ manifests = [ "readdir", "rename_unlink", "rename_unlink_fchown", + "rlimit_nofile", + "rlimit_nofile_4k", "run_test", "rwlock", "sched", diff --git a/python/graminelibos/manifest_check.py b/python/graminelibos/manifest_check.py index 94d19a316a..bfba896527 100644 --- a/python/graminelibos/manifest_check.py +++ b/python/graminelibos/manifest_check.py @@ -130,5 +130,7 @@ 'debug__mock_syscalls': [{Required('name'): str, 'return': int}], 'stack': {'size': _size}, + + 'fds': {'limit': int}, }, })