Skip to content

Commit

Permalink
[LibOS] Introduce sys.fds.limit manifest option
Browse files Browse the repository at this point in the history
This manifest option allows modifying the original `RLIMIT_NOFILE`
resource limit. There is *no* way to propagate this limit from the host;
this is a deliberate design choice.

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
  • Loading branch information
Dmitrii Kuvaiskii authored and mkow committed Aug 26, 2024
1 parent 68b9602 commit ca534ce
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 2 deletions.
3 changes: 2 additions & 1 deletion Documentation/devel/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -2963,7 +2963,8 @@ current process. The following resources are supported:
- `RLIMIT_CORE` -- dummy, zero by default
- `RLIMIT_RSS` -- dummy, no limit by default
- `RLIMIT_NPROC` -- dummy, no limit by default
- `RLIMIT_NOFILE` -- implemented, default soft limit is 900, default hard limit is 65K
- `RLIMIT_NOFILE` -- implemented, equal to `sys.fds.limit` {ref}`manifest option <sys-fds-limit>` by
default
- `RLIMIT_MEMLOCK` -- dummy, no limit by default
- `RLIMIT_AS` -- dummy, no limit by default
- `RLIMIT_LOCKS` -- dummy, no limit by default
Expand Down
15 changes: 15 additions & 0 deletions Documentation/manifest-syntax.rst
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,21 @@ may improve performance for certain workloads but may also generate
``SIGSEGV/SIGBUS`` exceptions for some applications that specifically use
invalid pointers (though this is not expected for most real-world applications).

.. _sys-fds-limit:

Limit on open file descriptors
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

sys.fds.limit = [NUM]
(default: 900)

This specifies the maximum number of open file descriptors in the Gramine
process. More specifically, this option sets the initial soft ``RLIMIT_NOFILE``
resource limit: a value one greater than the maximum file descriptor number
that can be opened by the Gramine process.

.. _stack-size:

Stack size
Expand Down
23 changes: 23 additions & 0 deletions libos/src/bookkeep/libos_handle.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,33 @@ int init_handle(void) {
if (!create_lock(&handle_mgr_lock)) {
return -ENOMEM;
}

handle_mgr = create_mem_mgr(init_align_up(HANDLE_MGR_ALLOC));
if (!handle_mgr) {
return -ENOMEM;
}

/* after fork, in the new child process, `libos_init` is run, hence this function too - but
* forked process will get its RLIMIT_NOFILE from the checkpoint */
assert(g_pal_public_state);
if (g_pal_public_state->parent_process)
return 0;

assert(g_manifest_root);
int64_t fds_limit_init64;
int ret = toml_int_in(g_manifest_root, "sys.fds.limit",
/*defaultval=*/get_rlimit_cur(RLIMIT_NOFILE),
&fds_limit_init64);
if (ret < 0) {
log_error("Cannot parse 'sys.fds.limit'");
return -EINVAL;
}
if (fds_limit_init64 < 0) {
log_error("'sys.fds.limit' is negative (%ld)", fds_limit_init64);
return -EINVAL;
}
set_rlimit_cur(RLIMIT_NOFILE, (uint64_t)fds_limit_init64);

return 0;
}

Expand Down
12 changes: 11 additions & 1 deletion libos/src/sys/libos_getrlimit.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,17 @@
#include "linux_abi/sysinfo.h"

/*
* TODO: implement actual limitation on each resource.
* The only resource limits that affect internal Gramine logic are:
*
* - RLIMIT_DATA: initially equal to `sys.brk.max_size` manifest option, affects `brk()` system
* call
* - RLIMIT_STACK: initially equal to `sys.stack.size` manifest option, but updating this limit
* does *not* affect the max stack size of the main thread for processes which
* are already running
* - RLIMIT_NOFILE: initially equal to `sys.fds.limit` manifest option, affects system calls that
* create new file descriptors, e.g. open(), dup(), pipe(), etc.
*
* FIXME: implement actual limitation on each resource?
*
* The current behavior (i.e. sys_stack_size, brk_max_size) may still need
* to be fixed.
Expand Down
1 change: 1 addition & 0 deletions libos/test/regression/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ tests = {
'readdir': {},
'rename_unlink': {},
'rename_unlink_fchown': {},
'rlimit_nofile': {},
'run_test': {
'include_directories': include_directories(
# for `gramine_entry_api.h`
Expand Down
65 changes: 65 additions & 0 deletions libos/test/regression/rlimit_nofile.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2024 Intel Corporation */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

#include "common.h"

/*
 * Regression test for the RLIMIT_NOFILE soft limit.
 *
 * Steps:
 *   1. dup2() onto fd `limit - 1` must succeed; dup2() onto fd `limit` must fail with EBADF.
 *   2. The soft limit is raised by one via setrlimit().
 *   3. dup2() onto fd `old limit` must now succeed both in a forked child (the new limit must be
 *      migrated to the child process) and in the parent.
 *
 * The printf() messages are matched verbatim by test_libos.py, so their format must not change.
 *
 * Returns 0 after printing "TEST OK"; any failed step terminates the process with an error.
 */
int main(void) {
    struct rlimit rlim;

    int dev_null_fd = CHECK(open("/dev/null", O_WRONLY, 0666));

    CHECK(getrlimit(RLIMIT_NOFILE, &rlim));
    printf("old RLIMIT_NOFILE soft limit: %d\n", (int)rlim.rlim_cur);
    int old_lim = (int)rlim.rlim_cur;

    /* the soft limit is used as an fd number below, so it must be positive and must survive the
     * conversion to int unchanged */
    if (old_lim <= 0 || (rlim_t)old_lim != rlim.rlim_cur)
        errx(1, "RLIMIT_NOFILE soft limit is not usable as a file descriptor number");

    /* make sure we can increase the current soft limit; compare as rlim_t because the hard limit
     * may be RLIM_INFINITY (or otherwise too large), which would be mangled by a cast to int */
    if (rlim.rlim_cur >= rlim.rlim_max)
        errx(1, "RLIMIT_NOFILE soft limit (%d) cannot be increased", old_lim);

    int good_dup_fd = dup2(dev_null_fd, old_lim - 1);
    CHECK(good_dup_fd);
    printf("(before setrlimit) opened fd: %d\n", good_dup_fd);
    CHECK(close(good_dup_fd));

    /* fd number equal to the soft limit is out of range: dup2() must fail with EBADF */
    int fail_dup_fd = dup2(dev_null_fd, old_lim);
    if (fail_dup_fd != -1) {
        /* errno is stale on success, so report without it */
        errx(1, "dup2() onto fd %d unexpectedly succeeded", old_lim);
    }
    if (errno != EBADF)
        err(1, "dup2() onto fd %d failed with unexpected error", old_lim);

    rlim.rlim_cur++;
    CHECK(setrlimit(RLIMIT_NOFILE, &rlim));
    printf("new RLIMIT_NOFILE soft limit: %d\n", (int)rlim.rlim_cur);

    /* flush before fork() so that buffered output is not emitted twice (by parent and child) */
    fflush(stdout);

    pid_t pid = CHECK(fork());
    if (pid == 0) {
        /* verify that NOFILE limit is correctly migrated to the child process */
        good_dup_fd = dup2(dev_null_fd, old_lim);
        CHECK(good_dup_fd);
        printf("(in child, after setrlimit) opened fd: %d\n", good_dup_fd);
        exit(0);
    } else {
        int status = 0;
        CHECK(wait(&status));
        if (!WIFEXITED(status) || WEXITSTATUS(status))
            errx(1, "child wait status: %#x", status);
    }

    /* the parent must be able to use the newly allowed fd number as well */
    good_dup_fd = dup2(dev_null_fd, old_lim);
    CHECK(good_dup_fd);
    printf("(after setrlimit) opened fd: %d\n", good_dup_fd);
    CHECK(close(good_dup_fd));

    CHECK(close(dev_null_fd));
    puts("TEST OK");
    return 0;
}
22 changes: 22 additions & 0 deletions libos/test/regression/rlimit_nofile_4k.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Manifest for running the `rlimit_nofile` test binary with a non-default `sys.fds.limit`.
{% set entrypoint = "rlimit_nofile" -%}

libos.entrypoint = "{{ entrypoint }}"

loader.env.LD_LIBRARY_PATH = "/lib"

fs.mounts = [
{ path = "/lib", uri = "file:{{ gramine.runtimedir(libc) }}" },
{ path = "/{{ entrypoint }}", uri = "file:{{ binary_dir }}/{{ entrypoint }}" },
]

# Override the default limit (900): the test is expected to report an initial RLIMIT_NOFILE soft
# limit of 4096 and to open fds 4095 and (after raising the limit by one) 4096.
sys.fds.limit = 4096

sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '4' }}
sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.use_exinfo = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}

sgx.trusted_files = [
"file:{{ gramine.runtimedir(libc) }}/",
"file:{{ binary_dir }}/{{ entrypoint }}",
]
20 changes: 20 additions & 0 deletions libos/test/regression/test_libos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,26 @@ def test_150_itimer(self):
stdout, _ = self.run_binary(['itimer'])
self.assertIn("TEST OK", stdout)

def test_160_rlimit_nofile(self):
# uses manifest.template
stdout, _ = self.run_binary(['rlimit_nofile'])
self.assertIn("old RLIMIT_NOFILE soft limit: 900", stdout)
self.assertIn("(before setrlimit) opened fd: 899", stdout)
self.assertIn("new RLIMIT_NOFILE soft limit: 901", stdout)
self.assertIn("(in child, after setrlimit) opened fd: 900", stdout)
self.assertIn("(after setrlimit) opened fd: 900", stdout)
self.assertIn("TEST OK", stdout)

def test_161_rlimit_nofile_4k(self):
# uses rlimit_nofile_4k.manifest.template
stdout, _ = self.run_binary(['rlimit_nofile_4k'])
self.assertIn("old RLIMIT_NOFILE soft limit: 4096", stdout)
self.assertIn("(before setrlimit) opened fd: 4095", stdout)
self.assertIn("new RLIMIT_NOFILE soft limit: 4097", stdout)
self.assertIn("(in child, after setrlimit) opened fd: 4096", stdout)
self.assertIn("(after setrlimit) opened fd: 4096", stdout)
self.assertIn("TEST OK", stdout)

class TC_31_Syscall(RegressionTestCase):
def test_000_syscall_redirect(self):
stdout, _ = self.run_binary(['syscall'])
Expand Down
2 changes: 2 additions & 0 deletions libos/test/regression/tests.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ manifests = [
"readdir",
"rename_unlink",
"rename_unlink_fchown",
"rlimit_nofile",
"rlimit_nofile_4k",
"run_test",
"rwlock",
"sched",
Expand Down
2 changes: 2 additions & 0 deletions libos/test/regression/tests_musl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ manifests = [
"readdir",
"rename_unlink",
"rename_unlink_fchown",
"rlimit_nofile",
"rlimit_nofile_4k",
"run_test",
"rwlock",
"sched",
Expand Down
2 changes: 2 additions & 0 deletions python/graminelibos/manifest_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,7 @@
'debug__mock_syscalls': [{Required('name'): str, 'return': int}],

'stack': {'size': _size},

'fds': {'limit': int},
},
})

0 comments on commit ca534ce

Please sign in to comment.