Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LibOS] Fix bug of RLIMIT_STACK being overwritten in child #1976

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Documentation/devel/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -2963,7 +2963,8 @@ current process. The following resources are supported:
- `RLIMIT_CORE` -- dummy, zero by default
- `RLIMIT_RSS` -- dummy, no limit by default
- `RLIMIT_NPROC` -- dummy, no limit by default
- `RLIMIT_NOFILE` -- implemented, default soft limit is 900, default hard limit is 65K
- `RLIMIT_NOFILE` -- implemented, equal to `sys.fds.limit` {ref}`manifest option <sys-fds-limit>` by
default
- `RLIMIT_MEMLOCK` -- dummy, no limit by default
- `RLIMIT_AS` -- dummy, no limit by default
- `RLIMIT_LOCKS` -- dummy, no limit by default
Expand Down
15 changes: 15 additions & 0 deletions Documentation/manifest-syntax.rst
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,21 @@ may improve performance for certain workloads but may also generate
``SIGSEGV/SIGBUS`` exceptions for some applications that specifically use
invalid pointers (though this is not expected for most real-world applications).

.. _sys-fds-limit:

Limit on open file descriptors
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

sys.fds.limit = [NUM]
(default: 900)

This specifies the maximum number of open file descriptors in the Gramine
process. More specifically, this option sets the ``RLIMIT_NOFILE`` resource
limit: it specifies a value one greater than the maximum file descriptor number
that can be opened by the Gramine process.

.. _stack-size:

Stack size
Expand Down
23 changes: 23 additions & 0 deletions libos/src/bookkeep/libos_handle.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,33 @@ int init_handle(void) {
if (!create_lock(&handle_mgr_lock)) {
return -ENOMEM;
}

handle_mgr = create_mem_mgr(init_align_up(HANDLE_MGR_ALLOC));
if (!handle_mgr) {
return -ENOMEM;
}

/* after fork, in the new child process, `libos_init` is run, hence this function too - but
* forked process will get its RLIMIT_NOFILE from the checkpoint */
assert(g_pal_public_state);
if (g_pal_public_state->parent_process)
return 0;

assert(g_manifest_root);
int64_t fds_limit_init64;
int ret = toml_int_in(g_manifest_root, "sys.fds.limit",
/*defaultval=*/get_rlimit_cur(RLIMIT_NOFILE),
&fds_limit_init64);
if (ret < 0) {
log_error("Cannot parse 'sys.fds.limit'");
return -EINVAL;
}
if (fds_limit_init64 < 0) {
log_error("'sys.fds.limit' is negative (%ld)", fds_limit_init64);
return -EINVAL;
}
set_rlimit_cur(RLIMIT_NOFILE, (uint64_t)fds_limit_init64);

return 0;
}

Expand Down
26 changes: 16 additions & 10 deletions libos/src/libos_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,23 +278,29 @@ static int populate_stack(void* stack, size_t stack_size, const char* const* arg
int init_stack(const char* const* argv, const char* const* envp, char*** out_argp,
elf_auxv_t** out_auxv) {
int ret;

assert(g_manifest_root);
uint64_t stack_size;
ret = toml_sizestring_in(g_manifest_root, "sys.stack.size", get_rlimit_cur(RLIMIT_STACK),
&stack_size);
if (ret < 0) {
log_error("Cannot parse 'sys.stack.size'");
return -EINVAL;
}

stack_size = ALLOC_ALIGN_UP(stack_size);
set_rlimit_cur(RLIMIT_STACK, stack_size);
assert(g_pal_public_state);
if (g_pal_public_state->parent_process) {
/* after fork, in the new child process, `libos_init` is run, hence this function too - but
* forked process will get its RLIMIT_STACK from the checkpoint */
stack_size = get_rlimit_cur(RLIMIT_STACK);
} else {
assert(g_manifest_root);
ret = toml_sizestring_in(g_manifest_root, "sys.stack.size", get_rlimit_cur(RLIMIT_STACK),
&stack_size);
if (ret < 0) {
log_error("Cannot parse 'sys.stack.size'");
return -EINVAL;
}
set_rlimit_cur(RLIMIT_STACK, stack_size);
}

struct libos_thread* cur_thread = get_cur_thread();
if (!cur_thread || cur_thread->stack)
return 0;

stack_size = ALLOC_ALIGN_UP(stack_size);
void* stack = allocate_stack(stack_size, ALLOC_ALIGNMENT, /*user=*/true);
if (!stack)
return -ENOMEM;
Expand Down
12 changes: 11 additions & 1 deletion libos/src/sys/libos_getrlimit.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,17 @@
#include "linux_abi/sysinfo.h"

/*
* TODO: implement actual limitation on each resource.
* The only resource limits that affect internal Gramine logic are:
*
* - RLIMIT_DATA: initially equal to `sys.brk.max_size` manifest option, affects `brk()` system
* call
* - RLIMIT_STACK: initially equal to `sys.stack.size` manifest option, but updating this limit
* does *not* affect the max stack size of the main thread for processes which
* are already running
* - RLIMIT_NOFILE: initially equal to `sys.fds.limit` manifest option, affects system calls that
* create new file descriptors, e.g. open(), dup(), pipe(), etc.
*
* FIXME: implement actual limitation on each resource?
*
* The current behavior (i.e. sys_stack_size, brk_max_size) may be subject
* to change.
Expand Down
2 changes: 2 additions & 0 deletions libos/test/regression/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ tests = {
'readdir': {},
'rename_unlink': {},
'rename_unlink_fchown': {},
'rlimit_nofile': {},
'rlimit_stack': {},
'run_test': {
'include_directories': include_directories(
# for `gramine_entry_api.h`
Expand Down
64 changes: 64 additions & 0 deletions libos/test/regression/rlimit_nofile.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2024 Intel Corporation */

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

#include "common.h"

/*
 * Regression test for RLIMIT_NOFILE.
 *
 * Steps:
 *   1. read the current soft limit and check that the highest FD number below it is usable while
 *      the FD number equal to the limit is rejected with EBADF;
 *   2. raise the soft limit by one;
 *   3. fork and check in the child that the raised limit is in effect there as well;
 *   4. check in the parent that the previously rejected FD number is now usable.
 *
 * The lines printed to stdout are matched by the Python test harness, so their text must not
 * change. Failures are reported on stderr with a non-zero exit status.
 */
int main(void) {
    struct rlimit rlim;

    int dev_null_fd = CHECK(open("/dev/null", O_WRONLY, 0666));

    CHECK(getrlimit(RLIMIT_NOFILE, &rlim));
    printf("old RLIMIT_NOFILE soft limit: %d\n", (int)rlim.rlim_cur);
    int old_lim = (int)rlim.rlim_cur;

    /* make sure we can increase the current soft limit */
    if (old_lim <= 0 || old_lim >= (int)rlim.rlim_max) {
        errx(1, "cannot raise RLIMIT_NOFILE soft limit (soft limit: %d, hard limit: %d)", old_lim,
             (int)rlim.rlim_max);
    }

    /* the highest FD number below the soft limit must be usable */
    int good_dup_fd = dup2(dev_null_fd, old_lim - 1);
    CHECK(good_dup_fd);
    printf("(before setrlimit) opened fd: %d\n", good_dup_fd);
    CHECK(close(good_dup_fd));

    /* the FD number equal to the soft limit must be rejected with EBADF; reset errno first so that
     * a stale value from an earlier call cannot be mistaken for the result of this one */
    errno = 0;
    int fail_dup_fd = dup2(dev_null_fd, old_lim);
    if (fail_dup_fd != -1) {
        errx(1, "dup2() to fd %d unexpectedly succeeded (returned %d)", old_lim, fail_dup_fd);
    }
    if (errno != EBADF) {
        errx(1, "dup2() to fd %d failed with errno %d, expected EBADF (%d)", old_lim, errno,
             EBADF);
    }

    rlim.rlim_cur++;
    CHECK(setrlimit(RLIMIT_NOFILE, &rlim));
    printf("new RLIMIT_NOFILE soft limit: %d\n", (int)rlim.rlim_cur);

    /* flush before fork so that buffered output is not emitted twice (by parent and child) */
    fflush(stdout);

    pid_t pid = CHECK(fork());
    if (pid == 0) {
        /* verify that NOFILE limit is correctly migrated to the child process */
        good_dup_fd = dup2(dev_null_fd, old_lim);
        CHECK(good_dup_fd);
        printf("(in child, after setrlimit) opened fd: %d\n", good_dup_fd);
        exit(0);
    } else {
        int status = 0;
        CHECK(wait(&status));
        if (!WIFEXITED(status) || WEXITSTATUS(status))
            errx(1, "child wait status: %#x", status);
    }

    /* the FD number that was rejected before setrlimit must be usable now */
    good_dup_fd = dup2(dev_null_fd, old_lim);
    CHECK(good_dup_fd);
    printf("(after setrlimit) opened fd: %d\n", good_dup_fd);
    CHECK(close(good_dup_fd));

    CHECK(close(dev_null_fd));
    puts("TEST OK");
    return 0;
}
22 changes: 22 additions & 0 deletions libos/test/regression/rlimit_nofile_4k.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{% set entrypoint = "rlimit_nofile" -%}

libos.entrypoint = "{{ entrypoint }}"

loader.env.LD_LIBRARY_PATH = "/lib"

fs.mounts = [
{ path = "/lib", uri = "file:{{ gramine.runtimedir(libc) }}" },
{ path = "/{{ entrypoint }}", uri = "file:{{ binary_dir }}/{{ entrypoint }}" },
]

sys.fds.limit = 4096

sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '4' }}
sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.use_exinfo = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}

sgx.trusted_files = [
"file:{{ gramine.runtimedir(libc) }}/",
"file:{{ binary_dir }}/{{ entrypoint }}",
]
54 changes: 54 additions & 0 deletions libos/test/regression/rlimit_stack.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2024 Intel Corporation */

#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

#include "common.h"

/*
 * Regression test for RLIMIT_STACK.
 *
 * Steps:
 *   1. read the current soft limit and raise it by one byte;
 *   2. fork and check in the child that the raised limit was migrated there;
 *   3. check in the parent that the raised limit is still in effect.
 *
 * The lines printed to stdout are matched by the Python test harness, so their text must not
 * change. Limits are printed via `unsigned long long` / `%llu`, so the format specifier matches
 * the argument type regardless of how `rlim_t` and `uint64_t` are defined.
 */
int main(void) {
    struct rlimit rlim;

    CHECK(getrlimit(RLIMIT_STACK, &rlim));
    printf("old RLIMIT_STACK soft limit: %llu\n", (unsigned long long)rlim.rlim_cur);
    uint64_t old_lim = (uint64_t)rlim.rlim_cur;

    /* make sure we can increase the current soft limit */
    if (old_lim >= (uint64_t)rlim.rlim_max) {
        errx(1, "cannot raise RLIMIT_STACK soft limit (soft limit: %llu, hard limit: %llu)",
             (unsigned long long)old_lim, (unsigned long long)rlim.rlim_max);
    }

    /* cannot overflow: `old_lim` is strictly below the hard limit (checked above) */
    uint64_t new_lim = old_lim + 1;

    rlim.rlim_cur++;
    CHECK(setrlimit(RLIMIT_STACK, &rlim));
    printf("new RLIMIT_STACK soft limit: %llu\n", (unsigned long long)rlim.rlim_cur);

    /* flush before fork so that buffered output is not emitted twice (by parent and child) */
    fflush(stdout);

    pid_t pid = CHECK(fork());
    if (pid == 0) {
        /* verify that STACK limit is correctly migrated to the child process */
        CHECK(getrlimit(RLIMIT_STACK, &rlim));
        printf("(in child, after setrlimit) RLIMIT_STACK soft limit: %llu\n",
               (unsigned long long)rlim.rlim_cur);
        if ((uint64_t)rlim.rlim_cur != new_lim) {
            errx(1, "child has RLIMIT_STACK soft limit %llu, expected %llu",
                 (unsigned long long)rlim.rlim_cur, (unsigned long long)new_lim);
        }

        /* NOTE: we currently don't test that the stack limit is indeed enforced */
        exit(0);
    } else {
        int status = 0;
        CHECK(wait(&status));
        if (!WIFEXITED(status) || WEXITSTATUS(status))
            errx(1, "child wait status: %#x", status);
    }

    CHECK(getrlimit(RLIMIT_STACK, &rlim));
    printf("(in parent, after setrlimit) RLIMIT_STACK soft limit: %llu\n",
           (unsigned long long)rlim.rlim_cur);
    if ((uint64_t)rlim.rlim_cur != new_lim) {
        errx(1, "parent has RLIMIT_STACK soft limit %llu, expected %llu",
             (unsigned long long)rlim.rlim_cur, (unsigned long long)new_lim);
    }

    /* NOTE: we currently don't test that the stack limit is indeed enforced */
    puts("TEST OK");
    return 0;
}
21 changes: 21 additions & 0 deletions libos/test/regression/rlimit_stack.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
libos.entrypoint = "{{ entrypoint }}"

loader.env.LD_LIBRARY_PATH = "/lib"

fs.mounts = [
{ path = "/lib", uri = "file:{{ gramine.runtimedir(libc) }}" },
{ path = "/{{ entrypoint }}", uri = "file:{{ binary_dir }}/{{ entrypoint }}" },
]

# we specify any non-standard stack size just for testing
sys.stack.size = "1M"

sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '4' }}
sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.use_exinfo = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}

sgx.trusted_files = [
"file:{{ gramine.runtimedir(libc) }}/",
"file:{{ binary_dir }}/{{ entrypoint }}",
]
27 changes: 27 additions & 0 deletions libos/test/regression/test_libos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,33 @@ def test_150_itimer(self):
stdout, _ = self.run_binary(['itimer'])
self.assertIn("TEST OK", stdout)

def test_160_rlimit_nofile(self):
    # uses manifest.template; the expected numbers correspond to a soft limit of 900
    output, _ = self.run_binary(['rlimit_nofile'])

    # checked in order, so the first missing line is the one reported on failure
    expected_lines = (
        "old RLIMIT_NOFILE soft limit: 900",
        "(before setrlimit) opened fd: 899",
        "new RLIMIT_NOFILE soft limit: 901",
        "(after setrlimit) opened fd: 900",
        "TEST OK",
    )
    for expected in expected_lines:
        self.assertIn(expected, output)

def test_161_rlimit_nofile_4k(self):
    # uses rlimit_nofile_4k.manifest.template, which sets `sys.fds.limit = 4096`
    output, _ = self.run_binary(['rlimit_nofile_4k'])

    # checked in order, so the first missing line is the one reported on failure
    expected_lines = (
        "old RLIMIT_NOFILE soft limit: 4096",
        "(before setrlimit) opened fd: 4095",
        "new RLIMIT_NOFILE soft limit: 4097",
        "(after setrlimit) opened fd: 4096",
        "TEST OK",
    )
    for expected in expected_lines:
        self.assertIn(expected, output)

def test_165_rlimit_stack(self):
    # rlimit_stack.manifest.template specifies 1MB (= 1048576B) stack size
    output, _ = self.run_binary(['rlimit_stack'])

    # the test raises the soft limit by one byte; both the forked child and the parent must
    # report the raised value
    expected_lines = (
        "old RLIMIT_STACK soft limit: 1048576",
        "new RLIMIT_STACK soft limit: 1048577",
        "(in child, after setrlimit) RLIMIT_STACK soft limit: 1048577",
        "(in parent, after setrlimit) RLIMIT_STACK soft limit: 1048577",
        "TEST OK",
    )
    for expected in expected_lines:
        self.assertIn(expected, output)

class TC_31_Syscall(RegressionTestCase):
def test_000_syscall_redirect(self):
stdout, _ = self.run_binary(['syscall'])
Expand Down
3 changes: 3 additions & 0 deletions libos/test/regression/tests.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ manifests = [
"readdir",
"rename_unlink",
"rename_unlink_fchown",
"rlimit_nofile",
"rlimit_nofile_4k",
"rlimit_stack",
"run_test",
"rwlock",
"sched",
Expand Down
3 changes: 3 additions & 0 deletions libos/test/regression/tests_musl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ manifests = [
"readdir",
"rename_unlink",
"rename_unlink_fchown",
"rlimit_nofile",
"rlimit_nofile_4k",
"rlimit_stack",
"run_test",
"rwlock",
"sched",
Expand Down
2 changes: 2 additions & 0 deletions python/graminelibos/manifest_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,7 @@
'debug__mock_syscalls': [{Required('name'): str, 'return': int}],

'stack': {'size': _size},

'fds': {'limit': int},
},
})