-
Notifications
You must be signed in to change notification settings - Fork 200
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[LibOS] Add support for timerfd system calls
This commit adds support for system calls that create and operate on a timer that delivers timer expiration notifications via a file descriptor, specifically: `timerfd_create()`, `timerfd_settime()` and `timerfd_gettime()`. The timerfd object is associated with a dummy eventfd created on the host to trigger notifications (e.g., in epoll). The object is created inside Gramine, with all its operations resolved entirely inside Gramine (note that the time source in Gramine SGX is still untrusted). The emulation is currently implemented at the level of a single process. All timerfds created in the parent process are marked as invalid in child processes. In multi-process applications, Gramine does not exit immediately after fork; it only exits if the application attempts to use timerfds in the child. Therefore, inter-process timing signals via timerfds are not allowed. LibOS regression tests are also added. Signed-off-by: Kailun Qin <[email protected]>
- Loading branch information
1 parent
aef14f1
commit a717ed5
Showing
27 changed files
with
1,158 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
/* SPDX-License-Identifier: LGPL-3.0-or-later */ | ||
/* Copyright (C) 2024 Intel Corporation | ||
* Kailun Qin <[email protected]> | ||
*/ | ||
|
||
/* | ||
* This file contains the implementation of the "timerfd" filesystem. For more information, see | ||
* `libos/src/sys/libos_timerfd.c`. | ||
*/ | ||
|
||
#include "libos_fs.h" | ||
#include "libos_handle.h" | ||
#include "libos_internal.h" | ||
#include "libos_lock.h" | ||
#include "linux_abi/errors.h" | ||
#include "pal.h" | ||
|
||
/* Enforce a restriction that all timerfds created in the parent process are marked as invalid in | ||
* child processes, i.e. inter-process timing signals via timerfds are not allowed. This restriction | ||
* is because LibOS doesn't yet implement sync between timerfd objects. */ | ||
static int timerfd_checkin(struct libos_handle* hdl) { | ||
assert(hdl->type == TYPE_TIMERFD); | ||
hdl->info.timerfd.broken_in_child = true; | ||
return 0; | ||
} | ||
|
||
/* This implementation is the same as `eventfd_dummy_host_read()` in "fs/eventfd/fs.c". */ | ||
static void timerfd_dummy_host_read(struct libos_handle* hdl) { | ||
int ret; | ||
uint64_t buf_dummy_host_val = 0; | ||
size_t dummy_host_val_count = sizeof(buf_dummy_host_val); | ||
do { | ||
ret = PalStreamRead(hdl->pal_handle, /*offset=*/0, &dummy_host_val_count, | ||
&buf_dummy_host_val); | ||
} while (ret == PAL_ERROR_INTERRUPTED); | ||
if (ret < 0 || dummy_host_val_count != sizeof(buf_dummy_host_val)) { | ||
/* must not happen in benign case, consider it an attack and panic */ | ||
BUG(); | ||
} | ||
} | ||
|
||
/* This implementation is the same as `eventfd_dummy_host_wait()` in "fs/eventfd/fs.c". */ | ||
static void timerfd_dummy_host_wait(struct libos_handle* hdl) { | ||
pal_wait_flags_t wait_for_events = PAL_WAIT_READ; | ||
pal_wait_flags_t ret_events = 0; | ||
int ret = PalStreamsWaitEvents(1, &hdl->pal_handle, &wait_for_events, &ret_events, NULL); | ||
if (ret < 0 && ret != PAL_ERROR_INTERRUPTED) { | ||
BUG(); | ||
} | ||
(void)ret_events; /* we don't care what events the host returned, we can't trust them anyway */ | ||
} | ||
|
||
/*
 * Read the pending expiration counter of a timerfd.
 *
 * Copies the 8-byte `num_expirations` counter into `buf` and resets it to zero. If the counter is
 * zero, the call either fails with -EAGAIN (handle has O_NONBLOCK) or waits on the dummy host
 * object and re-checks the counter until it becomes non-zero.
 *
 * hdl:   timerfd handle (must be of type TYPE_TIMERFD).
 * buf:   destination buffer, at least `sizeof(uint64_t)` bytes.
 * count: size of `buf`; values below `sizeof(uint64_t)` are rejected.
 * pos:   unused.
 *
 * Returns `sizeof(uint64_t)` (the number of bytes actually stored in `buf`) on success, matching
 * what Linux returns from read() on a timerfd. Errors:
 *   -EINVAL  `count` is smaller than `sizeof(uint64_t)`;
 *   -EIO     the handle was inherited from a parent process (`broken_in_child`);
 *   -EAGAIN  no expirations are pending and the handle is non-blocking.
 */
static ssize_t timerfd_read(struct libos_handle* hdl, void* buf, size_t count, file_off_t* pos) {
    __UNUSED(pos);
    assert(hdl->type == TYPE_TIMERFD);

    if (count < sizeof(uint64_t))
        return -EINVAL;

    if (hdl->info.timerfd.broken_in_child) {
        log_warning("Child process tried to access timerfd created by parent process. This is "
                    "disallowed in Gramine.");
        return -EIO;
    }

    /* `ssize_t` (not `int`) so that the value returned to the caller is never truncated */
    ssize_t ret;
    spinlock_lock(&hdl->info.timerfd.expiration_lock);

    while (!hdl->info.timerfd.num_expirations) {
        if (hdl->flags & O_NONBLOCK) {
            ret = -EAGAIN;
            goto out;
        }
        /* drop the lock while waiting on the host object, then re-check the counter under the
         * lock: the wait may return spuriously or another reader may have consumed the event */
        spinlock_unlock(&hdl->info.timerfd.expiration_lock);
        timerfd_dummy_host_wait(hdl);
        spinlock_lock(&hdl->info.timerfd.expiration_lock);
    }

    memcpy(buf, &hdl->info.timerfd.num_expirations, sizeof(uint64_t));
    hdl->info.timerfd.num_expirations = 0;

    /* perform a read (not supposed to block) to clear the event from polling threads */
    if (hdl->info.timerfd.dummy_host_val) {
        timerfd_dummy_host_read(hdl);
        hdl->info.timerfd.dummy_host_val = 0;
    }

    /* exactly one 8-byte counter was stored in `buf`, regardless of how large `count` is */
    ret = (ssize_t)sizeof(uint64_t);
out:
    spinlock_unlock(&hdl->info.timerfd.expiration_lock);
    maybe_epoll_et_trigger(hdl, ret, /*in=*/true, /*unused was_partial=*/false);
    return ret;
}
|
||
/*
 * Post-poll callback: sanitizes the events reported for a timerfd handle.
 *
 * - A handle inherited from a parent process (`broken_in_child`) always reports PAL_WAIT_ERROR.
 * - Error, hang-up and write events are never expected on a timerfd; seeing one ends in `BUG()`.
 * - A read event is passed on only if the expiration counter is non-zero; otherwise the
 *   PAL_WAIT_READ bit is removed from `*pal_ret_events`.
 */
static void timerfd_post_poll(struct libos_handle* hdl, pal_wait_flags_t* pal_ret_events) {
    assert(hdl->type == TYPE_TIMERFD);

    if (hdl->info.timerfd.broken_in_child) {
        log_warning("Child process tried to access timerfd created by parent process. This is "
                    "disallowed in Gramine.");
        *pal_ret_events = PAL_WAIT_ERROR;
        return;
    }

    const pal_wait_flags_t unexpected_events = PAL_WAIT_ERROR | PAL_WAIT_HANG_UP | PAL_WAIT_WRITE;
    if (*pal_ret_events & unexpected_events) {
        /* impossible: we control timerfd inside the LibOS, and we never raise such conditions */
        BUG();
    }

    /* sample the counter under the lock; the decision below only depends on this snapshot */
    spinlock_lock(&hdl->info.timerfd.expiration_lock);
    bool has_expirations = hdl->info.timerfd.num_expirations != 0;
    spinlock_unlock(&hdl->info.timerfd.expiration_lock);

    if (!has_expirations) {
        /* Either no read event was reported (clearing the bit is then a no-op), or the
         * notification is spurious/malicious. The latter can legitimately happen if another
         * thread consumed the event between this thread's poll wakeup and this callback. In both
         * cases the read event is withheld from the caller. */
        *pal_ret_events &= ~PAL_WAIT_READ;
    }
}
|
||
static int timerfd_close(struct libos_handle* hdl) { | ||
if (hdl->info.timerfd.broken_in_child) { | ||
log_warning("Child process tried to access timerfd created by parent process. This is " | ||
"disallowed in Gramine."); | ||
return -EIO; | ||
} | ||
|
||
/* cancel the pending timerfd object */ | ||
return install_async_event(ASYNC_EVENT_TYPE_ALARM_TIMER, hdl->pal_handle, | ||
/*time_us=*/0, /*absolute_time=*/false, /*callback=*/NULL, | ||
/*arg=*/NULL); | ||
} | ||
|
||
struct libos_fs_ops timerfd_fs_ops = { | ||
.checkin = &timerfd_checkin, | ||
.read = &timerfd_read, | ||
.close = &timerfd_close, | ||
.post_poll = &timerfd_post_poll, | ||
}; | ||
|
||
struct libos_fs timerfd_builtin_fs = { | ||
.name = "timerfd", | ||
.fs_ops = &timerfd_fs_ops, | ||
}; |
Oops, something went wrong.