[PAL/Linux-SGX] Print SGX stats on SIGUSR1 and reset them #1996

Open
wants to merge 1 commit into base: master
4 changes: 4 additions & 0 deletions Documentation/manifest-syntax.rst
@@ -1293,6 +1293,10 @@ This syntax specifies whether to enable SGX enclave-specific statistics:
includes creating the enclave, adding enclave pages, measuring them and
initializing the enclave.

For this option to take effect, Gramine must be compiled with
``--buildtype=debug`` or ``--buildtype=debugoptimized``. Otherwise (if built in
release mode), Gramine will exit with an error.
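
For example (a sketch only; the exact Meson flags other than ``--buildtype``
depend on your setup), a debug-enabled build that allows this option could be
configured and used as follows::

    meson setup build/ --buildtype=debugoptimized -Dsgx=enabled
    ninja -C build/

and the application manifest would then contain::

    sgx.enable_stats = true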

.. warning::
This option is insecure and cannot be used with production enclaves
(``sgx.debug = false``). If a production enclave is started with this option
18 changes: 13 additions & 5 deletions Documentation/performance.rst
@@ -18,11 +18,13 @@ Enabling SGX enclave stats

See also :ref:`perf` below for installing ``perf``.

Enable statistics using ``sgx.enable_stats = true`` manifest option. Now your
graminized application correctly reports performance counters. This is useful
when using e.g. ``perf stat`` to collect performance statistics. This manifest
option also forces Gramine to dump SGX-related information on each enclave exit.
Here is an example:
Enable statistics using the ``sgx.enable_stats = true`` manifest option (note
that Gramine must be compiled with ``--buildtype=debug`` or
``--buildtype=debugoptimized`` for this option to work). Your graminized
application then correctly reports performance counters, which is useful e.g.
when collecting performance statistics with ``perf stat``. This manifest option
also forces Gramine to dump SGX-related information on each enclave exit. Here
is an example:

::

@@ -90,6 +92,12 @@ How to read this output:
counters should be compared against "golden runs" to deduce any interesting
trends.

It is also possible to dump and reset SGX-related statistics interactively,
using the ``SIGUSR1`` signal. This helps to collect SGX-related statistics only
for a particular period, e.g. to skip Gramine startup and application
initialization and concentrate only on the actual application processing. Send
``SIGUSR1`` using the command ``kill -SIGUSR1 -<PGID>``.
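
For example (a sketch; ``1234`` is a placeholder for the PID of the main
graminized process), the statistics can be dumped and reset as follows::

    # determine the process group ID of the Gramine instance
    PGID=$(ps -o pgid= -p 1234 | tr -d ' ')
    # "--" prevents the negative PGID from being parsed as an option
    kill -SIGUSR1 -- -"$PGID"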

Effects of system calls / ocalls
--------------------------------

3 changes: 3 additions & 0 deletions libos/test/regression/fork_and_access_file.manifest.template
@@ -11,6 +11,9 @@ sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}

# this is only to test that `sgx.enable_stats` works (it can only be specified for debug-mode tests)
sgx.enable_stats = true

sgx.trusted_files = [
"file:{{ gramine.runtimedir(libc) }}/",
"file:{{ binary_dir }}/{{ entrypoint }}",
1 change: 0 additions & 1 deletion libos/test/regression/multi_pthread.manifest.template
@@ -12,7 +12,6 @@ sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '8' }}

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.enable_stats = true

sgx.trusted_files = [
"file:{{ gramine.runtimedir(libc) }}/",
@@ -13,7 +13,6 @@ sgx.insecure__rpc_thread_num = 8

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.enable_stats = true

sgx.trusted_files = [
"file:{{ gramine.runtimedir(libc) }}/",
1 change: 0 additions & 1 deletion pal/regression/Thread2.manifest.template
@@ -1,6 +1,5 @@
loader.entrypoint.uri = "file:{{ binary_dir }}/{{ entrypoint }}"

sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '2' }}
sgx.enable_stats = true
sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
1 change: 0 additions & 1 deletion pal/regression/Thread2_exitless.manifest.template
@@ -4,6 +4,5 @@ loader.entrypoint.uri = "file:{{ binary_dir }}/{{ entrypoint }}"

sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '2' }}
sgx.insecure__rpc_thread_num = 2
sgx.enable_stats = true
sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
5 changes: 5 additions & 0 deletions pal/src/host/linux-sgx/host_entry.S
@@ -16,6 +16,7 @@

.extern tcs_base
.extern g_in_aex_profiling
.extern maybe_dump_and_reset_stats

.global sgx_ecall
.type sgx_ecall, @function
@@ -102,6 +103,8 @@ async_exit_pointer:
movq %rbx, %rdi
call sgx_profile_sample_aex

call maybe_dump_and_reset_stats

# Restore stack
movq %rbp, %rsp
.cfi_def_cfa_register %rsp
@@ -169,6 +172,8 @@ sgx_raise:
movq %rdx, %rdi
call sgx_profile_sample_ocall_inner

call maybe_dump_and_reset_stats

# Restore RDI
movq -8(%rbp), %rdi
#else
19 changes: 19 additions & 0 deletions pal/src/host/linux-sgx/host_exception.c
@@ -17,6 +17,7 @@
#include "debug_map.h"
#include "host_internal.h"
#include "pal_rpc_queue.h"
#include "pal_tcb.h"
#include "sigreturn.h"
#include "sigset.h"
#include "ucontext.h"
@@ -188,6 +189,11 @@ static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) {
__UNUSED(info);
__UNUSED(uc);

if (g_sgx_enable_stats) {
PAL_HOST_TCB* tcb = pal_get_host_tcb();
__atomic_store_n(&tcb->reset_stats, true, __ATOMIC_RELAXED);
}

if (g_pal_enclave.profile_enable) {
__atomic_store_n(&g_trigger_profile_reinit, true, __ATOMIC_RELEASE);
}
@@ -268,3 +274,16 @@ void pal_describe_location(uintptr_t addr, char* buf, size_t buf_size) {
#endif
default_describe_location(addr, buf, buf_size);
}

#ifdef DEBUG
/* called on each AEX and OCALL (in normal context), see host_entry.S */
void maybe_dump_and_reset_stats(void) {
if (!g_sgx_enable_stats)
return;

PAL_HOST_TCB* tcb = pal_get_host_tcb();
if (__atomic_exchange_n(&tcb->reset_stats, false, __ATOMIC_RELAXED) == true) {
collect_and_print_sgx_stats();
}
}
#endif
1 change: 0 additions & 1 deletion pal/src/host/linux-sgx/host_internal.h
@@ -122,7 +122,6 @@ int clone_thread(void* dynamic_tcs);

int create_tcs_mapper(void* tcs_base, unsigned int thread_num);
int pal_thread_init(void* tcbptr);
void map_tcs(unsigned int tid);
void unmap_my_tcs(void);
int current_enclave_thread_cnt(void);
void thread_exit(int status);
8 changes: 8 additions & 0 deletions pal/src/host/linux-sgx/host_main.c
@@ -753,6 +753,14 @@ static int parse_loader_config(char* manifest, struct pal_enclave* enclave_info,
goto out;
}

#ifndef DEBUG
if (g_sgx_enable_stats) {
log_error("'sgx.enable_stats = true' is specified in non-debug mode, this is disallowed");
ret = -EINVAL;
goto out;
}
#endif

ret = toml_string_in(manifest_root, "sgx.sigfile", &dummy_sigfile_str);
if (ret < 0 || dummy_sigfile_str) {
log_error("sgx.sigfile is not supported anymore. Please update your manifest according to "
7 changes: 4 additions & 3 deletions pal/src/host/linux-sgx/host_ocalls.c
@@ -31,9 +31,8 @@
rpc_queue_t* g_rpc_queue = NULL; /* pointer to untrusted queue */

static noreturn void process_exit(int exitcode) {
update_sgx_stats(/*do_print=*/true);

#ifdef DEBUG
update_sgx_stats_on_exit(/*do_print=*/true);
sgx_profile_finish();
#endif

@@ -73,7 +72,9 @@ static long sgx_ocall_exit(void* args) {
process_exit((int)ocall_exit_args->exitcode);
}

update_sgx_stats(/*do_print=*/false);
#ifdef DEBUG
update_sgx_stats_on_exit(/*do_print=*/false);
#endif
thread_exit((int)ocall_exit_args->exitcode);
return 0;
}
119 changes: 93 additions & 26 deletions pal/src/host/linux-sgx/host_thread.c
@@ -16,6 +16,7 @@
struct enclave_thread_map {
unsigned int tid;
sgx_arch_tcs_t* tcs;
PAL_HOST_TCB* tcb;
};

static struct enclave_thread_map* g_enclave_thread_map = NULL;
@@ -26,36 +27,97 @@ static size_t g_enclave_thread_num = 0;

bool g_sgx_enable_stats = false;

/* this function is called only on thread/process exit (never in the middle of thread exec) */
void update_sgx_stats(bool do_print) {
static atomic_ulong g_eenter_cnt = 0;
static atomic_ulong g_eexit_cnt = 0;
static atomic_ulong g_aex_cnt = 0;
static atomic_ulong g_sync_signal_cnt = 0;
static atomic_ulong g_async_signal_cnt = 0;
/* protected by g_enclave_thread_map_lock to prevent data races when one thread exits (and updates
* these global SGX counters) and another thread dumps (reads) these SGX counters */
static uint64_t g_eenter_cnt = 0;
static uint64_t g_eexit_cnt = 0;
static uint64_t g_aex_cnt = 0;
static uint64_t g_sync_signal_cnt = 0;
static uint64_t g_async_signal_cnt = 0;

static void print_global_sgx_stats(void) {
assert(spinlock_is_locked(&g_enclave_thread_map_lock));

int pid = g_host_pid;
assert(pid > 0);

log_always("----- Total SGX stats for process %d -----\n"
" # of EENTERs: %lu\n"
" # of EEXITs: %lu\n"
" # of AEXs: %lu\n"
" # of sync signals: %lu\n"
" # of async signals: %lu",
pid, g_eenter_cnt, g_eexit_cnt, g_aex_cnt,
g_sync_signal_cnt, g_async_signal_cnt);
}

static void reset_global_sgx_stats(void) {
assert(spinlock_is_locked(&g_enclave_thread_map_lock));

g_eenter_cnt = 0;
g_eexit_cnt = 0;
g_aex_cnt = 0;
g_sync_signal_cnt = 0;
g_async_signal_cnt = 0;
}

static void update_global_sgx_stats_from_thread_stats(PAL_HOST_TCB* tcb) {
assert(spinlock_is_locked(&g_enclave_thread_map_lock));

/* tcb->eenter_cnt and others are C11 atomic vars of type `atomic_ulong`, but
* __atomic_exchange_n() can operate only on pointers to integers, thus need explicit cast */
g_eenter_cnt += __atomic_exchange_n((unsigned long*)&tcb->eenter_cnt, 0,
__ATOMIC_RELAXED);
g_eexit_cnt += __atomic_exchange_n((unsigned long*)&tcb->eexit_cnt, 0, __ATOMIC_RELAXED);
g_aex_cnt += __atomic_exchange_n((unsigned long*)&tcb->aex_cnt, 0, __ATOMIC_RELAXED);
g_sync_signal_cnt += __atomic_exchange_n((unsigned long*)&tcb->sync_signal_cnt, 0,
__ATOMIC_RELAXED);
g_async_signal_cnt += __atomic_exchange_n((unsigned long*)&tcb->async_signal_cnt, 0,
__ATOMIC_RELAXED);
}

/* this function is called only on thread/process exit (never in the middle of thread exec) */
void update_sgx_stats_on_exit(bool do_print) {
if (!g_sgx_enable_stats)
return;

spinlock_lock(&g_enclave_thread_map_lock);

PAL_HOST_TCB* tcb = pal_get_host_tcb();
g_eenter_cnt += tcb->eenter_cnt;
g_eexit_cnt += tcb->eexit_cnt;
g_aex_cnt += tcb->aex_cnt;
g_sync_signal_cnt += tcb->sync_signal_cnt;
g_async_signal_cnt += tcb->async_signal_cnt;

if (do_print) {
int pid = g_host_pid;
assert(pid > 0);
log_always("----- Total SGX stats for process %d -----\n"
" # of EENTERs: %lu\n"
" # of EEXITs: %lu\n"
" # of AEXs: %lu\n"
" # of sync signals: %lu\n"
" # of async signals: %lu",
pid, g_eenter_cnt, g_eexit_cnt, g_aex_cnt,
g_sync_signal_cnt, g_async_signal_cnt);
update_global_sgx_stats_from_thread_stats(tcb);

if (do_print)
print_global_sgx_stats();

spinlock_unlock(&g_enclave_thread_map_lock);
}

void collect_and_print_sgx_stats(void) {
if (!g_sgx_enable_stats)
return;

/*
* This function is executed by the "SGX-stats-collecting" thread (that received SIGUSR1). Thus,
* this thread is able to peek into the local storage of other threads. This is typically
* considered a bad smell (one thread reads local data of another thread), but here it's a
* reasonable trade-off: most of the accesses to the thread-local SGX counters are done on EEXIT
* and AEX events by the thread itself, so the memory access should be as simple as possible and
* as fast as possible. An alternative would be to move all SGX stats in a shared array, then we
* would have false cache sharing and complex memory management of the shared array.
*/
spinlock_lock(&g_enclave_thread_map_lock);
for (size_t i = 0; i < g_enclave_thread_num; i++) {
if (!g_enclave_thread_map[i].tcs || !g_enclave_thread_map[i].tid)
continue;

PAL_HOST_TCB* tcb = g_enclave_thread_map[i].tcb;
update_global_sgx_stats_from_thread_stats(tcb);
}

print_global_sgx_stats();
reset_global_sgx_stats();

spinlock_unlock(&g_enclave_thread_map_lock);
}

void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) {
@@ -69,6 +131,7 @@ void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) {
tcb->aex_cnt = 0;
tcb->sync_signal_cnt = 0;
tcb->async_signal_cnt = 0;
tcb->reset_stats = false;

tcb->profile_sample_time = 0;

@@ -86,6 +149,7 @@ int create_tcs_mapper(void* tcs_base, unsigned int thread_num) {
for (uint32_t i = 0; i < thread_num; i++) {
g_enclave_thread_map[i].tid = 0;
g_enclave_thread_map[i].tcs = &enclave_tcs[i];
g_enclave_thread_map[i].tcb = NULL;
}
g_enclave_thread_num = thread_num;
return 0;
@@ -147,14 +211,16 @@ static int add_dynamic_tcs(sgx_arch_tcs_t* tcs) {
return ret;
}

void map_tcs(unsigned int tid) {
static void map_tcs(unsigned int tid, PAL_HOST_TCB* tcb) {
while (true) {
spinlock_lock(&g_enclave_thread_map_lock);
for (size_t i = 0; i < g_enclave_thread_num; i++) {
if (!g_enclave_thread_map[i].tcs)
continue;
if (!g_enclave_thread_map[i].tid) {
g_enclave_thread_map[i].tid = tid;
g_enclave_thread_map[i].tcb = tcb;

pal_get_host_tcb()->tcs = g_enclave_thread_map[i].tcs;
((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i] = tid;
spinlock_unlock(&g_enclave_thread_map_lock);
@@ -186,6 +252,7 @@ void unmap_my_tcs(void) {
for (i = 0; i < g_enclave_thread_num; i++)
if (g_enclave_thread_map[i].tcs == pal_get_host_tcb()->tcs) {
g_enclave_thread_map[i].tid = 0;
g_enclave_thread_map[i].tcb = NULL;
((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i] = 0;
break;
}
@@ -237,7 +304,7 @@ int pal_thread_init(void* tcbptr) {
}

int tid = DO_SYSCALL(gettid);
map_tcs(tid); /* updates tcb->tcs */
map_tcs(tid, tcb); /* also updates tcb->tcs */

if (!tcb->tcs) {
log_error("There are no available TCS pages left for a new thread. Please try to increase"
8 changes: 7 additions & 1 deletion pal/src/host/linux-sgx/pal_tcb.h
@@ -101,6 +101,7 @@ typedef struct pal_host_tcb {
uint64_t profile_sample_time; /* last time sgx_profile_sample() recorded a sample */
int32_t last_async_event; /* last async signal, reported to the enclave on ocall return */
int* start_status_ptr; /* pointer to return value of clone_thread */
bool reset_stats; /* if true, dump SGX stats and reset them on next AEX/OCALL */
} PAL_HOST_TCB;

extern void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack);
@@ -115,5 +116,10 @@ static inline PAL_HOST_TCB* pal_get_host_tcb(void) {
}

extern bool g_sgx_enable_stats;
void update_sgx_stats(bool do_print);
void update_sgx_stats_on_exit(bool do_print);
void collect_and_print_sgx_stats(void);
#ifdef DEBUG
void maybe_dump_and_reset_stats(void);
#endif /* DEBUG */

#endif /* IN_ENCLAVE */