diff --git a/Documentation/manifest-syntax.rst b/Documentation/manifest-syntax.rst index 772ef48ed9..c531139ff0 100644 --- a/Documentation/manifest-syntax.rst +++ b/Documentation/manifest-syntax.rst @@ -1293,6 +1293,10 @@ This syntax specifies whether to enable SGX enclave-specific statistics: includes creating the enclave, adding enclave pages, measuring them and initializing the enclave. +For this option to take effect, Gramine must be compiled with +``--buildtype=debug`` or ``--buildtype=debugoptimized``. Otherwise (if built in +release mode), Gramine will exit with an error. + .. warning:: This option is insecure and cannot be used with production enclaves (``sgx.debug = false``). If a production enclave is started with this option diff --git a/Documentation/performance.rst b/Documentation/performance.rst index 44ad2de509..6cbb4206d0 100644 --- a/Documentation/performance.rst +++ b/Documentation/performance.rst @@ -18,11 +18,13 @@ Enabling SGX enclave stats See also :ref:`perf` below for installing ``perf``. -Enable statistics using ``sgx.enable_stats = true`` manifest option. Now your -graminized application correctly reports performance counters. This is useful -when using e.g. ``perf stat`` to collect performance statistics. This manifest -option also forces Gramine to dump SGX-related information on each enclave exit. -Here is an example: +Enable statistics using ``sgx.enable_stats = true`` manifest option (note that +Gramine must be compiled with ``--buildtype=debug`` or +``--buildtype=debugoptimized`` for this option to work). Now your graminized +application correctly reports performance counters. This is useful when using +e.g. ``perf stat`` to collect performance statistics. This manifest option also +forces Gramine to dump SGX-related information on each enclave exit. Here is an +example: :: @@ -90,6 +92,12 @@ How to read this output: counters should be compared against "golden runs" to deduce any interesting trends. +It is also possible to dump and reset SGX-related statistics interactively, using +``SIGUSR1`` signal. This helps to collect SGX-related statistics only for a +particular period, e.g. skipping the Gramine startup and application +initialization time and concentrating only on the actual application processing. +Send ``SIGUSR1`` using command ``kill -SIGUSR1 -``. + Effects of system calls / ocalls -------------------------------- diff --git a/libos/test/regression/fork_and_access_file.manifest.template b/libos/test/regression/fork_and_access_file.manifest.template index 0f78cc750b..8f63bf4106 100644 --- a/libos/test/regression/fork_and_access_file.manifest.template +++ b/libos/test/regression/fork_and_access_file.manifest.template @@ -11,6 +11,9 @@ sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }} sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} +# this is only to test that `sgx.enable_stats` works (it can only be specified for debug-mode tests) +sgx.enable_stats = true + sgx.trusted_files = [ "file:{{ gramine.runtimedir(libc) }}/", "file:{{ binary_dir }}/{{ entrypoint }}", diff --git a/libos/test/regression/multi_pthread.manifest.template b/libos/test/regression/multi_pthread.manifest.template index 5f43081db8..2c6240bd7a 100644 --- a/libos/test/regression/multi_pthread.manifest.template +++ b/libos/test/regression/multi_pthread.manifest.template @@ -12,7 +12,6 @@ sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '8' }} sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} -sgx.enable_stats = true sgx.trusted_files = [ "file:{{ gramine.runtimedir(libc) }}/", diff --git a/libos/test/regression/multi_pthread_exitless.manifest.template b/libos/test/regression/multi_pthread_exitless.manifest.template index 4f4ad2c00d..f2f92753db 100644 --- a/libos/test/regression/multi_pthread_exitless.manifest.template +++ b/libos/test/regression/multi_pthread_exitless.manifest.template @@ -13,7 +13,6 @@ sgx.insecure__rpc_thread_num = 8 sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} -sgx.enable_stats = true sgx.trusted_files = [ "file:{{ gramine.runtimedir(libc) }}/", diff --git a/pal/regression/Thread2.manifest.template b/pal/regression/Thread2.manifest.template index fe216e7cdb..8e9926f044 100644 --- a/pal/regression/Thread2.manifest.template +++ b/pal/regression/Thread2.manifest.template @@ -1,6 +1,5 @@ loader.entrypoint.uri = "file:{{ binary_dir }}/{{ entrypoint }}" sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '2' }} -sgx.enable_stats = true sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} diff --git a/pal/regression/Thread2_exitless.manifest.template b/pal/regression/Thread2_exitless.manifest.template index 49557336b0..d416edd8d2 100644 --- a/pal/regression/Thread2_exitless.manifest.template +++ b/pal/regression/Thread2_exitless.manifest.template @@ -4,6 +4,5 @@ loader.entrypoint.uri = "file:{{ binary_dir }}/{{ entrypoint }}" sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '2' }} sgx.insecure__rpc_thread_num = 2 -sgx.enable_stats = true sgx.debug = true sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} diff --git a/pal/src/host/linux-sgx/host_entry.S b/pal/src/host/linux-sgx/host_entry.S index 97a3ebeec5..654f205ec8 100644 --- a/pal/src/host/linux-sgx/host_entry.S +++ b/pal/src/host/linux-sgx/host_entry.S @@ -16,6 +16,7 @@ .extern tcs_base .extern g_in_aex_profiling + .extern maybe_dump_and_reset_stats .global sgx_ecall .type sgx_ecall, @function @@ -102,6 +103,8 @@ async_exit_pointer: movq %rbx, %rdi call sgx_profile_sample_aex + call maybe_dump_and_reset_stats + # Restore stack movq %rbp, %rsp .cfi_def_cfa_register %rsp @@ -169,6 +172,8 @@ sgx_raise: movq %rdx, %rdi call sgx_profile_sample_ocall_inner + call maybe_dump_and_reset_stats + # Restore RDI movq -8(%rbp), %rdi #else diff --git a/pal/src/host/linux-sgx/host_exception.c b/pal/src/host/linux-sgx/host_exception.c index 0eea389dde..63a0abf6d0 100644 --- a/pal/src/host/linux-sgx/host_exception.c +++ b/pal/src/host/linux-sgx/host_exception.c @@ -17,6 +17,7 @@ #include "debug_map.h" #include "host_internal.h" #include "pal_rpc_queue.h" +#include "pal_tcb.h" #include "sigreturn.h" #include "sigset.h" #include "ucontext.h" @@ -188,6 +189,11 @@ static void handle_sigusr1(int signum, siginfo_t* info, struct ucontext* uc) { __UNUSED(info); __UNUSED(uc); + if (g_sgx_enable_stats) { + PAL_HOST_TCB* tcb = pal_get_host_tcb(); + __atomic_store_n(&tcb->reset_stats, true, __ATOMIC_RELAXED); + } + if (g_pal_enclave.profile_enable) { __atomic_store_n(&g_trigger_profile_reinit, true, __ATOMIC_RELEASE); } @@ -268,3 +274,16 @@ void pal_describe_location(uintptr_t addr, char* buf, size_t buf_size) { #endif default_describe_location(addr, buf, buf_size); } + +#ifdef DEBUG +/* called on each AEX and OCALL (in normal context), see host_entry.S */ +void maybe_dump_and_reset_stats(void) { + if (!g_sgx_enable_stats) + return; + + PAL_HOST_TCB* tcb = pal_get_host_tcb(); + if (__atomic_exchange_n(&tcb->reset_stats, false, __ATOMIC_RELAXED) == true) { + collect_and_print_sgx_stats(); + } +} +#endif diff --git a/pal/src/host/linux-sgx/host_internal.h b/pal/src/host/linux-sgx/host_internal.h index c7699924df..bf400954bc 100644 --- a/pal/src/host/linux-sgx/host_internal.h +++ b/pal/src/host/linux-sgx/host_internal.h @@ -122,7 +122,6 @@ int clone_thread(void* dynamic_tcs); int create_tcs_mapper(void* tcs_base, unsigned int thread_num); int pal_thread_init(void* tcbptr); -void map_tcs(unsigned int tid); void unmap_my_tcs(void); int current_enclave_thread_cnt(void); void thread_exit(int status); diff --git a/pal/src/host/linux-sgx/host_main.c b/pal/src/host/linux-sgx/host_main.c index d034e7ecdf..33b29878c4 100644 --- a/pal/src/host/linux-sgx/host_main.c +++ b/pal/src/host/linux-sgx/host_main.c @@ -753,6 +753,14 @@ static int parse_loader_config(char* manifest, struct pal_enclave* enclave_info, goto out; } +#ifndef DEBUG + if (g_sgx_enable_stats) { + log_error("'sgx.enable_stats = true' is specified in non-debug mode, this is disallowed"); + ret = -EINVAL; + goto out; + } +#endif + ret = toml_string_in(manifest_root, "sgx.sigfile", &dummy_sigfile_str); if (ret < 0 || dummy_sigfile_str) { log_error("sgx.sigfile is not supported anymore. Please update your manifest according to " diff --git a/pal/src/host/linux-sgx/host_ocalls.c b/pal/src/host/linux-sgx/host_ocalls.c index 45ac7e3f4a..31b199c0c9 100644 --- a/pal/src/host/linux-sgx/host_ocalls.c +++ b/pal/src/host/linux-sgx/host_ocalls.c @@ -31,9 +31,8 @@ rpc_queue_t* g_rpc_queue = NULL; /* pointer to untrusted queue */ static noreturn void process_exit(int exitcode) { - update_sgx_stats(/*do_print=*/true); - #ifdef DEBUG + update_sgx_stats_on_exit(/*do_print=*/true); sgx_profile_finish(); #endif @@ -73,7 +72,9 @@ static long sgx_ocall_exit(void* args) { process_exit((int)ocall_exit_args->exitcode); } - update_sgx_stats(/*do_print=*/false); +#ifdef DEBUG + update_sgx_stats_on_exit(/*do_print=*/false); +#endif thread_exit((int)ocall_exit_args->exitcode); return 0; } diff --git a/pal/src/host/linux-sgx/host_thread.c b/pal/src/host/linux-sgx/host_thread.c index e5f89bc52d..70384c72d3 100644 --- a/pal/src/host/linux-sgx/host_thread.c +++ b/pal/src/host/linux-sgx/host_thread.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "asan.h" #include "assert.h" @@ -16,6 +17,7 @@ struct enclave_thread_map { unsigned int tid; sgx_arch_tcs_t* tcs; + PAL_HOST_TCB* tcb; }; static struct enclave_thread_map* g_enclave_thread_map = NULL; @@ -26,36 +28,92 @@ static size_t g_enclave_thread_num = 0; bool g_sgx_enable_stats = false; -/* this function is called only on thread/process exit (never in the middle of thread exec) */ -void update_sgx_stats(bool do_print) { - static atomic_ulong g_eenter_cnt = 0; - static atomic_ulong g_eexit_cnt = 0; - static atomic_ulong g_aex_cnt = 0; - static atomic_ulong g_sync_signal_cnt = 0; - static atomic_ulong g_async_signal_cnt = 0; +/* protected by g_enclave_thread_map_lock to prevent data races when one thread exits (and updates + * these global SGX counters) and another thread dumps (reads) these SGX counters */ +static uint64_t g_eenter_cnt = 0; +static uint64_t g_eexit_cnt = 0; +static uint64_t g_aex_cnt = 0; +static uint64_t g_sync_signal_cnt = 0; +static uint64_t g_async_signal_cnt = 0; + +static void print_global_sgx_stats(void) { + assert(spinlock_is_locked(&g_enclave_thread_map_lock)); + + int pid = g_host_pid; + assert(pid > 0); + + log_always("----- Total SGX stats for process %d -----\n" + " # of EENTERs: %lu\n" + " # of EEXITs: %lu\n" + " # of AEXs: %lu\n" + " # of sync signals: %lu\n" + " # of async signals: %lu", + pid, g_eenter_cnt, g_eexit_cnt, g_aex_cnt, + g_sync_signal_cnt, g_async_signal_cnt); +} +static void reset_global_sgx_stats(void) { + assert(spinlock_is_locked(&g_enclave_thread_map_lock)); + + g_eenter_cnt = 0; + g_eexit_cnt = 0; + g_aex_cnt = 0; + g_sync_signal_cnt = 0; + g_async_signal_cnt = 0; +} + +static void update_global_sgx_stats_from_thread_stats(PAL_HOST_TCB* tcb) { + assert(spinlock_is_locked(&g_enclave_thread_map_lock)); + + g_eenter_cnt += atomic_exchange_explicit(&tcb->eenter_cnt, 0, memory_order_relaxed); + g_eexit_cnt += atomic_exchange_explicit(&tcb->eexit_cnt, 0, memory_order_relaxed); + g_aex_cnt += atomic_exchange_explicit(&tcb->aex_cnt, 0, memory_order_relaxed); + g_sync_signal_cnt += atomic_exchange_explicit(&tcb->sync_signal_cnt, 0, memory_order_relaxed); + g_async_signal_cnt += atomic_exchange_explicit(&tcb->async_signal_cnt, 0, memory_order_relaxed); +} + +/* this function is called only on thread/process exit (never in the middle of thread exec) */ +void update_sgx_stats_on_exit(bool do_print) { if (!g_sgx_enable_stats) return; + spinlock_lock(&g_enclave_thread_map_lock); + PAL_HOST_TCB* tcb = pal_get_host_tcb(); - g_eenter_cnt += tcb->eenter_cnt; - g_eexit_cnt += tcb->eexit_cnt; - g_aex_cnt += tcb->aex_cnt; - g_sync_signal_cnt += tcb->sync_signal_cnt; - g_async_signal_cnt += tcb->async_signal_cnt; - - if (do_print) { - int pid = g_host_pid; - assert(pid > 0); - log_always("----- Total SGX stats for process %d -----\n" - " # of EENTERs: %lu\n" - " # of EEXITs: %lu\n" - " # of AEXs: %lu\n" - " # of sync signals: %lu\n" - " # of async signals: %lu", - pid, g_eenter_cnt, g_eexit_cnt, g_aex_cnt, - g_sync_signal_cnt, g_async_signal_cnt); + update_global_sgx_stats_from_thread_stats(tcb); + + if (do_print) + print_global_sgx_stats(); + + spinlock_unlock(&g_enclave_thread_map_lock); +} + +void collect_and_print_sgx_stats(void) { + if (!g_sgx_enable_stats) + return; + + /* + * This function is executed by the "SGX-stats-collecting" thread (that received SIGUSR1). Thus, + * this thread is able to peek into the local storage of other threads. This is typically + * considered a bad smell (one thread reads local data of another thread), but here it's a + * reasonable trade-off: most of the accesses to the thread-local SGX counters are done on EEXIT + * and AEX events by the thread itself, so the memory access should be as simple as possible and + * as fast as possible. An alternative would be to move all SGX stats in a shared array, then we + * would have false cache sharing and complex memory management of the shared array. + */ + spinlock_lock(&g_enclave_thread_map_lock); + for (size_t i = 0; i < g_enclave_thread_num; i++) { + if (!g_enclave_thread_map[i].tcs || !g_enclave_thread_map[i].tid) + continue; + + PAL_HOST_TCB* tcb = g_enclave_thread_map[i].tcb; + update_global_sgx_stats_from_thread_stats(tcb); } + + print_global_sgx_stats(); + reset_global_sgx_stats(); + + spinlock_unlock(&g_enclave_thread_map_lock); } void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { @@ -69,6 +127,7 @@ void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack) { tcb->aex_cnt = 0; tcb->sync_signal_cnt = 0; tcb->async_signal_cnt = 0; + tcb->reset_stats = false; tcb->profile_sample_time = 0; @@ -86,6 +145,7 @@ int create_tcs_mapper(void* tcs_base, unsigned int thread_num) { for (uint32_t i = 0; i < thread_num; i++) { g_enclave_thread_map[i].tid = 0; g_enclave_thread_map[i].tcs = &enclave_tcs[i]; + g_enclave_thread_map[i].tcb = NULL; } g_enclave_thread_num = thread_num; return 0; @@ -147,7 +207,7 @@ static int add_dynamic_tcs(sgx_arch_tcs_t* tcs) { return ret; } -void map_tcs(unsigned int tid) { +static void map_tcs(unsigned int tid, PAL_HOST_TCB* tcb) { while (true) { spinlock_lock(&g_enclave_thread_map_lock); for (size_t i = 0; i < g_enclave_thread_num; i++) { @@ -155,6 +215,8 @@ void map_tcs(unsigned int tid) { continue; if (!g_enclave_thread_map[i].tid) { g_enclave_thread_map[i].tid = tid; + g_enclave_thread_map[i].tcb = tcb; + pal_get_host_tcb()->tcs = g_enclave_thread_map[i].tcs; ((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i] = tid; spinlock_unlock(&g_enclave_thread_map_lock); @@ -186,6 +248,7 @@ void unmap_my_tcs(void) { for (i = 0; i < g_enclave_thread_num; i++) if (g_enclave_thread_map[i].tcs == pal_get_host_tcb()->tcs) { g_enclave_thread_map[i].tid = 0; + g_enclave_thread_map[i].tcb = NULL; ((struct enclave_dbginfo*)DBGINFO_ADDR)->thread_tids[i] = 0; break; } @@ -237,7 +300,7 @@ int pal_thread_init(void* tcbptr) { } int tid = DO_SYSCALL(gettid); - map_tcs(tid); /* updates tcb->tcs */ + map_tcs(tid, tcb); /* also updates tcb->tcs */ if (!tcb->tcs) { log_error("There are no available TCS pages left for a new thread. Please try to increase" diff --git a/pal/src/host/linux-sgx/pal_tcb.h b/pal/src/host/linux-sgx/pal_tcb.h index 97c4534583..227506db86 100644 --- a/pal/src/host/linux-sgx/pal_tcb.h +++ b/pal/src/host/linux-sgx/pal_tcb.h @@ -101,6 +101,7 @@ typedef struct pal_host_tcb { uint64_t profile_sample_time; /* last time sgx_profile_sample() recorded a sample */ int32_t last_async_event; /* last async signal, reported to the enclave on ocall return */ int* start_status_ptr; /* pointer to return value of clone_thread */ + bool reset_stats; /* if true, dump SGX stats and reset them on next AEX/OCALL */ } PAL_HOST_TCB; extern void pal_host_tcb_init(PAL_HOST_TCB* tcb, void* stack, void* alt_stack); @@ -115,5 +116,10 @@ static inline PAL_HOST_TCB* pal_get_host_tcb(void) { } extern bool g_sgx_enable_stats; -void update_sgx_stats(bool do_print); +void update_sgx_stats_on_exit(bool do_print); +void collect_and_print_sgx_stats(void); +#ifdef DEBUG +void maybe_dump_and_reset_stats(void); +#endif /* DEBUG */ + #endif /* IN_ENCLAVE */