diff --git a/configure.ac b/configure.ac index 4e56b6497..8fe117f22 100644 --- a/configure.ac +++ b/configure.ac @@ -202,8 +202,12 @@ AC_ARG_ENABLE([tool], # --enable-stack-overflow-check AC_ARG_ENABLE([stack-overflow-check], [ --enable-stack-overflow-check@<:@=OPT@:>@ enable a stack overflow check - canary|canary-8 - use an 8-byte stack canary. - canary-XX - use an XX-byte stack canary. + canary|canary-8 - use an 8-byte stack canary. + canary-XX - use an XX-byte stack canary. + mprotect - use mprotect. Ignore the failure of mprotect(). + Alternatively, users can set ABT_STACK_OVERFLOW_CHECK=mprotect + mprotect-strict - use mprotect. Assert if mprotect() fails. + Alternatively, users can set ABT_STACK_OVERFLOW_CHECK=mprotect_strict none|no ],,[enable_stack_overflow_check=no]) @@ -712,6 +716,12 @@ case "$enable_stack_overflow_check" in AC_MSG_WARN([Unknown value $enable_stack_overflow_check for --enable-stack-overflow-check]) fi ;; + mprotect) + stack_overflow_check_type="ABTI_STACK_CHECK_TYPE_MPROTECT" + ;; + mprotect-strict) + stack_overflow_check_type="ABTI_STACK_CHECK_TYPE_MPROTECT_STRICT" + ;; none|no) stack_overflow_check_type="ABTI_STACK_CHECK_TYPE_NONE" ;; @@ -875,6 +885,12 @@ AC_CHECK_LIB(pthread, pthread_join) # check pthread_barrier AC_CHECK_FUNCS(pthread_barrier_init) +# check mprotect +AC_CHECK_FUNCS(mprotect) + +# check getpagesize +AC_CHECK_FUNCS(getpagesize) + # check dlvsym ABT_RT_CFLAGS="" ABT_RT_LDFLAGS="" diff --git a/src/arch/abtd_env.c b/src/arch/abtd_env.c index 42e33a191..033dd7dbb 100644 --- a/src/arch/abtd_env.c +++ b/src/arch/abtd_env.c @@ -13,6 +13,7 @@ #define ABTD_SCHED_EVENT_FREQ 50 #define ABTD_SCHED_SLEEP_NSEC 100 +#define ABTD_SYS_PAGE_SIZE 4096 #define ABTD_HUGE_PAGE_SIZE (2 * 1024 * 1024) #define ABTD_MEM_PAGE_SIZE (2 * 1024 * 1024) #define ABTD_MEM_STACK_PAGE_SIZE (8 * 1024 * 1024) @@ -28,9 +29,7 @@ #define ABTD_ENV_SIZE_MAX ((size_t)(SIZE_MAX / 2)) static uint32_t roundup_pow2_uint32(uint32_t val); -#ifdef 
ABT_CONFIG_USE_MEM_POOL static size_t roundup_pow2_size(size_t val); -#endif static const char *get_abt_env(const char *env_suffix); static ABT_bool is_false(const char *str, ABT_bool include0); static ABT_bool is_true(const char *str, ABT_bool include1); @@ -89,6 +88,38 @@ void ABTD_env_init(ABTI_global *p_global) load_env_uint32("KEY_TABLE_SIZE", ABTD_KEY_TABLE_DEFAULT_SIZE, 1, ABTD_ENV_UINT32_MAX)); + /* ABT_STACK_OVERFLOW_CHECK, ABT_ENV_STACK_OVERFLOW_CHECK */ + env = get_abt_env("STACK_OVERFLOW_CHECK"); + if (env) { + if (strcasecmp(env, "mprotect_strict") == 0) { + p_global->stack_guard_kind = ABTI_STACK_GUARD_MPROTECT_STRICT; + } else if (strcasecmp(env, "mprotect") == 0) { + p_global->stack_guard_kind = ABTI_STACK_GUARD_MPROTECT; + } else { + /* Otherwise, disable mprotect-based stack guard. */ + p_global->stack_guard_kind = ABTI_STACK_GUARD_NONE; + } + } else { + /* Set the default mode. */ +#if ABT_CONFIG_STACK_CHECK_TYPE == ABTI_STACK_CHECK_TYPE_MPROTECT + p_global->stack_guard_kind = ABTI_STACK_GUARD_MPROTECT; +#elif ABT_CONFIG_STACK_CHECK_TYPE == ABTI_STACK_CHECK_TYPE_MPROTECT_STRICT + p_global->stack_guard_kind = ABTI_STACK_GUARD_MPROTECT_STRICT; +#else + /* Stack canary is compile-time setting. */ + p_global->stack_guard_kind = ABTI_STACK_GUARD_NONE; +#endif + } + + /* ABT_SYS_PAGE_SIZE, ABT_ENV_SYS_PAGE_SIZE + * System page size. It must be 2^N. 
*/ + size_t sys_page_size = ABTD_SYS_PAGE_SIZE; +#if HAVE_GETPAGESIZE + sys_page_size = getpagesize(); +#endif + p_global->sys_page_size = roundup_pow2_size( + load_env_size("SYS_PAGE_SIZE", sys_page_size, 64, ABTD_ENV_SIZE_MAX)); + /* ABT_THREAD_STACKSIZE, ABT_ENV_THREAD_STACKSIZE * Default stack size for ULT */ p_global->thread_stacksize = @@ -267,7 +298,6 @@ static uint32_t roundup_pow2_uint32(uint32_t val) return ((uint32_t)1) << i; } -#ifdef ABT_CONFIG_USE_MEM_POOL static size_t roundup_pow2_size(size_t val) { if (val == 0) @@ -279,7 +309,6 @@ static size_t roundup_pow2_size(size_t val) } return ((size_t)1) << i; } -#endif static const char *get_abt_env(const char *env_suffix) { diff --git a/src/include/abti.h b/src/include/abti.h index bfdbebb63..29b4cf15c 100644 --- a/src/include/abti.h +++ b/src/include/abti.h @@ -66,6 +66,8 @@ #define ABTI_STACK_CHECK_TYPE_NONE 0 #define ABTI_STACK_CHECK_TYPE_CANARY 1 +#define ABTI_STACK_CHECK_TYPE_MPROTECT 2 +#define ABTI_STACK_CHECK_TYPE_MPROTECT_STRICT 3 enum ABTI_xstream_type { ABTI_XSTREAM_TYPE_PRIMARY, @@ -78,6 +80,12 @@ enum ABTI_sched_used { ABTI_SCHED_IN_POOL }; +enum ABTI_stack_guard { + ABTI_STACK_GUARD_NONE = 0, + ABTI_STACK_GUARD_MPROTECT, + ABTI_STACK_GUARD_MPROTECT_STRICT, +}; + #define ABTI_THREAD_TYPE_EXT ((ABTI_thread_type)0) #define ABTI_THREAD_TYPE_THREAD ((ABTI_thread_type)(0x1 << 0)) #define ABTI_THREAD_TYPE_ROOT ((ABTI_thread_type)(0x1 << 1)) @@ -150,6 +158,7 @@ typedef struct ABTI_thread_id_opaque *ABTI_thread_id; /* Unit-to-thread hash table. */ typedef struct ABTI_atomic_unit_to_thread ABTI_atomic_unit_to_thread; typedef struct ABTI_unit_to_thread_entry ABTI_unit_to_thread_entry; +typedef enum ABTI_stack_guard ABTI_stack_guard; /* Architecture-Dependent Definitions */ #include "abtd.h" @@ -215,6 +224,7 @@ struct ABTI_global { uint32_t mutex_max_handovers; /* Default max. # of local handovers (unused) */ uint32_t mutex_max_wakeups; /* Default max. 
# of wakeups (unused) */ + size_t sys_page_size; /* System page size (typically, 4KB) */ size_t huge_page_size; /* Huge page size */ #ifdef ABT_CONFIG_USE_MEM_POOL size_t mem_page_size; /* Page size for memory allocation */ @@ -234,6 +244,7 @@ struct ABTI_global { ABTI_mem_pool_local_pool mem_pool_desc_ext; #endif #endif + ABTI_stack_guard stack_guard_kind; /* Stack guard type. */ ABT_bool print_config; /* Whether to print config on ABT_init */ diff --git a/src/include/abti_mem.h b/src/include/abti_mem.h index b958e4056..b6e392fc0 100644 --- a/src/include/abti_mem.h +++ b/src/include/abti_mem.h @@ -32,34 +32,95 @@ int ABTI_mem_check_lp_alloc(ABTI_global *p_global, int lp_alloc); #define ABTI_STACK_CANARY_VALUE ((uint64_t)0xbaadc0debaadc0de) /* Inline functions */ -static inline void ABTI_mem_register_stack(void *p_stack, size_t stacksize) -{ #if ABT_CONFIG_STACK_CHECK_TYPE == ABTI_STACK_CHECK_TYPE_CANARY +static inline void ABTI_mem_write_stack_canary(void *p_stack) +{ /* Write down stack canary. 
*/ - if (p_stack) { - uint64_t i; - for (i = 0; - i < ABTU_roundup_uint64(ABT_CONFIG_STACK_CHECK_CANARY_SIZE, 8); - i += sizeof(uint64_t)) { - ((uint64_t *)p_stack)[i] = ABTI_STACK_CANARY_VALUE; - } + uint64_t i; + for (i = 0; i < ABTU_roundup_uint64(ABT_CONFIG_STACK_CHECK_CANARY_SIZE, 8); + i += sizeof(uint64_t)) { + ((uint64_t *)p_stack)[i] = ABTI_STACK_CANARY_VALUE; + } +} + +static inline void ABTI_mem_check_stack_canary(void *p_stack) +{ + uint64_t i; + for (i = 0; i < ABTU_roundup_uint64(ABT_CONFIG_STACK_CHECK_CANARY_SIZE, 8); + i += sizeof(uint64_t)) { + ABTI_ASSERT(((uint64_t *)p_stack)[i] == ABTI_STACK_CANARY_VALUE); } +} +#endif + +static inline void ABTI_mem_register_stack(const ABTI_global *p_global, + void *p_stack, size_t stacksize, + ABT_bool mprotect_if_needed) +{ + if (mprotect_if_needed) { + if (p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT || + p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT_STRICT) { + if (p_stack) { + int abt_errno = + ABTU_mprotect(ABTU_roundup_ptr(p_stack, + p_global->sys_page_size), + p_global->sys_page_size, ABT_TRUE); + if (p_global->stack_guard_kind == + ABTI_STACK_GUARD_MPROTECT_STRICT) { + ABTI_ASSERT(abt_errno == ABT_SUCCESS); + } + } + } else { +#if ABT_CONFIG_STACK_CHECK_TYPE == ABTI_STACK_CHECK_TYPE_CANARY + if (p_stack) { + ABTI_mem_write_stack_canary(p_stack); + } +#endif + } + } else { +#if ABT_CONFIG_STACK_CHECK_TYPE == ABTI_STACK_CHECK_TYPE_CANARY + if (!(p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT || + p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT_STRICT) && + p_stack) { + ABTI_mem_write_stack_canary(p_stack); + } #endif + } ABTI_VALGRIND_REGISTER_STACK(p_stack, stacksize); } -static inline void ABTI_mem_unregister_stack(void *p_stack) +static inline void ABTI_mem_unregister_stack(const ABTI_global *p_global, + void *p_stack, + ABT_bool mprotect_if_needed) { + if (mprotect_if_needed) { + if (p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT || + 
p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT_STRICT) { + if (p_stack) { + int abt_errno = + ABTU_mprotect(ABTU_roundup_ptr(p_stack, + p_global->sys_page_size), + p_global->sys_page_size, ABT_FALSE); + /* This should not fail since otherwise we cannot free this + * memory. */ + ABTI_ASSERT(abt_errno == ABT_SUCCESS); + } + } else { #if ABT_CONFIG_STACK_CHECK_TYPE == ABTI_STACK_CHECK_TYPE_CANARY - if (p_stack) { - uint64_t i; - for (i = 0; - i < ABTU_roundup_uint64(ABT_CONFIG_STACK_CHECK_CANARY_SIZE, 8); - i += sizeof(uint64_t)) { - ABTI_ASSERT(((uint64_t *)p_stack)[i] == ABTI_STACK_CANARY_VALUE); + if (p_stack) { + ABTI_mem_check_stack_canary(p_stack); + } +#endif + } + } else { +#if ABT_CONFIG_STACK_CHECK_TYPE == ABTI_STACK_CHECK_TYPE_CANARY + if (!(p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT || + p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT_STRICT) && + p_stack) { + ABTI_mem_check_stack_canary(p_stack); } - } #endif + } ABTI_VALGRIND_UNREGISTER_STACK(p_stack); } @@ -179,24 +240,26 @@ ABTI_mem_alloc_ythread_default(ABTI_global *p_global, ABTI_local *p_local, &p_stack); ABTI_CHECK_ERROR(abt_errno); p_ythread->thread.type = ABTI_THREAD_TYPE_MEM_MALLOC_DESC_STACK; + ABTI_mem_register_stack(p_global, p_stack, stacksize, ABT_TRUE); } else { #ifdef ABT_CONFIG_USE_MEM_POOL int abt_errno = ABTI_mem_alloc_ythread_mempool_desc_stack_impl( &p_local_xstream->mem_pool_stack, stacksize, &p_ythread, &p_stack); ABTI_CHECK_ERROR(abt_errno); p_ythread->thread.type = ABTI_THREAD_TYPE_MEM_MEMPOOL_DESC_STACK; + ABTI_mem_register_stack(p_global, p_stack, stacksize, ABT_FALSE); #else int abt_errno = ABTI_mem_alloc_ythread_malloc_desc_stack_impl(stacksize, &p_ythread, &p_stack); ABTI_CHECK_ERROR(abt_errno); p_ythread->thread.type = ABTI_THREAD_TYPE_MEM_MALLOC_DESC_STACK; + ABTI_mem_register_stack(p_global, p_stack, stacksize, ABT_TRUE); #endif } /* Initialize members of ABTI_thread_attr. 
*/ p_ythread->p_stack = p_stack; p_ythread->stacksize = stacksize; - ABTI_mem_register_stack(p_ythread->p_stack, p_ythread->stacksize); *pp_ythread = p_ythread; return ABT_SUCCESS; } @@ -217,24 +280,24 @@ ABTU_ret_err static inline int ABTI_mem_alloc_ythread_mempool_desc_stack( &p_stack); ABTI_CHECK_ERROR(abt_errno); p_ythread->thread.type = ABTI_THREAD_TYPE_MEM_MALLOC_DESC_STACK; + ABTI_mem_register_stack(p_global, p_stack, stacksize, ABT_TRUE); } else { int abt_errno = ABTI_mem_alloc_ythread_mempool_desc_stack_impl( &p_local_xstream->mem_pool_stack, stacksize, &p_ythread, &p_stack); ABTI_CHECK_ERROR(abt_errno); p_ythread->thread.type = ABTI_THREAD_TYPE_MEM_MEMPOOL_DESC_STACK; + ABTI_mem_register_stack(p_global, p_stack, stacksize, ABT_FALSE); } /* Copy members of p_attr. */ p_ythread->p_stack = p_stack; p_ythread->stacksize = stacksize; - ABTI_mem_register_stack(p_ythread->p_stack, p_ythread->stacksize); *pp_ythread = p_ythread; return ABT_SUCCESS; } #endif -ABTU_ret_err static inline int -ABTI_mem_alloc_ythread_malloc_desc_stack(ABTI_thread_attr *p_attr, - ABTI_ythread **pp_ythread) +ABTU_ret_err static inline int ABTI_mem_alloc_ythread_malloc_desc_stack( + ABTI_global *p_global, ABTI_thread_attr *p_attr, ABTI_ythread **pp_ythread) { size_t stacksize = p_attr->stacksize; ABTI_ythread *p_ythread; @@ -248,13 +311,15 @@ ABTI_mem_alloc_ythread_malloc_desc_stack(ABTI_thread_attr *p_attr, p_ythread->thread.type = ABTI_THREAD_TYPE_MEM_MALLOC_DESC_STACK; p_ythread->stacksize = stacksize; p_ythread->p_stack = p_stack; - ABTI_mem_register_stack(p_ythread->p_stack, p_ythread->stacksize); + ABTI_mem_register_stack(p_global, p_stack, stacksize, ABT_TRUE); *pp_ythread = p_ythread; return ABT_SUCCESS; } -ABTU_ret_err static inline int ABTI_mem_alloc_ythread_mempool_desc( - ABTI_local *p_local, ABTI_thread_attr *p_attr, ABTI_ythread **pp_ythread) +ABTU_ret_err static inline int +ABTI_mem_alloc_ythread_mempool_desc(ABTI_global *p_global, ABTI_local *p_local, + ABTI_thread_attr 
*p_attr, + ABTI_ythread **pp_ythread) { ABTI_ythread *p_ythread; if (sizeof(ABTI_ythread) <= ABTI_MEM_POOL_DESC_ELEM_SIZE) { @@ -272,8 +337,8 @@ ABTU_ret_err static inline int ABTI_mem_alloc_ythread_mempool_desc( /* Copy members of p_attr. */ p_ythread->stacksize = p_attr->stacksize; p_ythread->p_stack = p_attr->p_stack; - /* Note that the valgrind registration is ignored if p_stack is NULL. */ - ABTI_mem_register_stack(p_ythread->p_stack, p_ythread->stacksize); + ABTI_mem_register_stack(p_global, p_ythread->p_stack, p_ythread->stacksize, + ABT_TRUE); *pp_ythread = p_ythread; return ABT_SUCCESS; } @@ -286,7 +351,7 @@ static inline void ABTI_mem_free_thread(ABTI_global *p_global, #ifdef ABT_CONFIG_USE_MEM_POOL if (p_thread->type & ABTI_THREAD_TYPE_MEM_MEMPOOL_DESC_STACK) { ABTI_ythread *p_ythread = ABTI_thread_get_ythread(p_thread); - ABTI_mem_unregister_stack(p_ythread->p_stack); + ABTI_mem_unregister_stack(p_global, p_ythread->p_stack, ABT_FALSE); ABTI_xstream *p_local_xstream = ABTI_local_get_xstream_or_null(p_local); /* Came from a memory pool. */ @@ -306,18 +371,18 @@ static inline void ABTI_mem_free_thread(ABTI_global *p_global, /* Non-yieldable thread or yieldable thread without stack. 
*/ ABTI_ythread *p_ythread = ABTI_thread_get_ythread_or_null(p_thread); if (p_ythread) - ABTI_mem_unregister_stack(p_ythread->p_stack); + ABTI_mem_unregister_stack(p_global, p_ythread->p_stack, ABT_TRUE); ABTI_mem_free_nythread(p_global, p_local, p_thread); } else if (p_thread->type & ABTI_THREAD_TYPE_MEM_MALLOC_DESC_STACK) { ABTI_ythread *p_ythread = ABTI_thread_get_ythread(p_thread); - ABTI_mem_unregister_stack(p_ythread->p_stack); + ABTI_mem_unregister_stack(p_global, p_ythread->p_stack, ABT_TRUE); ABTU_free(p_ythread->p_stack); } else { ABTI_ASSERT(p_thread->type & ABTI_THREAD_TYPE_MEM_MALLOC_DESC); ABTI_STATIC_ASSERT(offsetof(ABTI_ythread, thread) == 0); ABTI_ythread *p_ythread = ABTI_thread_get_ythread_or_null(p_thread); if (p_ythread) - ABTI_mem_unregister_stack(p_ythread->p_stack); + ABTI_mem_unregister_stack(p_global, p_ythread->p_stack, ABT_TRUE); ABTU_free(p_thread); } } diff --git a/src/include/abti_mem_pool.h b/src/include/abti_mem_pool.h index 3cdc17332..b3962cb42 100644 --- a/src/include/abti_mem_pool.h +++ b/src/include/abti_mem_pool.h @@ -32,6 +32,15 @@ typedef struct ABTI_mem_pool_page { size_t mem_extra_size; } ABTI_mem_pool_page; +typedef struct ABTI_mem_pool_global_pool_mprotect_config { + ABT_bool enabled; /* Use page protection or not. */ + ABT_bool check_error; /* Check error. */ + size_t offset; /* Page protection offset. */ + size_t page_size; /* Protection page size. */ + size_t alignment; /* Alignment of protected page. It should be a multiple + of the system page size. */ +} ABTI_mem_pool_global_pool_mprotect_config; + /* * To efficiently take/return multiple headers per bucket, headers are linked as * follows in the global pool (bucket_lifo). @@ -47,7 +56,8 @@ typedef struct ABTI_mem_pool_page { * . */ typedef struct ABTI_mem_pool_global_pool { - size_t header_size; /* Size of header */ + size_t header_size; /* Size of header. This size includes a protected + * page. 
*/ size_t page_size; /* Size of page (mem of ABTI_mem_pool_page) */ size_t alignment_hint; /* Alignment hint for page */ size_t header_offset; /* Offset of ABTI_mem_pool_header from the top @@ -59,6 +69,7 @@ typedef struct ABTI_mem_pool_global_pool { */ ABTU_MEM_LARGEPAGE_TYPE lp_type_requests[4]; /* Requests for large page allocation */ + ABTI_mem_pool_global_pool_mprotect_config mprotect_config; ABTU_align_member_var(ABT_CONFIG_STATIC_CACHELINE_SIZE) ABTI_sync_lifo bucket_lifo; /* LIFO of available buckets. */ ABTU_align_member_var(ABT_CONFIG_STATIC_CACHELINE_SIZE) @@ -100,7 +111,8 @@ void ABTI_mem_pool_init_global_pool( ABTI_mem_pool_global_pool *p_global_pool, size_t num_headers_per_bucket, size_t header_size, size_t header_offset, size_t page_size, const ABTU_MEM_LARGEPAGE_TYPE *lp_type_requests, - uint32_t num_lp_type_requests, size_t alignment_hint); + uint32_t num_lp_type_requests, size_t alignment_hint, + ABTI_mem_pool_global_pool_mprotect_config *p_mprotect_config); void ABTI_mem_pool_destroy_global_pool( ABTI_mem_pool_global_pool *p_global_pool); ABTU_ret_err int diff --git a/src/include/abtu.h b/src/include/abtu.h index f297edaac..ea70d79a4 100644 --- a/src/include/abtu.h +++ b/src/include/abtu.h @@ -102,6 +102,17 @@ static inline size_t ABTU_roundup_size(size_t val, size_t multiple) } } +static inline void *ABTU_roundup_ptr(void *ptr, size_t multiple) +{ + if ((multiple & (multiple - 1)) == 0) { + /* If multiple is a power of two. */ + return (void *)((((uintptr_t)ptr) + multiple - 1) & (~(multiple - 1))); + } else { + return (void *)(((((uintptr_t)ptr) + multiple - 1) / multiple) * + multiple); + } +} + /* Utility feature */ #ifdef HAVE___BUILTIN_EXPECT @@ -311,6 +322,11 @@ ABTU_alloc_largepage(size_t size, size_t alignment_hint, void **p_ptr); void ABTU_free_largepage(void *ptr, size_t size, ABTU_MEM_LARGEPAGE_TYPE type); +/* An error is ignored even if mprotect call fails. + * PROT_NONE is set if protect == ABT_TRUE. 
+ * (PROT_READ | PROT_WRITE) is permitted if protect == ABT_FALSE. */ +ABTU_ret_err int ABTU_mprotect(void *addr, size_t size, ABT_bool protect); + /* String-to-integer functions. */ ABTU_ret_err int ABTU_atoi(const char *str, int *p_val, ABT_bool *p_overflow); ABTU_ret_err int ABTU_atoui32(const char *str, uint32_t *p_val, diff --git a/src/info.c b/src/info.c index b38642283..63c7f761f 100644 --- a/src/info.c +++ b/src/info.c @@ -174,7 +174,10 @@ static void info_trigger_print_all_thread_stacks( * * \c val must be a pointer to a variable of type \c int. \c val is set to 1 * if Argobots is configured to use a stack canary to check stack overflow. - * Otherwise, \c val is set to 0. + * \c val is set to 2 if Argobots is configured to use an mprotect-based stack + * guard but ignore an error of \c mprotect(). \c val is set to 3 if Argobots + * is configured to use an mprotect-based stack guard and assert an error of + * \c mprotect(). Otherwise, \c val is set to 0. * * - \c ABT_INFO_QUERY_KIND_WAIT_POLICY * * @@ -356,13 +359,22 @@ int ABT_info_query_config(ABT_info_query_kind query_kind, void *val) *((ABT_bool *)val) = ABT_FALSE; #endif break; - case ABT_INFO_QUERY_KIND_ENABLED_STACK_OVERFLOW_CHECK: + case ABT_INFO_QUERY_KIND_ENABLED_STACK_OVERFLOW_CHECK: { + ABTI_global *p_global; + ABTI_SETUP_GLOBAL(&p_global); + if (p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT) { + *((int *)val) = 2; + } else if (p_global->stack_guard_kind == + ABTI_STACK_GUARD_MPROTECT_STRICT) { + *((int *)val) = 3; + } else { #if ABT_CONFIG_STACK_CHECK_TYPE == ABTI_STACK_CHECK_TYPE_CANARY - *((int *)val) = 1; + *((int *)val) = 1; #else - *((int *)val) = 0; + *((int *)val) = 0; #endif - break; + } + } break; case ABT_INFO_QUERY_KIND_WAIT_POLICY: #if ABT_CONFIG_ACTIVE_WAIT_POLICY *((int *)val) = 1; diff --git a/src/mem/malloc.c b/src/mem/malloc.c index 2911443bb..335af4b5b 100644 --- a/src/mem/malloc.c +++ b/src/mem/malloc.c @@ -50,6 +50,20 @@ ABTU_ret_err int ABTI_mem_init(ABTI_global 
*p_global) size_t stacksize = ABTU_roundup_size(thread_stacksize + sizeof(ABTI_ythread), ABT_CONFIG_STATIC_CACHELINE_SIZE); + ABTI_mem_pool_global_pool_mprotect_config mprotect_config; + if (p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT || + p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT_STRICT) { + mprotect_config.enabled = ABT_TRUE; + mprotect_config.check_error = + (p_global->stack_guard_kind == ABTI_STACK_GUARD_MPROTECT_STRICT) + ? ABT_TRUE + : ABT_FALSE; + mprotect_config.offset = 0; + mprotect_config.page_size = p_global->sys_page_size; + mprotect_config.alignment = p_global->sys_page_size; + } else { + mprotect_config.enabled = ABT_FALSE; + } if ((stacksize & (2 * ABT_CONFIG_STATIC_CACHELINE_SIZE - 1)) == 0) { /* Avoid a multiple of 2 * cacheline size to avoid cache bank conflict. */ @@ -60,8 +74,8 @@ ABTU_ret_err int ABTI_mem_init(ABTI_global *p_global) ABT_MEM_POOL_MAX_LOCAL_BUCKETS, stacksize, thread_stacksize, p_global->mem_sp_size, requested_types, - num_requested_types, - p_global->mem_page_size); + num_requested_types, p_global->mem_page_size, + &mprotect_config); /* The last four bytes will be used to store a mempool flag */ ABTI_STATIC_ASSERT((ABTI_MEM_POOL_DESC_ELEM_SIZE & (ABT_CONFIG_STATIC_CACHELINE_SIZE - 1)) == 0); @@ -70,8 +84,8 @@ ABTU_ret_err int ABTI_mem_init(ABTI_global *p_global) ABT_MEM_POOL_MAX_LOCAL_BUCKETS, ABTI_MEM_POOL_DESC_ELEM_SIZE, 0, p_global->mem_page_size, requested_types, - num_requested_types, - p_global->mem_page_size); + num_requested_types, p_global->mem_page_size, + NULL); #ifndef ABT_CONFIG_DISABLE_EXT_THREAD int abt_errno; ABTD_spinlock_clear(&p_global->mem_pool_stack_lock); diff --git a/src/mem/mem_pool.c b/src/mem/mem_pool.c index 6d32df14b..2d5c9e93c 100644 --- a/src/mem/mem_pool.c +++ b/src/mem/mem_pool.c @@ -23,6 +23,18 @@ mem_pool_lifo_elem_to_header(ABTI_sync_lifo_element *lifo_elem) lifo_elem))); } +static ABTU_ret_err int protect_memory(void *addr, size_t size, + size_t page_size, ABT_bool 
protect, + ABT_bool adjust_size) +{ + /* Align addr. */ + void *mprotect_addr = ABTU_roundup_ptr(addr, page_size); + if (adjust_size) { + size -= ((uintptr_t)mprotect_addr) - ((uintptr_t)addr); + } + return ABTU_mprotect(mprotect_addr, size, protect); +} + static void mem_pool_return_partial_bucket(ABTI_mem_pool_global_pool *p_global_pool, ABTI_mem_pool_header *bucket) @@ -78,13 +90,20 @@ void ABTI_mem_pool_init_global_pool( ABTI_mem_pool_global_pool *p_global_pool, size_t num_headers_per_bucket, size_t header_size, size_t header_offset, size_t page_size, const ABTU_MEM_LARGEPAGE_TYPE *lp_type_requests, - uint32_t num_lp_type_requests, size_t alignment_hint) + uint32_t num_lp_type_requests, size_t alignment_hint, + ABTI_mem_pool_global_pool_mprotect_config *p_mprotect_config) { p_global_pool->num_headers_per_bucket = num_headers_per_bucket; ABTI_ASSERT(header_offset + sizeof(ABTI_mem_pool_header) <= header_size); p_global_pool->header_size = header_size; p_global_pool->header_offset = header_offset; p_global_pool->page_size = page_size; + if (p_mprotect_config) { + memcpy(&p_global_pool->mprotect_config, p_mprotect_config, + sizeof(ABTI_mem_pool_global_pool_mprotect_config)); + } else { + p_global_pool->mprotect_config.enabled = ABT_FALSE; + } /* Note that lp_type_requests is a constant-sized array */ ABTI_ASSERT(num_lp_type_requests <= @@ -93,6 +112,24 @@ void ABTI_mem_pool_init_global_pool( p_global_pool->num_lp_type_requests = num_lp_type_requests; memcpy(p_global_pool->lp_type_requests, lp_type_requests, sizeof(ABTU_MEM_LARGEPAGE_TYPE) * num_lp_type_requests); + /* If mprotect_config is set, we should not use a large page. 
*/ + if (p_global_pool->mprotect_config.enabled) { + uint32_t i, idx = 0; + for (i = 0; i < num_lp_type_requests; i++) { + if (p_global_pool->lp_type_requests[i] != + ABTU_MEM_LARGEPAGE_MMAP_HUGEPAGE) { + p_global_pool->lp_type_requests[idx++] = + p_global_pool->lp_type_requests[i]; + } + } + if (idx == 0) { + /* Use a fallback allocation type. */ + p_global_pool->lp_type_requests[0] = ABTU_MEM_LARGEPAGE_MALLOC; + p_global_pool->num_lp_type_requests = 1; + } else { + p_global_pool->num_lp_type_requests = idx; + } + } p_global_pool->alignment_hint = alignment_hint; ABTI_sync_lifo_init(&p_global_pool->mem_page_lifo); @@ -112,12 +149,32 @@ void ABTI_mem_pool_destroy_global_pool(ABTI_mem_pool_global_pool *p_global_pool) while ((p_page_lifo_elem = ABTI_sync_lifo_pop_unsafe(&p_global_pool->mem_page_lifo))) { p_page = mem_pool_lifo_elem_to_page(p_page_lifo_elem); + if (p_global_pool->mprotect_config.enabled) { + /* Undo mprotect() */ + int abt_errno = + protect_memory(p_page->mem, p_page->page_size, + p_global_pool->mprotect_config.alignment, + ABT_FALSE, ABT_TRUE); + /* This should not fail since the allocated region is not newly + * split by this operation. */ + ABTI_ASSERT(abt_errno == ABT_SUCCESS); + } ABTU_free_largepage(p_page->mem, p_page->page_size, p_page->lp_type); } p_page = (ABTI_mem_pool_page *)ABTD_atomic_relaxed_load_ptr( &p_global_pool->p_mem_page_empty); while (p_page) { ABTI_mem_pool_page *p_next = p_page->p_next_empty_page; + if (p_global_pool->mprotect_config.enabled) { + /* Undo mprotect() */ + int abt_errno = + protect_memory(p_page->mem, p_page->page_size, + p_global_pool->mprotect_config.alignment, + ABT_FALSE, ABT_TRUE); + /* This should not fail since the allocated region is not newly + * split by this operation. 
*/ + ABTI_ASSERT(abt_errno == ABT_SUCCESS); + } ABTU_free_largepage(p_page->mem, p_page->page_size, p_page->lp_type); p_page = p_next; } @@ -259,11 +316,49 @@ ABTI_mem_pool_take_bucket(ABTI_mem_pool_global_pool *p_global_pool, (ABTI_mem_pool_header *)(((char *)p_mem_extra) + header_offset); p_local_tail->p_next = p_head; ABTI_mem_pool_header *p_prev = p_local_tail; - for (i = 1; i < num_provided; i++) { - ABTI_mem_pool_header *p_cur = - (ABTI_mem_pool_header *)(((char *)p_prev) + header_size); - p_cur->p_next = p_prev; - p_prev = p_cur; + if (!p_global_pool->mprotect_config.enabled) { + /* Fast path. */ + for (i = 1; i < num_provided; i++) { + ABTI_mem_pool_header *p_cur = + (ABTI_mem_pool_header *)(((char *)p_prev) + + header_size); + p_cur->p_next = p_prev; + p_prev = p_cur; + } + } else { + /* Slow path. Use mprotect(). */ + const ABT_bool check_error = + p_global_pool->mprotect_config.check_error; + const size_t protect_offset = + p_global_pool->mprotect_config.offset; + const size_t protect_page_size = + p_global_pool->mprotect_config.page_size; + const size_t protect_alignment = + p_global_pool->mprotect_config.alignment; + int abt_errno; + abt_errno = + protect_memory((void *)(((char *)p_prev) - header_offset + + protect_offset), + protect_page_size, protect_alignment, + ABT_TRUE, ABT_FALSE); + if (check_error) { + ABTI_ASSERT(abt_errno == ABT_SUCCESS); + } + for (i = 1; i < num_provided; i++) { + ABTI_mem_pool_header *p_cur = + (ABTI_mem_pool_header *)(((char *)p_prev) + + header_size); + p_cur->p_next = p_prev; + p_prev = p_cur; + abt_errno = + protect_memory((void *)(((char *)p_prev) - + header_offset + protect_offset), + protect_page_size, protect_alignment, + ABT_TRUE, ABT_FALSE); + if (check_error) { + ABTI_ASSERT(abt_errno == ABT_SUCCESS); + } + } } p_head = p_prev; num_headers += num_provided; diff --git a/src/thread.c b/src/thread.c index aac05d156..6b5554058 100644 --- a/src/thread.c +++ b/src/thread.c @@ -2599,19 +2599,22 @@ 
ythread_create(ABTI_global *p_global, ABTI_local *p_local, ABTI_pool *p_pool, ABTI_CHECK_ERROR(abt_errno); #else abt_errno = - ABTI_mem_alloc_ythread_malloc_desc_stack(p_attr, &p_newthread); + ABTI_mem_alloc_ythread_malloc_desc_stack(p_global, p_attr, + &p_newthread); #endif ABTI_CHECK_ERROR(abt_errno); } else if (attr_type & ABTI_THREAD_TYPE_MEM_MALLOC_DESC_STACK) { abt_errno = - ABTI_mem_alloc_ythread_malloc_desc_stack(p_attr, &p_newthread); + ABTI_mem_alloc_ythread_malloc_desc_stack(p_global, p_attr, + &p_newthread); ABTI_CHECK_ERROR(abt_errno); } else { ABTI_ASSERT(attr_type & (ABTI_THREAD_TYPE_MEM_MEMPOOL_DESC | ABTI_THREAD_TYPE_MEM_MALLOC_DESC)); /* Let's try to use mempool first since it performs better. */ - abt_errno = ABTI_mem_alloc_ythread_mempool_desc(p_local, p_attr, - &p_newthread); + abt_errno = + ABTI_mem_alloc_ythread_mempool_desc(p_global, p_local, p_attr, + &p_newthread); ABTI_CHECK_ERROR(abt_errno); } #ifndef ABT_CONFIG_DISABLE_MIGRATION diff --git a/src/util/Makefile.mk b/src/util/Makefile.mk index acdde2cc2..f0626d44b 100644 --- a/src/util/Makefile.mk +++ b/src/util/Makefile.mk @@ -5,4 +5,5 @@ abt_sources += \ util/atoi.c \ - util/largepage.c + util/largepage.c \ + util/mprotect.c diff --git a/src/util/mprotect.c b/src/util/mprotect.c new file mode 100644 index 000000000..96b8447c1 --- /dev/null +++ b/src/util/mprotect.c @@ -0,0 +1,22 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ +/* + * See COPYRIGHT in top-level directory. + */ + +#include "abti.h" +#include + +ABTU_ret_err int ABTU_mprotect(void *addr, size_t size, ABT_bool protect) +{ +#ifdef HAVE_MPROTECT + int ret; + if (protect) { + ret = mprotect(addr, size, PROT_READ); + } else { + ret = mprotect(addr, size, PROT_READ | PROT_WRITE); + } + return ret == 0 ? 
ABT_SUCCESS : ABT_ERR_SYS; +#else + return ABT_ERR_SYS; +#endif +} diff --git a/test/.gitignore b/test/.gitignore index eb57d7a4a..ac709f7d4 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -72,6 +72,7 @@ basic/ext_thread_future basic/ext_thread_join basic/ext_thread_mutex basic/ext_thread_rwlock +basic/stack_guard basic/timer basic/info_print basic/info_print_stack diff --git a/test/basic/Makefile.am b/test/basic/Makefile.am index 02b27941f..84f9bff2c 100644 --- a/test/basic/Makefile.am +++ b/test/basic/Makefile.am @@ -77,6 +77,7 @@ TESTS = \ ext_thread_join \ ext_thread_mutex \ ext_thread_rwlock \ + stack_guard \ timer \ info_print \ info_print_stack \ @@ -173,6 +174,7 @@ ext_thread_future_SOURCES = ext_thread_future.c ext_thread_join_SOURCES = ext_thread_join.c ext_thread_mutex_SOURCES = ext_thread_mutex.c ext_thread_rwlock_SOURCES = ext_thread_rwlock.c +stack_guard_SOURCES = stack_guard.c timer_SOURCES = timer.c info_print_SOURCES = info_print.c info_print_stack_SOURCES = info_print_stack.c @@ -255,6 +257,7 @@ testing: ./ext_thread_join ./ext_thread_mutex ./ext_thread_rwlock + ./stack_guard ./timer ./info_print ./info_print_stack diff --git a/test/basic/stack_guard.c b/test/basic/stack_guard.c new file mode 100644 index 000000000..b1f018a43 --- /dev/null +++ b/test/basic/stack_guard.c @@ -0,0 +1,230 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ +/* + * See COPYRIGHT in top-level directory. + */ + +#include +#include + +#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L + +#include +#include +#include +#include +#include "abt.h" +#include "abttest.h" + +#define DUMMY_SIZE ((int)(1024 / sizeof(double))) +#define SYS_PAGE_SIZE 4096 + +int g_mprotect_signal = 0; +volatile int g_sig_err = 0; +volatile int g_is_segv = 0; +volatile char *gp_stack = NULL; + +void segv_handler(int sig, siginfo_t *si, void *unused) +{ + if (sig != SIGSEGV) { + g_sig_err = 1; /* We cannot call assert(). 
*/ + } else if (si->si_addr != gp_stack) { + g_sig_err = 2; + } else { + /* Since POSIX does not mark mprotect() as async-signal safe, we need to + * ask another thread to call mprotect() instead of this thread even if + * we control where the signal happens; calling an async signal-unsafe + * function can cause any unexpected issues. */ + ATS_atomic_store(&g_mprotect_signal, 1); + while (ATS_atomic_load(&g_mprotect_signal) == 1) { + ; /* Waiting for the helper thread. */ + } + /* mprotect() finished. */ + g_is_segv = 1; + } +} + +void *helper_func(void *arg) +{ + /* Waiting for g_mprotect_signal from a signal handler. */ + while (ATS_atomic_load(&g_mprotect_signal) == 0) + ; + /* Call mprotect() to temporarily allow an access. */ + int ret = mprotect((void *)gp_stack, SYS_PAGE_SIZE, PROT_READ | PROT_WRITE); + assert(ret == 0); + /* Tell the signal handler that mprotect has finished. */ + ATS_atomic_store(&g_mprotect_signal, 0); + return NULL; +} + +void thread_func(void *arg) +{ + int ret; + void *p_stack; + size_t stacksize; + /* Get the stack information. */ + { + ABT_thread self_thread; + ABT_thread_attr self_thread_attr; + ret = ABT_self_get_thread(&self_thread); + ATS_ERROR(ret, "ABT_self_get_thread"); + ret = ABT_thread_get_attr(self_thread, &self_thread_attr); + ATS_ERROR(ret, "ABT_thread_get_attr"); + ret = ABT_thread_attr_get_stack(self_thread_attr, &p_stack, &stacksize); + ATS_ERROR(ret, "ABT_thread_attr_get_stack"); + ret = ABT_thread_attr_free(&self_thread_attr); + ATS_ERROR(ret, "ABT_thread_attr_free"); + } + + /* We can reasonably assume that we do not corrupt the function stack of + * thread_func(). Let's assume that the protected page is within a few + * pages from the bottom of the stack. + * gp_stack should be aligned with the page size. */ + gp_stack = + (char *)(((((uintptr_t)p_stack) + SYS_PAGE_SIZE - 1) / SYS_PAGE_SIZE) * + SYS_PAGE_SIZE + + SYS_PAGE_SIZE * 2); + while (1) { + /* Using this stack variable to see if we can observe SEGV. 
*/ + gp_stack -= SYS_PAGE_SIZE; + assert(((char *)p_stack) <= gp_stack); + volatile char val = gp_stack[0]; + /* Though we use "volatile", we'd like to put a compiler barrier just in + * case. */ + __asm__ __volatile__("" ::: "memory"); + /* The following should cause SEGV. If SEGV happens, the signal handler + * will allow this ULT to temporarily access this. */ + gp_stack[0] = val; + __asm__ __volatile__("" ::: "memory"); + /* Signal might have happened. */ + if (g_is_segv) { + assert(g_sig_err == 0); + /* Succeeded! Undo the mprotect setting. Originally it should be + * read-protected. */ + g_is_segv = 0; + ret = mprotect((void *)gp_stack, SYS_PAGE_SIZE, PROT_READ); + assert(ret == 0); + return; + } + /* We must catch SEGV until we touch stacksize */ + } +} + +int main(int argc, char *argv[]) +{ + int ret, i; + /* Catch SEGV. */ + struct sigaction sa; + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = segv_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + /* Unsupported. */ + return 77; + } + /* Set memory protection */ + putenv("ABT_STACK_OVERFLOW_CHECK=mprotect_strict"); + /* Initialize */ + ATS_read_args(argc, argv); + size_t stacksizes[] = { 1024 * 64, 1024 * 64 + 64, 1024 * 64 + 128, + 1024 * 64 - 64, 1024 * 64 - 128, 1024 * 1024, + 4 * 1024 * 1024 }; + + int stack_i, num_stacksizes = sizeof(stacksizes) / sizeof(stacksizes[0]); + for (stack_i = 0; stack_i < num_stacksizes; stack_i++) { + /* Set the default stack size. */ + const size_t stacksize = stacksizes[stack_i]; + unsetenv("ABT_THREAD_STACKSIZE"); + char stacksize_str[256]; + sprintf(stacksize_str, "ABT_THREAD_STACKSIZE=%zu", stacksize); + putenv(stacksize_str); + /* Use ATS_init for the last run. */ + if (stack_i == num_stacksizes - 1) { + ATS_init(argc, argv, 2); + } else { + ret = ABT_init(argc, argv); + ATS_ERROR(ret, "ABT_finalize"); + } + /* Check if the mprotect-based stack guard is enabled. 
*/ + int stack_overflow_check_mode = 0; + ret = ABT_info_query_config( + ABT_INFO_QUERY_KIND_ENABLED_STACK_OVERFLOW_CHECK, + &stack_overflow_check_mode); + ATS_ERROR(ret, "ABT_info_query_config"); + if (stack_overflow_check_mode != 3) { + /* Unsupported. */ + return 77; + } + + ABT_xstream xstream; + ABT_pool main_pool; + ret = ABT_self_get_xstream(&xstream); + ATS_ERROR(ret, "ABT_self_get_xstream"); + ret = ABT_xstream_get_main_pools(xstream, 1, &main_pool); + ATS_ERROR(ret, "ABT_xstream_get_main_pools"); + + for (i = 0; i < 3; i++) { + pthread_t helper_thread; + ret = pthread_create(&helper_thread, NULL, helper_func, NULL); + assert(ret == 0); + ABT_thread thread; + void *stack = NULL; + if (i == 0) { + /* 1. ULT + default parameters. */ + ret = ABT_thread_create(main_pool, thread_func, NULL, + ABT_THREAD_ATTR_NULL, &thread); + ATS_ERROR(ret, "ABT_thread_create"); + } else if (i == 1) { + /* 2. ULT + user-given stack size. */ + ABT_thread_attr thread_attr; + ret = ABT_thread_attr_create(&thread_attr); + ATS_ERROR(ret, "ABT_thread_attr_create"); + ret = + ABT_thread_attr_set_stacksize(thread_attr, stacksize + 128); + ATS_ERROR(ret, "ABT_thread_attr_set_stacksize"); + ret = ABT_thread_create(main_pool, thread_func, NULL, + thread_attr, &thread); + ATS_ERROR(ret, "ABT_thread_create"); + ret = ABT_thread_attr_free(&thread_attr); + ATS_ERROR(ret, "ABT_thread_attr_free"); + } else if (i == 2) { + /* 3. ULT + user-given stack. 
*/ + ABT_thread_attr thread_attr; + ret = ABT_thread_attr_create(&thread_attr); + ATS_ERROR(ret, "ABT_thread_attr_create"); + stack = calloc(1, stacksize); + ret = ABT_thread_attr_set_stack(thread_attr, stack, stacksize); + ATS_ERROR(ret, "ABT_thread_attr_set_stack"); + ret = ABT_thread_create(main_pool, thread_func, NULL, + thread_attr, &thread); + ATS_ERROR(ret, "ABT_thread_create"); + ret = ABT_thread_attr_free(&thread_attr); + ATS_ERROR(ret, "ABT_thread_attr_free"); + } + ret = ABT_thread_free(&thread); + ATS_ERROR(ret, "ABT_thread_free"); + if (stack) + free(stack); + ret = pthread_join(helper_thread, NULL); + assert(ret == 0); + } + /* Finalize */ + if (stack_i == num_stacksizes - 1) { + ret = ATS_finalize(0); + ATS_ERROR(ret, "ATS_finalize"); + } else { + ret = ABT_finalize(); + ATS_ERROR(ret, "ABT_thread_free"); + } + } + return ret; +} + +#else /* _POSIX_C_SOURCE */ + +int main() +{ + /* Unsupported. */ + return 77; +} + +#endif