diff --git a/libia2/include/ia2_internal.h b/libia2/include/ia2_internal.h index 6c14097c3e..3f7c86a283 100644 --- a/libia2/include/ia2_internal.h +++ b/libia2/include/ia2_internal.h @@ -216,6 +216,13 @@ asm(".macro mov_pkru_eax pkey\n" return out; \ } +/* Pass to mmap to signal end of program init */ +#define IA2_FINISH_INIT_MAGIC 0x1a21face1a21faceULL +/* Tell the syscall filter to forbid init-only operations. This mmap() will +always fail because it maps a non-page-aligned addr with MAP_FIXED (the length +is also 0), so it works as a reasonable signpost no-op. */ +#define mark_init_finished() (void)mmap((void *)IA2_FINISH_INIT_MAGIC, 0, 0, MAP_FIXED, -1, 0) + #define declare_init_tls_fn(n) void init_tls_##n(void); #define setup_destructors_for_compartment(n) \ void ia2_setup_destructors_##n(void); \ @@ -311,4 +318,5 @@ asm(".macro mov_pkru_eax pkey\n" /* Initialize stacks for the main thread/ */ \ init_stacks_and_setup_tls(); \ REPEATB##n(setup_destructors_for_compartment, nop_macro); \ + mark_init_finished(); \ } diff --git a/runtime/memory-map/src/lib.rs b/runtime/memory-map/src/lib.rs index d0d2b20bfa..9367adbf82 100644 --- a/runtime/memory-map/src/lib.rs +++ b/runtime/memory-map/src/lib.rs @@ -82,12 +82,22 @@ use disjoint_interval_tree::NonOverlappingIntervalTree; pub struct MemoryMap { regions: NonOverlappingIntervalTree, + init_finished: bool, /* starts false in new(); set to true by mark_init_finished() */ } impl MemoryMap { pub fn new() -> Self { MemoryMap { regions: Default::default(), + init_finished: false, + } + } + pub fn mark_init_finished(&mut self) -> bool { /* returns true on the first call and false on every later call */ + if self.init_finished { + false + } else { + self.init_finished = true; + true } } pub fn add_region(&mut self, mut range: Range, state: State) -> bool { @@ -235,6 +245,16 @@ pub extern "C" fn memory_map_new() -> Box { #[no_mangle] pub extern "C" fn memory_map_destroy(_map: Box) {} +#[no_mangle] +pub extern "C" fn memory_map_mark_init_finished(map: &mut MemoryMap) -> bool { /* C-callable wrapper around MemoryMap::mark_init_finished */ + map.mark_init_finished() +} + +#[no_mangle] +pub extern "C" fn memory_map_is_init_finished(map: 
&MemoryMap) -> bool { /* C-callable read of the init_finished flag; does not modify the map */ + map.init_finished +} + #[no_mangle] pub extern "C" fn memory_map_all_overlapping_regions_have_pkey( map: &MemoryMap, @@ -353,18 +373,26 @@ pub extern "C" fn memory_map_pkey_mprotect_region( pkey: u8, ) -> bool { if let Some(mut state) = map.split_out_region(range) { - /* forbid repeated pkey_mprotect */ - if state.pkey_mprotected == false { - state.pkey_mprotected = true; - map.add_region(range, state) - } else { + /* forbid pkey_mprotect of memory owned by a compartment other than pkey or 0 */ + if state.owner_pkey != pkey && state.owner_pkey != 0 { printerrln!( - "not pkey {} or already pkey_mprotected ({}/{})", - pkey, + "memory pkey not {} or 0 (running with {})", state.owner_pkey, - state.pkey_mprotected + pkey ); false + /* forbid repeated pkey_mprotect */ + } else if state.pkey_mprotected == true { + printerrln!("{} already pkey_mprotected region", state.owner_pkey); + false + /* otherwise, allow */ + } else { + state.pkey_mprotected = true; + /* set owner if a trusted compartment protected untrusted memory (an owner_pkey of 0 becomes pkey) */ + if state.owner_pkey == 0 { + state.owner_pkey = pkey; + } + map.add_region(range, state) } } else { // we're attempting to pkey_mprotect memory that was never mmapped. 
@@ -381,12 +409,15 @@ pub extern "C" fn memory_map_mprotect_region(map: &mut MemoryMap, range: Range, state.prot = prot; map.add_region(range, state) } else { - printerrln!( - "warning: reprotecting already-mprotected region {:?} (prot {} => {})", - range, - state.prot, - prot - ); + /* after init has finished, we should warn about reprotecting regions; before that the reprotect is applied without a warning */ + if map.init_finished { + printerrln!( + "warning: reprotecting already-mprotected region {:?} (prot {} => {})", + range, + state.prot, + prot + ); + } state.mprotected = true; state.prot = prot; map.add_region(range, state) diff --git a/runtime/memory_map.h b/runtime/memory_map.h index ef60211f51..ffe6552677 100644 --- a/runtime/memory_map.h +++ b/runtime/memory_map.h @@ -25,6 +25,10 @@ struct memory_map *memory_map_new(void); void memory_map_destroy(struct memory_map *_map); +bool memory_map_mark_init_finished(struct memory_map *map); + +bool memory_map_is_init_finished(const struct memory_map *map); + bool memory_map_all_overlapping_regions_have_pkey(const struct memory_map *map, struct range needle, uint8_t pkey); diff --git a/runtime/track_memory_map.c b/runtime/track_memory_map.c index dc8c6e1ab1..f8a498d33c 100644 --- a/runtime/track_memory_map.c +++ b/runtime/track_memory_map.c @@ -53,6 +53,10 @@ bool is_op_permitted(struct memory_map *map, int event, false); if (impacts_only_unprotected_memory) return true; + /* during init, we allow re-mprotecting memory, which we need to alter + initially-RO destructors */ + else if (!memory_map_is_init_finished(map)) + return true; /* allow mprotecting memory that is already writable */ uint32_t prot = memory_map_region_get_prot(map, info->mprotect.range); @@ -184,6 +188,15 @@ unsigned char pkey_for_pkru(uint32_t pkru) { #undef CHECK } +/* Pass to mmap to signal end of program init; duplicates the definition in libia2/include/ia2_internal.h and must be kept equal to it */ +#define IA2_FINISH_INIT_MAGIC 0x1a21face1a21faceULL + +bool event_marks_init_finished(enum mmap_event event, const union event_info *event_info) { /* true for an mmap event at the magic address with MAP_FIXED set */ + return event == EVENT_MMAP && 
event_info->mmap.range.start == IA2_FINISH_INIT_MAGIC && + event_info->mmap.flags & MAP_FIXED; +} + /* query pid to determine the mmap-relevant event being requested. returns true * unless something horrible happens */ bool interpret_syscall(struct user_regs_struct *regs, unsigned char pkey, @@ -423,6 +436,28 @@ bool track_memory_map(pid_t pid, int *exit_status_out, enum trace_mode mode) { return false; } + /* pick up signal marking IA2 init finished to start forbidding init-only operations */ + if (event_marks_init_finished(event, &event_info)) { + if (!memory_map_mark_init_finished(map)) { + fprintf(stderr, "attempting to re-finish init! (rip=%p)\n", (void *)regs.rip); + return false; + } + debug_op("init finished\n"); + /* finish syscall; it will fail benignly */ + if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) { + perror("could not PTRACE_SYSCALL"); /* NOTE(review): the failure is only logged and execution still reaches wait_for_next_trap() below; confirm this is intended */ + } + switch (wait_for_next_trap(pid, exit_status_out)) { + case WAIT_TRAP: + break; + case WAIT_ERROR: + return false; + default: /* every other wait result makes track_memory_map return true */ + return true; + } + continue; + } + if (!is_op_permitted(map, event, &event_info)) { fprintf(stderr, "forbidden operation requested: %s\n", event_name(event)); return_syscall_eperm(pid);