diff --git a/Cargo.toml b/Cargo.toml index d6447f70e..9dc90d257 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,9 @@ serde_test = "1.0" doc-comment = "0.3.1" bumpalo = { version = "3.13.0", features = ["allocator-api2"] } +[target.'cfg(unix)'.dev-dependencies] +libc = "0.2.155" + [features] default = ["default-hasher", "inline-more", "allocator-api2", "equivalent", "raw-entry"] diff --git a/src/map.rs b/src/map.rs index 78c0a2e52..013cdbed0 100644 --- a/src/map.rs +++ b/src/map.rs @@ -6514,3 +6514,136 @@ mod test_map { ); } } + +#[cfg(all(test, unix, any(feature = "nightly", feature = "allocator-api2")))] +mod test_map_with_mmap_allocations { + use super::HashMap; + use crate::raw::prev_pow2; + use core::alloc::Layout; + use core::ptr::{null_mut, NonNull}; + + #[cfg(feature = "nightly")] + use core::alloc::{AllocError, Allocator}; + + #[cfg(all(feature = "allocator-api2", not(feature = "nightly")))] + use allocator_api2::alloc::{AllocError, Allocator}; + + /// This is not a production quality allocator, just good enough for + /// some basic tests. + #[derive(Clone, Copy, Debug)] + struct MmapAllocator { + /// Guarantee this is a power of 2. + page_size: usize, + } + + impl MmapAllocator { + fn new() -> Result { + let result = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }; + if result < 1 { + return Err(AllocError); + } + + let page_size = result as usize; + if !page_size.is_power_of_two() { + Err(AllocError) + } else { + Ok(Self { page_size }) + } + } + + fn fit_to_page_size(&self, n: usize) -> Result { + // If n=0, give a single page (wasteful, I know). + let n = if n == 0 { self.page_size } else { n }; + + match n & (self.page_size - 1) { + 0 => Ok(n), + rem => n.checked_add(self.page_size - rem).ok_or(AllocError), + } + } + } + + unsafe impl Allocator for MmapAllocator { + fn allocate(&self, layout: Layout) -> Result, AllocError> { + if layout.align() > self.page_size { + return Err(AllocError); + } + + let null = null_mut(); + let len = self.fit_to_page_size(layout.size())? as libc::size_t; + let prot = libc::PROT_READ | libc::PROT_WRITE; + let flags = libc::MAP_PRIVATE | libc::MAP_ANON; + let addr = unsafe { libc::mmap(null, len, prot, flags, -1, 0) }; + + // mmap returns MAP_FAILED on failure, not Null. + if addr == libc::MAP_FAILED { + return Err(AllocError); + } + + match NonNull::new(addr.cast()) { + Some(data) => { + // SAFETY: this is NonNull::slice_from_raw_parts. + Ok(unsafe { + NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut( + data.as_ptr(), + len, + )) + }) + } + + // This branch shouldn't be taken in practice, but since we + // cannot return null as a valid pointer in our type system, + // we attempt to handle it. + None => { + _ = unsafe { libc::munmap(addr, len) }; + Err(AllocError) + } + } + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + // If they allocated it with this layout, it must round correctly. + let size = self.fit_to_page_size(layout.size()).unwrap(); + let _result = libc::munmap(ptr.as_ptr().cast(), size); + debug_assert_eq!(0, _result) + } + } + + #[test] + fn test_tiny_allocation_gets_rounded_to_page_size() { + let alloc = MmapAllocator::new().unwrap(); + let mut map: HashMap = HashMap::with_capacity_in(1, alloc); + + // Size of an element plus its control byte. + let rough_bucket_size = core::mem::size_of::<(usize, ())>() + 1; + + // Accounting for some misc. padding that's likely in the allocation + // due to rounding to group width, etc. + let overhead = 3 * core::mem::size_of::(); + let num_buckets = (alloc.page_size - overhead) / rough_bucket_size; + // Buckets are always powers of 2. + let min_elems = prev_pow2(num_buckets); + // Real load-factor is 7/8, but this is a lower estimation, so 1/2. + let min_capacity = min_elems >> 1; + let capacity = map.capacity(); + assert!( + capacity >= min_capacity, + "failed: {capacity} >= {min_capacity}" + ); + + // Fill it up. + for i in 0..capacity { + map.insert(i, ()); + } + // Capacity should not have changed and it should be full. + assert_eq!(capacity, map.len()); + assert_eq!(capacity, map.capacity()); + + // Alright, make it grow. + map.insert(capacity, ()); + assert!( + capacity < map.capacity(), + "failed: {capacity} < {}", + map.capacity() + ); + } +} diff --git a/src/raw/alloc.rs b/src/raw/alloc.rs index c01e2a45c..bacb4a149 100644 --- a/src/raw/alloc.rs +++ b/src/raw/alloc.rs @@ -15,9 +15,9 @@ mod inner { use core::ptr::NonNull; #[allow(clippy::map_err_ignore)] - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { match alloc.allocate(layout) { - Ok(ptr) => Ok(ptr.as_non_null_ptr()), + Ok(ptr) => Ok(ptr), Err(_) => Err(()), } } @@ -38,9 +38,9 @@ mod inner { use core::ptr::NonNull; #[allow(clippy::map_err_ignore)] - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { match alloc.allocate(layout) { - Ok(ptr) => Ok(ptr.cast()), + Ok(ptr) => Ok(ptr), Err(_) => Err(()), } } @@ -61,7 +61,7 @@ mod inner { #[allow(clippy::missing_safety_doc)] // not exposed outside of this crate pub unsafe trait Allocator { - fn allocate(&self, layout: Layout) -> Result, ()>; + fn allocate(&self, layout: Layout) -> Result, ()>; unsafe fn deallocate(&self, ptr: NonNull, layout: Layout); } @@ -70,8 +70,19 @@ mod inner { unsafe impl Allocator for Global { #[inline] - fn allocate(&self, layout: Layout) -> Result, ()> { - unsafe { NonNull::new(alloc(layout)).ok_or(()) } + fn allocate(&self, layout: Layout) -> Result, ()> { + match unsafe { NonNull::new(alloc(layout)) } { + Some(data) => { + // SAFETY: this is NonNull::slice_from_raw_parts. + Ok(unsafe { + NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut( + data.as_ptr(), + layout.size(), + )) + }) + } + None => Err(()), + } } #[inline] unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { @@ -86,7 +97,7 @@ mod inner { } } - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { alloc.allocate(layout) } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 72ef7fb1a..b4b611eb8 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1442,6 +1442,40 @@ impl RawTableInner { } } +/// Find the previous power of 2. If it's already a power of 2, it's unchanged. +/// Passing zero is undefined behavior. +pub(crate) fn prev_pow2(z: usize) -> usize { + let shift = mem::size_of::() * 8 - 1; + 1 << (shift - (z.leading_zeros() as usize)) +} + +fn maximum_buckets_in( + allocation_size: usize, + table_layout: TableLayout, + group_width: usize, +) -> usize { + // Given an equation like: + // z >= x * y + x + g + // x can be maximized by doing: + // x = (z - g) / (y + 1) + // If you squint: + // x is the number of buckets + // y is the table_layout.size + // z is the size of the allocation + // g is the group width + // But this is ignoring the padding needed for ctrl_align. + // If we remember these restrictions: + // x is always a power of 2 + // Layout size for T must always be a multiple of T + // Then the alignment can be ignored if we add the constraint: + // x * y >= table_layout.ctrl_align + // This is taken care of by `capacity_to_buckets`. + let numerator = allocation_size - group_width; + let denominator = table_layout.size + 1; // todo: ZSTs? + let quotient = numerator / denominator; + prev_pow2(quotient) +} + impl RawTableInner { /// Allocates a new [`RawTableInner`] with the given number of buckets. /// The control bytes and buckets are left uninitialized. @@ -1459,7 +1493,7 @@ impl RawTableInner { unsafe fn new_uninitialized( alloc: &A, table_layout: TableLayout, - buckets: usize, + mut buckets: usize, fallibility: Fallibility, ) -> Result where @@ -1468,13 +1502,29 @@ impl RawTableInner { debug_assert!(buckets.is_power_of_two()); // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) { + let (layout, mut ctrl_offset) = match table_layout.calculate_layout_for(buckets) { Some(lco) => lco, None => return Err(fallibility.capacity_overflow()), }; let ptr: NonNull = match do_alloc(alloc, layout) { - Ok(block) => block.cast(), + Ok(block) => { + // Utilize over-sized allocations. + let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); + debug_assert!(x >= buckets); + // Calculate the new ctrl_offset. + let (_oversized_layout, oversized_ctrl_offset) = + match table_layout.calculate_layout_for(x) { + Some(lco) => lco, + None => unsafe { hint::unreachable_unchecked() }, + }; + debug_assert!(_oversized_layout.size() <= block.len()); + debug_assert!(oversized_ctrl_offset >= ctrl_offset); + ctrl_offset = oversized_ctrl_offset; + buckets = x; + + block.cast() + } Err(_) => return Err(fallibility.alloc_err(layout)), }; @@ -4166,6 +4216,23 @@ impl RawExtractIf<'_, T, A> { mod test_map { use super::*; + #[test] + fn test_prev_pow2() { + // Skip 0, not defined for that input. + let mut pow2: usize = 1; + while (pow2 << 1) > 0 { + let next_pow2 = pow2 << 1; + assert_eq!(pow2, prev_pow2(pow2)); + // Need to skip 2, because it's also a power of 2, so it doesn't + // return the previous power of 2. + if next_pow2 > 2 { + assert_eq!(pow2, prev_pow2(pow2 + 1)); + assert_eq!(pow2, prev_pow2(next_pow2 - 1)); + } + pow2 = next_pow2; + } + } + #[test] fn test_minimum_capacity_for_small_types() { #[track_caller]