From f9464b51a411974cc8e7d187dbda4f668f55de2d Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Mon, 7 Oct 2024 20:04:33 +0200 Subject: [PATCH] refactor(rust): Remove deprecated raw_entry_mut in StringCache (#19126) --- .../logical/categorical/string_cache.rs | 45 ++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs b/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs index a0bd2687af63..d5c6d6e857b8 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs @@ -2,12 +2,12 @@ use std::hash::{Hash, Hasher}; use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; use std::sync::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; -use hashbrown::hash_map::RawEntryMut; +use hashbrown::hash_table::Entry; +use hashbrown::HashTable; use once_cell::sync::Lazy; use polars_utils::aliases::PlRandomState; use polars_utils::pl_str::PlSmallStr; -use crate::datatypes::{InitHashMaps2, PlIdHashMap}; use crate::hashing::_HASHMAP_INIT_SIZE; /// We use atomic reference counting to determine how many threads use the @@ -131,7 +131,7 @@ impl Hash for Key { } pub(crate) struct SCacheInner { - map: PlIdHashMap, + map: HashTable, pub(crate) uuid: u32, payloads: Vec, } @@ -149,26 +149,23 @@ impl SCacheInner { #[inline] pub(crate) fn insert_from_hash(&mut self, h: u64, s: &str) -> u32 { let mut global_idx = self.payloads.len() as u32; - // Note that we don't create the PlSmallStr to search the key in the hashmap - // as PlSmallStr may allocate a string - let entry = self.map.raw_entry_mut().from_hash(h, |key| { - (key.hash == h) && { - let pos = key.idx as usize; - let value = unsafe { self.payloads.get_unchecked(pos) }; + let entry = self.map.entry( + h, + |k| { + let value = unsafe { self.payloads.get_unchecked(k.idx as usize) }; s == value.as_str() - } - }); + }, + |k| k.hash, + ); match entry { - RawEntryMut::Occupied(entry) => { - global_idx = entry.key().idx; + Entry::Occupied(entry) => { + global_idx = entry.get().idx; }, - RawEntryMut::Vacant(entry) => { + Entry::Vacant(entry) => { let idx = self.payloads.len() as u32; let key = Key::new(h, idx); - entry.insert_hashed_nocheck(h, key, ()); - - // only just now we allocate the string + entry.insert(key); self.payloads.push(PlSmallStr::from_str(s)); }, } @@ -179,15 +176,11 @@ impl SCacheInner { pub(crate) fn get_cat(&self, s: &str) -> Option { let h = StringCache::get_hash_builder().hash_one(s); self.map - .raw_entry() - .from_hash(h, |key| { - (key.hash == h) && { - let pos = key.idx as usize; - let value = unsafe { self.payloads.get_unchecked(pos) }; - s == value.as_str() - } + .find(h, |k| { + let value = unsafe { self.payloads.get_unchecked(k.idx as usize) }; + s == value.as_str() }) - .map(|(k, _)| k.idx) + .map(|k| k.idx) } #[inline] @@ -200,7 +193,7 @@ impl SCacheInner { impl Default for SCacheInner { fn default() -> Self { Self { - map: PlIdHashMap::with_capacity(_HASHMAP_INIT_SIZE), + map: HashTable::with_capacity(_HASHMAP_INIT_SIZE), uuid: STRING_CACHE_UUID_CTR.fetch_add(1, Ordering::AcqRel), payloads: Vec::with_capacity(_HASHMAP_INIT_SIZE), }