diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 7f63e60aa..16ba32f03 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -18,6 +18,7 @@
 pyo3 = { version = "0.21" }
 numpy = "0.21"
 ndarray = "0.15"
 itertools = "0.12"
+ahash = { version = "0.8.11", features = ["serde"] }
 [dependencies.tokenizers]
 path = "../../tokenizers"
diff --git a/bindings/python/src/models.rs b/bindings/python/src/models.rs
index 424be9f57..7233fed14 100644
--- a/bindings/python/src/models.rs
+++ b/bindings/python/src/models.rs
@@ -1,9 +1,12 @@
 use std::collections::HashMap;
+use std::hash::Hash;
+use std::ops::{Deref, DerefMut};
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, RwLock};
 
 use crate::token::PyToken;
 use crate::trainers::PyTrainer;
+use ahash::AHashMap;
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use pyo3::types::*;
@@ -31,6 +34,53 @@ pub struct PyModel {
     pub model: Arc<RwLock<ModelWrapper>>,
 }
 
+// Newtype wrapper for AHashMap
+#[derive(Clone, Debug)]
+pub struct PyAHashMap<K, V>(pub AHashMap<K, V>);
+
+impl<K, V> IntoPy<PyObject> for PyAHashMap<K, V>
+where
+    K: IntoPy<PyObject> + Eq + Hash,
+    V: IntoPy<PyObject>,
+{
+    fn into_py(self, py: Python<'_>) -> PyObject {
+        let dict = PyDict::new_bound(py);
+        for (k, v) in self.0 {
+            dict.set_item(k.into_py(py), v.into_py(py)).unwrap();
+        }
+        dict.into()
+    }
+}
+
+impl<'source, K, V> FromPyObject<'source> for PyAHashMap<K, V>
+where
+    K: FromPyObject<'source> + Eq + Hash,
+    V: FromPyObject<'source>,
+{
+    fn extract(ob: &'source PyAny) -> PyResult<Self> {
+        let dict = ob.downcast::<PyDict>()?;
+        let mut map = AHashMap::new();
+        for (k, v) in dict.iter() {
+            map.insert(K::extract(k)?, V::extract(v)?);
+        }
+        Ok(PyAHashMap(map))
+    }
+}
+
+impl<K, V> Deref for PyAHashMap<K, V> {
+    type Target = AHashMap<K, V>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<K, V> DerefMut for PyAHashMap<K, V> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
 impl PyModel {
     pub(crate) fn get_as_subtype(&self, py: Python<'_>) -> PyResult<PyObject> {
         let base = self.clone();
@@ -62,6 +112,10 @@ impl Model for PyModel {
         self.model.read().unwrap().get_vocab()
     }
 
+    fn get_vocab_ahash(&self) -> AHashMap<String, u32> {
+        self.model.read().unwrap().get_vocab_ahash()
+    }
+
     fn get_vocab_size(&self) -> usize {
         self.model.read().unwrap().get_vocab_size()
     }
diff --git a/tokenizers/Cargo.toml b/tokenizers/Cargo.toml
index 25396f192..3b1cc1171 100644
--- a/tokenizers/Cargo.toml
+++ b/tokenizers/Cargo.toml
@@ -68,6 +68,9 @@
 fancy-regex = { version = "0.13", optional = true}
 getrandom = { version = "0.2.10" }
 esaxx-rs = { version = "0.1.10", default-features = false, features=[]}
 monostate = "0.1.12"
+ahash = { version = "0.8.11", features = ["serde"] }
+dary_heap = { version = "0.3.6", features = ["serde"] }
+compact_str = { version = "0.8.0", features = ["serde"] }
 [features]
 default = ["progressbar", "onig", "esaxx_fast"]
diff --git a/tokenizers/src/models/bpe/model.rs b/tokenizers/src/models/bpe/model.rs
index 1585da761..977e251b8 100644
--- a/tokenizers/src/models/bpe/model.rs
+++ b/tokenizers/src/models/bpe/model.rs
@@ -2,19 +2,21 @@ use super::{super::OrderedVocabIter, trainer::BpeTrainer, Error, Pair, Word};
 use crate::tokenizer::{Model, Result, Token};
 use crate::utils::cache::{Cache, DEFAULT_CACHE_CAPACITY};
 use crate::utils::iter::ResultShunt;
+use ahash::AHashMap;
 use serde_json::Value;
 use std::borrow::Cow;
+
+use std::collections::HashMap;
 use std::{
-    collections::HashMap,
     fs::File,
     io::prelude::*,
     io::{BufRead, BufReader},
     path::{Path, PathBuf},
 };
 
-pub type Vocab = HashMap<String, u32>;
-type VocabR = HashMap<u32, String>;
-pub type MergeMap = HashMap<Pair, (u32, u32)>;
+pub type Vocab = AHashMap<String, u32>;
+type VocabR = AHashMap<u32, String>;
+pub type MergeMap = AHashMap<Pair, (u32, u32)>;
 pub type Merges = Vec<(String, String)>;
 
 struct Config {
@@ -41,7 +43,7 @@ impl Default for BpeBuilder {
         Self {
             config: Config {
                 files: None,
-                vocab: HashMap::new(),
+                vocab: AHashMap::new(),
                 merges: vec![],
                 cache_capacity: DEFAULT_CACHE_CAPACITY,
                 dropout: None,
@@ -324,7 +326,7 @@ impl BPE {
        let mut buffer = String::new();
         vocab_file.read_to_string(&mut buffer)?;
         let json: Value = serde_json::from_str(&buffer)?;
-        let mut vocab = HashMap::new();
+        let mut vocab = AHashMap::new();
         match json {
             Value::Object(m) => {
                 for (token, id) in m {
@@ -354,7 +356,11 @@ impl BPE {
         }
     }
 
-    pub fn get_vocab(&self) -> Vocab {
+    pub fn get_vocab(&self) -> HashMap<String, u32> {
+        self.vocab.clone().into_iter().collect()
+    }
+
+    pub fn get_vocab_ahash(&self) -> AHashMap<String, u32> {
         self.vocab.clone()
     }
 
@@ -481,6 +487,10 @@ impl Model for BPE {
     type Trainer = BpeTrainer;
 
     fn get_vocab(&self) -> HashMap<String, u32> {
+        self.vocab.clone().into_iter().collect()
+    }
+
+    fn get_vocab_ahash(&self) -> AHashMap<String, u32> {
         self.vocab.clone()
     }
 
diff --git a/tokenizers/src/models/bpe/serialization.rs b/tokenizers/src/models/bpe/serialization.rs
index 98cc15102..98cf54944 100644
--- a/tokenizers/src/models/bpe/serialization.rs
+++ b/tokenizers/src/models/bpe/serialization.rs
@@ -1,10 +1,10 @@
 use super::{super::OrderedVocabIter, convert_merges_to_hashmap, BpeBuilder, Pair, BPE};
+use ahash::AHashMap;
 use serde::{
     de::{Error, MapAccess, Visitor},
     ser::SerializeStruct,
     Deserialize, Deserializer, Serialize, Serializer,
 };
-use std::collections::HashMap;
 
 impl Serialize for BPE {
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
@@ -80,7 +80,7 @@ impl<'de> Visitor<'de> for BPEVisitor {
         V: MapAccess<'de>,
     {
         let mut builder = BpeBuilder::new();
-        let mut vocab: Option<HashMap<String, u32>> = None;
+        let mut vocab: Option<AHashMap<String, u32>> = None;
 
         #[derive(Debug, Deserialize)]
         #[serde(untagged)]
diff --git a/tokenizers/src/models/bpe/trainer.rs b/tokenizers/src/models/bpe/trainer.rs
index 3689a856a..09e62be2c 100644
--- a/tokenizers/src/models/bpe/trainer.rs
+++ b/tokenizers/src/models/bpe/trainer.rs
@@ -4,15 +4,17 @@ use super::{Pair, WithFirstLastIterator, Word, BPE};
 use crate::parallelism::*;
 use crate::tokenizer::{AddedToken, Result, Trainer};
 use crate::utils::progress::{ProgressBar, ProgressStyle};
+use ahash::{AHashMap, AHashSet};
+use compact_str::CompactString;
+use dary_heap::OctonaryHeap;
 use serde::{Deserialize, Serialize};
 use std::cmp::Ordering;
-use std::collections::{BinaryHeap, HashMap, HashSet};
 
 #[derive(Debug, Eq)]
 struct Merge {
     pair: Pair,
     count: u64,
-    pos: HashSet<usize>,
+    pos: AHashSet<usize>,
 }
 impl PartialEq for Merge {
     fn eq(&self, other: &Self) -> bool {
@@ -41,7 +43,7 @@ struct Config {
     show_progress: bool,
     special_tokens: Vec<AddedToken>,
     limit_alphabet: Option<usize>,
-    initial_alphabet: HashSet<char>,
+    initial_alphabet: AHashSet<char>,
     continuing_subword_prefix: Option<String>,
     end_of_word_suffix: Option<String>,
     max_token_length: Option<usize>,
@@ -62,7 +64,7 @@ impl Default for BpeTrainerBuilder {
             show_progress: true,
             special_tokens: vec![],
             limit_alphabet: None,
-            initial_alphabet: HashSet::new(),
+            initial_alphabet: AHashSet::new(),
             continuing_subword_prefix: None,
             end_of_word_suffix: None,
             max_token_length: None,
@@ -114,7 +116,7 @@ impl BpeTrainerBuilder {
     /// Set the initial alphabet
     #[must_use]
-    pub fn initial_alphabet(mut self, alphabet: HashSet<char>) -> Self {
+    pub fn initial_alphabet(mut self, alphabet: AHashSet<char>) -> Self {
         self.config.initial_alphabet = alphabet;
         self
     }
@@ -151,7 +153,7 @@ impl BpeTrainerBuilder {
             continuing_subword_prefix: self.config.continuing_subword_prefix,
             end_of_word_suffix: self.config.end_of_word_suffix,
             max_token_length: self.config.max_token_length,
-            words: HashMap::new(),
+            words: AHashMap::new(),
         }
     }
 }
@@ -187,7 +189,7 @@ pub struct BpeTrainer {
     pub limit_alphabet: Option<usize>,
     /// The initial alphabet we want absolutely to include. This allows to cover
     /// some characters that are not necessarily in the training set
-    pub initial_alphabet: HashSet<char>,
+    pub initial_alphabet: AHashSet<char>,
     /// An optional prefix to use on any subword that exist only behind another one
     pub continuing_subword_prefix: Option<String>,
     /// An optional suffix to caracterize and end-of-word subword
@@ -195,7 +197,7 @@ pub struct BpeTrainer {
     /// An optional parameter to limit the max length of any single token
     pub max_token_length: Option<usize>,
 
-    words: HashMap<String, u64>,
+    words: AHashMap<CompactString, u64>,
 }
 
 impl Default for BpeTrainer {
@@ -251,11 +253,16 @@ impl BpeTrainer {
     }
 
     /// Add the provided special tokens to the initial vocabulary
-    fn add_special_tokens(&self, w2id: &mut HashMap<String, u32>, id2w: &mut Vec<String>) {
+    fn add_special_tokens(
+        &self,
+        w2id: &mut AHashMap<CompactString, u32>,
+        id2w: &mut Vec<CompactString>,
+    ) {
         for token in &self.special_tokens {
-            if !w2id.contains_key(&token.content) {
-                id2w.push(token.content.to_owned());
-                w2id.insert(token.content.to_owned(), (id2w.len() - 1) as u32);
+            // only insert the special token content if it is not already present
+            if !w2id.contains_key(&CompactString::from(&token.content)) {
+                id2w.push(CompactString::from(&token.content));
+                w2id.insert(CompactString::from(&token.content), (id2w.len() - 1) as u32);
             }
         }
     }
@@ -263,12 +270,12 @@ impl BpeTrainer {
     /// Compute the initial alphabet and limit it if relevant
     fn compute_alphabet(
         &self,
-        wc: &HashMap<String, u64>,
-        w2id: &mut HashMap<String, u32>,
-        id2w: &mut Vec<String>,
+        wc: &AHashMap<CompactString, u64>,
+        w2id: &mut AHashMap<CompactString, u32>,
+        id2w: &mut Vec<CompactString>,
     ) {
         // Compute the alphabet from seen words
-        let mut alphabet: HashMap<char, usize> = HashMap::new();
+        let mut alphabet: AHashMap<char, usize> = AHashMap::new();
         for (word, count) in wc {
             for c in word.chars() {
                 alphabet
@@ -312,19 +319,26 @@ impl BpeTrainer {
         kept.sort_unstable_by_key(|k| (*k.0) as u32);
         kept.into_iter().for_each(|(c, _)| {
             let s = c.to_string();
+            /*
             if !w2id.contains_key(&s) {
                 id2w.push(s.clone());
                 w2id.insert(s, (id2w.len() - 1) as u32);
             }
+            */
+            // CompactString version of the block above
+            if !w2id.contains_key(&CompactString::from(&s)) {
+                id2w.push(CompactString::from(&s));
+                w2id.insert(CompactString::from(&s), (id2w.len() - 1) as u32);
+            }
         });
     }
 
     /// Tokenize words and add subwords to the vocabulary when relevant
     fn tokenize_words(
         &self,
-        wc: &HashMap<String, u64>,
-        w2id: &mut HashMap<String, u32>,
-        id2w: &mut Vec<String>,
+        wc: &AHashMap<CompactString, u64>,
+        w2id: &mut AHashMap<CompactString, u32>,
+        id2w: &mut Vec<CompactString>,
         p: &Option<ProgressBar>,
     ) -> (Vec<Word>, Vec<u64>) {
         let mut words: Vec<Word> = Vec::with_capacity(wc.len());
@@ -336,7 +350,7 @@ impl BpeTrainer {
             for (is_first, is_last, c) in word.chars().with_first_and_last() {
                 let mut s = c.to_string();
-                if w2id.contains_key(&s) {
+                if w2id.contains_key(&CompactString::from(&s)) {
                     // Found the initial char in the authorized alphabet
 
                     // Add the `continuing_subword_prefix` if relevant
@@ -353,11 +367,11 @@ impl BpeTrainer {
                     }
 
                     // Insert the new formed string if necessary
-                    if !w2id.contains_key(&s) {
-                        id2w.push(s.clone());
-                        w2id.insert(s.clone(), (id2w.len() - 1) as u32);
+                    if !w2id.contains_key(&CompactString::from(&s)) {
+                        id2w.push(CompactString::from(&s));
+                        w2id.insert(CompactString::from(&s), (id2w.len() - 1) as u32);
                     }
-                    current_word.add(w2id[&s], 1); // We do not care about the len here
+                    current_word.add(w2id[&CompactString::from(&s)], 1); // We do not care about the len here
                 }
             }
             words.push(current_word);
@@ -375,13 +389,13 @@ impl BpeTrainer {
         words: &[Word],
         counts: &[u64],
         p: &Option<ProgressBar>,
-    ) -> (HashMap<Pair, i32>, HashMap<Pair, HashSet<usize>>) {
+    ) -> (AHashMap<Pair, i32>, AHashMap<Pair, AHashSet<usize>>) {
         words
             .maybe_par_iter()
             .enumerate()
             .map(|(i, word)| {
-                let mut pair_counts = HashMap::new();
-                let mut where_to_update: HashMap<Pair, HashSet<usize>> = HashMap::new();
+                let mut pair_counts = AHashMap::new();
+                let mut where_to_update: AHashMap<Pair, AHashSet<usize>> = AHashMap::new();
 
                 for window in word.get_chars().windows(2) {
                     let cur_pair: Pair = (window[0], window[1]);
@@ -399,7 +413,7 @@ impl BpeTrainer {
                             h.insert(i);
                         })
                         .or_insert_with(|| {
-                            let mut h = HashSet::new();
+                            let mut h = AHashSet::new();
                             h.insert(i);
                             h
                         });
@@ -413,7 +427,7 @@ impl BpeTrainer {
                 (pair_counts, where_to_update)
             })
             .reduce(
-                || (HashMap::new(), HashMap::new()),
+                || (AHashMap::new(), AHashMap::new()),
                 |(mut pair_counts, mut where_to_update), (pc, wtu)| {
                     for (k, v) in pc {
                         pair_counts.entry(k).and_modify(|c| *c += v).or_insert(v);
@@ -431,11 +445,11 @@ impl BpeTrainer {
     pub fn do_train(
         &self,
-        word_counts: &HashMap<String, u64>,
+        word_counts: &AHashMap<CompactString, u64>,
         model: &mut BPE,
     ) -> Result<Vec<AddedToken>> {
-        let mut word_to_id: HashMap<String, u32> = HashMap::with_capacity(self.vocab_size);
-        let mut id_to_word: Vec<String> = Vec::with_capacity(self.vocab_size);
+        let mut word_to_id: AHashMap<CompactString, u32> = AHashMap::with_capacity(self.vocab_size);
+        let mut id_to_word: Vec<CompactString> = Vec::with_capacity(self.vocab_size);
         let max_token_length: usize = self.max_token_length.unwrap_or(usize::MAX);
 
         let progress = self.setup_progress();
@@ -464,7 +478,7 @@ impl BpeTrainer {
         self.update_progress(&progress, words.len(), "Count pairs");
         let (mut pair_counts, mut where_to_update) = self.count_pairs(&words, &counts, &progress);
         // Insert them in the queue
-        let mut queue = BinaryHeap::with_capacity(pair_counts.len());
+        let mut queue = OctonaryHeap::with_capacity(pair_counts.len());
         where_to_update.drain().for_each(|(pair, pos)| {
             let count = pair_counts[&pair];
             if count > 0 {
@@ -510,7 +524,7 @@ impl BpeTrainer {
             if let Some(prefix) = &self.continuing_subword_prefix {
                 if part_b.starts_with(prefix) {
                     let prefix_byte_len = prefix.chars().map(|c| c.len_utf8()).sum();
-                    part_b = part_b[prefix_byte_len..].to_string();
+                    part_b = CompactString::from(&part_b[prefix_byte_len..]);
                 }
             }
             let new_token = format!("{}{}", part_a, part_b);
@@ -520,12 +534,12 @@ impl BpeTrainer {
             // Insert new token if it does not already exist
             let new_token_id = word_to_id
-                .get(&new_token)
+                .get(&CompactString::from(&new_token))
                 .copied()
                 .unwrap_or(id_to_word.len() as u32);
-            if !word_to_id.contains_key(&new_token) {
-                id_to_word.push(new_token.clone());
-                word_to_id.insert(new_token.clone(), new_token_id);
+            if !word_to_id.contains_key(&CompactString::from(&new_token)) {
+                id_to_word.push(CompactString::from(&new_token));
+                word_to_id.insert(CompactString::from(&new_token), new_token_id);
             }
             merges.push((top.pair, new_token_id));
@@ -536,7 +550,7 @@ impl BpeTrainer {
                 .flat_map(|&i| {
                     let word = &words[i] as *const _ as *mut Word;
                     // We can merge each of these words in parallel here because each position
-                    // can be there only once (HashSet). So this is safe.
+                    // can be there only once (AHashSet). So this is safe.
                     unsafe {
                         // let word: &mut Word = &mut (*word);
                         (*word)
@@ -562,7 +576,7 @@ impl BpeTrainer {
                                     h.insert(iw);
                                 })
                                 .or_insert_with(|| {
-                                    let mut h = HashSet::new();
+                                    let mut h = AHashSet::new();
                                     h.insert(iw);
                                     h
                                 });
@@ -586,7 +600,12 @@ impl BpeTrainer {
         self.finalize_progress(&progress, merges.len());
 
         // Transfer new vocab & options to model
-        model.vocab = word_to_id;
+        //model.vocab = word_to_id;
+        model.vocab = word_to_id
+            .into_iter()
+            // the keys are CompactString, so rebuild the String keys from id_to_word
+            .map(|(_key, val)| (id_to_word[val as usize].to_string(), val))
+            .collect();
         model.vocab_r = model
             .vocab
             .iter()
@@ -632,18 +651,20 @@ impl Trainer for BpeTrainer {
         S: AsRef<str> + Send,
         F: Fn(&str) -> Result<Vec<String>> + Sync,
     {
-        let words: Result<HashMap<String, u64>> = iterator
+        let words: Result<AHashMap<CompactString, u64>> = iterator
             .maybe_par_bridge()
             .map(|sequence| {
                 let words = process(sequence.as_ref())?;
-                let mut map = HashMap::new();
+                let mut map = AHashMap::new();
                 for word in words {
-                    map.entry(word).and_modify(|c| *c += 1).or_insert(1);
+                    map.entry(CompactString::from(word))
+                        .and_modify(|c| *c += 1)
+                        .or_insert(1);
                 }
                 Ok(map)
             })
             .reduce(
-                || Ok(HashMap::new()),
+                || Ok(AHashMap::new()),
                 |acc, ws| {
                     let mut acc = acc?;
                     for (k, v) in ws? {
@@ -661,11 +682,12 @@
 #[cfg(test)]
 mod tests {
     use super::{BpeTrainer, Pair, BPE};
-    use std::collections::HashMap;
+    use ahash::AHashMap;
+    use compact_str::CompactString;
 
     #[test]
     fn test_train() {
-        let word_counts: HashMap<String, u64> = [
+        let word_counts: AHashMap<CompactString, u64> = [
             ("roses".into(), 1),
             ("are".into(), 2),
             ("red".into(), 1),
@@ -690,7 +712,7 @@
 
         // Vocab should contain all of the characters from the `word_counts` mapping
         // as well as three merges: 're', 'are', and 'is'.
-        let expected_vocab: HashMap<String, u32> = [
+        let expected_vocab: AHashMap<String, u32> = [
             ("-".into(), 0),
             ("2".into(), 1),
             ("B".into(), 2),
@@ -726,7 +748,7 @@
         // where 'rank' determines the order in which this merge will be applied during
         // tokenization, and 'id' is the vocab id of the symbol resulting from merging
         // the pair of symbols in the corresponding key.
-        let expected_merges: HashMap<Pair, (u32, u32)> = [
+        let expected_merges: AHashMap<Pair, (u32, u32)> = [
             ((17, 11), (0, 22)), // 'r' + 'e' -> 're'
             ((8, 22), (1, 23)),  // 'a' + 're' -> 'are'
             ((13, 18), (2, 24)), // 'i' + 's' -> 'is'
@@ -744,7 +766,7 @@
         */
         let max_token_length = 16;
-        let long_word_counts: HashMap<String, u64> = [
+        let long_word_counts: AHashMap<CompactString, u64> = [
            ("singlelongtokenwithoutcasechange", 2),
            ("singleLongTokenWithCamelCaseChange", 2),
            ("Longsingletokenwithpunctu@t!onwithin", 2),
            ("Anotherlongsingletokenwithnumberw1th1n", 2),
            ("짧은한글문자열짧은한", 2),
            ("긴한글문자열긴한글문자열긴한글문", 2),
            ("短字符串短字符串短字", 2),
            ("长字符串长字符串长字符串长字符串", 2),
            ("短い文字列短い文字列", 2),
            ("長い文字列長い文字列長い文字列長い文字列", 2),
            ("so", 2),
            ("GPT-2", 2),
         ]
         .iter()
-        .map(|(key, value)| (key.to_string(), *value))
+        .map(|(key, value)| (CompactString::from(key.to_string()), *value))
         .collect();
         let trainer = BpeTrainer::builder()
             .max_token_length(Some(max_token_length))
@@ -784,7 +806,7 @@
         // directly compares tokens with known expected values.
         // maybe unstable depending on specific settings or changes.
         */
-        let long_word_counts: HashMap<String, u64> = [
+        let long_word_counts: AHashMap<CompactString, u64> = [
             ("sin", 2),
             ("Sin", 2),
             ("Lon", 2),
@@ -799,7 +821,7 @@
             ("GP", 2),
         ]
         .iter()
-        .map(|(key, value)| (key.to_string(), *value))
+        .map(|(key, value)| (CompactString::from(key.to_string()), *value))
         .collect();
         let trainer = BpeTrainer::builder()
             .max_token_length(Some(2))
@@ -808,8 +830,8 @@
             .build();
         let mut model = BPE::default();
         trainer.do_train(&long_word_counts, &mut model).unwrap();
-        let trained_vocab: HashMap<String, u32> = model.get_vocab();
-        let expected_vocab: HashMap<String, u32> = [
+        let trained_vocab: AHashMap<String, u32> = model.get_vocab_ahash();
+        let expected_vocab: AHashMap<String, u32> = [
             ("短", 12),
             ("n", 6),
             ("i", 5),
diff --git a/tokenizers/src/models/bpe/word.rs b/tokenizers/src/models/bpe/word.rs
index 6fc8033e3..df9d32a94 100644
--- a/tokenizers/src/models/bpe/word.rs
+++ b/tokenizers/src/models/bpe/word.rs
@@ -1,7 +1,8 @@
 use super::Pair;
+use ahash::AHashMap;
+use dary_heap::QuaternaryHeap;
 use rand::{thread_rng, Rng};
 use std::cmp::Ordering;
-use std::collections::{BinaryHeap, HashMap};
 
 #[derive(Debug, Eq)]
 struct Merge {
@@ -158,8 +159,8 @@ impl Word {
         changes
     }
 
-    pub(super) fn merge_all(&mut self, merges: &HashMap<Pair, (u32, u32)>, dropout: Option<f32>) {
-        let mut queue = BinaryHeap::with_capacity(self.symbols.len());
+    pub(super) fn merge_all(&mut self, merges: &AHashMap<Pair, (u32, u32)>, dropout: Option<f32>) {
+        let mut queue = QuaternaryHeap::with_capacity(self.symbols.len());
 
         let mut skip = Vec::with_capacity(queue.len());
         queue.extend(
diff --git a/tokenizers/src/models/mod.rs b/tokenizers/src/models/mod.rs
index cdfb731a8..8692e2502 100644
--- a/tokenizers/src/models/mod.rs
+++ b/tokenizers/src/models/mod.rs
@@ -5,6 +5,7 @@
 pub mod unigram;
 pub mod wordlevel;
 pub mod wordpiece;
 
+use ahash::AHashMap;
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
@@ -19,11 +20,11 @@
 use crate::{AddedToken, Model, Result, Token, Trainer};
 
 /// Wraps a vocab mapping (ID -> token) to a struct that will be serialized in order
 /// of token ID, smallest to largest.
 struct OrderedVocabIter<'a> {
-    vocab_r: &'a HashMap<u32, String>,
+    vocab_r: &'a AHashMap<u32, String>,
 }
 
 impl<'a> OrderedVocabIter<'a> {
-    fn new(vocab_r: &'a HashMap<u32, String>) -> Self {
+    fn new(vocab_r: &'a AHashMap<u32, String>) -> Self {
         Self { vocab_r }
     }
 }
@@ -179,6 +180,15 @@ impl Model for ModelWrapper {
         }
     }
 
+    fn get_vocab_ahash(&self) -> AHashMap<String, u32> {
+        match self {
+            Self::WordLevel(t) => t.get_vocab_ahash(),
+            Self::WordPiece(t) => t.get_vocab_ahash(),
+            Self::BPE(t) => t.get_vocab_ahash(),
+            Self::Unigram(t) => t.get_vocab_ahash(),
+        }
+    }
+
     fn get_vocab_size(&self) -> usize {
         match self {
             Self::WordLevel(t) => t.get_vocab_size(),
@@ -284,8 +294,8 @@ mod tests {
     #[test]
     fn incomplete_ordered_vocab() {
-        let vocab_r: HashMap<u32, String> =
-            HashMap::from([(0, "Hi".to_string()), (2, "There".to_string())]);
+        let vocab_r: AHashMap<u32, String> =
+            AHashMap::from([(0, "Hi".to_string()), (2, "There".to_string())]);
 
         let ordered = OrderedVocabIter::new(&vocab_r);
diff --git a/tokenizers/src/models/unigram/lattice.rs b/tokenizers/src/models/unigram/lattice.rs
index 30b82245d..710112916 100644
--- a/tokenizers/src/models/unigram/lattice.rs
+++ b/tokenizers/src/models/unigram/lattice.rs
@@ -1,13 +1,13 @@
+use dary_heap::QuaternaryHeap;
 use rand::distributions::WeightedIndex;
 use rand::prelude::*;
 use std::cell::RefCell;
 use std::cmp::{min, Ordering};
-use std::collections::BinaryHeap;
 use std::rc::Rc;
 
 type NodeRef = Rc<RefCell<Node>>;
 type HypothesisRef = Rc<RefCell<Hypothesis>>;
-type Agenda = BinaryHeap<Hypothesis>;
+type Agenda = QuaternaryHeap<Hypothesis>;
 
 struct Hypothesis {
     node_ref: NodeRef,
@@ -240,7 +240,7 @@ impl<'a> Lattice<'a> {
             1 => vec![self.viterbi()],
             _ => {
                 // let k_reserved_hypothesis_size = 512;
-                let mut agenda: Agenda = BinaryHeap::new();
+                let mut agenda: Agenda = QuaternaryHeap::new();
                 let mut hypotheses: Vec<HypothesisRef> = vec![];
                 let eos = self.eos_node();
                 let score = eos.borrow().score;
@@ -282,7 +282,7 @@ impl<'a> Lattice<'a> {
                     let k_max_agenda_size = 100_000;
                     let k_min_agenda_size = 512;
                     if agenda.len() > k_max_agenda_size {
-                        let mut new_agenda = BinaryHeap::new();
+                        let mut new_agenda = QuaternaryHeap::new();
                         let len = min(k_min_agenda_size, n * 10);
                         for _i in 0..len {
                             new_agenda.push(agenda.pop().unwrap());
diff --git a/tokenizers/src/models/unigram/model.rs b/tokenizers/src/models/unigram/model.rs
index defc7d93d..ad406d493 100644
--- a/tokenizers/src/models/unigram/model.rs
+++ b/tokenizers/src/models/unigram/model.rs
@@ -5,13 +5,14 @@ use super::{
 };
 use crate::tokenizer::{Model, Result, Token};
 use crate::utils::cache::Cache;
-
 use std::collections::HashMap;
+
+use ahash::AHashMap;
 use std::convert::TryInto;
 use std::fs::read_to_string;
 use std::path::{Path, PathBuf};
 
-type TokenMap = HashMap<String, u32>;
+type TokenMap = AHashMap<String, u32>;
 type Vocab = Vec<(String, f64)>;
 
 /// A `Unigram` model to encode sentences.
@@ -98,7 +99,7 @@ impl Unigram {
         byte_fallback: bool,
     ) -> Result<Self> {
         let n = vocab.len();
-        let mut token_to_ids: TokenMap = HashMap::new();
+        let mut token_to_ids: TokenMap = AHashMap::new();
         let mut builder = TrieBuilder::default();
 
         if let Some(unk_id) = unk_id {
@@ -404,6 +405,10 @@ impl Model for Unigram {
     type Trainer = UnigramTrainer;
 
     fn get_vocab(&self) -> HashMap<String, u32> {
+        self.token_to_ids.clone().into_iter().collect()
+    }
+
+    fn get_vocab_ahash(&self) -> AHashMap<String, u32> {
         self.token_to_ids.clone()
     }
 
diff --git a/tokenizers/src/models/unigram/trainer.rs b/tokenizers/src/models/unigram/trainer.rs
index 5d178e77b..e61e3898d 100644
--- a/tokenizers/src/models/unigram/trainer.rs
+++ b/tokenizers/src/models/unigram/trainer.rs
@@ -2,10 +2,10 @@
 use crate::models::unigram::{lattice::Lattice, model::Unigram};
 use crate::tokenizer::{AddedToken, Result, Trainer};
 use crate::utils::parallelism::*;
 use crate::utils::progress::{ProgressBar, ProgressStyle};
+use ahash::{AHashMap, AHashSet};
 use log::debug;
 use serde::{Deserialize, Serialize};
 use std::cmp::Reverse;
-use std::collections::{HashMap, HashSet};
 use std::convert::TryInto;
 
 // A token and a score
@@ -57,8 +57,8 @@ pub struct UnigramTrainer {
     pub shrinking_factor: f64,
     #[builder(default = "vec![]")]
     pub special_tokens: Vec<AddedToken>,
-    #[builder(default = "HashSet::new()")]
-    pub initial_alphabet: HashSet<char>,
+    #[builder(default = "AHashSet::new()")]
+    pub initial_alphabet: AHashSet<char>,
     #[builder(default = "None")]
     pub unk_token: Option<String>,
 
@@ -67,8 +67,8 @@ pub struct UnigramTrainer {
     pub max_piece_length: usize,
     #[builder(default = "1_000_000")]
     seed_size: usize,
-    #[builder(default = "HashMap::new()")]
-    words: HashMap<String, u64>,
+    #[builder(default = "AHashMap::new()")]
+    words: AHashMap<String, u64>,
 }
 
 impl Default for UnigramTrainer {
@@ -110,17 +110,17 @@ impl UnigramTrainer {
         true
     }
 
-    fn finalize(&self, model: Unigram, required_chars: HashSet<String>) -> Result<Unigram> {
+    fn finalize(&self, model: Unigram, required_chars: AHashSet<String>) -> Result<Unigram> {
         let mut min_score_penalty = 0.0;
         let min_score_penalty_delta = 0.0001;
 
         let mut pieces: Vec<(String, f64)> = vec![];
-        let mut inserted: HashSet<String> = HashSet::new();
+        let mut inserted: AHashSet<String> = AHashSet::new();
 
         // We don't want to include the <UNK> that was used to train
         inserted.insert("<UNK>".into());
 
-        let existing_pieces: HashMap<String, f64> = model.iter().cloned().collect();
+        let existing_pieces: AHashMap<String, f64> = model.iter().cloned().collect();
         for c in required_chars {
             if let Some(t) = existing_pieces.get(&c) {
                 inserted.insert(c.clone());
@@ -185,7 +185,7 @@ impl UnigramTrainer {
         )
     }
 
-    fn required_chars(&self, word_counts: &[Sentence]) -> HashSet<String> {
+    fn required_chars(&self, word_counts: &[Sentence]) -> AHashSet<String> {
         word_counts
             .iter()
             .flat_map(|(s, _count)| s.chars())
@@ -205,7 +205,7 @@ impl UnigramTrainer {
             .sum::<usize>()
             + sentences.len();
         let mut flat_string = String::with_capacity(total);
-        let mut all_chars: HashMap<char, u64> = HashMap::new();
+        let mut all_chars: AHashMap<char, u64> = AHashMap::new();
         let c_sentence_boundary = '\0';
         let k_sentence_boundary = '\0'.to_string();
         for (string, n) in sentences {
@@ -631,18 +631,18 @@ impl Trainer for UnigramTrainer {
         S: AsRef<str> + Send,
         F: Fn(&str) -> Result<Vec<String>> + Sync,
     {
-        let words: Result<HashMap<String, u64>> = iterator
+        let words: Result<AHashMap<String, u64>> = iterator
             .maybe_par_bridge()
             .map(|sequence| {
                 let words = process(sequence.as_ref())?;
-                let mut map = HashMap::new();
+                let mut map = AHashMap::new();
                 for word in words {
                     map.entry(word).and_modify(|c| *c += 1).or_insert(1);
                 }
                 Ok(map)
             })
             .reduce(
-                || Ok(HashMap::new()),
+                || Ok(AHashMap::new()),
                 |acc, ws| {
                     let mut acc = acc?;
                     for (k, v) in ws? {
@@ -716,7 +716,7 @@ mod tests {
     fn test_initial_alphabet() {
         let trainer = UnigramTrainerBuilder::default()
             .show_progress(false)
-            .initial_alphabet(HashSet::from_iter(vec!['a', 'b', 'c', 'd', 'e', 'f']))
+            .initial_alphabet(AHashSet::from_iter(vec!['a', 'b', 'c', 'd', 'e', 'f']))
             .build()
             .unwrap();
 
@@ -727,7 +727,7 @@
             vec!["こ", "ん", "に", "ち", "は", "友", "達", "a", "b", "c", "d", "e", "f"]
                 .into_iter()
                 .map(|s| s.to_owned())
-                .collect::<HashSet<_>>()
+                .collect::<AHashSet<_>>()
         );
     }
 
diff --git a/tokenizers/src/models/unigram/trie.rs b/tokenizers/src/models/unigram/trie.rs
index 2f94b1766..dd06f7f02 100644
--- a/tokenizers/src/models/unigram/trie.rs
+++ b/tokenizers/src/models/unigram/trie.rs
@@ -1,4 +1,4 @@
-use std::collections::HashMap;
+use ahash::AHashMap;
 use std::hash::Hash;
 
 #[derive(Default)]
@@ -78,14 +78,14 @@ impl