Skip to content

Commit

Permalink
add replace spaces flag
Browse files (browse the repository at this point in the history)
  • Loading branch information
rishabhy committed Sep 30, 2024
1 parent b9ac46c commit 7f52f3d
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ impl Encoding {
self.core_bpe.encode_ordinary(text)
}

pub fn estimate_num_tokens_no_special_tokens_fast(&self, text: &str) -> usize {
pub fn estimate_num_tokens_no_special_tokens_fast(&self, text: &str, replace_spaces_with_lower_one_eighth_block: bool = false) -> usize {
if replace_spaces_with_lower_one_eighth_block {
text = text.replace(" ", "\u{2581}");
}

let mut token_count = 0;
let mut current_token = Vec::new();
let mut current_token_hash: i64 = 0;
Expand Down

0 comments on commit 7f52f3d

Please sign in to comment.