Skip to content

Commit

Permalink
Use bytes not strings in substr matcher; fixes #100
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Apr 29, 2024
1 parent e610ab9 commit 017179c
Showing 1 changed file with 13 additions and 13 deletions.
26 changes: 13 additions & 13 deletions controllers/aici_abi/src/substring.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::fmt::Display;

use crate::{
- bytes::limit_str,
+ bytes::limit_bytes,
recognizer::{FunctionalRecognizer, StackRecognizer},
toktree::SpecialToken,
};
Expand All @@ -14,7 +14,7 @@ enum Node {

pub struct SubStrMatcher {
end_str: String,
- source: String,
+ source: Vec<u8>,
nodes: Vec<Node>,
}

Expand Down Expand Up @@ -52,7 +52,7 @@ impl SubStrMatcher {
serde_json::Value::Object(children_json)
}
Node::Leaf { source_offset } => {
- json!(limit_str(&self.source[*source_offset..], 20))
+ json!(limit_bytes(&self.source[*source_offset..], 20))
}
}
}
Expand All @@ -77,7 +77,7 @@ impl SubStrMatcher {
"{:indent$}{}: {:?}",
"",
*source_offset,
- limit_str(&self.source[*source_offset..], 20),
+ limit_bytes(&self.source[*source_offset..], 20),
)?;
}
}
Expand All @@ -86,13 +86,13 @@ impl SubStrMatcher {

pub fn new(source: &str, end_str: &str) -> Self {
let mut tmp = Self {
- source: source.to_string() + " ",
+ source: (source.to_string() + " ").as_bytes().to_vec(),
end_str: end_str.to_string(),
nodes: vec![Node::Inner { children: vec![] }],
};
tmp.add(0);
for i in 0..tmp.source.len() {
- if tmp.source.as_bytes()[i] == b' ' {
+ if tmp.source[i] == b' ' {
tmp.add(i + 1);
}
}
Expand All @@ -101,15 +101,15 @@ impl SubStrMatcher {
tmp
}

- fn find(&self, s: &str) -> (usize, usize) {
+ fn find(&self, s: &[u8]) -> (usize, usize) {
let mut node_idx = 0;
- for (i, b) in s.bytes().enumerate() {
+ for (i, b) in s.iter().enumerate() {
let node = &self.nodes[node_idx];
match node {
Node::Inner { children } => {
let mut found = false;
for (c, idx) in children.iter() {
- if *c == b {
+ if *c == *b {
node_idx = *idx;
found = true;
break;
Expand Down Expand Up @@ -137,7 +137,7 @@ impl SubStrMatcher {
let num_nodes = self.nodes.len();
match &mut self.nodes[node_idx] {
Node::Inner { children } => {
- children.push((s1.as_bytes()[0], num_nodes));
+ children.push((s1[0], num_nodes));
let n = add_node(
&mut self.nodes,
Node::Leaf {
Expand All @@ -160,8 +160,8 @@ impl SubStrMatcher {
}

for i in 0..s1.len() {
- let b1 = s1.as_bytes()[i];
- let b2 = s2.as_bytes()[i];
+ let b1 = s1[i];
+ let b2 = s2[i];
if b1 != b2 {
let n1 = add_node(
&mut self.nodes,
Expand Down Expand Up @@ -196,7 +196,7 @@ impl SubStrMatcher {
}

fn append_to_src_off(&self, off: usize, byte: u8) -> SubStrState {
- if off < self.source.len() && self.source.as_bytes()[off] == byte {
+ if off < self.source.len() && self.source[off] == byte {
SubStrState::SourceOffset(off + 1)
} else {
SubStrState::Dead
Expand Down

0 comments on commit 017179c

Please sign in to comment.