Skip to content

Commit

Permalink
further cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Jul 2, 2024
1 parent 62f742c commit ca32478
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 57 deletions.
2 changes: 1 addition & 1 deletion controllers/derivre/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ impl ExprSet {
self.cost
}

pub fn disable_optimizations(&mut self) {
pub(crate) fn disable_optimizations(&mut self) {
self.optimize = false;
}

Expand Down
57 changes: 2 additions & 55 deletions controllers/llguidance_ctrl/src/earley/regexvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,6 @@ use std::fmt::Debug;

pub use derivre::{AlphabetInfo, ExprRef, NextByte, SimpleVob, StateID};

// Switch for the `debug!` macro below; while `false`, the `eprintln!` inside
// the macro is never executed.
const DEBUG: bool = false;

// Forwards its arguments unchanged to `eprintln!`, but only when `DEBUG` is
// `true`. Accepts the same format-string syntax as `eprintln!`.
macro_rules! debug {
($($arg:tt)*) => {
if DEBUG {
eprintln!($($arg)*);
}
};
}

#[derive(Clone)]
pub struct RegexVec {
exprs: ExprSet,
Expand Down Expand Up @@ -63,10 +53,6 @@ impl RegexVec {
&self.lazy
}

/// Convenience wrapper around [`Self::initial_state`].
///
/// Passes a `SimpleVob::all_true` vector sized to `rx_list.len()` as the
/// selection (presumably selecting every regex in the list; confirm against
/// `SimpleVob::all_true`) and returns the resulting state.
pub fn initial_state_all(&mut self) -> StateID {
self.initial_state(&SimpleVob::all_true(self.rx_list.len()))
}

pub fn initial_state(&mut self, selected: &SimpleVob) -> StateID {
let mut vec_desc = vec![];
for idx in selected.iter() {
Expand Down Expand Up @@ -128,32 +114,6 @@ impl RegexVec {
}
}

/// Runs `transition` once per byte of `bytes`, in order, threading the
/// resulting state through each step.
///
/// Starts from `state` and returns the state reached after the last byte.
/// An empty slice returns `state` unchanged.
pub fn transition_bytes(&mut self, state: StateID, bytes: &[u8]) -> StateID {
    bytes
        .iter()
        .fold(state, |current, &byte| self.transition(current, byte))
}

/// Returns `true` exactly when [`Self::lookahead_len`] yields `Some` for
/// `text`, and `false` when it yields `None`.
pub fn is_match(&mut self, text: &str) -> bool {
self.lookahead_len(text).is_some()
}

/// Feeds the bytes of `text` through the automaton one at a time.
///
/// The starting state comes from `initial_state`, called with the negation
/// of a freshly allocated `SimpleVob` of `rx_list.len()` entries
/// (presumably this selects every regex; confirm against `SimpleVob`).
///
/// Returns `None` as soon as a transition lands on `StateID::DEAD`.
/// Otherwise returns whatever `lookahead_len_for_state` reports for the
/// state reached after the final byte.
pub fn lookahead_len(&mut self, text: &str) -> Option<usize> {
    let unselected = SimpleVob::alloc(self.rx_list.len());
    let selection = unselected.negated();
    let mut current = self.initial_state(&selection);

    for byte in text.bytes() {
        let next = self.transition(current, byte);
        debug!("b: {:?} --{:?}--> {:?}", current, byte as char, next);
        // Once the dead state is reached no further input can match.
        if next == StateID::DEAD {
            return None;
        }
        current = next;
    }

    self.lookahead_len_for_state(current)
}

/// Estimate the size of the regex tables in bytes.
pub fn num_bytes(&self) -> usize {
self.exprs.num_bytes()
Expand Down Expand Up @@ -306,11 +266,7 @@ impl RegexVec {
let (alpha, exprset, rx_list) = AlphabetInfo::from_exprset(exprset, rx_list);
let num_ast_nodes = exprset.len();

let mut rx_sets = VecHashCons::new();
let id = rx_sets.insert(&[]);
assert!(id == StateID::DEAD.as_u32());
let id = rx_sets.insert(&[0]);
assert!(id == StateID::MISSING.as_u32());
let rx_sets = StateID::new_hash_cons();

let mut r = RegexVec {
deriv: DerivCache::new(),
Expand All @@ -328,9 +284,6 @@ impl RegexVec {
max_states: usize::MAX,
};

// disable expensive optimizations after initial construction
r.exprs.disable_optimizations();

r.insert_state(vec![]);
// also append state for the "MISSING"
r.append_state(r.state_descs[0].clone());
Expand Down Expand Up @@ -362,10 +315,6 @@ impl RegexVec {
id
}

/// Private accessor returning a shared reference to the `exprs` field.
fn exprs(&self) -> &ExprSet {
&self.exprs
}

fn compute_state_desc(&self, state: StateID) -> StateDesc {
let mut res = StateDesc {
state,
Expand All @@ -379,7 +328,7 @@ impl RegexVec {
};
for (idx, e) in iter_state(&self.rx_sets, state) {
res.possible.set(idx, true);
if self.exprs().is_nullable(e) {
if self.exprs.is_nullable(e) {
res.accepting.set(idx, true);
if res.lowest_accepting.is_none() {
res.lowest_accepting = Some(idx);
Expand Down Expand Up @@ -448,7 +397,6 @@ fn iter_state<'a>(
.map(move |idx| (lst[idx] as usize, ExprRef::new(lst[idx + 1])))
}


// #[test]
// fn test_fuel() {
// let mut rx = RegexVec::new_single("a(bc+|b[eh])g|.h").unwrap();
Expand All @@ -462,4 +410,3 @@ fn iter_state<'a>(
// no_match(&mut rx, "abcg");
// assert!(rx.has_error());
// }

2 changes: 1 addition & 1 deletion py/guidance

0 comments on commit ca32478

Please sign in to comment.