From ca32478202f34315300bd0af147e31344fc27ef9 Mon Sep 17 00:00:00 2001 From: Michal Moskal Date: Tue, 2 Jul 2024 01:29:50 +0000 Subject: [PATCH] further cleanup --- controllers/derivre/src/ast.rs | 2 +- .../llguidance_ctrl/src/earley/regexvec.rs | 57 +------------------ py/guidance | 2 +- 3 files changed, 4 insertions(+), 57 deletions(-) diff --git a/controllers/derivre/src/ast.rs b/controllers/derivre/src/ast.rs index a488b89a..925083ce 100644 --- a/controllers/derivre/src/ast.rs +++ b/controllers/derivre/src/ast.rs @@ -286,7 +286,7 @@ impl ExprSet { self.cost } - pub fn disable_optimizations(&mut self) { + pub(crate) fn disable_optimizations(&mut self) { self.optimize = false; } diff --git a/controllers/llguidance_ctrl/src/earley/regexvec.rs b/controllers/llguidance_ctrl/src/earley/regexvec.rs index 825c85c3..62977615 100644 --- a/controllers/llguidance_ctrl/src/earley/regexvec.rs +++ b/controllers/llguidance_ctrl/src/earley/regexvec.rs @@ -3,16 +3,6 @@ use std::fmt::Debug; pub use derivre::{AlphabetInfo, ExprRef, NextByte, SimpleVob, StateID}; -const DEBUG: bool = false; - -macro_rules! debug { - ($($arg:tt)*) => { - if DEBUG { - eprintln!($($arg)*); - } - }; -} - #[derive(Clone)] pub struct RegexVec { exprs: ExprSet, @@ -63,10 +53,6 @@ impl RegexVec { &self.lazy } - pub fn initial_state_all(&mut self) -> StateID { - self.initial_state(&SimpleVob::all_true(self.rx_list.len())) - } - pub fn initial_state(&mut self, selected: &SimpleVob) -> StateID { let mut vec_desc = vec![]; for idx in selected.iter() { @@ -128,32 +114,6 @@ impl RegexVec { } } - pub fn transition_bytes(&mut self, state: StateID, bytes: &[u8]) -> StateID { - let mut state = state; - for &b in bytes { - state = self.transition(state, b); - } - state - } - - pub fn is_match(&mut self, text: &str) -> bool { - self.lookahead_len(text).is_some() - } - - pub fn lookahead_len(&mut self, text: &str) -> Option { - let selected = SimpleVob::alloc(self.rx_list.len()); - let mut state = self.initial_state(&selected.negated()); - for b in text.bytes() { - let new_state = self.transition(state, b); - debug!("b: {:?} --{:?}--> {:?}", state, b as char, new_state); - state = new_state; - if state == StateID::DEAD { - return None; - } - } - self.lookahead_len_for_state(state) - } - /// Estimate the size of the regex tables in bytes. pub fn num_bytes(&self) -> usize { self.exprs.num_bytes() @@ -306,11 +266,7 @@ impl RegexVec { let (alpha, exprset, rx_list) = AlphabetInfo::from_exprset(exprset, rx_list); let num_ast_nodes = exprset.len(); - let mut rx_sets = VecHashCons::new(); - let id = rx_sets.insert(&[]); - assert!(id == StateID::DEAD.as_u32()); - let id = rx_sets.insert(&[0]); - assert!(id == StateID::MISSING.as_u32()); + let rx_sets = StateID::new_hash_cons(); let mut r = RegexVec { deriv: DerivCache::new(), @@ -328,9 +284,6 @@ impl RegexVec { max_states: usize::MAX, }; - // disable expensive optimizations after initial construction - r.exprs.disable_optimizations(); - r.insert_state(vec![]); // also append state for the "MISSING" r.append_state(r.state_descs[0].clone()); @@ -362,10 +315,6 @@ impl RegexVec { id } - fn exprs(&self) -> &ExprSet { - &self.exprs - } - fn compute_state_desc(&self, state: StateID) -> StateDesc { let mut res = StateDesc { state, @@ -379,7 +328,7 @@ impl RegexVec { }; for (idx, e) in iter_state(&self.rx_sets, state) { res.possible.set(idx, true); - if self.exprs().is_nullable(e) { + if self.exprs.is_nullable(e) { res.accepting.set(idx, true); if res.lowest_accepting.is_none() { res.lowest_accepting = Some(idx); @@ -448,7 +397,6 @@ fn iter_state<'a>( .map(move |idx| (lst[idx] as usize, ExprRef::new(lst[idx + 1]))) } - // #[test] // fn test_fuel() { // let mut rx = RegexVec::new_single("a(bc+|b[eh])g|.h").unwrap(); @@ -462,4 +410,3 @@ fn iter_state<'a>( // no_match(&mut rx, "abcg"); // assert!(rx.has_error()); // } - diff --git a/py/guidance b/py/guidance index 6372a15c..1afc9bb3 160000 --- a/py/guidance +++ b/py/guidance @@ -1 +1 @@ -Subproject commit 6372a15c68e3c3e3bc8b2a779cdb7fbf5764e446 +Subproject commit 1afc9bb30fb74f2819877601684441da38eea651