From 64999d37d8c39e8abb562a587289f4efa23ef97b Mon Sep 17 00:00:00 2001 From: Michal Moskal Date: Wed, 3 Jul 2024 20:30:58 +0000 Subject: [PATCH] fix https://github.com/hudson-ai/guidance/issues/19 --- controllers/llguidance_ctrl/run_g.py | 9 ++++ .../llguidance_ctrl/src/earley/regexvec.rs | 47 ++++++++++++++----- py/guidance | 2 +- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/controllers/llguidance_ctrl/run_g.py b/controllers/llguidance_ctrl/run_g.py index 2c275c0e..39eae8d3 100644 --- a/controllers/llguidance_ctrl/run_g.py +++ b/controllers/llguidance_ctrl/run_g.py @@ -220,6 +220,15 @@ def character_maker2(lm, id, description, valid_weapons): ) grm = "Here: 2 + 2 = " + greedy_grammar(name="num", body=lexeme("[0-9]+")) + grm = "Here: 1 / 10 = " + greedy_grammar( + body=select( + [ + lexeme(r"-?(?:0|[1-9][0-9]*)", contextual=True), + lexeme(r"-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)", contextual=True), + ] + ) + ) + # grm = "Here: 2 + 2 = " + guidance.json(name="num", schema={"type": "integer"}) # grm = guidance.json(name="num", schema={"type": "integer"}) # m = grm.match("123") diff --git a/controllers/llguidance_ctrl/src/earley/regexvec.rs b/controllers/llguidance_ctrl/src/earley/regexvec.rs index 62977615..4f21f43d 100644 --- a/controllers/llguidance_ctrl/src/earley/regexvec.rs +++ b/controllers/llguidance_ctrl/src/earley/regexvec.rs @@ -124,26 +124,47 @@ impl RegexVec { + self.rx_sets.num_bytes() } - /// Return index of lowest matching regex if any. - /// Lazy regexes match as soon as they accept, while greedy only - /// if they accept and force EOI. 
- pub fn lowest_match(&mut self, state: StateID) -> Option<(usize, usize)> { - let desc = &mut self.state_descs[state.as_usize()]; - if let Some(lowest_match) = desc.lowest_match { - return lowest_match; - } - let mut res = None; + fn lowest_match_inner(&mut self, state: StateID) -> Option<(usize, usize)> { + let mut all_eoi = true; + let mut eoi_candidate = None; + // find the first lazy matching regex + // failing that, if all regexes are matching and force EOI, pick the first one for (idx, e) in iter_state(&self.rx_sets, state) { if !self.exprs.is_nullable(e) { + all_eoi = false; continue; } - if self.lazy[idx] || self.next_byte.next_byte(&self.exprs, e) == NextByte::ForcedEOI { + if self.lazy[idx] { let len = self.exprs.possible_lookahead_len(e); - res = Some((idx, len)); - break; + return Some((idx, len)); + } + if all_eoi { + if self.next_byte.next_byte(&self.exprs, e) == NextByte::ForcedEOI { + if eoi_candidate.is_none() { + eoi_candidate = Some((idx, self.exprs.possible_lookahead_len(e))); + } + } else { + all_eoi = false; + } } } - desc.lowest_match = Some(res); + + if all_eoi { + eoi_candidate + } else { + None + } + } + + /// Return index of lowest matching regex if any. + /// Lazy regexes match as soon as they accept, while greedy only + /// if they accept and force EOI. + pub fn lowest_match(&mut self, state: StateID) -> Option<(usize, usize)> { + if let Some(lowest_match) = self.state_descs[state.as_usize()].lowest_match { + return lowest_match; + } + let res = self.lowest_match_inner(state); + self.state_descs[state.as_usize()].lowest_match = Some(res); res } diff --git a/py/guidance b/py/guidance index 34fd0046..25f9cafe 160000 --- a/py/guidance +++ b/py/guidance @@ -1 +1 @@ -Subproject commit 34fd00467d3cec24aba5a4bbb3171ccdb44f41fa +Subproject commit 25f9cafeff6f47f79c971b2b938b7d0bcc04e8d2