Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MB-61640: Toy: Fuzzy and Wildcard dynamic scoring #20

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions automaton.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ func (m *AlwaysMatch) Accept(int, byte) int {

// alwaysMatchAutomaton is a single package-level *AlwaysMatch instance,
// created once to avoid unnecessary repeated allocations.
var alwaysMatchAutomaton = &AlwaysMatch{}

// FuzzyAutomaton is an Automaton that can additionally report an edit
// distance for one of its states.
type FuzzyAutomaton interface {
	Automaton

	// EditDistance returns the edit distance associated with the given
	// automaton state.
	// NOTE(review): the exact meaning of the value (exact vs. lower bound)
	// is up to the implementation — confirm against each implementer.
	EditDistance(state int) uint8
}
14 changes: 14 additions & 0 deletions fst_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ type Iterator interface {
Close() error
}

// FuzzyIterator is an Iterator that additionally exposes an edit distance.
type FuzzyIterator interface {
	Iterator

	// Distance returns the edit distance tracked by the iterator.
	// NOTE(review): presumably this refers to the key the iterator is
	// currently positioned on — confirm against the implementation.
	Distance() uint8
}

// FSTIterator is a structure for iterating key/value pairs in this FST in
// lexicographic order. Iterators should be constructed with the FSTIterator
// method on the parent FST structure.
Expand All @@ -61,6 +66,8 @@ type FSTIterator struct {
autStatesStack []int

nextStart []byte

keysDistance uint8
}

func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
Expand All @@ -74,6 +81,10 @@ func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
return rv, nil
}

// Distance returns the edit distance last recorded by the iterator in its
// keysDistance field. The iteration code stores this value from
// FuzzyAutomaton.EditDistance when the iterator's automaton implements
// FuzzyAutomaton.
// NOTE(review): whether the value is reset or updated on every match path
// is not visible here — confirm before relying on it for non-fuzzy automata.
func (i *FSTIterator) Distance() uint8 {
	return i.keysDistance
}

// Reset resets the Iterator's internal state to allow for iterator
// reuse (e.g. pooling).
func (i *FSTIterator) Reset(f *FST,
Expand Down Expand Up @@ -206,6 +217,9 @@ OUTER:

cmp := bytes.Compare(i.keysStack, i.nextStart)
if cmp > 0 {
if fa, ok := i.aut.(FuzzyAutomaton); ok {
i.keysDistance = fa.EditDistance(autCurr)
}
// in final state greater than start key
return nil
}
Expand Down
14 changes: 9 additions & 5 deletions levenshtein/dfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,27 @@ type DFA struct {
ed uint8
}

// initialState returns the initial state of the DFA.
func (d *DFA) initialState() int {
	start := d.initState
	return start
}

// distance returns the Levenshtein distance associated with the state
// identified by stateId. stateId is used directly as an index into
// d.distances, so it must be a valid state of this DFA.
func (d *DFA) distance(stateId int) Distance {
	dist := d.distances[stateId]
	return dist
}

// EditDistance returns the Levenshtein distance associated with the state
// identified by stateId as a plain uint8, obtained by calling distance()
// on the Distance value stored for that state. stateId must be a valid
// state of this DFA.
func (d *DFA) EditDistance(stateId int) uint8 {
	return d.distance(stateId).distance()
}

// numStates returns the number of states in the DFA, which is the length
// of its transition table.
func (d *DFA) numStates() int {
	count := len(d.transitions)
	return count
}

// transition returns the destination state reached from fromState after
// consuming the byte b. fromState is used directly as an index into the
// transition table, so it must be a valid state of this DFA.
func (d *DFA) transition(fromState int, b uint8) int {
	next := d.transitions[fromState][b]
	return int(next)
}
Expand Down
Loading