Skip to content

Commit

Permalink
Merge pull request #3713 from michaellilltokiwa/lib_knuth_morris_pratt
Browse files Browse the repository at this point in the history
lib: add knuth morris pratt search algorithm
  • Loading branch information
fridis authored Sep 12, 2024
2 parents 94f4f7d + 9eb3fe0 commit 04eed87
Show file tree
Hide file tree
Showing 8 changed files with 219 additions and 10 deletions.
67 changes: 59 additions & 8 deletions lib/Sequence.fz
Original file line number Diff line number Diff line change
Expand Up @@ -637,18 +637,63 @@ public Sequence(public T type) ref is
find [x]


# get the index of l within this list or nil if it does not exist
# get the index of the first occurrence of pattern within this Sequence, or nil if pattern does not occur
#
public find(l Sequence T) option i32
# uses the Knuth-Morris-Pratt algorithm,
# ported from the Racket code in this paper:
# https://www.cambridge.org/core/services/aop-cambridge-core/content/view/8EFA77D663D585B68630E372BCE1EBA4/S0956796824000017a.pdf/knuth-morris-pratt-illustrated.pdf
#
# worst-case performance: O( seq_length ) + O( pattern_length )
# worst-case space complexity: O( pattern_length )
#
public find(pattern Sequence T) option i32
pre
T : property.equatable
=>
if starts_with l
0
else
match as_list
nil => nil
c Cons => (c.tail.find l) >>= +1

find_lm.instate_self ()->

# build the matcher node for the pattern suffix `t`.
#
# `t` is the part of the pattern that still has to be matched, `r` is the
# node to fall back to when the next input element does not match
# (nil if there is no such node).
#
make(t Sequence T, r option (Node T)) =>
# successor node, used after the first element of `t` has been matched:
# nil if `t` is empty, otherwise the node for `t.drop 1` whose fallback
# is the current fallback advanced by `t[0]`.
n option (Node T) =>
if t.is_empty then nil else make (t.drop 1) (step r t[0])
# fallback that is stored in the new node: `r` itself, unless `r` expects
# the very same element as this node (`is_match r t[0]`), in which case
# `r`'s own fallback `r.get.rest` is stored instead.
r_star =>
if t.is_empty
r
else if is_match r t[0]
r.get.rest
else
r
# `n` is wrapped in `once`, so it is only evaluated on the first call of
# `next.get` and its result is cached afterwards.
Node t (once find_lm (option (Node T)) ()->n) r_star

init := make pattern nil

# advance the matcher state `acc` by one input element `x`.
#
# A nil state restarts at `init`. For a node, if `x` is the element the
# node expects (`is_match`), the result is the node's successor `next`;
# otherwise `x` is retried on the node's fallback `rest`.
#
step(acc option (Node T), x T) =>
match acc
nil => init
n Node => if is_match acc x then n.next.get else step n.rest x

# function that checks whether a matcher state is final: true for a node
# whose remaining pattern `top` is empty, false for any other node and
# for nil.
#
is_done (option (Node T))->bool => (acc)->
match acc
nil => false
n Node => n.top.is_empty

# does the matcher state `acc` expect `x` as its next element?
#
# true iff `acc` is a node whose remaining pattern `top` is not empty and
# starts with an element equal to `x`; false for nil.
#
is_match(acc option (Node T), x T) =>
match acc
nil => false
n Node => !n.top.is_empty && n.top[0] = x

# feed the elements of `data` one by one through `step`, starting with
# state `acc`, until either the state is final (`is_done`) or `data` is
# exhausted.
#
# result is the start index of the match, or nil if `data` ran out before
# a final state was reached.
#
# NOTE: the argument `step` has the same name as the outer feature `step`,
# it is accessed explicitly via `fold_until.this.step`.
#
fold_until(acc option (Node T), step (option (Node T), T)->option (Node T), data Sequence T) option i32 =>
if is_done acc
# elements consumed so far (`Sequence.this.count - data.count`) minus the
# length of the pattern gives the index where the match starts.
Sequence.this.count - data.count - pattern.count
else if data.is_empty
nil
else
# state after consuming the first element of `data`
acc_star =>
fold_until.this.step acc data[0]
fold_until acc_star fold_until.this.step (data.drop 1)

fold_until init step Sequence.this



# replace all occurrences of old by new
Expand Down Expand Up @@ -856,3 +901,9 @@ public Sequence(public T type) ref is
# Sequence.
#
public type.AS_STRING_NON_FINITE_MAX_ELEMENTS => 10


# helper features for the Knuth-Morris-Pratt algorithm used by `Sequence.find`
#
# find_lm -- mutate effect that `Sequence.find` instates locally; it is the
# `LM` type argument of the `once` instances stored in `Node.next`.
#
# Node -- one state of the matcher built by `Sequence.find`:
#
#   top  -- the part of the pattern that still has to be matched
#   next -- lazily computed state to continue with when the next input
#           element matches the first element of `top`
#   rest -- state to fall back to when it does not match, nil if none
#
find_lm : mutate is
Node(T type, top Sequence T, next once find_lm (option (Node T)), rest option (Node T)) ref is
36 changes: 36 additions & 0 deletions lib/once.fz
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# This file is part of the Fuzion language implementation.
#
# The Fuzion language implementation is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 3 of the License.
#
# The Fuzion language implementation is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License along with The
# Fuzion language implementation. If not, see <https://www.gnu.org/licenses/>.


# -----------------------------------------------------------------------
#
# Tokiwa Software GmbH, Germany
#
# Source code of Fuzion standard library feature once
#
# -----------------------------------------------------------------------

# once -- evaluate `f` at most once and cache its result.
#
# `f` is evaluated on the first call of `get` only; all later calls of
# `get` return the cached result without evaluating `f` again.
#
# The cache is a mutable value created via `LM.env`.
# NOTE(review): this presumably requires the mutate effect `LM` to be
# instated wherever `once` is created and `get` is called -- confirm.
#
public once(LM type : mutate, T type, f Lazy T) is

# cached result of `f`, nil as long as `f` has not been evaluated
cache := LM.env.new (option T) nil

# get the result of `f`, evaluating `f` if this has not been done yet
#
public get T =>
if cache.get.is_nil
cache <- f()
cache.get.get
14 changes: 13 additions & 1 deletion lib/time/duration.fz
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ private:public duration (
# the duration in nano seconds
#
public nanos u64
)
) : property.orderable

is

Expand Down Expand Up @@ -74,6 +74,11 @@ is
public years => nanos / units.nanos_per_year


# this duration multiplied by n, i.e., a new duration of `nanos*n`
# nano seconds
#
public infix * (n u64) => (duration nanos*n)


# create a string representation of this duration. The string
# representation is not accurate, it consists of at least two
# and at most 4 decimal digits followed by a time unit string.
Expand All @@ -89,6 +94,13 @@ is
n := $(nanos / f)
" "*(max 0 4-n.byte_length) + n + u


# total order on durations, defined by comparing their lengths in
# nano seconds using `u64.type.lteq`
#
public fixed redef type.lteq(a, b time.duration) bool =>
u64.type.lteq a.nanos b.nanos


# time.durations -- unit type defining features related to duration but not requiring
# an instance
#
Expand Down
2 changes: 1 addition & 1 deletion lib/u64.fz
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ public u64(public val u64) : num.wrap_around, has_interval is

# total order, implemented as an intrinsic
#
fixed redef type.lteq(a, b u64) bool => intrinsic
public fixed redef type.lteq(a, b u64) bool => intrinsic


# returns the number in whose bit representation all bits are ones
Expand Down
25 changes: 25 additions & 0 deletions tests/lib_knuth_morris_pratt/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# This file is part of the Fuzion language implementation.
#
# The Fuzion language implementation is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 3 of the License.
#
# The Fuzion language implementation is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License along with The
# Fuzion language implementation. If not, see <https://www.gnu.org/licenses/>.


# -----------------------------------------------------------------------
#
# Tokiwa Software GmbH, Germany
#
# Source code of Fuzion test Makefile
#
# -----------------------------------------------------------------------

override NAME = lib_knuth_morris_pratt
include ../simple.mk
75 changes: 75 additions & 0 deletions tests/lib_knuth_morris_pratt/lib_knuth_morris_pratt.fz
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# This file is part of the Fuzion language implementation.
#
# The Fuzion language implementation is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 3 of the License.
#
# The Fuzion language implementation is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License along with The
# Fuzion language implementation. If not, see <https://www.gnu.org/licenses/>.


# -----------------------------------------------------------------------
#
# Tokiwa Software GmbH, Germany
#
# Source code of Fuzion test lib_knuth_morris_pratt
#
# -----------------------------------------------------------------------

lib_knuth_morris_pratt =>

# check the results of `find` for a pattern at the end, a pattern at the
# start and a pattern that does not occur, followed by the corner cases
# of an empty sequence and/or an empty pattern.
#
functionality =>
# search in a sequence of codepoints
say ("Helló World".as_codepoints.find "World".as_codepoints)
say ("Helló World".as_codepoints.find "Helló".as_codepoints)
say ("Helló World".as_codepoints.find "Internet".as_codepoints)

# search in a sequence of utf8 bytes; `ó` is encoded in two bytes, so
# "World" is found at index 7 instead of 6 here
say ("Helló World".utf8.find "World".utf8)
say ("Helló World".utf8.find "Helló".utf8)
say ("Helló World".utf8.find "Internet".utf8)

# corner cases: empty sequence and/or empty pattern
say ("".utf8.find "".utf8)
say ("".utf8.find "word".utf8)
say ("Some text".utf8.find "".utf8)


# compare the run time of `Sequence.find` with a brute force search on an
# input that is expensive for the brute force approach.
#
performance =>
# brute force search used as the reference: check `starts_with l` at the
# current position and, if that fails, search in the tail and add one
# to the result.
Sequence.find_brute_force(l Sequence T) option i32
pre
T : property.equatable
=>
if starts_with l
0
else
match as_list
nil => nil
c Cons => (c.tail.find_brute_force l) >>= +1


# search for n times `a` followed by `b` in 2*n times `a` using brute
# force, the pattern does not occur
test_brute_force(n i32) =>
("a"*2*n).utf8.find_brute_force ("a"*n+"b").utf8

# the same search using `Sequence.find`
test_kmp(n i32) =>
("a"*2*n).utf8.find ("a"*n+"b").utf8

# always run one search using `find` and print its result
say (test_kmp 1000)

len := 250

# the timing comparison is done only if `-benchmark` is found in the
# command line arguments
if (envir.args.index_of "-benchmark")??

# for len=250, the interpreter takes ~70s on my machine
brute_force := time.stopwatch (()-> _:= test_brute_force len)
# for len=250, the interpreter takes ~1s on my machine
kmp := time.stopwatch (()-> _:= test_kmp len)

# check that kmp is more than four times faster
check (kmp*4 < brute_force)


functionality
performance
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
6
0
--nil--
7
0
--nil--
0
--nil--
0
--nil--

0 comments on commit 04eed87

Please sign in to comment.