forked from bitonic/language-spelling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bench.hs
106 lines (87 loc) · 4.62 KB
/
bench.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
{-# LANGUAGE CPP #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
import Control.Applicative ((<$>))
import Data.Char (toLower)
import Data.Monoid (Last (..))
import Control.DeepSeq (NFData)
import qualified Data.ByteString.Char8 as ByteString
import qualified Data.Text as Text
import qualified Data.Text.IO as Text
import Data.Time.Clock.POSIX (getPOSIXTime)
import Criterion.Config
import Criterion.Main
import Data.ListLike.Text ()
import System.Random.Shuffle
import Language.Distance.Internal (Distance (..))
import qualified Language.Distance.Search.BK as BK
import qualified Language.Distance.Search.TST as TST
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
-- ~~ Utils and dictionaries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
everyN n xs = case drop n xs of
[] -> []
(x : xs') -> x : everyN n xs'
-- We get every 20 words since testing with the whole dict is too much
dict m lin read_ = (map (m toLower)) . everyN 20 . lin <$> read_ "/usr/share/dict/words"
dictS = dict map lines readFile
dictBS = dict ByteString.map ByteString.lines ByteString.readFile
dictT = dict Text.map Text.lines Text.readFile
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
-- ~~ Search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
deriving instance NFData (Distance algo)
query queryc n ss dist = map (\s -> queryc n s dist) ss
group1 emptyc insertc queryc ss rand1ss rand2ss name =
dist `seq` distRand `seq`
[ bench ("insert " ++ name) $ whnf (foldr insertc emptyc) ss
, bench ("insert rand " ++ name) $ whnf (foldr insertc emptyc) rand1ss
, bench ("lookup " ++ name) $ nf (query queryc 0 rand2ss) dist
, bench ("lookup rand " ++ name) $ nf (query queryc 0 rand2ss) distRand
, bench ("query 1 " ++ name) $ nf (query queryc 1 (take 100 rand2ss)) dist
, bench ("query 1 rand " ++ name) $ nf (query queryc 1 (take 100 rand2ss)) distRand
, bench ("query 2 " ++ name) $ nf (query queryc 2 (take 100 rand2ss)) dist
, bench ("query 2 rand " ++ name) $ nf (query queryc 2 (take 100 rand2ss)) distRand
]
where
dist = foldr insertc emptyc ss
distRand = foldr insertc emptyc rand1ss
#define group2(NAME, TYPE, SS, RAND1SS, RAND2SS, EMPTY, INSERT, LEVEN, DAMLEV) \
(bgroup (NAME ++ " " ++ TYPE) \
((group1 EMPTY INSERT LEVEN SS RAND1SS RAND2SS "lev") ++ \
(group1 EMPTY INSERT LEVEN SS RAND1SS RAND2SS "dam-lev")))
group3 ss rand1ss rand2ss =
[ group2("tst", "string", ss, rand1ss, rand2ss, TST.empty, TST.insert,
TST.levenshtein, TST.damerauLevenshtein)
, group2("tst", "bytestring", bss, rand1bss, rand2bss, TST.empty, TST.insert,
TST.levenshtein, TST.damerauLevenshtein)
, group2("tst", "text", ts, rand1ts, rand2ts, TST.empty, TST.insert, TST.levenshtein,
TST.damerauLevenshtein)
, group2("bk", "string", ss, rand1ss, rand2ss, BK.empty, BK.insert, BK.levenshtein,
BK.damerauLevenshtein)
, group2("bk", "bytestring", bss, rand1bss, rand2bss, BK.empty, BK.insert,
BK.levenshtein, BK.damerauLevenshtein)
, group2("bk", "text", ts, rand1ts, rand2ts, BK.empty, BK.insert, BK.levenshtein,
BK.damerauLevenshtein)
]
where
bss = map ByteString.pack ss
rand1bss = map ByteString.pack rand1ss
rand2bss = map ByteString.pack rand2ss
ts = map Text.pack ss
rand1ts = map Text.pack rand1ss
rand2ts = map Text.pack rand2ss
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
-- ~~ Distance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --
main = do ss <- dictS
rand1ss <- shuffleM ss
rand2ss <- shuffleM ss
fn <- (++ ".html") . (show :: Integer -> String) . ceiling <$>
getPOSIXTime
let config = defaultConfig { cfgReport = (Last (Just fn))
, cfgSamples = (Last (Just 100))
}
defaultMainWith config (return ()) (group3 ss rand1ss rand2ss)