# retrieval.py
# Interactive demo: scores a fixed list of queries against the 'Presidents'
# corpus using several classifier / extractor combinations.
from classifiers import BM25Classifier
from classifiers import GramMatchClassifier
from extractors import NGramExtractor
from extractors import Nto1GramExtractor
from extractors import SkipGramExtractor
from documents import Corpus
import sys
# Identifier handed to Corpus() as its first argument.
# NOTE(review): presumably the name of the document collection on disk -- confirm.
presidents = 'Presidents'

# Fixed set of free-text queries that test() runs against every classifier.
queries = [
    'adams',
    'lincoln',
    'president',
    'assassinated president',
    'great president',
    'first president',
    'civil war president',
    'the greatest president',
    'america',
    'vietnam war president',
    'terrorism',
    'world war',
    'united nations',
    'great depression',
    'impeachment',
    'second term',
    'first president after civil war',
]
def test(classifier, name):
    """Run every entry of the module-level ``queries`` through *classifier*.

    Prints *name* as a heading, then for each query prints the query text
    followed by one ``category : score`` line per pair produced by
    ``classifier.getStringScores(query)``, and waits for Enter.

    Args:
        classifier: object exposing ``getStringScores(query)``, which must
            yield ``(category, score)`` pairs.
        name: heading printed before the results of this classifier.
    """
    # raw_input exists only on Python 2; fall back to input on Python 3 so
    # the script runs unchanged on both interpreters.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input

    # Single-argument print(...) and %-formatting produce the same output
    # under the Python 2 print statement and the Python 3 print function.
    print('\n')
    print(name)
    for query in queries:
        print('\n')
        print('Query: %s' % (query,))
        for cat, score in classifier.getStringScores(query):
            print('%s : %s' % (cat, score))
        # NOTE(review): the original indentation was lost; the pause is
        # placed after each query's results -- confirm it was not meant to
        # run once per classifier instead.
        wait_for_enter('Press Enter To Continue')
def main():
    """Build a corpus per extractor configuration and run test() on each.

    The sequence is: BM25 on the default corpus, BM25 and gram matching on
    the same NGramExtractor(2) corpus, then gram matching alone on three
    further extractor configurations.
    """
    # Default corpus (no extractor argument) scored with BM25.
    corpus = Corpus(presidents)
    test(BM25Classifier(corpus), 'Term Matching with BM25')

    # One NGramExtractor(2) corpus is shared by both classifiers.
    corpus = Corpus(presidents, NGramExtractor(2))
    test(BM25Classifier(corpus), 'bigrams with BM25')
    test(GramMatchClassifier(corpus), 'bigrams with length normalized match count')

    # Remaining configurations only use GramMatchClassifier; each gets a
    # freshly built corpus, in this order.
    gram_match_runs = (
        (Nto1GramExtractor, (2,), 'unigrams and bigrams'),
        (SkipGramExtractor, (1, 2), 'skip bigrams'),
        (SkipGramExtractor, (2, 2), 'two skip bigrams'),
    )
    for extractor_cls, extractor_args, label in gram_match_runs:
        corpus = Corpus(presidents, extractor_cls(*extractor_args))
        test(GramMatchClassifier(corpus), label)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()