-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordcountsE.py
41 lines (29 loc) · 946 Bytes
/
wordcountsE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import re
# Third example. Back to using a regular dict. Try a trick from the Counter class.
def word_gen(fn):
    """Yield each word found in the file named *fn*, lower-cased.

    The file is opened in binary mode and read line by line. Each line is
    split on runs of whitespace and the punctuation characters
    ``. , ? ; !``; every non-empty piece is yielded after ``lower()``.

    Because the file is read as bytes, the yielded words are byte strings
    (plain ``str`` on Python 2, ``bytes`` on Python 3).

    Args:
        fn: Path of the file to read.

    Yields:
        One lower-cased word at a time, in file order.
    """
    # Bytes pattern so it can split the binary-mode lines on both
    # Python 2 and Python 3; compiled once, before the file is touched.
    splitter = re.compile(br'[\s.,?;!]+')
    with open(fn, 'rb') as text:
        for line in text:
            for word in splitter.split(line):
                # split() produces an empty piece whenever a line starts or
                # ends with a separator (e.g. the trailing newline); an
                # empty piece is not a word, so skip it.
                if word:
                    yield word.lower()
def count_words(fn):
    """Count how often each word produced by ``word_gen(fn)`` occurs.

    Args:
        fn: Path of the file to read; passed straight to ``word_gen``.

    Returns:
        A list of ``(word, count)`` tuples, one per distinct word, in the
        dict's iteration order.
    """
    counts = {}
    for word in word_gen(fn):
        try:
            counts[word] += 1
        except KeyError:
            # First time this word is seen.
            counts[word] = 1
    # items() exists on both Python 2 and Python 3 (iteritems() is
    # Python 2 only); list() guarantees a real list either way.
    return list(counts.items())
def top_ten(counts, n=10):
    """Return the entries of *counts* with the highest counts.

    Args:
        counts: Iterable of ``(word, count)`` pairs; it is not modified.
        n: Maximum number of entries to return (default 10).

    Returns:
        A new list of at most *n* pairs, ordered from highest count to
        lowest. Pairs with equal counts keep their original relative
        order. If *counts* has fewer than *n* entries, all of them are
        returned.
    """
    ranked = sorted(counts, key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
def bottom_ten(counts, n=10):
    """Return the entries of *counts* with the lowest counts.

    Args:
        counts: Iterable of ``(word, count)`` pairs; it is not modified.
        n: Maximum number of entries to return (default 10).

    Returns:
        A new list of at most *n* pairs, ordered from lowest count to
        highest. Pairs with equal counts keep their original relative
        order. If *counts* has fewer than *n* entries, all of them are
        returned.
    """
    ranked = sorted(counts, key=lambda pair: pair[1])
    return ranked[:n]
if __name__ == '__main__':
    # Count the words in pg10.txt and print, in order: the ten most
    # frequent entries, the ten least frequent entries, the number of
    # distinct words, and the number of words that occur exactly once.
    #
    # Every print uses the single-argument call form, which prints the
    # same thing under Python 2's print statement and Python 3's print
    # function.
    word_counts = count_words('pg10.txt')
    print(top_ten(word_counts))
    print(bottom_ten(word_counts))
    print(len(word_counts))
    print(len([x for x in word_counts if x[1] == 1]))