-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrequency.py
51 lines (43 loc) · 1.34 KB
/
frequency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import random
from collections import Counter
# Number of decimal places that every probability in this module, and every
# random draw made by the word walker, is rounded to.
decimal_limit = 6
def probabilities(words):
    """Build a sorted cumulative probability table for *words*.

    Each distinct word's relative frequency is rounded to ``decimal_limit``
    places and added to a running total, which is itself rounded to
    ``decimal_limit`` places after every addition.

    Args:
        words: Iterable of words; passed unchanged to ``frequencies``.

    Returns:
        A list of ``(cumulative_probability, word)`` tuples sorted in
        ascending order.  Because of the repeated rounding the final
        cumulative value is not guaranteed to be exactly 1.0.  An empty
        input yields an empty list.
    """
    freq_dist = frequencies(words)
    # Sum the counts once up front.  Going through probability() for each
    # word would re-sum the whole distribution every time, making the loop
    # quadratic in the number of distinct words.
    total = sum(freq_dist.values())
    cumulative = []
    running = 0
    for word, freq in freq_dist.items():
        word_prob = round(freq / total, decimal_limit)
        running = round(running + word_prob, decimal_limit)
        cumulative.append((running, word))
    return sorted(cumulative)
def probability(word, freq_dist):
    """Return the relative frequency of *word* within *freq_dist*.

    Args:
        word: Key to look up in ``freq_dist``.
        freq_dist: Mapping of word -> occurrence count.

    Returns:
        ``freq_dist[word]`` divided by the sum of all counts, rounded to
        ``decimal_limit`` decimal places.

    Raises:
        KeyError: If ``freq_dist`` is a plain dict and ``word`` is not one
            of its keys.
        ZeroDivisionError: If the counts in ``freq_dist`` sum to zero.
    """
    total = sum(freq_dist.values())
    return round(freq_dist[word] / total, decimal_limit)
def frequencies(words):
    """Count how many times each item occurs in *words*.

    Args:
        words: Iterable of hashable items (typically word strings).

    Returns:
        A plain ``dict`` mapping each distinct item to its number of
        occurrences.  On Python 3.7+ the keys are in first-seen order.
    """
    # Counter replaces the hand-written tally loop; converting back to a
    # plain dict keeps the return type exactly what callers already get.
    return dict(Counter(words))
def make_word_walker(sorted_probabilities):
    """Create a function that draws random words from a cumulative table.

    Args:
        sorted_probabilities: Non-empty sequence of
            ``(cumulative_probability, word)`` tuples in ascending order
            (the shape returned by ``probabilities``).

    Returns:
        A zero-argument function.  Each call returns a one-element tuple
        ``(word,)`` holding the first word whose cumulative probability
        exceeds a fresh random draw.

    Raises:
        IndexError: If ``sorted_probabilities`` is empty.
    """
    # The draw's upper bound is the table's last cumulative value rather
    # than a hard-coded 1.0, so the draw range always matches the table.
    limit = sorted_probabilities[-1][0]

    def word_walker():
        """Draw one word and return it wrapped in a one-element tuple."""
        choice = round(random.uniform(0, limit), decimal_limit)
        for cumulative, word in sorted_probabilities:
            if choice < cumulative:
                return (word,)
        # random.uniform may return its upper endpoint, and rounding can
        # push the draw up to it; then no entry passes the strict
        # comparison above.  Pick the last entry instead of returning None.
        return (sorted_probabilities[-1][1],)

    return word_walker
if __name__ == '__main__':
    # Script entry point: build the cumulative probability table for the
    # files named on the command line and pickle it to "freq.pickle"
    # (a relative path, so it lands in the current working directory).
    import sys
    import pickle
    from words import words

    files = sys.argv[1:]
    corpus = words(files)
    dist = probabilities(corpus)
    pickle_file = "freq.pickle"
    # The context manager closes (and therefore flushes) the file even if
    # pickling raises, instead of leaving the handle open.
    with open(pickle_file, "wb") as out:
        pickle.dump(dist, out)
    print("Probability distribution pickled to " + pickle_file + ".")