-
Notifications
You must be signed in to change notification settings - Fork 0
/
process.py
83 lines (71 loc) · 2.24 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import sys
import matplotlib.pyplot as plt
import statistics as st
def load_lexicon(path):
    """Return the set of words stored in the lexicon file at *path*.

    Only the text after the last ';' in the file is used, split on
    whitespace (presumably this skips a ';'-prefixed comment header --
    confirm against the lexicon files). Undecodable bytes are replaced
    rather than raising, as in the original script.
    """
    with open(path, errors='replace') as handle:
        return set(handle.read().split(';')[-1].split())


def score_tweet(words, negwords, poswords):
    """Return ``(negative_freq, positive_freq)`` for one tokenised tweet.

    Each frequency is the number of lexicon hits divided by
    ``len(words) + 1.0``; the ``+ 1.0`` is kept from the original formula.
    *negwords* and *poswords* may be any containers supporting ``in``.
    """
    denom = len(words) + 1.0
    neg_hits = sum(1 for w in words if w in negwords)
    pos_hits = sum(1 for w in words if w in poswords)
    return neg_hits / denom, pos_hits / denom


def chunk_sentiment(tweets, chunksize, negwords, poswords):
    """Average tweet scores over consecutive chunks of *chunksize* tweets.

    Retweets (any tweet containing the token ``RT``) and blank lines are
    not scored. Blank lines matter because ``split('\\n')`` on a file that
    ends with a newline yields a trailing empty string, which would
    otherwise be counted as a zero-sentiment tweet. A chunk in which no
    tweet was scored contributes no point at all, instead of crashing
    ``statistics.mean`` on empty data.

    Returns:
        ``(neg_means, pos_means)``: two parallel lists with one entry per
        chunk that contained at least one scored tweet.

    Raises:
        ValueError: if *chunksize* is not a positive integer.
    """
    if chunksize <= 0:
        raise ValueError("chunk size must be a positive integer, got %r" % (chunksize,))

    neg_means = []
    pos_means = []
    for start in range(0, len(tweets), chunksize):
        scores = []
        for tweet in tweets[start:start + chunksize]:
            words = tweet.split()
            if not words or 'RT' in words:
                continue
            scores.append(score_tweet(words, negwords, poswords))
        if not scores:
            # Nothing usable in this chunk (all retweets / blank lines).
            continue
        neg_means.append(st.mean([s[0] for s in scores]))
        pos_means.append(st.mean([s[1] for s in scores]))
    return neg_means, pos_means


def process_dataset(path, chunksize, marker, negwords, poswords):
    """Score the tweet file at *path*, save the chunk means and plot them.

    The file is read as one tweet per line. The per-chunk
    ``(negative, positive)`` means are written, one tuple per line, to
    ``<path>_results`` and plotted with the matplotlib format string
    *marker*; the centroid of all chunk points is plotted as a green
    square when at least one point exists.

    Returns:
        ``(tbx, tby)``: the negative and positive chunk means.
    """
    with open(path) as handle:
        tweets = handle.read().split('\n')

    print("Processing Tweets...")
    tbx, tby = chunk_sentiment(tweets, chunksize, negwords, poswords)

    print("Saving...")
    with open(str(path) + "_results", "w") as out:
        out.write("\n".join([str(point) for point in zip(tbx, tby)]))

    print("Plotting...")
    plt.plot(tbx, tby, marker)
    if tbx:
        # The mean of an empty list raises StatisticsError, so the centroid
        # is only drawn when there is something to average.
        plt.plot(st.mean(tbx), st.mean(tby), 'gs')
    return tbx, tby


def main(argv=None):
    """Compare the sentiment of two tweet files on one scatter plot.

    Expected arguments: ``<tweets_a> <chunksize_a> <tweets_b> <chunksize_b>``.
    The first data set is drawn with blue crosses, the second with red
    crosses. Returns the process exit status (0 on success, 2 on a usage
    error).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 5:
        print("usage: %s <tweets_a> <chunksize_a> <tweets_b> <chunksize_b>"
              % (argv[0] if argv else "process.py"), file=sys.stderr)
        return 2

    for arg in argv[1:5]:
        print(arg)

    # Parse both chunk sizes up front so a bad value fails before any work.
    datasets = (
        (argv[1], int(argv[2]), 'bx'),
        (argv[3], int(argv[4]), 'rx'),
    )

    print("Loading Files...")
    negwords = load_lexicon('negative-words.txt')
    poswords = load_lexicon('positive-words.txt')

    tbx, tby = [], []
    for path, chunksize, marker in datasets:
        tbx, tby = process_dataset(path, chunksize, marker, negwords, poswords)

    plt.xlabel('negative word freq.')
    plt.ylabel('positive word freq.')
    plt.show()
    # As in the original script, only the second data set's means are echoed.
    print(tbx)
    print(tby)
    return 0


if __name__ == "__main__":
    sys.exit(main())