calculation.py
import tweetstream, enchant, re, math
import string
from pickle import Unpickler
from nltk.stem.wordnet import WordNetLemmatizer

# ANEW lexicon: maps a lemma to its affective norms
# ('valence-mean', 'valence-SD', 'arousal-mean', 'arousal-SD', 'word-freq').
anew = Unpickler(open('anew.pickle', 'rb')).load()
d = enchant.Dict("en_US")        # US-English spelling dictionary
lmtzr = WordNetLemmatizer()      # WordNet-based lemmatizer
def tweetFilter(uname, psswd):
    """Yield English-language tweets from the Twitter sample stream."""
    stream = tweetstream.SampleStream(uname, psswd)
    for tweet in stream:
        if 'user' in tweet and tweet['user']['lang'] == 'en':
            yield tweet
def correctWord(word):
    """Return the spell checker's top suggestion, or the word itself if none."""
    sug = d.suggest(word)
    if not sug:
        return word
    return sug[0]
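# Illustrative sketch (not part of the original module); the actual suggestion
# depends on the enchant backend and its dictionaries:
#
#   correctWord('speling')   # typically 'spelling' with the en_US dictionary
#   correctWord('xyzzyq')    # falls back to the input if there is no suggestion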
def calcSentiment(agg):
    """Aggregate a list of ANEW entries into one valence/arousal score.

    Each word's mean is weighted by the peak height of a Gaussian with that
    word's SD, 1 / (SD * sqrt(2*pi)), so words with low rating variance
    contribute more to the weighted average.
    """
    valence = 0
    valenceNorm = []
    valenceSum = 0
    valenceSD = 0
    arousal = 0
    arousalNorm = []
    arousalSum = 0
    arousalSD = 0
    wordFreq = 0
    for word in agg:
        wordFreq += word['word-freq']
        # Weight = Gaussian peak height for this word's valence rating.
        v = 1 / math.sqrt(2 * math.pi * math.pow(word['valence-SD'], 2))
        valenceSum += v
        valenceNorm.append((v, word['valence-mean']))
        valenceSD += word['valence-SD']
        # Same weighting for arousal.
        a = 1 / math.sqrt(2 * math.pi * math.pow(word['arousal-SD'], 2))
        arousalSum += a
        arousalNorm.append((a, word['arousal-mean']))
        arousalSD += word['arousal-SD']
    # Normalise the weights to sum to 1, then take the weighted means.
    for v in valenceNorm:
        valence += (v[0] / valenceSum) * v[1]
    for a in arousalNorm:
        arousal += (a[0] / arousalSum) * a[1]
    return {'valence': valence, 'valence-SD': valenceSD,
            'arousal': arousal, 'arousal-SD': arousalSD,
            'word-freq': wordFreq}
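# Illustrative sketch (not part of the original module), showing the effect of the
# weighting above with two hypothetical ANEW-style entries:
#
#   agg = [{'valence-mean': 8.0, 'valence-SD': 0.5, 'arousal-mean': 6.0,
#           'arousal-SD': 1.0, 'word-freq': 10},
#          {'valence-mean': 2.0, 'valence-SD': 2.0, 'arousal-mean': 3.0,
#           'arousal-SD': 2.0, 'word-freq': 4}]
#   calcSentiment(agg)['valence']   # ~6.8: pulled toward 8.0, since SD 0.5 < 2.0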
def normTweet(tweet):
    """Normalise a tweet: strip punctuation, spell-correct, and lemmatize.

    Returns (tweet, lemmas) when the tweet contains at least two dictionary
    words, otherwise (None, lemmas).
    """
    if 'text' in tweet:
        text = tweet['text']
    else:
        return (None, [])
    # Strip punctuation but keep apostrophes and quotation marks.
    exclude = set(string.punctuation)
    exclude.difference_update(set(['\'', '\"']))
    stripped = ''.join(ch for ch in text if ch not in exclude)
    words = re.findall('[a-z\'\"]+', stripped.lower())
    english = 0
    agg = []
    for word in words:
        if len(word) <= 2:
            continue
        if d.check(word):
            english += 1
            replace = word
        else:
            # Try the spell checker's best suggestion; skip the word if even
            # the corrected form is not in the dictionary.
            replace = correctWord(word)
            if d.check(replace):
                text = text.replace(word, replace)
            else:
                continue
        stem = lmtzr.lemmatize(replace)
        agg.append(stem)
    tweet['text'] = text
    # Require at least two dictionary words before treating the tweet as English.
    if english >= 2:
        return (tweet, agg)
    else:
        return (None, agg)
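# Illustrative sketch (not part of the original module); exact lemmas depend on the
# WordNet lemmatizer (which treats words as nouns by default) and on the enchant
# suggestion list, so treat the output as approximate:
#
#   normTweet({'text': "Good dogs are the best"})
#   # -> (the tweet dict, ['good', 'dog', 'are', 'the', 'best'] or similar)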
def getAnewSentiment(tweet):
    """Score a tweet against the ANEW lexicon.

    Returns (sentiment, corrected text, tweet) when at least two lemmas are
    found in the lexicon, otherwise None.
    """
    tweet, stems = normTweet(tweet)
    if not tweet:
        return None
    agg = [anew[stem] for stem in stems if stem in anew]
    if len(agg) >= 2:
        return (calcSentiment(agg), tweet['text'], tweet)
    return None
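# Illustrative sketch (not part of the original module); whether a result is
# returned depends on which lemmas the pickled ANEW lexicon actually contains:
#
#   result = getAnewSentiment({'text': "What a terrible, gloomy morning"})
#   if result:
#       sentiment, text, tweet = result
#       # sentiment is a dict with keys 'valence', 'valence-SD', 'arousal',
#       # 'arousal-SD', and 'word-freq'.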
if __name__ == "__main__":
    # Stream English tweets (placeholder credentials) and print the aggregate
    # sentiment of each tweet that yields enough ANEW matches.
    stream = tweetFilter('SentimentTest', 'passwd')
    for tweet in stream:
        fixed = getAnewSentiment(tweet)
        if fixed:
            print(fixed[0])