-
Notifications
You must be signed in to change notification settings - Fork 0
/
twitter.py
64 lines (44 loc) · 1.52 KB
/
twitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
import os
import tqdm
import tweepy
import preprocessor
import pandas as pd
def _load_credentials() -> dict:
    """Read the Twitter API credentials from ``twitter-credentials.json``.

    The path is relative, so the file is looked up in the current working
    directory. The parsed JSON is indexed with ``[0]`` and that first entry
    is returned.

    Raises:
        OSError: if the credentials file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open('twitter-credentials.json') as handle:
        entries = json.load(handle)
    return entries[0]
# Names of hashtags that already have a CSV file at cache/<hashtag>.csv.
# Populated by the directory scan at the bottom of this module and extended
# by get_tweets() after each successful download.
cache = set()
def _authenticate(cred: dict) -> tweepy.API:
    """Create a tweepy API client from an OAuth credential mapping.

    Args:
        cred: mapping that provides the keys ``'api-key'``, ``'api-secret'``,
            ``'access-key'`` and ``'access-secret'``.

    Returns:
        A ``tweepy.API`` instance built on the configured OAuth handler.

    Raises:
        KeyError: if one of the four credential keys is missing.
    """
    handler = tweepy.OAuthHandler(cred['api-key'], cred['api-secret'])
    token, token_secret = cred['access-key'], cred['access-secret']
    handler.set_access_token(token, token_secret)
    client = tweepy.API(handler)
    return client
def get_tweets(hashtag: str, lang: str, count: int = 0, out_file: str = None):
    """Return tweets for ``hashtag`` as a DataFrame, backed by a CSV cache.

    If ``hashtag`` is already registered in the module-level ``cache`` set,
    the tweets are read from ``cache/<hashtag>.csv``. Otherwise they are
    downloaded through the Twitter search API, cleaned with
    ``preprocessor.clean``, written to the cache file and registered in
    ``cache``.

    Args:
        hashtag: hashtag to search for, without the leading ``#``. It is also
            used verbatim as the cache file name.
        lang: language filter forwarded to the search call.
        count: forwarded unchanged to ``Cursor.items``.
            NOTE(review): in tweepy 3.x a limit of 0 means "no limit" --
            confirm for the installed tweepy version.
        out_file: currently unused; kept so existing callers keep working.

    Returns:
        DataFrame with the columns ``text`` (cleaned tweet text), ``date``
        (tweet creation time) and ``sentiment`` (always the placeholder -1
        for freshly downloaded tweets).
    """
    cache_path = f'cache/{hashtag}.csv'

    if hashtag in cache:
        print(f'loaded {hashtag} from cache')
        df = pd.read_csv(cache_path)
        df = df.astype({'date': 'datetime64[ns]'})
        # Cache files written by earlier versions contain the DataFrame index
        # as an extra unnamed column; drop it so cached and freshly fetched
        # results have the same columns.
        return df.drop(columns=['Unnamed: 0'], errors='ignore')

    api = _authenticate(_load_credentials())
    res = []
    try:
        cursor = tweepy.Cursor(api.search, q=f'#{hashtag}', lang=lang,
                               tweet_mode='extended', count=100)
        for tweet in tqdm.tqdm(cursor.items(count)):
            res.append([
                preprocessor.clean(tweet.full_text),
                tweet.created_at,
                -1,  # sentiment placeholder
            ])
    except (Exception, KeyboardInterrupt) as exc:
        # Best effort: keep whatever was collected before the failure (API
        # error, rate limit, Ctrl-C), but report it instead of hiding it.
        print(f'stopped fetching {hashtag} after {len(res)} tweets: {exc!r}')

    df = pd.DataFrame(res, columns=['text', 'date', 'sentiment'])
    # The cache directory may not exist yet on a fresh checkout.
    os.makedirs('cache', exist_ok=True)
    # index=False keeps the row index out of the file so that reading it back
    # yields exactly the columns created above.
    df.to_csv(cache_path, index=False)
    cache.add(hashtag)
    return df
# load cache
# Register every hashtag that already has a CSV file in the cache directory.
# The directory is optional: without this guard a missing 'cache' folder made
# os.listdir raise FileNotFoundError and the whole module failed to import.
if os.path.isdir('cache'):
    for file in os.listdir('cache'):
        if file.endswith('.csv'):
            _stem = os.path.splitext(file)[0]
            print(f'found {_stem} in cache')
            cache.add(_stem)
# keyword = 'ryanair'
# tweets = get_tweets(keyword)
# tweets.to_csv(f'{keyword}.csv', index=False)