-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathflair_predictor.py
73 lines (62 loc) · 2.16 KB
/
flair_predictor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import joblib
import nltk
from nltk.corpus import stopwords
import praw
from RedditAPI import accinfo
import pandas as pd
import sklearn
# Fetch the NLTK resources used in this module: the 'punkt' tokenizer models
# for nltk.word_tokenize and the 'stopwords' corpus for stopwords.words().
# Without the corpus, stopwords.words() raises LookupError on a fresh install.
nltk.download('punkt')
nltk.download('stopwords')

# Reddit API credentials, consumed positionally below as
# (client_id, client_secret, user_agent, username, password).
info = accinfo()

# Set for O(1) membership tests in the *_preprocessing helpers.
stops = set(stopwords.words("english"))

# NOTE: joblib.load unpickles the file; only load model files from a trusted source.
model = joblib.load("./Models/finalized_model.sav")

reddit = praw.Reddit(
    client_id=info[0],
    client_secret=info[1],
    user_agent=info[2],
    username=info[3],
    password=info[4],
)
def title_preprocessing(row):
    """Reduce a row's title to its alphanumeric, non-stop-word tokens.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose ``'title'``
            entry is the text to process.

    Returns:
        The surviving tokens joined by single spaces.
    """
    kept = []
    for token in nltk.word_tokenize(row['title']):
        # Drop punctuation/mixed-symbol tokens first, then stop words.
        if token.isalnum() and token not in stops:
            kept.append(token)
    return " ".join(kept)
def body_preprocessing(row):
    """Reduce a row's body text to its alphanumeric, non-stop-word tokens.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose ``'body'``
            entry is the text to process.

    Returns:
        The surviving tokens joined by single spaces.
    """
    text = row['body']
    return " ".join(
        token
        for token in nltk.word_tokenize(text)
        if token.isalnum() and token not in stops
    )
def comment_preprocessing(row):
    """Reduce a row's comment text to its alphanumeric, non-stop-word tokens.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose ``'comments'``
            entry is the text to process.

    Returns:
        The surviving tokens joined by single spaces.
    """
    def is_keyword(token):
        # Keep purely alphanumeric tokens that are not in the stop-word set.
        return token.isalnum() and token not in stops

    all_tokens = nltk.word_tokenize(row['comments'])
    return " ".join(filter(is_keyword, all_tokens))
def FlairifyMe(url):
    """Predict the flair of the Reddit submission at ``url``.

    Fetches the submission through the module-level ``reddit`` client, builds
    a one-row DataFrame from its id, title, self-text and comment bodies, runs
    the three ``*_preprocessing`` helpers over that row, and passes the
    concatenated result to the module-level ``model``.

    Args:
        url: URL of the Reddit submission to classify.

    Returns:
        The first element of ``model.predict`` for the combined text.
    """
    submission = reddit.submission(url=url)

    # Per PRAW, limit=0 removes the "more comments" placeholders so that
    # .list() yields only comment objects with a .body.
    submission.comments.replace_more(limit=0)
    comment_text = " ".join(
        str(comment.body) for comment in submission.comments.list()
    )

    data = pd.DataFrame({
        "id": [submission.id],
        "title": [submission.title],
        "body": [submission.selftext],
        "comments": [comment_text],
    })
    # fillna returns a new frame; it must be reassigned to have any effect.
    data = data.fillna("")

    data['title'] = data['title'].str.lower()
    data['body'] = data['body'].str.lower()
    # NOTE(review): comments are not lower-cased, unlike title and body, so
    # capitalised stop words survive filtering. Left as-is because the saved
    # model presumably saw the same features during training -- confirm.

    data['title_words'] = data.apply(title_preprocessing, axis=1)
    data['body_words'] = data.apply(body_preprocessing, axis=1)
    data['comment_words'] = data.apply(comment_preprocessing, axis=1)

    # NOTE(review): the three fields are concatenated with no separator, which
    # fuses the last word of one field with the first word of the next. Kept
    # unchanged to match whatever the model was trained on -- confirm.
    data['combined'] = (
        data['title_words'] + data['body_words'] + data['comment_words']
    )
    return model.predict(data['combined'])[0]
def FlairDetect(url):
    """Return the ``link_flair_css_class`` of the Reddit submission at ``url``.

    Args:
        url: URL of the Reddit submission to look up.

    Returns:
        The submission's ``link_flair_css_class`` attribute as reported by
        the module-level ``reddit`` client.
    """
    return reddit.submission(url=url).link_flair_css_class