-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
107 lines (90 loc) · 2.95 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import requests
import operator
import re
import nltk
import json
from flask import Flask, render_template, request, jsonify
from flask_sqlalchemy import SQLAlchemy
from stop_words import stops
from collections import Counter
from bs4 import BeautifulSoup
from rq import Queue
from rq.job import Job
from worker import conn
# Application wiring. The configuration object is named by the APP_SETTINGS
# environment variable; a missing variable raises KeyError at import time.
app = Flask(__name__)
app.config.from_object(os.environ['APP_SETTINGS'])
# NOTE(review): modification tracking is switched on here. Flask-SQLAlchemy
# documents this option as adding overhead; confirm something actually
# consumes the tracking signals before keeping it enabled.
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
db = SQLAlchemy(app)
# Job queue bound to the connection object exported by the worker module.
q = Queue(connection=conn) # setup redis connection and initialize Queue
# Imported only after `db` exists; presumably models.py imports `db` from this
# module and provides `Result`, which the code below relies on (verify).
from models import *
# function to process input and save to db
def count_and_save_words(url, timeout=10):
    """Fetch *url*, count the words on the page and store the counts.

    The function is enqueued as a background job by ``get_counts``, so
    every failure is reported through the return value instead of by
    rendering a template.

    Args:
        url: Address of the page to analyse, including its scheme.
        timeout: Seconds to wait on the remote server before giving up,
            so an unresponsive host cannot block the job indefinitely.

    Returns:
        The ``id`` of the stored ``Result`` row on success, otherwise a
        dict of the form ``{"errors": [...]}`` describing the failure.
    """
    errors = []

    try:
        r = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        errors.append("Unable to get info from URL. Please make sure URL is valid.")
        errors.append("Error:")
        # Store the message, not the exception object, so the job result
        # stays a plain, serialisable structure.
        errors.append(str(e))
        return {"errors": errors}

    # A Response is falsy for 4xx/5xx statuses; report that explicitly
    # rather than falling through with nothing to process.
    if not r.ok:
        errors.append(
            "URL responded with HTTP status {}.".format(r.status_code)
        )
        return {"errors": errors}

    # text processing
    raw = BeautifulSoup(r.text, "html.parser").get_text()
    # Let nltk find the resources bundled with the project; register the
    # directory once instead of growing the search path on every call.
    nltk_dir = './nltk_data/'
    if nltk_dir not in nltk.data.path:
        nltk.data.path.append(nltk_dir)
    tokens = nltk.word_tokenize(raw)
    text = nltk.Text(tokens)

    # remove punctuation and count raw words: keep only tokens that
    # contain at least one ASCII letter
    non_punct = re.compile('.*[A-Za-z].*')
    raw_words = [w for w in text if non_punct.match(w)]
    raw_word_count = Counter(raw_words)

    # stop words
    no_stop_words = [w for w in raw_words if w.lower() not in stops]
    no_stop_words_count = Counter(no_stop_words)

    # save the results
    try:
        result = Result(
            url=url,
            result_all=raw_word_count,
            result_no_stop_words=no_stop_words_count
        )
        db.session.add(result)
        db.session.commit()
        result_id = result.id
    except Exception:
        # Leave the session usable for later work and keep the traceback
        # in the log instead of discarding it silently.
        db.session.rollback()
        app.logger.exception("Unable to save word counts for %s", url)
        errors.append("Unable to add item to database.")
        return {"errors": errors}

    print("DB saved")
    return result_id
# home page
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the landing page; the same template answers GET and POST."""
    page = render_template("index.html")
    return page
# process input
@app.route('/start', methods=['POST'])
def get_counts():
    """Queue a word-count job for the URL posted by the client.

    The request body is expected to be JSON of the form
    ``{"url": "<address>"}``. A missing scheme defaults to ``http://``.

    Returns:
        The id of the queued job as plain text, or a JSON error list with
        status 400 when the body is not valid JSON or has no usable URL.
    """
    # process url
    try:
        data = json.loads(request.data.decode())
    except ValueError:
        # Covers both undecodable bytes and malformed JSON: the exceptions
        # raised for each are ValueError subclasses.
        data = None

    url = data.get("url") if isinstance(data, dict) else None
    if not isinstance(url, str) or not url.strip():
        return jsonify(
            errors=['Request body must be JSON with a non-empty "url".']
        ), 400

    url = url.strip()
    # Compare case-insensitively so "HTTP://..." is not prefixed twice.
    if not url.lower().startswith(('https://', 'http://')):
        url = 'http://' + url

    # start job
    job = q.enqueue_call(
        func=count_and_save_words, args=(url,), result_ttl=5000
    )
    return job.get_id()
# for angular to check on job
@app.route("/results/<job_key>", methods=['GET'])
def get_results(job_key):
    """Report the state of a word-count job, or its top words once done.

    Args:
        job_key: Id of the job as returned by the ``/start`` endpoint.

    Returns:
        * 200 and a JSON list of ``[word, count]`` pairs (highest count
          first, at most ten) when the job finished successfully.
        * 202 and a plain-text notice while the job is still pending.
        * 404 and a JSON error list when the job or its stored row is
          unknown.
        * 500 and a JSON error list when the job failed or reported errors.
    """
    # Imported locally: only this view needs the exception type.
    from rq.exceptions import NoSuchJobError

    try:
        job = Job.fetch(job_key, connection=conn)
    except NoSuchJobError:
        return jsonify(errors=["Unknown job."]), 404

    # Without this check a crashed job would be reported as "still
    # counting" forever and the client would never stop polling.
    if job.is_failed:
        return jsonify(errors=["Unable to count words for this job."]), 500

    if not job.is_finished:
        return "Still counting words.", 202

    outcome = job.result
    # count_and_save_words reports problems as {"errors": [...]} instead
    # of a row id; pass those through rather than querying with a dict.
    if isinstance(outcome, dict):
        return jsonify(outcome), 500

    result = Result.query.filter_by(id=outcome).first()
    if result is None:
        return jsonify(errors=["No stored result for this job."]), 404

    results = sorted(
        result.result_no_stop_words.items(),
        key=operator.itemgetter(1),
        reverse=True
    )[:10]
    return jsonify(results)
# Start the Flask server only when this file is executed directly.
if __name__ == "__main__":
    app.run()