From 37b385ab579eb28a28ad8059a5bf7927b118bb7e Mon Sep 17 00:00:00 2001 From: Roanak Baviskar Date: Sun, 21 Jun 2020 12:09:00 -0700 Subject: [PATCH] adding flask app and deploying --- Procfile | 1 + README.md | 16 +++++++ app.py | 19 ++++++++ get_synonyms.py | 46 ++++++++----------- model_init.py | 5 +++ requirements.txt | 102 ++++++++++++++++++++++++++++++++++++++++++ templates/index.html | 15 +++++++ templates/output.html | 0 8 files changed, 176 insertions(+), 28 deletions(-) create mode 100644 Procfile create mode 100644 app.py create mode 100644 requirements.txt create mode 100644 templates/index.html create mode 100644 templates/output.html diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..8001d1a --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: gunicorn app:app \ No newline at end of file diff --git a/README.md b/README.md index 710b018..10cf451 100644 --- a/README.md +++ b/README.md @@ -4,3 +4,19 @@ Dependencies: nltk, numpy, torch Required nltk downloads:
`nltk.download('averaged_perceptron_tagger')`
`nltk.download('wordnet')` + +Examples:
+`sentence = "I tightened the bolt to make sure it didn't fall apart"`
+`word = "bolt"` + +`sentence = "The fast guy ran by in a bolt"`
+`word = "bolt"` + +`sentence = "The bolt during the thunderstorm shocked me"`
+`word = "bolt"` + +`sentence = "The current was too strong to swim against"`
+`word = "current"` + +`sentence = "The high current on the wire shocked me"`
+`word = "current"` \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000..f2f05ef --- /dev/null +++ b/app.py @@ -0,0 +1,19 @@ +from flask import Flask, request, render_template + +from get_synonyms import get_words, get_best_words + +app = Flask(__name__) + + +@app.route('/') +def index(): + return render_template('index.html') + + +@app.route('/', methods=['POST']) +def index_post(): + word = request.form['word'] + sent = request.form['sent'] + synsets = get_words(word) + best_words = get_best_words(word, sent, synsets) + return render_template('index.html', synsets=synsets, best_words=best_words, word=word, sentence=sent) diff --git a/get_synonyms.py b/get_synonyms.py index a0f9539..f278dd2 100644 --- a/get_synonyms.py +++ b/get_synonyms.py @@ -6,13 +6,7 @@ # nltk.download('wordnet') # nltk.download('punkt') -from model_init import load_model -print('loading model') -model = load_model() -print('model loaded') - -sent1 = "The current from the wire hurt" -word1 = "current" +from model_init import model def get_words(in_word): @@ -34,36 +28,32 @@ def cosine(u, v): return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)) -def get_best_words(word, sent): - syn = get_words(word) +def get_best_words(word, sent, syn): parts = re.split(f'{word}', sent) - oldSent = model.encode([sent])[0] - oldWord = word + old_sent = model.encode([sent])[0] - synMax = -float('inf') - bestWords = [] - wordMax = -float('inf') - bestWord = '' + syn_max = -float('inf') + best_words = [] + word_max = -float('inf') + best_word = '' for synset in syn: total = 0 for w in synset: - newSent = f'{w}'.join(parts) - sim = cosine(model.encode([newSent])[0], oldSent) - if sim > wordMax: - bestWord = w - wordMax = sim + new_sent = f'{w}'.join(parts) + sim = cosine(model.encode([new_sent])[0], old_sent) + if sim > word_max: + best_word = w + word_max = sim total += sim print(f'{w} and {sim}') avg = total / len(synset) print(f'{synset} and {avg}') - if avg > 
synMax: - bestWords = synset - synMax = avg - - print(bestWords) - - print(f'best word indiv: {bestWord}') + if avg > syn_max: + best_words = synset + syn_max = avg + print(best_words) + print(f'best word indiv: {best_word}') -get_best_words(word1, sent1) + return best_words diff --git a/model_init.py b/model_init.py index 9808f5e..577fb8d 100644 --- a/model_init.py +++ b/model_init.py @@ -18,3 +18,8 @@ def load_model(): model.build_vocab_k_words(K=100000) print('done building vocab') return model + + +print('loading model') +model = load_model() +print('model loaded') diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a967a2d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,102 @@ +absl-py==0.9.0 +appnope==0.1.0 +astor==0.8.1 +astunparse==1.6.3 +attrs==19.3.0 +backcall==0.1.0 +bleach==3.1.5 +cachetools==4.1.0 +certifi==2020.4.5.1 +chardet==3.0.4 +click==7.1.2 +decorator==4.4.2 +defusedxml==0.6.0 +entrypoints==0.3 +flake8==3.7.8 +Flask==1.1.2 +future==0.18.2 +gast==0.3.3 +google-auth==1.15.0 +google-auth-oauthlib==0.4.1 +google-pasta==0.2.0 +grpcio==1.29.0 +gunicorn==20.0.4 +h5py==2.10.0 +idna==2.9 +importlib-metadata==1.6.0 +ipykernel==5.3.0 +ipython==7.14.0 +ipython-genutils==0.2.0 +ipywidgets==7.5.1 +itsdangerous==1.1.0 +jedi==0.17.0 +Jinja2==2.11.2 +joblib==0.15.1 +jsonschema==3.2.0 +jupyter==1.0.0 +jupyter-client==6.1.3 +jupyter-console==6.1.0 +jupyter-core==4.6.3 +Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.2 +Mako==1.1.0 +Markdown==3.1.1 +MarkupSafe==1.1.1 +mccabe==0.6.1 +mistune==0.8.4 +nbconvert==5.6.1 +nbformat==5.0.6 +nltk==3.5 +notebook==6.0.3 +numpy==1.17.2 +oauthlib==3.1.0 +opencv-python==4.2.0.34 +opt-einsum==3.2.1 +packaging==20.4 +pandocfilters==1.4.2 +parso==0.7.0 +pdoc3==0.7.0 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==6.0.0 +prometheus-client==0.8.0 +prompt-toolkit==3.0.5 +protobuf==3.12.2 +ptyprocess==0.6.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycodestyle==2.5.0 +pyflakes==2.1.1 +Pygments==2.6.1 
+pyparsing==2.4.7 +pyrsistent==0.16.0 +python-dateutil==2.8.1 +pyzmq==19.0.1 +qtconsole==4.7.4 +QtPy==1.9.0 +regex==2020.6.8 +requests==2.23.0 +requests-oauthlib==1.3.0 +rsa==4.0 +scipy==1.4.1 +Send2Trash==1.5.0 +six==1.15.0 +tensorboard==2.2.1 +tensorboard-plugin-wit==1.6.0.post3 +tensorflow==2.2.0 +tensorflow-estimator==2.2.0 +termcolor==1.1.0 +terminado==0.8.3 +testpath==0.4.4 +torch==1.5.0 +torchvision==0.6.0 +tornado==6.0.4 +tqdm==4.46.1 +traitlets==4.3.3 +urllib3==1.25.9 +wcwidth==0.1.9 +webencodings==0.5.1 +Werkzeug==1.0.1 +widgetsnbextension==3.5.1 +wrapt==1.12.1 +zipp==3.1.0 diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..fdd5c2f --- /dev/null +++ b/templates/index.html @@ -0,0 +1,15 @@ + +Contextual Thesaurus +
+ Word: + Sentence: + +
+{% if synsets %} +
Given Word: {{word}}
+
Given Sentence: {{sentence}}
+
Possible Synsets: {{synsets}}
+
Determined Synset: {{best_words}}
+{% else %} +
This doesn't really work that well
+{% endif %} diff --git a/templates/output.html b/templates/output.html new file mode 100644 index 0000000..e69de29