From b4208ec88ec2be641a1f9f19e5b88be06fe3d40f Mon Sep 17 00:00:00 2001 From: rasbt Date: Wed, 31 May 2017 23:49:52 -0400 Subject: [PATCH] cat movieclassifier files into notebook for easier reference --- code/ch09/ch09.ipynb | 547 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 469 insertions(+), 78 deletions(-) diff --git a/code/ch09/ch09.ipynb b/code/ch09/ch09.ipynb index 1778becf..2b9cb875 100644 --- a/code/ch09/ch09.ipynb +++ b/code/ch09/ch09.ipynb @@ -35,9 +35,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -177,9 +175,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", @@ -208,9 +204,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -270,9 +264,7 @@ { "cell_type": "code", "execution_count": 5, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "def get_minibatch(doc_stream, size):\n", @@ -290,9 +282,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "from sklearn.feature_extraction.text import HashingVectorizer\n", @@ -310,9 +300,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -341,9 +329,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -362,9 +348,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "clf = clf.partial_fit(X_test, y_test)" @@ -397,9 +381,7 @@ { "cell_type": "code", "execution_count": 10, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import pickle\n", @@ -423,9 +405,7 @@ { "cell_type": "code", "execution_count": 11, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -492,9 +472,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import pickle\n", @@ -508,9 +486,7 @@ { "cell_type": "code", "execution_count": 14, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -556,9 +532,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import sqlite3\n", @@ -584,9 +558,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "conn = sqlite3.connect('reviews.sqlite')\n", @@ -601,9 +573,7 @@ { "cell_type": "code", "execution_count": 17, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -620,9 +590,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -676,7 +644,67 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "..." + "Directory structure:\n", + "\n", + " 1st_flask_app_1/\n", + " app.py\n", + " templates/\n", + " first_app.html\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "from flask import Flask, render_template\r\n", + "\r\n", + "app = Flask(__name__)\r\n", + "\r\n", + "@app.route('/')\r\n", + "def index():\r\n", + " return render_template('first_app.html')\r\n", + "\r\n", + "if __name__ == '__main__':\r\n", + " app.run(debug=True)" + ] + } + ], + "source": [ + "!cat 1st_flask_app_1/app.py" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\r\n", + " \r\n", + " First app\r\n", + " \r\n", + " \r\n", + "\r\n", + "
\r\n", + "\tHi, this is my first Flask web app!\r\n", + "
\r\n", + "\r\n", + " \r\n", + "" + ] + } + ], + "source": [ + "!cat 1st_flask_app_1/templates/first_app.html" ] }, { @@ -689,9 +717,7 @@ { "cell_type": "code", "execution_count": 19, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -716,9 +742,7 @@ { "cell_type": "code", "execution_count": 20, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -740,6 +764,90 @@ "Image(filename='../images/09_03.png', width=400) " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Directory structure:\n", + " \n", + " 1st_flask_app_2/\n", + " app.py\n", + " static/\n", + " style.css\n", + " templates/\n", + " _formhelpers.html\n", + " first_app.html\n", + " hello.html" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "from flask import Flask, render_template, request\r\n", + "from wtforms import Form, TextAreaField, validators\r\n", + "\r\n", + "app = Flask(__name__)\r\n", + "\r\n", + "class HelloForm(Form):\r\n", + " sayhello = TextAreaField('',[validators.DataRequired()])\r\n", + "\r\n", + "@app.route('/')\r\n", + "def index():\r\n", + " form = HelloForm(request.form)\r\n", + " return render_template('first_app.html', form=form)\r\n", + "\r\n", + "@app.route('/hello', methods=['POST'])\r\n", + "def hello():\r\n", + " form = HelloForm(request.form)\r\n", + " if request.method == 'POST' and form.validate():\r\n", + " name = request.form['sayhello']\r\n", + " return render_template('hello.html', name=name)\r\n", + " return render_template('first_app.html', form=form)\r\n", + "\r\n", + "if __name__ == '__main__':\r\n", + " app.run(debug=True)" + ] + } + ], + "source": [ + "!cat 1st_flask_app_2/app.py" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{% macro render_field(field) %}\r\n", + "
{{ field.label }}\r\n", + "
{{ field(**kwargs)|safe }}\r\n", + " {% if field.errors %}\r\n", + " \r\n", + " {% endif %}\r\n", + "
\r\n", + " \r\n", + "{% endmacro %}\r\n" + ] + } + ], + "source": [ + "!cat 1st_flask_app_2/templates/_formhelpers.html" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -758,9 +866,7 @@ { "cell_type": "code", "execution_count": 21, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -785,9 +891,7 @@ { "cell_type": "code", "execution_count": 22, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -812,9 +916,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -839,9 +941,7 @@ { "cell_type": "code", "execution_count": 24, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -863,6 +963,243 @@ "Image(filename='../images/09_07.png', width=200) " ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "from flask import Flask, render_template, request\r\n", + "from wtforms import Form, TextAreaField, validators\r\n", + "import pickle\r\n", + "import sqlite3\r\n", + "import os\r\n", + "import numpy as np\r\n", + "\r\n", + "# import HashingVectorizer from local dir\r\n", + "from vectorizer import vect\r\n", + "\r\n", + "app = Flask(__name__)\r\n", + "\r\n", + "######## Preparing the Classifier\r\n", + "cur_dir = os.path.dirname(__file__)\r\n", + "clf = pickle.load(open(os.path.join(cur_dir,\r\n", + " 'pkl_objects',\r\n", + " 'classifier.pkl'), 'rb'))\r\n", + "db = os.path.join(cur_dir, 'reviews.sqlite')\r\n", + "\r\n", + "def classify(document):\r\n", + " label = {0: 'negative', 1: 'positive'}\r\n", + " X = vect.transform([document])\r\n", + " y = clf.predict(X)[0]\r\n", + " proba = np.max(clf.predict_proba(X))\r\n", + " return label[y], proba\r\n", + "\r\n", + "def train(document, y):\r\n", + " X = vect.transform([document])\r\n", + " clf.partial_fit(X, [y])\r\n", + "\r\n", + "def sqlite_entry(path, document, y):\r\n", + " conn = sqlite3.connect(path)\r\n", + " c = conn.cursor()\r\n", + " c.execute(\"INSERT INTO review_db (review, sentiment, date)\"\\\r\n", + " \" VALUES (?, ?, DATETIME('now'))\", (document, y))\r\n", + " conn.commit()\r\n", + " conn.close()\r\n", + "\r\n", + "######## Flask\r\n", + "class ReviewForm(Form):\r\n", + " moviereview = TextAreaField('',\r\n", + " [validators.DataRequired(),\r\n", + " validators.length(min=15)])\r\n", + "\r\n", + "@app.route('/')\r\n", + "def index():\r\n", + " form = ReviewForm(request.form)\r\n", + " return render_template('reviewform.html', form=form)\r\n", + "\r\n", + "@app.route('/results', methods=['POST'])\r\n", + "def results():\r\n", + " form = ReviewForm(request.form)\r\n", + " if request.method == 'POST' and form.validate():\r\n", + " review = request.form['moviereview']\r\n", + " y, proba = classify(review)\r\n", + " return render_template('results.html',\r\n", + " content=review,\r\n", + " prediction=y,\r\n", + " probability=round(proba*100, 2))\r\n", + " return render_template('reviewform.html', form=form)\r\n", + "\r\n", + "@app.route('/thanks', methods=['POST'])\r\n", + "def feedback():\r\n", + " feedback = request.form['feedback_button']\r\n", + " review = request.form['review']\r\n", + " prediction = request.form['prediction']\r\n", + "\r\n", + " inv_label = {'negative': 0, 'positive': 1}\r\n", + " y = inv_label[prediction]\r\n", + " if feedback == 'Incorrect':\r\n", + " y = int(not(y))\r\n", + " train(review, y)\r\n", + " sqlite_entry(db, review, y)\r\n", + " return render_template('thanks.html')\r\n", + "\r\n", + "if __name__ == '__main__':\r\n", + " app.run(debug=True)\r\n" + ] + } + ], + "source": [ + "!cat ./movieclassifier/app.py" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\r\n", + " \r\n", + " Movie Classification\r\n", + "\t\r\n", + " \r\n", + " \r\n", + "\r\n", + "

Please enter your movie review:

\r\n", + "\r\n", + "{% from \"_formhelpers.html\" import render_field %}\r\n", + "\r\n", + "
\r\n", + "
\r\n", + "\t{{ render_field(form.moviereview, cols='30', rows='10') }}\r\n", + "
\r\n", + "
\r\n", + "\t \r\n", + "
\r\n", + "
\r\n", + "\r\n", + " \r\n", + "" + ] + } + ], + "source": [ + "!cat ./movieclassifier/templates/reviewform.html" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\r\n", + " \r\n", + " Movie Classification\r\n", + "\t\r\n", + " \r\n", + " \r\n", + "\r\n", + "

Your movie review:

\r\n", + "
{{ content }}
\r\n", + "\r\n", + "

Prediction:

\r\n", + "
This movie review is {{ prediction }}\r\n", + "\t (probability: {{ probability }}%).
\r\n", + "\r\n", + "
\r\n", + "\t
\r\n", + "\t \r\n", + "\t\t\r\n", + "\t\t\r\n", + "\t\t\r\n", + "\t
\r\n", + "
\r\n", + "\r\n", + "
\r\n", + "\t
\r\n", + "\t \r\n", + "\t
\r\n", + "
\r\n", + "\r\n", + " \r\n", + "\r\n" + ] + } + ], + "source": [ + "!cat ./movieclassifier/templates/results.html" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "body{\r\n", + "\twidth:600px;\r\n", + "}\r\n", + "\r\n", + ".button{\r\n", + "\tpadding-top: 20px;\r\n", + "}\r\n" + ] + } + ], + "source": [ + "!cat ./movieclassifier/static/style.css" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\r\n", + " \r\n", + " Movie Classification\r\n", + "\t\r\n", + " \r\n", + " \r\n", + "\r\n", + "

Thank you for your feedback!

\r\n", + "\r\n", + "
\r\n", + "\t
\r\n", + "\t \r\n", + "\t
\r\n", + "
\r\n", + "\r\n", + " \r\n", + "" + ] + } + ], + "source": [ + "!cat ./movieclassifier/templates/thanks.html" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -881,9 +1218,7 @@ { "cell_type": "code", "execution_count": 25, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -937,9 +1272,7 @@ { "cell_type": "code", "execution_count": 26, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import pickle\n", @@ -980,9 +1313,7 @@ { "cell_type": "code", "execution_count": 27, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "cur_dir = '.'\n", @@ -1007,6 +1338,66 @@ "# , protocol=4)" ] }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "import pickle\r\n", + "import sqlite3\r\n", + "import numpy as np\r\n", + "import os\r\n", + "\r\n", + "# import HashingVectorizer from local dir\r\n", + "from vectorizer import vect\r\n", + "\r\n", + "def update_model(db_path, model, batch_size=10000):\r\n", + "\r\n", + " conn = sqlite3.connect(db_path)\r\n", + " c = conn.cursor()\r\n", + " c.execute('SELECT * from review_db')\r\n", + "\r\n", + " results = c.fetchmany(batch_size)\r\n", + " while results:\r\n", + " data = np.array(results)\r\n", + " X = data[:, 0]\r\n", + " y = data[:, 1].astype(int)\r\n", + "\r\n", + " classes = np.array([0, 1])\r\n", + " X_train = vect.transform(X)\r\n", + " model.partial_fit(X_train, y, classes=classes)\r\n", + " results = c.fetchmany(batch_size)\r\n", + "\r\n", + " conn.close()\r\n", + " return model\r\n", + "\r\n", + "cur_dir = os.path.dirname(__file__)\r\n", + "\r\n", + "clf = pickle.load(open(os.path.join(cur_dir,\r\n", + " 'pkl_objects',\r\n", + " 'classifier.pkl'), 'rb'))\r\n", + "db = os.path.join(cur_dir, 'reviews.sqlite')\r\n", + "\r\n", + "clf = update_model(db_path=db, model=clf, batch_size=10000)\r\n", + "\r\n", + "# Uncomment the following lines if you are sure that\r\n", + "# you want to update your classifier.pkl file\r\n", + "# permanently.\r\n", + "\r\n", + "# pickle.dump(clf, open(os.path.join(cur_dir,\r\n", + "# 'pkl_objects', 'classifier.pkl'), 'wb')\r\n", + "# , protocol=4)\r\n" + ] + } + ], + "source": [ + "!cat ./movieclassifier_with_update/update.py" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1050,9 +1441,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.0" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 }