From b4208ec88ec2be641a1f9f19e5b88be06fe3d40f Mon Sep 17 00:00:00 2001
From: rasbt <mail@sebastianraschka.com>
Date: Wed, 31 May 2017 23:49:52 -0400
Subject: [PATCH] cat movieclassifier files into notebook for easier reference

---
 code/ch09/ch09.ipynb | 547 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 469 insertions(+), 78 deletions(-)

diff --git a/code/ch09/ch09.ipynb b/code/ch09/ch09.ipynb
index 1778becf..2b9cb875 100644
--- a/code/ch09/ch09.ipynb
+++ b/code/ch09/ch09.ipynb
@@ -35,9 +35,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -177,9 +175,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import numpy as np\n",
@@ -208,9 +204,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -270,9 +264,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def get_minibatch(doc_stream, size):\n",
@@ -290,9 +282,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "from sklearn.feature_extraction.text import HashingVectorizer\n",
@@ -310,9 +300,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stderr",
@@ -341,9 +329,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -362,9 +348,7 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "clf = clf.partial_fit(X_test, y_test)"
@@ -397,9 +381,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import pickle\n",
@@ -423,9 +405,7 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -492,9 +472,7 @@
   {
    "cell_type": "code",
    "execution_count": 13,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import pickle\n",
@@ -508,9 +486,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -556,9 +532,7 @@
   {
    "cell_type": "code",
    "execution_count": 15,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import sqlite3\n",
@@ -584,9 +558,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "conn = sqlite3.connect('reviews.sqlite')\n",
@@ -601,9 +573,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -620,9 +590,7 @@
   {
    "cell_type": "code",
    "execution_count": 18,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -676,7 +644,67 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "..."
+    "Directory structure:\n",
+    "\n",
+    "    1st_flask_app_1/\n",
+    "       app.py\n",
+    "       templates/\n",
+    "           first_app.html\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "from flask import Flask, render_template\r\n",
+      "\r\n",
+      "app = Flask(__name__)\r\n",
+      "\r\n",
+      "@app.route('/')\r\n",
+      "def index():\r\n",
+      "    return render_template('first_app.html')\r\n",
+      "\r\n",
+      "if __name__ == '__main__':\r\n",
+      "    app.run(debug=True)"
+     ]
+    }
+   ],
+   "source": [
+    "!cat 1st_flask_app_1/app.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<!doctype html>\r\n",
+      "<html>\r\n",
+      "  <head>\r\n",
+      "    <title>First app</title>\r\n",
+      "  </head>\r\n",
+      "  <body>\r\n",
+      "\r\n",
+      "  <div>\r\n",
+      "\tHi, this is my first Flask web app!\r\n",
+      "  </div>\r\n",
+      "\r\n",
+      "  </body>\r\n",
+      "</html>"
+     ]
+    }
+   ],
+   "source": [
+    "!cat 1st_flask_app_1/templates/first_app.html"
    ]
   },
   {
@@ -689,9 +717,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -716,9 +742,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -740,6 +764,90 @@
     "Image(filename='../images/09_03.png', width=400) "
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Directory structure:\n",
+    "    \n",
+    "    1st_flask_app_2/\n",
+    "       app.py\n",
+    "       static/\n",
+    "           style.css\n",
+    "       templates/\n",
+    "           _formhelpers.html\n",
+    "           first_app.html\n",
+    "           hello.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "from flask import Flask, render_template, request\r\n",
+      "from wtforms import Form, TextAreaField, validators\r\n",
+      "\r\n",
+      "app = Flask(__name__)\r\n",
+      "\r\n",
+      "class HelloForm(Form):\r\n",
+      "    sayhello = TextAreaField('',[validators.DataRequired()])\r\n",
+      "\r\n",
+      "@app.route('/')\r\n",
+      "def index():\r\n",
+      "    form = HelloForm(request.form)\r\n",
+      "    return render_template('first_app.html', form=form)\r\n",
+      "\r\n",
+      "@app.route('/hello', methods=['POST'])\r\n",
+      "def hello():\r\n",
+      "    form = HelloForm(request.form)\r\n",
+      "    if request.method == 'POST' and form.validate():\r\n",
+      "        name = request.form['sayhello']\r\n",
+      "        return render_template('hello.html', name=name)\r\n",
+      "    return render_template('first_app.html', form=form)\r\n",
+      "\r\n",
+      "if __name__ == '__main__':\r\n",
+      "    app.run(debug=True)"
+     ]
+    }
+   ],
+   "source": [
+    "!cat 1st_flask_app_2/app.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{% macro render_field(field) %}\r\n",
+      "  <dt>{{ field.label }}\r\n",
+      "  <dd>{{ field(**kwargs)|safe }}\r\n",
+      "  {% if field.errors %}\r\n",
+      "    <ul class=errors>\r\n",
+      "    {% for error in field.errors %}\r\n",
+      "      <li>{{ error }}</li>\r\n",
+      "    {% endfor %}\r\n",
+      "    </ul>\r\n",
+      "  {% endif %}\r\n",
+      "  </dd>\r\n",
+      "  </dt>\r\n",
+      "{% endmacro %}\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!cat 1st_flask_app_2/templates/_formhelpers.html"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -758,9 +866,7 @@
   {
    "cell_type": "code",
    "execution_count": 21,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -785,9 +891,7 @@
   {
    "cell_type": "code",
    "execution_count": 22,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -812,9 +916,7 @@
   {
    "cell_type": "code",
    "execution_count": 23,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -839,9 +941,7 @@
   {
    "cell_type": "code",
    "execution_count": 24,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -863,6 +963,243 @@
     "Image(filename='../images/09_07.png', width=200) "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "from flask import Flask, render_template, request\r\n",
+      "from wtforms import Form, TextAreaField, validators\r\n",
+      "import pickle\r\n",
+      "import sqlite3\r\n",
+      "import os\r\n",
+      "import numpy as np\r\n",
+      "\r\n",
+      "# import HashingVectorizer from local dir\r\n",
+      "from vectorizer import vect\r\n",
+      "\r\n",
+      "app = Flask(__name__)\r\n",
+      "\r\n",
+      "######## Preparing the Classifier\r\n",
+      "cur_dir = os.path.dirname(__file__)\r\n",
+      "clf = pickle.load(open(os.path.join(cur_dir,\r\n",
+      "                 'pkl_objects',\r\n",
+      "                 'classifier.pkl'), 'rb'))\r\n",
+      "db = os.path.join(cur_dir, 'reviews.sqlite')\r\n",
+      "\r\n",
+      "def classify(document):\r\n",
+      "    label = {0: 'negative', 1: 'positive'}\r\n",
+      "    X = vect.transform([document])\r\n",
+      "    y = clf.predict(X)[0]\r\n",
+      "    proba = np.max(clf.predict_proba(X))\r\n",
+      "    return label[y], proba\r\n",
+      "\r\n",
+      "def train(document, y):\r\n",
+      "    X = vect.transform([document])\r\n",
+      "    clf.partial_fit(X, [y])\r\n",
+      "\r\n",
+      "def sqlite_entry(path, document, y):\r\n",
+      "    conn = sqlite3.connect(path)\r\n",
+      "    c = conn.cursor()\r\n",
+      "    c.execute(\"INSERT INTO review_db (review, sentiment, date)\"\\\r\n",
+      "    \" VALUES (?, ?, DATETIME('now'))\", (document, y))\r\n",
+      "    conn.commit()\r\n",
+      "    conn.close()\r\n",
+      "\r\n",
+      "######## Flask\r\n",
+      "class ReviewForm(Form):\r\n",
+      "    moviereview = TextAreaField('',\r\n",
+      "                                [validators.DataRequired(),\r\n",
+      "                                validators.length(min=15)])\r\n",
+      "\r\n",
+      "@app.route('/')\r\n",
+      "def index():\r\n",
+      "    form = ReviewForm(request.form)\r\n",
+      "    return render_template('reviewform.html', form=form)\r\n",
+      "\r\n",
+      "@app.route('/results', methods=['POST'])\r\n",
+      "def results():\r\n",
+      "    form = ReviewForm(request.form)\r\n",
+      "    if request.method == 'POST' and form.validate():\r\n",
+      "        review = request.form['moviereview']\r\n",
+      "        y, proba = classify(review)\r\n",
+      "        return render_template('results.html',\r\n",
+      "                                content=review,\r\n",
+      "                                prediction=y,\r\n",
+      "                                probability=round(proba*100, 2))\r\n",
+      "    return render_template('reviewform.html', form=form)\r\n",
+      "\r\n",
+      "@app.route('/thanks', methods=['POST'])\r\n",
+      "def feedback():\r\n",
+      "    feedback = request.form['feedback_button']\r\n",
+      "    review = request.form['review']\r\n",
+      "    prediction = request.form['prediction']\r\n",
+      "\r\n",
+      "    inv_label = {'negative': 0, 'positive': 1}\r\n",
+      "    y = inv_label[prediction]\r\n",
+      "    if feedback == 'Incorrect':\r\n",
+      "        y = int(not(y))\r\n",
+      "    train(review, y)\r\n",
+      "    sqlite_entry(db, review, y)\r\n",
+      "    return render_template('thanks.html')\r\n",
+      "\r\n",
+      "if __name__ == '__main__':\r\n",
+      "    app.run(debug=True)\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!cat ./movieclassifier/app.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<!doctype html>\r\n",
+      "<html>\r\n",
+      "  <head>\r\n",
+      "    <title>Movie Classification</title>\r\n",
+      "\t<link rel=\"stylesheet\" href=\"{{ url_for('static', filename='style.css') }}\">\r\n",
+      "  </head>\r\n",
+      "  <body>\r\n",
+      "\r\n",
+      "<h2>Please enter your movie review:</h2>\r\n",
+      "\r\n",
+      "{% from \"_formhelpers.html\" import render_field %}\r\n",
+      "\r\n",
+      "<form method=post action=\"/results\">\r\n",
+      "  <dl>\r\n",
+      "\t{{ render_field(form.moviereview, cols='30', rows='10') }}\r\n",
+      "  </dl>\r\n",
+      "  <div>\r\n",
+      "\t  <input type=submit value='Submit review' name='submit_btn'>\r\n",
+      "  </div>\r\n",
+      "</form>\r\n",
+      "\r\n",
+      "  </body>\r\n",
+      "</html>"
+     ]
+    }
+   ],
+   "source": [
+    "!cat ./movieclassifier/templates/reviewform.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<!doctype html>\r\n",
+      "<html>\r\n",
+      "  <head>\r\n",
+      "    <title>Movie Classification</title>\r\n",
+      "\t<link rel=\"stylesheet\" href=\"{{ url_for('static', filename='style.css') }}\">\r\n",
+      "  </head>\r\n",
+      "  <body>\r\n",
+      "\r\n",
+      "<h3>Your movie review:</h3>\r\n",
+      "<div>{{ content }}</div>\r\n",
+      "\r\n",
+      "<h3>Prediction:</h3>\r\n",
+      "<div>This movie review is <strong>{{ prediction }}</strong>\r\n",
+      "\t (probability: {{ probability }}%).</div>\r\n",
+      "\r\n",
+      "<div id='button'>\r\n",
+      "\t  <form action=\"/thanks\" method=\"post\">\r\n",
+      "\t    <input type=submit value='Correct' name='feedback_button'>\r\n",
+      "\t\t<input type=submit value='Incorrect' name='feedback_button'>\r\n",
+      "\t\t<input type=hidden value='{{ prediction }}' name='prediction'>\r\n",
+      "\t\t<input type=hidden value='{{ content }}' name='review'>\r\n",
+      "\t  </form>\r\n",
+      "</div>\r\n",
+      "\r\n",
+      "<div id='button'>\r\n",
+      "\t  <form action=\"/\">\r\n",
+      "\t    <input type=submit value='Submit another review'>\r\n",
+      "\t  </form>\r\n",
+      "</div>\r\n",
+      "\r\n",
+      "  </body>\r\n",
+      "</html>\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!cat ./movieclassifier/templates/results.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "body{\r\n",
+      "\twidth:600px;\r\n",
+      "}\r\n",
+      "\r\n",
+      ".button{\r\n",
+      "\tpadding-top: 20px;\r\n",
+      "}\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!cat ./movieclassifier/static/style.css"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<!doctype html>\r\n",
+      "<html>\r\n",
+      "  <head>\r\n",
+      "    <title>Movie Classification</title>\r\n",
+      "\t<link rel=\"stylesheet\" href=\"{{ url_for('static', filename='style.css') }}\">\r\n",
+      "  </head>\r\n",
+      "  <body>\r\n",
+      "\r\n",
+      "<h3>Thank you for your feedback!</h3>\r\n",
+      "\r\n",
+      "<div id='button'>\r\n",
+      "\t  <form action=\"/\">\r\n",
+      "\t    <input type=submit value='Submit another review'>\r\n",
+      "\t  </form>\r\n",
+      "</div>\r\n",
+      "\r\n",
+      "  </body>\r\n",
+      "</html>"
+     ]
+    }
+   ],
+   "source": [
+    "!cat ./movieclassifier/templates/thanks.html"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -881,9 +1218,7 @@
   {
    "cell_type": "code",
    "execution_count": 25,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -937,9 +1272,7 @@
   {
    "cell_type": "code",
    "execution_count": 26,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import pickle\n",
@@ -980,9 +1313,7 @@
   {
    "cell_type": "code",
    "execution_count": 27,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "cur_dir = '.'\n",
@@ -1007,6 +1338,66 @@
     "#             , protocol=4)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "import pickle\r\n",
+      "import sqlite3\r\n",
+      "import numpy as np\r\n",
+      "import os\r\n",
+      "\r\n",
+      "# import HashingVectorizer from local dir\r\n",
+      "from vectorizer import vect\r\n",
+      "\r\n",
+      "def update_model(db_path, model, batch_size=10000):\r\n",
+      "\r\n",
+      "    conn = sqlite3.connect(db_path)\r\n",
+      "    c = conn.cursor()\r\n",
+      "    c.execute('SELECT * from review_db')\r\n",
+      "\r\n",
+      "    results = c.fetchmany(batch_size)\r\n",
+      "    while results:\r\n",
+      "        data = np.array(results)\r\n",
+      "        X = data[:, 0]\r\n",
+      "        y = data[:, 1].astype(int)\r\n",
+      "\r\n",
+      "        classes = np.array([0, 1])\r\n",
+      "        X_train = vect.transform(X)\r\n",
+      "        model.partial_fit(X_train, y, classes=classes)\r\n",
+      "        results = c.fetchmany(batch_size)\r\n",
+      "\r\n",
+      "    conn.close()\r\n",
+      "    return model\r\n",
+      "\r\n",
+      "cur_dir = os.path.dirname(__file__)\r\n",
+      "\r\n",
+      "clf = pickle.load(open(os.path.join(cur_dir,\r\n",
+      "                  'pkl_objects',\r\n",
+      "                  'classifier.pkl'), 'rb'))\r\n",
+      "db = os.path.join(cur_dir, 'reviews.sqlite')\r\n",
+      "\r\n",
+      "clf = update_model(db_path=db, model=clf, batch_size=10000)\r\n",
+      "\r\n",
+      "# Uncomment the following lines if you are sure that\r\n",
+      "# you want to update your classifier.pkl file\r\n",
+      "# permanently.\r\n",
+      "\r\n",
+      "# pickle.dump(clf, open(os.path.join(cur_dir,\r\n",
+      "#             'pkl_objects', 'classifier.pkl'), 'wb')\r\n",
+      "#             , protocol=4)\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!cat ./movieclassifier_with_update/update.py"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -1050,9 +1441,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.2"
+   "version": "3.6.0"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }