From 1673987129f8dd8cfcde4bdd110ed9e8b6b0e935 Mon Sep 17 00:00:00 2001 From: Daniel Bourke Date: Thu, 23 Sep 2021 15:34:21 +1000 Subject: [PATCH] update each model to have own embedding layer as in #204 --- 08_introduction_to_nlp_in_tensorflow.ipynb | 2206 ++++++++++---------- 1 file changed, 1079 insertions(+), 1127 deletions(-) diff --git a/08_introduction_to_nlp_in_tensorflow.ipynb b/08_introduction_to_nlp_in_tensorflow.ipynb index 2812ab52..4ba0f216 100644 --- a/08_introduction_to_nlp_in_tensorflow.ipynb +++ b/08_introduction_to_nlp_in_tensorflow.ipynb @@ -6,7 +6,7 @@ "name": "08_introduction_to_nlp_in_tensorflow.ipynb", "provenance": [], "collapsed_sections": [], - "authorship_tag": "ABX9TyOzyMKNtxBNI1iKmyBeAgFY", + "authorship_tag": "ABX9TyNl2+t3VUTcweJBoXgoWw7J", "include_colab_link": true }, "kernelspec": { @@ -113,20 +113,20 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "4749ce31-d0bf-4c0d-b9c6-4e87d297901e" + "outputId": "57c44ca3-b01f-412c-a670-73b70e861937" }, "source": [ "# Check for GPU\n", "!nvidia-smi -L" ], - "execution_count": null, + "execution_count": 1, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "GPU 0: Tesla T4 (UUID: GPU-2162e221-0b02-e24d-c182-b649d7dfb5c3)\n" - ], - "name": "stdout" + "GPU 0: Tesla K80 (UUID: GPU-7c8181f1-42c3-e0c6-0862-932bb75fde7b)\n" + ] } ] }, @@ -152,30 +152,30 @@ "base_uri": "https://localhost:8080/" }, "id": "aFOHPqgE8pv-", - "outputId": "3ec94e60-4fce-4ec8-cc79-733d8b448435" + "outputId": "523e55ff-21ee-41ed-e3a4-7e19274f9ea8" }, "source": [ "# Download helper functions script\n", "!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py" ], - "execution_count": null, + "execution_count": 2, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "--2021-04-29 03:05:39-- https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py\n", - "Resolving 
raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "--2021-09-23 05:25:54-- https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 10246 (10K) [text/plain]\n", - "Saving to: ‘helper_functions.py’\n", + "Saving to: ‘helper_functions.py.4’\n", "\n", - "helper_functions.py 100%[===================>] 10.01K --.-KB/s in 0s \n", + "\rhelper_functions.py 0%[ ] 0 --.-KB/s \rhelper_functions.py 100%[===================>] 10.01K --.-KB/s in 0s \n", "\n", - "2021-04-29 03:05:39 (108 MB/s) - ‘helper_functions.py’ saved [10246/10246]\n", + "2021-09-23 05:25:54 (43.6 MB/s) - ‘helper_functions.py.4’ saved [10246/10246]\n", "\n" - ], - "name": "stdout" + ] } ] }, @@ -188,7 +188,7 @@ "# Import series of helper functions for the notebook\n", "from helper_functions import unzip_data, create_tensorboard_callback, plot_loss_curves, compare_historys" ], - "execution_count": null, + "execution_count": 3, "outputs": [] }, { @@ -225,7 +225,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "136c3091-e972-4df9-c279-207d86e784b0" + "outputId": "75a40c19-6eae-4323-d84e-c6af405fc0ba" }, "source": [ "# Download data (same as from Kaggle)\n", @@ -234,24 +234,24 @@ "# Unzip data\n", "unzip_data(\"nlp_getting_started.zip\")" ], - "execution_count": null, + "execution_count": 4, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "--2021-04-29 03:05:41-- https://storage.googleapis.com/ztm_tf_course/nlp_getting_started.zip\n", - "Resolving storage.googleapis.com (storage.googleapis.com)... 
172.217.13.240, 172.253.63.128, 142.250.31.128, ...\n", - "Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.13.240|:443... connected.\n", + "--2021-09-23 05:25:57-- https://storage.googleapis.com/ztm_tf_course/nlp_getting_started.zip\n", + "Resolving storage.googleapis.com (storage.googleapis.com)... 66.102.1.128, 172.253.120.128, 74.125.206.128, ...\n", + "Connecting to storage.googleapis.com (storage.googleapis.com)|66.102.1.128|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 607343 (593K) [application/zip]\n", - "Saving to: ‘nlp_getting_started.zip’\n", + "Saving to: ‘nlp_getting_started.zip.4’\n", "\n", - "\rnlp_getting_started 0%[ ] 0 --.-KB/s \rnlp_getting_started 100%[===================>] 593.11K --.-KB/s in 0.005s \n", + "\r nlp_getti 0%[ ] 0 --.-KB/s \rnlp_getting_started 100%[===================>] 593.11K --.-KB/s in 0.006s \n", "\n", - "2021-04-29 03:05:41 (109 MB/s) - ‘nlp_getting_started.zip’ saved [607343/607343]\n", + "2021-09-23 05:25:57 (101 MB/s) - ‘nlp_getting_started.zip.4’ saved [607343/607343]\n", "\n" - ], - "name": "stdout" + ] } ] }, @@ -296,9 +296,9 @@ "id": "qRvkeYEJIKsw", "colab": { "base_uri": "https://localhost:8080/", - "height": 202 + "height": 204 }, - "outputId": "e8252773-a1b6-4ac0-d5e1-5195eef4d595" + "outputId": "052a1d3e-1d52-47e9-ea39-df6dd4d149ca" }, "source": [ "# Turn .csv files into pandas DataFrame's\n", @@ -307,7 +307,7 @@ "test_df = pd.read_csv(\"test.csv\")\n", "train_df.head()" ], - "execution_count": null, + "execution_count": 5, "outputs": [ { "output_type": "execute_result", @@ -394,9 +394,7 @@ "[5 rows x 5 columns]" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 5 } ] @@ -416,16 +414,16 @@ "id": "ACCE7h6OMVjR", "colab": { "base_uri": "https://localhost:8080/", - "height": 202 + "height": 204 }, - "outputId": "d019c52f-9fca-477b-9c32-cd507e2f58e7" + "outputId": "51f2ae2e-df2a-4e15-a618-7087ecba1914" }, "source": [ "# Shuffle 
training dataframe\n", "train_df_shuffled = train_df.sample(frac=1, random_state=42) # shuffle with random_state=42 for reproducibility\n", "train_df_shuffled.head()" ], - "execution_count": null, + "execution_count": 6, "outputs": [ { "output_type": "execute_result", @@ -512,9 +510,7 @@ "[5 rows x 5 columns]" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 6 } ] @@ -545,15 +541,15 @@ "id": "tDh5t7thI5BM", "colab": { "base_uri": "https://localhost:8080/", - "height": 202 + "height": 204 }, - "outputId": "d7e003d4-f965-42fc-c0b0-4c5fd02d0027" + "outputId": "9f4321e2-a45e-4eef-93d1-71972c992e37" }, "source": [ "# The test data doesn't have a target (that's what we'd try to predict)\n", "test_df.head()" ], - "execution_count": null, + "execution_count": 7, "outputs": [ { "output_type": "execute_result", @@ -632,9 +628,7 @@ "4 11 NaN NaN Typhoon Soudelor kills 28 in China and Taiwan" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 7 } ] @@ -655,13 +649,13 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "c790b792-2bc6-44a0-db97-3f96a47b5a74" + "outputId": "50fa6a7a-a7cf-4dc5-b5a9-01ccb8dfbe61" }, "source": [ "# How many examples of each class?\n", "train_df.target.value_counts()" ], - "execution_count": null, + "execution_count": 8, "outputs": [ { "output_type": "execute_result", @@ -672,9 +666,7 @@ "Name: target, dtype: int64" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 8 } ] @@ -704,7 +696,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "fe529060-d675-43ac-b86b-8f0686dda62c" + "outputId": "ca4aefe4-3405-4a94-8e8a-163386b25709" }, "source": [ "# How many samples total?\n", @@ -712,16 +704,16 @@ "print(f\"Total test samples: {len(test_df)}\")\n", "print(f\"Total samples: {len(train_df) + len(test_df)}\")" ], - "execution_count": null, + "execution_count": 9, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Total 
training samples: 7613\n", "Total test samples: 3263\n", "Total samples: 10876\n" - ], - "name": "stdout" + ] } ] }, @@ -745,7 +737,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "c05afe9d-72a0-461f-e347-365f609630a8" + "outputId": "4e068f05-06fd-4611-b0c9-519f4e67d8b9" }, "source": [ "# Let's visualize some random training examples\n", @@ -757,43 +749,44 @@ " print(f\"Text:\\n{text}\\n\")\n", " print(\"---\\n\")" ], - "execution_count": null, + "execution_count": 10, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Target: 0 (not real disaster)\n", "Text:\n", - "I thought the loudest goal I ever screamed was Higuain's offside goal against Germany\n", + "https://t.co/eCMUjkKqX1 @ArianaGrande @ScreamQueens \n", + "Katherine's Death\n", "\n", "---\n", "\n", - "Target: 1 (real disaster)\n", + "Target: 0 (not real disaster)\n", "Text:\n", - "#hot Funtenna: hijacking computers to send data as sound waves [Black Hat 2015] http://t.co/J2aQs5loxu #prebreak #best\n", + "@TinyJecht Are you another Stand-user? If you are I will have to detonate you with my Killer Queen.\n", "\n", "---\n", "\n", - "Target: 0 (not real disaster)\n", + "Target: 1 (real disaster)\n", "Text:\n", - "I'm an emotional wreck right now.\n", + "70 Years After Atomic Bombs Japan Still Struggles With War Past http://t.co/5wfXbAQMBK The anniversary of the devastation wrought by the‰Û_\n", "\n", "---\n", "\n", "Target: 0 (not real disaster)\n", "Text:\n", - "It was a queer sultry summer the summer they electrocuted the Rosenbergs and I didn't know what I was doing in New York.\n", + "My lifelong all-time favorite song is 'Landslide'. This song has gotten me through a lot of though times &... 
http://t.co/RfB3JXbiEJ\n", "\n", "---\n", "\n", "Target: 0 (not real disaster)\n", "Text:\n", - "@TeamHendrick @TeamHendrick @RIRInsider Fingers crossed that there will be a driver from Hendricks in Military Hospitality w/ @neanea2724!\n", + "I hear the mumbling i hear the cackling i got em scared shook panicking\n", "\n", "---\n", "\n" - ], - "name": "stdout" + ] } ] }, @@ -828,7 +821,7 @@ " test_size=0.1, # dedicate 10% of samples to validation set\n", " random_state=42) # random state for reproducibility" ], - "execution_count": null, + "execution_count": 11, "outputs": [] }, { @@ -838,13 +831,13 @@ "base_uri": "https://localhost:8080/" }, "id": "NWGOTjanBaTQ", - "outputId": "cf0e9af7-e129-4824-ecba-b3d371670fa2" + "outputId": "dd1a42bc-d291-42ad-f370-6b4008b3ec45" }, "source": [ "# Check the lengths\n", "len(train_sentences), len(train_labels), len(val_sentences), len(val_labels)" ], - "execution_count": null, + "execution_count": 12, "outputs": [ { "output_type": "execute_result", @@ -853,9 +846,7 @@ "(6851, 6851, 762, 762)" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 12 } ] @@ -867,13 +858,13 @@ "base_uri": "https://localhost:8080/" }, "id": "VqhvQK9wBTbw", - "outputId": "e9a0b7f9-a548-413b-9792-b176de7d12fe" + "outputId": "31c510a5-5b33-4c3c-b093-de3982bf145b" }, "source": [ "# View the first 10 training sentences and their labels\n", "train_sentences[:10], train_labels[:10]" ], - "execution_count": null, + "execution_count": 13, "outputs": [ { "output_type": "execute_result", @@ -892,9 +883,7 @@ " dtype=object), array([0, 0, 1, 0, 0, 1, 1, 0, 1, 1]))" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 13 } ] @@ -982,7 +971,7 @@ " output_sequence_length=None) # how long should the output sequence of tokens be?\n", " # pad_to_max_tokens=True) # Not valid if using max_tokens=None" ], - "execution_count": null, + "execution_count": 14, "outputs": [] }, { @@ -1009,13 +998,13 @@ "colab": { "base_uri": 
"https://localhost:8080/" }, - "outputId": "4c82e26e-a630-4658-904e-6a6568ac48e3" + "outputId": "093c7d74-8c3a-4fd1-8624-baa5cc95e183" }, "source": [ "# Find average number of tokens (words) in training Tweets\n", "round(sum([len(i.split()) for i in train_sentences])/len(train_sentences))" ], - "execution_count": null, + "execution_count": 15, "outputs": [ { "output_type": "execute_result", @@ -1024,9 +1013,7 @@ "15" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 15 } ] @@ -1054,7 +1041,7 @@ " output_mode=\"int\",\n", " output_sequence_length=max_length)" ], - "execution_count": null, + "execution_count": 16, "outputs": [] }, { @@ -1077,7 +1064,7 @@ "# Fit the text vectorizer to the training text\n", "text_vectorizer.adapt(train_sentences)" ], - "execution_count": null, + "execution_count": 17, "outputs": [] }, { @@ -1096,14 +1083,14 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "2e85f8f1-3dfa-4ee3-efd3-4cdb6304d964" + "outputId": "908d8b3e-0110-47ba-b7d4-ab2e402af79c" }, "source": [ "# Create sample sentence and tokenize it\n", "sample_sentence = \"There's a flood in my street!\"\n", "text_vectorizer([sample_sentence])" ], - "execution_count": null, + "execution_count": 18, "outputs": [ { "output_type": "execute_result", @@ -1114,9 +1101,7 @@ " 0, 0]])>" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 18 } ] @@ -1139,7 +1124,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "782546e5-bf29-49cb-b761-2081f8fb8470" + "outputId": "227a570e-d987-49a1-bcf1-3f561d907d00" }, "source": [ "# Choose a random sentence from the training dataset and tokenize it\n", @@ -1148,30 +1133,28 @@ " \\n\\nVectorized version:\")\n", "text_vectorizer([random_sentence])" ], - "execution_count": null, + "execution_count": 19, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Original text:\n", - ".@APHL responds: FedEx no longer to transport bioterror germs in wake of 
anthrax lab mishaps http://t.co/cGdj3dRso9 \n", + "Black Eye 9: A space battle occurred at Star O784 involving 3 fleets totaling 3942 ships with 14 destroyed \n", "\n", "Vectorized version:\n" - ], - "name": "stdout" + ] }, { "output_type": "execute_result", "data": { "text/plain": [ "" + "array([[ 159, 898, 491, 3, 759, 442, 1068, 17, 874, 1629, 1129,\n", + " 118, 1524, 1457, 6327]])>" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 19 } ] @@ -1194,7 +1177,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "e0c5b519-0c5e-4be3-9e67-ea261896ad53" + "outputId": "f23e1a41-5b2d-4042-cfec-c6b22226a798" }, "source": [ "# Get the unique words in the vocabulary\n", @@ -1205,16 +1188,16 @@ "print(f\"Top 5 most common words: {top_5_words}\") \n", "print(f\"Bottom 5 least common words: {bottom_5_words}\")" ], - "execution_count": null, + "execution_count": 20, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Number of words in vocab: 10000\n", "Top 5 most common words: ['', '[UNK]', 'the', 'a', 'in']\n", "Bottom 5 least common words: ['pages', 'paeds', 'pads', 'padres', 'paddytomlinson1']\n" - ], - "name": "stdout" + ] } ] }, @@ -1248,30 +1231,30 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "884c6337-865b-46b8-e6e5-9afa58970725" + "outputId": "cffa28e2-f368-4f82-ff97-aab8b54241d3" }, "source": [ + "tf.random.set_seed(42)\n", "from tensorflow.keras import layers\n", "\n", "embedding = layers.Embedding(input_dim=max_vocab_length, # set input shape\n", " output_dim=128, # set size of embedding vector\n", " embeddings_initializer=\"uniform\", # default, intialize randomly\n", - " input_length=max_length) # how long is each input\n", + " input_length=max_length, # how long is each input\n", + " name=\"embedding_1\") \n", "\n", "embedding" ], - "execution_count": null, + "execution_count": 21, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, - 
"metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 21 } ] @@ -1294,7 +1277,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "ba4a5858-0674-4a1d-fabf-3941dd876b23" + "outputId": "83f28dec-93fb-483a-a997-a4552ef74426" }, "source": [ "# Get a random sentence from training set\n", @@ -1306,45 +1289,39 @@ "sample_embed = embedding(text_vectorizer([random_sentence]))\n", "sample_embed" ], - "execution_count": null, + "execution_count": 22, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Original text:\n", - "No #news of #hostages in #Libya\n", - "\n", - "http://t.co/eXil1bKzmP\n", - "\n", - "#India #terrorism #Africa #AP #TS #NRI #News #TRS #TDP #BJP http://t.co/ehomn68oJB \n", + "UNR issues Severe Thunderstorm Warning [wind: 60 MPH hail: 0.75 IN] for Weston [WY] and Custer Fall River Lawrence Meade Pennington [S‰Û_ \n", "\n", "Embedded version:\n" - ], - "name": "stdout" + ] }, { "output_type": "execute_result", "data": { "text/plain": [ "" + " [-0.00522641, 0.04871375, -0.03742788, ..., 0.00540795,\n", + " -0.04380312, -0.01817607],\n", + " [ 0.03406706, -0.00160446, 0.00894339, ..., -0.0356751 ,\n", + " 0.00541915, 0.00282475],\n", + " [ 0.02248487, -0.02848336, 0.04786098, ..., 0.03069806,\n", + " -0.04317403, -0.04145076]]], dtype=float32)>" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 22 } ] @@ -1365,50 +1342,55 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "f60b5e7c-a994-4c06-97f5-9f960aeaf7cf" + "outputId": "7e62e9cf-bf1c-4e21-a304-ba0b277bb220" }, "source": [ "# Check out a single token's embedding\n", "sample_embed[0][0]" ], - "execution_count": null, + "execution_count": 23, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" + "array([-2.3548698e-02, -2.9111434e-02, -1.6001653e-02, -2.4854636e-02,\n", + " -1.8839194e-02, -1.7742872e-02, 3.5991777e-02, 4.7734752e-03,\n", + " -3.1464353e-02, -1.0192860e-02, 3.9272513e-02, 
1.6213503e-02,\n", + " 3.5752203e-02, -7.9760700e-04, -4.6503343e-02, 4.1901264e-02,\n", + " 2.7158771e-02, -2.9694129e-02, 6.3859299e-04, -3.6073186e-02,\n", + " 3.7186686e-02, 8.1444494e-03, -3.4610189e-02, -1.2373447e-02,\n", + " 3.3506799e-02, 3.4542195e-03, -3.4555770e-02, 3.0121803e-03,\n", + " 1.2546945e-02, 1.8180419e-02, -2.8727353e-02, 3.0131452e-03,\n", + " 2.2011306e-02, 1.5216086e-02, 8.3960593e-05, -4.9976040e-02,\n", + " -4.1987814e-02, -1.4751814e-02, 3.1978119e-02, 3.0810181e-02,\n", + " 1.3748173e-02, 1.3646554e-02, -1.8768311e-03, 5.6033619e-03,\n", + " -3.2450367e-02, -3.2819200e-02, 6.4723380e-03, 2.4402250e-02,\n", + " -4.9929023e-02, 8.7605603e-03, 3.7449453e-02, -3.0369056e-02,\n", + " 2.8607275e-02, -8.9427829e-03, -2.6780851e-03, 1.9382443e-02,\n", + " -4.4139970e-02, -4.8123684e-02, 3.2326613e-02, 1.0355391e-02,\n", + " -6.2159896e-03, 3.3066813e-02, 4.1976977e-02, 9.8001361e-03,\n", + " 9.7909793e-03, 1.8213544e-02, 1.6274918e-02, -1.7997943e-02,\n", + " 1.4698040e-02, 1.0068141e-02, -2.3385560e-02, 1.7339502e-02,\n", + " 3.5935570e-02, -4.9711645e-02, 3.2845590e-02, 3.8101044e-02,\n", + " 3.9486382e-02, -3.1647660e-02, -4.8475552e-02, 4.4873584e-02,\n", + " 2.7549271e-02, -4.1145109e-02, -3.3895336e-02, -3.6730655e-03,\n", + " 4.9198270e-03, 9.6562132e-03, -2.2904599e-02, -1.3657093e-02,\n", + " 1.5388299e-02, 8.1878789e-03, 1.8028166e-02, 3.1150069e-02,\n", + " 4.7483686e-02, -3.7815310e-02, -4.5389161e-03, 4.1796099e-02,\n", + " 4.3265197e-02, 3.1167094e-02, -4.9614847e-02, -5.8911927e-03,\n", + " 4.3997217e-02, -2.2734845e-02, -4.1017674e-02, 1.7939974e-02,\n", + " 2.3607183e-02, 1.5478458e-02, 7.7072531e-04, -4.3312550e-02,\n", + " -4.2333078e-02, -2.2680223e-02, 3.2546792e-02, -4.9846746e-02,\n", + " 1.3042022e-02, -3.2268692e-02, -1.8501390e-02, 5.7965517e-03,\n", + " -6.9886930e-03, -1.9324971e-02, -4.5883238e-02, 3.7569497e-02,\n", + " 1.4392149e-02, -1.0649189e-03, 2.4147406e-03, -2.7852738e-02,\n", + " -3.7008919e-02, 
-3.1357028e-02, -3.6617707e-02, 2.8127018e-02],\n", + " dtype=float32)>" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 23 } ] @@ -1482,7 +1464,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "ff36d2d7-d667-4452-b929-dd9f477e9fcf" + "outputId": "0cec96b9-d78a-49af-9197-86a03b8043c8" }, "source": [ "from sklearn.feature_extraction.text import TfidfVectorizer\n", @@ -1498,7 +1480,7 @@ "# Fit the pipeline to the training data\n", "model_0.fit(train_sentences, train_labels)" ], - "execution_count": null, + "execution_count": 24, "outputs": [ { "output_type": "execute_result", @@ -1523,9 +1505,7 @@ " verbose=False)" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 24 } ] @@ -1548,20 +1528,20 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "7e64806a-8a0b-42b7-823c-13053db1f4b0" + "outputId": "b947adf2-aaaf-4d9d-afc0-81359138cc5a" }, "source": [ "baseline_score = model_0.score(val_sentences, val_labels)\n", "print(f\"Our baseline model achieves an accuracy of: {baseline_score*100:.2f}%\")" ], - "execution_count": null, + "execution_count": 25, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Our baseline model achieves an accuracy of: 79.27%\n" - ], - "name": "stdout" + ] } ] }, @@ -1581,14 +1561,14 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "40b9f534-a23b-4cc2-ab4c-2a8226e5e7b4" + "outputId": "69a3f67c-1da2-434e-fd29-73d723cd181e" }, "source": [ "# Make predictions\n", "baseline_preds = model_0.predict(val_sentences)\n", "baseline_preds[:20]" ], - "execution_count": null, + "execution_count": 26, "outputs": [ { "output_type": "execute_result", @@ -1597,9 +1577,7 @@ "array([1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1])" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 26 } ] @@ -1651,7 +1629,7 @@ " \"f1\": model_f1}\n", " return model_results" ], - "execution_count": null, + 
"execution_count": 27, "outputs": [] }, { @@ -1661,7 +1639,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "2492255b-e4d2-4793-cb80-d139ab2ef9f4" + "outputId": "ff1ea5fd-b76d-447a-e1f9-af4b46e8aaba" }, "source": [ "# Get baseline results\n", @@ -1669,7 +1647,7 @@ " y_pred=baseline_preds)\n", "baseline_results" ], - "execution_count": null, + "execution_count": 28, "outputs": [ { "output_type": "execute_result", @@ -1681,9 +1659,7 @@ " 'recall': 0.7926509186351706}" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 28 } ] @@ -1717,7 +1693,7 @@ "# Create directory to save TensorBoard logs\n", "SAVE_DIR = \"model_logs\"" ], - "execution_count": null, + "execution_count": 29, "outputs": [] }, { @@ -1744,7 +1720,7 @@ "outputs = layers.Dense(1, activation=\"sigmoid\")(x) # create the output layer, want binary outputs so use sigmoid activation\n", "model_1 = tf.keras.Model(inputs, outputs, name=\"model_1_dense\") # construct the model" ], - "execution_count": null, + "execution_count": 30, "outputs": [] }, { @@ -1775,7 +1751,7 @@ " optimizer=tf.keras.optimizers.Adam(),\n", " metrics=[\"accuracy\"])" ], - "execution_count": null, + "execution_count": 31, "outputs": [] }, { @@ -1794,16 +1770,17 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "a7cb7785-b2f2-4e10-b8ac-a274796ccbaa" + "outputId": "a0c4ada2-1984-41fa-9653-dbfc6bfdfd42" }, "source": [ "# Get a summary of the model\n", "model_1.summary()" ], - "execution_count": null, + "execution_count": 32, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Model: \"model_1_dense\"\n", "_________________________________________________________________\n", @@ -1813,7 +1790,7 @@ "_________________________________________________________________\n", "text_vectorization_1 (TextVe (None, 15) 0 \n", "_________________________________________________________________\n", - "embedding (Embedding) (None, 15, 128) 1280000 \n", + "embedding_1 
(Embedding) (None, 15, 128) 1280000 \n", "_________________________________________________________________\n", "global_average_pooling1d (Gl (None, 128) 0 \n", "_________________________________________________________________\n", @@ -1823,8 +1800,7 @@ "Trainable params: 1,280,129\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" - ], - "name": "stdout" + ] } ] }, @@ -1846,7 +1822,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "d62478eb-0da8-4377-b793-ee85743ca40d" + "outputId": "cc6ad75c-338c-4e79-d6a0-a1a2bf588652" }, "source": [ "# Fit the model\n", @@ -1857,24 +1833,24 @@ " callbacks=[create_tensorboard_callback(dir_name=SAVE_DIR, \n", " experiment_name=\"simple_dense_model\")])" ], - "execution_count": null, + "execution_count": 33, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Saving TensorBoard log files to: model_logs/simple_dense_model/20210429-030547\n", + "Saving TensorBoard log files to: model_logs/simple_dense_model/20210923-052559\n", "Epoch 1/5\n", - "215/215 [==============================] - 7s 19ms/step - loss: 0.6539 - accuracy: 0.6380 - val_loss: 0.5379 - val_accuracy: 0.7454\n", + "215/215 [==============================] - 5s 17ms/step - loss: 0.6094 - accuracy: 0.6916 - val_loss: 0.5357 - val_accuracy: 0.7572\n", "Epoch 2/5\n", - "215/215 [==============================] - 3s 14ms/step - loss: 0.4546 - accuracy: 0.8132 - val_loss: 0.4698 - val_accuracy: 0.7887\n", + "215/215 [==============================] - 3s 13ms/step - loss: 0.4410 - accuracy: 0.8189 - val_loss: 0.4691 - val_accuracy: 0.7848\n", "Epoch 3/5\n", - "215/215 [==============================] - 3s 15ms/step - loss: 0.3507 - accuracy: 0.8592 - val_loss: 0.4601 - val_accuracy: 0.7927\n", + "215/215 [==============================] - 3s 13ms/step - loss: 0.3463 - accuracy: 0.8605 - val_loss: 0.4590 - val_accuracy: 0.7900\n", "Epoch 4/5\n", - "215/215 
[==============================] - 3s 15ms/step - loss: 0.2822 - accuracy: 0.8921 - val_loss: 0.4644 - val_accuracy: 0.7874\n", + "215/215 [==============================] - 3s 14ms/step - loss: 0.2848 - accuracy: 0.8923 - val_loss: 0.4641 - val_accuracy: 0.7927\n", "Epoch 5/5\n", - "215/215 [==============================] - 3s 15ms/step - loss: 0.2387 - accuracy: 0.9122 - val_loss: 0.4813 - val_accuracy: 0.7835\n" - ], - "name": "stdout" + "215/215 [==============================] - 3s 14ms/step - loss: 0.2380 - accuracy: 0.9118 - val_loss: 0.4767 - val_accuracy: 0.7874\n" + ] } ] }, @@ -1896,35 +1872,96 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "fe355ad2-f0d6-46c2-c007-fe422216a7e0" + "outputId": "3dab9b26-7bd0-42c8-8ad8-c55ae73d0e09" }, "source": [ "# Check the results\n", "model_1.evaluate(val_sentences, val_labels)" ], - "execution_count": null, + "execution_count": 34, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "24/24 [==============================] - 0s 3ms/step - loss: 0.4813 - accuracy: 0.7835\n" - ], - "name": "stdout" + "24/24 [==============================] - 0s 7ms/step - loss: 0.4767 - accuracy: 0.7874\n" + ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "[0.48131218552589417, 0.7834645509719849]" + "[0.4766846001148224, 0.787401556968689]" ] }, - "metadata": { - "tags": [] - }, + "metadata": {}, "execution_count": 34 } ] }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5M2CTAetBVfW", + "outputId": "263af483-3739-4ff7-fa21-52b9eab7a81b" + }, + "source": [ + "embedding.weights" + ], + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M3rfhJFSBrga", + "outputId": 
"e16f8a95-540b-40a7-98f4-8fe1a9dff5ac" + }, + "source": [ + "embed_weights = model_1.get_layer(\"embedding_1\").get_weights()[0]\n", + "print(embed_weights.shape)" + ], + "execution_count": 36, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(10000, 128)\n" + ] + } + ] + }, { "cell_type": "markdown", "metadata": { @@ -1951,7 +1988,7 @@ "# --description \"Trying a dense model with an embedding layer\" \\\n", "# --one_shot # exits the uploader when upload has finished" ], - "execution_count": null, + "execution_count": 37, "outputs": [] }, { @@ -1963,7 +2000,7 @@ "# If you need to remove previous experiments, you can do so using the following command\n", "# !tensorboard dev delete --experiment_id EXPERIMENT_ID_TO_DELETE" ], - "execution_count": null, + "execution_count": 38, "outputs": [] }, { @@ -1990,35 +2027,33 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "e2b81e04-3cc9-4d4e-fadd-8960681bf8bb" + "outputId": "5d3a68f3-5b96-4dd4-e8e0-bf351ff68c9f" }, "source": [ "# Make predictions (these come back in the form of probabilities)\n", "model_1_pred_probs = model_1.predict(val_sentences)\n", "model_1_pred_probs[:10] # only print out the first 10 prediction probabilities" ], - "execution_count": null, + "execution_count": 39, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "array([[0.38902506],\n", - " [0.6600889 ],\n", - " [0.9979772 ],\n", - " [0.19496465],\n", - " [0.13191536],\n", - " [0.9409251 ],\n", - " [0.91324633],\n", - " [0.99308854],\n", - " [0.96710646],\n", - " [0.3796546 ]], dtype=float32)" + "array([[0.4048821 ],\n", + " [0.7443312 ],\n", + " [0.997895 ],\n", + " [0.10889997],\n", + " [0.11143532],\n", + " [0.93556094],\n", + " [0.9134595 ],\n", + " [0.9925345 ],\n", + " [0.97156817],\n", + " [0.26570338]], dtype=float32)" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 37 + "metadata": {}, + "execution_count": 39 } ] }, @@ -2042,14 +2077,14 @@ "colab": { 
"base_uri": "https://localhost:8080/" }, - "outputId": "e99eb9a8-7a84-4171-fe93-2c0629161a60" + "outputId": "3062f826-9564-48d7-bbec-a803b41e9cba" }, "source": [ "# Turn prediction probabilities into single-dimension tensor of floats\n", "model_1_preds = tf.squeeze(tf.round(model_1_pred_probs)) # squeeze removes single dimensions\n", "model_1_preds[:20]" ], - "execution_count": null, + "execution_count": 40, "outputs": [ { "output_type": "execute_result", @@ -2060,10 +2095,8 @@ " 0., 0., 1.], dtype=float32)>" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 38 + "metadata": {}, + "execution_count": 40 } ] }, @@ -2083,7 +2116,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "5c5fbaa1-70dd-4a8f-ff41-27b9a7883a51" + "outputId": "e31e8bca-894a-44ad-88cc-fdea0dbc453a" }, "source": [ "# Calculate model_1 metrics\n", @@ -2091,22 +2124,20 @@ " y_pred=model_1_preds)\n", "model_1_results" ], - "execution_count": null, + "execution_count": 41, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "{'accuracy': 78.34645669291339,\n", - " 'f1': 0.7813141680786296,\n", - " 'precision': 0.785851650078301,\n", - " 'recall': 0.7834645669291339}" + "{'accuracy': 78.74015748031496,\n", + " 'f1': 0.7846966492209201,\n", + " 'precision': 0.7914920592553047,\n", + " 'recall': 0.7874015748031497}" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 39 + "metadata": {}, + "execution_count": 41 } ] }, @@ -2126,14 +2157,14 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "2a3ac4e6-68fb-4cb4-fb2f-cd3112ffce91" + "outputId": "ba27df1c-624b-4a24-da50-012a56653038" }, "source": [ "# Is our simple Keras model better than our baseline model?\n", "import numpy as np\n", "np.array(list(model_1_results.values())) > np.array(list(baseline_results.values()))" ], - "execution_count": null, + "execution_count": 42, "outputs": [ { "output_type": "execute_result", @@ -2142,10 +2173,8 @@ "array([False, False, False, False])" 
] }, - "metadata": { - "tags": [] - }, - "execution_count": 40 + "metadata": {}, + "execution_count": 42 } ] }, @@ -2165,7 +2194,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "2f370695-3605-4602-b928-3fa377b52d6e" + "outputId": "14b74922-fe0a-44cb-8932-327e05402960" }, "source": [ "# Create a helper function to compare our baseline results to new model results\n", @@ -2176,17 +2205,17 @@ "compare_baseline_to_new_results(baseline_results=baseline_results, \n", " new_model_results=model_1_results)" ], - "execution_count": null, + "execution_count": 43, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Baseline accuracy: 79.27, New accuracy: 78.35, Difference: -0.92\n", - "Baseline precision: 0.81, New precision: 0.79, Difference: -0.03\n", - "Baseline recall: 0.79, New recall: 0.78, Difference: -0.01\n", + "Baseline accuracy: 79.27, New accuracy: 78.74, Difference: -0.52\n", + "Baseline precision: 0.81, New precision: 0.79, Difference: -0.02\n", + "Baseline recall: 0.79, New recall: 0.79, Difference: -0.01\n", "Baseline f1: 0.79, New f1: 0.78, Difference: -0.00\n" - ], - "name": "stdout" + ] } ] }, @@ -2214,14 +2243,14 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "3c0116f1-e538-4c29-c193-cee496383aa6" + "outputId": "9e10f4fd-50ef-4ccd-a99b-eff8a64082f8" }, "source": [ "# Get the vocabulary from the text vectorization layer\n", "words_in_vocab = text_vectorizer.get_vocabulary()\n", "len(words_in_vocab), words_in_vocab[:10]" ], - "execution_count": null, + "execution_count": 44, "outputs": [ { "output_type": "execute_result", @@ -2230,10 +2259,8 @@ "(10000, ['', '[UNK]', 'the', 'a', 'in', 'to', 'of', 'and', 'i', 'is'])" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 42 + "metadata": {}, + "execution_count": 44 } ] }, @@ -2253,15 +2280,16 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "95b247a4-c8c7-49bb-9bc6-5c04875bc8d7" + "outputId": 
"eb047f23-dbef-46ea-ce98-2ea91b309d09" }, "source": [ "model_1.summary()" ], - "execution_count": null, + "execution_count": 45, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Model: \"model_1_dense\"\n", "_________________________________________________________________\n", @@ -2271,7 +2299,7 @@ "_________________________________________________________________\n", "text_vectorization_1 (TextVe (None, 15) 0 \n", "_________________________________________________________________\n", - "embedding (Embedding) (None, 15, 128) 1280000 \n", + "embedding_1 (Embedding) (None, 15, 128) 1280000 \n", "_________________________________________________________________\n", "global_average_pooling1d (Gl (None, 128) 0 \n", "_________________________________________________________________\n", @@ -2281,8 +2309,7 @@ "Trainable params: 1,280,129\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" - ], - "name": "stdout" + ] } ] }, @@ -2293,22 +2320,22 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "959f0a21-72bc-4342-ea21-60bfa916af48" + "outputId": "ffc27236-56f5-4ecf-e107-37a51722194a" }, "source": [ "# Get the weight matrix of embedding layer \n", "# (these are the numerical patterns between the text in the training dataset the model has learned)\n", - "embed_weights = model_1.get_layer(\"embedding\").get_weights()[0]\n", + "embed_weights = model_1.get_layer(\"embedding_1\").get_weights()[0]\n", "print(embed_weights.shape) # same size as vocab size and embedding_dim (each word is a embedding_dim size vector)" ], - "execution_count": null, + "execution_count": 46, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "(10000, 128)\n" - ], - "name": "stdout" + ] } ] }, @@ -2359,7 +2386,7 @@ "# files.download(\"embedding_vectors.tsv\")\n", "# files.download(\"embedding_metadata.tsv\")" ], - "execution_count": null, + "execution_count": 47, "outputs": [] }, { @@ -2460,7 
+2487,11 @@ "Input (text) -> Tokenize -> Embedding -> Layers -> Output (label probability)\n", "```\n", "\n", - "The main difference will be that we're going to add an LSTM layer between our embedding and output." + "The main difference will be that we're going to add an LSTM layer between our embedding and output.\n", + "\n", + "And to make sure we're not reusing trained embeddings (this would involve data leakage between models, leading to an uneven comparison later on), we'll create another embedding layer (`model_2_embedding`) for our model. The `text_vectorizer` layer can be reused since it doesn't get updated during training.\n", + "\n", + "> 🔑 **Note:** We use a new embedding layer for each model because the embedding layer is a *learned* representation of words (as numbers). If we were to use the same embedding layer (`embedding_1`) for each model, we'd be mixing what one model learned with the next. And because we want to compare our models later on, starting them with their own embedding layer each time is a better idea." 
] }, { @@ -2470,14 +2501,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "d877a99e-f5e1-4bf6-9b0b-d1837106b0b1" + "outputId": "373cad05-9cf7-4c40-b41b-622f51563819" }, "source": [ - "# Create LSTM model\n", + "# Set random seed and create embedding layer (new embedding layer for each model)\n", + "tf.random.set_seed(42)\n", "from tensorflow.keras import layers\n", + "model_2_embedding = layers.Embedding(input_dim=max_vocab_length,\n", + " output_dim=128,\n", + " embeddings_initializer=\"uniform\",\n", + " input_length=max_length,\n", + " name=\"embedding_2\")\n", + "\n", + "\n", + "# Create LSTM model\n", "inputs = layers.Input(shape=(1,), dtype=\"string\")\n", "x = text_vectorizer(inputs)\n", - "x = embedding(x)\n", + "x = model_2_embedding(x)\n", "print(x.shape)\n", "# x = layers.LSTM(64, return_sequences=True)(x) # return vector for each word in the Tweet (you can stack RNN cells as long as return_sequences=True)\n", "x = layers.LSTM(64)(x) # return vector for whole sequence\n", @@ -2486,15 +2526,15 @@ "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n", "model_2 = tf.keras.Model(inputs, outputs, name=\"model_2_LSTM\")" ], - "execution_count": null, + "execution_count": 48, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "(None, 15, 128)\n", "(None, 64)\n" - ], - "name": "stdout" + ] } ] }, @@ -2520,7 +2560,7 @@ " optimizer=tf.keras.optimizers.Adam(),\n", " metrics=[\"accuracy\"])" ], - "execution_count": null, + "execution_count": 49, "outputs": [] }, { @@ -2539,15 +2579,16 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "42b7cca4-e582-4c5d-d9ef-599b66391d56" + "outputId": "f345c914-32a3-437a-89c9-07ae4989ec62" }, "source": [ "model_2.summary()" ], - "execution_count": null, + "execution_count": 50, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Model: \"model_2_LSTM\"\n", "_________________________________________________________________\n", @@ -2557,7 +2598,7 @@ 
"_________________________________________________________________\n", "text_vectorization_1 (TextVe (None, 15) 0 \n", "_________________________________________________________________\n", - "embedding (Embedding) (None, 15, 128) 1280000 \n", + "embedding_2 (Embedding) (None, 15, 128) 1280000 \n", "_________________________________________________________________\n", "lstm (LSTM) (None, 64) 49408 \n", "_________________________________________________________________\n", @@ -2567,8 +2608,7 @@ "Trainable params: 1,329,473\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" - ], - "name": "stdout" + ] } ] }, @@ -2594,7 +2634,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "a57ab485-cc06-4d53-83ea-7dd1154b9133" + "outputId": "34da9032-cfe3-46b2-8cab-80d6abc3a593" }, "source": [ "# Fit model\n", @@ -2605,24 +2645,24 @@ " callbacks=[create_tensorboard_callback(SAVE_DIR, \n", " \"LSTM\")])" ], - "execution_count": null, + "execution_count": 51, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Saving TensorBoard log files to: model_logs/LSTM/20210415-022358\n", + "Saving TensorBoard log files to: model_logs/LSTM/20210923-052618\n", "Epoch 1/5\n", - "215/215 [==============================] - 35s 21ms/step - loss: 0.2995 - accuracy: 0.9068 - val_loss: 0.5397 - val_accuracy: 0.7887\n", + "215/215 [==============================] - 12s 34ms/step - loss: 0.5100 - accuracy: 0.7416 - val_loss: 0.4566 - val_accuracy: 0.7822\n", "Epoch 2/5\n", - "215/215 [==============================] - 3s 16ms/step - loss: 0.1516 - accuracy: 0.9419 - val_loss: 0.6465 - val_accuracy: 0.7822\n", + "215/215 [==============================] - 4s 19ms/step - loss: 0.3176 - accuracy: 0.8717 - val_loss: 0.5138 - val_accuracy: 0.7756\n", "Epoch 3/5\n", - "215/215 [==============================] - 3s 15ms/step - loss: 0.1192 - accuracy: 0.9550 - val_loss: 0.6384 - val_accuracy: 0.7913\n", + "215/215 
[==============================] - 4s 18ms/step - loss: 0.2201 - accuracy: 0.9152 - val_loss: 0.5858 - val_accuracy: 0.7677\n", "Epoch 4/5\n", - "215/215 [==============================] - 3s 15ms/step - loss: 0.1113 - accuracy: 0.9544 - val_loss: 0.8098 - val_accuracy: 0.7848\n", + "215/215 [==============================] - 4s 19ms/step - loss: 0.1556 - accuracy: 0.9428 - val_loss: 0.6041 - val_accuracy: 0.7743\n", "Epoch 5/5\n", - "215/215 [==============================] - 3s 15ms/step - loss: 0.0735 - accuracy: 0.9685 - val_loss: 0.7787 - val_accuracy: 0.7677\n" - ], - "name": "stdout" + "215/215 [==============================] - 4s 20ms/step - loss: 0.1076 - accuracy: 0.9594 - val_loss: 0.8746 - val_accuracy: 0.7507\n" + ] } ] }, @@ -2644,35 +2684,33 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "aef26f80-b847-44ac-816e-2690ad7ef04b" + "outputId": "08161e1d-fb72-4046-8ba1-1937487eebc4" }, "source": [ "# Make predictions on the validation dataset\n", "model_2_pred_probs = model_2.predict(val_sentences)\n", "model_2_pred_probs.shape, model_2_pred_probs[:10] # view the first 10" ], - "execution_count": null, + "execution_count": 52, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "((762, 1), array([[4.6278764e-02],\n", - " [8.9790154e-01],\n", - " [9.9968016e-01],\n", - " [1.7329222e-01],\n", - " [4.6210753e-04],\n", - " [9.9592650e-01],\n", - " [8.5770720e-01],\n", - " [9.9980468e-01],\n", - " [9.9961120e-01],\n", - " [4.0211138e-01]], dtype=float32))" + "((762, 1), array([[0.00712602],\n", + " [0.7873681 ],\n", + " [0.9996376 ],\n", + " [0.05679193],\n", + " [0.0025822 ],\n", + " [0.9996238 ],\n", + " [0.9217023 ],\n", + " [0.9997993 ],\n", + " [0.9994954 ],\n", + " [0.6645735 ]], dtype=float32))" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 50 + "metadata": {}, + "execution_count": 52 } ] }, @@ -2692,26 +2730,24 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": 
"e874b17d-6340-44cb-9d18-79cd638fd11c" + "outputId": "284865b4-bfd5-470d-b3da-c3bef6f7ea92" }, "source": [ "# Round out predictions and reduce to 1-dimensional array\n", "model_2_preds = tf.squeeze(tf.round(model_2_pred_probs))\n", "model_2_preds[:10]" ], - "execution_count": null, + "execution_count": 53, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 51 + "metadata": {}, + "execution_count": 53 } ] }, @@ -2731,7 +2767,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "efa18ec7-8797-4b9c-b4ef-5dc296802a65" + "outputId": "9265fa0c-a725-4333-f91f-4d96fa5a5c06" }, "source": [ "# Calculate LSTM model results\n", @@ -2739,22 +2775,20 @@ " y_pred=model_2_preds)\n", "model_2_results" ], - "execution_count": null, + "execution_count": 54, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "{'accuracy': 76.77165354330708,\n", - " 'f1': 0.7661635916954678,\n", - " 'precision': 0.7683074753719822,\n", - " 'recall': 0.7677165354330708}" + "{'accuracy': 75.06561679790026,\n", + " 'f1': 0.7489268622514025,\n", + " 'precision': 0.7510077975908164,\n", + " 'recall': 0.7506561679790026}" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 52 + "metadata": {}, + "execution_count": 54 } ] }, @@ -2765,23 +2799,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "3c2f6c9b-f5d2-49c5-f730-210c85357895" + "outputId": "686d82ce-8eb5-40d3-9db2-85ffc4fa6484" }, "source": [ "# Compare model 2 to baseline\n", "compare_baseline_to_new_results(baseline_results, model_2_results)" ], - "execution_count": null, + "execution_count": 55, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Baseline accuracy: 79.27, New accuracy: 76.77, Difference: -2.49\n", - "Baseline precision: 0.81, New precision: 0.77, Difference: -0.04\n", - "Baseline recall: 0.79, New recall: 0.77, Difference: -0.02\n", - "Baseline f1: 
0.79, New f1: 0.77, Difference: -0.02\n" - ], - "name": "stdout" + "Baseline accuracy: 79.27, New accuracy: 75.07, Difference: -4.20\n", + "Baseline precision: 0.81, New precision: 0.75, Difference: -0.06\n", + "Baseline recall: 0.79, New recall: 0.75, Difference: -0.04\n", + "Baseline f1: 0.79, New f1: 0.75, Difference: -0.04\n" + ] } ] }, @@ -2818,18 +2852,26 @@ "id": "SoSCGq3H47Yo" }, "source": [ - "# Build an RNN using the GRU cell\n", + "# Set random seed and create embedding layer (new embedding layer for each model)\n", + "tf.random.set_seed(42)\n", "from tensorflow.keras import layers\n", + "model_3_embedding = layers.Embedding(input_dim=max_vocab_length,\n", + " output_dim=128,\n", + " embeddings_initializer=\"uniform\",\n", + " input_length=max_length,\n", + " name=\"embedding_3\")\n", + "\n", + "# Build an RNN using the GRU cell\n", "inputs = layers.Input(shape=(1,), dtype=\"string\")\n", "x = text_vectorizer(inputs)\n", - "x = embedding(x)\n", + "x = model_3_embedding(x)\n", "# x = layers.GRU(64, return_sequences=True) # stacking recurrent cells requires return_sequences=True\n", "x = layers.GRU(64)(x) \n", "# x = layers.Dense(64, activation=\"relu\")(x) # optional dense layer after GRU cell\n", "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n", "model_3 = tf.keras.Model(inputs, outputs, name=\"model_3_GRU\")" ], - "execution_count": null, + "execution_count": 56, "outputs": [] }, { @@ -2852,7 +2894,7 @@ " optimizer=tf.keras.optimizers.Adam(),\n", " metrics=[\"accuracy\"])" ], - "execution_count": null, + "execution_count": 57, "outputs": [] }, { @@ -2871,16 +2913,17 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "5465418b-9fc7-4544-fbdb-65af152f8976" + "outputId": "068184f9-c913-46fd-83d9-8547e8f4f1f6" }, "source": [ "# Get a summary of the GRU model\n", "model_3.summary()" ], - "execution_count": null, + "execution_count": 58, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Model: \"model_3_GRU\"\n", 
"_________________________________________________________________\n", @@ -2890,7 +2933,7 @@ "_________________________________________________________________\n", "text_vectorization_1 (TextVe (None, 15) 0 \n", "_________________________________________________________________\n", - "embedding (Embedding) (None, 15, 128) 1280000 \n", + "embedding_3 (Embedding) (None, 15, 128) 1280000 \n", "_________________________________________________________________\n", "gru (GRU) (None, 64) 37248 \n", "_________________________________________________________________\n", @@ -2900,8 +2943,7 @@ "Trainable params: 1,317,313\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" - ], - "name": "stdout" + ] } ] }, @@ -2923,7 +2965,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "24bae292-0fc8-406e-92f6-3c80c298b78d" + "outputId": "629279a6-1dd9-43fb-f0ae-975be030ebe7" }, "source": [ "# Fit model\n", @@ -2933,24 +2975,24 @@ " validation_data=(val_sentences, val_labels),\n", " callbacks=[create_tensorboard_callback(SAVE_DIR, \"GRU\")])" ], - "execution_count": null, + "execution_count": 59, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Saving TensorBoard log files to: model_logs/GRU/20210415-022447\n", + "Saving TensorBoard log files to: model_logs/GRU/20210923-052650\n", "Epoch 1/5\n", - "215/215 [==============================] - 6s 20ms/step - loss: 0.2611 - accuracy: 0.8959 - val_loss: 0.7477 - val_accuracy: 0.7756\n", + "215/215 [==============================] - 12s 24ms/step - loss: 0.5242 - accuracy: 0.7314 - val_loss: 0.4553 - val_accuracy: 0.7769\n", "Epoch 2/5\n", - "215/215 [==============================] - 3s 15ms/step - loss: 0.0875 - accuracy: 0.9668 - val_loss: 0.8385 - val_accuracy: 0.7848\n", + "215/215 [==============================] - 3s 15ms/step - loss: 0.3195 - accuracy: 0.8694 - val_loss: 0.4937 - val_accuracy: 0.7808\n", "Epoch 3/5\n", - "215/215 
[==============================] - 3s 15ms/step - loss: 0.0654 - accuracy: 0.9760 - val_loss: 0.9034 - val_accuracy: 0.7795\n", + "215/215 [==============================] - 2s 10ms/step - loss: 0.2197 - accuracy: 0.9181 - val_loss: 0.5607 - val_accuracy: 0.7743\n", "Epoch 4/5\n", - "215/215 [==============================] - 3s 15ms/step - loss: 0.0656 - accuracy: 0.9693 - val_loss: 1.0272 - val_accuracy: 0.7808\n", + "215/215 [==============================] - 2s 10ms/step - loss: 0.1599 - accuracy: 0.9441 - val_loss: 0.6220 - val_accuracy: 0.7782\n", "Epoch 5/5\n", - "215/215 [==============================] - 3s 15ms/step - loss: 0.0654 - accuracy: 0.9740 - val_loss: 1.2087 - val_accuracy: 0.7730\n" - ], - "name": "stdout" + "215/215 [==============================] - 2s 10ms/step - loss: 0.1221 - accuracy: 0.9584 - val_loss: 0.6205 - val_accuracy: 0.7677\n" + ] } ] }, @@ -2972,35 +3014,33 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "c13612d7-734a-486c-b0aa-b63a5a4d95f6" + "outputId": "21116150-7412-49c4-ae12-679f3c688c44" }, "source": [ "# Make predictions on the validation data\n", "model_3_pred_probs = model_3.predict(val_sentences)\n", "model_3_pred_probs.shape, model_3_pred_probs[:10]" ], - "execution_count": null, + "execution_count": 60, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "((762, 1), array([[2.2413477e-03],\n", - " [8.3937943e-01],\n", - " [9.9986148e-01],\n", - " [1.4057460e-01],\n", - " [8.8131892e-05],\n", - " [9.9974793e-01],\n", - " [9.7088730e-01],\n", - " [9.9995959e-01],\n", - " [9.9989879e-01],\n", - " [9.3830937e-01]], dtype=float32))" + "((762, 1), array([[0.33325258],\n", + " [0.87741184],\n", + " [0.9980252 ],\n", + " [0.11561754],\n", + " [0.01235959],\n", + " [0.9925639 ],\n", + " [0.6214262 ],\n", + " [0.99813336],\n", + " [0.9982377 ],\n", + " [0.50181067]], dtype=float32))" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 58 + "metadata": {}, + 
"execution_count": 60 } ] }, @@ -3020,14 +3060,14 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "2ed204eb-307d-4f96-ca66-12cdaa56d986" + "outputId": "8d567022-4f1f-46cb-aae1-70242ce55e56" }, "source": [ "# Convert prediction probabilities to prediction classes\n", "model_3_preds = tf.squeeze(tf.round(model_3_pred_probs))\n", "model_3_preds[:10]" ], - "execution_count": null, + "execution_count": 61, "outputs": [ { "output_type": "execute_result", @@ -3036,10 +3076,8 @@ "" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 59 + "metadata": {}, + "execution_count": 61 } ] }, @@ -3059,7 +3097,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "981789d2-cdcd-4c42-f1fc-d41a1492a3e5" + "outputId": "3b32ee9f-3291-4f8c-873d-c7780488cab8" }, "source": [ "# Calcuate model_3 results\n", @@ -3067,22 +3105,20 @@ " y_pred=model_3_preds)\n", "model_3_results" ], - "execution_count": null, + "execution_count": 62, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "{'accuracy': 77.29658792650919,\n", - " 'f1': 0.7712160418848196,\n", - " 'precision': 0.7740517401498704,\n", - " 'recall': 0.7729658792650919}" + "{'accuracy': 76.77165354330708,\n", + " 'f1': 0.7667932666650168,\n", + " 'precision': 0.7675450859410361,\n", + " 'recall': 0.7677165354330708}" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 60 + "metadata": {}, + "execution_count": 62 } ] }, @@ -3102,23 +3138,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "190430b8-6020-45e7-c39d-e21538c6b9e4" + "outputId": "ff8ad89a-089a-4b29-be3f-b0c6f87c19ad" }, "source": [ "# Compare to baseline\n", "compare_baseline_to_new_results(baseline_results, model_3_results)" ], - "execution_count": null, + "execution_count": 63, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Baseline accuracy: 79.27, New accuracy: 77.30, Difference: -1.97\n", + "Baseline accuracy: 79.27, New accuracy: 76.77, Difference: 
-2.49\n", "Baseline precision: 0.81, New precision: 0.77, Difference: -0.04\n", "Baseline recall: 0.79, New recall: 0.77, Difference: -0.02\n", "Baseline f1: 0.79, New f1: 0.77, Difference: -0.02\n" - ], - "name": "stdout" + ] } ] }, @@ -3151,17 +3187,25 @@ "id": "NAU9dvGm47_2" }, "source": [ - "# Build a Bidirectional RNN in TensorFlow\n", + "# Set random seed and create embedding layer (new embedding layer for each model)\n", + "tf.random.set_seed(42)\n", "from tensorflow.keras import layers\n", + "model_4_embedding = layers.Embedding(input_dim=max_vocab_length,\n", + " output_dim=128,\n", + " embeddings_initializer=\"uniform\",\n", + " input_length=max_length,\n", + " name=\"embedding_4\")\n", + "\n", + "# Build a Bidirectional RNN in TensorFlow\n", "inputs = layers.Input(shape=(1,), dtype=\"string\")\n", "x = text_vectorizer(inputs)\n", - "x = embedding(x)\n", + "x = model_4_embedding(x)\n", "# x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x) # stacking RNN layers requires return_sequences=True\n", "x = layers.Bidirectional(layers.LSTM(64))(x) # bidirectional goes both ways so has double the parameters of a regular LSTM layer\n", "outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n", "model_4 = tf.keras.Model(inputs, outputs, name=\"model_4_Bidirectional\")" ], - "execution_count": null, + "execution_count": 64, "outputs": [] }, { @@ -3186,7 +3230,7 @@ " optimizer=tf.keras.optimizers.Adam(),\n", " metrics=[\"accuracy\"])" ], - "execution_count": null, + "execution_count": 65, "outputs": [] }, { @@ -3205,16 +3249,17 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "9369b46d-138c-4b78-c27b-e71a76aa9c17" + "outputId": "dc1d6295-7753-41b9-df0d-4f4986cb3cbf" }, "source": [ "# Get a summary of our bidirectional model\n", "model_4.summary()" ], - "execution_count": null, + "execution_count": 66, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Model: \"model_4_Bidirectional\"\n", 
"_________________________________________________________________\n", @@ -3224,7 +3269,7 @@ "_________________________________________________________________\n", "text_vectorization_1 (TextVe (None, 15) 0 \n", "_________________________________________________________________\n", - "embedding (Embedding) (None, 15, 128) 1280000 \n", + "embedding_4 (Embedding) (None, 15, 128) 1280000 \n", "_________________________________________________________________\n", "bidirectional (Bidirectional (None, 128) 98816 \n", "_________________________________________________________________\n", @@ -3234,8 +3279,7 @@ "Trainable params: 1,378,945\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" - ], - "name": "stdout" + ] } ] }, @@ -3257,7 +3301,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "eb59b84a-be2c-4d73-f8c9-90087d3bfdd0" + "outputId": "1e576aae-b7a6-4bc5-b229-99e0885d9bd2" }, "source": [ "# Fit the model (takes longer because of the bidirectional layers)\n", @@ -3267,24 +3311,24 @@ " validation_data=(val_sentences, val_labels),\n", " callbacks=[create_tensorboard_callback(SAVE_DIR, \"bidirectional_RNN\")])" ], - "execution_count": null, + "execution_count": 67, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Saving TensorBoard log files to: model_logs/bidirectional_RNN/20210415-022507\n", + "Saving TensorBoard log files to: model_logs/bidirectional_RNN/20210923-052719\n", "Epoch 1/5\n", - "215/215 [==============================] - 8s 24ms/step - loss: 0.2004 - accuracy: 0.9454 - val_loss: 1.0018 - val_accuracy: 0.7717\n", + "215/215 [==============================] - 8s 21ms/step - loss: 0.5093 - accuracy: 0.7481 - val_loss: 0.4606 - val_accuracy: 0.7795\n", "Epoch 2/5\n", - "215/215 [==============================] - 4s 17ms/step - loss: 0.0472 - accuracy: 0.9785 - val_loss: 1.0636 - val_accuracy: 0.7756\n", + "215/215 [==============================] - 3s 14ms/step - 
loss: 0.3135 - accuracy: 0.8708 - val_loss: 0.5144 - val_accuracy: 0.7690\n", "Epoch 3/5\n", - "215/215 [==============================] - 4s 17ms/step - loss: 0.0432 - accuracy: 0.9830 - val_loss: 1.3335 - val_accuracy: 0.7677\n", + "215/215 [==============================] - 3s 14ms/step - loss: 0.2150 - accuracy: 0.9178 - val_loss: 0.5626 - val_accuracy: 0.7677\n", "Epoch 4/5\n", - "215/215 [==============================] - 4s 17ms/step - loss: 0.0363 - accuracy: 0.9828 - val_loss: 1.3843 - val_accuracy: 0.7717\n", + "215/215 [==============================] - 3s 14ms/step - loss: 0.1523 - accuracy: 0.9469 - val_loss: 0.6365 - val_accuracy: 0.7769\n", "Epoch 5/5\n", - "215/215 [==============================] - 4s 18ms/step - loss: 0.0385 - accuracy: 0.9834 - val_loss: 1.3579 - val_accuracy: 0.7664\n" - ], - "name": "stdout" + "215/215 [==============================] - 3s 14ms/step - loss: 0.1083 - accuracy: 0.9639 - val_loss: 0.6509 - val_accuracy: 0.7664\n" + ] } ] }, @@ -3308,35 +3352,33 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "5745758a-d33e-4651-990c-a922d74bb00b" + "outputId": "67a915b9-2e27-492e-86a7-6b114354cfcf" }, "source": [ "# Make predictions with bidirectional RNN on the validation data\n", "model_4_pred_probs = model_4.predict(val_sentences)\n", "model_4_pred_probs[:10]" ], - "execution_count": null, + "execution_count": 68, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "array([[6.8994458e-03],\n", - " [7.1700656e-01],\n", - " [9.9994183e-01],\n", - " [2.2768566e-01],\n", - " [2.4008737e-05],\n", - " [9.9926597e-01],\n", - " [7.3443264e-01],\n", - " [9.9997735e-01],\n", - " [9.9993920e-01],\n", - " [9.9585587e-01]], dtype=float32)" + "array([[0.04000043],\n", + " [0.827929 ],\n", + " [0.99842227],\n", + " [0.1353109 ],\n", + " [0.00311337],\n", + " [0.99220747],\n", + " [0.9552836 ],\n", + " [0.99945647],\n", + " [0.99898285],\n", + " [0.28141677]], dtype=float32)" ] }, - "metadata": { - 
"tags": [] - }, - "execution_count": 66 + "metadata": {}, + "execution_count": 68 } ] }, @@ -3356,26 +3398,24 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "6d0a9786-1d54-40dc-ec8e-770224e2e659" + "outputId": "ecbb2c9e-5b45-4872-a126-6f6b4de3f091" }, "source": [ "# Convert prediction probabilities to labels\n", "model_4_preds = tf.squeeze(tf.round(model_4_pred_probs))\n", "model_4_preds[:10]" ], - "execution_count": null, + "execution_count": 69, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 67 + "metadata": {}, + "execution_count": 69 } ] }, @@ -3386,29 +3426,27 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "840f731c-8c7b-4c2c-d8d9-0d5beb7bf84c" + "outputId": "a3d841a2-6cdd-4da2-d623-d046f608841c" }, "source": [ "# Calculate bidirectional RNN model results\n", "model_4_results = calculate_results(val_labels, model_4_preds)\n", "model_4_results" ], - "execution_count": null, + "execution_count": 70, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'accuracy': 76.64041994750657,\n", - " 'f1': 0.764784113056577,\n", - " 'precision': 0.7670590562420062,\n", + " 'f1': 0.7651213533864446,\n", + " 'precision': 0.7665895370389821,\n", " 'recall': 0.7664041994750657}" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 68 + "metadata": {}, + "execution_count": 70 } ] }, @@ -3419,23 +3457,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "34c352b0-ecc8-46cb-a961-57f9fc766bd7" + "outputId": "d69437c7-e780-41ab-d575-b9cfdefdf82b" }, "source": [ "# Check to see how the bidirectional model performs against the baseline\n", "compare_baseline_to_new_results(baseline_results, model_4_results)" ], - "execution_count": null, + "execution_count": 71, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Baseline accuracy: 79.27, New accuracy: 76.64, Difference: 
-2.62\n", "Baseline precision: 0.81, New precision: 0.77, Difference: -0.04\n", "Baseline recall: 0.79, New recall: 0.77, Difference: -0.03\n", - "Baseline f1: 0.79, New f1: 0.76, Difference: -0.02\n" - ], - "name": "stdout" + "Baseline f1: 0.79, New f1: 0.77, Difference: -0.02\n" + ] } ] }, @@ -3493,7 +3531,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "2ea1469d-29ac-4f34-8624-060d71f47fe8" + "outputId": "32a7e900-e4b4-4b1b-9b4e-617ca9f7eb92" }, "source": [ "# Test out the embedding, 1D convolutional and max pooling\n", @@ -3504,7 +3542,7 @@ "max_pool_output = max_pool(conv_1d_output) # get the most important features\n", "embedding_test.shape, conv_1d_output.shape, max_pool_output.shape" ], - "execution_count": null, + "execution_count": 72, "outputs": [ { "output_type": "execute_result", @@ -3513,10 +3551,8 @@ "(TensorShape([1, 15, 128]), TensorShape([1, 11, 32]), TensorShape([1, 32]))" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 70 + "metadata": {}, + "execution_count": 72 } ] }, @@ -3544,124 +3580,122 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "274f9ad6-ee42-49ff-d861-b4431d3be570" + "outputId": "33616ed5-7333-4891-d3fa-d798c64fd789" }, "source": [ "# See the outputs of each layer\n", "embedding_test[:1], conv_1d_output[:1], max_pool_output[:1]" ], - "execution_count": null, + "execution_count": 73, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(,\n", + " [ 0.00073166, 0.01504797, -0.03425457, ..., -0.04403538,\n", + " -0.01042282, 0.01876436],\n", + " [ 0.00073166, 0.01504797, -0.03425457, ..., -0.04403538,\n", + " -0.01042282, 0.01876436],\n", + " [ 0.00073166, 0.01504797, -0.03425457, ..., -0.04403538,\n", + " -0.01042282, 0.01876436]]], dtype=float32)>,\n", " ,\n", + " array([[[0.08324985, 0.00648716, 0. , 0.03983572, 0. ,\n", + " 0.01144416, 0.00416251, 0.0228839 , 0. , 0.00900978,\n", + " 0. , 0. 
, 0.03401771, 0.06408274, 0.08103722,\n", + " 0.00409014, 0.01579616, 0. , 0.07930177, 0. ,\n", + " 0. , 0. , 0.14525084, 0. , 0. ,\n", + " 0. , 0.03682078, 0.06534287, 0. , 0. ,\n", + " 0.05094624, 0. ],\n", + " [0. , 0.05387188, 0. , 0.11491331, 0. ,\n", + " 0. , 0.1623708 , 0. , 0. , 0.00171254,\n", + " 0.14336711, 0. , 0. , 0. , 0. ,\n", + " 0.01197936, 0. , 0. , 0.13551372, 0.0040106 ,\n", + " 0.10309819, 0.09445544, 0.08390297, 0. , 0.04213036,\n", + " 0.04487597, 0.06560461, 0. , 0.02272684, 0. ,\n", + " 0. , 0. ],\n", + " [0.03683221, 0.04895764, 0. , 0.1532475 , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.04650313, 0.00496456, 0.07349401, 0.01608641,\n", + " 0. , 0.02779119, 0. , 0.0808056 , 0.01403176,\n", + " 0. , 0.03768815, 0.1038278 , 0. , 0.03361662,\n", + " 0. , 0.02577607, 0.00140354, 0. , 0. ,\n", + " 0.03211498, 0. ],\n", + " [0.0088782 , 0.10450974, 0. , 0.06974535, 0.02328686,\n", + " 0. , 0.04052207, 0. , 0. , 0.02733764,\n", + " 0.08674346, 0. , 0. , 0.06129852, 0.02007267,\n", + " 0. , 0. , 0. , 0.03364263, 0. ,\n", + " 0.04525332, 0.05219702, 0.06375706, 0. , 0. ,\n", + " 0.00774407, 0.00273467, 0. , 0. , 0.00499633,\n", + " 0. , 0. ],\n", + " [0. , 0.02369069, 0. , 0.05827617, 0.05297644,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0.01719718, 0.02936822, 0.00466103, 0.06879887, 0.01944808,\n", + " 0.01585533, 0.01294545, 0. , 0.06866529, 0. ,\n", + " 0.00623766, 0.0351405 , 0.02407533, 0. , 0.05979815,\n", + " 0. , 0.01170142, 0. , 0. , 0. ,\n", + " 0.04444929, 0. ],\n", + " [0.03544863, 0. , 0. , 0.05054973, 0.06105441,\n", + " 0. , 0.00997427, 0.01403005, 0. , 0.01680727,\n", + " 0.0314851 , 0.03889389, 0. , 0.07710679, 0.0059097 ,\n", + " 0. , 0.00263033, 0. , 0.08935824, 0. ,\n", + " 0. , 0.05331149, 0.0522795 , 0. , 0.06658384,\n", + " 0.01881707, 0.02448696, 0. , 0. , 0. ,\n", + " 0.02008456, 0. ],\n", + " [0.03544863, 0. , 0. , 0.05054973, 0.06105442,\n", + " 0. , 0.00997426, 0.01403006, 0. 
, 0.01680727,\n", + " 0.03148509, 0.03889391, 0. , 0.07710679, 0.0059097 ,\n", + " 0. , 0.00263035, 0. , 0.08935823, 0. ,\n", + " 0. , 0.05331149, 0.05227951, 0. , 0.06658384,\n", + " 0.01881707, 0.02448694, 0. , 0. , 0. ,\n", + " 0.02008457, 0. ],\n", + " [0.03544864, 0. , 0. , 0.05054973, 0.06105441,\n", + " 0. , 0.00997426, 0.01403005, 0. , 0.01680726,\n", + " 0.0314851 , 0.03889389, 0. , 0.07710679, 0.0059097 ,\n", + " 0. , 0.00263034, 0. , 0.08935826, 0. ,\n", + " 0. , 0.0533115 , 0.0522795 , 0. , 0.06658384,\n", + " 0.01881707, 0.02448694, 0. , 0. , 0. ,\n", + " 0.02008457, 0. ],\n", + " [0.03544863, 0. , 0. , 0.05054973, 0.06105442,\n", + " 0. , 0.00997426, 0.01403005, 0. , 0.01680727,\n", + " 0.0314851 , 0.0388939 , 0. , 0.07710679, 0.0059097 ,\n", + " 0. , 0.00263034, 0. , 0.08935825, 0. ,\n", + " 0. , 0.05331149, 0.05227951, 0. , 0.06658386,\n", + " 0.01881707, 0.02448695, 0. , 0. , 0. ,\n", + " 0.02008456, 0. ],\n", + " [0.03544863, 0. , 0. , 0.05054973, 0.0610544 ,\n", + " 0. , 0.00997427, 0.01403005, 0. , 0.01680727,\n", + " 0.0314851 , 0.0388939 , 0. , 0.0771068 , 0.0059097 ,\n", + " 0. , 0.00263034, 0. , 0.08935825, 0. ,\n", + " 0. , 0.05331149, 0.05227951, 0. , 0.06658386,\n", + " 0.01881707, 0.02448695, 0. , 0. , 0. ,\n", + " 0.02008456, 0. ],\n", + " [0.03544863, 0. , 0. , 0.05054973, 0.06105442,\n", + " 0. , 0.00997426, 0.01403006, 0. , 0.01680726,\n", + " 0.03148509, 0.0388939 , 0. , 0.0771068 , 0.0059097 ,\n", + " 0. , 0.00263034, 0. , 0.08935824, 0. ,\n", + " 0. , 0.05331149, 0.05227952, 0. , 0.06658386,\n", + " 0.01881706, 0.02448695, 0. , 0. , 0. ,\n", + " 0.02008456, 0. ]]], dtype=float32)>,\n", " )" + " array([[0.08324985, 0.10450974, 0. , 0.1532475 , 0.06105442,\n", + " 0.01144416, 0.1623708 , 0.0228839 , 0. , 0.02733764,\n", + " 0.14336711, 0.04650313, 0.03401771, 0.0771068 , 0.08103722,\n", + " 0.01585533, 0.02779119, 0. , 0.13551372, 0.01403176,\n", + " 0.10309819, 0.09445544, 0.14525084, 0. 
, 0.06658386,\n", + " 0.04487597, 0.06560461, 0.06534287, 0.02272684, 0.00499633,\n", + " 0.05094624, 0. ]], dtype=float32)>)" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 71 + "metadata": {}, + "execution_count": 73 } ] }, @@ -3681,14 +3715,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "fec313e5-3e1f-45fd-9e34-f552f189bdf9" + "outputId": "6d723bc4-84d1-49eb-9ef9-0887364f029b" }, "source": [ + "# Set random seed and create embedding layer (new embedding layer for each model)\n", + "tf.random.set_seed(42)\n", + "from tensorflow.keras import layers\n", + "model_5_embedding = layers.Embedding(input_dim=max_vocab_length,\n", + " output_dim=128,\n", + " embeddings_initializer=\"uniform\",\n", + " input_length=max_length,\n", + " name=\"embedding_5\")\n", + "\n", "# Create 1-dimensional convolutional layer to model sequences\n", "from tensorflow.keras import layers\n", "inputs = layers.Input(shape=(1,), dtype=\"string\")\n", "x = text_vectorizer(inputs)\n", - "x = embedding(x)\n", + "x = model_5_embedding(x)\n", "x = layers.Conv1D(filters=32, kernel_size=5, activation=\"relu\")(x)\n", "x = layers.GlobalMaxPool1D()(x)\n", "# x = layers.Dense(64, activation=\"relu\")(x) # optional dense layer\n", @@ -3703,10 +3746,11 @@ "# Get a summary of our 1D convolution model\n", "model_5.summary()" ], - "execution_count": null, + "execution_count": 74, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Model: \"model_5_Conv1D\"\n", "_________________________________________________________________\n", @@ -3716,7 +3760,7 @@ "_________________________________________________________________\n", "text_vectorization_1 (TextVe (None, 15) 0 \n", "_________________________________________________________________\n", - "embedding (Embedding) (None, 15, 128) 1280000 \n", + "embedding_5 (Embedding) (None, 15, 128) 1280000 \n", "_________________________________________________________________\n", "conv1d_1 (Conv1D) (None, 11, 32) 
20512 \n", "_________________________________________________________________\n", @@ -3728,8 +3772,7 @@ "Trainable params: 1,300,545\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" - ], - "name": "stdout" + ] } ] }, @@ -3751,7 +3794,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "57b5c728-4f34-4c8d-dfe0-9febe28049c1" + "outputId": "544b3d97-2d9c-40a3-b0e5-51feac67a4fa" }, "source": [ "# Fit the model\n", @@ -3762,24 +3805,24 @@ " callbacks=[create_tensorboard_callback(SAVE_DIR, \n", " \"Conv1D\")])" ], - "execution_count": null, + "execution_count": 75, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Saving TensorBoard log files to: model_logs/Conv1D/20210415-022532\n", + "Saving TensorBoard log files to: model_logs/Conv1D/20210923-052810\n", "Epoch 1/5\n", - "215/215 [==============================] - 5s 18ms/step - loss: 0.2099 - accuracy: 0.9455 - val_loss: 0.8471 - val_accuracy: 0.7782\n", + "215/215 [==============================] - 5s 10ms/step - loss: 0.5652 - accuracy: 0.7141 - val_loss: 0.4733 - val_accuracy: 0.7795\n", "Epoch 2/5\n", - "215/215 [==============================] - 3s 14ms/step - loss: 0.0682 - accuracy: 0.9769 - val_loss: 0.9715 - val_accuracy: 0.7769\n", + "215/215 [==============================] - 2s 7ms/step - loss: 0.3380 - accuracy: 0.8615 - val_loss: 0.4758 - val_accuracy: 0.7730\n", "Epoch 3/5\n", - "215/215 [==============================] - 3s 14ms/step - loss: 0.0582 - accuracy: 0.9781 - val_loss: 1.0967 - val_accuracy: 0.7717\n", + "215/215 [==============================] - 2s 8ms/step - loss: 0.2070 - accuracy: 0.9234 - val_loss: 0.5457 - val_accuracy: 0.7730\n", "Epoch 4/5\n", - "215/215 [==============================] - 3s 14ms/step - loss: 0.0519 - accuracy: 0.9766 - val_loss: 1.1367 - val_accuracy: 0.7677\n", + "215/215 [==============================] - 2s 7ms/step - loss: 0.1314 - accuracy: 0.9578 - val_loss: 0.6163 
- val_accuracy: 0.7730\n", "Epoch 5/5\n", - "215/215 [==============================] - 3s 14ms/step - loss: 0.0473 - accuracy: 0.9798 - val_loss: 1.1908 - val_accuracy: 0.7598\n" - ], - "name": "stdout" + "215/215 [==============================] - 2s 7ms/step - loss: 0.0933 - accuracy: 0.9691 - val_loss: 0.6779 - val_accuracy: 0.7782\n" + ] } ] }, @@ -3799,35 +3842,33 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "58417fc7-dd25-4451-c6fa-ffbe83ddd0c6" + "outputId": "44a902e9-9710-4854-d61d-649b7a6f5ef3" }, "source": [ "# Make predictions with model_5\n", "model_5_pred_probs = model_5.predict(val_sentences)\n", "model_5_pred_probs[:10]" ], - "execution_count": null, + "execution_count": 76, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "array([[1.1530651e-01],\n", - " [5.0019783e-01],\n", - " [9.9993503e-01],\n", - " [5.3372920e-02],\n", - " [1.2500317e-06],\n", - " [9.9449313e-01],\n", - " [9.9005276e-01],\n", - " [9.9999309e-01],\n", - " [9.9999833e-01],\n", - " [8.0878931e-01]], dtype=float32)" + "array([[0.225345 ],\n", + " [0.7534112 ],\n", + " [0.9995602 ],\n", + " [0.05562792],\n", + " [0.01449848],\n", + " [0.9858518 ],\n", + " [0.98418933],\n", + " [0.99758804],\n", + " [0.99862623],\n", + " [0.26914373]], dtype=float32)" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 74 + "metadata": {}, + "execution_count": 76 } ] }, @@ -3838,26 +3879,24 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "c6cdf09b-9d3f-45a1-c632-b8addd91884a" + "outputId": "0bb1e353-5ecc-4cdb-cd73-07911d7857c2" }, "source": [ "# Convert model_5 prediction probabilities to labels\n", "model_5_preds = tf.squeeze(tf.round(model_5_pred_probs))\n", "model_5_preds[:10]" ], - "execution_count": null, + "execution_count": 77, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 75 + "metadata": {}, + "execution_count": 77 
} ] }, @@ -3868,7 +3907,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "3acf433c-7537-4e5d-a800-04594d46311f" + "outputId": "d45f024e-6352-4583-8946-bf3da8249a21" }, "source": [ "# Calculate model_5 evaluation metrics \n", @@ -3876,22 +3915,20 @@ " y_pred=model_5_preds)\n", "model_5_results" ], - "execution_count": null, + "execution_count": 78, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "{'accuracy': 75.98425196850394,\n", - " 'f1': 0.758116023760764,\n", - " 'precision': 0.7604578907479682,\n", - " 'recall': 0.7598425196850394}" + "{'accuracy': 77.82152230971128,\n", + " 'f1': 0.7758810170952618,\n", + " 'precision': 0.7807522349051432,\n", + " 'recall': 0.7782152230971129}" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 76 + "metadata": {}, + "execution_count": 78 } ] }, @@ -3902,23 +3939,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "cdd73a08-0470-45a4-f906-32616cf07c42" + "outputId": "8de35775-b434-4465-e63c-0066a7fa3f69" }, "source": [ "# Compare model_5 results to baseline \n", "compare_baseline_to_new_results(baseline_results, model_5_results)" ], - "execution_count": null, + "execution_count": 79, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Baseline accuracy: 79.27, New accuracy: 75.98, Difference: -3.28\n", - "Baseline precision: 0.81, New precision: 0.76, Difference: -0.05\n", - "Baseline recall: 0.79, New recall: 0.76, Difference: -0.03\n", - "Baseline f1: 0.79, New f1: 0.76, Difference: -0.03\n" - ], - "name": "stdout" + "Baseline accuracy: 79.27, New accuracy: 77.82, Difference: -1.44\n", + "Baseline precision: 0.81, New precision: 0.78, Difference: -0.03\n", + "Baseline recall: 0.79, New recall: 0.78, Difference: -0.01\n", + "Baseline f1: 0.79, New f1: 0.78, Difference: -0.01\n" + ] } ] }, @@ -3973,7 +4010,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "9419bd29-5272-4791-e55f-4dff0c0df276" + "outputId": 
"fc509df1-e7a6-4145-82ca-674785ac5257" }, "source": [ "# Example of pretrained embedding with universal sentence encoder - https://tfhub.dev/google/universal-sentence-encoder/4\n", @@ -3984,23 +4021,23 @@ "\n", "print(embed_samples[0][:50])" ], - "execution_count": null, + "execution_count": 80, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "tf.Tensor(\n", - "[-0.01157027 0.02485911 0.02878048 -0.012715 0.03971538 0.08827761\n", - " 0.02680985 0.05589839 -0.01068729 -0.00597292 0.00639323 -0.0181952\n", - " 0.00030814 0.09105889 0.05874645 -0.03180628 0.01512474 -0.05162929\n", - " 0.00991367 -0.06865346 -0.04209306 0.02678981 0.03011008 0.00321069\n", - " -0.00337971 -0.04787357 0.02266719 -0.00985925 -0.04063613 -0.01292093\n", - " -0.04666385 0.056303 -0.03949255 0.00517688 0.02495828 -0.07014441\n", - " 0.02871509 0.04947684 -0.00633978 -0.08960193 0.02807117 -0.00808362\n", - " -0.01360601 0.0599865 -0.10361787 -0.05195374 0.00232955 -0.0233253\n", - " -0.03758106 0.0332773 ], shape=(50,), dtype=float32)\n" - ], - "name": "stdout" + "[-0.01157024 0.0248591 0.0287805 -0.01271502 0.03971543 0.08827759\n", + " 0.02680986 0.05589837 -0.01068731 -0.0059729 0.00639324 -0.01819523\n", + " 0.00030817 0.09105891 0.05874644 -0.03180627 0.01512476 -0.05162928\n", + " 0.00991369 -0.06865346 -0.04209306 0.0267898 0.03011008 0.00321069\n", + " -0.00337969 -0.04787359 0.02266718 -0.00985924 -0.04063614 -0.01292095\n", + " -0.04666384 0.056303 -0.03949255 0.00517685 0.02495828 -0.07014439\n", + " 0.02871508 0.04947682 -0.00633971 -0.08960191 0.02807117 -0.00808362\n", + " -0.01360601 0.05998649 -0.10361786 -0.05195372 0.00232955 -0.02332528\n", + " -0.03758105 0.0332773 ], shape=(50,), dtype=float32)\n" + ] } ] }, @@ -4011,13 +4048,13 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "1a6f8a7e-dc14-49f6-96b7-b42ad290150b" + "outputId": "67d73b29-193e-4e95-bd8d-e071b551ec3b" }, "source": [ "# Each sentence has been encoded into a 512 
dimension vector\n", "embed_samples[0].shape" ], - "execution_count": null, + "execution_count": 81, "outputs": [ { "output_type": "execute_result", @@ -4026,10 +4063,8 @@ "TensorShape([512])" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 79 + "metadata": {}, + "execution_count": 81 } ] }, @@ -4061,7 +4096,7 @@ " trainable=False, # keep the pretrained weights (we'll create a feature extractor)\n", " name=\"USE\") " ], - "execution_count": null, + "execution_count": 82, "outputs": [] }, { @@ -4080,7 +4115,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "5b400734-163e-4421-f11f-b052b7f97020" + "outputId": "141cb32c-b1a7-46e8-8b95-12c7ec4d589e" }, "source": [ "# Create model using the Sequential API\n", @@ -4097,10 +4132,11 @@ "\n", "model_6.summary()" ], - "execution_count": null, + "execution_count": 83, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Model: \"model_6_USE\"\n", "_________________________________________________________________\n", @@ -4116,8 +4152,7 @@ "Trainable params: 32,897\n", "Non-trainable params: 256,797,824\n", "_________________________________________________________________\n" - ], - "name": "stdout" + ] } ] }, @@ -4141,7 +4176,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "6eafe8bd-9c7c-43de-930d-e148ceae2360" + "outputId": "f4c3c8d1-b3cf-4c87-dd75-2ed786b5e5d5" }, "source": [ "# Train a classifier on top of pretrained embeddings\n", @@ -4152,24 +4187,24 @@ " callbacks=[create_tensorboard_callback(SAVE_DIR, \n", " \"tf_hub_sentence_encoder\")])" ], - "execution_count": null, + "execution_count": 84, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Saving TensorBoard log files to: model_logs/tf_hub_sentence_encoder/20210415-022610\n", + "Saving TensorBoard log files to: model_logs/tf_hub_sentence_encoder/20210923-052854\n", "Epoch 1/5\n", - "215/215 [==============================] - 7s 22ms/step - loss: 0.5686 - accuracy: 0.7774 
- val_loss: 0.4494 - val_accuracy: 0.8031\n", + "215/215 [==============================] - 10s 32ms/step - loss: 0.5008 - accuracy: 0.7892 - val_loss: 0.4478 - val_accuracy: 0.7966\n", "Epoch 2/5\n", - "215/215 [==============================] - 2s 10ms/step - loss: 0.4254 - accuracy: 0.8112 - val_loss: 0.4434 - val_accuracy: 0.8084\n", + "215/215 [==============================] - 4s 19ms/step - loss: 0.4144 - accuracy: 0.8133 - val_loss: 0.4369 - val_accuracy: 0.8058\n", "Epoch 3/5\n", - "215/215 [==============================] - 2s 10ms/step - loss: 0.3904 - accuracy: 0.8295 - val_loss: 0.4322 - val_accuracy: 0.8163\n", + "215/215 [==============================] - 4s 19ms/step - loss: 0.3998 - accuracy: 0.8212 - val_loss: 0.4329 - val_accuracy: 0.8110\n", "Epoch 4/5\n", - "215/215 [==============================] - 2s 10ms/step - loss: 0.3790 - accuracy: 0.8373 - val_loss: 0.4293 - val_accuracy: 0.8176\n", + "215/215 [==============================] - 4s 18ms/step - loss: 0.3925 - accuracy: 0.8266 - val_loss: 0.4288 - val_accuracy: 0.8110\n", "Epoch 5/5\n", - "215/215 [==============================] - 2s 10ms/step - loss: 0.3907 - accuracy: 0.8250 - val_loss: 0.4304 - val_accuracy: 0.8176\n" - ], - "name": "stdout" + "215/215 [==============================] - 4s 17ms/step - loss: 0.3860 - accuracy: 0.8276 - val_loss: 0.4309 - val_accuracy: 0.8123\n" + ] } ] }, @@ -4189,35 +4224,33 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "dc248dbf-c152-44b2-cd5f-7fb7920f6298" + "outputId": "cbbc27f6-ee98-4633-f703-7a186fc6c932" }, "source": [ "# Make predictions with USE TF Hub model\n", "model_6_pred_probs = model_6.predict(val_sentences)\n", "model_6_pred_probs[:10]" ], - "execution_count": null, + "execution_count": 85, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "array([[0.24390718],\n", - " [0.83779854],\n", - " [0.99006784],\n", - " [0.20040977],\n", - " [0.76734614],\n", - " [0.78680325],\n", - " 
[0.98286486],\n", - " [0.98235786],\n", - " [0.9523692 ],\n", - " [0.12027436]], dtype=float32)" + "array([[0.14443193],\n", + " [0.7271502 ],\n", + " [0.9856655 ],\n", + " [0.19740924],\n", + " [0.73417026],\n", + " [0.6859663 ],\n", + " [0.9808888 ],\n", + " [0.97411025],\n", + " [0.91573215],\n", + " [0.08070081]], dtype=float32)" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 83 + "metadata": {}, + "execution_count": 85 } ] }, @@ -4228,14 +4261,14 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "9f59105e-f5e0-46e3-998a-7884a4bced61" + "outputId": "101ab428-2f85-42a0-b308-feb7888f8b5d" }, "source": [ "# Convert prediction probabilities to labels\n", "model_6_preds = tf.squeeze(tf.round(model_6_pred_probs))\n", "model_6_preds[:10]" ], - "execution_count": null, + "execution_count": 86, "outputs": [ { "output_type": "execute_result", @@ -4244,10 +4277,8 @@ "" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 84 + "metadata": {}, + "execution_count": 86 } ] }, @@ -4258,29 +4289,27 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "fd531d67-b5c7-44e8-c09c-ac5a19c26bf8" + "outputId": "0b82286e-5589-4ddc-8982-d1dd0da78f49" }, "source": [ "# Calculate model 6 performance metrics\n", "model_6_results = calculate_results(val_labels, model_6_preds)\n", "model_6_results" ], - "execution_count": null, + "execution_count": 87, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "{'accuracy': 81.75853018372703,\n", - " 'f1': 0.8170715418510598,\n", - " 'precision': 0.8175872271776553,\n", - " 'recall': 0.8175853018372703}" + "{'accuracy': 81.23359580052494,\n", + " 'f1': 0.810686575717776,\n", + " 'precision': 0.8148798668657973,\n", + " 'recall': 0.8123359580052494}" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 85 + "metadata": {}, + "execution_count": 87 } ] }, @@ -4291,23 +4320,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": 
"ab07cacd-2651-4e11-97c4-31c91611264a" + "outputId": "0ab489c1-4f16-428d-b391-46a95f10c044" }, "source": [ "# Compare TF Hub model to baseline\n", "compare_baseline_to_new_results(baseline_results, model_6_results)" ], - "execution_count": null, + "execution_count": 88, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Baseline accuracy: 79.27, New accuracy: 81.76, Difference: 2.49\n", - "Baseline precision: 0.81, New precision: 0.82, Difference: 0.01\n", - "Baseline recall: 0.79, New recall: 0.82, Difference: 0.02\n", - "Baseline f1: 0.79, New f1: 0.82, Difference: 0.03\n" - ], - "name": "stdout" + "Baseline accuracy: 79.27, New accuracy: 81.23, Difference: 1.97\n", + "Baseline precision: 0.81, New precision: 0.81, Difference: 0.00\n", + "Baseline recall: 0.79, New recall: 0.81, Difference: 0.02\n", + "Baseline f1: 0.79, New f1: 0.81, Difference: 0.02\n" + ] } ] }, @@ -4327,11 +4356,7 @@ { "cell_type": "code", "metadata": { - "id": "W5Sal8DpjzWm", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "4b84c0c7-f337-4da8-f1f0-1427b3906234" + "id": "W5Sal8DpjzWm" }, "source": [ "### NOTE: Making splits like this will lead to data leakage ###\n", @@ -4345,21 +4370,8 @@ "# train_labels_10_percent = train_10_percent[\"target\"].to_list()\n", "# len(train_sentences_10_percent), len(train_labels_10_percent)" ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(761, 761)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 87 - } - ] + "execution_count": 89, + "outputs": [] }, { "cell_type": "code", @@ -4374,7 +4386,7 @@ " test_size=0.1,\n", " random_state=42)\n" ], - "execution_count": null, + "execution_count": 90, "outputs": [] }, { @@ -4384,22 +4396,22 @@ "base_uri": "https://localhost:8080/" }, "id": "j8jaydmiVnJP", - "outputId": "ca7240c0-2300-4070-a728-bc1ef2c2945c" + "outputId": "17634293-52c5-4a51-e4ef-b5521d339bb3" }, "source": [ "# Check length 
of 10 percent datasets\n", "print(f\"Total training examples: {len(train_sentences)}\")\n", "print(f\"Length of 10% training examples: {len(train_sentences_10_percent)}\")" ], - "execution_count": null, + "execution_count": 91, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Total training examples: 6851\n", "Length of 10% training examples: 686\n" - ], - "name": "stdout" + ] } ] }, @@ -4419,14 +4431,14 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "6f07d1ed-296c-4d0e-a951-c626947857c1" + "outputId": "140c4560-e0a5-472e-bdc2-47374fe7f95e" }, "source": [ "# Check the number of targets in our subset of data \n", "# (this should be close to the distribution of labels in the original train_labels)\n", "pd.Series(train_labels_10_percent).value_counts()" ], - "execution_count": null, + "execution_count": 92, "outputs": [ { "output_type": "execute_result", @@ -4437,10 +4449,8 @@ "dtype: int64" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 89 + "metadata": {}, + "execution_count": 92 } ] }, @@ -4462,7 +4472,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "701ec34f-770b-4256-8b59-6dcae7190b23" + "outputId": "2c321240-d2c1-4afe-e8a0-fa35fe64038e" }, "source": [ "# Clone model_6 but reset weights\n", @@ -4476,10 +4486,11 @@ "# Get a summary (will be same as model_6)\n", "model_7.summary()" ], - "execution_count": null, + "execution_count": 93, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Model: \"model_6_USE\"\n", "_________________________________________________________________\n", @@ -4495,8 +4506,7 @@ "Trainable params: 32,897\n", "Non-trainable params: 256,797,824\n", "_________________________________________________________________\n" - ], - "name": "stdout" + ] } ] }, @@ -4516,7 +4526,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "284f5663-efa9-4cb6-ca69-0b24bba152c3" + "outputId": "65c50e4b-73f1-4012-af7a-f6ec5e2a1591" }, "source": [ 
"# Fit the model to 10% of the training data\n", @@ -4526,24 +4536,24 @@ " validation_data=(val_sentences, val_labels),\n", " callbacks=[create_tensorboard_callback(SAVE_DIR, \"10_percent_tf_hub_sentence_encoder\")])" ], - "execution_count": null, + "execution_count": 94, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Saving TensorBoard log files to: model_logs/10_percent_tf_hub_sentence_encoder/20210415-022654\n", + "Saving TensorBoard log files to: model_logs/10_percent_tf_hub_sentence_encoder/20210923-052925\n", "Epoch 1/5\n", - "22/22 [==============================] - 4s 117ms/step - loss: 0.6776 - accuracy: 0.6288 - val_loss: 0.6452 - val_accuracy: 0.6929\n", + "22/22 [==============================] - 6s 147ms/step - loss: 0.6716 - accuracy: 0.6574 - val_loss: 0.6526 - val_accuracy: 0.6903\n", "Epoch 2/5\n", - "22/22 [==============================] - 0s 19ms/step - loss: 0.6089 - accuracy: 0.7863 - val_loss: 0.5873 - val_accuracy: 0.7349\n", + "22/22 [==============================] - 1s 47ms/step - loss: 0.5972 - accuracy: 0.8032 - val_loss: 0.5944 - val_accuracy: 0.7362\n", "Epoch 3/5\n", - "22/22 [==============================] - 0s 18ms/step - loss: 0.5365 - accuracy: 0.8006 - val_loss: 0.5378 - val_accuracy: 0.7664\n", + "22/22 [==============================] - 1s 45ms/step - loss: 0.5178 - accuracy: 0.8149 - val_loss: 0.5398 - val_accuracy: 0.7625\n", "Epoch 4/5\n", - "22/22 [==============================] - 0s 19ms/step - loss: 0.4736 - accuracy: 0.8033 - val_loss: 0.5070 - val_accuracy: 0.7690\n", + "22/22 [==============================] - 1s 31ms/step - loss: 0.4526 - accuracy: 0.8265 - val_loss: 0.5084 - val_accuracy: 0.7677\n", "Epoch 5/5\n", - "22/22 [==============================] - 0s 19ms/step - loss: 0.4161 - accuracy: 0.8246 - val_loss: 0.4924 - val_accuracy: 0.7887\n" - ], - "name": "stdout" + "22/22 [==============================] - 1s 46ms/step - loss: 0.4094 - accuracy: 0.8382 - val_loss: 0.4915 - 
val_accuracy: 0.7703\n" + ] } ] }, @@ -4565,35 +4575,33 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "85e5ff4d-2e5d-4203-c996-f29525341f20" + "outputId": "3e9b410c-117c-4292-b33e-206ebf4de1a9" }, "source": [ "# Make predictions with the model trained on 10% of the data\n", "model_7_pred_probs = model_7.predict(val_sentences)\n", "model_7_pred_probs[:10]" ], - "execution_count": null, + "execution_count": 95, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "array([[0.25300834],\n", - " [0.7650462 ],\n", - " [0.8800145 ],\n", - " [0.30310774],\n", - " [0.5812007 ],\n", - " [0.8237751 ],\n", - " [0.81309175],\n", - " [0.8532617 ],\n", - " [0.8164243 ],\n", - " [0.12485447]], dtype=float32)" + "array([[0.24043235],\n", + " [0.76837844],\n", + " [0.90137184],\n", + " [0.29067948],\n", + " [0.57149994],\n", + " [0.8356514 ],\n", + " [0.8062943 ],\n", + " [0.83358175],\n", + " [0.85545677],\n", + " [0.11749928]], dtype=float32)" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 92 + "metadata": {}, + "execution_count": 95 } ] }, @@ -4604,14 +4612,14 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "b5dc24f9-6245-427f-a93a-eb36c3c9317f" + "outputId": "6ce5ceab-3cbf-4d0d-c762-f4c3893affe1" }, "source": [ "# Convert prediction probabilities to labels\n", "model_7_preds = tf.squeeze(tf.round(model_7_pred_probs))\n", "model_7_preds[:10]" ], - "execution_count": null, + "execution_count": 96, "outputs": [ { "output_type": "execute_result", @@ -4620,10 +4628,8 @@ "" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 93 + "metadata": {}, + "execution_count": 96 } ] }, @@ -4634,29 +4640,27 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "b145a8e4-7e43-4aad-e3e3-be5fe0c212bc" + "outputId": "ad1a4228-9944-4e17-ecae-6c864c3a51fa" }, "source": [ "# Calculate model results\n", "model_7_results = calculate_results(val_labels, model_7_preds)\n", "model_7_results" ], - 
"execution_count": null, + "execution_count": 97, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "{'accuracy': 78.87139107611549,\n", - " 'f1': 0.7853694687698636,\n", - " 'precision': 0.7948464726843409,\n", - " 'recall': 0.7887139107611548}" + "{'accuracy': 77.03412073490814,\n", + " 'f1': 0.7667059443150692,\n", + " 'precision': 0.7755630249535594,\n", + " 'recall': 0.7703412073490814}" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 94 + "metadata": {}, + "execution_count": 97 } ] }, @@ -4667,23 +4671,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "667f9daa-4e76-4e4c-b491-2338946e6109" + "outputId": "3d966d5b-0c50-48e9-bb29-1e55063a06e8" }, "source": [ "# Compare to baseline\n", "compare_baseline_to_new_results(baseline_results, model_7_results)" ], - "execution_count": null, + "execution_count": 98, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Baseline accuracy: 79.27, New accuracy: 78.87, Difference: -0.39\n", - "Baseline precision: 0.81, New precision: 0.79, Difference: -0.02\n", - "Baseline recall: 0.79, New recall: 0.79, Difference: -0.00\n", - "Baseline f1: 0.79, New f1: 0.79, Difference: -0.00\n" - ], - "name": "stdout" + "Baseline accuracy: 79.27, New accuracy: 77.03, Difference: -2.23\n", + "Baseline precision: 0.81, New precision: 0.78, Difference: -0.04\n", + "Baseline recall: 0.79, New recall: 0.77, Difference: -0.02\n", + "Baseline f1: 0.79, New f1: 0.77, Difference: -0.02\n" + ] } ] }, @@ -4712,9 +4716,9 @@ "id": "Ex0NSaz7lRf-", "colab": { "base_uri": "https://localhost:8080/", - "height": 288 + "height": 297 }, - "outputId": "638eebac-0a17-400d-a7b7-ec25d6195a8f" + "outputId": "421ea5ed-744d-4bc8-a0fb-b79e367f5ec3" }, "source": [ "# Combine model results into a DataFrame\n", @@ -4729,7 +4733,7 @@ "all_model_results = all_model_results.transpose()\n", "all_model_results" ], - "execution_count": null, + "execution_count": 99, "outputs": [ { "output_type": 
"execute_result", @@ -4769,52 +4773,52 @@ " \n", " \n", " simple_dense\n", - " 78.608924\n", - " 0.792092\n", - " 0.786089\n", - " 0.782703\n", + " 78.740157\n", + " 0.791492\n", + " 0.787402\n", + " 0.784697\n", " \n", " \n", " lstm\n", - " 76.771654\n", - " 0.768307\n", - " 0.767717\n", - " 0.766164\n", + " 75.065617\n", + " 0.751008\n", + " 0.750656\n", + " 0.748927\n", " \n", " \n", " gru\n", - " 77.296588\n", - " 0.774052\n", - " 0.772966\n", - " 0.771216\n", + " 76.771654\n", + " 0.767545\n", + " 0.767717\n", + " 0.766793\n", " \n", " \n", " bidirectional\n", " 76.640420\n", - " 0.767059\n", + " 0.766590\n", " 0.766404\n", - " 0.764784\n", + " 0.765121\n", " \n", " \n", " conv1d\n", - " 75.984252\n", - " 0.760458\n", - " 0.759843\n", - " 0.758116\n", + " 77.821522\n", + " 0.780752\n", + " 0.778215\n", + " 0.775881\n", " \n", " \n", " tf_hub_sentence_encoder\n", - " 81.758530\n", - " 0.817587\n", - " 0.817585\n", - " 0.817072\n", + " 81.233596\n", + " 0.814880\n", + " 0.812336\n", + " 0.810687\n", " \n", " \n", " tf_hub_10_percent_data\n", - " 78.871391\n", - " 0.794846\n", - " 0.788714\n", - " 0.785369\n", + " 77.034121\n", + " 0.775563\n", + " 0.770341\n", + " 0.766706\n", " \n", " \n", "\n", @@ -4823,19 +4827,17 @@ "text/plain": [ " accuracy precision recall f1\n", "baseline 79.265092 0.811139 0.792651 0.786219\n", - "simple_dense 78.608924 0.792092 0.786089 0.782703\n", - "lstm 76.771654 0.768307 0.767717 0.766164\n", - "gru 77.296588 0.774052 0.772966 0.771216\n", - "bidirectional 76.640420 0.767059 0.766404 0.764784\n", - "conv1d 75.984252 0.760458 0.759843 0.758116\n", - "tf_hub_sentence_encoder 81.758530 0.817587 0.817585 0.817072\n", - "tf_hub_10_percent_data 78.871391 0.794846 0.788714 0.785369" + "simple_dense 78.740157 0.791492 0.787402 0.784697\n", + "lstm 75.065617 0.751008 0.750656 0.748927\n", + "gru 76.771654 0.767545 0.767717 0.766793\n", + "bidirectional 76.640420 0.766590 0.766404 0.765121\n", + "conv1d 77.821522 0.780752 0.778215 
0.775881\n", + "tf_hub_sentence_encoder 81.233596 0.814880 0.812336 0.810687\n", + "tf_hub_10_percent_data 77.034121 0.775563 0.770341 0.766706" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 95 + "metadata": {}, + "execution_count": 99 } ] }, @@ -4848,7 +4850,7 @@ "# Reduce the accuracy to same scale as other metrics\n", "all_model_results[\"accuracy\"] = all_model_results[\"accuracy\"]/100" ], - "execution_count": null, + "execution_count": 100, "outputs": [] }, { @@ -4859,24 +4861,23 @@ "base_uri": "https://localhost:8080/", "height": 546 }, - "outputId": "83aa7e03-bd87-4b3f-ac10-0555a695e666" + "outputId": "cb7ae0e4-2a9c-4ef3-d23a-a9a93a6a992d" }, "source": [ "# Plot and compare all of the model results\n", "all_model_results.plot(kind=\"bar\", figsize=(10, 7)).legend(bbox_to_anchor=(1.0, 1.0));" ], - "execution_count": null, + "execution_count": 101, "outputs": [ { "output_type": "display_data", "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { - "tags": [], "needs_background": "light" } } @@ -4901,24 +4902,23 @@ "base_uri": "https://localhost:8080/", "height": 546 }, - "outputId": "f07c852a-9cdc-4755-8e72-27002da5d296" + "outputId": "97090bc0-63d5-41e2-f7b1-deddfcd99f53" }, "source": [ "# Sort model results by f1-score\n", "all_model_results.sort_values(\"f1\", ascending=False)[\"f1\"].plot(kind=\"bar\", figsize=(10, 7));" ], - "execution_count": null, + "execution_count": 102, "outputs": [ { "output_type": "display_data", "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { - "tags": [], "needs_background": "light" } } @@ -4948,7 +4948,7 @@ "# --description \"A series of different NLP modellings experiments with various models\" \\\n", "# --one_shot # exits the uploader when upload has finished" ], - "execution_count": null, + "execution_count": 103, "outputs": [] }, { @@ -4969,7 +4969,7 @@ "# If you need to remove previous experiments, you can do so using the following command\n", "# !tensorboard dev delete --experiment_id EXPERIMENT_ID_TO_DELETE" ], - "execution_count": null, + "execution_count": 104, "outputs": [] }, { @@ -5011,7 +5011,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "413fd9a0-083f-45c5-c6dc-e58550bbd48f" + "outputId": "47a65c8c-325a-402c-bfa3-0a940416bd54" }, "source": [ "# Get mean pred probs for 3 models\n", @@ -5020,21 +5020,19 @@ "combined_preds = tf.round(combined_pred_probs/3) # average and round the prediction probabilities to get prediction classes\n", "combined_preds[:20]" ], - "execution_count": null, + "execution_count": 105, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" + "array([0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 1., 0., 0., 0.,\n", + " 0., 0., 1.], dtype=float32)>" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 101 + "metadata": {}, + "execution_count": 105 } ] }, @@ -5054,29 +5052,27 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "6f95d40b-0e13-4c78-b233-c285616d7a88" + "outputId": "b3b965dd-54c8-4796-edcc-b31e344b38bb" }, "source": [ "# Calculate results from averaging the prediction probabilities\n", "ensemble_results = calculate_results(val_labels, combined_preds)\n", "ensemble_results" ], - "execution_count": null, + "execution_count": 106, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "{'accuracy': 78.60892388451444,\n", - " 'f1': 0.7863723349682415,\n", - " 'precision': 0.7872647389280777,\n", - " 'recall': 0.7860892388451444}" + 
"{'accuracy': 78.08398950131233,\n", + " 'f1': 0.7805169025578647,\n", + " 'precision': 0.7805216999297674,\n", + " 'recall': 0.7808398950131233}" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 102 + "metadata": {}, + "execution_count": 106 } ] }, @@ -5089,7 +5085,7 @@ "# Add our combined model's results to the results DataFrame\n", "all_model_results.loc[\"ensemble_results\"] = ensemble_results" ], - "execution_count": null, + "execution_count": 107, "outputs": [] }, { @@ -5101,7 +5097,7 @@ "# Convert the accuracy to the same scale as the rest of the results\n", "all_model_results.loc[\"ensemble_results\"][\"accuracy\"] = all_model_results.loc[\"ensemble_results\"][\"accuracy\"]/100" ], - "execution_count": null, + "execution_count": 108, "outputs": [] }, { @@ -5110,14 +5106,14 @@ "id": "trmdZ6eEpwHI", "colab": { "base_uri": "https://localhost:8080/", - "height": 318 + "height": 328 }, - "outputId": "6837f4e4-5eb9-461f-9feb-b9a0b3006941" + "outputId": "b2704163-e293-4b88-f289-efcc04d9b4b2" }, "source": [ "all_model_results" ], - "execution_count": null, + "execution_count": 109, "outputs": [ { "output_type": "execute_result", @@ -5157,59 +5153,59 @@ " \n", " \n", " simple_dense\n", - " 0.786089\n", - " 0.792092\n", - " 0.786089\n", - " 0.782703\n", + " 0.787402\n", + " 0.791492\n", + " 0.787402\n", + " 0.784697\n", " \n", " \n", " lstm\n", - " 0.767717\n", - " 0.768307\n", - " 0.767717\n", - " 0.766164\n", + " 0.750656\n", + " 0.751008\n", + " 0.750656\n", + " 0.748927\n", " \n", " \n", " gru\n", - " 0.772966\n", - " 0.774052\n", - " 0.772966\n", - " 0.771216\n", + " 0.767717\n", + " 0.767545\n", + " 0.767717\n", + " 0.766793\n", " \n", " \n", " bidirectional\n", " 0.766404\n", - " 0.767059\n", + " 0.766590\n", " 0.766404\n", - " 0.764784\n", + " 0.765121\n", " \n", " \n", " conv1d\n", - " 0.759843\n", - " 0.760458\n", - " 0.759843\n", - " 0.758116\n", + " 0.778215\n", + " 0.780752\n", + " 0.778215\n", + " 0.775881\n", " \n", " \n", " 
tf_hub_sentence_encoder\n", - " 0.817585\n", - " 0.817587\n", - " 0.817585\n", - " 0.817072\n", + " 0.812336\n", + " 0.814880\n", + " 0.812336\n", + " 0.810687\n", " \n", " \n", " tf_hub_10_percent_data\n", - " 0.788714\n", - " 0.794846\n", - " 0.788714\n", - " 0.785369\n", + " 0.770341\n", + " 0.775563\n", + " 0.770341\n", + " 0.766706\n", " \n", " \n", " ensemble_results\n", - " 0.786089\n", - " 0.787265\n", - " 0.786089\n", - " 0.786372\n", + " 0.780840\n", + " 0.780522\n", + " 0.780840\n", + " 0.780517\n", " \n", " \n", "\n", @@ -5218,20 +5214,18 @@ "text/plain": [ " accuracy precision recall f1\n", "baseline 0.792651 0.811139 0.792651 0.786219\n", - "simple_dense 0.786089 0.792092 0.786089 0.782703\n", - "lstm 0.767717 0.768307 0.767717 0.766164\n", - "gru 0.772966 0.774052 0.772966 0.771216\n", - "bidirectional 0.766404 0.767059 0.766404 0.764784\n", - "conv1d 0.759843 0.760458 0.759843 0.758116\n", - "tf_hub_sentence_encoder 0.817585 0.817587 0.817585 0.817072\n", - "tf_hub_10_percent_data 0.788714 0.794846 0.788714 0.785369\n", - "ensemble_results 0.786089 0.787265 0.786089 0.786372" + "simple_dense 0.787402 0.791492 0.787402 0.784697\n", + "lstm 0.750656 0.751008 0.750656 0.748927\n", + "gru 0.767717 0.767545 0.767717 0.766793\n", + "bidirectional 0.766404 0.766590 0.766404 0.765121\n", + "conv1d 0.778215 0.780752 0.778215 0.775881\n", + "tf_hub_sentence_encoder 0.812336 0.814880 0.812336 0.810687\n", + "tf_hub_10_percent_data 0.770341 0.775563 0.770341 0.766706\n", + "ensemble_results 0.780840 0.780522 0.780840 0.780517" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 105 + "metadata": {}, + "execution_count": 109 } ] }, @@ -5274,7 +5268,7 @@ "# Save TF Hub Sentence Encoder model to HDF5 format\n", "model_6.save(\"model_6.h5\")" ], - "execution_count": null, + "execution_count": 110, "outputs": [] }, { @@ -5289,34 +5283,15 @@ { "cell_type": "code", "metadata": { - "id": "sSINZ0Q-nRb2", - "colab": { - "base_uri": "https://localhost:8080/" - }, 
- "outputId": "4ab16ab7-f31f-425d-9f62-596adc6d16af" + "id": "sSINZ0Q-nRb2" }, "source": [ "# Load model with custom Hub Layer (required with HDF5 format)\n", "loaded_model_6 = tf.keras.models.load_model(\"model_6.h5\", \n", " custom_objects={\"KerasLayer\": hub.KerasLayer})" ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "WARNING:tensorflow:5 out of the last 5 calls to .restored_function_body at 0x7f924d1eb680> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n" - ], - "name": "stdout" - }, - { - "output_type": "stream", - "text": [ - "WARNING:tensorflow:5 out of the last 5 calls to .restored_function_body at 0x7f924d1eb680> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. 
For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n" - ], - "name": "stderr" - } - ] + "execution_count": 111, + "outputs": [] }, { "cell_type": "code", @@ -5325,32 +5300,30 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "ad6021c4-2b33-4155-8d71-09636cb35984" + "outputId": "4d3a7c0e-bea8-4f99-923d-943213cd72cd" }, "source": [ "# How does our loaded model perform?\n", "loaded_model_6.evaluate(val_sentences, val_labels)" ], - "execution_count": null, + "execution_count": 112, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "24/24 [==============================] - 1s 8ms/step - loss: 0.4304 - accuracy: 0.8176\n" - ], - "name": "stdout" + "24/24 [==============================] - 1s 14ms/step - loss: 0.4309 - accuracy: 0.8123\n" + ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "[0.4303818345069885, 0.817585289478302]" + "[0.43088313937187195, 0.8123359680175781]" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 108 + "metadata": {}, + "execution_count": 112 } ] }, @@ -5370,27 +5343,34 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "94533137-4178-443a-da61-d4d3d6830150" + "outputId": "a55b3bd1-ac2d-45c1-90f2-4b021c9368f9" }, "source": [ "# Save TF Hub Sentence Encoder model to SavedModel format (default)\n", "model_6.save(\"model_6_SavedModel_format\")" ], - "execution_count": null, + "execution_count": 113, "outputs": [ { "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:absl:Function `_wrapped_model` contains input name(s) USE_input with unsupported characters which will be renamed to use_input in the SavedModel.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", "text": [ "INFO:tensorflow:Assets written to: model_6_SavedModel_format/assets\n" - ], - "name": "stdout" + ] }, { "output_type": "stream", + "name": "stderr", 
"text": [ "INFO:tensorflow:Assets written to: model_6_SavedModel_format/assets\n" - ], - "name": "stderr" + ] } ] }, @@ -5406,33 +5386,14 @@ { "cell_type": "code", "metadata": { - "id": "Dw3zf4fVoU5H", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "5f73ff4b-2741-4a61-8c58-ad84863fe19c" + "id": "Dw3zf4fVoU5H" }, "source": [ "# Load TF Hub Sentence Encoder SavedModel\n", "loaded_model_6_SavedModel = tf.keras.models.load_model(\"model_6_SavedModel_format\")" ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "WARNING:tensorflow:6 out of the last 6 calls to .restored_function_body at 0x7f9039be9320> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n" - ], - "name": "stdout" - }, - { - "output_type": "stream", - "text": [ - "WARNING:tensorflow:6 out of the last 6 calls to .restored_function_body at 0x7f9039be9320> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. 
For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n" - ], - "name": "stderr" - } - ] + "execution_count": 114, + "outputs": [] }, { "cell_type": "code", @@ -5441,32 +5402,30 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "c599ea7b-e852-49fb-db3b-8bc0b852717e" + "outputId": "d4395889-dca0-4061-ba22-1723001c5c4e" }, "source": [ "# Evaluate loaded SavedModel format\n", "loaded_model_6_SavedModel.evaluate(val_sentences, val_labels)" ], - "execution_count": null, + "execution_count": 115, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "24/24 [==============================] - 1s 8ms/step - loss: 0.4304 - accuracy: 0.8176\n" - ], - "name": "stdout" + "24/24 [==============================] - 1s 14ms/step - loss: 0.4309 - accuracy: 0.8123\n" + ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "[0.4303818345069885, 0.817585289478302]" + "[0.43088313937187195, 0.8123359680175781]" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 111 + "metadata": {}, + "execution_count": 115 } ] }, @@ -5510,9 +5469,9 @@ "id": "gnHfX--TwMIW", "colab": { "base_uri": "https://localhost:8080/", - "height": 198 + "height": 204 }, - "outputId": "2f70c623-9f24-4563-87f6-9363774fb3aa" + "outputId": "d060505e-21c7-42b7-e664-8c517f92d425" }, "source": [ "# Create dataframe with validation sentences and best performing model predictions\n", @@ -5522,7 +5481,7 @@ " \"pred_prob\": tf.squeeze(model_6_pred_probs)})\n", "val_df.head()" ], - "execution_count": null, + "execution_count": 116, "outputs": [ { "output_type": "execute_result", @@ -5558,35 +5517,35 @@ " DFR EP016 Monthly Meltdown - On Dnbheaven 2015...\n", " 0\n", " 0.0\n", - " 0.223132\n", + " 0.144432\n", " \n", " \n", " 1\n", " FedEx no longer to transport bioterror germs i...\n", " 0\n", " 1.0\n", - " 0.828156\n", + " 0.727150\n", " \n", " \n", " 2\n", " 
Gunmen kill four in El Salvador bus attack: Su...\n", " 1\n", " 1.0\n", - " 0.986846\n", + " 0.985666\n", " \n", " \n", " 3\n", " @camilacabello97 Internally and externally scr...\n", " 1\n", " 0.0\n", - " 0.215614\n", + " 0.197409\n", " \n", " \n", " 4\n", " Radiation emergency #preparedness starts with ...\n", " 1\n", " 1.0\n", - " 0.727963\n", + " 0.734170\n", " \n", " \n", "\n", @@ -5594,17 +5553,15 @@ ], "text/plain": [ " text target pred pred_prob\n", - "0 DFR EP016 Monthly Meltdown - On Dnbheaven 2015... 0 0.0 0.223132\n", - "1 FedEx no longer to transport bioterror germs i... 0 1.0 0.828156\n", - "2 Gunmen kill four in El Salvador bus attack: Su... 1 1.0 0.986846\n", - "3 @camilacabello97 Internally and externally scr... 1 0.0 0.215614\n", - "4 Radiation emergency #preparedness starts with ... 1 1.0 0.727963" + "0 DFR EP016 Monthly Meltdown - On Dnbheaven 2015... 0 0.0 0.144432\n", + "1 FedEx no longer to transport bioterror germs i... 0 1.0 0.727150\n", + "2 Gunmen kill four in El Salvador bus attack: Su... 1 1.0 0.985666\n", + "3 @camilacabello97 Internally and externally scr... 1 0.0 0.197409\n", + "4 Radiation emergency #preparedness starts with ... 
1 1.0 0.734170" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 112 + "metadata": {}, + "execution_count": 116 } ] }, @@ -5623,16 +5580,16 @@ "id": "0DwBXQS1wvZx", "colab": { "base_uri": "https://localhost:8080/", - "height": 348 + "height": 359 }, - "outputId": "c3e2127c-87a8-46fc-ce62-ae3776faab18" + "outputId": "7d37fad0-db94-471e-fef1-3859b48c58f3" }, "source": [ "# Find the wrong predictions and sort by prediction probabilities\n", "most_wrong = val_df[val_df[\"target\"] != val_df[\"pred\"]].sort_values(\"pred_prob\", ascending=False)\n", "most_wrong[:10]" ], - "execution_count": null, + "execution_count": 117, "outputs": [ { "output_type": "execute_result", @@ -5664,74 +5621,74 @@ " \n", " \n", " \n", - " 759\n", - " FedEx will no longer transport bioterror patho...\n", - " 0\n", - " 1.0\n", - " 0.917414\n", - " \n", - " \n", " 31\n", " ? High Skies - Burning Buildings ? http://t.co...\n", " 0\n", " 1.0\n", - " 0.914550\n", + " 0.910481\n", " \n", " \n", - " 49\n", - " @madonnamking RSPCA site multiple 7 story high...\n", + " 759\n", + " FedEx will no longer transport bioterror patho...\n", " 0\n", " 1.0\n", - " 0.858741\n", + " 0.864676\n", " \n", " \n", - " 628\n", - " @noah_anyname That's where the concentration c...\n", + " 209\n", + " Ashes 2015: Australia‰Ûªs collapse at Trent Br...\n", " 0\n", " 1.0\n", - " 0.856757\n", + " 0.837961\n", " \n", " \n", - " 209\n", - " Ashes 2015: Australia‰Ûªs collapse at Trent Br...\n", + " 393\n", + " @SonofLiberty357 all illuminated by the bright...\n", " 0\n", " 1.0\n", - " 0.840476\n", + " 0.836361\n", " \n", " \n", - " 1\n", - " FedEx no longer to transport bioterror germs i...\n", + " 628\n", + " @noah_anyname That's where the concentration c...\n", " 0\n", " 1.0\n", - " 0.828156\n", + " 0.835225\n", " \n", " \n", - " 393\n", - " @SonofLiberty357 all illuminated by the bright...\n", + " 49\n", + " @madonnamking RSPCA site multiple 7 story high...\n", " 0\n", " 1.0\n", - " 0.826324\n", + " 0.834875\n", 
" \n", " \n", " 109\n", " [55436] 1950 LIONEL TRAINS SMOKE LOCOMOTIVES W...\n", " 0\n", " 1.0\n", - " 0.817606\n", + " 0.800890\n", " \n", " \n", " 251\n", " @AshGhebranious civil rights continued in the ...\n", " 0\n", " 1.0\n", - " 0.802468\n", + " 0.782611\n", " \n", " \n", " 698\n", " åÈMGN-AFRICAå¨ pin:263789F4 åÈ Correction: Ten...\n", " 0\n", " 1.0\n", - " 0.785669\n", + " 0.782433\n", + " \n", + " \n", + " 144\n", + " The Sound of Arson\n", + " 0\n", + " 1.0\n", + " 0.771343\n", " \n", " \n", "\n", @@ -5739,22 +5696,20 @@ ], "text/plain": [ " text target pred pred_prob\n", - "759 FedEx will no longer transport bioterror patho... 0 1.0 0.917414\n", - "31 ? High Skies - Burning Buildings ? http://t.co... 0 1.0 0.914550\n", - "49 @madonnamking RSPCA site multiple 7 story high... 0 1.0 0.858741\n", - "628 @noah_anyname That's where the concentration c... 0 1.0 0.856757\n", - "209 Ashes 2015: Australia‰Ûªs collapse at Trent Br... 0 1.0 0.840476\n", - "1 FedEx no longer to transport bioterror germs i... 0 1.0 0.828156\n", - "393 @SonofLiberty357 all illuminated by the bright... 0 1.0 0.826324\n", - "109 [55436] 1950 LIONEL TRAINS SMOKE LOCOMOTIVES W... 0 1.0 0.817606\n", - "251 @AshGhebranious civil rights continued in the ... 0 1.0 0.802468\n", - "698 åÈMGN-AFRICAå¨ pin:263789F4 åÈ Correction: Ten... 0 1.0 0.785669" + "31 ? High Skies - Burning Buildings ? http://t.co... 0 1.0 0.910481\n", + "759 FedEx will no longer transport bioterror patho... 0 1.0 0.864676\n", + "209 Ashes 2015: Australia‰Ûªs collapse at Trent Br... 0 1.0 0.837961\n", + "393 @SonofLiberty357 all illuminated by the bright... 0 1.0 0.836361\n", + "628 @noah_anyname That's where the concentration c... 0 1.0 0.835225\n", + "49 @madonnamking RSPCA site multiple 7 story high... 0 1.0 0.834875\n", + "109 [55436] 1950 LIONEL TRAINS SMOKE LOCOMOTIVES W... 0 1.0 0.800890\n", + "251 @AshGhebranious civil rights continued in the ... 
0 1.0 0.782611\n", + "698 åÈMGN-AFRICAå¨ pin:263789F4 åÈ Correction: Ten... 0 1.0 0.782433\n", + "144 The Sound of Arson 0 1.0 0.771343" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 113 + "metadata": {}, + "execution_count": 117 } ] }, @@ -5778,7 +5733,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "bdcc733e-92d4-4113-bbf7-672762a882d0" + "outputId": "28d66210-3007-4661-e8c2-655d621f0e90" }, "source": [ "# Check the false positives (model predicted 1 when should've been 0)\n", @@ -5788,75 +5743,75 @@ " print(f\"Text:\\n{text}\\n\")\n", " print(\"----\\n\")" ], - "execution_count": null, + "execution_count": 118, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Target: 0, Pred: 1, Prob: 0.9174144864082336\n", - "Text:\n", - "FedEx will no longer transport bioterror pathogens in wake of anthrax lab mishaps http://t.co/lHpgxc4b8J\n", - "\n", - "----\n", - "\n", - "Target: 0, Pred: 1, Prob: 0.9145500659942627\n", + "Target: 0, Pred: 1, Prob: 0.9104808568954468\n", "Text:\n", "? High Skies - Burning Buildings ? http://t.co/uVq41i3Kx2 #nowplaying\n", "\n", "----\n", "\n", - "Target: 0, Pred: 1, Prob: 0.8587406277656555\n", + "Target: 0, Pred: 1, Prob: 0.8646755218505859\n", "Text:\n", - "@madonnamking RSPCA site multiple 7 story high rise buildings next to low density character residential in an area that floods\n", + "FedEx will no longer transport bioterror pathogens in wake of anthrax lab mishaps http://t.co/lHpgxc4b8J\n", "\n", "----\n", "\n", - "Target: 0, Pred: 1, Prob: 0.8567568063735962\n", + "Target: 0, Pred: 1, Prob: 0.8379608988761902\n", "Text:\n", - "@noah_anyname That's where the concentration camps and mass murder come in. \n", - " \n", - "EVERY. FUCKING. TIME.\n", + "Ashes 2015: Australia‰Ûªs collapse at Trent Bridge among worst in history: England bundled out Australia for 60 ... 
http://t.co/t5TrhjUAU0\n", "\n", "----\n", "\n", - "Target: 0, Pred: 1, Prob: 0.8404760956764221\n", + "Target: 0, Pred: 1, Prob: 0.8363614082336426\n", "Text:\n", - "Ashes 2015: Australia‰Ûªs collapse at Trent Bridge among worst in history: England bundled out Australia for 60 ... http://t.co/t5TrhjUAU0\n", + "@SonofLiberty357 all illuminated by the brightly burning buildings all around the town!\n", "\n", "----\n", "\n", - "Target: 0, Pred: 1, Prob: 0.8281557559967041\n", + "Target: 0, Pred: 1, Prob: 0.8352250456809998\n", "Text:\n", - "FedEx no longer to transport bioterror germs in wake of anthrax lab mishaps http://t.co/qZQc8WWwcN via @usatoday\n", + "@noah_anyname That's where the concentration camps and mass murder come in. \n", + " \n", + "EVERY. FUCKING. TIME.\n", "\n", "----\n", "\n", - "Target: 0, Pred: 1, Prob: 0.8263236284255981\n", + "Target: 0, Pred: 1, Prob: 0.8348745107650757\n", "Text:\n", - "@SonofLiberty357 all illuminated by the brightly burning buildings all around the town!\n", + "@madonnamking RSPCA site multiple 7 story high rise buildings next to low density character residential in an area that floods\n", "\n", "----\n", "\n", - "Target: 0, Pred: 1, Prob: 0.8176058530807495\n", + "Target: 0, Pred: 1, Prob: 0.800889790058136\n", "Text:\n", "[55436] 1950 LIONEL TRAINS SMOKE LOCOMOTIVES WITH MAGNE-TRACTION INSTRUCTIONS http://t.co/xEZBs3sq0y http://t.co/C2x0QoKGlY\n", "\n", "----\n", "\n", - "Target: 0, Pred: 1, Prob: 0.8024678230285645\n", + "Target: 0, Pred: 1, Prob: 0.7826112508773804\n", "Text:\n", "@AshGhebranious civil rights continued in the 60s. And what about trans-generational trauma? 
if anything we should listen to the Americans.\n", "\n", "----\n", "\n", - "Target: 0, Pred: 1, Prob: 0.785668671131134\n", + "Target: 0, Pred: 1, Prob: 0.7824334502220154\n", "Text:\n", "åÈMGN-AFRICAå¨ pin:263789F4 åÈ Correction: Tent Collapse Story: Correction: Tent Collapse story åÈ http://t.co/fDJUYvZMrv @wizkidayo\n", "\n", "----\n", + "\n", + "Target: 0, Pred: 1, Prob: 0.7713427543640137\n", + "Text:\n", + "The Sound of Arson\n", + "\n", + "----\n", "\n" - ], - "name": "stdout" + ] } ] }, @@ -5876,7 +5831,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "fd6c6812-631c-473a-e4eb-d5369f9738ef" + "outputId": "4c5fdb89-38b2-405a-f5d5-f816b83d3c79" }, "source": [ "# Check the most wrong false negatives (model predicted 0 when should've predict 1)\n", @@ -5886,73 +5841,73 @@ " print(f\"Text:\\n{text}\\n\")\n", " print(\"----\\n\")" ], - "execution_count": null, + "execution_count": 119, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Target: 1, Pred: 0, Prob: 0.06116531044244766\n", + "Target: 1, Pred: 0, Prob: 0.06304337829351425\n", "Text:\n", - "'The way you move is like a full on rainstorm and I'm a house of cards'\n", + "@BoyInAHorsemask its a panda trapped in a dogs body\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.056979626417160034\n", + "Target: 1, Pred: 0, Prob: 0.06279505044221878\n", "Text:\n", - "Lucas Duda is Ghost Rider. Not the Nic Cage version but an actual 'engulfed in flames' badass. 
#Mets\n", + "going to redo my nails and watch behind the scenes of desolation of smaug ayyy\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.05697683244943619\n", + "Target: 1, Pred: 0, Prob: 0.06060810014605522\n", "Text:\n", - "@DavidVonderhaar At least you were sincere ??\n", + "VICTORINOX SWISS ARMY DATE WOMEN'S RUBBER MOP WATCH 241487 http://t.co/yFy3nkkcoH http://t.co/KNEhVvOHVK\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.05442988872528076\n", + "Target: 1, Pred: 0, Prob: 0.0573178268969059\n", "Text:\n", "@willienelson We need help! Horses will die!Please RT & sign petition!Take a stand & be a voice for them! #gilbert23 https://t.co/e8dl1lNCVu\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.04985428601503372\n", + "Target: 1, Pred: 0, Prob: 0.04535556212067604\n", "Text:\n", "You can never escape me. Bullets don't harm me. Nothing harms me. But I know pain. I know pain. Sometimes I share it. With someone like you.\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.0436292327940464\n", + "Target: 1, Pred: 0, Prob: 0.04145137220621109\n", "Text:\n", "I get to smoke my shit in peace\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.04113191366195679\n", + "Target: 1, Pred: 0, Prob: 0.03926113247871399\n", "Text:\n", - "Ron & Fez - Dave's High School Crush https://t.co/aN3W16c8F6 via @YouTube\n", + "@SoonerMagic_ I mean I'm a fan but I don't need a girl sounding off like a damn siren\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.04028240218758583\n", + "Target: 1, Pred: 0, Prob: 0.0385933592915535\n", "Text:\n", "Why are you deluged with low self-image? 
Take the quiz: http://t.co/XsPqdOrIqj http://t.co/CQYvFR4UCy\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.037921249866485596\n", + "Target: 1, Pred: 0, Prob: 0.03627230226993561\n", "Text:\n", "Reddit Will Now Quarantine‰Û_ http://t.co/pkUAMXw6pm #onlinecommunities #reddit #amageddon #freespeech #Business http://t.co/PAWvNJ4sAP\n", "\n", "----\n", "\n", - "Target: 1, Pred: 0, Prob: 0.03653673082590103\n", + "Target: 1, Pred: 0, Prob: 0.032887961715459824\n", "Text:\n", - "@SoonerMagic_ I mean I'm a fan but I don't need a girl sounding off like a damn siren\n", + "Ron & Fez - Dave's High School Crush https://t.co/aN3W16c8F6 via @YouTube\n", "\n", "----\n", "\n" - ], - "name": "stdout" + ] } ] }, @@ -5991,7 +5946,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "982a08d6-ac7e-4ac2-8a92-f672662a785f" + "outputId": "f81e856d-8024-4471-ee0d-3c14eb128cb0" }, "source": [ "# Making predictions on the test dataset\n", @@ -6004,73 +5959,75 @@ " print(f\"Text:\\n{test_sample}\\n\")\n", " print(\"----\\n\")" ], - "execution_count": null, + "execution_count": 120, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Pred: 0, Prob: 0.33723971247673035\n", + "Pred: 1, Prob: 0.538340151309967\n", "Text:\n", - "02 03 04 05 AVALANCHE 1500 REAR AXLE ASSEMBLY 2055271 http://t.co/VxZhZsAlra http://t.co/HmXWRkbLS0\n", + "Flash Flood Watch in effect through 7:00am Thursday morning/12:00pm Thursday afternoon.\n", + "For: Perry Wayne Cape... http://t.co/fs7vro5seS\n", "\n", "----\n", "\n", - "Pred: 1, Prob: 0.8898343443870544\n", + "Pred: 1, Prob: 0.9250481128692627\n", "Text:\n", - "Here's a quick timelapse I made of the Finger Rock Fire last night from about 9PM - 1AM. Check it out! #fingerrockfire #wildfire #catalinas\n", + "NONSENSE >> famine memories -- strong exaggeration of Ukrainian MSM\n", + "#ukraine #russia #?????????? 
#sanctions https://t.co/dDOTd7W2o8\n", "\n", "----\n", "\n", - "Pred: 0, Prob: 0.4341371953487396\n", + "Pred: 1, Prob: 0.8940093517303467\n", "Text:\n", - "Sun_OfGod: Breaking news! Unconfirmed! I just heard a loud bang nearby. in what appears to be a blast of wind from my neighbour's ass.\n", + "New warning for Central Hills 1' hail 60 mph winds. NOT affecting Sturgis but could later tonight. #KOTAWeather http://t.co/E8oUxVKuTE\n", "\n", "----\n", "\n", - "Pred: 0, Prob: 0.20915700495243073\n", + "Pred: 0, Prob: 0.07600127905607224\n", "Text:\n", - "They can only ban SAFE abortions- back alley tragedy will come back. USA is NOT a theocracy!!!!! http://t.co/qIkS2FUTb1\n", + "@imaginator1dx currently reading after. as you can see after we collided is on my dresser waiting to get read http://t.co/QwrASZ6LHO\n", "\n", "----\n", "\n", - "Pred: 1, Prob: 0.5870816111564636\n", + "Pred: 0, Prob: 0.026800094172358513\n", "Text:\n", - "Texian Iliad: A Military History of the Texas Revolution by Hardin Stephen L. http://t.co/RrIDS0mrJ1 http://t.co/wybtUk7LbG\n", + "Don't ruin a good today by thinking about a bad yesterday ????\n", "\n", "----\n", "\n", - "Pred: 0, Prob: 0.08807205408811569\n", + "Pred: 0, Prob: 0.21049749851226807\n", "Text:\n", - "I'd love to see a nigga try and diss the King haha he would OBLITERATE THEM. With no struggle! 
https://t.co/cwn2gT0r5p\n", + "I hope I get electrocuted today at work\n", "\n", "----\n", "\n", - "Pred: 0, Prob: 0.16543787717819214\n", + "Pred: 0, Prob: 0.07167388498783112\n", "Text:\n", - "@ChelseaVPeretti's bit on @meltdown_show was also gold.\n", + "http://t.co/16EClWrW84 Asics GT-II Super Red 2.0 11 Ronnie Fieg Kith Red White 3M x gel grey volcano 2\n", "\n", "----\n", "\n", - "Pred: 0, Prob: 0.2548377513885498\n", + "Pred: 0, Prob: 0.04366963729262352\n", "Text:\n", - "was that thunder?\n", + "I swear my eyes be bloody red but bitch I feel amazing.\n", "\n", "----\n", "\n", - "Pred: 0, Prob: 0.3722120225429535\n", + "Pred: 1, Prob: 0.9714540839195251\n", "Text:\n", - "Is This Country Latin America's Next 'Argentina': One week ago we reported on the economic devastation in he o... http://t.co/J3rcOflDyA\n", + "Japan marks 70th anniversary of Hiroshima atomic bombing: Bells tolled in Hiroshima on Thursday as Japan marke... http://t.co/IqAIRPdIhg\n", "\n", "----\n", "\n", - "Pred: 0, Prob: 0.04841297119855881\n", + "Pred: 0, Prob: 0.10844964534044266\n", "Text:\n", - "@ERPESTAR i aint a bitch girl popobawa revolves around you the cyclone\n", + "@USCOURT If 90BLKs&8WHTs colluded 2 take WHT F @USAgov AUTH Hostage&2 make her look BLK w/Bioterrorism&use her lgl/org IDis ID still hers?\n", "\n", "----\n", "\n" - ], - "name": "stdout" + ] } ] }, @@ -6107,7 +6064,7 @@ "# Turn Tweet into string\n", "daniels_tweet = \"Life like an ensemble: take the best choices from others and make your own\"" ], - "execution_count": null, + "execution_count": 121, "outputs": [] }, { @@ -6136,7 +6093,7 @@ " print(f\"Pred: {pred_label}\", \"(real disaster)\" if pred_label > 0 else \"(not real disaster)\", f\"Prob: {pred_prob[0][0]}\")\n", " print(f\"Text:\\n{sentence}\")" ], - "execution_count": null, + "execution_count": 122, "outputs": [] }, { @@ -6155,23 +6112,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "3df36bf4-9d07-4878-c7da-ea38f582baec" + 
"outputId": "ddd8f755-089c-4d6b-a2be-890644d10199" }, "source": [ "# Make a prediction on Tweet from the wild\n", "predict_on_sentence(model=model_6, # use the USE model\n", " sentence=daniels_tweet)" ], - "execution_count": null, + "execution_count": 123, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Pred: 0.0 (not real disaster) Prob: 0.06137441471219063\n", + "Pred: 0.0 (not real disaster) Prob: 0.046233948320150375\n", "Text:\n", "Life like an ensemble: take the best choices from others and make your own\n" - ], - "name": "stdout" + ] } ] }, @@ -6200,7 +6157,7 @@ "# Source - https://twitter.com/BeirutCityGuide/status/1290773498743476224\n", "beirut_tweet_2 = \"#Beirut declared a “devastated city”, two-week state of emergency officially declared. #Lebanon\"" ], - "execution_count": null, + "execution_count": 124, "outputs": [] }, { @@ -6210,23 +6167,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "77b10217-3f93-4615-a31f-bb04a7f05c9c" + "outputId": "6be0b58b-4ac6-4948-cd1e-1d43f4ddb159" }, "source": [ "# Predict on diaster Tweet 1\n", "predict_on_sentence(model=model_6, \n", " sentence=beirut_tweet_1)" ], - "execution_count": null, + "execution_count": 125, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Pred: 1.0 (real disaster) Prob: 0.9640411734580994\n", + "Pred: 1.0 (real disaster) Prob: 0.9625465869903564\n", "Text:\n", "Reports that the smoke in Beirut sky contains nitric acid, which is toxic. Please share and refrain from stepping outside unless urgent. 
#Lebanon\n" - ], - "name": "stdout" + ] } ] }, @@ -6237,23 +6194,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "98d6b7cf-dcbd-4f0f-d207-9f7ac82d906f" + "outputId": "6b4b7408-f56d-49ed-ff41-584dfe65cf1b" }, "source": [ "# Predict on diaster Tweet 2\n", "predict_on_sentence(model=model_6, \n", " sentence=beirut_tweet_2)" ], - "execution_count": null, + "execution_count": 126, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Pred: 1.0 (real disaster) Prob: 0.9739863276481628\n", + "Pred: 1.0 (real disaster) Prob: 0.9678557515144348\n", "Text:\n", "#Beirut declared a “devastated city”, two-week state of emergency officially declared. #Lebanon\n" - ], - "name": "stdout" + ] } ] }, @@ -6326,7 +6283,7 @@ " time_per_pred = total_time/len(val_sentences) # find prediction time per sample\n", " return total_time, time_per_pred" ], - "execution_count": null, + "execution_count": 127, "outputs": [] }, { @@ -6347,26 +6304,24 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "06d37406-0bf3-474d-b1d5-1b7e69f96673" + "outputId": "4a151071-956a-4c97-fd95-18392a57a78b" }, "source": [ "# Calculate TF Hub Sentence Encoder prediction times\n", "model_6_total_pred_time, model_6_time_per_pred = pred_timer(model_6, val_sentences)\n", "model_6_total_pred_time, model_6_time_per_pred" ], - "execution_count": null, + "execution_count": 128, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "(0.21601832200008175, 0.00028348861154866374)" + "(0.3529780789999677, 0.0004632258254592752)" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 124 + "metadata": {}, + "execution_count": 128 } ] }, @@ -6377,26 +6332,24 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "ee7cba32-0c27-4c9a-f841-6530f2a9f286" + "outputId": "cfbedfa1-282e-4b04-b2c4-1a3b33771b1e" }, "source": [ "# Calculate Naive Bayes prediction times\n", "baseline_total_pred_time, baseline_time_per_pred = 
pred_timer(model_0, val_sentences)\n", "baseline_total_pred_time, baseline_time_per_pred" ], - "execution_count": null, + "execution_count": 129, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "(0.018003535000048032, 2.362668635176907e-05)" + "(0.018752853000023606, 2.4610043307117593e-05)" ] }, - "metadata": { - "tags": [] - }, - "execution_count": 125 + "metadata": {}, + "execution_count": 129 } ] }, @@ -6421,7 +6374,7 @@ "base_uri": "https://localhost:8080/", "height": 458 }, - "outputId": "4b2f466e-f3cb-432e-e059-ebd32d8c699d" + "outputId": "a4ca1fff-0b84-41d0-85fd-b7f258d47558" }, "source": [ "import matplotlib.pyplot as plt\n", @@ -6434,18 +6387,17 @@ "plt.xlabel(\"Time per prediction\")\n", "plt.ylabel(\"F1-Score\");" ], - "execution_count": null, + "execution_count": 130, "outputs": [ { "output_type": "display_data", "data": { - "image/png": "\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAm4AAAG5CAYAAAA3e7gZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5hXZb3//+dbRDHNM+1SUDAVRRg5iccSNcPSrVZquLWvh8rM7Bylu9yZ5fen2S93GqbWVtpa4jEjraQUU9PUYaMoIorKFtAUETQIlMP7+8dnzfhhnBMyn5lZ8Hxc17pmHe51r3utNTgv73WKzESSJEnd3wZd3QBJkiS1j8FNkiSpJAxukiRJJWFwkyRJKgmDmyRJUkkY3CRJkkrC4CZJayAi/j0iftHV7ejuImJURMytmp4eEaPeQT0fiIiZHdo4qcQMblI3FBGzI2JpRCyuGrYrll0ZETMjYlVEnNzFTV2nNQ0fAJn5fzPzM13VprLKzD0y8+62ykVERsTOVevdm5kDato4qUQMblL39a+ZuVnV8EIx/1HgDOB/urBtAETEhuvjtsumI45VRPToiLZIWjsGN6lkMnNcZt4JLGurbET0iohrI2JBRCyKiIcj4l+KZVtHxNUR8UJELIyIW6vW+2xEzIqIVyNiYkNvX7EsI+ILEfE08HQx74iIeKTYxv0RUddCe34WET9qMu+3EfG1Yny7iLg5IuZHxHMR8aWqcudGxE3F/rwOnBwRIyOiPiJej4iXIuLHRdm39ZQVvZgfKsabXa9J+U2BPwDbVfd6Fu24tijTrzgep0TEnOI4nh4Re0XEtOJ4/LRJvadGxIyi7B0RsWMLx6qh7tOKc/RiRHyjavkGEXFWRDxTnN8bImLrJut+OiKeB+5qpv5RETG3uPT7SnF8TqhaPr44X7+PiCXAQW2cn02KdRZGxBPAXq0c/x7Fdp+JiH9ExJSI6BsR9xTFHy2O9yebnsuI2D0i7i6O7fSIOLJJm8dFxO1FvQ9GxPubO75SaWWmg4NDNxuA2cCH2ihzH3ByG2U+B/wOeBfQAxgObF4sux2
4HtgK6AkcWMw/GHgFGAZsDFwK3FNVZwJ/ArYGNgGGAi8DexfbOKlo/8bNtOeDwBwgiumtgKXAdlT+R3IK8B/ARsBOwLPA6KLsucBy4Oii7CbAA8CniuWbAfsU46OAuS0d05bWa6a9zdVzLnBtMd6vOB6XA72AD1MJ1LcC7wG2L45Nw7E9CpgF7A5sCHwHuL+FbTfUfR2wKTAYmF+1D18G/gb0Kc7TFcB1Tdb972LdTVrYtxXAj4v1DwSWAAOK5eOB14D9i+P9rjbOzwXAvcXvRV/g8epj1+T4jwUeAwYAAewJbFP1+7Vzc+eAyu/pLODfizYcDPyjSZsXACOL4/srYEJX/3t2cOjIwR43qfu6tehVWFTdG7aGlgPbUPlDuDIzp2Tm6xHxPuAjwOmZuTAzl2fmX4p1TgCuysz/ycw3gLOBfSOiX1W9/19mvpqZS4HTgCsy88FiG78E3gD2aaY991L5w/yBYvoY4IGsXAbeC+idmedl5puZ+Szwc2BM1foPZOatmbmq2PZyYOeI2DYzF2fm39bguLyT9Vry/cxclpmTqISf6zLz5cycV+zz0KLc6VSO3YzMXAH8X2BIS71uhe9l5pLMfAy4Gji+qq5vZ+bc4jydCxwTq18WPbdYd2kr9Z+TmW8U5/924LiqZb/NzL9m5ioqwbG183MccH7xezEHuKSVbX4G+E5mzsyKRzNzQSvlG+xDJWhfULThLuC2qmMC8JvMfKg4vr8ChrSjXqk0DG5S93V0Zm5ZDEe3Z4VY/WGGHYBrgDuACcXlth9GRE8qPSKvZubCZqrZDvjfhonMXEylF2P7qjJzqsZ3BL5eFTIXFfVvRxOZmcAE3vpD+29U/rg21LNdk3r+HfiXFrYL8GlgV+DJqFwGPqKlY9NB67Xkparxpc1Mb1aM7wj8pGr/XqXS41R9bJuq3uf/5a3juiPwm6q6ZgAraf14NbUwM5e0UH/T9ds6P9s109aW9AWeaaNtzdkOmFMEyertVB+/v1eN/5O3jr20TvDmXmkdkpnN/ZH6HvC9osfs98DM4ufWEbFlZi5qUv4FKn+kgcZ7vbYB5lVvqmp8DpWelvPb2czrgEkRcQGVy6sfq6rnuczcpZV1c7WJzKeB4yNiA+DjwE0RsQ2VXq93Ve1DD6B3W+s1CTFv214HaDhWv2qz5Fv6Ak8W4ztQOT8NdZ2amX9tukJV72hb7d8qIjat2u8dqFzibND0PLd2fl4s2jq9qq6WzAHe32Rb7fEC0DciNqgKbzsAT61hPVJp2eMmlUxEbBQRvaj01PSMygMIzf5bjoiDImJwEVxep3KJcFVmvkjlxvvLImKriOgZER8sVrsOOCUihkTExlQu5z2YmbNbaNLPgdMjYu+o2DQiDo+IdzdXODOnUrmH7hfAHVXB8SHgHxHxreJG9x4RMSgi9mqunmL/ToyI3sUf8YZ6VlH5Q96raEdPKveSbdyO9Zp6CdgmIrZoqQ1r6HLg7IjYo2jHFhFxbBvrnBMR7yrWOYXKfYkNdZ3fcJk1InpHxFHvoE3fK36nPgAcAdzYQrm2zs8Nxb5tFRF9gC+2ss1fAN+PiF2K35m6InBD5Zjv1MJ6D1LpRftm8Ts7CvhXKr240nrB4CaVzyQql9/2A64sxj/YQtn3AjdRCW0zgL9QuXwK8CkqQe5JKjfQfwUgM/8MnAPcTKUX5f2sfp/ZajKzHvgs8FNgIZWbx09uYx9+DXyo+NlQz0oqwWEI8BxvhbvWQtNhwPSIWAz8BBiTmUsz8zUqr0z5BZWewiXA3LbWa2bfnqQSZJ8tLg++7fLvmsjM3wAXUrl0/TqVHqePtLHaX6gc0zuBHxX30VG0eyKV3st/UHlQYe81bNLfqZyzF6hcsj692Ofm2t7W+fkelcuWz1H5Hb2mmWoa/JhK0JtE5Xfzv6g8bAKVe/V+WRzv6vvtyMw3qQS1jxTbvwz4Py21WVoXNTzZJUnqRorLnc8BPYsb7Tu6/lFUno7t09F1S6o
de9wkSZJKwuAmSZJUEl4qlSRJKgl73CRJkkpivXiP27bbbpv9+vXr6mZIkiS1acqUKa9kZu/mlq0Xwa1fv37U19d3dTMkSZLaFBEtfnnES6WSJEklYXCTJEkqCYObJElSSawX97g1Z/ny5cydO5dly5Z1dVO0nuvVqxd9+vShZ8+eXd0USVI3t94Gt7lz5/Lud7+bfv36ERFd3RytpzKTBQsWMHfuXPr379/VzZEkdXPr7aXSZcuWsc022xja1KUigm222caeX0lSu6y3wQ0wtKlb8PdQktRe63VwkyRJKhODWxeaPXs2gwYNqkndd999N0cccQQAEydO5IILLqjJdiRJUudZbx9OWJ8ceeSRHHnkkV3dDEmStJbscWunW6fOY/8L7qL/Wbez/wV3cevUeR1S74oVKzjhhBPYfffdOeaYY/jnP//Jeeedx1577cWgQYM47bTTyEwALrnkEgYOHEhdXR1jxowBYMmSJZx66qmMHDmSoUOH8tvf/vZt2xg/fjxnnnkmACeffDJf+tKX2G+//dhpp5246aabGstddNFF7LXXXtTV1fHd7363Q/ZPkiR1HINbO9w6dR5n3/IY8xYtJYF5i5Zy9i2PdUh4mzlzJmeccQYzZsxg880357LLLuPMM8/k4Ycf5vHHH2fp0qXcdtttAFxwwQVMnTqVadOmcfnllwNw/vnnc/DBB/PQQw8xefJkxo4dy5IlS1rd5osvvsh9993HbbfdxllnnQXApEmTePrpp3nooYd45JFHmDJlCvfcc89a758kSeo4Brd2uOiOmSxdvnK1eUuXr+SiO2audd19+/Zl//33B+DEE0/kvvvuY/Lkyey9994MHjyYu+66i+nTpwNQV1fHCSecwLXXXsuGG1auck+aNIkLLriAIUOGMGrUKJYtW8bzzz/f6jaPPvpoNthgAwYOHMhLL73UWM+kSZMYOnQow4YN48knn+Tpp59e6/2TJEkdx3vc2uGFRUvXaP6aaPoqiIjgjDPOoL6+nr59+3Luuec2vuPr9ttv55577uF3v/sd559/Po899hiZyc0338yAAQNWq6chkDVn4403bhxvuAybmZx99tl87nOfW+t9kiRpnTLtBrjzPHhtLmzRBw75D6g7rkuaYo9bO2y35SZrNH9NPP/88zzwwAMA/PrXv+aAAw4AYNttt2Xx4sWN96CtWrWKOXPmcNBBB3HhhRfy2muvsXjxYkaPHs2ll17aGMCmTp36jtoxevRorrrqKhYvXgzAvHnzePnll9d29yRJKrdpN8DvvgSvzQGy8vN3X6rM7wL2uLXD2NEDOPuWx1a7XLpJzx6MHT2glbXaZ8CAAYwbN45TTz2VgQMH8vnPf56FCxcyaNAg3vve97LXXnsBsHLlSk488URee+01MpMvfelLbLnllpxzzjl85Stfoa6ujlWrVtG/f//Ge+LWxIc//GFmzJjBvvvuC8Bmm23Gtddey3ve85613kdJkkrrzvNgeZMrbMuXVuZ3Qa9bNPTUrMtGjBiR9fX1q82bMWMGu+++e7vruHXqPC66YyYvLFrKdltuwtjRAzh66PYd3VStp9b091GS1EnO3RJoLisFnLuoJpuMiCmZOaK5Zfa4tdPRQ7c3qEmStL7Zok9xmbSZ+V3Ae9wkSZJacsh/QM8m97T33KQyvwsY3CRJklpSdxz86yWwRV8gKj//9ZIue6rUS6WSJEmtqTuuy4JaU/a4SZIklURNg1tEHBYRMyNiVkSc1czyHSJickRMjYhpEfHRYv42xfzFEfHTJusMj4jHijoviaZvsJUkSVpH1Sy4RUQPYBzwEWAgcHxEDGxS7DvADZk5FBgDXFbMXwacA3yjmap/BnwW2KUYDuv41kuSJHU/texxGwnMysxnM/NNYAJwVJMyCWxejG8BvACQmUsy8z4qAa5RRLwP2Dwz/5aVF9D9N3B0DfehZhYtWsRll13WOD127Fj22GMPxo4d22z5k08+ufErCu3Vr18/Xnn
llbVq55r6z//8T/75z3926ja70t13380RRxzR1c2QJK0nahnctgeqX3wyt5hX7VzgxIiYC/we+GI76pzbRp0ARMRpEVEfEfXz589fk3Y3b9oNcPGgyov4Lh601p+6aBrcrrzySqZNm8ZFF120ti3tUutbcFtTK1as6OomSJJKrKsfTjgeGJ+ZfYCPAtdERIe0KTOvzMwRmTmid+/ea1dZDb5TdtZZZ/HMM88wZMgQDj30UBYvXszw4cO5/vrrW1znnnvuYb/99mOnnXZq7H1r2uNz5plnMn78+MbpH/7whwwePJiRI0cya9asFuu+8cYbGTRoEHvuuScf/OAHgcpntsaOHctee+1FXV0dV1xxReM2R40axTHHHMNuu+3GCSecQGZyySWX8MILL3DQQQdx0EEHATBp0iT23Xdfhg0bxrHHHtv4LdR+/frx3e9+l2HDhjF48GCefPJJABYvXswpp5zC4MGDqaur4+abb261nuZMmTKFAw88kOHDhzN69GhefPFFAEaNGsW3vvUtRo4cya677sq9997buJ/f+MY3GDRoEHV1dVx66aUA3HnnnQwdOpTBgwdz6qmn8sYbbwDwxz/+kd12241hw4Zxyy23NG53yZIlnHrqqYwcOZKhQ4fy29/+FoDx48dz5JFHcvDBB3PIIYe02G5JktqUmTUZgH2BO6qmzwbOblJmOtC3avpZ4D1V0ycDP62afh/wZNX08cAVbbVl+PDh2dQTTzzxtnkt+vEemd/d/O3Dj/dofx1NPPfcc7nHHm+tv+mmm7Za/qSTTspjjjkmV65cmdOnT8/3v//9mZk5efLkPPzwwxvLfeELX8irr746MzN33HHH/MEPfpCZmb/85S9XK9fUoEGDcu7cuZmZuXDhwszMvOKKK/L73/9+ZmYuW7Yshw8fns8++2xOnjw5N99885wzZ06uXLky99lnn7z33nsbtzl//vzMzJw/f35+4AMfyMWLF2dm5gUXXJDf+973GstdcsklmZk5bty4/PSnP52Zmd/85jfzy1/+cmO7Xn311VbraerNN9/MfffdN19++eXMzJwwYUKecsopmZl54IEH5te+9rXMzLz99tvzkEMOyczMyy67LD/xiU/k8uXLMzNzwYIFuXTp0uzTp0/OnDkzMzM/9alP5cUXX9w4/6mnnspVq1blscce23hczz777Lzmmmsaj+Euu+ySixcvzquvvjq33377XLBgQYvHf41+HyVJ6zSgPlvINLV8j9vDwC4R0R+YR+Xhg39rUuZ54BBgfETsDvQCWryumZkvRsTrEbEP8CDwf4BLa9H41bw2d83m18jRRx/NBhtswMCBA3nppZfatc7xxx/f+POrX/1qi+X2339/Tj75ZI477jg+/vGPA5VermnTpjX27r322ms8/fTTbLTRRowcOZI+fSqf+xgyZAizZ8/mgAMOWK3Ov/3tbzzxxBPsv//+ALz55puNH7EHGrczfPjwxp6rP//5z0yYMKGxzFZbbcVtt93Waj3VZs6cyeOPP86hhx4KVHrT3ve+9zW7zdmzZzdu8/TTT2fDDSv/HLbeemseffRR+vfvz6677grASSedxLhx4xg1ahT9+/dnl112AeDEE0/kyiuvbDxeEydO5Ec/+hEAy5Yt4/nnnwfg0EMPZeutt27x+EuS1B41C26ZuSIizgTuAHoAV2Xm9Ig4j0qSnAh8Hfh5RHyVyoMKJxdJk4iYTeXBhY0i4mjgw5n5BHAGMB7YBPhDMdRWN/lO2cYbb9w4XhwmNtxwQ1atWtU4f9my1Z7noPptKa29OeXyyy/nwQcf5Pbbb2f48OFMmTKFzOTSSy9l9OjRq5W9++67V2tLjx49mr13KzM59NBDue6661rdn5bWb289TcvusccePPDAA2u1zXciM7n55psZMGDAavMffPBBNt100w7dliRp/VTTe9wy8/eZuWtmvj8zzy/m/UcR2sjMJzJz/8zcMzOHZOakqnX7ZebWmblZZvYpQhu
ZWZ+Zg4o6z2wIejVVg++Uvfvd7+Yf//jHWjYMdtxxR5544gneeOMNFi1axJ133rna8oZ75q6//voWe6kAnnnmGfbee2/OO+88evfuzZw5cxg9ejQ/+9nPWL58OQBPPfUUS5YsabU91fu1zz778Ne//rXx3rolS5bw1FNPtbr+oYceyrhx4xqnFy5cuEb1DBgwgPnz5zcGt+XLlzN9+vQ2t3nFFVc0BrlXX32VAQMGMHv27MZtXnPNNRx44IHstttuzJ49m2eeeQZgtTA5evRoLr300sZQPXXq1Fa3K0nSmurqhxPKoQbfKdtmm23Yf//9GTRoUIuvAGmPvn37ctxxxzFo0CCOO+44hg4dutryhQsXUldXx09+8hMuvvjiFusZO3YsgwcPZtCgQey3337sueeefOYzn2HgwIEMGzaMQYMG8bnPfa7NXqrTTjuNww47jIMOOojevXszfvx4jj/+eOrq6th3330bH0JoyXe+8x0WLlzY+KDE5MmT16iejTbaiJtuuolvfetb7LnnngwZMoT777+/1W1+5jOfYYcddqCuro4999yTX//61/Tq1Yurr76aY489lsGDB7PBBhtw+umn06tXL6688koOP/xwhg0bxnve857Ges455xyWL19OXV0de+yxB+ecc06r25UkaU1FZ3RYdbURI0ZkfX39avNmzJjB7rvv3kUtklbn76MkqUFETMnMEc0ts8dNkiSpJGr5VKnegfPPP58bb7xxtXnHHnss3/72t0tRf2f62Mc+xnPPPbfavAsvvPBtD1NIkrSuWK8vle62226tPmkpdYbM5Mknn/RSqSQJ8FJps3r16sWCBQtYH4Kruq/MZMGCBfTq1aurmyJJKoH19lJpnz59mDt3Lh3yHVNpLfTq1avxZcaSJLVmvQ1uPXv2pH///l3dDEmSpHZbby+VSpIklY3BTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKoqbBLSIOi4iZETErIs5qZvkOETE5IqZGxLSI+GjVsrOL9WZGxOiq+bMj4rGIeCQi6mvZfkmSpO5kw1pVHBE9gHHAocBc4OGImJiZT1QV+w5wQ2b+LCIGAr8H+hXjY4A9gO2AP0fErpm5sljvoMx8pVZtlyRJ6o5q2eM2EpiVmc9m5pvABOCoJmUS2LwY3wJ4oRg/CpiQmW9k5nPArKI+SZKk9VYtg9v2wJyq6bnFvGrnAidGxFwqvW1fbMe6CUyKiCkRcVpLG4+I0yKiPiLq58+f/873QpIkqZvo6ocTjgfGZ2Yf4KPANRHRVpsOyMxhwEeAL0TEB5srlJlXZuaIzBzRu3fvjm21JElSF6hlcJsH9K2a7lPMq/Zp4AaAzHwA6AVs29q6mdnw82XgN3gJVZIkrSdqGdweBnaJiP4RsRGVhw0mNinzPHAIQETsTiW4zS/KjYmIjSOiP7AL8FBEbBoR7y7Kbwp8GHi8hvsgSZLUbdTsqdLMXBERZwJ3AD2AqzJzekScB9Rn5kTg68DPI+KrVO5dOzkzE5geETcATwArgC9k5sqI+BfgNxHR0PZfZ+Yfa7UPkiRJ3UlUctK6bcSIEVlf7yvfJEl
S9xcRUzJzRHPLuvrhBEmSJLWTwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSqKmwS0iDouImRExKyLOamb5DhExOSKmRsS0iPho1bKzi/VmRsTo9tYpSZK0rqpZcIuIHsA44CPAQOD4iBjYpNh3gBsycygwBrisWHdgMb0HcBhwWUT0aGedkiRJ66Ra9riNBGZl5rOZ+SYwATiqSZkENi/GtwBeKMaPAiZk5huZ+Rwwq6ivPXVKkiStk2oZ3LYH5lRNzy3mVTsXODEi5gK/B77YxrrtqROAiDgtIuojon7+/PnvdB8kSZK6ja5+OOF4YHxm9gE+ClwTER3Spsy8MjNHZOaI3r17d0SVkiRJXWrDGtY9D+hbNd2nmFft01TuYSMzH4iIXsC2bazbVp2SJEnrpHb1bkXErhFxZ0Q8XkzXRcR32ljtYWCXiOgfERtRedhgYpMyzwOHFHXuDvQC5hflxkTExhHRH9gFeKiddUqSJK2T2ntZ8ufA2cBygMycRiU0tSgzVwBnAncAM6g8PTo9Is6LiCOLYl8HPhsRjwLXASdnxXTgBuAJ4I/AFzJzZUt1tn93JUmSyisys+1CEQ9n5l4RMbV4dQcR8UhmDql5CzvAiBEjsr6+vqubIUmS1KaImJKZI5pb1t4et1ci4v1UXt9BRBwDvNhB7ZMkSVI7tPfhhC8AVwK7RcQ84DnghJq1SpIkSW/TZnArvlZwRmZ+KCI2BTbIzH/UvmmSJEmq1mZwy8yVEXFAMb6k9k2SJElSc9p7qXRqREwEbgQaw1tm3lKTVkmSJOlt2hvcegELgIOr5iVgcJMkSeok7QpumXlKrRsiSZKk1rX3ywl9IuI3EfFyMdwcEX1q3ThJkiS9pb3vcbuayqeltiuG3xXzJEmS1EnaG9x6Z+bVmbmiGMYDvWvYLkmSJDXR3uC2ICJOjIgexXAilYcVJEmS1EnaG9xOBY4D/k7lU1fHAD6wIEmS1Ina+1Tp/wJH1rgtkiRJakV7nyr9ZURsWTW9VURcVbtmSZIkqan2Xiqty8xFDROZuRAYWpsmSZIkqTntDW4bRMRWDRMRsTXt/+qCJEmSOkB7w9f/DzwQETcCQeXhhPNr1ipJkiS9TXsfTvjviKin8q3SBD6emU/UtGWSJElaTauXSiPiXRHRE6AIan8CNgJ264S2SZIkqUpb97j9EegHEBE7Aw8AOwFfiIgLats0SZIkVWsruG2VmU8X4ycB12XmF4GPAIfXtGWSJElaTVvBLavGD6ZyqZTMfBNYVatGSZIk6e3aejhhWkT8CJgH7AxMAqh+Ga8kSZI6R1s9bp8FXqFyn9uHM/OfxfyBwI9q2C5JkiQ10WqPW2YuBVZ7CCEihmXm/cD9tWyYJEmSVtfeLydU+0WHt0KSJElteifBLTq8FZIkSWrTOwlu3+vwVkiSJKlNaxzcMvNWgIjw6wmSJEmd6J30uDWY1GGtkCRJUptafao0Ii5paRHgu9wkSZI6UVsv4D0F+DrwRjPLju/45ki
SJKklbQW3h4HHi/e2rSYizq1JiyRJkujB3woAABCGSURBVNSstoLbMcCy5hZkZv+Ob44kSZJa0tbDCZtVfeZKkiRJXait4HZrw0hE3FzjtkiSJKkVbQW36q8k7FTLhkiSJKl1bQW3bGFckiRJnaythxP2jIjXqfS8bVKMU0xnZm5e09ZJkiSpUavBLTN7dFZDJEmS1Lq1+eSVJEmSOpHBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJ1DS4RcRhETEzImZFxFnNLL84Ih4phqciYlHVsgsj4vFi+GTV/PER8VzVekNquQ+SJEndxYa1qjgiegDjgEOBucDDETExM59oKJOZX60q/0VgaDF+ODAMGAJsDNwdEX/IzNeL4mMz86ZatV2SJKk7qmWP20hgVmY+m5lvAhOAo1opfzxwXTE+ELgnM1dk5hJgGnBYDdsqSZLU7dUyuG0PzKmanlvMe5uI2BHoD9xVzHoUOCwi3hUR2wIHAX2rVjk/IqYVl1o3bqHO0yKiPiLq58+fv7b7IkmS1OW6y8MJY4CbMnMlQGZOAn4P3E+lF+4BYGVR9mxgN2AvYGvgW81VmJlXZuaIzBzRu3fvGjdfkiSp9moZ3Oaxei9Zn2Jec8bw1mVSADLz/MwckpmHAgE8Vcx/MSveAK6mcklWkiRpnVfL4PYwsEtE9I+IjaiEs4lNC0XEbsBWVHrVGub1iIhtivE6oA6YVEy/r/gZwNHA4zXcB0mSpG6jZk+VZuaKiDgTuAPoAVyVmdMj4jygPjMbQtwYYEJmZtXqPYF7K9mM14ETM3NFsexXEdGbSi/cI8DptdoHSZKk7iRWz0vrphEjRmR9fX1XN0OSJKlNETElM0c0t6y7PJwgSZKkNhjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqCYObJElSSRjcJEmSSsLgJkmSVBIGN0mSpJIwuEmSJJWEwU2SJKkkahrcIuKwiJgZEbMi4qxmll8cEY8Uw1MRsahq2YUR8XgxfLJqfv+IeLCo8/qI2KiW+yBJktRd1Cy4RUQPYBzwEWAgcHxEDKwuk5lfzcwhmTkEuBS4pVj3cGAYMATYG/hGRGxerHYhcHFm7gwsBD5dq32QJEnqTmrZ4zYSmJWZz2bmm8AE4KhWyh8PXFeMDwTuycwVmbkEmAYcFhEBHAzcVJT7JXB0TVovSZLUzdQyuG0PzKmanlvMe5uI2BHoD9xVzHqUSlB7V0RsCxwE9AW2ARZl5op21HlaRNRHRP38+fPXemckSZK6Wnd5OGEMcFNmrgTIzEnA74H7qfTCPQCsXJMKM/PKzByRmSN69+7d0e2VJEnqdLUMbvOo9JI16FPMa84Y3rpMCkBmnl/c/3YoEMBTwAJgy4jYsB11SpIkrVNqGdweBnYpngLdiEo4m9i0UETsBmxFpVetYV6PiNi
mGK8D6oBJmZnAZOCYouhJwG9ruA+SJEndxoZtF3lnMnNFRJwJ3AH0AK7KzOkRcR5Qn5kNIW4MMKEIZQ16AvdWnkXgdeDEqvvavgVMiIgfAFOB/6rVPkiSJHUnsXpeWjeNGDEi6+vru7oZkiRJbYqIKZk5orll3eXhBEmSJLXB4CZJklQSBjdJkqSSMLhJkiSVhMFNkiSpJAxukiRJJWFwkyRJKgmDmyRJUkkY3CRJkkrC4CZJklQSBjdJkqSSMLhJkiSVhMFNkiSpJAxukiRJJWFwkyRJKgmDmyRJUkkY3CRJkkrC4CZJklQSBjdJkqSSMLhJkiSVhMFNkiSpJAxukiRJJWFwkyRJKgmDmyRJUkkY3CRJkkrC4CZJklQSBjdJkqSSMLhJkiSVhMFNkiSpJAxukiRJJWFwkyRJKgmDmyRJUkkY3CRJkkrC4CZJklQSBjdJkqSSMLhJkiSVhMFNkiSpJAxukiRJJWFwkyRJKgmDmyRJUkkY3CRJkkrC4CZJklQSBjdJkqSSMLhJkiSVhMFNkiSpJDbs6gaU3a1T53HRHTN5YdFStttyE8aOHsDRQ7fv6mZJkqR1kMFtLdw6dR5n3/IYS5evBGDeoqWcfctjAIY3SZLU4bxUuhYuumNmY2hrsHT5Si66Y2YXtUiSJK3LDG5r4YVFS9doviRJ0towuK2F7bbcZI3mS5IkrQ2D21oYO3oAm/Tssdq8TXr2YOzoAV3UIkmStC7z4YS10PAAgk+VSpKkzmBwW0tHD93eoCZJkjqFl0olSZJKwuAmSZJUEgY3SZKkkjC4SZIklYTBTZIkqSQMbpIkSSVhcJMkSSoJg5skSVJJGNwkSZJKoqbBLSIOi4iZETErIs5qZvnFEfFIMTwVEYuqlv0wIqZHxIyIuCQioph/d1Fnw3rvqeU+SJIkdRc1++RVRPQAxgGHAnOBhyNiYmY+0VAmM79aVf6LwNBifD9gf6CuWHwfcCBwdzF9QmbW16rtkiRJ3VEte9xGArMy89nMfBOYABzVSvnjgeuK8QR6ARsBGwM9gZdq2FZJkqRur5Yfmd8emFM1PRfYu7mCEbEj0B+4CyAzH4iIycCLQAA/zcwZVatcHRErgZuBH2RmNlPnacBpxeTiiJi5lvujrrMt8EpXN0IdxvO5bvF8rjs8l93Hji0tqGVwWxNjgJsycyVAROwM7A70KZb/KSI+kJn3UrlMOi8i3k0luH0K+O+mFWbmlcCVndJ61VRE1GfmiK5uhzqG53Pd4vlcd3guy6GWl0rnAX2rpvsU85ozhrcukwJ8DPhbZi7OzMXAH4B9ATJzXvHzH8CvqVySlSRJWufVMrg9DOwSEf0jYiMq4Wxi00IRsRuwFfBA1ezngQMjYsOI6EnlwYQZxfS2xXo9gSOAx2u4D5IkSd1GzYJbZq4AzgTuAGYAN2Tm9Ig4LyKOrCo6BpjQ5D61m4BngMeAR4FHM/N3VB5UuCMipgGPUOnB+3mt9kHdhpe81y2ez3WL53Pd4bksgWjmvn5JkiR1Q345QZIkqSQMbpIkSSVhcFOnaMfnzzaOiOuL5Q9GRL+qZWcX82dGxOi26oyIM4t52fAwizpOJ5/LXxXzH4+Iq4qHktSBOvl8/ldEPBoR0yLipojYrNb7t77pzPNZtfySiFhcq31SE5np4FDTAehB5WGTnah8DeNRYGCTMmcAlxfjY4Dri/GBRfmNqbyk+ZmivhbrpPLptH7AbGDbrt7/dWnognP5USov4Q4qrwz6fFcfg3Vp6ILzuXlVvT8GzurqY7AuDZ19Pov1RgDXAIu7ev/Xl8EeN3WG9nz+7Cjgl8X4TcAhERHF/AmZ+UZmPgfMKuprsc7MnJqZs2u9U+upzj6Xv88C8BBvvZRbHaOzz+frAMX6m1D5vKE6Tqeez+Kb5BcB36zxfqmKwU2dobnPn23fUpmsvErmNWCbVtZtT53qeF1yLotLpJ8C/rjWe6BqnX4+I+Jq4O/AbsClHbETatTZ5/N
MYGJmvthB7Vc7GNwklcFlwD1Z+eydSiwzTwG2o/J+z092cXP0DkXEdsCxGL47ncFNnaE9nz9rLBMRGwJbAAtaWXdNPqmmjtPp5zIivgv0Br7WIXugal3ybzMr36WeAHxirfdA1TrzfA4FdgZmRcRs4F0RMaujdkQtM7ipM7Tn82cTgZOK8WOAu4r7miYCY4onofoDu1C516ldn1RTh+vUcxkRnwFGA8dn5qoa79v6qNPOZ1TsDI33uB0JPFnj/VvfdNr5zMzbM/O9mdkvM/sB/8zMnWu+h/KpUofOGag8HfgUlaeTvl3MOw84shjvBdxI5YbYh4Cdqtb9drHeTOAjrdVZzP8SlfswVgAvAL/o6v1fl4ZOPpcrinmPFMN/dPX+r2tDZ51PKh0Ff6XyKcPHgV9R9ZSpQ7nOZzPb9anSThr85JUkSVJJeKlUkiSpJAxukiRJJWFwkyRJKgmDmyRJUkkY3CRJkkrC4CapS0TENhHxSDH8PSLmFeOLI+Kyrm5fZ4qIfhHxeDE+IiIuaaP8vzeZvr+W7ZPUffg6EEldLiLOpfIeqB91dVuaExEbZuW7jjVZLyL6Abdl5qB21rs4Mzdb0/ZIKj973CR1KxExKiJuK8bPjYhfRsS9EfG/EfHxiPhhRDwWEX8sPj5PRAyPiL9ExJSIuCMi3tdMveMj4vKIqI+IpyLiiGJ+j4i4KCIejohpEfG5qnbcGxETgSeaqW9xRFwcEdMj4s6I6F3Mvzsi/jMi6oEvt9S2Yv6jEfEo8IUW9n+ziLi62N9pEfGJiLgA2KTonfxVQ1uKn1Hsy+PFOp+sqvPuiLgpIp6MiF8VXy+QVDIGN0nd3fuBg6l8IulaYHJmDgaWAocX4e1S4JjMHA5cBZzfQl39gJHA4cDlEdEL+DTwWmbuBewFfLb45A/AMODLmblrM3VtCtRn5h7AX4DvVi3bKDNHAJe00rargS9m5p6t7Ps5RdsGZ2Ydlc8TnQUszcwhmXlCk/IfB4YAewIfAi6qCrFDga8AA4GdgP1b2a6kbmrDrm6AJLXhD5m5PCIeA3oAfyzmP0YliA0ABgF/KjqRegAvtlDXDVn55unTEfEssBvwYaAuIo4pymxB5TuNbwIPZeZzLdS1Cri+GL8WuKVqWcP8ZtsWEVsCW2bmPUW5a4CPNLOND1H5NiQAmbmwhbY0OAC4LisfcX8pIv5CJYy+XuzLXICIeITKsbuvjfokdTMGN0nd3RsAmbkqIpbnWzfmrqLy37AApmfmvu2oq+lNvVms/8XMvKN6QUSMApasQTur625Yr9m2FcGts71RNb4S//svlZKXSiWV3Uygd0TsCxARPSNijxbKHhsRG0TE+6lcLpwJ3AF8vup+uV0jYtN2bHcDoKGX7t9ovveq2bZl5iJgUUQcUJRresmzwZ9Y/f63rYrR5Q3tbeJe4JPFfXu9gQ9S+ZC4pHWEwU1SqWXmm1QC1IXFjf6PAPu1UPx5KkHmD8DpmbkM+AWVhw/+p3glxxW0rzdqCTCyWOdg4Lw1bNspwLjismVLDwr8ANiqeNjgUeCgYv6VwLSGhxOq/AaYBjwK3AV8MzP/3o59kVQSvg5E0nohIsZTeeXGTR1Un6/kkNTp7HGTJEkqCXvcJEmSSsIeN0mSpJIwuEmSJJWEwU2SJKkkDG6SJEklYXCTJEkqif8HsZN/cPkoiigAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { - "tags": [], "needs_background": "light" } }