From f07beec23ee4a1a21976b893c19fe1b2621ba81f Mon Sep 17 00:00:00 2001
From: manfred <koskosanerider@gmail.com>
Date: Mon, 26 Apr 2021 20:20:41 +0700
Subject: [PATCH] Adding data-splitting-automation to Image Collection.ipynb

---
 .../1. Image Collection-checkpoint.ipynb      | 101 ++++++++++++++++--
 1. Image Collection.ipynb                     |  58 ++++++++++
 2 files changed, 150 insertions(+), 9 deletions(-)
diff --git a/.ipynb_checkpoints/1. Image Collection-checkpoint.ipynb b/.ipynb_checkpoints/1. Image Collection-checkpoint.ipynb
index f737bdb36..d9c445e79 100644
--- a/.ipynb_checkpoints/1. Image Collection-checkpoint.ipynb	
+++ b/.ipynb_checkpoints/1. Image Collection-checkpoint.ipynb	
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
+    "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#1.-Import-Dependencies\" data-toc-modified-id=\"1.-Import-Dependencies-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>1. Import Dependencies</a></span></li><li><span><a href=\"#2.-Define-Images-to-Collect\" data-toc-modified-id=\"2.-Define-Images-to-Collect-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>2. Define Images to Collect</a></span></li><li><span><a href=\"#3.-Setup-Folders\" data-toc-modified-id=\"3.-Setup-Folders-3\"><span class=\"toc-item-num\">3&nbsp;&nbsp;</span>3. Setup Folders</a></span></li><li><span><a href=\"#4.-Capture-Images\" data-toc-modified-id=\"4.-Capture-Images-4\"><span class=\"toc-item-num\">4&nbsp;&nbsp;</span>4. Capture Images</a></span></li><li><span><a href=\"#5.-Image-Labelling\" data-toc-modified-id=\"5.-Image-Labelling-5\"><span class=\"toc-item-num\">5&nbsp;&nbsp;</span>5. Image Labelling</a></span></li><li><span><a href=\"#6.-Move-them-into-a-Training-and-Testing-Partition\" data-toc-modified-id=\"6.-Move-them-into-a-Training-and-Testing-Partition-6\"><span class=\"toc-item-num\">6&nbsp;&nbsp;</span>6. Move them into a Training and Testing Partition</a></span></li><li><span><a href=\"#OPTIONAL---7.-Compress-them-for-Colab-Training\" data-toc-modified-id=\"OPTIONAL---7.-Compress-them-for-Colab-Training-7\"><span class=\"toc-item-num\">7&nbsp;&nbsp;</span>OPTIONAL - 7. Compress them for Colab Training</a></span></li></ul></div>"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -53,11 +63,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
-    "labels = ['thumbsup']\n",
+    "labels = ['thumbsup', 'thumbsdown', 'thankyou', 'livelong']\n",
     "number_imgs = 5"
    ]
   },
@@ -70,7 +80,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -79,7 +89,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -103,9 +113,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting images for thumbsup\n",
+      "Collecting image 0\n",
+      "Collecting image 1\n",
+      "Collecting image 2\n",
+      "Collecting image 3\n",
+      "Collecting image 4\n"
+     ]
+    }
+   ],
    "source": [
     "for label in labels:\n",
     "    cap = cv2.VideoCapture(0)\n",
@@ -200,9 +223,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.6a706a36-940f-11eb-b4eb-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.6a706a36-940f-11eb-b4eb-5cf3709bbcc6.xml\n",
+      "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.6ba4d864-940f-11eb-8c74-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.6ba4d864-940f-11eb-8c74-5cf3709bbcc6.xml\n",
+      "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.6cd9c8e2-940f-11eb-b901-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.6cd9c8e2-940f-11eb-b901-5cf3709bbcc6.xml\n",
+      "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.6e0f5bc0-940f-11eb-8d18-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.6e0f5bc0-940f-11eb-8d18-5cf3709bbcc6.xml\n",
+      "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.693a5158-940f-11eb-8752-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.693a5158-940f-11eb-8752-5cf3709bbcc6.xml\n"
+     ]
+    }
+   ],
    "source": [
     "!cd {LABELIMG_PATH} && python labelImg.py"
    ]
@@ -223,7 +258,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,6 +272,41 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "# Automate the train/test split: move every annotated image (.xml + .jpg pair) into TRAIN_PATH or TEST_PATH\n",
+    "from glob import glob\n",
+    "import shutil\n",
+    "import math\n",
+    "\n",
+    "TEST_SIZE = 0.2  # fraction of each label's annotated images that goes to the test partition\n",
+    "\n",
+    "# os.makedirs is portable (no shell mkdir), creates missing parents, and exist_ok makes re-runs safe\n",
+    "os.makedirs(TRAIN_PATH, exist_ok=True)\n",
+    "os.makedirs(TEST_PATH, exist_ok=True)\n",
+    "\n",
+    "print('using {} % of images as test data'.format(TEST_SIZE * 100))\n",
+    "for label in labels:\n",
+    "    path = os.path.join(IMAGES_PATH, label)\n",
+    "    # glob returns matches in arbitrary order; sorting makes the split reproducible\n",
+    "    xml_filenames = sorted(glob(os.path.join(path, '*.xml')))\n",
+    "    n_files = len(xml_filenames)\n",
+    "    # floor rounds the test share down, so a label with very few images may get no test images\n",
+    "    n_test = math.floor(n_files * TEST_SIZE)\n",
+    "    for i, xml in enumerate(xml_filenames):\n",
+    "        # swap only the extension; str.replace would also rewrite '.xml' elsewhere in the path\n",
+    "        jpg = os.path.splitext(xml)[0] + '.jpg'\n",
+    "        dest_folder = TEST_PATH if i < n_test else TRAIN_PATH\n",
+    "        # move the image first: if it is missing this raises before the annotation is orphaned\n",
+    "        shutil.move(jpg, dest_folder)\n",
+    "        shutil.move(xml, dest_folder)\n",
+    "    print('{} -> annotated_images: {} train: {} test: {}'.format(label, n_files, n_files - n_test, n_test))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "!tar -czf {ARCHIVE_PATH} {TRAIN_PATH} {TEST_PATH}"
    ]
@@ -266,6 +336,19 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
   }
  },
  "nbformat": 4,
diff --git a/1. Image Collection.ipynb b/1. Image Collection.ipynb
index dcffd6c40..d9c445e79 100644
--- a/1. Image Collection.ipynb	
+++ b/1. Image Collection.ipynb	
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
+    "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#1.-Import-Dependencies\" data-toc-modified-id=\"1.-Import-Dependencies-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>1. Import Dependencies</a></span></li><li><span><a href=\"#2.-Define-Images-to-Collect\" data-toc-modified-id=\"2.-Define-Images-to-Collect-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>2. Define Images to Collect</a></span></li><li><span><a href=\"#3.-Setup-Folders\" data-toc-modified-id=\"3.-Setup-Folders-3\"><span class=\"toc-item-num\">3&nbsp;&nbsp;</span>3. Setup Folders</a></span></li><li><span><a href=\"#4.-Capture-Images\" data-toc-modified-id=\"4.-Capture-Images-4\"><span class=\"toc-item-num\">4&nbsp;&nbsp;</span>4. Capture Images</a></span></li><li><span><a href=\"#5.-Image-Labelling\" data-toc-modified-id=\"5.-Image-Labelling-5\"><span class=\"toc-item-num\">5&nbsp;&nbsp;</span>5. Image Labelling</a></span></li><li><span><a href=\"#6.-Move-them-into-a-Training-and-Testing-Partition\" data-toc-modified-id=\"6.-Move-them-into-a-Training-and-Testing-Partition-6\"><span class=\"toc-item-num\">6&nbsp;&nbsp;</span>6. Move them into a Training and Testing Partition</a></span></li><li><span><a href=\"#OPTIONAL---7.-Compress-them-for-Colab-Training\" data-toc-modified-id=\"OPTIONAL---7.-Compress-them-for-Colab-Training-7\"><span class=\"toc-item-num\">7&nbsp;&nbsp;</span>OPTIONAL - 7. Compress them for Colab Training</a></span></li></ul></div>"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -257,6 +267,41 @@
     "ARCHIVE_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'archive.tar.gz')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Automate the train/test split: move every annotated image (.xml + .jpg pair) into TRAIN_PATH or TEST_PATH\n",
+    "from glob import glob\n",
+    "import shutil\n",
+    "import math\n",
+    "\n",
+    "TEST_SIZE = 0.2  # fraction of each label's annotated images that goes to the test partition\n",
+    "\n",
+    "# os.makedirs is portable (no shell mkdir), creates missing parents, and exist_ok makes re-runs safe\n",
+    "os.makedirs(TRAIN_PATH, exist_ok=True)\n",
+    "os.makedirs(TEST_PATH, exist_ok=True)\n",
+    "\n",
+    "print('using {} % of images as test data'.format(TEST_SIZE * 100))\n",
+    "for label in labels:\n",
+    "    path = os.path.join(IMAGES_PATH, label)\n",
+    "    # glob returns matches in arbitrary order; sorting makes the split reproducible\n",
+    "    xml_filenames = sorted(glob(os.path.join(path, '*.xml')))\n",
+    "    n_files = len(xml_filenames)\n",
+    "    # floor rounds the test share down, so a label with very few images may get no test images\n",
+    "    n_test = math.floor(n_files * TEST_SIZE)\n",
+    "    for i, xml in enumerate(xml_filenames):\n",
+    "        # swap only the extension; str.replace would also rewrite '.xml' elsewhere in the path\n",
+    "        jpg = os.path.splitext(xml)[0] + '.jpg'\n",
+    "        dest_folder = TEST_PATH if i < n_test else TRAIN_PATH\n",
+    "        # move the image first: if it is missing this raises before the annotation is orphaned\n",
+    "        shutil.move(jpg, dest_folder)\n",
+    "        shutil.move(xml, dest_folder)\n",
+    "    print('{} -> annotated_images: {} train: {} test: {}'.format(label, n_files, n_files - n_test, n_test))"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 21,
@@ -291,6 +336,19 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
   }
  },
  "nbformat": 4,