From f07beec23ee4a1a21976b893c19fe1b2621ba81f Mon Sep 17 00:00:00 2001 From: manfred Date: Mon, 26 Apr 2021 20:20:41 +0700 Subject: [PATCH] Adding data-splitting-automation to Image Collection.ipynb --- .../1. Image Collection-checkpoint.ipynb | 101 ++++++++++++++++-- 1. Image Collection.ipynb | 58 ++++++++++ 2 files changed, 150 insertions(+), 9 deletions(-) diff --git a/.ipynb_checkpoints/1. Image Collection-checkpoint.ipynb b/.ipynb_checkpoints/1. Image Collection-checkpoint.ipynb index f737bdb36..d9c445e79 100644 --- a/.ipynb_checkpoints/1. Image Collection-checkpoint.ipynb +++ b/.ipynb_checkpoints/1. Image Collection-checkpoint.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "

Table of Contents

\n", + "
" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -53,11 +63,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "labels = ['thumbsup']\n", + "labels = ['thumbsup', 'thumbsdown', 'thankyou', 'livelong']\n", "number_imgs = 5" ] }, @@ -70,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -79,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -103,9 +113,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting images for thumbsup\n", + "Collecting image 0\n", + "Collecting image 1\n", + "Collecting image 2\n", + "Collecting image 3\n", + "Collecting image 4\n" + ] + } + ], "source": [ "for label in labels:\n", " cap = cv2.VideoCapture(0)\n", @@ -200,9 +223,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.6a706a36-940f-11eb-b4eb-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.6a706a36-940f-11eb-b4eb-5cf3709bbcc6.xml\n", + "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.6ba4d864-940f-11eb-8c74-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.6ba4d864-940f-11eb-8c74-5cf3709bbcc6.xml\n", + "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.6cd9c8e2-940f-11eb-b901-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.6cd9c8e2-940f-11eb-b901-5cf3709bbcc6.xml\n", + "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.6e0f5bc0-940f-11eb-8d18-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.6e0f5bc0-940f-11eb-8d18-5cf3709bbcc6.xml\n", + "Image:D:\\YouTube\\OD\\TFODCourse\\Tensorflow\\workspace\\images\\collectedimages\\thumbsup\\thumbsup.693a5158-940f-11eb-8752-5cf3709bbcc6.jpg -> Annotation:D:/YouTube/OD/TFODCourse/Tensorflow/workspace/images/collectedimages/thumbsup/thumbsup.693a5158-940f-11eb-8752-5cf3709bbcc6.xml\n" + ] + } + ], "source": [ "!cd {LABELIMG_PATH} && python labelImg.py" ] @@ -223,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -237,6 +272,41 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [ + "# automate train test splitting\n", + "from glob import glob\n", + "import shutil\n", + "import math\n", + "\n", + "TEST_SIZE = 0.2\n", + "\n", + "if not os.path.exists(TRAIN_PATH):\n", + " !mkdir {TRAIN_PATH}\n", + "if not os.path.exists(TEST_PATH):\n", + " !mkdir {TEST_PATH}\n", + " \n", + "print('using {} % of images as test data'.format(TEST_SIZE * 100))\n", + "for label in labels:\n", + " path = os.path.join(IMAGES_PATH, label)\n", + " xml_filenames = glob(path+'/*.xml')\n", + " n_files = len(xml_filenames)\n", + " n_test = math.floor(n_files * TEST_SIZE)\n", + " for i, xml in enumerate(xml_filenames):\n", + " jpg = xml.replace('.xml', '.jpg')\n", + " if i >= n_test:\n", + " dest_folder = TRAIN_PATH\n", + " else:\n", + " dest_folder = TEST_PATH\n", + " shutil.move(xml, dest_folder)\n", + " shutil.move(jpg, dest_folder)\n", + " print('{} -> annotated_images: {} train: {} test: {}'.format(label, n_files, n_files - n_test, n_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], "source": [ "!tar -czf {ARCHIVE_PATH} {TRAIN_PATH} {TEST_PATH}" ] @@ -266,6 +336,19 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false } }, "nbformat": 4, diff --git a/1. Image Collection.ipynb b/1. Image Collection.ipynb index dcffd6c40..d9c445e79 100644 --- a/1. Image Collection.ipynb +++ b/1. Image Collection.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "

Table of Contents

\n", + "
" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -257,6 +267,41 @@ "ARCHIVE_PATH = os.path.join('Tensorflow', 'workspace', 'images', 'archive.tar.gz')" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# automate train test splitting\n", + "from glob import glob\n", + "import shutil\n", + "import math\n", + "\n", + "TEST_SIZE = 0.2\n", + "\n", + "if not os.path.exists(TRAIN_PATH):\n", + " !mkdir {TRAIN_PATH}\n", + "if not os.path.exists(TEST_PATH):\n", + " !mkdir {TEST_PATH}\n", + " \n", + "print('using {} % of images as test data'.format(TEST_SIZE * 100))\n", + "for label in labels:\n", + " path = os.path.join(IMAGES_PATH, label)\n", + " xml_filenames = glob(path+'/*.xml')\n", + " n_files = len(xml_filenames)\n", + " n_test = math.floor(n_files * TEST_SIZE)\n", + " for i, xml in enumerate(xml_filenames):\n", + " jpg = xml.replace('.xml', '.jpg')\n", + " if i >= n_test:\n", + " dest_folder = TRAIN_PATH\n", + " else:\n", + " dest_folder = TEST_PATH\n", + " shutil.move(xml, dest_folder)\n", + " shutil.move(jpg, dest_folder)\n", + " print('{} -> annotated_images: {} train: {} test: {}'.format(label, n_files, n_files - n_test, n_test))" + ] + }, { "cell_type": "code", "execution_count": 21, @@ -291,6 +336,19 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false } }, "nbformat": 4,