diff --git a/docs/33_batch_processing/16_zip_folders.ipynb b/docs/33_batch_processing/16_zip_folders.ipynb new file mode 100644 index 000000000..76a5e760c --- /dev/null +++ b/docs/33_batch_processing/16_zip_folders.ipynb @@ -0,0 +1,301 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "50401389", + "metadata": {}, + "source": [ + "## `zip` for Processing Paired Folders\n", + "In this notebook, we will use the Python built-in function `zip` to iterate over paired folders of images and label masks. Specifically, we will process images and their corresponding masks from the following directories:\n", + "* `data/BBBC007/images`\n", + "* `data/BBBC007/masks`\n", + "\n", + "We'll calculate the average intensity of labeled objects and the number of objects in each pair of image and mask files, and store the results in a pandas DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4077ca2f-f34a-4efe-bb4f-4c07fa782b60", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "from skimage import io, measure\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "59d71bed-929e-4c7c-ae21-4352c41d1f28", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Define paths\n", + "image_folder = '../../data/BBBC007/images'\n", + "mask_folder = '../../data/BBBC007/masks'" + ] + }, + { + "cell_type": "markdown", + "id": "09904b87-503b-470f-be5e-1db462d31951", + "metadata": {}, + "source": [ + "Before starting, we just have a look at the folder contents to see if there are indeed paired files." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "99b7a708-80bc-4e60-bf2f-c7c6fcb22ae2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['A9 p5d (cropped 1).tif',\n", + " 'A9 p5d (cropped 2).tif',\n", + " 'A9 p5d (cropped 3).tif',\n", + " 'A9 p5d (cropped 4).tif']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "image_files = sorted(os.listdir(image_folder))\n", + "image_files" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ca9a595b-4864-461d-b76c-ed2d529facb4", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['A9 p5d (cropped 1).tif',\n", + " 'A9 p5d (cropped 2).tif',\n", + " 'A9 p5d (cropped 3).tif',\n", + " 'A9 p5d (cropped 4).tif']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask_files = sorted(os.listdir(mask_folder))\n", + "mask_files" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3fb862a7-29f4-420c-9a78-bcc9648ae744", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(columns=['Image', 'Average Intensity', 'Number of Objects'])" + ] + }, + { + "cell_type": "markdown", + "id": "861bf937-9c8b-45c8-9ebc-9d7b991b3b5f", + "metadata": {}, + "source": [ + "To demonstrate how `zip()` allows us to iterate over image and mask files in parallel, we just print out the file paths in a short for-loop: " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ca5635ff-51a8-4d67-ab52-d64b60f89608", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../../data/BBBC007/images\\A9 p5d (cropped 1).tif ../../data/BBBC007/masks\\A9 p5d (cropped 1).tif \n", + "\n", + "\n", + "../../data/BBBC007/images\\A9 p5d (cropped 2).tif ../../data/BBBC007/masks\\A9 p5d (cropped 2).tif \n", + "\n", + "\n", + "../../data/BBBC007/images\\A9 p5d 
(cropped 3).tif ../../data/BBBC007/masks\\A9 p5d (cropped 3).tif \n", + "\n", + "\n", + "../../data/BBBC007/images\\A9 p5d (cropped 4).tif ../../data/BBBC007/masks\\A9 p5d (cropped 4).tif \n", + "\n", + "\n" + ] + } + ], + "source": [ + "for image_file, mask_file in zip(image_files, mask_files):\n", + " image_path = os.path.join(image_folder, image_file)\n", + " mask_path = os.path.join(mask_folder, mask_file)\n", + " \n", + " print(image_path, mask_path, \"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "443218ba-f193-4ad1-8e41-737bec3974eb", + "metadata": {}, + "source": [ + "The same code can be used to go through both folders in parallel and analyse intensity images paired with given label images." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3a07e9a4-f89f-4afa-bd9f-04e38b4a1576", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Image | \n", + "Average Intensity | \n", + "Number of Objects | \n", + "
---|---|---|---|
0 | \n", + "A9 p5d (cropped 1).tif | \n", + "26.269523 | \n", + "2 | \n", + "
1 | \n", + "A9 p5d (cropped 2).tif | \n", + "16.698528 | \n", + "2 | \n", + "
2 | \n", + "A9 p5d (cropped 3).tif | \n", + "34.847166 | \n", + "2 | \n", + "
3 | \n", + "A9 p5d (cropped 4).tif | \n", + "28.707185 | \n", + "2 | \n", + "