diff --git a/docs/33_batch_processing/16_zip_folders.ipynb b/docs/33_batch_processing/16_zip_folders.ipynb new file mode 100644 index 000000000..76a5e760c --- /dev/null +++ b/docs/33_batch_processing/16_zip_folders.ipynb @@ -0,0 +1,301 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "50401389", + "metadata": {}, + "source": [ + "## `zip` for Processing Paired Folders\n", + "In this notebook, we will use the Python built-in function `zip` to iterate over paired folders of images and label masks. Specifically, we will process images and their corresponding masks from the following directories:\n", + "* `data/BBBC007/images`\n", + "* `data/BBBC007/masks`\n", + "\n", + "We'll calculate the average intensity of labeled objects and the number of objects in each pair of image and mask files, and store the results in a pandas DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4077ca2f-f34a-4efe-bb4f-4c07fa782b60", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "from skimage import io, measure\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "59d71bed-929e-4c7c-ae21-4352c41d1f28", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Define paths\n", + "image_folder = '../../data/BBBC007/images'\n", + "mask_folder = '../../data/BBBC007/masks'" + ] + }, + { + "cell_type": "markdown", + "id": "09904b87-503b-470f-be5e-1db462d31951", + "metadata": {}, + "source": [ + "Before starting, we just have a look at the folder contents to see if there are indeed paired files." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "99b7a708-80bc-4e60-bf2f-c7c6fcb22ae2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['A9 p5d (cropped 1).tif',\n", + " 'A9 p5d (cropped 2).tif',\n", + " 'A9 p5d (cropped 3).tif',\n", + " 'A9 p5d (cropped 4).tif']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "image_files = sorted(os.listdir(image_folder))\n", + "image_files" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ca9a595b-4864-461d-b76c-ed2d529facb4", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['A9 p5d (cropped 1).tif',\n", + " 'A9 p5d (cropped 2).tif',\n", + " 'A9 p5d (cropped 3).tif',\n", + " 'A9 p5d (cropped 4).tif']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask_files = sorted(os.listdir(mask_folder))\n", + "mask_files" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3fb862a7-29f4-420c-9a78-bcc9648ae744", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(columns=['Image', 'Average Intensity', 'Number of Objects'])" + ] + }, + { + "cell_type": "markdown", + "id": "861bf937-9c8b-45c8-9ebc-9d7b991b3b5f", + "metadata": {}, + "source": [ + "To demonstrate how `zip()` allows iterate over image and mask files in parallel, we just print out file names in a short for-loop: " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ca5635ff-51a8-4d67-ab52-d64b60f89608", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../../data/BBBC007/images\\A9 p5d (cropped 1).tif ../../data/BBBC007/masks\\A9 p5d (cropped 1).tif \n", + "\n", + "\n", + "../../data/BBBC007/images\\A9 p5d (cropped 2).tif ../../data/BBBC007/masks\\A9 p5d (cropped 2).tif \n", + "\n", + "\n", + "../../data/BBBC007/images\\A9 p5d (cropped 3).tif ../../data/BBBC007/masks\\A9 p5d (cropped 3).tif \n", + "\n", + "\n", + "../../data/BBBC007/images\\A9 p5d (cropped 4).tif ../../data/BBBC007/masks\\A9 p5d (cropped 4).tif \n", + "\n", + "\n" + ] + } + ], + "source": [ + "for image_file, mask_file in zip(image_files, mask_files):\n", + " image_path = os.path.join(image_folder, image_file)\n", + " mask_path = os.path.join(mask_folder, mask_file)\n", + " \n", + " print(image_path, mask_path, \"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "443218ba-f193-4ad1-8e41-737bec3974eb", + "metadata": {}, + "source": [ + "The same code can be used to go through both folders in parallel and analyse intensity images paired with given label images." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3a07e9a4-f89f-4afa-bd9f-04e38b4a1576", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ImageAverage IntensityNumber of Objects
0A9 p5d (cropped 1).tif26.2695232
1A9 p5d (cropped 2).tif16.6985282
2A9 p5d (cropped 3).tif34.8471662
3A9 p5d (cropped 4).tif28.7071852
\n", + "
" + ], + "text/plain": [ + " Image Average Intensity Number of Objects\n", + "0 A9 p5d (cropped 1).tif 26.269523 2\n", + "1 A9 p5d (cropped 2).tif 16.698528 2\n", + "2 A9 p5d (cropped 3).tif 34.847166 2\n", + "3 A9 p5d (cropped 4).tif 28.707185 2" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for image_file, mask_file in zip(image_files, mask_files):\n", + " image_path = os.path.join(image_folder, image_file)\n", + " mask_path = os.path.join(mask_folder, mask_file)\n", + " \n", + " # Read the image and its mask\n", + " image = io.imread(image_path)\n", + " mask = io.imread(mask_path).astype(np.uint32)\n", + "\n", + " # Measure labeled regions\n", + " labeled_regions = measure.regionprops(mask, intensity_image=image)\n", + "\n", + " # Calculate average intensity and number of objects\n", + " num_objects = len(labeled_regions)\n", + " avg_intensity = sum(region.mean_intensity for region in labeled_regions) / num_objects\n", + "\n", + " # Append results for the current pair\n", + " df.loc[len(df)] = {\n", + " 'Image': image_file,\n", + " 'Average Intensity': avg_intensity,\n", + " 'Number of Objects': num_objects\n", + " }\n", + "\n", + "# Display the result\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88625c82-99b1-4190-b3ab-cff9539938d6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/33_batch_processing/readme.md b/docs/33_batch_processing/readme.md index 95b60491b..d6d32eefb 100644 --- a/docs/33_batch_processing/readme.md +++ b/docs/33_batch_processing/readme.md @@ -2,3 +2,5 @@ [Batch processing](https://www.investopedia.com/terms/b/batch-processing.asp) comes into play when we want to process multiple images in the same way. One example of batch processing would be to loop over a folder of images using a for-loop to apply a segmentation-workflow. + +For an example using the `zip` function to process paired images and masks, see [16_zip_folders](16_zip_folders.ipynb). diff --git a/docs/_toc.yml b/docs/_toc.yml index afe12b63d..8462a5477 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -308,6 +308,7 @@ parts: sections: - file: 33_batch_processing/12_process_folders - file: 33_batch_processing/14_process_timelapse + - file: 33_batch_processing/16_zip_folders.ipynb # - file: 18_image_filtering/run_on_all_hyperslices # - file: 27_cell_classification/cell_classification