merge

NEUBIAS · Aug 1, 2024 · b0513dd · b0513dd
2 parents 9fe769e + b3b49aa
commit b0513dd
Show file tree

Hide file tree

Showing 21 changed files with 319 additions and 262 deletions.
diff --git a/_includes/batch_processing/batch_measure_nuclei_shape.py b/_includes/batch_processing/batch_measure_nuclei_shape.py
@@ -1,6 +1,7 @@
 # %% 
 # Batch analysis of 2D nuclei shape measurements
 
+
 # %%
 # Import python modules
 from OpenIJTIFF import open_ij_tiff, save_ij_tiff
@@ -9,17 +10,21 @@
 import pandas as pd
 import pathlib
 from pathlib import Path
-from napari import Viewer
+
 
 # %%
 # Create a function that analyses one image
 # Below, this function will be called several times, for all images
-def analyse(image_path, output_folder):
+def analyse(image_filepath, output_folder):
 
     # This prints which image is currently analysed
-    print("Analyzing:", image_path)
+    print("Analyzing:", image_filepath)
+
+    # Convert the image_filepath String to a Path,
+    # which is more convenient to create the output files
+    image_filepath = pathlib.Path(image_filepath)
 
-    image, axes, scales, units = open_ij_tiff(image_path)
+    image, axes, scales, units = open_ij_tiff(image_filepath)
 
     # Binarize the image using auto-thresholding
     threshold = threshold_otsu(image)
@@ -31,10 +36,14 @@ def analyse(image_path, output_folder):
     # We can safely convert to 16 bit as we know that we don't have too many objects
     label_image = label(binary_image).astype('uint16')
 
+    # Save the labels
+    label_image_filepath = output_folder / f"{image_filepath.stem}_labels.tif"
+    save_ij_tiff(label_image_filepath, label_image, axes, scales, units)
+
     # Measure calibrated (scaled) nuclei shapes
     df = pd.DataFrame(regionprops_table(
         label_image,
-        properties={'label', 'area'},
+        properties={'label', 'area', 'centroid'},
         spacing=scales))
 
     # Round all measurements to 2 decimal places.
@@ -43,41 +52,42 @@ def analyse(image_path, output_folder):
     # you may not want to round that much!
     df = df.round(2)
 
-    # Save the results to disk
-
-    # Convert the image_path String to a Path,
-    # which is more convenient to create the output files
-    image_path = pathlib.Path(image_path)
+    # Add the image and label filepaths to the data-frame
+    df['image'] = image_filepath
+    df['labels'] = label_image_filepath
 
-    # Save the labels
-    label_image_path = output_folder / f"{image_path.stem}_labels.tif"
-    save_ij_tiff(label_image_path, label_image, axes, scales, units)
-
-    # Save the measurements table
-    # to a tab delimited text file (sep='\t')
-    # without row numbers (index=False)
-    table_path = output_folder / f"{image_path.stem}_measurements.csv"
-    df.to_csv(table_path, sep='\t', index=False)
+    # Return the data-frame
+    return df
 
 
 # %%
 # Assign an output folder 
 # Note: This uses your current working directory; you may want to change this to another folder on your computer
 output_dir = Path.cwd()
 
+
 # %%
 # Create a list of the paths to all data
-image_paths = ["https://github.com/NEUBIAS/training-resources/raw/master/image_data/xy_8bit__mitocheck_incenp_t1.tif", 
-               "https://github.com/NEUBIAS/training-resources/raw/master/image_data/xy_8bit__mitocheck_incenp_t70.tif"]
+image_paths = [output_dir / "xy_8bit__mitocheck_incenp_t1.tif",
+               output_dir / "xy_8bit__mitocheck_incenp_t70.tif"]
+# Create an empty list for the measurement results
+result_dfs = []
 
+
+# %%
+# The loop which performs the analysis
 for image_path in image_paths:
-    analyse(image_path, output_dir)
+
+    # Computes the analysis and returns a data-frame with the resulting measurements
+    result_df = analyse(image_path, output_dir)
+
+    # Append the result data-frame to the list initialized before the loop
+    result_dfs.append(result_df)
+
 
 # %%
-# Plot the first output image to check if the pipeline worked
-image1, *_ = open_ij_tiff(image_paths[0])
-labels1, *_ = open_ij_tiff('xy_8bit__mitocheck_incenp_t1_labels.tif')
+# Concatenate the result data-frames to a single one which contains all results
+final_df = pd.concat(result_dfs, ignore_index=True)
+# Save the final results to disk
+final_df.to_csv(output_dir / 'batch_processing_results.csv', sep='\t', index=False)
 
-viewer = Viewer()
-viewer.add_image(image1)
-viewer.add_labels(labels1)
diff --git a/_includes/batch_processing/batch_measure_nuclei_shapes.md b/_includes/batch_processing/batch_measure_nuclei_shapes.md
@@ -1,17 +1,21 @@
 <h4 id="batchshape"><a href="#batchshape">Batch analysis of nuclear shapes</a></h4>
 
+- Download the `input.zip` file containing the input images from [here](https://github.com/NEUBIAS/training-resources/tree/master/image_data/batch_process) and unpack it into your course directory
 - In a previous module [there is a workflow to measure the shapes of nuclei in one image](https://neubias.github.io/training-resources/workflow_segment_2d_nuclei_measure_shape/index.html#2dnuclei)
 - Adapt this workflow for automated batch analysis of many images
 - Start by building the skeleton of the workflow without filling in the functionality;
 
-  Note that the code below runs fine, but does not produce any results:
+  Note that the skeleton below is pseudo-code; once translated to your language it runs fine, but does not yet produce any results:
 
 ```
-def analyse(image_path, output_folder):
-    print("Analyzing:", image_path)
-    
-for image_path in image_paths:
-    analyse(image_path, output_dir)
+FUNCTION analyse(image_path, output_folder)
+    PRINT "Analyzing:", image_path
+END FUNCTION
+
+FOR each image_path in image_paths
+    CALL analyse(image_path, output_dir)
+END FOR
 ```
 
- - Make sure the loop with the (almost) empty analyse function runs without error before filling in the image analysis steps
+ - Make sure the loop with the (almost) empty analyse function runs without error before filling in the image analysis steps
+ - Inspect the analysis results in suitable software
diff --git a/_includes/image_file_formats/open_diverse_file_formats_bioio.py b/_includes/image_file_formats/open_diverse_file_formats_bioio.py
@@ -177,56 +177,3 @@
 # %%
 # little excercise:
 # paticipants should try to open one of their files with python
-
-# %%
-# Save image as .ome.tif
-# Option 1: directly from the BioImage object
-# - Observe that some metadata are automatically transferred
-image_url = "https://github.com/NEUBIAS/training-resources/raw/master/image_data/xy_8bit__nuclei_PLK1_control.tif"
-bioimage = BioImage(image_url)
-print(bioimage.physical_pixel_sizes)
-print(bioimage.metadata)
-
-# Save and reload
-bioimage.save('option1.ome.tif')
-print(BioImage('option1.ome.tif').physical_pixel_sizes)
-print(bioimage.metadata)
-
-# %%
-# Option 2: Start from numpy.array
-from bioio.writers import OmeTiffWriter
-img_data = bioimage.data.squeeze()
-OmeTiffWriter.save(img_data,
-                   'option2.ome.tif',
-                   dim_order='YX',
-                   physical_pixel_sizes=bioimage.physical_pixel_sizes)
-
-# - Observe that pixel size is stored in file
-print(BioImage('option2.ome.tif').physical_pixel_sizes)
-
-# %%
-# Option 3: Save as ome.zarr
-# - Observe that saved image is no longer single file, but whole folder
-from bioio.writers import OmeZarrWriter
-
-OmeZarrWriter('option3.ome.zarr').write_image(img_data,
-                                              image_name='Option3',
-                                              channel_names=None,
-                                              channel_colors=None,
-                                              dimension_order='YX',
-                                              physical_pixel_sizes=bioimage.physical_pixel_sizes)
-
-# - Observe that relaoding shows the same array again with the same pixel sizes
-reloaded_img = BioImage('option3.ome.zarr')
-print(reloaded_img.dims)
-print(reloaded_img.physical_pixel_sizes)
-
-# %%
-# Option 4: Save only the numpy.array
-# - Keep in mind that is not the best way, as all metadata are lost and the file can only be opened with python again
-np.save('option4.npy',img_data)
-
-# Reload and check if they are the same
-reloaded_img = np.load('option4.npy')
-print(f'Are the dimensions the same: {np.all(img_data.shape == reloaded_img.shape)}')
-print(f'Are the images the same: {np.all(img_data == reloaded_img)}')
diff --git a/_includes/script_for_loop/script_for_loop_python.py b/_includes/script_for_loop/script_for_loop_python.py
@@ -0,0 +1,164 @@
+# %%
+# Create a bunch of example images to simulate a typical analysis problem
+# np.random.randint(lower,upper,size): generates random integers from lower (inclusive) to upper (exclusive)
+# .reshape: reshapes a np.array to new dimensions
+import numpy as np
+image1 = np.random.randint(0,255,100).reshape((10,10))
+image2 = np.random.randint(0,255,100).reshape((10,10))
+image3 = np.random.randint(0,255,100).reshape((10,10))
+image4 = np.random.randint(0,255,100).reshape((10,10))
+image5 = np.random.randint(0,255,100).reshape((10,10))
+image6 = np.random.randint(0,255,100).reshape((10,10))
+image7 = np.random.randint(0,255,100).reshape((10,10))
+image8 = np.random.randint(0,255,100).reshape((10,10))
+image9 = np.random.randint(0,255,100).reshape((10,10))
+image10 = np.random.randint(0,255,100).reshape((10,10))
+
+# Calculate the mean of every image
+# Observe that this is very tedious and error-prone
+print(f'Image {1} has an avg intensity of {image1.mean()}.')
+print(f'Image {2} has an avg intensity of {image2.mean()}.')
+print(f'Image {3} has an avg intensity of {image3.mean()}.')
+print(f'Image {4} has an avg intensity of {image4.mean()}.')
+print(f'Image {5} has an avg intensity of {image5.mean()}.')
+print(f'Image {6} has an avg intensity of {image6.mean()}.')
+print(f'Image {7} has an avg intensity of {image7.mean()}.')
+print(f'Image {8} has an avg intensity of {image8.mean()}.')
+print(f'Image {9} has an avg intensity of {image9.mean()}.')
+print(f'Image {10} has an avg intensity of {image10.mean()}.')
+
+# %%
+# Create a for loop
+# Typical notation in C-style: for (i = 0; i < 10; i++) {do something}
+# For loop in python
+# Observe the difference in notation
+for i in [0,1,2,3,4,5,6,7,8,9]:
+    print(i)
+
+# use range
+# Observe that range is not a list but a lazy sequence object that produces its values on the fly
+print(range(10))
+print(type(range(10)))
+for i in range(10):
+    print(i)
+
+# %%
+# Anything iterable can be iterated over with a for loop
+# Observe that:
+# - the types of the list items don't matter
+# - the loop variable is always overwritten by the next item
+for i in ['a','b','c',1,2,3,[1,2,3],1.5,2.7]:
+    print(f'"i" has the value "{i}" and type "{type(i)}"')
+
+# %%
+# Use a for loop for the example in the beginning
+# First pack images into a list for looping over
+image_list = [image1,image2,image3,image4,image5,image6,image7,image8,image9,image10]
+
+# Loop over images and calculate the mean
+for image in image_list:
+    print(f'Avg intensity: {image.mean()}')
+
+# Exercise: Modify the loop to calculate the standard deviation
+for image in image_list:
+    print(f'Intensity standard deviation: {image.std()}')
+
+#%%
+# Iterate with an index
+number_of_images = len(image_list)
+print(number_of_images)
+for i in range(number_of_images):
+    print(f'Image {i} has an avg intensity of {image_list[i].mean()}.')
+
+# Iterate over two or more lists (with the same length)
+for i,image in zip(range(number_of_images),image_list):
+    print(f'Image {i} has an avg intensity of {image.mean()}.')
+
+# Iterate with additional index
+for i,image in enumerate(image_list):
+    print(f'Image {i} has an avg intensity of {image.mean()}.')
+
+# %%
+# For loops but advanced
+
+# %%
+# list comprehension
+squares = []
+for i in range(10):
+    squares.append(i**2)
+print(squares)
+# use list comprehension
+squares = [i**2 for i in range(10)]
+print(squares)
+
+# useful for creating a list of file paths
+from pathlib import Path
+[file for file in Path().cwd().iterdir()]
+
+# %%
+# for loop with if
+for x in range(10):
+    if x != 0:
+        print(f'X: {x}, 1/X: {1/x}')
+
+# for loop with if and else
+for x in range(10):
+    if x == 0:
+        print(f'X: {x}, 1/X: Division by {x} not defined.')
+    else:
+        print(f'X: {x}, 1/X: {1/x}')
+
+# %%
+# for loop with continue and break
+# Usage:
+# - skip files/data for processing
+# - debugging
+# continue
+for x in range(10):
+    if x==0:
+        continue
+    print(f'X: {x}, 1/X: {1/x}')
+
+# break
+for x in range(10):
+    if x==3:
+        break
+    print(x)
+
+# %%
+# Nested loops
+for x in range(1,4):
+    for y in range(1,4):
+        print(f'X: {x}, Y: {y}, X*Y: {x*y}')
+
+# Nested lists
+for sublist in [[1,2,3],[4,5,6],[7,8,9]]:
+    for i in sublist:
+        print(i)
+
+# np.array
+# Observe that you iterate only over the first dimension when using a simple loop
+array = np.random.randint(0,10,100).reshape(10,10)
+print(array.shape)
+for i in array:
+    print(i)
+
+for row in array:
+    for i in row:
+        print(i)
+
+# %%
+# Looping over pandas DataFrame
+# Observe that
+# - looping with iterrows always gives an index
+# - the second variable will always be a pandas Series object
+import pandas as pd
+df = pd.DataFrame({
+    'a':[1,2,3],
+    'b':[4,5,6]})
+print(df)
+for i,row in df.iterrows():
+    print(type(row))
+    value_a = row['a']
+    value_b = row['b']
+    print(f'Row index {i}\nColumn value "a": {value_a} and Column value "b": {value_b}')