Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
tischi committed Aug 1, 2024
2 parents 9fe769e + b3b49aa commit b0513dd
Show file tree
Hide file tree
Showing 21 changed files with 319 additions and 262 deletions.
66 changes: 38 additions & 28 deletions _includes/batch_processing/batch_measure_nuclei_shape.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# %%
# Batch analysis of 2D nuclei shape measurements


# %%
# Import python modules
from OpenIJTIFF import open_ij_tiff, save_ij_tiff
Expand All @@ -9,17 +10,21 @@
import pandas as pd
import pathlib
from pathlib import Path
from napari import Viewer


# %%
# Create a function that analyses one image
# Below, this function will be called several times, for all images
def analyse(image_path, output_folder):
def analyse(image_filepath, output_folder):

# This prints which image is currently analysed
print("Analyzing:", image_path)
print("Analyzing:", image_filepath)

# Convert the image_filepath String to a Path,
# which is more convenient to create the output files
image_filepath = pathlib.Path(image_filepath)

image, axes, scales, units = open_ij_tiff(image_path)
image, axes, scales, units = open_ij_tiff(image_filepath)

# Binarize the image using auto-thresholding
threshold = threshold_otsu(image)
Expand All @@ -31,10 +36,14 @@ def analyse(image_path, output_folder):
# We can safely convert to 16 bit as we know that we don't have too many objects
label_image = label(binary_image).astype('uint16')

# Save the labels
label_image_filepath = output_folder / f"{image_filepath.stem}_labels.tif"
save_ij_tiff(label_image_filepath, label_image, axes, scales, units)

# Measure calibrated (scaled) nuclei shapes
df = pd.DataFrame(regionprops_table(
label_image,
properties={'label', 'area'},
properties={'label', 'area', 'centroid'},
spacing=scales))

# Round all measurements to 2 decimal places.
Expand All @@ -43,41 +52,42 @@ def analyse(image_path, output_folder):
# you may not want to round that much!
df = df.round(2)

# Save the results to disk

# Convert the image_path String to a Path,
# which is more convenient to create the output files
image_path = pathlib.Path(image_path)
# Add the image and label filepaths to the data-frame
df['image'] = image_filepath
df['labels'] = label_image_filepath

# Save the labels
label_image_path = output_folder / f"{image_path.stem}_labels.tif"
save_ij_tiff(label_image_path, label_image, axes, scales, units)

# Save the measurements table
# to a tab delimited text file (sep='\t')
# without row numbers (index=False)
table_path = output_folder / f"{image_path.stem}_measurements.csv"
df.to_csv(table_path, sep='\t', index=False)
# Return the data-frame
return df


# %%
# Assign an output folder
# Note: This uses your current working directory; you may want to change this to another folder on your computer
output_dir = Path.cwd()


# %%
# Create a list of the paths to all data
image_paths = ["https://github.com/NEUBIAS/training-resources/raw/master/image_data/xy_8bit__mitocheck_incenp_t1.tif",
"https://github.com/NEUBIAS/training-resources/raw/master/image_data/xy_8bit__mitocheck_incenp_t70.tif"]
image_paths = [output_dir / "xy_8bit__mitocheck_incenp_t1.tif",
output_dir / "xy_8bit__mitocheck_incenp_t70.tif"]
# Create an empty list for the measurement results
result_dfs = []


# %%
# The loop which performs the analysis
for image_path in image_paths:
analyse(image_path, output_dir)

# Computes the analysis and returns a data-frame with the resulting measurements
result_df = analyse(image_path, output_dir)

# Append the result data-frame to the list initialized before the loop
result_dfs.append(result_df)


# %%
# Plot the first output image to check if the pipeline worked
image1, *_ = open_ij_tiff(image_paths[0])
labels1, *_ = open_ij_tiff('xy_8bit__mitocheck_incenp_t1_labels.tif')
# Concatenate the result data-frames to a single one which contains all results
final_df = pd.concat(result_dfs, ignore_index=True)
# Save the final results to disk
final_df.to_csv(output_dir / 'batch_processing_results.csv', sep='\t', index=False)

viewer = Viewer()
viewer.add_image(image1)
viewer.add_labels(labels1)
18 changes: 11 additions & 7 deletions _includes/batch_processing/batch_measure_nuclei_shapes.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
<h4 id="batchshape"><a href="#batchshape">Batch analysis of nuclear shapes</a></h4>

- Download the input.zip containing the input images from [here](https://github.com/NEUBIAS/training-resources/tree/master/image_data/batch_process) and unpack to your course directory
- In a previous module [there is a workflow to measure the shapes of nuclei in one image](https://neubias.github.io/training-resources/workflow_segment_2d_nuclei_measure_shape/index.html#2dnuclei)
- Adapt this workflow for automated batch analysis of many images
- Start by building the skeleton of the workflow without filling in the functionality;

Note that the code below runs fine, but does not produce any results:
Note that the pseudo-code below will run fine, but does not produce any results:

```
def analyse(image_path, output_folder):
print("Analyzing:", image_path)
for image_path in image_paths:
analyse(image_path, output_dir)
FUNCTION analyse(image_path, output_folder)
PRINT "Analyzing:", image_path
END FUNCTION
FOR each image_path in image_paths
CALL analyse(image_path, output_dir)
END FOR
```

- Make sure the loop with the (almost) empty analyse function runs without error before filling in the image analysis steps
- Make sure the loop with the (almost) empty analyse function runs without error before filling in the image analysis steps
- Inspect the analysis results using suitable software
53 changes: 0 additions & 53 deletions _includes/image_file_formats/open_diverse_file_formats_bioio.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,56 +177,3 @@
# %%
# Little exercise:
# participants should try to open one of their files with Python

# %%
# Save image as .ome.tif
# Option 1: directly from the BioImage object
# - Observe that some metadata are automatically transferred
# NOTE(review): BioImage is not imported in this part of the file; presumably
# it is imported further up — confirm.
image_url = "https://github.com/NEUBIAS/training-resources/raw/master/image_data/xy_8bit__nuclei_PLK1_control.tif"
bioimage = BioImage(image_url)
# Print the pixel sizes and the metadata of the freshly opened image
print(bioimage.physical_pixel_sizes)
print(bioimage.metadata)

# Save and reload
bioimage.save('option1.ome.tif')
# The pixel sizes are read back from the file that was just written
print(BioImage('option1.ome.tif').physical_pixel_sizes)
# NOTE(review): this prints the metadata of the original `bioimage` object
# again, not of the reloaded file — confirm whether
# BioImage('option1.ome.tif').metadata was intended here.
print(bioimage.metadata)

# %%
# Option 2: Start from numpy.array
from bioio.writers import OmeTiffWriter
# Take the pixel data of the image opened above and squeeze it;
# presumably this drops the singleton dimensions so that a plain 2D array
# remains — TODO confirm against the actual image dimensions
img_data = bioimage.data.squeeze()
# Write the array to disk; the dimension order ('YX') and the pixel sizes
# are passed explicitly because they are not taken from `img_data` here
OmeTiffWriter.save(img_data,
'option2.ome.tif',
dim_order='YX',
physical_pixel_sizes=bioimage.physical_pixel_sizes)

# - Observe that the pixel size is stored in the file
print(BioImage('option2.ome.tif').physical_pixel_sizes)

# %%
# Option 3: Save as ome.zarr
# - Observe that the saved image is no longer a single file, but a whole folder
from bioio.writers import OmeZarrWriter

# Write the array saved in `img_data` (created in the previous cell) together
# with an image name, the dimension order and the pixel sizes;
# channel names and colors are explicitly left unset (None)
OmeZarrWriter('option3.ome.zarr').write_image(img_data,
image_name='Option3',
channel_names=None,
channel_colors=None,
dimension_order='YX',
physical_pixel_sizes=bioimage.physical_pixel_sizes)

# - Observe that reloading shows the same array again with the same pixel sizes
reloaded_img = BioImage('option3.ome.zarr')
print(reloaded_img.dims)
print(reloaded_img.physical_pixel_sizes)

# %%
# Option 4: Save only the numpy.array
# - Keep in mind that this is not the best way, as all metadata are lost
#   and the file can only be opened with Python again
# NOTE(review): `np` is not imported in this part of the file; presumably
# numpy is imported further up — confirm.
np.save('option4.npy',img_data)

# Reload and check if they are the same
reloaded_img = np.load('option4.npy')
# First compare the shapes, then compare the arrays element by element
print(f'Are the dimensions the same: {np.all(img_data.shape == reloaded_img.shape)}')
print(f'Are the images the same: {np.all(img_data == reloaded_img)}')
164 changes: 164 additions & 0 deletions _includes/script_for_loop/script_for_loop_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# %%
# Create a bunch of example images to simulate a typical analysis problem
# np.random.randint(lower, upper, size): generates `size` random integers,
#   from lower (inclusive) up to upper (exclusive), i.e. here 0 ... 254
# .reshape: reshapes a np.array to new dimensions (here: 100 values -> 10 x 10)
import numpy as np
image1 = np.random.randint(0,255,100).reshape((10,10))
image2 = np.random.randint(0,255,100).reshape((10,10))
image3 = np.random.randint(0,255,100).reshape((10,10))
image4 = np.random.randint(0,255,100).reshape((10,10))
image5 = np.random.randint(0,255,100).reshape((10,10))
image6 = np.random.randint(0,255,100).reshape((10,10))
image7 = np.random.randint(0,255,100).reshape((10,10))
image8 = np.random.randint(0,255,100).reshape((10,10))
image9 = np.random.randint(0,255,100).reshape((10,10))
image10 = np.random.randint(0,255,100).reshape((10,10))

# Calculate the mean of every image
# Observe that this is very tedious and error prone:
# every line has to be written (and kept correct) by hand
print(f'Image {1} has an avg intensity of {image1.mean()}.')
print(f'Image {2} has an avg intensity of {image2.mean()}.')
print(f'Image {3} has an avg intensity of {image3.mean()}.')
print(f'Image {4} has an avg intensity of {image4.mean()}.')
print(f'Image {5} has an avg intensity of {image5.mean()}.')
print(f'Image {6} has an avg intensity of {image6.mean()}.')
print(f'Image {7} has an avg intensity of {image7.mean()}.')
print(f'Image {8} has an avg intensity of {image8.mean()}.')
print(f'Image {9} has an avg intensity of {image9.mean()}.')
print(f'Image {10} has an avg intensity of {image10.mean()}.')

# %%
# Create a for loop
# Typical C-style notation: for (i = 0; i < 10; i++) { do something }
# For loop in python
# Observe the difference in notation: python iterates directly over the
# items of a collection instead of counting with an explicit index
for i in [0,1,2,3,4,5,6,7,8,9]:
    print(i)

# use range
# Observe that range does not build a list: it is a lazy sequence that
# produces its values on the fly
print(range(10))
print(type(range(10)))
for i in range(10):
    print(i)

# %%
# Anything iterable can be iterated over with a for loop
# Observe that:
# - the list content doesn't matter (the items may even have different types)
# - the loop variable is always overwritten by the next item
for i in ['a','b','c',1,2,3,[1,2,3],1.5,2.7]:
    print(f'"i" has the value "{i}" and type "{type(i)}"')

# %%
# Use a for loop for the example in the beginning
# First pack images into a list for looping over
image_list = [image1,image2,image3,image4,image5,image6,image7,image8,image9,image10]

# Loop over images and calculate the mean
# The loop body is written only once and is executed for every image
for image in image_list:
    print(f'Avg intensity: {image.mean()}')

# Exercise: Modify the loop to calculate the standard deviation
for image in image_list:
    print(f'Intensity standard deviation: {image.std()}')

#%%
# Iterate with an index
# range(number_of_images) yields the indices 0 ... number_of_images - 1,
# which are then used to pick the image out of the list
number_of_images = len(image_list)
print(number_of_images)
for i in range(number_of_images):
    print(f'Image {i} has an avg intensity of {image_list[i].mean()}.')

# Iterate over two or more lists (with the same length)
# zip pairs up the items of both iterables, one pair per iteration
for i,image in zip(range(number_of_images),image_list):
    print(f'Image {i} has an avg intensity of {image.mean()}.')

# Iterate with additional index
# enumerate yields (index, item) pairs, so no separate range is needed
for i,image in enumerate(image_list):
    print(f'Image {i} has an avg intensity of {image.mean()}.')

# %%
# For loops but advanced

# %%
# list comprehension
# First the classic way: start with an empty list and append inside a loop
squares = []
for i in range(10):
    squares.append(i**2)
print(squares)
# use list comprehension
# The same list is built in a single expression
squares = [i**2 for i in range(10)]
print(squares)

# useful for creating a filepath iterable
# (the resulting list is not assigned; in an interactive cell it is displayed)
from pathlib import Path
[file for file in Path().cwd().iterdir()]

# %%
# for loop with if
# The division is only carried out for values where it is defined
for x in range(10):
    if x != 0:
        print(f'X: {x}, 1/X: {1/x}')

# for loop with if and else
# The else branch handles all values that did not match the if condition
for x in range(10):
    if x == 0:
        print(f'X: {x}, 1/X: Division by {x} not defined.')
    else:
        print(f'X: {x}, 1/X: {1/x}')

# %%
# for loop with continue and break
# Usage:
# - skip files/data for processing
# - debugging
# continue
# Skips the rest of the loop body for the current item and moves on to the next
for x in range(10):
    if x==0:
        continue
    print(f'X: {x}, 1/X: {1/x}')

# break
# Leaves the loop completely; the remaining items are not visited
for x in range(10):
    if x==3:
        break
    print(x)

# %%
# Nested loops
# The inner loop runs completely for every single item of the outer loop
for x in range(1,4):
    for y in range(1,4):
        print(f'X: {x}, Y: {y}, X*Y: {x*y}')

# Nested lists
# The outer loop yields the sub-lists, the inner loop their items
for sublist in [[1,2,3],[4,5,6],[7,8,9]]:
    for i in sublist:
        print(i)

# np.array
# Observe that you iterate only over the first dimension when using a simple loop
array = np.random.randint(0,10,100).reshape(10,10)
print(array.shape)
for i in array:
    print(i)

# A second, nested loop is needed to reach the individual values
for row in array:
    for i in row:
        print(i)

# %%
# Looping over pandas DataFrame
# Observe that
# - looping with iterrows always gives an index
# - the second variable will always be a pandas Series object
import pandas as pd
df = pd.DataFrame({
    'a':[1,2,3],
    'b':[4,5,6]})
print(df)
for i,row in df.iterrows():
    print(type(row))
    # The values of a row are accessed by their column name
    value_a = row['a']
    value_b = row['b']
    print(f'Row index {i}\nColumn value "a": {value_a} and Column value "b": {value_b}')
Loading

0 comments on commit b0513dd

Please sign in to comment.