Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cellvoyager converter improvements #812

Merged
merged 17 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
**Note**: Numbers like (\#123) point to closed Pull Requests on the fractal-tasks-core repository.

# Unreleased
* Tasks:
* `image_glob_patterns` are renamed to `include_glob_patterns` in Convert Cellvoyager to OME-Zarr (regular & multiplexing) (\#812).
* Convert Cellvoyager to OME-Zarr (regular & multiplexing) gain exclusion patterns to exclude specific patterns of images from being processed (\#812).
* Fix issue with arbitrary acquisition names in Convert Cellvoyager Multiplexing to OME-Zarr (\#812).
* In Convert Cellvoyager to OME-Zarr (regular & multiplexing), improve handling of channels that are listed in the mrf metadata file but not present in the mlf metadata (\#812).
* In Convert Cellvoyager to OME-Zarr, improve plate metadata for image list when multiple plates with the same plate name are processed (\#812).
* Improve error handling for missing mlf & mrf files in Convert Cellvoyager to OME-Zarr (regular & multiplexing) (\#812).
* Drop defusedxml dependency for cellvoyager metadata conversion (\#812).

# 1.2.1
* Core-library
* Add `create_roi_table_from_df_list` library function in `fractal_tasks_core.v1.roi`: It combines a list of ROI table dataframes into an AnnData ROI table and handles repeating labels (\#811).
Expand Down
50 changes: 40 additions & 10 deletions fractal_tasks_core/__FRACTAL_MANIFEST__.json
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,21 @@
"type": "array",
"description": "A list of `OmeroChannel` s, where each channel must include the `wavelength_id` attribute and where the `wavelength_id` values must be unique across the list."
},
"image_glob_patterns": {
"include_glob_patterns": {
"items": {
"type": "string"
},
"title": "Image Glob Patterns",
"title": "Include Glob Patterns",
"type": "array",
"description": "If specified, only parse images with filenames that match with all these patterns. Patterns must be defined as in https://docs.python.org/3/library/fnmatch.html, Example: `image_glob_pattern=[\"*_B03_*\"]` => only process well B03 `image_glob_pattern=[\"*_C09_*\", \"*F016*\", \"*Z[0-5][0-9]C*\"]` => only process well C09, field of view 16 and Z planes 0-59."
"description": "If specified, only parse images with filenames that match with all these patterns. Patterns must be defined as in https://docs.python.org/3/library/fnmatch.html, Example: `image_glob_pattern=[\"*_B03_*\"]` => only process well B03 `image_glob_pattern=[\"*_C09_*\", \"*F016*\", \"*Z[0-5][0-9]C*\"]` => only process well C09, field of view 16 and Z planes 0-59. Can interact with exclude_glob_patterns: All included images - all excluded images gives the final list of images to process"
},
"exclude_glob_patterns": {
"items": {
"type": "string"
},
"title": "Exclude Glob Patterns",
"type": "array",
"description": "If specified, exclude any image where the filename matches any of the exclusion patterns. Patterns are specified the same as for include_glob_patterns."
},
"num_levels": {
"default": 5,
Expand Down Expand Up @@ -203,11 +211,18 @@
"title": "Image Extension",
"type": "string"
},
"image_glob_patterns": {
"include_glob_patterns": {
"items": {
"type": "string"
},
"title": "Include Glob Patterns",
"type": "array"
},
"exclude_glob_patterns": {
"items": {
"type": "string"
},
"title": "Image Glob Patterns",
"title": "Exclude Glob Patterns",
"type": "array"
},
"acquisition": {
Expand Down Expand Up @@ -398,13 +413,21 @@
"type": "object",
"description": "dictionary of acquisitions. Each key is the acquisition identifier (normally 0, 1, 2, 3 etc.). Each item defines the acquisition by providing the image_dir and the allowed_channels."
},
"image_glob_patterns": {
"include_glob_patterns": {
"items": {
"type": "string"
},
"title": "Image Glob Patterns",
"title": "Include Glob Patterns",
"type": "array",
"description": "If specified, only parse images with filenames that match with all these patterns. Patterns must be defined as in https://docs.python.org/3/library/fnmatch.html, Example: `image_glob_pattern=[\"*_B03_*\"]` => only process well B03 `image_glob_pattern=[\"*_C09_*\", \"*F016*\", \"*Z[0-5][0-9]C*\"]` => only process well C09, field of view 16 and Z planes 0-59."
"description": "If specified, only parse images with filenames that match with all these patterns. Patterns must be defined as in https://docs.python.org/3/library/fnmatch.html, Example: `image_glob_pattern=[\"*_B03_*\"]` => only process well B03 `image_glob_pattern=[\"*_C09_*\", \"*F016*\", \"*Z[0-5][0-9]C*\"]` => only process well C09, field of view 16 and Z planes 0-59. Can interact with exclude_glob_patterns: All included images - all excluded images gives the final list of images to process"
},
"exclude_glob_patterns": {
"items": {
"type": "string"
},
"title": "Exclude Glob Patterns",
"type": "array",
"description": "If specified, exclude any image where the filename matches any of the exclusion patterns. Patterns are specified the same as for include_glob_patterns."
},
"num_levels": {
"default": 5,
Expand Down Expand Up @@ -468,11 +491,18 @@
"title": "Image Extension",
"type": "string"
},
"image_glob_patterns": {
"include_glob_patterns": {
"items": {
"type": "string"
},
"title": "Include Glob Patterns",
"type": "array"
},
"exclude_glob_patterns": {
"items": {
"type": "string"
},
"title": "Image Glob Patterns",
"title": "Exclude Glob Patterns",
"type": "array"
},
"acquisition": {
Expand Down
39 changes: 29 additions & 10 deletions fractal_tasks_core/cellvoyager/filenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,42 +22,61 @@
def glob_with_multiple_patterns(
    *,
    folder: str,
    include_patterns: Sequence[str] = None,
    exclude_patterns: Sequence[str] = None,
) -> set[str]:
    """
    List all the items (files and folders) in a given folder that
    simultaneously match a series of glob include_patterns and do not match
    any of the exclude_patterns.

    Args:
        folder: Base folder where items will be searched. It is treated as a
            literal path: glob special characters in it (`*`, `?`, `[`) are
            escaped, so that only the patterns themselves act as wildcards.
        include_patterns: If specified, the list of patterns (defined as in
            https://docs.python.org/3/library/fnmatch.html) that item
            names will match with. An item is kept only if it matches all
            of them. If not specified, all items matching `*` are included.
        exclude_patterns: If specified, a list of patterns (same syntax as
            `include_patterns`). An item matching any of them is removed
            from the result.

    Returns:
        The set of item paths (each one prefixed with the sanitized
        `folder`) that match all include patterns and no exclude pattern.
    """
    # Function-scope import: the module is only known to expose the `glob`
    # function itself, not the `glob` module.
    from glob import escape as escape_glob_chars

    # Sanitize base-folder path
    if folder.endswith("/"):
        actual_folder = folder[:-1]
    else:
        actual_folder = folder[:]

    # The base folder must be matched literally, otherwise a folder name such
    # as "plate[1]" would be interpreted as a pattern and match nothing.
    search_root = escape_glob_chars(actual_folder)

    # If no pattern is specified, look for *all* items in the base folder
    if not include_patterns:
        include_patterns = ["*"]
    if not exclude_patterns:
        exclude_patterns = []

    # Combine multiple glob searches (via set intersection)
    logging.info(f"[glob_with_multiple_patterns] {include_patterns=}")
    logging.info(f"[glob_with_multiple_patterns] {exclude_patterns=}")
    items = None
    for pattern in include_patterns:
        new_matches = set(glob(f"{search_root}/{pattern}"))
        items = new_matches if items is None else items & new_matches
    items = items or set()

    # Combine all exclude patterns (via set union)
    exclude_items = set()
    for pattern in exclude_patterns:
        exclude_items.update(glob(f"{search_root}/{pattern}"))

    # Remove exclude_items from included list
    consensus_items = items - exclude_items

    logging.info(
        f"[glob_with_multiple_patterns] Found {len(consensus_items)} items"
    )

    return consensus_items


def _get_plate_name(plate_prefix: str) -> str:
Expand Down
Loading
Loading