From 0402afc1021a5acdccba08b4ab560e5b42309df5 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 19 Oct 2023 16:21:31 +0200 Subject: [PATCH 01/27] Always add `fractal_roi_table_version` to table zarr attributes (ref #529) --- fractal_tasks_core/__init__.py | 1 + fractal_tasks_core/lib_write.py | 5 +++++ tests/test_unit_zarr.py | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/fractal_tasks_core/__init__.py b/fractal_tasks_core/__init__.py index f128ba358..105a985b0 100644 --- a/fractal_tasks_core/__init__.py +++ b/fractal_tasks_core/__init__.py @@ -7,3 +7,4 @@ __VERSION__ = "0.12.2" __OME_NGFF_VERSION__ = "0.4" +__ROI_TABLE_VERSION__ = "1" diff --git a/fractal_tasks_core/lib_write.py b/fractal_tasks_core/lib_write.py index 3b27c0689..d09cf0a63 100644 --- a/fractal_tasks_core/lib_write.py +++ b/fractal_tasks_core/lib_write.py @@ -23,6 +23,8 @@ from zarr.errors import ContainsGroupError from zarr.errors import GroupNotFoundError +from fractal_tasks_core import __ROI_TABLE_VERSION__ + class OverwriteNotAllowedError(RuntimeError): pass @@ -291,6 +293,9 @@ def write_table( # Update table_group attributes with table_attrs key/value pairs table_group.attrs.update(**table_attrs) + # Always add information about the fractal-roi-table version + table_group.attrs.update(fractal_roi_table_version=__ROI_TABLE_VERSION__) + return table_group diff --git a/tests/test_unit_zarr.py b/tests/test_unit_zarr.py index c40102fc3..791d77544 100644 --- a/tests/test_unit_zarr.py +++ b/tests/test_unit_zarr.py @@ -4,6 +4,7 @@ import zarr from devtools import debug +from fractal_tasks_core import __ROI_TABLE_VERSION__ from fractal_tasks_core.lib_write import _write_elem_with_overwrite from fractal_tasks_core.lib_write import open_zarr_group_with_overwrite from fractal_tasks_core.lib_write import OverwriteNotAllowedError @@ -127,6 +128,10 @@ def test_write_table(tmp_path): assert image_group["tables"].attrs.asdict() == 
dict(tables=["table_a"]) for key in ["region", "instance_key", "type"]: assert key not in table_a_group.attrs.keys() + assert ( + table_a_group.attrs["fractal_roi_table_version"] + == __ROI_TABLE_VERSION__ + ) # Run write_table again, with overwrite=True table_a_group = write_table( From cc4119eeb01ef4044bd787ea036c8bcf792e0abc Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 19 Oct 2023 16:25:24 +0200 Subject: [PATCH 02/27] Add roi-table doc page placeholder --- docs/roi_tables.md | 3 +++ mkdocs.yml | 1 + 2 files changed, 4 insertions(+) create mode 100644 docs/roi_tables.md diff --git a/docs/roi_tables.md b/docs/roi_tables.md new file mode 100644 index 000000000..0638955a7 --- /dev/null +++ b/docs/roi_tables.md @@ -0,0 +1,3 @@ +# ROI tables + +in progress diff --git a/mkdocs.yml b/mkdocs.yml index b97aa0c25..9b581139b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -51,6 +51,7 @@ nav: - Run tasks in Python scripts: tasks_in_scripts.md - Write a custom task: custom_task.md - Task manifest: manifest.md + - ROI tables: roi_tables.md - Code reference: reference/fractal_tasks_core/ - Development: development.md - Changelog: changelog.md From 0c7a9e454d60018e83d09e67388174a2417bde05 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 09:59:25 +0200 Subject: [PATCH 03/27] First draft of ROI docs --- docs/roi_tables.md | 148 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index 0638955a7..81cffc977 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -1,3 +1,151 @@ # ROI tables +We need to store tables as part of NGFF groups for multiple reasons: + +1. Our image-to-OME-Zarr converters stitch all the field of views (FOV) of a given well together in a single NGFF image, and we keep a trace of the original FOV positions in a ROI table. +2. 
Several tasks in `fractal-tasks-core` take a ROI table as an input, an loop over the ROIs defined in the table rows. This offers some flexibility to the tasks, as they can process a well, a set of FOVs, or a set of custom regions of the array. +3. We store ROIs associated to segmeneted objects, for instance the bounding boxes of organoid/nuclear +4. We store measurements associated to segmented objects (e.g. as computed via `regionprops` from `scikit-image`, as wrapped in [napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops)). + + +## Specifications + +The current section describes the first version (V1) of `fractal-tasks-core` +tables, which is based on [a proposed update to NGFF +specs](https://github.com/ome/ngff/pull/64); this update is currently on hold, +and `fractal-tasks-core` will evolve as soon as the official specs will adopt a +new definition. +As in the original NGFF proposed update, the current specifications are +specifically based on AnnData tables -- see [section below](#anndata-tables). + +### Zarr structure + +The structure of Zarr groups is based on the [`image` specification in NGFF 0.4](https://ngff.openmicroscopy.org/0.4/index.html#image-layout), with an additional `tables` group and the corresponding subgroups (similar to `labels`): +``` +image.zarr # Zarr group for a NGFF image +| +├── 0 # Zarr array for multiscale level 0 +├── ... +├── N # Zarr array for multiscale level N +| +├── labels # Zarr subgroup with a list of labels associated to this image +| ├── label_A # Zarr subgroup for a given label +| ├── label_B # Zarr subgroup for a given label +| └── ... +| +├── tables # Zarr subgroup with a list of tables associated to this image +| ├── table_1 # Zarr subgroup for a given table +| ├── table_2 # Zarr subgroup for a given table +| └── ... 
+| + +``` + +### Zarr attributes + +#### Tables container + +The Zarr attributes of the `tables` group must include the key `"tables"`, +pointing to the list of all tables; this simplifies the discovery of image +tables. +Here is an example of `image.zarr/tables/.zattrs`: +```json +{ + "tables": [ + "table_1", + "table_2", + ] +} +``` + +#### Single table (standard) + +For each table, the Zarr attributes must include the key +`"fractal_roi_table_version"`, pointing to the version of this specification +(e.g. `"1"`). + +Here is an example of `image.zarr/tables/table1/.zattrs` +```json +{ + "fractal_roi_table_version": "1", + "encoding-type": "anndata", # Automatically added by AnnData + "encoding-version": "0.1.0", # Automatically added by AnnData +} +``` + +This is the kind of tables that are used in `fractal-tasks-core` to store ROIs +like the whole well or the list of field of views. + +#### Single table (advanced) + +When table rows correspond to segmented objects. + +Moreover, they must include the key-value pairs proposed in https://github.com/ome/ngff/pull/64, that is: + +> * Attributes MUST contain `"type"`, which is set to `"ngff:region_table"`. +> * Attributes MUST contain `"region"`, which is the path to the data the table is annotating. +> * `"region"` MUST be a single path (single region) or an array of paths (multiple regions). +> * `"region"` paths MUST be objects with a key "path" and the path value MUST be a string. +> * Attributes MUST contain `"region_key"` if `"region"` is an array. `"region_key"` is the key in `obs` denoting which region a given row corresponds to. 
+ + +Here is an example of `image.zarr/tables/table1/.zattrs` +```json +{ + "fractal_roi_table_version": "1", + "type": "ngff:region_table", + "instance_key": "label", + "region": { + "path": "../labels/label_DAPI", + }, + "encoding-type": "anndata", # Automatically added by AnnData + "encoding-version": "0.1.0", # Automatically added by AnnData +} +``` + +### AnnData tables + +On-disk (zarr), see https://anndata.readthedocs.io/en/latest/fileformat-prose.html + +## Example + +the + +which may act for instance on FOVs or on pre-computed + +makes the + +tbaleiterate + + +tra Therefore we use + perform s + +https://github.com/ome/ngff/pull/64 + in progress + + +```python + ROI_table = ad.read_zarr(ROI_table_path) + attrs = zarr.group(ROI_table_path).attrs + if not attrs["type"] == "ngff:region_table": + raise ValueError("Wrong attributes for {ROI_table_path}:\n{attrs}") + label_relative_path = attrs["region"]["path"] + column_name = attrs["instance_key"] + +``` + +## Future updates + +These specifications may evolve (especially based on the future NGFF updates), eventually leading to breaking changes in V2. +Development of `fractal-tasks-core` will mantain backwards-compatibility with V1 for a reasonable amount of time. + +Some aspects that most likely will require a review are: + +1. We aim at removing the use of hard-coded units from the column names (e.g. `x_micrometer`), in favor of a more general definition of units. +2. We may re-assess whether AnnData tables are the right tool for our scopes, or whether simpler dataframes (e.g. from `pandas`) are sufficient. Not clear whether this is easily doable with zarr though. +parquet in zarr? 
+ +https://github.com/zarr-developers/community/issues/31 +https://github.com/zarr-developers/numcodecs/issues/452 From 6587653d8a187f90fe7465adc722dbc77395ddf8 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:00:02 +0200 Subject: [PATCH 04/27] Add mathjax support in documentation (close #588) --- docs/javascripts/mathjax.js | 16 ++++++ docs/roi_tables.md | 100 ++++++++++++++++++++++++++++-------- mkdocs.yml | 7 +++ 3 files changed, 102 insertions(+), 21 deletions(-) create mode 100644 docs/javascripts/mathjax.js diff --git a/docs/javascripts/mathjax.js b/docs/javascripts/mathjax.js new file mode 100644 index 000000000..06dbf38bf --- /dev/null +++ b/docs/javascripts/mathjax.js @@ -0,0 +1,16 @@ +window.MathJax = { + tex: { + inlineMath: [["\\(", "\\)"]], + displayMath: [["\\[", "\\]"]], + processEscapes: true, + processEnvironments: true + }, + options: { + ignoreHtmlClass: ".*|", + processHtmlClass: "arithmatex" + } +}; + +document$.subscribe(() => { + MathJax.typesetPromise() +}) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index 81cffc977..4d1a03e91 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -1,7 +1,12 @@ # ROI tables +## Scope + +ROIs = rectangles + We need to store tables as part of NGFF groups for multiple reasons: + 1. Our image-to-OME-Zarr converters stitch all the field of views (FOV) of a given well together in a single NGFF image, and we keep a trace of the original FOV positions in a ROI table. 2. Several tasks in `fractal-tasks-core` take a ROI table as an input, an loop over the ROIs defined in the table rows. This offers some flexibility to the tasks, as they can process a well, a set of FOVs, or a set of custom regions of the array. 3. 
We store ROIs associated to segmeneted objects, for instance the bounding boxes of organoid/nuclear @@ -45,9 +50,10 @@ image.zarr # Zarr group for a NGFF image #### Tables container -The Zarr attributes of the `tables` group must include the key `"tables"`, +The Zarr attributes of the `tables` group must include the key `tables`, pointing to the list of all tables; this simplifies the discovery of image tables. + Here is an example of `image.zarr/tables/.zattrs`: ```json { @@ -61,8 +67,8 @@ Here is an example of `image.zarr/tables/.zattrs`: #### Single table (standard) For each table, the Zarr attributes must include the key -`"fractal_roi_table_version"`, pointing to the version of this specification -(e.g. `"1"`). +`fractal_roi_table_version`, pointing to the string version of this +specification (e.g. `1`). Here is an example of `image.zarr/tables/table1/.zattrs` ```json @@ -74,30 +80,32 @@ Here is an example of `image.zarr/tables/table1/.zattrs` ``` This is the kind of tables that are used in `fractal-tasks-core` to store ROIs -like the whole well or the list of field of views. +like a whole well, or the list of field of views. -#### Single table (advanced) +#### Single table (segmented objects) -When table rows correspond to segmented objects. - -Moreover, they must include the key-value pairs proposed in https://github.com/ome/ngff/pull/64, that is: - -> * Attributes MUST contain `"type"`, which is set to `"ngff:region_table"`. -> * Attributes MUST contain `"region"`, which is the path to the data the table is annotating. -> * `"region"` MUST be a single path (single region) or an array of paths (multiple regions). -> * `"region"` paths MUST be objects with a key "path" and the path value MUST be a string. -> * Attributes MUST contain `"region_key"` if `"region"` is an array. `"region_key"` is the key in `obs` denoting which region a given row corresponds to. 
+When each table row corresponds to (the bounding box of) a segmented object, +`fractal-tasks-core` follows more closely the [proposed NGFF update mentioned +above](https://github.com/ome/ngff/pull/64), with the following additional +requirements on the Zarr group of a given table: +* Attributes must contain a `type` key, with value `ngff:region_table`. +* Attributes must contain a `region` key; the corresponding value must be an + object with a `path` key and a string value (i.e. the path to the data the + table is annotating). +* Attributes may include a key `instance_key`, which is the key in `obs` that + denotes which instance in `region` the row corresponds to. If `instance_key` + is not provided, the values from the `_index` Zarr attribute of `obs` is used. Here is an example of `image.zarr/tables/table1/.zattrs` ```json { "fractal_roi_table_version": "1", "type": "ngff:region_table", - "instance_key": "label", "region": { "path": "../labels/label_DAPI", }, + "instance_key": "label", "encoding-type": "anndata", # Automatically added by AnnData "encoding-version": "0.1.0", # Automatically added by AnnData } @@ -105,17 +113,67 @@ Here is an example of `image.zarr/tables/table1/.zattrs` ### AnnData tables -On-disk (zarr), see https://anndata.readthedocs.io/en/latest/fileformat-prose.html +Data of a table are stored into a Zarr group as AnnData objects. + +Quoting from the [AnnData documentation](https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html): + +> AnnData is specifically designed for matrix-like data. By this we mean that +> we have $n$ observations, each of which can be represented as d-dimensional +> vectors, where each dimension corresponds to a variable or feature. Both the +> rows and columns of this nxd matrix are special in the sense that they are +> indexed. 
+> +> (https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html) + +### Columns -## Example +## On-disk input/output -the +The `anndata` library offers a set of functions for input/output of AnnData +tables, including functions specifically targeting the Zarr format. -which may act for instance on FOVs or on pre-computed + +### Reading a table + +To read an AnnData table from a Zarr group, one may use the [`read_zarr` +function](https://anndata.readthedocs.io/en/latest/generated/anndata.read_zarr.html). +In the following example a NGFF image was created by stitching together two +field of views, and the `FOV_ROI_table` has information on the position of the +two original FOVs (named `FOV_1` and `FOV_2`): +```python +import anndata as ad + +table = ad.read_zarr("/somewhere/image.zarr/tables/FOV_ROI_table") + +print(table) +# AnnData object with n_obs × n_vars = 2 × 8 + +print(table.obs_names) +# Index(['FOV_1', 'FOV_2'], dtype='object', name='FieldIndex') + +print(table.var_names) +# Index([ +# 'x_micrometer', +# 'y_micrometer', +# 'z_micrometer', +# 'len_x_micrometer', +# 'len_y_micrometer', +# 'len_z_micrometer', +# 'x_micrometer_original', +# 'y_micrometer_original' +# ], +# dtype='object') + +print(table.X) +# [[ 0. 0. 0. 416. 351. 2. -1448.3 -1517.7] +# [ 416. 0. 0. 416. 351. 2. -1032.3 -1517.7]] +``` -makes the +### Writing a table -tbaleiterate +The `anndata.experimental.write_elem` function provides the required +functionality to write an AnnData object to a Zarr group. In +`fractal-tasks-core`, the `write_table` helper function wraps the `anndata` +function and includes additional functionalities. 
tra Therefore we use diff --git a/mkdocs.yml b/mkdocs.yml index 9b581139b..70ba8fe98 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -18,6 +18,13 @@ markdown_extensions: - pymdownx.tasklist - toc: permalink: true +- pymdownx.arithmatex: + generic: true + +extra_javascript: + - javascripts/mathjax.js + - https://polyfill.io/v3/polyfill.min.js?features=es6 + - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js theme: name: "material" From fbaf56eeacdbf5c387cd93397815a3096f83d222 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:53:06 +0200 Subject: [PATCH 05/27] Cross-reference to docs of other projects (close #587) --- mkdocs.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mkdocs.yml b/mkdocs.yml index 70ba8fe98..fe2ac89a1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -79,6 +79,13 @@ plugins: default_handler: python handlers: python: + import: + - url: https://docs.python.org/objects.inv + - url: https://numpy.org/doc/stable/objects.inv + - url: https://zarr.readthedocs.io/en/stable/objects.inv + - url: https://anndata.readthedocs.io/en/latest/objects.inv + - url: https://docs.dask.org/en/stable/objects.inv + # - url: https://docs.pydantic.dev/latest/objects.inv # This points to V2 options: show_signature_annotations: false docstring_section_style: "spacy" From f5969e7e0f0f689f455f2600207b75d1dc79b9d7 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:53:33 +0200 Subject: [PATCH 06/27] Use `zarr.hierarchy.Group` for type hints (ref #587) --- fractal_tasks_core/lib_ngff.py | 4 ++-- fractal_tasks_core/lib_write.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fractal_tasks_core/lib_ngff.py b/fractal_tasks_core/lib_ngff.py index 299b32ceb..6872b8a0c 100644 --- a/fractal_tasks_core/lib_ngff.py +++ b/fractal_tasks_core/lib_ngff.py @@ -6,7 +6,7 @@ from typing import Optional from typing import 
Union -import zarr +import zarr.hierarchy from pydantic import BaseModel from pydantic import Field from pydantic import validator @@ -422,7 +422,7 @@ def load_NgffWellMeta(zarr_path: str) -> NgffWellMeta: raise e -def detect_ome_ngff_type(group: zarr.Group) -> str: +def detect_ome_ngff_type(group: zarr.hierarchy.Group) -> str: """ Given a Zarr group, find whether it is an OME-NGFF plate, well or image. diff --git a/fractal_tasks_core/lib_write.py b/fractal_tasks_core/lib_write.py index d09cf0a63..b27f85fe4 100644 --- a/fractal_tasks_core/lib_write.py +++ b/fractal_tasks_core/lib_write.py @@ -18,7 +18,7 @@ from typing import Union import anndata as ad -import zarr +import zarr.hierarchy from anndata.experimental import write_elem from zarr.errors import ContainsGroupError from zarr.errors import GroupNotFoundError @@ -36,7 +36,7 @@ def open_zarr_group_with_overwrite( overwrite: bool, logger: Optional[logging.Logger] = None, **open_group_kwargs: Any, -) -> zarr.Group: +) -> zarr.hierarchy.Group: """ Wrap `zarr.open_group` and add `overwrite` argument. 
@@ -124,7 +124,7 @@ def open_zarr_group_with_overwrite( def _write_elem_with_overwrite( - group: zarr.Group, + group: zarr.hierarchy.Group, key: str, elem: Any, *, @@ -186,7 +186,7 @@ def _write_elem_with_overwrite( def write_table( - image_group: zarr.Group, + image_group: zarr.hierarchy.Group, table_name: str, table: ad.AnnData, overwrite: bool = False, @@ -300,7 +300,7 @@ def write_table( def prepare_label_group( - image_group: zarr.Group, + image_group: zarr.hierarchy.Group, label_name: str, overwrite: bool = False, label_attrs: Optional[dict[str, Any]] = None, From 08d7e8b1707ae8ebaeae187fefe902f768012beb Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 15:06:59 +0200 Subject: [PATCH 07/27] Second draft of ROI-table documentation --- docs/roi_tables.md | 79 +++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index 4d1a03e91..ce0c5bfbb 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -2,18 +2,20 @@ ## Scope -ROIs = rectangles +In `fractal-tasks-core`, regions of interest (ROIs) are three-dimensional +regions of space delimited by orthogonal planes. ROI tables are stored as +AnnData tables, within OME-NGFF Zarr images. -We need to store tables as part of NGFF groups for multiple reasons: +We have several use cases for tables: +1. We keep track of the positions of the Field of Views (FOVs) within a well, after stitching the corresponding FOV images into a single whole-well array. +2. We keep track of the original state before some transformations are applied - e.g. shifting FOVs to avoid overlaps, or shifting a multiplexing cycle during registration. +3. Several tasks in `fractal-tasks-core` take an existing ROI table as an input and then loop over the ROIs defined in the table. Such tasks have more flexibility, as they can process e.g. 
a whole well, a set of FOVs, or a set of custom regions of the array. +4. We store ROIs associated to segmented objects, for instance the bounding boxes of organoids/nuclei. +5. We store measurements associated to segmented objects, e.g. as computed via `regionprops` from `scikit-image` (as wrapped in [napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops)). -1. Our image-to-OME-Zarr converters stitch all the field of views (FOV) of a given well together in a single NGFF image, and we keep a trace of the original FOV positions in a ROI table. -2. Several tasks in `fractal-tasks-core` take a ROI table as an input, an loop over the ROIs defined in the table rows. This offers some flexibility to the tasks, as they can process a well, a set of FOVs, or a set of custom regions of the array. -3. We store ROIs associated to segmeneted objects, for instance the bounding boxes of organoid/nuclear -4. We store measurements associated to segmented objects (e.g. as computed via `regionprops` from `scikit-image`, as wrapped in [napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops)). - -## Specifications +## Table specifications The current section describes the first version (V1) of `fractal-tasks-core` tables, which is based on [a proposed update to NGFF @@ -111,23 +113,41 @@ Here is an example of `image.zarr/tables/table1/.zattrs` } ``` -### AnnData tables +### AnnData table format -Data of a table are stored into a Zarr group as AnnData objects. +Data of a table are stored into a Zarr group as AnnData ("Annotated Data") +objects; the `anndata` Python library provides the definition of this format +and the relevant tools. -Quoting from the [AnnData documentation](https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html): +Quoting from `anndata` documentation: > AnnData is specifically designed for matrix-like data. 
By this we mean that -> we have $n$ observations, each of which can be represented as d-dimensional +> we have $n$ observations, each of which can be represented as $d$-dimensional > vectors, where each dimension corresponds to a variable or feature. Both the -> rows and columns of this nxd matrix are special in the sense that they are -> indexed. +> rows and columns of this $n \times d$ matrix are special in the sense that +> they are indexed. > > (https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html) -### Columns +The same link also constitutes a get-started page for AnnData. + +Note that AnnData tables are easily transformed from/into `pandas.DataFrame` +objects - see e.g. the [`AnnData.to_df` +method](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.to_df.html#anndata.AnnData.to_df). + +### Table columns + +## Default tables + +When parsing Yokogawa images into OME-Zarr ( via the +[`create_ome_zarr`](../reference/fractal_tasks_core/tasks/create_ome_zarr/#fractal_tasks_core.tasks.create_ome_zarr.create_ome_zarr) +or +[`create_ome_zarr_multiplex`](../reference/fractal_tasks_core/tasks/create_ome_zarr_multiplex/#fractal_tasks_core.tasks.create_ome_zarr_multiplex.create_ome_zarr_multiplex) +tasks) + +FIXME -## On-disk input/output +## Handling tables The `anndata` library offers a set of functions for input/output of AnnData tables, including functions specifically targeting the Zarr format. @@ -173,27 +193,22 @@ print(table.X) The `anndata.experimental.write_elem` function provides the required functionality to write an AnnData object to a Zarr group. In `fractal-tasks-core`, the `write_table` helper function wraps the `anndata` -function and includes additional functionalities. 
- - -tra Therefore we use - perform s - -https://github.com/ome/ngff/pull/64 - -in progress +function and includes additional functionalities -- see [its +documentation](../reference/fractal_tasks_core/lib_write/#fractal_tasks_core.lib_write.write_table). ```python - ROI_table = ad.read_zarr(ROI_table_path) - attrs = zarr.group(ROI_table_path).attrs - if not attrs["type"] == "ngff:region_table": - raise ValueError("Wrong attributes for {ROI_table_path}:\n{attrs}") - label_relative_path = attrs["region"]["path"] - column_name = attrs["instance_key"] - +def write_table( + image_group: zarr.hierarchy.Group, + table_name: str, + table: ad.AnnData, + overwrite: bool = False, + table_attrs: Optional[dict[str, Any]] = None, + logger: Optional[logging.Logger] = None, +) ``` + ## Future updates These specifications may evolve (especially based on the future NGFF updates), eventually leading to breaking changes in V2. From 7097e8693024b7fb082556a0f87dfad6ca320351 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 15:09:09 +0200 Subject: [PATCH 08/27] Update CHANGELOG [skip ci] --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 427b0f424..2e9c4cac4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ * Always use `write_table` in tasks, rather than AnnData `write_elem` (\#581). * Testing: * Cache Zenodo data, within GitHub actions (\#585). +* Documentation: + * Define V1 of ROI-table specs (\#582). + * Add mathjax support (\#582). + * Add cross-reference inventories to external APIs (\#582). 
# 0.13.0 From 318e9cfe0ea435edff1a0faa9b2590c6bc70cb3b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 15:22:31 +0200 Subject: [PATCH 09/27] Minor fixes to ROI-tables docs --- docs/roi_tables.md | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index ce0c5bfbb..51c3a39a5 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -20,10 +20,10 @@ We have several use cases for tables: The current section describes the first version (V1) of `fractal-tasks-core` tables, which is based on [a proposed update to NGFF specs](https://github.com/ome/ngff/pull/64); this update is currently on hold, -and `fractal-tasks-core` will evolve as soon as the official specs will adopt a -new definition. -As in the original NGFF proposed update, the current specifications are -specifically based on AnnData tables -- see [section below](#anndata-tables). +and `fractal-tasks-core` will evolve as soon as the NGFF specs will adopt a +definition of tables. +As in the original proposed NGFF update, the current specifications are +specifically based on AnnData tables. ### Zarr structure @@ -59,14 +59,11 @@ tables. 
Here is an example of `image.zarr/tables/.zattrs`: ```json { - "tables": [ - "table_1", - "table_2", - ] + "tables": ["table_1", "table_2"] } ``` -#### Single table (standard) +#### Single table (default) For each table, the Zarr attributes must include the key `fractal_roi_table_version`, pointing to the string version of this @@ -76,8 +73,8 @@ Here is an example of `image.zarr/tables/table1/.zattrs` ```json { "fractal_roi_table_version": "1", - "encoding-type": "anndata", # Automatically added by AnnData - "encoding-version": "0.1.0", # Automatically added by AnnData + "encoding-type": "anndata", # Automatically added by anndata + "encoding-version": "0.1.0", # Automatically added by anndata } ``` @@ -108,16 +105,16 @@ Here is an example of `image.zarr/tables/table1/.zattrs` "path": "../labels/label_DAPI", }, "instance_key": "label", - "encoding-type": "anndata", # Automatically added by AnnData - "encoding-version": "0.1.0", # Automatically added by AnnData + "encoding-type": "anndata", # Automatically added by anndata + "encoding-version": "0.1.0", # Automatically added by anndata } ``` ### AnnData table format Data of a table are stored into a Zarr group as AnnData ("Annotated Data") -objects; the `anndata` Python library provides the definition of this format -and the relevant tools. +objects; the [`anndata` Python library](anndata.readthedocs.io) provides the +definition of this format and the relevant tools. Quoting from `anndata` documentation: @@ -129,21 +126,23 @@ Quoting from `anndata` documentation: > > (https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html) -The same link also constitutes a get-started page for AnnData. - Note that AnnData tables are easily transformed from/into `pandas.DataFrame` objects - see e.g. the [`AnnData.to_df` method](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.to_df.html#anndata.AnnData.to_df). 
-### Table columns +### Table columns (WIP) + +TODO: list here all required/optional columns and their meaning. -## Default tables +## Default tables (WIP) -When parsing Yokogawa images into OME-Zarr ( via the +When parsing Yokogawa images into OME-Zarr (via the [`create_ome_zarr`](../reference/fractal_tasks_core/tasks/create_ome_zarr/#fractal_tasks_core.tasks.create_ome_zarr.create_ome_zarr) or [`create_ome_zarr_multiplex`](../reference/fractal_tasks_core/tasks/create_ome_zarr_multiplex/#fractal_tasks_core.tasks.create_ome_zarr_multiplex.create_ome_zarr_multiplex) -tasks) +tasks), we always create some default ROI tables. + +When importing a Zarr with the import-ome-zarr task... FIXME From 8194716705766b71a1414e442069035c0805584b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 15:24:50 +0200 Subject: [PATCH 10/27] Fix broken link --- docs/roi_tables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index 51c3a39a5..ab1a2e68f 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -113,7 +113,7 @@ Here is an example of `image.zarr/tables/table1/.zattrs` ### AnnData table format Data of a table are stored into a Zarr group as AnnData ("Annotated Data") -objects; the [`anndata` Python library](anndata.readthedocs.io) provides the +objects; the [`anndata` Python library](https://anndata.readthedocs.io) provides the definition of this format and the relevant tools. 
Quoting from `anndata` documentation: From a592935d51aea24af9dbd6bc1641b5ad77ef06f0 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 15:56:59 +0200 Subject: [PATCH 11/27] Add example of `write_table` [skip ci] --- docs/roi_tables.md | 74 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index ab1a2e68f..9ceac0b9b 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -195,20 +195,78 @@ functionality to write an AnnData object to a Zarr group. In function and includes additional functionalities -- see [its documentation](../reference/fractal_tasks_core/lib_write/#fractal_tasks_core.lib_write.write_table). +With respect to the wrapped `anndata` function, the main additional features of `write_table` are +* The boolean parameter `overwrite` (defaulting to `False`), that determines the behavior in case of an already-existing table at the given path. +* The `table_attrs` parameter, as a shorthand for updating the Zarr attributes of the table group after its creation. 
+ +Here is an example of how to use `write_table`: ```python -def write_table( - image_group: zarr.hierarchy.Group, - table_name: str, - table: ad.AnnData, - overwrite: bool = False, - table_attrs: Optional[dict[str, Any]] = None, - logger: Optional[logging.Logger] = None, +import numpy as np +import zarr +import anndata as ad +from fractal_tasks_core.lib_write import write_table + +table = ad.AnnData(X=np.ones((10, 10))) # Generate a dummy AnnData object +image_group = zarr.open_group("/tmp/image.zarr") +table_name = "MyTable" +table_attrs = { + "type": "ngff:region_table", + "region": {"path": "../labels/MyLabel"}, + "instance_key": "label", +} + +write_table( + image_group, + table_name, + table, + overwrite=True, + table_attrs=table_attrs, ) ``` +After running this Python code snippet, the on-disk output is as follows: +```console +$ tree /tmp/image.zarr/tables/ # View folder structure +/tmp/image.zarr/tables/ +└── MyTable + ├── layers + ├── obs + │   └── _index + │   └── 0 + ├── obsm + ├── obsp + ├── uns + ├── var + │   └── _index + │   └── 0 + ├── varm + ├── varp + └── X + └── 0.0 + +12 directories, 3 files + +$ cat /tmp/image.zarr/tables/.zattrs # View tables attributes +{ + "tables": [ + "MyTable" + ] +} +$ cat /tmp/image.zarr/tables/MyTable/.zattrs # View single-table attributes +{ + "encoding-type": "anndata", + "encoding-version": "0.1.0", + "fractal_roi_table_version": "1", + "instance_key": "label", + "region": { + "path": "../labels/MyLabel" + }, + "type": "ngff:region_table" +} +``` -## Future updates +## Future updates (WIP) These specifications may evolve (especially based on the future NGFF updates), eventually leading to breaking changes in V2. Development of `fractal-tasks-core` will mantain backwards-compatibility with V1 for a reasonable amount of time.
From 93f1044db3a9c5ed8464aeedba727bd5667d48b3 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:20:43 +0200 Subject: [PATCH 12/27] Add info on table columns [skip ci] --- docs/roi_tables.md | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index 9ceac0b9b..8969cc1a3 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -130,9 +130,28 @@ Note that AnnData tables are easily transformed from/into `pandas.DataFrame` objects - see e.g. the [`AnnData.to_df` method](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.to_df.html#anndata.AnnData.to_df). -### Table columns (WIP) +### Table contents + +The `.var` attribute of an AnnData object indexes the columns of the table. A +`fractal-tasks-core` ROI table must include the following six columns: + +* `x_micrometer`, `y_micrometer`, `z_micrometer`: the lower bounds of the XYZ intervals defining the ROI, in micrometers; +* `len_x_micrometer`, `len_y_micrometer`, `len_z_micrometer`: the XYZ edge lenghts, in micrometers. + +ROI tables may also include other optional columns: + +* `x_micrometer_original` and `y_micrometer_original`, which are a copy of `x_micrometer` and `y_micrometer` taken before applying some transformation; +* `label`, which is used within measurement tables as a reference to the labels corresponding to a row of measurements (see [description of `instance_key` above](#single-table-segmented-objects)). + + +> Notes: +> +> 1. The **axes origin** for the ROI positions (e.g. for `x_micrometer`) is set +> to coincide with the top-left corner of a well (for the YX axes) and with +> the lowest Z plane. +> 2. ROIs are defined in **physical coordinates**, and they do not store +> information on the number or size of pixels. -TODO: list here all required/optional columns and their meaning. 
## Default tables (WIP) From ba4e35a43ab14d589bf9057cd5f209cfff8efaca Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:30:01 +0200 Subject: [PATCH 13/27] Add notes about Z arbitrary units [skip ci] --- docs/roi_tables.md | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index 8969cc1a3..b32e5b844 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -40,12 +40,10 @@ image.zarr # Zarr group for a NGFF image | ├── label_B # Zarr subgroup for a given label | └── ... | -├── tables # Zarr subgroup with a list of tables associated to this image -| ├── table_1 # Zarr subgroup for a given table -| ├── table_2 # Zarr subgroup for a given table -| └── ... -| - +└── tables # Zarr subgroup with a list of tables associated to this image + ├── table_1 # Zarr subgroup for a given table + ├── table_2 # Zarr subgroup for a given table + └── ... ``` ### Zarr attributes @@ -132,18 +130,21 @@ method](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.to_df ### Table contents -The `.var` attribute of an AnnData object indexes the columns of the table. A -`fractal-tasks-core` ROI table must include the following six columns: +The [`var` attribute of AnnData +objects](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.var.html#anndata.AnnData.var) +indexes the columns of the table. A `fractal-tasks-core` ROI table must include +the following six columns: -* `x_micrometer`, `y_micrometer`, `z_micrometer`: the lower bounds of the XYZ intervals defining the ROI, in micrometers; -* `len_x_micrometer`, `len_y_micrometer`, `len_z_micrometer`: the XYZ edge lenghts, in micrometers. 
+* `x_micrometer`, `y_micrometer`: the lower bounds of the XY intervals defining the ROI, in micrometers; +* `z_micrometer`: the lower bound of the Z interval defining the ROI, in arbitrary units or in micrometers; +* `len_x_micrometer`, `len_y_micrometer`: the XY edge lenghts, in micrometers; +* `len_z_micrometer`: the Z edge lenght in arbitrary units (corresponding to the number of Z planes) or in micrometers. ROI tables may also include other optional columns: * `x_micrometer_original` and `y_micrometer_original`, which are a copy of `x_micrometer` and `y_micrometer` taken before applying some transformation; * `label`, which is used within measurement tables as a reference to the labels corresponding to a row of measurements (see [description of `instance_key` above](#single-table-segmented-objects)). - > Notes: > > 1. The **axes origin** for the ROI positions (e.g. for `x_micrometer`) is set @@ -151,6 +152,9 @@ ROI tables may also include other optional columns: > the lowest Z plane. > 2. ROIs are defined in **physical coordinates**, and they do not store > information on the number or size of pixels. +> 3. The current version of `fractal-tasks-core` only uses **arbitrary units** +> for `z_micrometer` and `len_z_micrometer` columns, where a single unit +> corresponds to the distance between two subsequent Z planes. ## Default tables (WIP) @@ -165,7 +169,7 @@ When importing a Zarr with the import-ome-zarr task... FIXME -## Handling tables +## Examples The `anndata` library offers a set of functions for input/output of AnnData tables, including functions specifically targeting the Zarr format. @@ -206,6 +210,9 @@ print(table.X) # [ 416. 0. 0. 416. 351. 2. 
-1032.3 -1517.7]] ``` +The first row corresponds YX region first FOV + + ### Writing a table The `anndata.experimental.write_elem` function provides the required From 379637ab842528a36e9266644172ea2a3986ea12 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:37:16 +0200 Subject: [PATCH 14/27] Clean up ROI-table future-perspectives section [skip ci] --- docs/roi_tables.md | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index b32e5b844..966f8081c 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -292,16 +292,21 @@ $ cat /tmp/image.zarr/tables/MyTable/.zattrs # View single-table attributes } ``` -## Future updates (WIP) - -These specifications may evolve (especially based on the future NGFF updates), eventually leading to breaking changes in V2. -Development of `fractal-tasks-core` will mantain backwards-compatibility with V1 for a reasonable amount of time. - -Some aspects that most likely will require a review are: - -1. We aim at removing the use of hard-coded units from the column names (e.g. `x_micrometer`), in favor of a more general definition of units. -2. We may re-assess whether AnnData tables are the right tool for our scopes, or whether simpler dataframes (e.g. from `pandas`) are sufficient. Not clear whether this is easily doable with zarr though. -parquet in zarr? - -https://github.com/zarr-developers/community/issues/31 -https://github.com/zarr-developers/numcodecs/issues/452 +## Future perspectives + +These specifications may evolve (especially based on the future NGFF updates), +eventually leading to breaking changes in V2. Development of +`fractal-tasks-core` will aim at maintaining backwards-compatibility with V1 for +a reasonable amount of time. + +An in-progress list of aspects that may be reviewed: + +1. We aim at removing the use of hard-coded units from the column names (e.g.
+ `x_micrometer`), in favor of a more general definition of units. This will + also fix the current misleading names for the Z position/length columns + (`z_micrometer` and `len_z_micrometer`, even though corresponding data are + in arbitrary units). +2. We may re-evaluate whether AnnData tables are the most appropriate tool. For + the record, Zarr does not natively support storage of dataframes (see e.g. + https://github.com/zarr-developers/numcodecs/issues/452), which is one + aspect in favor of sticking with the `anndata` library. From f046e9103feee1c452bed68cf03fb947887bf2d8 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:50:45 +0200 Subject: [PATCH 15/27] Add default-tables docs section [skip ci] --- docs/roi_tables.md | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index 966f8081c..b52b507a3 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -12,7 +12,7 @@ We have several use cases for tables: 2. We keep track of the original state before some transformations are applied - e.g. shifting FOVs to avoid overlaps, or shifting a multiplexing cycle during registration. 3. Several tasks in `fractal-tasks-core` take an existing ROI table as an input and then loop over the ROIs defined in the table. Such tasks have more flexibility, as they can process e.g. a whole well, a set of FOVs, or a set of custom regions of the array. 4. We store ROIs associated to segmented objects, for instance the bounding boxes of organoids/nuclei. -5. We store measurements associated to segmented objects, e.g. as computed via `regionprops` from `scikit-image` (as wrapped in [napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops)). +5. We store measurements associated to segmented objects (e.g. 
as computed via `regionprops` from `scikit-image`, as wrapped in [napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops)). Note: we store these tables with the the same AnnData format as described in this page, but they are not *ROI* tables and therefore they are not based on the current specifications (notably with respect to the [required columns](#table-contents)). ## Table specifications @@ -156,18 +156,34 @@ ROI tables may also include other optional columns: > for `z_micrometer` and `len_z_micrometer` columns, where a single unit > corresponds to the distance between two subsequent Z planes. +## Default tables -## Default tables (WIP) - -When parsing Yokogawa images into OME-Zarr (via the +OME-Zarrs created via `fractal-tasks-core` (e.g. by parsing Yokogawa images via +the [`create_ome_zarr`](../reference/fractal_tasks_core/tasks/create_ome_zarr/#fractal_tasks_core.tasks.create_ome_zarr.create_ome_zarr) or [`create_ome_zarr_multiplex`](../reference/fractal_tasks_core/tasks/create_ome_zarr_multiplex/#fractal_tasks_core.tasks.create_ome_zarr_multiplex.create_ome_zarr_multiplex) -tasks), we always create some default ROI tables. - -When importing a Zarr with the import-ome-zarr task... - -FIXME +tasks) always include two specific ROI tables: + +* The table named `FOV_ROI_table`, which lists all original FOVs; +* The table named `well_ROI_table`, which covers the NGFF corresponding to the whole well (formed by all the original FOVs stiched together) + +Each one of these two tables includes ROIs that are only defined in the XY +plane, and span the whole set of Z planes. Note that this differs, e.g., from +the case of bounding-box ROIs based on three-dimensional segmented objects, +which may have a non-trivial Z size. 
+ +When working with an externally-generated OME-Zarr, one may use the +[`import_ome_zarr` +task](../reference/fractal_tasks_core/tasks/import_ome_zarr/#fractal_tasks_core.tasks.import_ome_zarr.import_ome_zarr) +to make it compatible with `fractal-tasks-core`. This task optionally adds two +ROI tables to the NGFF images: + +* The table named `image_ROI_table`, which simply covers the whole image. +* A table named `grid_ROI_table`, which splits the whole-image ROI into a YX + rectangular grid of smaller ROIs. This may correspond to original FOVs, or it + may simply be useful for applying downstream processing to smaller arrays and + avoid large memory requirements. ## Examples From aed64cb83055e6e5f10a56829099031e235bfc98 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 23 Oct 2023 17:27:43 +0200 Subject: [PATCH 16/27] Minor fixes to ROI-tables docs [skip ci] --- docs/roi_tables.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/roi_tables.md b/docs/roi_tables.md index b52b507a3..15363d8ab 100644 --- a/docs/roi_tables.md +++ b/docs/roi_tables.md @@ -166,7 +166,7 @@ or tasks) always include two specific ROI tables: * The table named `FOV_ROI_table`, which lists all original FOVs; -* The table named `well_ROI_table`, which covers the NGFF corresponding to the whole well (formed by all the original FOVs stiched together) +* The table named `well_ROI_table`, which covers the NGFF image corresponding to the whole well (formed by all the original FOVs stitched together) Each one of these two tables includes ROIs that are only defined in the XY plane, and span the whole set of Z planes. Note that this differs, e.g., from the case of bounding-box ROIs based on three-dimensional segmented objects, which may have a non-trivial Z size.
In the following example a NGFF image was created by sticthing together two -field of views, and the `FOV_ROI_table` has information on the position of the -two original FOVs (named `FOV_1` and `FOV_2`): +field of views, where each one is made of a stack of five Z planes. +The `FOV_ROI_table` has information on the XY position and size of the two +original FOVs (named `FOV_1` and `FOV_2`): ```python import anndata as ad @@ -222,11 +223,15 @@ print(table.var_names) # dtype='object') print(table.X) -# [[ 0. 0. 0. 416. 351. 2. -1448.3 -1517.7] -# [ 416. 0. 0. 416. 351. 2. -1032.3 -1517.7]] +# [[ 0. 0. 0. 416. 351. 5. -1448.3 -1517.7] +# [ 416. 0. 0. 416. 351. 5. -1032.3 -1517.7]] ``` -The first row corresponds YX region first FOV +In this case, the second FOV (labeled `FOV_2`) is defined as the three-dimensional region such that + +* X is between 416 and 832 micrometers; +* Y is between 0 and 351 micrometers; +* Z is between 0 and 5 - which means that all the five available Z planes are included. 
### Writing a table From b416c9d998a1f4233f12480ec8cd24f814b48cd0 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:03:26 +0200 Subject: [PATCH 17/27] Rename table doc page --- docs/{roi_tables.md => tables.md} | 0 mkdocs.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/{roi_tables.md => tables.md} (100%) diff --git a/docs/roi_tables.md b/docs/tables.md similarity index 100% rename from docs/roi_tables.md rename to docs/tables.md diff --git a/mkdocs.yml b/mkdocs.yml index fe2ac89a1..913ccbd1d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -58,7 +58,7 @@ nav: - Run tasks in Python scripts: tasks_in_scripts.md - Write a custom task: custom_task.md - Task manifest: manifest.md - - ROI tables: roi_tables.md + - Tables: tables.md - Code reference: reference/fractal_tasks_core/ - Development: development.md - Changelog: changelog.md From edac9b8ac3cfef579d33879ff64fb7255529a094 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 26 Oct 2023 13:40:13 +0200 Subject: [PATCH 18/27] First review of table docs --- docs/tables.md | 216 ++++++++++++++++++++++++++++++------------------- 1 file changed, 134 insertions(+), 82 deletions(-) diff --git a/docs/tables.md b/docs/tables.md index 15363d8ab..cb4ecd361 100644 --- a/docs/tables.md +++ b/docs/tables.md @@ -1,33 +1,64 @@ -# ROI tables +# Fractal tables -## Scope +Within `fractal-tasks-core` we make use of tables stored as `AnnData` objects +within OME-Zarr image groups. This page describes the specifications for +different kinds of tables: -In `fractal-tasks-core`, regions of interest (ROIs) are three-dimensional -regions of space delimited by orthogonal planes. ROI tables are stored as -AnnData tables, within OME-NGFF Zarr images. 
+* A core [table specification](#core-tables), common to all cases; +* Two levels of specifications for tables that describe regions of interest (ROIs): + * [Basic ROI tables](#basic-roi-tables); (FIXME: better naming?) + * [Advanced ROI tables](#advanced-roi-tables); (FIXME: better naming?) +* A [feature-table specification](#feature-tables). (FIXME: specify this is in progress) -We have several use cases for tables: +These different specifications correspond to different use cases in `fractal-tasks-core`: -1. We keep track of the positions of the Field of Views (FOVs) within a well, after stitching the corresponding FOV images into a single whole-well array. -2. We keep track of the original state before some transformations are applied - e.g. shifting FOVs to avoid overlaps, or shifting a multiplexing cycle during registration. -3. Several tasks in `fractal-tasks-core` take an existing ROI table as an input and then loop over the ROIs defined in the table. Such tasks have more flexibility, as they can process e.g. a whole well, a set of FOVs, or a set of custom regions of the array. -4. We store ROIs associated to segmented objects, for instance the bounding boxes of organoids/nuclei. -5. We store measurements associated to segmented objects (e.g. as computed via `regionprops` from `scikit-image`, as wrapped in [napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops)). Note: we store these tables with the the same AnnData format as described in this page, but they are not *ROI* tables and therefore they are not based on the current specifications (notably with respect to the [required columns](#table-contents)). +* Basic ROI tables: + * We keep track of the positions of the Field of Views (FOVs) within a well, after stitching the corresponding FOV images into a single whole-well array. + * We keep track of the original state before some transformations are applied - e.g. 
shifting FOVs to avoid overlaps, or shifting a multiplexing cycle during registration. + * Several tasks in `fractal-tasks-core` take an existing ROI table as an input and then loop over the ROIs defined in the table. Such tasks have more flexibility, as they can process e.g. a whole well, a set of FOVs, or a set of custom regions of the array. +* Advanced ROI tables: + * We store ROIs associated to segmented objects, for instance the bounding boxes of organoids/nuclei. +* Feature tables: + * We store measurements associated to segmented objects (e.g. as computed via `regionprops` from `scikit-image`, as wrapped in [napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops)). +> **Note**: The specifications below are largely based on [a proposed update to +> NGFF specs](https://github.com/ome/ngff/pull/64). This update is currently on +> hold, and `fractal-tasks-core` will evolve as soon as the NGFF specs will +> adopt a definition of tables - see also the [Outlook](#outlook) section. -## Table specifications +## Specifications -The current section describes the first version (V1) of `fractal-tasks-core` -tables, which is based on [a proposed update to NGFF -specs](https://github.com/ome/ngff/pull/64); this update is currently on hold, -and `fractal-tasks-core` will evolve as soon as the NGFF specs will adopt a -definition of tables. -As in the original proposed NGFF update, the current specifications are -specifically based on AnnData tables. +### Core tables + +The core-table specification consists in the definition of the required Zarr +structure and attributes, and of the `AnnData` table format. 
-### Zarr structure +#### `AnnData` table format -The structure of Zarr groups is based on the [`image` specification in NGFF 0.4](https://ngff.openmicroscopy.org/0.4/index.html#image-layout), with an additional `tables` group and the corresponding subgroups (similar to `labels`): +Data of a table are stored into a Zarr group as `AnnData` ("Annotated Data") +objects; the [`anndata` Python library](https://anndata.readthedocs.io) provides the +definition of this format and the relevant tools. + +Quoting from `anndata` documentation: + +> `AnnData` is specifically designed for matrix-like data. By this we mean that +> we have $n$ observations, each of which can be represented as $d$-dimensional +> vectors, where each dimension corresponds to a variable or feature. Both the +> rows and columns of this $n \times d$ matrix are special in the sense that +> they are indexed. +> +> (https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html) + +Note that `AnnData` tables are easily transformed from/into `pandas.DataFrame` +objects - see e.g. the [`AnnData.to_df` +method](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.to_df.html#anndata.AnnData.to_df). + +#### Zarr structure + +The structure of Zarr groups is based on the [`image` specification in NGFF +0.4](https://ngff.openmicroscopy.org/0.4/index.html#image-layout), with an +additional `tables` group and the corresponding subgroups (similar to +`labels`): ``` image.zarr # Zarr group for a NGFF image | @@ -46,9 +77,7 @@ image.zarr # Zarr group for a NGFF image └── ... 
``` -### Zarr attributes - -#### Tables container +#### Zarr attributes The Zarr attributes of the `tables` group must include the key `tables`, pointing to the list of all tables; this simplifies the discovery of image @@ -61,7 +90,31 @@ Here is an example of `image.zarr/tables/.zattrs`: } ``` -#### Single table (default) +The Zarr attributes of each specific-table group have no required properties, +but writing an `AnnData` object to that group typically sets some default +attributes. For anndata 0.11, for instance, the attributes in +`image.zarr/tables/table1/.zattrs` would be +```json +{ + "encoding-type": "anndata", # Automatically added by anndata + "encoding-version": "0.1.0", # Automatically added by anndata +} +``` + +### ROI tables + +The current section describes the first version (V1) of `fractal-tasks-core` +tables, which is based on [a proposed update to NGFF +specs](https://github.com/ome/ngff/pull/64); this update is currently on hold, +and `fractal-tasks-core` will evolve as soon as the NGFF specs will adopt a +definition of tables. +As in the original proposed NGFF update, the current specifications are +specifically based on `AnnData` tables. + +In Fractal, regions of interest (ROIs) are three-dimensional +regions of space delimited by orthogonal planes. ROI tables are stored as `AnnData` tables, within OME-NGFF Zarr images. + +#### Basic ROI tables For each table, the Zarr attributes must include the key `fractal_roi_table_version`, pointing to the string version of this @@ -79,7 +132,44 @@ Here is an example of `image.zarr/tables/table1/.zattrs` This is the kind of tables that are used in `fractal-tasks-core` to store ROIs like a whole well, or the list of field of views. -#### Single table (segmented objects) +##### Required columns + +The [`var` attribute of AnnData +objects](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.var.html#anndata.AnnData.var) +indexes the columns of the table.
A `fractal-tasks-core` ROI table must include +the following six columns: + +* `x_micrometer`, `y_micrometer`: the lower bounds of the XY intervals defining the ROI, in micrometers; +* `z_micrometer`: the lower bound of the Z interval defining the ROI, in arbitrary units or in micrometers; +* `len_x_micrometer`, `len_y_micrometer`: the XY edge lengths, in micrometers; +* `len_z_micrometer`: the Z edge length in arbitrary units (corresponding to the number of Z planes) or in micrometers. + +> Notes: +> +> 1. The **axes origin** for the ROI positions (e.g. for `x_micrometer`) is set +> to coincide with the top-left corner of a well (for the YX axes) and with +> the lowest Z plane. +> 2. ROIs are defined in **physical coordinates**, and they do not store +> information on the number or size of pixels. +> 3. The current version of `fractal-tasks-core` only uses **arbitrary units** +> for `z_micrometer` and `len_z_micrometer` columns, where a single unit +> corresponds to the distance between two subsequent Z planes. + +##### Other columns + +ROI tables may also include arbitrary columns. Here are the ones that are +typically used in `fractal-tasks-core`: + +* `x_micrometer_original` and `y_micrometer_original`, which are a copy of `x_micrometer` and `y_micrometer` taken before applying some transformation; +* `label`, which is used within measurement tables as a reference to the labels corresponding to a row of measurements (see [description of `instance_key` below](#single-table-segmented-objects)). +* FIXME: add `translation_x/y/z` columns + + + + + + +#### Advanced ROI tables (FIXME: rename?)
When each table row corresponds to (the bounding box of) a segmented object, `fractal-tasks-core` follows more closely the [proposed NGFF update mentioned @@ -108,55 +198,13 @@ Here is an example of `image.zarr/tables/table1/.zattrs` } ``` -### AnnData table format - -Data of a table are stored into a Zarr group as AnnData ("Annotated Data") -objects; the [`anndata` Python library](https://anndata.readthedocs.io) provides the -definition of this format and the relevant tools. - -Quoting from `anndata` documentation: - -> AnnData is specifically designed for matrix-like data. By this we mean that -> we have $n$ observations, each of which can be represented as $d$-dimensional -> vectors, where each dimension corresponds to a variable or feature. Both the -> rows and columns of this $n \times d$ matrix are special in the sense that -> they are indexed. -> -> (https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html) - -Note that AnnData tables are easily transformed from/into `pandas.DataFrame` -objects - see e.g. the [`AnnData.to_df` -method](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.to_df.html#anndata.AnnData.to_df). - -### Table contents +### Feature tables -The [`var` attribute of AnnData -objects](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.var.html#anndata.AnnData.var) -indexes the columns of the table. A `fractal-tasks-core` ROI table must include -the following six columns: +FIXME: to do -* `x_micrometer`, `y_micrometer`: the lower bounds of the XY intervals defining the ROI, in micrometers; -* `z_micrometer`: the lower bound of the Z interval defining the ROI, in arbitrary units or in micrometers; -* `len_x_micrometer`, `len_y_micrometer`: the XY edge lenghts, in micrometers; -* `len_z_micrometer`: the Z edge lenght in arbitrary units (corresponding to the number of Z planes) or in micrometers. 
- -ROI tables may also include other optional columns: - -* `x_micrometer_original` and `y_micrometer_original`, which are a copy of `x_micrometer` and `y_micrometer` taken before applying some transformation; -* `label`, which is used within measurement tables as a reference to the labels corresponding to a row of measurements (see [description of `instance_key` above](#single-table-segmented-objects)). - -> Notes: -> -> 1. The **axes origin** for the ROI positions (e.g. for `x_micrometer`) is set -> to coincide with the top-left corner of a well (for the YX axes) and with -> the lowest Z plane. -> 2. ROIs are defined in **physical coordinates**, and they do not store -> information on the number or size of pixels. -> 3. The current version of `fractal-tasks-core` only uses **arbitrary units** -> for `z_micrometer` and `len_z_micrometer` columns, where a single unit -> corresponds to the distance between two subsequent Z planes. +## Examples -## Default tables +### Default ROI tables OME-Zarrs created via `fractal-tasks-core` (e.g. by parsing Yokogawa images via the @@ -185,14 +233,15 @@ ROI tables to the NGFF images: may simply be useful for applying downstream processing to smaller arrays and avoid large memory requirements. -## Examples + +### Reading/writing tables The `anndata` library offers a set of functions for input/output of AnnData tables, including functions specifically targeting the Zarr format. -### Reading a table +#### Reading a table -To read an AnnData table from a Zarr group, one may use the [`read_zarr` +To read an `AnnData` table from a Zarr group, one may use the [`read_zarr` function](https://anndata.readthedocs.io/en/latest/generated/anndata.read_zarr.html). In the following example a NGFF image was created by sticthing together two field of views, where each one is made of a stack of five Z planes. 
@@ -204,7 +253,7 @@ import anndata as ad table = ad.read_zarr("/somewhere/image.zarr/tables/FOV_ROI_table") print(table) -# AnnData object with n_obs × n_vars = 2 × 8 +# `AnnData` object with n_obs × n_vars = 2 × 8 print(table.obs_names) # Index(['FOV_1', 'FOV_2'], dtype='object', name='FieldIndex') @@ -233,11 +282,10 @@ In this case, the second FOV (labeled `FOV_2`) is defined as the three-dimension * Y is between 0 and 351 micrometers; * Z is between 0 and 5 - which means that all the five available Z planes are included. - -### Writing a table +#### Writing a table The `anndata.experimental.write_elem` function provides the required -functionality to write an AnnData object to a Zarr group. In +functionality to write an `AnnData` object to a Zarr group. In `fractal-tasks-core`, the `write_table` helper function wraps the `anndata` function and includes additional functionalities -- see [its documentation](../reference/fractal_tasks_core/lib_write/#fractal_tasks_core.lib_write.write_table). @@ -254,7 +302,7 @@ import zarr import anndata as ad from fractal_tasks_core.lib_write import write_table -table = ad.AnnData(X=np.ones((10, 10))) # Generate a dummy AnnData object +table = ad.AnnData(X=np.ones((10, 10))) # Generate a dummy `AnnData` object image_group = zarr.open_group("/tmp/image.zarr") table_name = "MyTable" table_attrs = { @@ -313,7 +361,9 @@ $ cat /tmp/image.zarr/tables/MyTable/.zattrs # View single-table attributes } ``` -## Future perspectives + +## Outlook + These specifications may evolve (especially based on the future NGFF updates), eventually leading to breaking changes in V2. Development of @@ -327,7 +377,9 @@ An in-progress list of aspects that may be reviewed: also fix the current misleading names for the Z position/length columns (`z_micrometer` and `len_z_micrometer`, even though corresponding data are in arbitrary units). -2. We may re-evaluate whether AnnData tables are the most appropriate tool. For +2. 
We may re-evaluate whether `AnnData` tables are the most appropriate tool. For the record, Zarr does not natively support storage of dataframes (see e.g. https://github.com/zarr-developers/numcodecs/issues/452), which is one aspect in favor of sticking with the `anndata` library. + +--- From a902dc4394d6887e6279717b6c89a3656e355aa1 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:10:34 +0200 Subject: [PATCH 19/27] BROKEN Stash of current changes --- docs/tables.md | 53 +++++++++++++++++++++++++++++++++++++------------- mkdocs.yml | 33 ++++++++++++++++--------------- 2 files changed, 56 insertions(+), 30 deletions(-) diff --git a/docs/tables.md b/docs/tables.md index cb4ecd361..28c05662b 100644 --- a/docs/tables.md +++ b/docs/tables.md @@ -1,20 +1,20 @@ -# Fractal tables +# Tables -Within `fractal-tasks-core` we make use of tables stored as `AnnData` objects -within OME-Zarr image groups. This page describes the specifications for -different kinds of tables: +Within `fractal-tasks-core`, we make use of tables which are `AnnData` objects +stored within OME-Zarr image groups. This page defines the different kinds of +tables we use, and it includes: -* A core [table specification](#core-tables), common to all cases; -* Two levels of specifications for tables that describe regions of interest (ROIs): +* A core [table specification](#core-tables), valid for all tables; +* Two levels of specifications for tables that define regions of interest (ROIs): * [Basic ROI tables](#basic-roi-tables); (FIXME: better naming?) - * [Advanced ROI tables](#advanced-roi-tables); (FIXME: better naming?) -* A [feature-table specification](#feature-tables). (FIXME: specify this is in progress) + * [Advanced ROI tables](#advanced-roi-tables), to be used e.g. for masked loading; (FIXME: better naming?) +* A [feature-table specification](#feature-tables), to store measurements. 
(FIXME: specify this is in progress) These different specifications correspond to different use cases in `fractal-tasks-core`: * Basic ROI tables: - * We keep track of the positions of the Field of Views (FOVs) within a well, after stitching the corresponding FOV images into a single whole-well array. - * We keep track of the original state before some transformations are applied - e.g. shifting FOVs to avoid overlaps, or shifting a multiplexing cycle during registration. + * We store the sizes/positions of the original Field of Views (FOVs) within the NGFF image representing a well[^1]. + * We store the unprocessed ROI details before applying some transformation - e.g. shifting FOVs to avoid overlaps, or shifting a multiplexing cycle during registration. * Several tasks in `fractal-tasks-core` take an existing ROI table as an input and then loop over the ROIs defined in the table. Such tasks have more flexibility, as they can process e.g. a whole well, a set of FOVs, or a set of custom regions of the array. * Advanced ROI tables: * We store ROIs associated to segmented objects, for instance the bounding boxes of organoids/nuclei. @@ -202,6 +202,8 @@ Here is an example of `image.zarr/tables/table1/.zattrs` FIXME: to do +https://github.com/fractal-analytics-platform/fractal-tasks-core/issues/593 + ## Examples ### Default ROI tables @@ -366,20 +368,43 @@ $ cat /tmp/image.zarr/tables/MyTable/.zattrs # View single-table attributes These specifications may evolve (especially based on the future NGFF updates), -eventually leading to breaking changes in V2. Development of +eventually leading to breaking changes in future versions. `fractal-tasks-core` will aim at mantaining backwards-compatibility with V1 for a reasonable amount of time. -An in-progress list of aspects that may be reviewed: +Here is an in-progress list of aspects that may be reviewed: 1. We aim at removing the use of hard-coded units from the column names (e.g. 
`x_micrometer`), in favor of a more general definition of units. This will also fix the current misleading names for the Z position/length columns (`z_micrometer` and `len_z_micrometer`, even though corresponding data are in arbitrary units). -2. We may re-evaluate whether `AnnData` tables are the most appropriate tool. For +2. The `z_micrometer` and `len_z_micrometer` columns are currently required in + all ROI tables, even when the ROIs actually define a two-dimensional XY + region; in that case, we set `z_micrometer=0` and `len_z_micrometer` is such + that the whole Z size is covered. In a future version, we may introduce more + flexibility and also accept ROI tables which only include X and Y axes, and + adapt the relevant tools so that they automatically expand these ROIs into + three-dimensions when appropriate. +3. We may re-evaluate whether `AnnData` tables are the most appropriate tool. For the record, Zarr does not natively support storage of dataframes (see e.g. https://github.com/zarr-developers/numcodecs/issues/452), which is one aspect in favor of sticking with the `anndata` library. ---- + + +FIXME: remove "arbitrary units", after verifying that this is how the code works +FIXME: only mention "well" when talking about tiled + +FIXME: rather use word "tiled" + +FIXME: mention https://github.com/ome/ngff/pull/137 (Generalize well organization in high-content screening: field of view => image) + +[^1]: +Within `fractal-tasks-core`, NGFF images represent whole wells; this is still +compliant with the NGFF specifications, as of an [approved clarification in the +specs](https://github.com/ome/ngff/pull/137). This explains the reason for +storing original the regions corresponding to the original FOVs in a specific +ROI table, since one NGFF image includes a collection of FOVs. Note that this +approach does not rely on the assumption that the FOVs constitute a regular +tiling of the well, but it also covers the case of irregularly placed FOVs. 
diff --git a/mkdocs.yml b/mkdocs.yml index 913ccbd1d..e3467e035 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -4,22 +4,23 @@ repo_url: https://github.com/fractal-analytics-platform/fractal-tasks-core repo_name: fractal-tasks-core markdown_extensions: -- admonition -- pymdownx.details -- pymdownx.emoji -- pymdownx.magiclink -- pymdownx.snippets: - check_paths: true - base_path: - - fractal_tasks_core/__FRACTAL_MANIFEST__.json -- pymdownx.superfences -- pymdownx.tabbed: - alternate_style: true -- pymdownx.tasklist -- toc: - permalink: true -- pymdownx.arithmatex: - generic: true + - admonition + - pymdownx.details + - pymdownx.emoji + - pymdownx.magiclink + - pymdownx.snippets: + check_paths: true + base_path: + - fractal_tasks_core/__FRACTAL_MANIFEST__.json + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + - pymdownx.tasklist + - toc: + permalink: true + - pymdownx.arithmatex: + generic: true + - footnotes extra_javascript: - javascripts/mathjax.js From c2d9d525aff2199443cda3e2ab2cc7818ea35353 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 30 Oct 2023 13:41:43 +0100 Subject: [PATCH 20/27] Update docstring of `convert_ROIs_from_3D_to_2D` --- fractal_tasks_core/lib_regions_of_interest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fractal_tasks_core/lib_regions_of_interest.py b/fractal_tasks_core/lib_regions_of_interest.py index f3e32f8b2..0bcf7ee82 100644 --- a/fractal_tasks_core/lib_regions_of_interest.py +++ b/fractal_tasks_core/lib_regions_of_interest.py @@ -183,6 +183,10 @@ def convert_ROIs_from_3D_to_2D( """ TBD + Note that this function is only relevant when the ROIs in adata span the + whole extent of the Z axis. + TODO: check this explicitly. 
+ Args: adata: TBD pixel_size_z: TBD From 49a9b653438e90e5cd2c5850b9ac91d1a7e27bdc Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:32:34 +0100 Subject: [PATCH 21/27] Update tables docs --- docs/tables.md | 321 ++++++++++++++++++++++++++++--------------------- 1 file changed, 186 insertions(+), 135 deletions(-) diff --git a/docs/tables.md b/docs/tables.md index 28c05662b..2c4a523c2 100644 --- a/docs/tables.md +++ b/docs/tables.md @@ -1,30 +1,24 @@ # Tables Within `fractal-tasks-core`, we make use of tables which are `AnnData` objects -stored within OME-Zarr image groups. This page defines the different kinds of +stored within OME-Zarr image groups. This page describes the different kinds of tables we use, and it includes: * A core [table specification](#core-tables), valid for all tables; -* Two levels of specifications for tables that define regions of interest (ROIs): - * [Basic ROI tables](#basic-roi-tables); (FIXME: better naming?) - * [Advanced ROI tables](#advanced-roi-tables), to be used e.g. for masked loading; (FIXME: better naming?) -* A [feature-table specification](#feature-tables), to store measurements. (FIXME: specify this is in progress) - -These different specifications correspond to different use cases in `fractal-tasks-core`: - -* Basic ROI tables: - * We store the sizes/positions of the original Field of Views (FOVs) within the NGFF image representing a well[^1]. - * We store the unprocessed ROI details before applying some transformation - e.g. shifting FOVs to avoid overlaps, or shifting a multiplexing cycle during registration. - * Several tasks in `fractal-tasks-core` take an existing ROI table as an input and then loop over the ROIs defined in the table. Such tasks have more flexibility, as they can process e.g. a whole well, a set of FOVs, or a set of custom regions of the array. 
-* Advanced ROI tables: - * We store ROIs associated to segmented objects, for instance the bounding boxes of organoids/nuclei. -* Feature tables: - * We store measurements associated to segmented objects (e.g. as computed via `regionprops` from `scikit-image`, as wrapped in [napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops)). - -> **Note**: The specifications below are largely based on [a proposed update to -> NGFF specs](https://github.com/ome/ngff/pull/64). This update is currently on -> hold, and `fractal-tasks-core` will evolve as soon as the NGFF specs will -> adopt a definition of tables - see also the [Outlook](#outlook) section. +* The definition of [tables for regions of interests (ROIs)](#roi-tables); +* The definition of [masking ROI tables](#masking-roi-tables), namely ROI tables that are linked e.g. to labels; +* A [feature-table specification](#feature-tables), to store measurements. + +> ⚠️ **Warning**: As of version 0.13 of `fractal-tasks-core`, the +> specifications below are not yet fully implemented (see issue +> [602](https://github.com/fractal-analytics-platform/fractal-tasks-core/issues/602) +> and +> [593](https://github.com/fractal-analytics-platform/fractal-tasks-core/issues/593)). +
+> **Note**: The specifications below are largely inspired by [a proposed update +> to NGFF specs](https://github.com/ome/ngff/pull/64). This update is currently +> on hold, and `fractal-tasks-core` will evolve as soon as an official NGFF +> table specs is adopted - see also the [Outlook](#outlook) section. ## Specifications @@ -33,13 +27,12 @@ These different specifications correspond to different use cases in `fractal-tas The core-table specification consists in the definition of the required Zarr structure and attributes, and of the `AnnData` table format. -#### `AnnData` table format +**`AnnData` table format** -Data of a table are stored into a Zarr group as `AnnData` ("Annotated Data") -objects; the [`anndata` Python library](https://anndata.readthedocs.io) provides the -definition of this format and the relevant tools. - -Quoting from `anndata` documentation: +We store tabular data into Zarr groups as `AnnData` ("Annotated Data") objects; +the [`anndata` Python library](https://anndata.readthedocs.io) provides the +definition of this format and the relevant tools. Quoting from the `anndata` +documentation: > `AnnData` is specifically designed for matrix-like data. By this we mean that > we have $n$ observations, each of which can be represented as $d$-dimensional @@ -53,13 +46,13 @@ Note that `AnnData` tables are easily transformed from/into `pandas.DataFrame` objects - see e.g. the [`AnnData.to_df` method](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.to_df.html#anndata.AnnData.to_df). 
-#### Zarr structure +**Zarr structure and attributes** The structure of Zarr groups is based on the [`image` specification in NGFF 0.4](https://ngff.openmicroscopy.org/0.4/index.html#image-layout), with an additional `tables` group and the corresponding subgroups (similar to `labels`): -``` +```hl_lines="12 13 14 15" image.zarr # Zarr group for a NGFF image | ├── 0 # Zarr array for multiscale level 0 @@ -77,136 +70,186 @@ image.zarr # Zarr group for a NGFF image └── ... ``` -#### Zarr attributes - The Zarr attributes of the `tables` group must include the key `tables`, -pointing to the list of all tables; this simplifies the discovery of image -tables. - -Here is an example of `image.zarr/tables/.zattrs`: -```json +pointing to the list of all tables (this simplifies the discovery of image +tables), as in +```json title="image.zarr/tables/.zattrs" { "tables": ["table_1", "table_2"] } ``` -The Zarr attributes of each specific-table group have no required properties, -but writing an `AnnData` object to that group typically sets some default -attributes. For anndata 0.11, for instance, the attributes in -`image.zarr/tables/table1/.zattrs` would be -```json +The Zarr attributes of each specific-table group must include the version of +the table specification (currently version 1), through the +`fractal_table_version` attribute. Also note that the `anndata` function to +write an `AnnData` object into a Zarr group automatically sets additional +attributes. 
Here is an example of the resulting Zarr attributes: +```json title="image.zarr/tables/table_1/.zattrs" { - "encoding-type": "anndata", # Automatically added by anndata - "encoding-version": "0.1.0", # Automatically added by anndata + "fractal_table_version": "1", + "encoding-type": "anndata", // Automatically added by anndata 0.11 + "encoding-version": "0.1.0", // Automatically added by anndata 0.11 } ``` ### ROI tables -The current section describes the first version (V1) of `fractal-tasks-core` -tables, which is based on [a proposed update to NGFF -specs](https://github.com/ome/ngff/pull/64); this update is currently on hold, -and `fractal-tasks-core` will evolve as soon as the NGFF specs will adopt a -definition of tables. -As in the original proposed NGFF update, the current specifications are -specifically based on `AnnData` tables. - -In Fractal, regions of interest (ROIs) are three-dimensional -regions of space delimited by orthogonal planes. ROI tables are stored as +In `fractal-tasks-core`, a ROI table defines regions of space which are +three-dimensional (see also the [Outlook section](#outlook) about +dimensionality flexibility) and box-shaped. +Example use cases are described [here](#roi-tables_1). -#### Basic ROI tables +**Zarr attributes** -For each table, the Zarr attributes must include the key -`fractal_roi_table_version`, pointing to the string version of this -specification (e.g. `1`). - -Here is an example of `image.zarr/tables/table1/.zattrs` -```json +The specification of a ROI table is a subset of the [core table +one](#core-tables).
Moreover, the table-group Zarr attributes must include the +`type` attribute with value `roi_table`, as in +```json title="image.zarr/tables/table_1/.zattrs" hl_lines="3" { - "fractal_roi_table_version": "1", + "fractal_table_version": "1", + "type": "roi_table", "encoding-type": "anndata", # Automatically added by anndata "encoding-version": "0.1.0", # Automatically added by anndata } ``` -This is the kind of tables that are used in `fractal-tasks-core` to store ROIs -like a whole well, or the list of field of views. - -##### Required columns +**Table columns** -The [`var` attribute of AnnData -objects](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.var.html#anndata.AnnData.var) -indexes the columns of the table. A `fractal-tasks-core` ROI table must include -the following six columns: +The [`var` +attribute](https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.var.html#anndata.AnnData.var) +of a given `AnnData` object indexes the columns of the table. A +`fractal-tasks-core` ROI table must include the following six columns: -* `x_micrometer`, `y_micrometer`: the lower bounds of the XY intervals defining the ROI, in micrometers; -* `z_micrometer`: the lower bound of the Z interval defining the ROI, in arbitrary units or in micrometers; -* `len_x_micrometer`, `len_y_micrometer`: the XY edge lenghts, in micrometers; -* `len_z_micrometer`: the Z edge lenght in arbitrary units (corresponding to the number of Z planes) or in micrometers. +* `x_micrometer`, `y_micrometer`, `z_micrometer`: + the lower bounds of the XYZ intervals defining the ROI, in micrometers; +* `len_x_micrometer`, `len_y_micrometer`, `len_z_micrometer`: + the XYZ edge lengths, in micrometers. > Notes: > -> 1. The **axes origin** for the ROI positions (e.g. for `x_micrometer`) is set -> to coincide with the top-left corner of a well (for the YX axes) and with +> 1. The **axes origin** for the ROI positions (e.g.
for `x_micrometer`) +> corresponds to the top-left corner of the image (for the YX axes) and to > the lowest Z plane. > 2. ROIs are defined in **physical coordinates**, and they do not store > information on the number or size of pixels. -> 3. The current version of `fractal-tasks-core` only uses **arbitrary units** -> for `z_micrometer` and `len_z_micrometer` columns, where a single unit -> corresponds to the distance between two subsequent Z planes. -##### Other columns +ROI tables may also include other columns, beyond the required ones. Here are +the ones that are typically used in `fractal-tasks-core` (see also the [Use +cases](#use-cases) section): -ROI tables may also include abitrary columns. Here are the ones that are -typically used in `fractal-tasks-core`: +* `x_micrometer_original` and `y_micrometer_original`, which are a copy of + `x_micrometer` and `y_micrometer` taken before applying some transformation; +* `translation_x`, `translation_y` and `translation_z`, which are used during + registration of multiplexing cycles; +* `label`, which is used to link a ROI to a label (either for + [masking ROI tables](#masking-roi-tables) or for + [feature tables](#feature-tables)). -* `x_micrometer_original` and `y_micrometer_original`, which are a copy of `x_micrometer` and `y_micrometer` taken before applying some transformation; -* `label`, which is used within measurement tables as a reference to the labels corresponding to a row of measurements (see [description of `instance_key` below](#single-table-segmented-objects)). -* FIXME: add `translation_x/y/z` columns +### Masking ROI tables +Masking ROI tables are a specific instance of the basic ROI tables described +above, where each ROI must also be associated to a specific label of a label +image. 
+**Motivation** +The motivation for this association is based on the following use case: +* By performing segmentation of a NGFF image, we identify N objects and we + store them as a label image (where the value at each pixel corresponds to the + label index); +* We also compute the three-dimensional bounding box of each segmented object, + and store these bounding boxes into a `masking` ROI table; +* For each one of these ROIs, we also include information that links it to both + the label image and a specific label index; +* During further processing we can load/modify specific sub-regions of the ROI, + based on information contained in the label image. This kind of operations + are `masked`, as they only act on the array elements that match a certain + condition on the label value. -#### Advanced ROI tables (FIXME: rename?) +**Zarr attributes** -When each table row corresponds to (the bounding box of) a segmented object, -`fractal-tasks-core` follows more closely the [proposed NGFF update mentioned -above](https://github.com/ome/ngff/pull/64), with the following additional -requirements on the Zarr group of a given table: +For this kind of tables, `fractal-tasks-core` closely follows the [proposed +NGFF update mentioned above](https://github.com/ome/ngff/pull/64). The +requirements on the Zarr attributes of a given table are: -* Attributes must contain a `type` key, with value `ngff:region_table`. +* Attributes must contain a `type` key, with value `masking_roi_table`[^2]. * Attributes must contain a `region` key; the corresponding value must be an object with a `path` key and a string value (i.e. the path to the data the table is annotating). -* Attributes may include a key `instance_key`, which is the key in `obs` that - denotes which instance in `region` the row corresponds to. If `instance_key` - is not provided, the values from the `_index` Zarr attribute of `obs` is used.
+* Attributes must include a key `instance_key`, which is the key in `obs` that + denotes which instance in `region` the row corresponds to. -Here is an example of `image.zarr/tables/table1/.zattrs` -```json +Here is an example of valid Zarr attributes +```json title="image.zarr/tables/table_1/.zattrs" hl_lines="3 4 5" { - "fractal_roi_table_version": "1", - "type": "ngff:region_table", - "region": { - "path": "../labels/label_DAPI", - }, + "fractal_table_version": "1", + "type": "masking_roi_table", + "region": { "path": "../labels/label_DAPI" }, "instance_key": "label", "encoding-type": "anndata", # Automatically added by anndata "encoding-version": "0.1.0", # Automatically added by anndata } ``` +**Table columns** + +On top of the required ROI-table columns, a masking ROI table must include the +column which is defined in its `instance_key` attribute, e.g. the `label` one in +the example above. + ### Feature tables -FIXME: to do +**Motivation** + +The typical use case for feature tables is to store measurements related to +segmented objects, while maintaining a link to the original instances (e.g. +labels). Note that the current specification is aligned to the one of [masking +ROI tables](#masking-roi-tables), since they share the same kind of use case, +but the two may diverge in the future. + +As part of the current `fractal-tasks-core` tasks, measurements can be +performed e.g. via `regionprops` from `scikit-image`, as wrapped in +[napari-skimage-regionprops](https://github.com/haesleinhuepf/napari-skimage-regionprops). + +**Zarr attributes** + +For this kind of tables, `fractal-tasks-core` closely follows the [proposed +NGFF update mentioned above](https://github.com/ome/ngff/pull/64). The +requirements on the Zarr attributes of a given table are: + +* Attributes must contain a `type` key, with value `feature_table`[^2]. +* Attributes must contain a `region` key; the corresponding value must be an + object with a `path` key and a string value (i.e.
the path to the data the + table is annotating). +* Attributes must include a key `instance_key`, which is the key in `obs` that + denotes which instance in `region` the row corresponds to. + +Here is an example of valid Zarr attributes +```json title="image.zarr/tables/table_1/.zattrs" hl_lines="3 4 5" +{ + "fractal_table_version": "1", + "type": "feature_table", + "region": { "path": "../labels/label_DAPI" }, + "instance_key": "label", + "encoding-type": "anndata", # Automatically added by anndata + "encoding-version": "0.1.0", # Automatically added by anndata +} +``` + +**Table columns** -https://github.com/fractal-analytics-platform/fractal-tasks-core/issues/593 +There is no specific constraint on which columns a feature table should have. ## Examples -### Default ROI tables +### Use cases + +The different table specifications above correspond to different use cases in +`fractal-tasks-core`. + +#### ROI tables OME-Zarrs created via `fractal-tasks-core` (e.g. by parsing Yokogawa images via the @@ -215,8 +258,8 @@ or [`create_ome_zarr_multiplex`](../reference/fractal_tasks_core/tasks/create_ome_zarr_multiplex/#fractal_tasks_core.tasks.create_ome_zarr_multiplex.create_ome_zarr_multiplex) tasks) always include two specific ROI tables: +* The table named `well_ROI_table`, which covers the NGFF image corresponding to the whole well[^1]. * The table named `FOV_ROI_table`, which lists all original FOVs; -* The table named `well_ROI_table`, which covers the NGFF image corresponding to the whole well (formed by all the original FOVs stiched together) Each one of these two tables includes ROIs that are only defined in the XY plane, and span the whole set of Z planes. Note that this differs, e.g., from @@ -235,6 +278,20 @@ ROI tables to the NGFF images: may simply be useful for applying downstream processing to smaller arrays and avoid large memory requirements. 
+ROI tables are also used and updated during image processing, e.g as in: + +* FOV ROI tables may undergo transformations during processing, e.g. FOV ROIs + may be shifted to avoid overlaps; in this case, we use the optional columns + `x_micrometer_original` and `y_micrometer_original` to store the values + before the transformation. +* FOV ROI tables are also used to store information on the registration of + multiplexing cycles, via the `translation_x`, `translation_y` and + `translation_z` optional columns. +* Several tasks in `fractal-tasks-core` take an existing ROI table as an input + and then loop over the ROIs defined in the table. This makes the task more + flexible, as it can be used to process e.g. a whole well, a set of FOVs, or a + set of custom regions of the array. + ### Reading/writing tables @@ -354,7 +411,7 @@ $ cat /tmp/image.zarr/tables/MyTable/.zattrs # View single-table attributes { "encoding-type": "anndata", "encoding-version": "0.1.0", - "fractal_roi_table_version": "1", + "fractal_table_version": "1", "instance_key": "label", "region": { "path": "../labels/MyLabel" @@ -374,37 +431,31 @@ a reasonable amount of time. Here is an in-progress list of aspects that may be reviewed: -1. We aim at removing the use of hard-coded units from the column names (e.g. - `x_micrometer`), in favor of a more general definition of units. This will - also fix the current misleading names for the Z position/length columns - (`z_micrometer` and `len_z_micrometer`, even though corresponding data are - in arbitrary units). -2. The `z_micrometer` and `len_z_micrometer` columns are currently required in - all ROI tables, even when the ROIs actually define a two-dimensional XY - region; in that case, we set `z_micrometer=0` and `len_z_micrometer` is such - that the whole Z size is covered. 
In a future version, we may introduce more - flexibility and also accept ROI tables which only include X and Y axes, and - adapt the relevant tools so that they automatically expand these ROIs into - three-dimensions when appropriate. -3. We may re-evaluate whether `AnnData` tables are the most appropriate tool. For - the record, Zarr does not natively support storage of dataframes (see e.g. - https://github.com/zarr-developers/numcodecs/issues/452), which is one - aspect in favor of sticking with the `anndata` library. - - - -FIXME: remove "arbitrary units", after verifying that this is how the code works -FIXME: only mention "well" when talking about tiled - -FIXME: rather use word "tiled" +* We aim at removing the use of hard-coded units from the column names (e.g. + `x_micrometer`), in favor of a more general definition of units. +* The `z_micrometer` and `len_z_micrometer` columns are currently required in + all ROI tables, even when the ROIs actually define a two-dimensional XY + region; in that case, we set `z_micrometer=0` and `len_z_micrometer` is such + that the whole Z size is covered. In a future version, we may introduce more + flexibility and also accept ROI tables which only include X and Y axes, and + adapt the relevant tools so that they automatically expand these ROIs into + three-dimensions when appropriate. +* We may re-evaluate whether `AnnData` tables are the most appropriate tool. For + the record, Zarr does not natively support storage of dataframes (see e.g. + https://github.com/zarr-developers/numcodecs/issues/452), which is one + aspect in favor of sticking with the `anndata` library. 
-FIXME: mention https://github.com/ome/ngff/pull/137 (Generalize well organization in high-content screening: field of view => image) [^1]: -Within `fractal-tasks-core`, NGFF images represent whole wells; this is still -compliant with the NGFF specifications, as of an [approved clarification in the +Within `fractal-tasks-core`, NGFF images represent whole wells; this still +complies with the NGFF specifications, as of an [approved clarification in the specs](https://github.com/ome/ngff/pull/137). This explains the reason for -storing original the regions corresponding to the original FOVs in a specific -ROI table, since one NGFF image includes a collection of FOVs. Note that this -approach does not rely on the assumption that the FOVs constitute a regular -tiling of the well, but it also covers the case of irregularly placed FOVs. +storing the regions corresponding to the original FOVs in a specific ROI table, +since one NGFF image includes a collection of FOVs. Note that this approach +does not rely on the assumption that the FOVs constitute a regular tiling of +the well, but it also covers the case of irregularly placed FOVs. + +[^2]: +Note that the table types `masking_roi_table` and `feature_table` closely +resemble the `type="ngff:region_table"` specification in the previous [proposed +NGFF table specs](https://github.com/ome/ngff/pull/64). 
From 1ecc1305251dd7357ec4edfa63e780658def0f6a Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:38:30 +0100 Subject: [PATCH 22/27] Rename `__ROI_TABLE_VERSION__` into `__FRACTAL_TABLE_VERSION__` --- fractal_tasks_core/__init__.py | 2 +- fractal_tasks_core/lib_write.py | 4 ++-- tests/test_unit_zarr.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fractal_tasks_core/__init__.py b/fractal_tasks_core/__init__.py index 23c9b890b..3c8692eef 100644 --- a/fractal_tasks_core/__init__.py +++ b/fractal_tasks_core/__init__.py @@ -7,4 +7,4 @@ __VERSION__ = "0.13.1" __OME_NGFF_VERSION__ = "0.4" -__ROI_TABLE_VERSION__ = "1" +__FRACTAL_TABLE_VERSION__ = "1" diff --git a/fractal_tasks_core/lib_write.py b/fractal_tasks_core/lib_write.py index b27f85fe4..2e15ac178 100644 --- a/fractal_tasks_core/lib_write.py +++ b/fractal_tasks_core/lib_write.py @@ -23,7 +23,7 @@ from zarr.errors import ContainsGroupError from zarr.errors import GroupNotFoundError -from fractal_tasks_core import __ROI_TABLE_VERSION__ +from fractal_tasks_core import __FRACTAL_TABLE_VERSION__ class OverwriteNotAllowedError(RuntimeError): @@ -294,7 +294,7 @@ def write_table( table_group.attrs.update(**table_attrs) # Always add information about the fractal-roi-table version - table_group.attrs.update(fractal_roi_table_version=__ROI_TABLE_VERSION__) + table_group.attrs.update(fractal_table_version=__FRACTAL_TABLE_VERSION__) return table_group diff --git a/tests/test_unit_zarr.py b/tests/test_unit_zarr.py index 791d77544..3f55fb922 100644 --- a/tests/test_unit_zarr.py +++ b/tests/test_unit_zarr.py @@ -4,7 +4,7 @@ import zarr from devtools import debug -from fractal_tasks_core import __ROI_TABLE_VERSION__ +from fractal_tasks_core import __FRACTAL_TABLE_VERSION__ from fractal_tasks_core.lib_write import _write_elem_with_overwrite from fractal_tasks_core.lib_write import open_zarr_group_with_overwrite from 
fractal_tasks_core.lib_write import OverwriteNotAllowedError @@ -129,8 +129,8 @@ def test_write_table(tmp_path): for key in ["region", "instance_key", "type"]: assert key not in table_a_group.attrs.keys() assert ( - table_a_group.attrs["fractal_roi_table_version"] - == __ROI_TABLE_VERSION__ + table_a_group.attrs["fractal_table_version"] + == __FRACTAL_TABLE_VERSION__ ) # Run write_table again, with overwrite=True From b8ba5e7de0d1bc2b7ab84de3c711ea5395dc017b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:38:40 +0100 Subject: [PATCH 23/27] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d969311f..de0561dde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ * Testing: * Cache Zenodo data, within GitHub actions (\#585). * Documentation: - * Define V1 of ROI-table specs (\#582). + * Define V1 of table specs (\#582). * Add mathjax support (\#582). * Add cross-reference inventories to external APIs (\#582). From 26435d63fed4761c953e6773c18c5585d607c46b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:38:52 +0100 Subject: [PATCH 24/27] Fix feature-table description in specs --- docs/tables.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/tables.md b/docs/tables.md index 2c4a523c2..9745ff365 100644 --- a/docs/tables.md +++ b/docs/tables.md @@ -108,8 +108,8 @@ one](#core-tables). 
Moreover, the table-group Zarr attributes must include the { "fractal_table_version": "1", "type": "roi_table", - "encoding-type": "anndata", # Automatically added by anndata - "encoding-version": "0.1.0", # Automatically added by anndata + "encoding-type": "anndata", + "encoding-version": "0.1.0", } ``` @@ -188,8 +188,8 @@ Here is an example of valid Zarr attributes "type": "masking_roi_table", "region": { "path": "../labels/label_DAPI" }, "instance_key": "label", - "encoding-type": "anndata", # Automatically added by anndata - "encoding-version": "0.1.0", # Automatically added by anndata + "encoding-type": "anndata", + "encoding-version": "0.1.0", } ``` @@ -233,14 +233,15 @@ Here is an example of valid Zarr attributes "type": "feature_table", "region": { "path": "../labels/label_DAPI" }, "instance_key": "label", - "encoding-type": "anndata", # Automatically added by anndata - "encoding-version": "0.1.0", # Automatically added by anndata + "encoding-type": "anndata", + "encoding-version": "0.1.0", } ``` **Table columns** -There is no specific constraint on which columns a feature table should have. +A feature table must include the table which is defined in its `instance_key` +attribute, e.g. the `label` one in the example above. ## Examples From 0def945670bde8d755b72eb2bfeee24c92984e82 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:50:26 +0100 Subject: [PATCH 25/27] Minor updates to table specs [skip ci] --- docs/tables.md | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/docs/tables.md b/docs/tables.md index 9745ff365..519dd877c 100644 --- a/docs/tables.md +++ b/docs/tables.md @@ -135,7 +135,7 @@ of a given `AnnData` object indexes the columns of the table. A ROI tables may also include other columns, beyond the required ones. 
Here are the ones that are typically used in `fractal-tasks-core` (see also the [Use -cases](#use-cases) section): +cases](#use-cases-for-roi-tables) section): * `x_micrometer_original` and `y_micrometer_original`, which are a copy of `x_micrometer` and `y_micrometer` taken before applying some transformation; @@ -145,7 +145,6 @@ cases](#use-cases) section): [masking ROI tables](#masking-roi-tables) or for [feature tables](#feature-tables)). - ### Masking ROI tables Masking ROI tables are a specific instance of the basic ROI tables described @@ -245,12 +244,9 @@ attribute, e.g. the `label` one in the example above. ## Examples -### Use cases - -The different table specifications above correspond to different use cases in -`fractal-tasks-core`. +### Use cases for ROI tables -#### ROI tables +#### OME-Zarr creation OME-Zarrs created via `fractal-tasks-core` (e.g. by parsing Yokogawa images via the @@ -259,13 +255,15 @@ or [`create_ome_zarr_multiplex`](../reference/fractal_tasks_core/tasks/create_ome_zarr_multiplex/#fractal_tasks_core.tasks.create_ome_zarr_multiplex.create_ome_zarr_multiplex) tasks) always include two specific ROI tables: -* The table named `well_ROI_table`, which covers the NGFF image corresponding to the whole well[^1]. -* The table named `FOV_ROI_table`, which lists all original FOVs; +* The table named `well_ROI_table`, which covers the NGFF image corresponding to the whole well[^1]; +* The table named `FOV_ROI_table`, which lists all original FOVs. Each one of these two tables includes ROIs that are only defined in the XY -plane, and span the whole set of Z planes. Note that this differs, e.g., from -the case of bounding-box ROIs based on three-dimensional segmented objects, -which may have a non-trivial Z size. +plane, and span the whole image size along the Z axis. Note that this differs, +e.g., from ROIs which are the bounding boxes of three-dimensional segmented +objects, and which may cover only a part of the image Z size. 
+ +#### OME-Zarr import When working with an externally-generated OME-Zarr, one may use the [`import_ome_zarr` @@ -273,19 +271,21 @@ task](../reference/fractal_tasks_core/tasks/import_ome_zarr/#fractal_tasks_core. to make it compatible with `fractal-tasks-core`. This task optionally adds two ROI tables to the NGFF images: -* The table named `image_ROI_table`, which simply covers the whole image. +* The table named `image_ROI_table`, which covers the whole image; * A table named `grid_ROI_table`, which splits the whole-image ROI into a YX - rectangular grid of smaller ROIs. This may correspond to original FOVs, or it - may simply be useful for applying downstream processing to smaller arrays and - avoid large memory requirements. + rectangular grid of smaller ROIs. This may correspond to original FOVs (in + case the image is a tiled well[^1]), or it may simply be useful for applying + downstream processing to smaller arrays and avoid large memory requirements. + +#### OME-Zarr processing ROI tables are also used and updated during image processing, e.g as in: -* FOV ROI tables may undergo transformations during processing, e.g. FOV ROIs - may be shifted to avoid overlaps; in this case, we use the optional columns - `x_micrometer_original` and `y_micrometer_original` to store the values +* The FOV ROI table may undergo transformations during processing, e.g. FOV + ROIs may be shifted to avoid overlaps; in this case, we use the optional + columns `x_micrometer_original` and `y_micrometer_original` to store the values before the transformation. -* FOV ROI tables are also used to store information on the registration of +* The FOV ROI table is also used to store information on the registration of multiplexing cycles, via the `translation_x`, `translation_y` and `translation_z` optional columns. 
* Several tasks in `fractal-tasks-core` take an existing ROI table as an input @@ -293,7 +293,6 @@ ROI tables are also used and updated during image processing, e.g as in: flexible, as it can be used to process e.g. a whole well, a set of FOVs, or a set of custom regions of the array. - ### Reading/writing tables The `anndata` library offers a set of functions for input/output of AnnData @@ -421,10 +420,8 @@ $ cat /tmp/image.zarr/tables/MyTable/.zattrs # View single-table attributes } ``` - ## Outlook - These specifications may evolve (especially based on the future NGFF updates), eventually leading to breaking changes in future versions. `fractal-tasks-core` will aim at mantaining backwards-compatibility with V1 for @@ -446,7 +443,6 @@ Here is an in-progress list of aspects that may be reviewed: https://github.com/zarr-developers/numcodecs/issues/452), which is one aspect in favor of sticking with the `anndata` library. - [^1]: Within `fractal-tasks-core`, NGFF images represent whole wells; this still complies with the NGFF specifications, as of an [approved clarification in the From 43a622e7afd52d3849a9fa7809208a0b7d390526 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 2 Nov 2023 08:57:16 +0100 Subject: [PATCH 26/27] Improve tables docs page --- docs/tables.md | 57 +++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/docs/tables.md b/docs/tables.md index 519dd877c..6b9f8ab73 100644 --- a/docs/tables.md +++ b/docs/tables.md @@ -16,7 +16,7 @@ tables we use, and it includes: > [593](https://github.com/fractal-analytics-platform/fractal-tasks-core/issues/593)).
> **Note**: The specifications below are largely inspired by [a proposed update -> to NGFF specs](https://github.com/ome/ngff/pull/64). This update is currently +> to OME-NGFF specs](https://github.com/ome/ngff/pull/64). This update is currently > on hold, and `fractal-tasks-core` will evolve as soon as an official NGFF > table specs is adopted - see also the [Outlook](#outlook) section. @@ -71,8 +71,8 @@ image.zarr # Zarr group for a NGFF image ``` The Zarr attributes of the `tables` group must include the key `tables`, -pointing to the list of all tables (this simplifies the discovery of image -tables), as in +pointing to the list of all tables (this simplifies discovery of tables +associated with the current NGFF image), as in ```json title="image.zarr/tables/.zattrs" { "tables": ["table_1", "table_2"] @@ -97,7 +97,7 @@ attributes. Here is an example of the resulting Zarr attributes: In `fractal-tasks-core`, a ROI table defines regions of space which are three-dimensional (see also the [Outlook section](#outlook) about dimensionality flexibility) and box-shaped. -Examples use cases are described [here](#roi-tables_1). +Typical use cases are described [here](#use-cases-for-roi-tables). **Zarr attributes** @@ -123,7 +123,7 @@ of a given `AnnData` object indexes the columns of the table. A * `x_micrometer`, `y_micrometer`, `z_micrometer`: the lower bounds of the XYZ intervals defining the ROI, in micrometers; * `len_x_micrometer`, `len_y_micrometer`, `len_z_micrometer`: - the XYZ edge lenghts, in micrometers. + the XYZ edge lengths, in micrometers. > Notes: > @@ -195,8 +195,8 @@ Here is an example of valid Zarr attributes **Table columns** On top of the required ROI-table colums, a masking ROI table must include the -table which is defined in its `instance_key` attribute, e.g. the `label` one in -the example above. +column which is defined in its `instance_key` attribute (e.g. the `label` +column, for the example above). 
### Feature tables @@ -205,8 +205,8 @@ the example above. The typical use case for feature tables is to store measurements related to segmented objects, while mantaining a link to the original instances (e.g. labels). Note that the current specification is aligned to the one of [masking -ROI tables](#masking-roi-tables), since they share the same kind of use case, -but the two may diverge in the future. +ROI tables](#masking-roi-tables), since they both need to relate a table to a +label image, but the two may diverge in the future. As part of the current `fractal-tasks-core` tasks, measurements can be performed e.g. via `regionprops` from `scikit-image`, as wrapped in @@ -239,8 +239,8 @@ Here is an example of valid Zarr attributes **Table columns** -A feature table must include the table which is defined in its `instance_key` -attribute, e.g. the `label` one in the example above. +A feature table must include the column which is defined in its `instance_key` +attribute (e.g. the `label` column, for the example above). ## Examples @@ -256,7 +256,7 @@ or tasks) always include two specific ROI tables: * The table named `well_ROI_table`, which covers the NGFF image corresponding to the whole well[^1]; -* The table named `FOV_ROI_table`, which lists all original FOVs. +* The table named `FOV_ROI_table`, which lists all original fields of view (FOVs). Each one of these two tables includes ROIs that are only defined in the XY plane, and span the whole image size along the Z axis. Note that this differs, @@ -302,8 +302,9 @@ tables, including functions specifically targeting the Zarr format. To read an `AnnData` table from a Zarr group, one may use the [`read_zarr` function](https://anndata.readthedocs.io/en/latest/generated/anndata.read_zarr.html). -In the following example a NGFF image was created by sticthing together two -field of views, where each one is made of a stack of five Z planes. 
+In the following example a NGFF image was created by stitching together two +fields of view, where each one is made of a stack of five Z planes with 1 um +spacing between the planes. The `FOV_ROI_table` has information on the XY position and size of the two original FOVs (named `FOV_1` and `FOV_2`): ```python @@ -333,6 +334,15 @@ print(table.var_names) print(table.X) # [[ 0. 0. 0. 416. 351. 5. -1448.3 -1517.7] # [ 416. 0. 0. 416. 351. 5. -1032.3 -1517.7]] + +df = table.to_df() # Convert to pandas DataFrame +print(df) +# x_micrometer y_micrometer z_micrometer ... len_z_micrometer x_micrometer_original y_micrometer_original +# FieldIndex ... +# FOV_1 0.0 0.0 0.0 ... 5.0 -1448.300049 -1517.699951 +# FOV_2 416.0 0.0 0.0 ... 5.0 -1032.300049 -1517.699951 +# +# [2 rows x 8 columns] ``` In this case, the second FOV (labeled `FOV_2`) is defined as the three-dimensional region such that @@ -434,14 +444,17 @@ Here is an in-progress list of aspects that may be reviewed: * The `z_micrometer` and `len_z_micrometer` columns are currently required in all ROI tables, even when the ROIs actually define a two-dimensional XY region; in that case, we set `z_micrometer=0` and `len_z_micrometer` is such - that the whole Z size is covered. In a future version, we may introduce more - flexibility and also accept ROI tables which only include X and Y axes, and - adapt the relevant tools so that they automatically expand these ROIs into - three-dimensions when appropriate. -* We may re-evaluate whether `AnnData` tables are the most appropriate tool. For - the record, Zarr does not natively support storage of dataframes (see e.g. - https://github.com/zarr-developers/numcodecs/issues/452), which is one - aspect in favor of sticking with the `anndata` library. + that the whole Z size is covered (that is, `len_z_micrometer` is the product + of the spacing between Z planes and the number of planes). 
In a future + version, we may introduce more flexibility and also accept ROI tables which + only include X and Y axes, and adapt the relevant tools so that they + automatically expand these ROIs into three-dimensions when appropriate. +* Concerning the use of `AnnData` tables or other formats for tabular data, our + plan is to follow whatever serialised table specification becomes part of the + NGFF standard. For the record, Zarr does not natively support storage of + dataframes (see e.g. + https://github.com/zarr-developers/numcodecs/issues/452), which is one aspect + in favor of sticking with the `anndata` library. [^1]: Within `fractal-tasks-core`, NGFF images represent whole wells; this still From 98514aa47cfa7ca1f068d7ae3369a916e554f788 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 2 Nov 2023 11:26:13 +0100 Subject: [PATCH 27/27] Rephrase sentence about Z size of FOV/well ROIs --- docs/tables.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/tables.md b/docs/tables.md index 6b9f8ab73..b2e4d6f35 100644 --- a/docs/tables.md +++ b/docs/tables.md @@ -258,10 +258,10 @@ tasks) always include two specific ROI tables: * The table named `well_ROI_table`, which covers the NGFF image corresponding to the whole well[^1]; * The table named `FOV_ROI_table`, which lists all original fields of view (FOVs). -Each one of these two tables includes ROIs that are only defined in the XY -plane, and span the whole image size along the Z axis. Note that this differs, -e.g., from ROIs which are the bounding boxes of three-dimensional segmented -objects, and which may cover only a part of the image Z size. +Each one of these two tables includes ROIs that span the whole image size along +the Z axis. Note that this differs, e.g., from ROIs which are the bounding +boxes of three-dimensional segmented objects, and which may cover only a part +of the image Z size. 
#### OME-Zarr import @@ -277,6 +277,10 @@ ROI tables to the NGFF images: case the image is a tiled well[^1]), or it may simply be useful for applying downstream processing to smaller arrays and avoid large memory requirements. +As is the case for `well_ROI_table` and `FOV_ROI_table` described +[above](#ome-zarr-creation), these two tables also include ROIs spanning the +whole image extent along the Z axis. + #### OME-Zarr processing ROI tables are also used and updated during image processing, e.g as in: