diff --git a/.flake8 b/.flake8
index ffb0b3c..cfd3540 100644
--- a/.flake8
+++ b/.flake8
@@ -5,9 +5,10 @@ max_line_length = 120
# B007: it can be intended to name loop variables even if they are not used
# B023: leads to a lot of false alarms at the moment: https://github.com/PyCQA/flake8-bugbear/issues/269
# B027: it is totally valid to prepare more methods in an abstract class without forcing them to be abstract
-# B028: currently broken: https://github.com/PyCQA/flake8-bugbear/issues/329
# D*: pydocstyle has a lot of irrelevant checks by default. We are mainly interested in D417 (checks for missing arguments)
-ignore = B007, B023, B027, B028, C408, E203, E501, E721, E731, E741, W503, F841, D1, D200, D202, D205, D212, D400, D401, D402, D415
+ignore = B007, B023, B027, C408, E203, E501, E721, E731, E741, W503, F841, D1, D200, D202, D205, D212, D400, D401, D402, D415
extend-select = B902, B904
per_file_ignores = __init__.py: F401
-docstring-convention=google
+docstring-convention = google
+# rich has a print function which is explicitly named the same way for easy replacement
+builtins-ignorelist = print
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 2ae3704..c1287aa 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -2,21 +2,24 @@
name: 🐛 Bug report
about: Create a report to help us improve
title: "[Bug]"
-labels: ''
-assignees: ''
-
+labels: ""
+assignees: ""
---
## :bug: Bug
+
### Description
+
### Dataset
+
Which dataset are you using? [`HeiPorSPECTRAL` | `private`]
### Environment
+
diff --git a/.github/ISSUE_TEMPLATE/dataset.md b/.github/ISSUE_TEMPLATE/dataset.md
index 2eb6939..ee97cb9 100644
--- a/.github/ISSUE_TEMPLATE/dataset.md
+++ b/.github/ISSUE_TEMPLATE/dataset.md
@@ -2,9 +2,8 @@
name: 🌈 Dataset
about: Everything related to the public HeiPorSPECTRAL dataset
title: "[Dataset]"
-labels: ''
-assignees: ''
-
+labels: ""
+assignees: ""
---
## :rainbow: Dataset
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
index 16652ca..2adf831 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -1,10 +1,9 @@
---
name: 🧗 Feature request
about: Suggest an idea for this project
-title: '[Feature]'
-labels: ''
-assignees: ''
-
+title: "[Feature]"
+labels: ""
+assignees: ""
---
## :climbing: Feature
diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md
index e0e3698..7cc8860 100644
--- a/.github/ISSUE_TEMPLATE/question.md
+++ b/.github/ISSUE_TEMPLATE/question.md
@@ -2,9 +2,8 @@
name: ❓ Question
about: Ask a question
title: "[Question]"
-labels: ''
-assignees: ''
-
+labels: ""
+assignees: ""
---
## :question: Question
diff --git a/.github/workflows/dataset.yml b/.github/workflows/dataset.yml
index 1c4ef03..44f64f3 100644
--- a/.github/workflows/dataset.yml
+++ b/.github/workflows/dataset.yml
@@ -29,7 +29,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
- python-version: ['3.9', '3.10', '3.11']
+ python-version: ["3.9", "3.10", "3.11"]
steps:
- name: Checkout files
@@ -51,11 +51,11 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- cache: 'pip'
+ cache: "pip"
- name: Install htc package
run: pip install imsy-htc
-
+
- name: Run example
env:
PATH_Tivita_HeiPorSPECTRAL: HeiPorSPECTRAL_example
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 04c7142..bb0ecf5 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -9,25 +9,25 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
- python-version: ['3.11']
+ python-version: ["3.11"]
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v4
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v5
- with:
- python-version: ${{ matrix.python-version }}
- cache: 'pip'
- cache-dependency-path: 'requirements*.txt'
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: "pip"
+ cache-dependency-path: "requirements*.txt"
- - name: Install dependencies
- run: |
- pip install -r requirements.txt
- pip install pytest wheel
+ - name: Install dependencies
+ run: |
+ pip install -r requirements.txt
+ pip install pytest wheel
- - name: Install htc
- run: pip install --no-use-pep517 -e .
+ - name: Install htc
+ run: pip install --no-use-pep517 -e .
- - name: Tests
- run: py.test --doctest-modules --import-mode=importlib --collect-only .
+ - name: Tests
+ run: py.test --doctest-modules --import-mode=importlib --collect-only .
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3fd3e34..9d946ee 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
# Order plays a role here: we first need to run pyupgrade because it changes the code and the new code may not be correctly formatted so we need to run black afterwards. flake8 again should be run after black so that it doesn't e.g. complain about whitespace issues.
repos:
- repo: https://github.com/asottile/pyupgrade
- rev: v3.15.0
+ rev: v3.15.2
hooks:
- id: pyupgrade
args: [--py39-plus]
@@ -22,6 +22,7 @@ repos:
- flake8-comprehensions
- flake8-use-pathlib
- flake8-docstrings
+ - flake8-builtins
- repo: https://github.com/asottile/yesqa
rev: v1.5.0
hooks:
@@ -31,8 +32,9 @@ repos:
- flake8-comprehensions
- flake8-use-pathlib
- flake8-docstrings
+ - flake8-builtins
- repo: https://github.com/nbQA-dev/nbQA
- rev: 1.7.1
+ rev: 1.8.5
hooks:
- id: nbqa-pyupgrade
args: [--py39-plus]
@@ -43,7 +45,14 @@ repos:
- flake8-comprehensions
- flake8-use-pathlib
- flake8-docstrings
+ - flake8-builtins
args: ["--extend-ignore=E402"]
+ # Avoid potential problems with py.test if __init__.py files are missing
+ - repo: https://github.com/lk16/detect-missing-init
+ rev: v0.1.6
+ hooks:
+ - id: detect-missing-init
+ args: ["--create", "--python-folders", "htc"]
- repo: https://github.com/citation-file-format/cff-converter-python
rev: "44e8fc9"
hooks:
@@ -56,3 +65,36 @@ repos:
rev: 1.7.0
hooks:
- id: pyproject-fmt
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.5.0
+ hooks:
+ - id: check-yaml
+ - id: check-toml
+ - id: trailing-whitespace
+ - id: check-case-conflict
+ - id: debug-statements
+ - id: name-tests-test
+ args: [--pytest-test-first]
+ - id: mixed-line-ending
+ - id: end-of-file-fixer
+ - repo: https://github.com/pre-commit/mirrors-prettier
+ rev: v3.1.0
+ hooks:
+ - id: prettier
+ - repo: local
+ hooks:
+ - id: check-notebooks
+ name: Check notebooks for common errors
+ entry: python hooks/check_notebooks.py
+ language: system
+ types: [jupyter]
+ - id: check-public-readme
+ name: Check for common mistakes in the public README
+ entry: python hooks/check_public_readme.py
+ language: system
+ types: [file]
+ files: ^README_public.md$
+ - repo: meta
+ hooks:
+ - id: check-hooks-apply
+ - id: check-useless-excludes
diff --git a/CITATION.cff b/CITATION.cff
index faadcf9..c8088f4 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -12,5 +12,5 @@ identifiers:
value: 10.5281/zenodo.6577614
repository-code: "https://github.com/IMSY-DKFZ/htc"
license: MIT
-version: v0.0.15
-date-released: "2024-02-05"
+version: v0.0.16
+date-released: "2024-08-05"
diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt
index 13c8533..7bc47f1 100644
--- a/LICENSES/MIT.txt
+++ b/LICENSES/MIT.txt
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/README.md b/README.md
index f3df227..49ce1f1 100644
--- a/README.md
+++ b/README.md
@@ -4,15 +4,17 @@
[](https://pypi.org/project/imsy-htc)
[](https://pypi.org/project/imsy-htc)
[](https://github.com/IMSY-DKFZ/htc/actions/workflows/tests.yml)
+
# Hyperspectral Tissue Classification
+
This package is a framework for automated tissue classification and segmentation on medical hyperspectral imaging (HSI) data. It contains:
-- The implementation of deep learning models to solve supervised classification and segmentation problems for a variety of different input spatial granularities (pixels, superpixels, patches and entire images, cf. figure below) and modalities (RGB data, raw and processed HSI data) from our paper [βRobust deep learning-based semantic organ segmentation in hyperspectral imagesβ](https://doi.org/10.1016/j.media.2022.102488). It is based on [PyTorch](https://pytorch.org/) and [PyTorch Lightning](https://lightning.ai/).
-- Corresponding pretrained models.
-- A pipeline to efficiently load and process HSI data, to aggregate deep learning results and to validate and visualize findings.
-- Presentation of several solutions to speed up the data loading process (see [Pytorch Conference 2023 poster details](./README.md#-dealing-with-io-bottlenecks-in-high-throughput-model-training) below).
+- The implementation of deep learning models to solve supervised classification and segmentation problems for a variety of different input spatial granularities (pixels, superpixels, patches and entire images, cf. figure below) and modalities (RGB data, raw and processed HSI data) from our paper [“Robust deep learning-based semantic organ segmentation in hyperspectral images”](https://doi.org/10.1016/j.media.2022.102488). It is based on [PyTorch](https://pytorch.org/) and [PyTorch Lightning](https://lightning.ai/).
+- Corresponding pretrained models.
+- A pipeline to efficiently load and process HSI data, to aggregate deep learning results and to validate and visualize findings.
+- Presentation of several solutions to speed up the data loading process (see [Pytorch Conference 2023 poster details](./README.md#-dealing-with-io-bottlenecks-in-high-throughput-model-training) below).
@@ -20,12 +22,12 @@ This package is a framework for automated tissue classification and segmentation
This framework is designed to work on HSI data from the [Tivita](https://diaspective-vision.com/en/) cameras but you can adapt it to different HSI datasets as well. Potential applications include:
-- Use our data loading and processing pipeline to easily access image and meta data for any work utilizing Tivita datasets.
-- This repository is tightly coupled to work with the public [HeiPorSPECTRAL](https://heiporspectral.org/) dataset. If you already downloaded the data, you only need to perform the setup steps and then you can directly use the `htc` framework to work on the data (cf. [our tutorials](#tutorials)).
-- Train your own networks and benefit from a pipeline offering e.g. efficient data loading, correct hierarchical aggregation of results and a set of helpful visualizations.
-- Apply deep learning models for different spatial granularities and modalities on your own semantically annotated dataset.
-- Use our pretrained models to initialize the weights for your own training.
-- Use our pretrained models to generate predictions for your own data.
+- Use our data loading and processing pipeline to easily access image and meta data for any work utilizing Tivita datasets.
+- This repository is tightly coupled to work with the public [HeiPorSPECTRAL](https://heiporspectral.org/) dataset. If you already downloaded the data, you only need to perform the setup steps and then you can directly use the `htc` framework to work on the data (cf. [our tutorials](./README.md#tutorials)).
+- Train your own networks and benefit from a pipeline offering e.g. efficient data loading, correct hierarchical aggregation of results and a set of helpful visualizations.
+- Apply deep learning models for different spatial granularities and modalities on your own semantically annotated dataset.
+- Use our pretrained models to initialize the weights for your own training.
+- Use our pretrained models to generate predictions for your own data.
If you use the `htc` framework, please consider citing the [corresponding papers](./README.md#papers). You can also cite this repository directly via:
@@ -37,20 +39,25 @@ If you use the `htc` framework, please consider citing the [corresponding papers
author = {Sellner, Jan and Seidlitz, Silvia},
publisher = {Zenodo},
url = {https://github.com/IMSY-DKFZ/htc},
- date = {2024-02-05},
+ date = {2024-08-05},
doi = {10.5281/zenodo.6577614},
title = {Hyperspectral Tissue Classification},
- version = {v0.0.15},
+ version = {v0.0.16},
}
```
+
## Setup
+
### Package Installation
+
This package can be installed via pip:
+
```bash
pip install imsy-htc
```
+
This installs all the required dependencies defined in [`requirements.txt`](./requirements.txt). The requirements include [PyTorch](https://pytorch.org/), so you may want to install it manually before installing the package in case you have specific needs (e.g. CUDA version).
> ⚠️ This framework was developed and tested using the Ubuntu 20.04+ Linux distribution. Although we provide wheels for Windows and macOS as well, they are not tested.
@@ -63,52 +70,65 @@ This installs all the required dependencies defined in [`requirements.txt`](./re
We cannot provide wheels for all PyTorch versions. Hence, a version of `imsy-htc` may not work with all versions of PyTorch due to changes in the ABI. In the following table, we list the PyTorch versions which are compatible with the respective `imsy-htc` version.
| `imsy-htc` | `torch` |
-| -------- | ------- |
-| 0.0.9 | 1.13 |
-| 0.0.10 | 1.13 |
-| 0.0.11 | 2.0 |
-| 0.0.12 | 2.0 |
-| 0.0.13 | 2.1 |
-| 0.0.14 | 2.1 |
+| ---------- | ------- |
+| 0.0.9 | 1.13 |
+| 0.0.10 | 1.13 |
+| 0.0.11 | 2.0 |
+| 0.0.12 | 2.0 |
+| 0.0.13 | 2.1 |
+| 0.0.14 | 2.1 |
+| 0.0.15 | 2.2 |
+| 0.0.15 | 2.3 |
+| 0.0.16 | 2.4 |
However, we do not make explicit version constraints in the dependencies of the `imsy-htc` package because a future version of PyTorch may still work and we don't want to break the installation if it is not necessary.
> 💡 Please note that it is always possible to build the `imsy-htc` package with your installed PyTorch version yourself (cf. Developer Installation).
+
Optional Dependencies (imsy-htc[extra])
Some requirements are considered optional (e.g. if they are only needed by certain scripts) and you will get an error message if they are needed but unavailable. You can install them via
+
```bash
pip install --extra-index-url https://read_package:CnzBrgDfKMWS4cxf-r31@git.dkfz.de/api/v4/projects/15/packages/pypi/simple imsy-htc[extra]
```
+
or by adding the following lines to your `requirements.txt`
+
```
--extra-index-url https://read_package:CnzBrgDfKMWS4cxf-r31@git.dkfz.de/api/v4/projects/15/packages/pypi/simple
imsy-htc[extra]
```
This installs the optional dependencies defined in [`requirements-extra.txt`](./requirements-extra.txt), including for example our Python wrapper for the [challengeR toolkit](https://github.com/wiesenfa/challengeR).
+
Docker
We also provide a Docker setup for testing. As a prerequisite:
-- Clone this repository
-- Install [Docker](https://docs.docker.com/get-docker/) and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
-- Install the required dependencies to run the Docker startup script:
+
+- Clone this repository
+- Install [Docker](https://docs.docker.com/get-docker/) and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+- Install the required dependencies to run the Docker startup script:
+
```bash
pip install python-dotenv
```
Make sure that your environment variables are available and then bash into the container
+
```bash
export PATH_Tivita_HeiPorSPECTRAL="/path/to/the/dataset"
python run_docker.py bash
```
+
You can now run any commands you like. All datasets you provided via an environment variable that starts with `PATH_Tivita` will be accessible in your container (you can also check the generated `docker-compose.override.yml` file for details). Please note that the Docker container is meant for small testing only and not for development. This is also reflected by the fact that per default all results are stored inside the container and hence will also be deleted after exiting the container. If you want to keep your results, let the environment variable `PATH_HTC_DOCKER_RESULTS` point to the directory where you want to store the results.
+
@@ -129,46 +149,53 @@ pip install --no-use-pep517 -e .
```
Before committing any files, please run the static code checks locally:
+
```bash
git add .
pre-commit run --all-files
```
+
### Environment Variables
+
This framework can be configured via environment variables. Most importantly, we need to know where your data is located (e.g. `PATH_Tivita_HeiPorSPECTRAL`) and where results should be stored (e.g. `PATH_HTC_RESULTS`). For a full list of possible environment variables, please have a look at the documentation of the [`Settings`](./htc/settings.py) class.
> 💡 If you set an environment variable for a dataset path, it is important that the variable name matches the folder name (e.g. the variable name `PATH_Tivita_HeiPorSPECTRAL` matches the dataset path `my/path/HeiPorSPECTRAL` with its folder name `HeiPorSPECTRAL`, whereas the variable name `PATH_Tivita_some_other_name` does not match). Furthermore, the dataset path needs to point to a directory which contains a `data` and an `intermediates` subfolder.
There are several options to set the environment variables. For example:
-- You can specify a variable as part of your bash startup script `~/.bashrc` or before running each command:
+
+- You can specify a variable as part of your bash startup script `~/.bashrc` or before running each command:
```bash
PATH_HTC_RESULTS="~/htc/results" htc training --model image --config "models/image/configs/default"
```
- However, this might get cumbersome or might not give you the flexibility you need.
-- Recommended if you cloned this repository (in contrast to simply installing it via pip): You can create a `.env` file in the repository root and add your variables, for example:
+ However, this might get cumbersome or might not give you the flexibility you need.
+- Recommended if you cloned this repository (in contrast to simply installing it via pip): You can create a `.env` file in the repository root and add your variables, for example:
```bash
export PATH_Tivita_HeiPorSPECTRAL=/mnt/nvme_4tb/HeiPorSPECTRAL
export PATH_HTC_RESULTS=~/htc/results
```
-- Recommended if you installed the package via pip: You can create user settings for this application. The location is OS-specific. For Linux the location might be at `~/.config/htc/variables.env`. Please run `htc info` upon package installation to retrieve the exact location on your system. The content of the file is of the same format as of the `.env` above.
+- Recommended if you installed the package via pip: You can create user settings for this application. The location is OS-specific. For Linux the location might be at `~/.config/htc/variables.env`. Please run `htc info` upon package installation to retrieve the exact location on your system. The file uses the same format as the `.env` file above.
After setting your environment variables, it is recommended to run `htc info` to check that your variables are correctly registered in the framework.
## Tutorials
+
A series of [tutorials](./tutorials) can help you get started on the `htc` framework by guiding you through different usage scenarios.
+
> 💡 The tutorials make use of our public HSI dataset [HeiPorSPECTRAL](https://heiporspectral.org/). If you want to directly run them, please download the dataset first and make it accessible via the environment variable `PATH_Tivita_HeiPorSPECTRAL` as described above.
-- As a start, we recommend to take a look at this [general notebook](./tutorials/General.ipynb) which showcases the basic functionalities of the `htc` framework. Namely, it demonstrates the usage of the `DataPath` class which is the entry point to load and process HSI data. For example, you will learn how to read HSI cubes, segmentation masks and meta data. Among others, you can use this information to calculate the median spectrum of an organ.
-- If you want to use our framework with your own dataset, it might be necessary to write a custom `DataPath` class so that you can load and process your images and annotations. We [collected some tips](./tutorials/CustomDataPath.md) on how this can be achieved.
-- You have some HSI data at hand and want to use one of our pretrained models to generate predictions? Then our [prediction notebook](./tutorials/CreatingPredictions.ipynb) has got you covered.
-- You want to use our pretrained models to initialize the weights for your own training? See the section about [pretrained models](#pretrained-models) below for details.
-- You want to use our framework to train a network? The [network training notebook](./tutorials/network_training/NetworkTraining.ipynb) will show you how to achieve this on the example of a heart and lung segmentation network.
-- If you are interested in our technical validation (e.g. because you want to compare your colorchecker images with ours) and need to create a mask to detect the different colorchecker fields, you might find our automatic [colorchecker mask creation pipeline](./htc/utils/ColorcheckerMaskCreation.ipynb) useful.
+- As a start, we recommend taking a look at this [general notebook](./tutorials/General.ipynb) which showcases the basic functionalities of the `htc` framework. Namely, it demonstrates the usage of the `DataPath` class which is the entry point to load and process HSI data. For example, you will learn how to read HSI cubes, segmentation masks and meta data. Among other things, you can use this information to calculate the median spectrum of an organ.
+- If you want to use our framework with your own dataset, it might be necessary to write a custom `DataPath` class so that you can load and process your images and annotations. We [collected some tips](./tutorials/CustomDataPath.md) on how this can be achieved.
+- You have some HSI data at hand and want to use one of our pretrained models to generate predictions? Then our [prediction notebook](./tutorials/CreatingPredictions.ipynb) has got you covered.
+- You want to use our pretrained models to initialize the weights for your own training? See the section about [pretrained models](./README.md#pretrained-models) below for details.
+- You want to use our framework to train a network? The [network training notebook](./tutorials/network_training/NetworkTraining.ipynb) will show you how to achieve this using the example of a heart and lung segmentation network.
+- If you are interested in our technical validation (e.g. because you want to compare your colorchecker images with ours) and need to create a mask to detect the different colorchecker fields, you might find our automatic [colorchecker mask creation pipeline](./htc/utils/ColorcheckerMaskCreation.ipynb) useful.
We do not have a separate documentation website for our framework yet. However, most of the functions and classes are documented so feel free to explore the source code or use your favorite IDE to display the documentation. If something does not become clear from the documentation, feel free to open an issue!
## Pretrained Models
+
This framework gives you access to a variety of pretrained segmentation and classification models. The models will be automatically downloaded, provided you specify the model type (e.g. `image`) and the run folder (e.g. `2022-02-03_22-58-44_generated_default_model_comparison`). It can then be used for example to [create predictions](./tutorials/CreatingPredictions.ipynb) on some data or as a baseline for your own training (see example below).
The following table lists all the models you can get:
@@ -197,7 +224,9 @@ The following table lists all the models you can get:
After successful installation of the `htc` package, you can use any of the pretrained models listed in the table. There are several ways to use them but the general principle is that models are always specified via their `model` and `run_folder`.
### Option 1: Use the models in your own training pipeline
+
Every model class listed in the table has a static method [`pretrained_model()`](./htc/models/common/HTCModel.py) which you can use to create a model instance and initialize it with the pretrained weights. The model object will be an instance of `torch.nn.Module`. The function has examples for all the different model types but as a teaser consider the following example which loads the pretrained image HSI network:
+
```python
import torch
from htc import ModelImage, Normalization
@@ -213,26 +242,34 @@ model(input_data).shape
> 💡 Please note that when initializing the weights as in this example, the segmentation head is initialized randomly. Meaningful predictions on your own data can thus not be expected out of the box, but you will have to train the model on your data first.
### Option 2: Use the models to create predictions for your data
+
The models can be used to predict segmentation masks for your data. The segmentation models automatically sample from your input image according to the selected model spatial granularity (e.g. by creating patches) and the output is always a segmentation mask for an entire image. The set of output classes is determined by the training configuration, e.g. 18 organ classes + background for our semantic models. The [`CreatingPredictions`](./tutorials/CreatingPredictions.ipynb) notebook shows how to create predictions and how to map the network output to meaningful label names.
### Option 3: Use the models to train a network with the `htc` package
+
If you are using the `htc` framework to [train your networks](./tutorials/network_training/NetworkTraining.ipynb), you only need to define the model in your configuration:
+
```json
{
"model": {
"pretrained_model": {
"model": "image",
- "run_folder": "2022-02-03_22-58-44_generated_default_model_comparison",
+ "run_folder": "2022-02-03_22-58-44_generated_default_model_comparison"
}
}
}
```
+
This is very similar to option 1 but may be more convenient if you train with the `htc` framework.
+> 💡 We have a [JSON Schema file](./htc/utils/config.schema) which describes the structure of our config files including descriptions of the attributes.
+
## CLI
+
There is a common command line interface for many scripts in this repository. More precisely, every script which is prefixed with `run_NAME.py` can also be run via `htc NAME` from any directory. For more details, just type `htc`.
## Papers
+
This repository contains code to reproduce our publications listed below:
### π [Robust deep learning-based semantic organ segmentation in hyperspectral images](https://doi.org/10.1016/j.media.2022.102488)
@@ -241,7 +278,7 @@ This repository contains code to reproduce our publications listed below:
-In this paper, we tackled fully automatic organ segmentation and compared deep learning models on different spatial granularities (e.g. patch vs. image) and modalities (e.g. HSI vs. RGB). Furthermore, we studied the required amount of training data and the generalization capabilities of our models across subjects. The pretrained networks are related to this paper. You can find the notebooks to generate the paper figures in [paper/MIA2021](./paper/MIA2021) (the folder also includes a [reproducibility document](./paper/MIA2021/reproducibility.md)) and the models in [htc/models](./htc/models). For each model, there are three configuration files, namely `default`, `default_rgb` and `default_parameters`, which correspond to the HSI, RGB and TPI modality, respectively. You can also download the [NSD thresholds](https://e130-hyperspectal-tissue-classification.s3.dkfz.de/models/nsd_thresholds_semantic.csv) which we used for the NSD metric (cf. Fig. 12).
+In this paper, we tackled fully automatic organ segmentation and compared deep learning models on different spatial granularities (e.g. patch vs. image) and modalities (e.g. HSI vs. RGB). Furthermore, we studied the required amount of training data and the generalization capabilities of our models across subjects. The pretrained networks are related to this paper. You can find the notebooks to generate the paper figures in [paper/MIA2022](./paper/MIA2022) (the folder also includes a [reproducibility document](./paper/MIA2022/reproducibility.md)) and the models in [htc/models](./htc/models). For each model, there are three configuration files, namely `default`, `default_rgb` and `default_parameters`, which correspond to the HSI, RGB and TPI modality, respectively. You can also download the [NSD thresholds](https://e130-hyperspectal-tissue-classification.s3.dkfz.de/models/nsd_thresholds_semantic.csv) which we used for the NSD metric (cf. Fig. 12).
> π The dataset for this paper is not publicly available.
@@ -261,6 +298,7 @@ In this paper, we tackled fully automatic organ segmentation and compared deep l
volume = {80},
}
```
+
### π [Semantic segmentation of surgical hyperspectral images under geometric domain shifts](https://doi.org/10.48550/arXiv.2303.10972)
@@ -269,7 +307,7 @@ In this paper, we tackled fully automatic organ segmentation and compared deep l
-This MICCAI2023 paper is the direct successor of our MIA2021 paper. We analyzed how well our networks perform under geometrical domain shifts which commonly occur in real-world open surgeries (e.g. situs occlusions). The effect is drastic (drop of Dice similarity coefficient by 45β―%) but the good news is that performance on par with in-distribution data can be achieved with our simple, model-independent solution (augmentation method). You can find all the code in [htc/context](./htc/context) and paper figures as well as [reproducibility instructions](./paper/MICCAI2023/reproducibility.md) in [paper/MICCAI2023](./paper/MICCAI2023). Pretrained models are available for our organ transplantation networks with HSI and RGB modalities.
+This MICCAI2023 paper is the direct successor of our MIA2022 paper. We analyzed how well our networks perform under geometrical domain shifts which commonly occur in real-world open surgeries (e.g. situs occlusions). The effect is drastic (drop of Dice similarity coefficient by 45 %) but the good news is that performance on par with in-distribution data can be achieved with our simple, model-independent solution (augmentation method). You can find all the code in [htc/context](./htc/context) and paper figures as well as [reproducibility instructions](./paper/MICCAI2023/reproducibility.md) in [paper/MICCAI2023](./paper/MICCAI2023). Pretrained models are available for our organ transplantation networks with HSI and RGB modalities.
> 💡 If you are only interested in our data augmentation method, you can also head over to [Kornia](https://github.com/kornia/kornia) where this augmentation is implemented for generic use cases (including 2D and 3D data). You will find it under the name [`RandomTransplantation`](https://kornia.readthedocs.io/en/latest/augmentation.module.html#kornia.augmentation.RandomTransplantation).
@@ -292,6 +330,7 @@ This MICCAI2023 paper is the direct successor of our MIA2021 paper. We analyzed
title = {Semantic Segmentation of Surgical Hyperspectral Images Under Geometric Domain Shifts},
}
```
+
### π [Dealing with I/O bottlenecks in high-throughput model training](https://e130-hyperspectal-tissue-classification.s3.dkfz.de/figures/PyTorchConference_Poster.pdf)
@@ -307,18 +346,19 @@ You can find the code to generate the results figures of the poster in [paper/Py
```bibtex
@misc{sellner_benchmarking_2023,
- author = {Sellner, Jan and Seidlitz, Silvia and Maier-Hein, Lena},
- language = {en},
- url = {https://e130-hyperspectal-tissue-classification.s3.dkfz.de/figures/PyTorchConference_Poster.pdf},
- date = {2023-10-16},
- title = {Dealing with I/O bottlenecks in high-throughput model training},
+ author = {Sellner, Jan and Seidlitz, Silvia and Maier-Hein, Lena},
+ url = {https://e130-hyperspectal-tissue-classification.s3.dkfz.de/figures/PyTorchConference_Poster.pdf},
+ date = {2023-10-16},
+ howpublished = {Poster presented at the PyTorch Conference 2023, San Francisco, United States of America},
+ title = {Dealing with I/O bottlenecks in high-throughput model training},
}
```
+
### π [Spectral organ fingerprints for machine learning-based intraoperative tissue classification with hyperspectral imaging in a porcine model](https://doi.org/10.1038/s41598-022-15040-w)
-In this paper, we trained a classification model based on median spectra from HSI data. You can find the model code in [htc/tissue_atlas](./htc/tissue_atlas) and the confusion matrix figure of the paper in [paper/NatureReports2021](./paper/NatureReports2021) (including a reproducibility document).
+In this paper, we trained a classification model based on median spectra from HSI data. You can find the model code in [htc/tissue_atlas](./htc/tissue_atlas) and the confusion matrix figure of the paper in [paper/NatureReports2022](./paper/NatureReports2022) (including a reproducibility document).
> π The dataset for this paper is not fully publicly available, but a subset of the data is available through the public [HeiPorSPECTRAL](https://heiporspectral.org/) dataset.
@@ -338,6 +378,7 @@ In this paper, we trained a classification model based on median spectra from HS
volume = {12},
}
```
+
### π [HeiPorSPECTRAL - the Heidelberg Porcine HyperSPECTRAL Imaging Dataset of 20 Physiological Organs](https://doi.org/10.1038/s41597-023-02315-8)
@@ -365,6 +406,7 @@ If you want to learn more about the [HeiPorSPECTRAL](https://heiporspectral.org/
volume = {10},
}
```
+
### π [Künstliche Intelligenz und hyperspektrale Bildgebung zur bildgestützten Assistenz in der minimal-invasiven Chirurgie](https://doi.org/10.1007/s00104-022-01677-w)
@@ -389,8 +431,9 @@ This paper presents several applications of intraoperative HSI, including our or
volume = {93},
}
```
+
## Funding
-This project has received funding from the European Research Council (ERC) under the European Unions Horizon 2020 research and innovation programme (NEURAL SPICING, grant agreement No. 101002198) and was supported by the German Cancer Research Center (DKFZ) and the Helmholtz Association under the joint research school HIDSS4Health (Helmholtz Information and Data Science School for Health). It further received funding from the Surgical Oncology Program of the National Center for Tumor Diseases (NCT) Heidelberg.
\ No newline at end of file
+This project has received funding from the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation programme (NEURAL SPICING, grant agreement No. 101002198) and was supported by the German Cancer Research Center (DKFZ) and the Helmholtz Association under the joint research school HIDSS4Health (Helmholtz Information and Data Science School for Health). It further received funding from the Surgical Oncology Program of the National Center for Tumor Diseases (NCT) Heidelberg.
diff --git a/docker-compose.yml b/docker-compose.yml
index f618dba..4236959 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,10 +10,10 @@ services:
- htc-base
image: htc
container_name: htc
- network_mode: host # e.g. for Jupyter Lab
+ network_mode: host # e.g. for Jupyter Lab
shm_size: 10gb
volumes:
- - /var/run/docker.sock:/var/run/docker.sock # This allows Docker containers to be start from inside the container, but as siblings and not nested (https://stackoverflow.com/a/33003273/2762258)
+ - /var/run/docker.sock:/var/run/docker.sock # This allows Docker containers to be started from inside the container, but as siblings and not nested (https://stackoverflow.com/a/33003273/2762258)
# Required to get copy-on-write to work: https://github.com/moby/moby/issues/18191#issuecomment-159280820
cap_add:
- SYS_ADMIN
@@ -23,4 +23,4 @@ services:
resources:
reservations:
devices:
- - capabilities: [gpu]
+ - capabilities: [gpu]
diff --git a/htc/__init__.py b/htc/__init__.py
index 1ed6a36..31ae312 100644
--- a/htc/__init__.py
+++ b/htc/__init__.py
@@ -19,6 +19,7 @@
_import_structure = {
"cpp": [
"hierarchical_bootstrapping",
+ "hierarchical_bootstrapping_labels",
"kfold_combinations",
"map_label_image",
"nunique",
@@ -56,6 +57,8 @@
"FlexibleIdentity",
"copy_sample",
"cpu_only_tensor",
+ "group_mean",
+ "minmax_pos_neg_scaling",
"move_batch_gpu",
"pad_tensors",
"smooth_one_hot",
@@ -127,6 +130,7 @@
"utils.parallel": ["p_imap", "p_map"],
"utils.SpectrometerReader": ["SpectrometerReader"],
"utils.sqldf": ["sqldf"],
+ "utils.Task": ["Task"],
"utils.type_from_string": ["type_from_string"],
"utils.unify_path": ["unify_path"],
"utils.visualization": [
@@ -150,6 +154,7 @@
if TYPE_CHECKING:
from htc.cpp import (
hierarchical_bootstrapping,
+ hierarchical_bootstrapping_labels,
kfold_combinations,
map_label_image,
nunique,
@@ -170,6 +175,7 @@
from htc.fonts.set_font import set_font
from htc.model_processing.ImageConsumer import ImageConsumer
from htc.model_processing.Runner import Runner
+ from htc.model_processing.SinglePredictor import SinglePredictor
from htc.model_processing.TestLeaveOneOutPredictor import TestLeaveOneOutPredictor
from htc.model_processing.TestPredictor import TestPredictor
from htc.model_processing.ValidationPredictor import ValidationPredictor
@@ -187,6 +193,8 @@
FlexibleIdentity,
copy_sample,
cpu_only_tensor,
+ group_mean,
+ minmax_pos_neg_scaling,
move_batch_gpu,
pad_tensors,
smooth_one_hot,
@@ -254,6 +262,7 @@
from htc.utils.parallel import p_imap, p_map
from htc.utils.SpectrometerReader import SpectrometerReader
from htc.utils.sqldf import sqldf
+ from htc.utils.Task import Task
from htc.utils.type_from_string import type_from_string
from htc.utils.unify_path import unify_path
from htc.utils.visualization import (
diff --git a/htc/context/context_transforms.py b/htc/context/context_transforms.py
index bca56ac..80f0ff0 100644
--- a/htc/context/context_transforms.py
+++ b/htc/context/context_transforms.py
@@ -522,6 +522,10 @@ def _apply_transform(self, batch: dict[str, torch.Tensor], donor_indices: list[i
donor_regions = {k: batch[k][donor] for k in regions_keys}
valid_donor_labels = donor_labels[donor_valid_pixels].unique()
+ if len(valid_donor_labels) == 0:
+ # In rare cases, it may happen that a previous (affine) augmentation removes all valid pixels
+ # In this case, there is not much we can do here because no donor pixels are available
+ continue
selected_label = valid_donor_labels[torch.randperm(len(valid_donor_labels))[0]]
# Apply selection to organ acceptor
diff --git a/htc/context/extra_datasets/run_dataset_tables.py b/htc/context/extra_datasets/run_dataset_tables.py
index 231c9de..c29b8f0 100644
--- a/htc/context/extra_datasets/run_dataset_tables.py
+++ b/htc/context/extra_datasets/run_dataset_tables.py
@@ -14,7 +14,7 @@
if __name__ == "__main__":
# For the context runs:
- # htc dataset_tables --model image --run-folder 2023-01-27_23-59-37_random_erasing --metrics DSC --test --dataset-name masks_isolation
+ # htc dataset_tables --model image --run-folder 2023-02-08_09-40-59_elastic_0.2 --metrics DSC --test --dataset-name masks_isolation
# For the MIA runs:
# htc dataset_tables --model image --run-folder 2022-02-03_22-58-44_generated_default_model_comparison --metrics DSC --test --dataset-name masks_isolation --output-dir ~/htc/results_context/neighbour_analysis/masks_isolation/image/2022-02-03_22-58-44_generated_default_model_comparison
runner = Runner(
diff --git a/htc/context/manipulated_datasets/run_context_evaluation_table.py b/htc/context/manipulated_datasets/run_context_evaluation_table.py
index 515df2b..081f4ef 100644
--- a/htc/context/manipulated_datasets/run_context_evaluation_table.py
+++ b/htc/context/manipulated_datasets/run_context_evaluation_table.py
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
# SPDX-License-Identifier: MIT
+import gc
from pathlib import Path
import pandas as pd
@@ -43,6 +44,10 @@ def produce_predictions(
self.rows[k] += rows
+ # There might be memory overflows without explicit garbage collection
+ gc.collect()
+ torch.cuda.empty_cache()
+
def predict_step(self, batch: dict[str, torch.Tensor], batch_idx: int = None) -> dict[str, torch.Tensor]:
prediction = self.model.predict_step(batch)
prediction["class"] = prediction["class"].softmax(dim=1)
@@ -73,6 +78,9 @@ def produce_predictions(self, model: HTCLightning, batch: dict[str, torch.Tensor
rows = self._validation_context(batch, batch_idx=-1, dataloader_idx=0, context_key=k)
self.rows[k] += rows
+ gc.collect()
+ torch.cuda.empty_cache()
+
def predict_step(self, batch: dict[str, torch.Tensor], batch_idx: int = None) -> dict[str, torch.Tensor]:
return self.model.predict_step(batch)
diff --git a/htc/context/manipulated_datasets/utils.py b/htc/context/manipulated_datasets/utils.py
index a7cc18e..197bb8f 100644
--- a/htc/context/manipulated_datasets/utils.py
+++ b/htc/context/manipulated_datasets/utils.py
@@ -46,21 +46,21 @@ def compare_performance(
mapping = LabelMapping.from_config(Config(experiment_dir / "config_reference.json"))
# Select all dirs in the experiment dir that start with exp
- all_subdir = list(experiment_dir.iterdir())
+ all_subdir = sorted(experiment_dir.iterdir())
exp_subdirs = []
- for dir in all_subdir:
- if dir.name.startswith("exp"):
- exp_subdirs.append(dir)
+ for subdir in all_subdir:
+ if subdir.name.startswith("exp"):
+ exp_subdirs.append(subdir)
exp_subdirs = sorted(exp_subdirs, key=lambda i: int(i.name.removeprefix(exp_string)))
# Get all the reference dirs
if reference_experiment:
assert reference_experiment.exists()
- all_reference_subdir = list(reference_experiment.iterdir())
+ all_reference_subdir = sorted(reference_experiment.iterdir())
ref_subdirs = []
- for dir in all_reference_subdir:
- if dir.name.startswith("exp"):
- ref_subdirs.append(dir)
+ for subdir in all_reference_subdir:
+ if subdir.name.startswith("exp"):
+ ref_subdirs.append(subdir)
ref_subdirs = sorted(ref_subdirs, key=lambda i: int(i.name.removeprefix(exp_string)))
assert len(ref_subdirs) == len(exp_subdirs)
else:
diff --git a/htc/context/models/configs/context.json b/htc/context/models/configs/context.json
index 106637c..ccf922a 100644
--- a/htc/context/models/configs/context.json
+++ b/htc/context/models/configs/context.json
@@ -8,14 +8,18 @@
"checkpoint_metric_mode": "class_level",
"checkpoint_saving": "last",
"context_transforms_gpu": {
- "isolation_0": [{
- "class": "htc.context.context_transforms>OrganIsolation",
- "fill_value": "0"
- }],
- "isolation_cloth": [{
- "class": "htc.context.context_transforms>OrganIsolation",
- "fill_value": "cloth"
- }]
+ "isolation_0": [
+ {
+ "class": "htc.context.context_transforms>OrganIsolation",
+ "fill_value": "0"
+ }
+ ],
+ "isolation_cloth": [
+ {
+ "class": "htc.context.context_transforms>OrganIsolation",
+ "fill_value": "cloth"
+ }
+ ]
}
}
}
diff --git a/htc/context/models/configs/organ_transplantation_0.8.json b/htc/context/models/configs/organ_transplantation_0.8.json
index 7747d02..9c526b5 100644
--- a/htc/context/models/configs/organ_transplantation_0.8.json
+++ b/htc/context/models/configs/organ_transplantation_0.8.json
@@ -1,10 +1,12 @@
{
"inherits": "image/configs/default",
"input": {
- "transforms_gpu_extends": [{
- "class": "htc.context.context_transforms>OrganTransplantation",
- "p": 0.8
- }]
+ "transforms_gpu_extends": [
+ {
+ "class": "htc.context.context_transforms>OrganTransplantation",
+ "p": 0.8
+ }
+ ]
},
"trainer_kwargs": {
"check_val_every_n_epoch": 10
diff --git a/htc/context/models/context_evaluation.py b/htc/context/models/context_evaluation.py
index 38ce2ff..4a575b7 100644
--- a/htc/context/models/context_evaluation.py
+++ b/htc/context/models/context_evaluation.py
@@ -7,6 +7,7 @@
import pandas as pd
from htc.context.settings_context import settings_context
+from htc.evaluation.model_comparison.paper_runs import collect_comparison_runs
from htc.evaluation.utils import split_test_table
from htc.models.common.HTCModel import HTCModel
from htc.models.common.MetricAggregation import MetricAggregation
@@ -45,8 +46,8 @@ def aggregate_removal_table(path: Path) -> pd.DataFrame:
# Take the minimum for each used label, i.e. keep the worst performance per label (this corresponds to the performance of an organ if the most important neighbour is missing)
columns = [c for c in df.columns if c not in ["target_label", "dice_metric"] + additional_metrics]
df = df.groupby(columns, as_index=False).agg(
- dice_metric=pd.NamedAgg(column="dice_metric", aggfunc=min),
- **{m: pd.NamedAgg(column=m, aggfunc=min) for m in additional_metrics},
+ dice_metric=pd.NamedAgg(column="dice_metric", aggfunc="min"),
+ **{m: pd.NamedAgg(column=m, aggfunc="min") for m in additional_metrics},
)
# Implode the dataframe (to keep the same format as before)
@@ -63,7 +64,9 @@ def aggregate_removal_table(path: Path) -> pd.DataFrame:
return df.reindex(columns=column_order)
-def context_evaluation_table(run_dir: Path, test: bool = False, aggregate: bool = True) -> pd.DataFrame:
+def context_evaluation_table(
+ run_dir: Path, test: bool = False, aggregate: bool = True, keep_subjects: bool = False
+) -> pd.DataFrame:
"""
Collects all the context results for a training run.
@@ -86,6 +89,7 @@ def context_evaluation_table(run_dir: Path, test: bool = False, aggregate: bool
run_dir: Path to the training run to the context network.
test: If True, read the test table instead of the validation table.
aggregate: If True, organ-level aggregated results are returned. If False, a much larger table with metric values per image is returned.
+ keep_subjects: If True, keep the subject column in the aggregated table.
Returns: Table with (aggregated) results.
"""
@@ -237,7 +241,9 @@ def real_data_tables(names: list[str]) -> list[pd.DataFrame]:
config,
metrics=metrics,
)
- df_agg.append(agg.grouped_metrics(mode="class_level", domains=["network", "dataset"]))
+ df_agg.append(
+ agg.grouped_metrics(mode="class_level", domains=["network", "dataset"], keep_subjects=keep_subjects)
+ )
assert all(len(df) > 0 for df in df_agg), "All tables must have at least one row"
return pd.concat(df_agg)
@@ -245,13 +251,14 @@ def real_data_tables(names: list[str]) -> list[pd.DataFrame]:
return pd.concat(tables)
-def compare_context_runs(run_dirs: list[Path], test: bool = False) -> pd.DataFrame:
+def compare_context_runs(run_dirs: list[Path], test: bool = False, keep_subjects: bool = False) -> pd.DataFrame:
"""
Collect all scores for the given training runs and combine it into one table. The network column is adapted to distinguish the different runs.
Args:
run_dirs: List of training runs which should be combined.
test: If True, read the test table instead of the validation table.
+ keep_subjects: If True, keep the subject column in the aggregated table.
Returns: Table with the combined results.
"""
@@ -260,7 +267,7 @@ def compare_context_runs(run_dirs: list[Path], test: bool = False) -> pd.DataFra
# run folder name without the timestamp
name = run_dir.name[20:]
- df = context_evaluation_table(run_dir, test)
+ df = context_evaluation_table(run_dir, test, keep_subjects=keep_subjects)
if "context" in name:
df = df.replace(to_replace={"network": {"context": name}})
else:
@@ -316,7 +323,7 @@ def find_best_transform_run(name: str) -> Path:
return best_run[0]
-def glove_runs(networks: dict[str, Path] = None, aggregate: bool = True) -> pd.DataFrame:
+def glove_runs(networks: dict[str, Path] = None, aggregate: bool = True, **aggregation_kwargs) -> pd.DataFrame:
"""
Collects the test results for all glove runs. There will be two test datasets (glove and no-glove) corresponding to the out-of-distribution and in-distribution, respectively.
@@ -325,6 +332,7 @@ def glove_runs(networks: dict[str, Path] = None, aggregate: bool = True) -> pd.D
Args:
networks: Dictionary of (name, run_dir) pairs of glove runs which should be included in the final table. If None, the default glove runs (as specified in settings_context.glove_runs) are used.
aggregate: If True, organ-level aggregated results are returned. If False, a much larger table with metric values per image is returned.
+ aggregation_kwargs: Keyword arguments passed on to the grouped_metrics method.
Returns: Table with all aggregated results.
"""
@@ -340,7 +348,7 @@ def aggregate_run(tables: dict[str, pd.DataFrame], config: Config) -> pd.DataFra
config,
metrics=metrics,
)
- df_agg.append(agg.grouped_metrics(mode="class_level", domains=["network", "dataset"]))
+ df_agg.append(agg.grouped_metrics(mode="class_level", domains=["network", "dataset"], **aggregation_kwargs))
df_agg = pd.concat(df_agg)
return df_agg
@@ -389,3 +397,94 @@ def best_run_data(test: bool = False) -> pd.DataFrame:
df.replace({"network": {"context": "organ_transplantation"}}, inplace=True)
return df
+
+
+def baseline_granularity_comparison(
+ baseline_timestamp: str, glove_runs_hsi: dict[str, Path], glove_runs_rgb: dict[str, Path]
+) -> pd.DataFrame:
+ """
+ Compares the baseline performance for different spatial granularities.
+
+ Args:
+ baseline_timestamp: The timestamp for the model comparison baseline runs (MIA runs).
+ glove_runs_hsi: A dictionary mapping spatial granularities to run directories for the HSI glove runs.
+ glove_runs_rgb: A dictionary mapping spatial granularities to run directories for the RGB glove runs.
+
+ Returns: A comparison table with class-wise aggregated scores for each network and dataset.
+ """
+ table_name = "test_table"
+ df_runs = collect_comparison_runs(baseline_timestamp)
+ config = None
+ n_bootstraps = 1000
+
+ tables = []
+ for _, row in df_runs.iterrows():
+ for modality in ["hsi", "rgb"]:
+ if row["model"] == "superpixel_classification":
+ rgb = "_rgb" if modality == "rgb" else ""
+ run_folder = settings_context.superpixel_classification_timestamp + f"_default{rgb}"
+ else:
+ run_folder = row[f"run_{modality}"]
+ run_dir = HTCModel.find_pretrained_run(row["model"], run_folder)
+ if config is None:
+ config = Config(run_dir / "config.json")
+
+ df = pd.read_pickle(run_dir / f"{table_name}.pkl.xz")
+ df["network"] = row["name"]
+ df["dataset"] = "semantic"
+ df["modality"] = modality.upper()
+ tables.append(df)
+
+ for folder, dataset in [
+ ("organ_isolation_0", "isolation_0"),
+ ("organ_isolation_cloth", "isolation_cloth"),
+ ("organ_removal_0", "removal_0"),
+ ("organ_removal_cloth", "removal_cloth"),
+ ("masks_isolation", "masks_isolation"),
+ ]:
+ table_path = (
+ settings.results_dir
+ / "neighbour_analysis"
+ / folder
+ / row["model"]
+ / run_folder
+ / f"{table_name}_{dataset}.pkl.xz"
+ )
+
+ if "removal" in folder:
+ df = aggregate_removal_table(table_path)
+ else:
+ df = pd.read_pickle(table_path)
+
+ df["network"] = row["name"]
+ df["dataset"] = dataset
+ df["modality"] = modality.upper()
+ tables.append(df)
+
+ tables_agg = []
+ for df in tables:
+ agg = MetricAggregation(
+ df,
+ config,
+ metrics=["dice_metric"],
+ )
+ tables_agg.append(
+ agg.grouped_metrics(
+ mode="class_level", domains=["network", "dataset", "modality"], n_bootstraps=n_bootstraps
+ )
+ )
+ assert all(len(df) > 0 for df in tables_agg), "All tables must have at least one row"
+
+ for name, run_dir in glove_runs_hsi.items():
+ df = glove_runs({name: run_dir}, n_bootstraps=n_bootstraps)
+ df.drop(columns=["surface_distance_metric", settings_seg.nsd_aggregation_short], inplace=True)
+ df["modality"] = "HSI"
+ tables_agg.append(df)
+
+ for name, run_dir in glove_runs_rgb.items():
+ df = glove_runs({name: run_dir}, n_bootstraps=n_bootstraps)
+ df.drop(columns=["surface_distance_metric", settings_seg.nsd_aggregation_short], inplace=True)
+ df["modality"] = "RGB"
+ tables_agg.append(df)
+
+ return pd.concat(tables_agg)
diff --git a/htc/context/models/data/pigs_semantic-only_5foldsV2_glove.json b/htc/context/models/data/pigs_semantic-only_5foldsV2_glove.json
index cd0b13b..df8cff9 100644
--- a/htc/context/models/data/pigs_semantic-only_5foldsV2_glove.json
+++ b/htc/context/models/data/pigs_semantic-only_5foldsV2_glove.json
@@ -2299,4 +2299,4 @@
]
}
}
-]
\ No newline at end of file
+]
diff --git a/htc/context/models/run_baseline_tables.py b/htc/context/models/run_baseline_tables.py
index 7b9e719..a0f2f70 100644
--- a/htc/context/models/run_baseline_tables.py
+++ b/htc/context/models/run_baseline_tables.py
@@ -9,8 +9,10 @@
import htc.context.manipulated_datasets.run_context_evaluation_table as run_context_evaluation_table
from htc.context.models.run_context_test_tables import compute_glove_test_tables
from htc.context.settings_context import settings_context
+from htc.evaluation.model_comparison.paper_runs import collect_comparison_runs
from htc.models.common.HTCModel import HTCModel
from htc.settings import settings
+from htc.settings_seg import settings_seg
from htc.utils.general import subprocess_run
@@ -134,17 +136,40 @@ def compute_context_tables(runs: list[Path], table_name: str, recalculate: bool
# The main difference between the baseline and the context networks is that we store the context tables for the baseline network at a different location (settings.results_dir / "neighbour_analysis") since we do not want to change the existing models
# Additionally, we also compute the validation tables for the baseline network, but not for the context networks (as this is done automatically during training)
# This is why we cannot use the same script for both
- runs = [
- HTCModel.find_pretrained_run("image", "2022-02-03_22-58-44_generated_default_model_comparison"),
- HTCModel.find_pretrained_run("image", "2022-02-03_22-58-44_generated_default_rgb_model_comparison"),
- ]
- runs_glove = [
- settings_context.glove_runs["baseline"],
- settings_context.glove_runs_rgb["baseline"],
+ runs_main = [
+ HTCModel.find_pretrained_run(
+ "image", f"{settings_seg.model_comparison_timestamp}_generated_default_model_comparison"
+ ),
+ HTCModel.find_pretrained_run(
+ "image", f"{settings_seg.model_comparison_timestamp}_generated_default_rgb_model_comparison"
+ ),
]
- compute_context_tables(runs, "validation_table", args.recalculate)
- compute_context_tables(runs, "test_table", args.recalculate)
- compute_context_tables(runs_glove, "validation_table", args.recalculate)
- compute_context_tables(runs_glove, "test_table", args.recalculate)
+ runs_other_granularities = []
+ df_runs = collect_comparison_runs(settings_seg.model_comparison_timestamp)
+ for _, row in df_runs.iterrows():
+ if row["model"] == "image":
+ continue
+ elif row["model"] == "superpixel_classification":
+ runs_other_granularities.append(
+ HTCModel.find_pretrained_run(
+ row["model"], settings_context.superpixel_classification_timestamp + "_default"
+ )
+ )
+ runs_other_granularities.append(
+ HTCModel.find_pretrained_run(
+ row["model"], settings_context.superpixel_classification_timestamp + "_default_rgb"
+ )
+ )
+ else:
+ runs_other_granularities.append(HTCModel.find_pretrained_run(row["model"], row["run_hsi"]))
+ runs_other_granularities.append(HTCModel.find_pretrained_run(row["model"], row["run_rgb"]))
+
+ runs_glove = list(settings_context.glove_runs_granularities.values()) + list(
+ settings_context.glove_runs_granularities_rgb.values()
+ )
+
+ compute_context_tables(runs_main, "validation_table", args.recalculate)
+ compute_context_tables(runs_main, "test_table", args.recalculate)
+ compute_context_tables(runs_other_granularities, "test_table", args.recalculate)
compute_glove_test_tables(runs_glove, args.recalculate)
diff --git a/htc/context/models/run_glove_baseline_runs.py b/htc/context/models/run_glove_baseline_runs.py
index 5562f17..9921620 100644
--- a/htc/context/models/run_glove_baseline_runs.py
+++ b/htc/context/models/run_glove_baseline_runs.py
@@ -1,7 +1,10 @@
# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
# SPDX-License-Identifier: MIT
+import argparse
+
from htc.models.common.RunGenerator import RunGenerator
+from htc.settings_seg import settings_seg
from htc.utils.Config import Config
@@ -11,10 +14,32 @@ def glove_adjustment(config: Config, **kwargs) -> str:
if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description=(
+ "Start training runs on the cluster for the glove baseline models (MIA runs with the glove data"
+ " specification)."
+ ),
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ )
+ parser.add_argument(
+ "--model",
+ default=settings_seg.model_names,
+ choices=settings_seg.model_names,
+ nargs="+",
+ type=str,
+ help="One or more model names to generate runs for (each time with RGB and HSI).",
+ )
+ args = parser.parse_args()
+
rg = RunGenerator()
- for name in ["default", "default_rgb"]:
- config = Config.from_model_name(name, "image")
- rg.generate_run(config, [glove_adjustment])
+ for model in args.model:
+ for name in ["default", "default_rgb"]:
+ config = Config.from_model_name(name, model)
+ rg.generate_run(config, [glove_adjustment], model_name=model)
+
+ if model == "patch":
+ config = Config.from_model_name(name.replace("default", "default_64"), model)
+ rg.generate_run(config, [glove_adjustment], model_name=model)
rg.submit_jobs()
diff --git a/htc/context/neighbour/__init__.py b/htc/context/neighbour/__init__.py
new file mode 100644
index 0000000..17e71a8
--- /dev/null
+++ b/htc/context/neighbour/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
+# SPDX-License-Identifier: MIT
+
diff --git a/htc/context/neighbour/find_neighbour_valid_pixels.py b/htc/context/neighbour/find_neighbour_valid_pixels.py
new file mode 100644
index 0000000..d741ac5
--- /dev/null
+++ b/htc/context/neighbour/find_neighbour_valid_pixels.py
@@ -0,0 +1,82 @@
+# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
+# SPDX-License-Identifier: MIT
+
+import torch
+
+
+def find_neighbour_classes_valid_pixels(
+ labels: torch.IntTensor, label_index: int, valid_pixels: torch.BoolTensor
+) -> torch.FloatTensor:
+ """
+ Find the classes which are direct neighbours (3x3 neighbourhood) of the class label_index and count their valid neighbouring pixels.
+
+ Args:
+ labels: torch.IntTensor containing a label of a class (int) in each entry
+ label_index: int which describes for what class the neighbour pixels have to be found
+ valid_pixels: torch.BoolTensor containing which pixels of the image are valid (bool)
+ """
+
+ class_matrix = labels == label_index
+
+ # The kernel defines our neighbour concept, does not work with torch
+ kernel = torch.tensor([[True, True, True], [True, True, True], [True, True, True]], dtype=torch.float32)
+
+ unsqueezed_matrix = class_matrix.unsqueeze(dim=0).unsqueeze(dim=0).type("torch.FloatTensor")
+ unsqueezed_kernel = kernel.unsqueeze(dim=0).unsqueeze(dim=0)
+ unsqueezed_dilation_matrix = torch.nn.functional.conv2d(unsqueezed_matrix, unsqueezed_kernel, padding=(1, 1))
+ dilation_matrix = unsqueezed_dilation_matrix.squeeze(dim=0).squeeze(dim=0).type("torch.BoolTensor")
+
+ # Superpose the class matrix to the dilation matrix and set their class values into the neighbour vector.
+ superposion_matrix = ~class_matrix & dilation_matrix & valid_pixels
+
+ neighbour_vector = labels[superposion_matrix]
+ neighbour_classes, counts = torch.unique(neighbour_vector, return_counts=True)
+
+ return neighbour_classes, counts
+
+
+def neighbour_class_percentage_for_valid_pixels(
+ labels: torch.IntTensor, valid_pixels: torch.BoolTensor, n_classes: int
+) -> torch.FloatTensor:
+ """
+ Find the "percentage matrix", which indicates the neighbour class pixels percentage
+ to every class. EX: class 0 has class 1 as neighbour with 0.75 and class 2 with 0.25.
+ In the matrix ixj the i represents each class and j the neighbour to the given class.
+ EX: (0.00, 0.75, 0.25)
+ (0.50, 0.00, 0.50)
+ (0.50, 0.50, 0.00)
+
+ Args:
+ labels: torch.IntTensor containing a label of a class in each entry
+ valid_pixels: torch.BoolTensor containing which pixels of the image are valid (bool)
+ n_classes: int number of different classes that appear in the image
+ """
+
+ class_vector = labels[valid_pixels].unique()
+
+ percentage_matrix = torch.zeros((n_classes, n_classes))
+
+ for label_index in class_vector:
+ neighbour_classes, counts = find_neighbour_classes_valid_pixels(labels, label_index, valid_pixels)
+ length = sum(counts)
+ # Set the percentages in the spot matrix[class, neighbour_class]
+ percentage_matrix[label_index, neighbour_classes] = counts / length
+
+ return percentage_matrix
+
+
+def count_rows_sum_eq_1(neighbour_matrix: torch.FloatTensor) -> torch.FloatTensor:
+ """
+ Mark which classes appear in the image (1 for each row which is not all zeros, 0 otherwise).
+ Args:
+ neighbour_matrix: torch.FloatTensor
+ """
+ length = neighbour_matrix.shape[0]
+ ROW_IS_0 = torch.zeros(length, dtype=torch.float)
+ rows_diff_from_0 = torch.zeros(length)
+
+ for i in range(length):
+ if not (torch.equal(neighbour_matrix[i, :], ROW_IS_0)):
+ rows_diff_from_0[i] += 1
+
+ return rows_diff_from_0
diff --git a/htc/context/neighbour/find_normalized_neighbour_matrix.py b/htc/context/neighbour/find_normalized_neighbour_matrix.py
new file mode 100644
index 0000000..6160ce6
--- /dev/null
+++ b/htc/context/neighbour/find_normalized_neighbour_matrix.py
@@ -0,0 +1,66 @@
+# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
+# SPDX-License-Identifier: MIT
+
+import torch
+
+from htc.context.neighbour.find_neighbour_valid_pixels import (
+ count_rows_sum_eq_1,
+ neighbour_class_percentage_for_valid_pixels,
+)
+from htc.models.image.DatasetImage import DatasetImage
+
+
+def find_normalized_neighbour_matrix(dataset: DatasetImage, n_classes: int) -> torch.FloatTensor:
+ """
+ Calculate the normalized neighbourhood confusion matrix for all images.
+
+ Args:
+ dataset: A DatasetImage class which needs to contain a matrix for the 'labels' key and one for the 'valid_pixels' key.
+ n_classes: int number of different classes that appear in the dataset. (The neighbour class percentage will only be calculated for these classes)
+ """
+ result = {}
+ rows_diff_from_0 = {}
+
+ # Group all images per subject (i.e. which pig they are from) and add up their neighbour matrices
+ for sample in dataset:
+ # get neighbour matrix
+ subject_name = sample["image_name"].split("#")[0]
+ neighbour_matrix = neighbour_class_percentage_for_valid_pixels(
+ sample["labels"], sample["valid_pixels"], n_classes
+ )
+
+ # Add the matrices that have the same subject name & keep track
+ if subject_name in result:
+ result[subject_name] = torch.add(result[subject_name], neighbour_matrix)
+ else:
+ result[subject_name] = neighbour_matrix
+
+ if subject_name in rows_diff_from_0:
+ rows_diff_from_0[subject_name] = torch.add(
+ rows_diff_from_0[subject_name],
+ count_rows_sum_eq_1(neighbour_matrix),
+ )
+ else:
+ rows_diff_from_0[subject_name] = count_rows_sum_eq_1(neighbour_matrix)
+
+ # For each subject, divide by the number of images in which a class appeared to get the average
+ normalized_result = {}
+ for key in result.keys():
+ normalized_result[key] = torch.div(result[key], rows_diff_from_0[key][:, None])
+ # Make sure that NaN turn into 0s
+ normalized_result[key] = torch.nan_to_num(normalized_result[key])
+
+ # Add up the matrices of all subjects and divide by the number of subjects in which a class appeared
+ result_matrix = torch.zeros(n_classes, n_classes)
+ result_rows_diff_from_0 = torch.zeros(n_classes)
+
+ for key in normalized_result.keys():
+ result_matrix = torch.add(result_matrix, normalized_result[key])
+ result_rows_diff_from_0 = torch.add(
+ result_rows_diff_from_0,
+ count_rows_sum_eq_1(normalized_result[key]),
+ )
+ result_matrix = torch.nan_to_num(result_matrix)
+
+ normalized_result_matrix = torch.div(result_matrix, result_rows_diff_from_0[:, None])
+ return normalized_result_matrix
diff --git a/htc/context/settings_context.py b/htc/context/settings_context.py
index 05e4186..159cd60 100644
--- a/htc/context/settings_context.py
+++ b/htc/context/settings_context.py
@@ -69,6 +69,14 @@ def __init__(self):
"elastic": "#F4A460",
"baseline": self.network_colors["baseline#HSI"],
}
+ self.cmap_diverging = "PRGn"
+
+ self.labels_paper_renaming = {
+ "major_vein": "major vein",
+ "kidney_with_Gerotas_fascia": "kidney with Gerota's fascia",
+ "fat_subcutaneous": "subcutaneous fat",
+ "small_bowel": "small bowel",
+ }
# This also specifies which tasks we include in the paper (e.g. box plots)
self.task_name_mapping = {
@@ -82,6 +90,18 @@ def __init__(self):
"glove": "occlusion",
}
+ self.scenario_mapping = {
+ "semantic": "isolation",
+ "isolation_0": "isolation",
+ "isolation_cloth": "isolation",
+ "masks_isolation": "isolation",
+ "semantic2": "removal",
+ "removal_0": "removal",
+ "removal_cloth": "removal",
+ "no-glove": "occlusion",
+ "glove": "occlusion",
+ }
+
self.transforms = {
"organ_transplantation": {
"class": "htc.context.context_transforms>OrganTransplantation",
@@ -263,6 +283,9 @@ def __init__(self):
"masks_isolation": self.masks_isolation_dataset,
}
+ # The original superpixel runs got broken so we had to re-train the HSI and RGB models
+ self.superpixel_classification_timestamp = "2024-07-24_15-20-46"
+
self._results_dir = None
@property
@@ -286,6 +309,12 @@ def paper_dir(self) -> MultiPath:
target_dir.mkdir(parents=True, exist_ok=True)
return target_dir
+ @property
+ def paper_extended_dir(self) -> MultiPath:
+ target_dir = self.results_dir / "paper_extended"
+ target_dir.mkdir(parents=True, exist_ok=True)
+ return target_dir
+
@property
def best_transform_runs(self) -> dict[str, MultiPath]:
# Best runs for each transformation (found via find_best_transform_run())
@@ -327,5 +356,29 @@ def glove_runs_rgb(self) -> dict[str, MultiPath]:
),
}
+ @property
+ def glove_runs_granularities(self) -> dict[str, MultiPath]:
+ return {
+ "image": self.glove_runs["baseline"],
+ "patch_64": settings.training_dir / "patch/2024-07-19_10-26-33_default_64_glove",
+ "patch_32": settings.training_dir / "patch/2024-07-19_10-26-33_default_glove",
+ "superpixel_classification": (
+ settings.training_dir / "superpixel_classification/2024-07-19_10-26-33_default_glove"
+ ),
+ "pixel": settings.training_dir / "pixel/2024-07-19_10-26-33_default_glove",
+ }
+
+ @property
+ def glove_runs_granularities_rgb(self) -> dict[str, MultiPath]:
+ return {
+ "image": self.glove_runs_rgb["baseline"],
+ "patch_64": settings.training_dir / "patch/2024-07-19_10-26-33_default_64_rgb_glove",
+ "patch_32": settings.training_dir / "patch/2024-07-19_10-26-33_default_rgb_glove",
+ "superpixel_classification": (
+ settings.training_dir / "superpixel_classification/2024-07-19_10-26-33_default_rgb_glove"
+ ),
+ "pixel": settings.training_dir / "pixel/2024-07-19_10-26-33_default_rgb_glove",
+ }
+
settings_context = SettingContext()
diff --git a/htc/cpp/ParallelExecution.h b/htc/cpp/ParallelExecution.h
index fe514c8..edb736d 100644
--- a/htc/cpp/ParallelExecution.h
+++ b/htc/cpp/ParallelExecution.h
@@ -16,7 +16,7 @@ class ParallelExecution
public:
/**
* @brief Provides simple methods to parallelize for loops including helper functions for critical sections.
- *
+ *
* @param numbThreads specifies the number of threads used for parallelization (if not specified otherwise). Defaults to the number of cores available on the system (virtual + real cores)
*/
explicit ParallelExecution(const size_t numbThreads = std::thread::hardware_concurrency())
@@ -27,9 +27,9 @@ class ParallelExecution
/**
* @brief Stores results in a thread-safe way.
- *
+ *
* A mutex will automatically be locked on entry and unlocked on exit of this function. This is useful after the parallel computation when a common variable is accessed containing all the results.
- *
+ *
* @param callback includes the code which should be executed in a thread-safe way
*/
void setResult(const std::function& callback)
@@ -41,9 +41,9 @@ class ParallelExecution
/**
* @brief Writes messages to the console in a thread-safe way.
- *
+ *
* Same mutex behaviour as in ParallelExecution::setResult(). Useful if you don't want your console output get messed up.
- *
+ *
* @param message to print to the console
*/
void write(const std::string& message)
@@ -55,9 +55,9 @@ class ParallelExecution
/**
* @brief Executes index-based containers in parallel.
- *
+ *
* It is save to throw exceptions from inside the threads. They are catched and re-thrown later in the main thread.
- *
+ *
* @param idxBegin first index to start (including), e.g. 0
* @param idxEnd last index to start (including), e.g. container.size()
* @param callback this function will be called from each thread multiple times. Each time an associated index will be passed to the function
@@ -95,7 +95,7 @@ class ParallelExecution
}
std::deque threads(sizeThreads);
-
+
/* Calculate the index ranges */
const size_t n = idxEnd - idxBegin + 1; // Both are inclusive
const size_t nEqual = n / sizeThreads; // 38 / 12 = 3
@@ -103,7 +103,7 @@ class ParallelExecution
size_t d = 0; // The last part should be portioned equally between all threads
/*
-
+
# Thread 0
d = 0 -> d = 1
0*3, ..., 1*3-1 (+1)
diff --git a/htc/cpp/__init__.py b/htc/cpp/__init__.py
index 96e3ef3..70509ed 100644
--- a/htc/cpp/__init__.py
+++ b/htc/cpp/__init__.py
@@ -38,7 +38,15 @@ def _automatic_numpy_conversion(*args, **kwargs):
# Call the actual function
if conversion_happened:
# Return value should probably be a numpy array (because at least one argument was a numpy array)
- return func(*new_args, **new_kwargs).numpy()
+ res = func(*new_args, **new_kwargs)
+ if type(res) == tuple:
+ return tuple(r.numpy() for r in res)
+ elif type(res) == list:
+ return [r.numpy() for r in res]
+ elif type(res) == dict:
+ return {k: v.numpy() for k, v in res.items()}
+ else:
+ return res.numpy()
else:
return func(*new_args, **new_kwargs)
@@ -133,16 +141,22 @@ def tensor_mapping(tensor: Union[torch.Tensor, np.ndarray], mapping: dict[int, i
assert all(
type(k) == type(v) for k, v in mapping.items()
), "All keys and values of the mapping must have the same type"
- first_value = next(iter(mapping.values()))
-
- if isinstance(first_value, int):
- assert not tensor.is_floating_point(), f"The tensor must have an integer type ({tensor.dtype = })"
- return htc._cpp.tensor_mapping_integer(tensor, mapping)
- elif isinstance(first_value, float):
- assert tensor.is_floating_point(), f"The tensor must have an floating type ({tensor.dtype = })"
- return htc._cpp.tensor_mapping_floating(tensor, mapping)
+
+ if tensor.ndim == 0:
+ # Map scalar values directly (in-place)
+ tensor.fill_(mapping.get(tensor.item(), tensor.item()))
+ return tensor
else:
- raise ValueError(f"Invalid type: {type(first_value)}")
+ first_value = next(iter(mapping.values()))
+
+ if isinstance(first_value, int):
+ assert not tensor.is_floating_point(), f"The tensor must have an integer type ({tensor.dtype = })"
+ return htc._cpp.tensor_mapping_integer(tensor, mapping)
+ elif isinstance(first_value, float):
+ assert tensor.is_floating_point(), f"The tensor must have an floating type ({tensor.dtype = })"
+ return htc._cpp.tensor_mapping_floating(tensor, mapping)
+ else:
+ raise ValueError(f"Invalid type: {type(first_value)}")
@automatic_numpy_conversion
@@ -262,10 +276,11 @@ def hierarchical_bootstrapping(
def hierarchical_bootstrapping_labels(
- domain_mapping: dict[int, dict[int, list[int]]],
- label_mapping: dict[int, dict[int, list[int]]],
+ domain_subjects_images_mapping: dict[int, dict[int, list[int]]],
+ label_images_mapping: dict[int, list[int]],
n_labels: int,
n_bootstraps: int = 1000,
+ oversampling: bool = False,
) -> torch.Tensor:
"""
Creates bootstrap samples based on a three-level hierarchy (domain_name, subject_name, image_name) while always selecting all domains equally often in every bootstrap. Compared to `hierarchical_bootstrapping()`, this function takes the labels into account and always selects images with the same label for each domain tuple. For each domain and label, one subject and one image is selected, i.e. selection of different subjects is preferred over selecting many images per subject.
@@ -284,38 +299,46 @@ def hierarchical_bootstrapping_labels(
>>> print('ignore_line'); seed_everything(0) # doctest: +ELLIPSIS
ignore_line...
>>> domain_mapping = {
- ... 0: {0: [10, 11]}, # First camera, one subject with two images
- ... 1: {1: [20, 30], 2: [40]} # Second camera, two subjects with two and one image each
+ ... 0: {0: [10, 11]}, # First camera, one subject with two images
+ ... 1: {1: [20, 30], 2: [40]}, # Second camera, two subjects with two and one image each
... }
- >>> label_mapping = {
- ... 100: {0: [10, 11], 1: [20]}, # Images 10, 11 and 20 have label 100
- ... 200: {0: [10], 1: [30], 2: [40]} # Images 10, 30 and 40 have label 200
+ >>> label_images_mapping = {
+ ... 100: [10, 11, 20], # Images 10, 11 and 20 have label 100
+ ... 200: [10, 30, 40], # Images 10, 30 and 40 have label 200
... }
- >>> hierarchical_bootstrapping_labels(domain_mapping, label_mapping, n_labels=2, n_bootstraps=4)
+ >>> hierarchical_bootstrapping_labels(domain_mapping, label_images_mapping, n_labels=2, n_bootstraps=4)
tensor([[20, 10, 30, 10],
[20, 10, 20, 11],
[20, 11, 20, 11],
[30, 10, 20, 11]])
Args:
- domain_mapping: Domain to subjects to images mapping.
- label_mapping: Label to subjects to images mapping.
- n_labels: Number of labels to draw with replacement. For each label, images from n_domains will be selected.
+ domain_subjects_images_mapping: Domain to subjects to images mapping.
+ label_images_mapping: Label to images mapping. Every image must occur in the domain_subjects_images_mapping exactly once.
+ n_labels: Number of labels to draw with replacement per domain. For example, with 3 domains and 2 labels, 6 images will be selected per bootstrap sample.
n_bootstraps: Total number of bootstraps.
+ oversampling: If True, instead of selecting the labels randomly, the label which has been chosen least often so far is selected first. This is achieved by keeping a count of the already selected labels (including every label for each image) which is updated whenever selecting an image. This may still not yield a perfect balance across labels because some labels appear on nearly all images (e.g., background) but underrepresented classes are at least selected as often as possible.
Returns: Matrix of shape (n_bootstraps, n_domains * n_labels) with the bootstraps. It contains the values provided for the images (final layer in the mappings).
"""
- n_domains = len(set(domain_mapping.keys()))
- subjects2domain = {s: d for d, subjects in domain_mapping.items() for s in subjects}
- for label, subjects in label_mapping.items():
+ n_domains = len(set(domain_subjects_images_mapping.keys()))
+ images2domain = {
+ img: d
+ for d, subjects in domain_subjects_images_mapping.items()
+ for images in subjects.values()
+ for img in images
+ }
+ for label, images in label_images_mapping.items():
assert (
- len({subjects2domain[s] for s in subjects}) == n_domains
- ), f"Label {label} is not present in all domains (only the subjects {subjects} have this label)"
+ len({images2domain[img] for img in images}) == n_domains
+ ), f"Label {label} is not present in all domains (only the images {images} have this label)"
# We are generating a random number which will be used as seed during bootstraping
# This produces different bootstraps when the user calls this function multiple times while still allowing to set a seed
seed = torch.randint(0, torch.iinfo(torch.int32).max, (1,), dtype=torch.int32).item()
- bootstraps = htc._cpp.hierarchical_bootstrapping_labels(domain_mapping, label_mapping, n_labels, n_bootstraps, seed)
+ bootstraps = htc._cpp.hierarchical_bootstrapping_labels(
+ domain_subjects_images_mapping, label_images_mapping, n_labels, n_bootstraps, oversampling, seed
+ )
assert bootstraps.shape == (n_bootstraps, n_domains * n_labels)
return bootstraps
diff --git a/htc/cpp/colorchecker_automask.cpp b/htc/cpp/colorchecker_automask.cpp
index 8707b4e..5180123 100644
--- a/htc/cpp/colorchecker_automask.cpp
+++ b/htc/cpp/colorchecker_automask.cpp
@@ -67,7 +67,7 @@ class ColorcheckerAutomask {
{"square_dist_horizontal", this->square_dist_horizontal + this->safety_margin + best_param.delta_horizontal},
{"square_dist_vertical", this->square_dist_vertical + this->safety_margin + best_param.delta_vertical},
};
-
+
if (this->cc_board == "cc_passport") {
// Search for the right part on the right image side
this->generate_parameters(/*offset_left_min_start=*/this->img_width / 2, /*offset_left_stop=*/this->img_width);
diff --git a/htc/cpp/evaluate_superpixels.cpp b/htc/cpp/evaluate_superpixels.cpp
index 4cbda77..e75ab31 100644
--- a/htc/cpp/evaluate_superpixels.cpp
+++ b/htc/cpp/evaluate_superpixels.cpp
@@ -8,11 +8,11 @@ std::tuple spxs_predictions(torch::Tensor& spxs, t
spxs = spxs.flatten();
labels = labels.flatten();
mask = mask.flatten();
-
+
auto spxs_a = spxs.accessor();
auto labels_a = labels.accessor();
auto mask_a = mask.accessor();
-
+
// Count for each superpixel which labels the corresponding pixels have
auto spx_label_counts = torch::zeros({spxs.max().item() + 1, n_classes}, torch::kInt32);
auto spx_label_counts_a = spx_label_counts.accessor();
@@ -23,18 +23,18 @@ std::tuple spxs_predictions(torch::Tensor& spxs, t
spx_label_counts_a[spxs_a[i]][labels_a[i]] += 1;
}
}
-
+
// The label of the superpixel is the mode of the labels, i.e. the max count
auto spx_label = spx_label_counts.argmax(1); // The index of the max count corresponds to the label of the superpixel (mask-only superpixels are assigned to the background)
auto spx_label_a2 = spx_label.accessor();
-
+
// Project the calculated labels for each superpixel back to the image
auto predictions = torch::empty(shape[0] * shape[1], torch::kInt64);
auto predictions_a = predictions.accessor();
-
+
for (int i = 0; i < spxs_a.size(0); ++i) {
predictions_a[i] = spx_label_a2[spxs_a[i]];
}
-
+
return std::make_tuple(predictions.reshape(shape), spx_label_counts);
}
diff --git a/htc/cpp/hierarchical_bootstrapping.h b/htc/cpp/hierarchical_bootstrapping.h
index 188f649..c29f699 100644
--- a/htc/cpp/hierarchical_bootstrapping.h
+++ b/htc/cpp/hierarchical_bootstrapping.h
@@ -9,14 +9,16 @@ using Domain2Subjects = std::unordered_map>;
using Subject2Images = std::unordered_map>;
using Domain2Subjects2Images = std::unordered_map;
using Label2Subjects2Images = std::unordered_map;
+using Label2Images = std::unordered_map>;
+using Image2Labels = std::unordered_map>;
torch::Tensor hierarchical_bootstrapping(Domain2Subjects2Images& mapping, int n_subjects, int n_images, int n_bootstraps, unsigned int seed) {
std::mt19937 gen(seed); // Offers a good uniform distribution (https://www.boost.org/doc/libs/1_61_0/doc/html/boost_random/reference.html#boost_random.reference.generators)
-
+
auto n_domains = mapping.size();
auto bootstraps = torch::empty({n_bootstraps, static_cast(n_domains * n_subjects * n_images)}, torch::kInt64);
auto bootstraps_a = bootstraps.accessor();
-
+
// Cache domain2subjects vector mapping for later use (we don't want to do this all over again inside the bootstrap loop)
Domain2Subjects domain2subjects;
for (const auto &[domain_index, subject2images]: mapping) {
@@ -25,64 +27,141 @@ torch::Tensor hierarchical_bootstrapping(Domain2Subjects2Images& mapping, int n_
domain2subjects[domain_index].push_back(p.first);
}
}
-
+
for (int b = 0; b < n_bootstraps; ++b) {
int col = 0;
for (auto &[domain_index, subject2images]: mapping) {
std::vector& subjects = domain2subjects[domain_index];
-
+
std::uniform_int_distribution<> random_subject(0, subjects.size() - 1);
-
+
for (int subject_index = 0; subject_index < n_subjects; ++subject_index) {
auto& subject = subjects[random_subject(gen)];
auto& images = subject2images[subject];
std::uniform_int_distribution<> random_image(0, images.size() - 1);
-
+
for (int image_index = 0; image_index < n_images; ++image_index) {
bootstraps_a[b][col++] = images[random_image(gen)];
}
}
}
}
-
+
return bootstraps;
}
-torch::Tensor hierarchical_bootstrapping_labels(Domain2Subjects2Images& domain_mapping, Label2Subjects2Images& label_mapping, int n_labels, int n_bootstraps, unsigned int seed) {
- std::mt19937 gen(seed); // Offers a good uniform distribution (https://www.boost.org/doc/libs/1_61_0/doc/html/boost_random/reference.html#boost_random.reference.generators)
-
- auto n_domains = domain_mapping.size();
- auto bootstraps = torch::empty({ n_bootstraps, static_cast(n_domains * n_labels) }, torch::kInt64);
- auto bootstraps_a = bootstraps.accessor();
-
- // Cache domain2subjects vector mapping for later use (we don't want to do this all over again inside the bootstrap loop)
+Domain2Subjects construct_domain_subjects_mapping(const Domain2Subjects2Images& domain_subjects_images_mapping) {
Domain2Subjects domain2subjects;
- for (const auto& [domain_index, subject2images] : domain_mapping) {
+ for (const auto& [domain_index, subject2images] : domain_subjects_images_mapping) {
domain2subjects[domain_index].reserve(subject2images.size());
for (auto const& p : subject2images) {
domain2subjects[domain_index].push_back(p.first);
}
}
+ return domain2subjects;
+}
+
+Label2Subjects2Images construct_label_subjects_images_mapping(const Domain2Subjects2Images& domain_subjects_images_mapping, const Label2Images& label_images_mapping) {
+ Label2Subjects2Images label_subjects_images_mapping;
+ for (const auto& [label, label_images] : label_images_mapping) {
+ for (int64_t label_image : label_images) {
+
+ // Search for the current image in the domain mapping
+ bool found = false;
+ for (const auto& [domain_index, subject2images] : domain_subjects_images_mapping) {
+ for (const auto& [subject, images] : subject2images) {
+ if (std::find(images.begin(), images.end(), label_image) != images.end()) {
+ label_subjects_images_mapping[label][subject].push_back(label_image);
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ break;
+ }
+ }
+ }
+ }
+
+ return label_subjects_images_mapping;
+}
+
+Image2Labels construct_image_labels_mapping(const Label2Images& label_images_mapping) {
+ Image2Labels image_labels_mapping;
+ for (const auto& [label, images] : label_images_mapping) {
+ for (int64_t image : images) {
+ image_labels_mapping[image].push_back(label);
+ }
+ }
+
+ return image_labels_mapping;
+}
+
+torch::Tensor hierarchical_bootstrapping_labels(Domain2Subjects2Images& domain_subjects_images_mapping, Label2Images& label_images_mapping, int n_labels, int n_bootstraps, bool oversampling, unsigned int seed) {
+ std::mt19937 gen(seed); // Offers a good uniform distribution (https://www.boost.org/doc/libs/1_61_0/doc/html/boost_random/reference.html#boost_random.reference.generators)
+
+ auto n_domains = domain_subjects_images_mapping.size();
+ auto bootstraps = torch::empty({ n_bootstraps, static_cast(n_domains * n_labels) }, torch::kInt64);
+ auto bootstraps_a = bootstraps.accessor();
+
+ // Cache common mappings for later use (we don't want to do this all over again inside the bootstrap loop)
+ Domain2Subjects domain_subjects_mapping = construct_domain_subjects_mapping(domain_subjects_images_mapping);
+ Label2Subjects2Images label_subjects_images_mapping = construct_label_subjects_images_mapping(domain_subjects_images_mapping, label_images_mapping);
+ Image2Labels image_labels_mapping = construct_image_labels_mapping(label_images_mapping);
+
// List of possible labels
std::vector labels;
- labels.reserve(label_mapping.size());
- for (auto& item : label_mapping) {
+ labels.reserve(label_images_mapping.size());
+ for (auto& item : label_images_mapping) {
labels.push_back(item.first);
}
std::uniform_int_distribution<> random_label(0, labels.size() - 1);
+ // Keep track of how many times each label has been selected
+ std::unordered_map label_counts;
+ for (int64_t label : labels) {
+ label_counts[label] = 0;
+ }
+
for (int b = 0; b < n_bootstraps; ++b) {
int col = 0;
// For each label, we select per domain one subject and one image and repeat this process n_labels times
while (col < bootstraps.size(1)) {
- auto label = labels[random_label(gen)];
- auto& label_subjects = label_mapping[label];
+ // First select a label
+ int64_t label;
+ if (oversampling) {
+ // Find all labels which have the current least occurrence (there might be multiple labels with the same count)
+ std::unordered_map> min_count_labels;
+ int64_t min_count = std::numeric_limits::max();
+ for (auto& [l, count] : label_counts) {
+ min_count_labels[count].push_back(l);
+ if (count < min_count) {
+ min_count = count;
+ }
+ }
- for (auto& [domain_index, subject2images] : domain_mapping) {
- std::vector& subjects_domain = domain2subjects[domain_index];
+ auto& possible_labels = min_count_labels[min_count];
+ if (possible_labels.size() > 1) {
+ // From the labels with the lowest count, select one randomly
+ std::uniform_int_distribution<> random_possible_label(0, possible_labels.size() - 1);
+ label = possible_labels[random_possible_label(gen)];
+ }
+ else {
+ // If there is only one possible label, we do not need to select anything randomly
+ label = possible_labels[0];
+ }
+ }
+ else {
+ label = labels[random_label(gen)];
+ }
+
+ auto& label_subjects = label_subjects_images_mapping[label];
+
+ for (auto& [domain_index, subject2images] : domain_subjects_images_mapping) {
+ std::vector& subjects_domain = domain_subjects_mapping[domain_index];
// Select the subjects which have images of the current label
std::vector subjects;
@@ -98,7 +177,15 @@ torch::Tensor hierarchical_bootstrapping_labels(Domain2Subjects2Images& domain_m
// Select random image
auto& images = label_subjects[subject];
std::uniform_int_distribution<> random_image(0, images.size() - 1);
- bootstraps_a[b][col++] = images[random_image(gen)];
+ int64_t image = images[random_image(gen)];
+ bootstraps_a[b][col++] = image;
+
+ if (oversampling) {
+ // Update label counts for all the labels which appear in the selected image
+ for (int64_t label : image_labels_mapping[image]) {
+ label_counts[label]++;
+ }
+ }
}
}
}
diff --git a/htc/cpp/map_label_image.cpp b/htc/cpp/map_label_image.cpp
index 0699b6b..993e1b7 100644
--- a/htc/cpp/map_label_image.cpp
+++ b/htc/cpp/map_label_image.cpp
@@ -5,10 +5,10 @@
torch::Tensor map_label_image(const torch::Tensor& label_image, std::unordered_map>& label_color_mapping) {
auto mapped_image = torch::empty({label_image.size(0), label_image.size(1), 4}, torch::kFloat32);
-
+
auto label_image_a = label_image.accessor();
auto mapped_image_a = mapped_image.accessor();
-
+
for (int row = 0; row < label_image_a.size(0); ++row) {
for (int col = 0; col < label_image_a.size(1); ++col) {
auto label = label_image_a[row][col];
@@ -19,6 +19,6 @@ torch::Tensor map_label_image(const torch::Tensor& label_image, std::unordered_m
mapped_image_a[row][col][3] = std::get<3>(color);
}
}
-
+
return mapped_image;
}
diff --git a/htc/cpp/nunique.cpp b/htc/cpp/nunique.cpp
index c1566ee..35479a7 100644
--- a/htc/cpp/nunique.cpp
+++ b/htc/cpp/nunique.cpp
@@ -30,7 +30,7 @@ torch::Tensor nunique(const torch::Tensor& in, int64_t dim) {
*reinterpret_cast(out_data) = values.size();
};
-
+
// Unfortunately, we need to execute the loop in serial because the unordered_set is not thread safe
iter.serial_for_each(loop, {0, iter.numel()});
});
diff --git a/htc/cpp/segmentation_mask.cpp b/htc/cpp/segmentation_mask.cpp
index 5f89b98..24f65e9 100644
--- a/htc/cpp/segmentation_mask.cpp
+++ b/htc/cpp/segmentation_mask.cpp
@@ -5,10 +5,10 @@
torch::Tensor segmentation_mask(const torch::Tensor& label_image, std::map, int>& color_mapping) {
auto seg = torch::empty({label_image.size(0), label_image.size(1)}, torch::kUInt8);
-
+
auto seg_a = seg.accessor();
auto label_a = label_image.accessor();
-
+
for (int row = 0; row < label_a.size(0); ++row) {
for (int col = 0; col < label_a.size(1); ++col) {
auto pixel = label_a[row][col];
@@ -20,6 +20,6 @@ torch::Tensor segmentation_mask(const torch::Tensor& label_image, std::map None:
results: Results from the computation step (one entry per path).
"""
pass
+
+ def _compute_necessary(self, image_name: str) -> bool:
+ """
+ Check if the computation for the given image is necessary.
+
+ Args:
+ image_name: Name of the image.
+
+ Returns: True if the computation is necessary (file does not exist or is empty), False otherwise.
+ """
+ target_path = self.output_dir / f"{image_name}.{self.file_type}"
+ return not (target_path.is_file() and target_path.stat().st_size > 0)
diff --git a/htc/data_processing/run_l1_normalization.py b/htc/data_processing/run_l1_normalization.py
index 8a8ea04..a936e84 100644
--- a/htc/data_processing/run_l1_normalization.py
+++ b/htc/data_processing/run_l1_normalization.py
@@ -16,14 +16,21 @@
class L1Normalization(DatasetIteration):
- def __init__(self, paths: list[DataPath], file_type: str, output_dir: Path = None, regenerate: bool = False):
+ def __init__(
+ self,
+ paths: list[DataPath],
+ file_type: str,
+ output_dir: Path = None,
+ regenerate: bool = False,
+ folder_name: str = "L1",
+ ):
super().__init__(paths)
self.file_type = file_type
if output_dir is None:
- self.output_dir = settings.intermediates_dir_all / "preprocessing" / "L1"
+ self.output_dir = settings.intermediates_dir_all / "preprocessing" / folder_name
else:
- self.output_dir = output_dir / "L1"
+ self.output_dir = output_dir / folder_name
self.output_dir.mkdir(exist_ok=True, parents=True)
config = Config({
@@ -36,7 +43,7 @@ def __init__(self, paths: list[DataPath], file_type: str, output_dir: Path = Non
clear_directory(self.output_dir)
def compute(self, i: int) -> None:
- if not (self.output_dir / f"{self.paths[i].image_name()}.{self.file_type}").exists():
+ if self._compute_necessary(self.paths[i].image_name()):
sample = self.dataset[i]
img = sample["features"].numpy().astype(np.float16)
diff --git a/htc/data_processing/run_median_spectra.py b/htc/data_processing/run_median_spectra.py
index 7af4411..39af479 100644
--- a/htc/data_processing/run_median_spectra.py
+++ b/htc/data_processing/run_median_spectra.py
@@ -112,13 +112,16 @@ def compute(self, i: int) -> list[dict]:
current_row = {"image_name": path.image_name()}
current_row |= path.image_name_typed()
+ # Avoid std nan values for single-element spectra
+ correction = 0 if spectra.size(0) == 1 else 1
+
current_row |= {
"label_index": label_index,
"label_name": label_name,
"median_spectrum": spectra.quantile(q=0.5, dim=0).numpy(), # Same as np.median
- "std_spectrum": spectra.std(dim=0).numpy(),
+ "std_spectrum": spectra.std(dim=0, correction=correction).numpy(),
"median_normalized_spectrum": spectra_normalized.quantile(q=0.5, dim=0).numpy(),
- "std_normalized_spectrum": spectra_normalized.std(dim=0).numpy(),
+ "std_normalized_spectrum": spectra_normalized.std(dim=0, correction=correction).numpy(),
"n_pixels": counts.item(),
"median_sto2": np.median(selected_sto2.data),
"std_sto2": np.std(selected_sto2.data),
diff --git a/htc/data_processing/run_parameter_images.py b/htc/data_processing/run_parameter_images.py
index 365e31d..ed799e3 100644
--- a/htc/data_processing/run_parameter_images.py
+++ b/htc/data_processing/run_parameter_images.py
@@ -14,14 +14,21 @@
class ParameterImages(DatasetIteration):
- def __init__(self, paths: list[DataPath], file_type: str, output_dir: Path = None, regenerate: bool = False):
+ def __init__(
+ self,
+ paths: list[DataPath],
+ file_type: str,
+ output_dir: Path = None,
+ regenerate: bool = False,
+ folder_name: str = "parameter_images",
+ ):
super().__init__(paths)
self.file_type = file_type
if output_dir is None:
- self.output_dir = settings.intermediates_dir_all / "preprocessing" / "parameter_images"
+ self.output_dir = settings.intermediates_dir_all / "preprocessing" / folder_name
else:
- self.output_dir = output_dir / "parameter_images"
+ self.output_dir = output_dir / folder_name
self.output_dir.mkdir(exist_ok=True, parents=True)
if regenerate:
@@ -30,7 +37,7 @@ def __init__(self, paths: list[DataPath], file_type: str, output_dir: Path = Non
def compute(self, i: int) -> None:
path = self.paths[i]
- if not (self.output_dir / f"{path.image_name()}.{self.file_type}").exists():
+ if self._compute_necessary(path.image_name()):
cube = path.read_cube()
sto2 = path.compute_sto2(cube)
params = {
diff --git a/htc/data_processing/run_raw16.py b/htc/data_processing/run_raw16.py
index ab5d276..f0935ec 100644
--- a/htc/data_processing/run_raw16.py
+++ b/htc/data_processing/run_raw16.py
@@ -23,15 +23,17 @@ def __init__(
output_dir: Path = None,
regenerate: bool = False,
precision: str = "16",
+ folder_name: str = None,
):
super().__init__(paths)
self.file_type = file_type
self.precision = precision
+ _folder_name = f"raw{self.precision}" if folder_name is None else folder_name
if output_dir is None:
- self.output_dir = settings.intermediates_dir_all / "preprocessing" / f"raw{self.precision}"
+ self.output_dir = settings.intermediates_dir_all / "preprocessing" / _folder_name
else:
- self.output_dir = output_dir / f"raw{self.precision}"
+ self.output_dir = output_dir / _folder_name
self.output_dir.mkdir(exist_ok=True, parents=True)
config = Config({
@@ -44,7 +46,7 @@ def __init__(
clear_directory(self.output_dir)
def compute(self, i: int) -> None:
- if not (self.output_dir / f"{self.paths[i].image_name()}.{self.file_type}").exists():
+ if self._compute_necessary(self.paths[i].image_name()):
sample = self.dataset[i]
img = sample["features"].numpy()
if self.precision == "16":
diff --git a/htc/data_processing/run_standardization.py b/htc/data_processing/run_standardization.py
index 8b31596..38fa05a 100644
--- a/htc/data_processing/run_standardization.py
+++ b/htc/data_processing/run_standardization.py
@@ -38,16 +38,16 @@ def channel_params(self) -> np.ndarray:
def pixel_params(self) -> np.ndarray:
# Pixel params based on the channel sums
total_elements = self.total_elements * np.prod(self.sum.shape)
- sum = np.sum(self.sum)
+ total = np.sum(self.sum)
sum_squarred = np.sum(self.sum_squarred)
- mean = sum / total_elements
- std = np.sqrt((sum_squarred - sum**2 / total_elements) / total_elements)
+ mean = total / total_elements
+ std = np.sqrt((sum_squarred - total**2 / total_elements) / total_elements)
return mean, std
-def calc_standardization(datasets: dict[str, DataPath]) -> dict[str, float]:
+def calc_standardization(datasets: dict[str, list[DataPath]]) -> dict[str, float]:
rs_hsi = RunningStats(channels=100)
rs_tpi = RunningStats(channels=4)
rs_rgb = RunningStats(channels=3)
@@ -87,7 +87,7 @@ def calc_standardization_folds(specs: DataSpecification) -> dict[str, dict[str,
if __name__ == "__main__":
- prep = ParserPreprocessing(description="Precomputes a filter for all images")
+ prep = ParserPreprocessing(description="Precomputes standardization statistics for each fold")
paths = prep.get_paths() # Must always be called
assert (
prep.args.spec is not None
diff --git a/htc/data_processing/run_superpixel_prediction.py b/htc/data_processing/run_superpixel_prediction.py
index 462f007..a1c459a 100644
--- a/htc/data_processing/run_superpixel_prediction.py
+++ b/htc/data_processing/run_superpixel_prediction.py
@@ -39,7 +39,6 @@ def aggregate_results(i: int) -> dict[str, Union[dict, Any]]:
if __name__ == "__main__":
config = Config.from_model_name("default", "superpixel_classification")
- config["input/no_features"] = True
paths = list(DataPath.iterate(settings.data_dirs.semantic))
dataset_all = DatasetImage(paths, train=False, config=config)
diff --git a/htc/evaluation/ExperimentAnalysis.ipynb b/htc/evaluation/ExperimentAnalysis.ipynb
index 1eaa43a..33e269d 100644
--- a/htc/evaluation/ExperimentAnalysis.ipynb
+++ b/htc/evaluation/ExperimentAnalysis.ipynb
@@ -33,11 +33,12 @@
"from htc.utils.visualization import (\n",
" create_class_scores_figure,\n",
" create_confusion_figure,\n",
- " create_confusion_figure_comparison,\n",
" create_ece_figure,\n",
" create_running_metric_plot,\n",
+ " create_spec_labels_figure,\n",
" create_surface_dice_plot,\n",
" create_training_stats_figure,\n",
+ " create_training_stats_label_figure,\n",
" show_class_scores_epoch,\n",
" show_loss_chart,\n",
" visualize_dict,\n",
@@ -55,7 +56,7 @@
"outputs": [],
"source": [
"# Parameter for papermill\n",
- "run_dir = settings.training_dir / \"image/2022-01-27_15-52-09_generated_default_lr=0.001\""
+ "run_dir = settings.training_dir / \"image/2022-02-03_22-58-44_generated_default_model_comparison\""
]
},
{
@@ -68,7 +69,7 @@
"output_type": "stream",
"text": [
"Model: image\n",
- "Experiment: 2022-01-27_15-52-09_generated_default_lr=0.001\n"
+ "Experiment: 2022-02-03_22-58-44_generated_default_model_comparison\n"
]
}
],
@@ -157,8 +158,8 @@
"
0
\n",
"
49
\n",
"
NaN
\n",
- "
0.925478
\n",
- "
0.716378
\n",
+ "
0.814283
\n",
+ "
0.676449
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
@@ -169,11 +170,11 @@
"
0
\n",
"
99
\n",
"
NaN
\n",
- "
0.677840
\n",
- "
0.642946
\n",
- "
0.151312
\n",
- "
1.192136
\n",
- "
0.770478
\n",
+ "
0.610165
\n",
+ "
0.570003
\n",
+ "
0.101304
\n",
+ "
1.109594
\n",
+ "
0.730523
\n",
" \n",
"
\n",
"
3
\n",
@@ -193,8 +194,8 @@
"
1
\n",
"
149
\n",
"
NaN
\n",
- "
0.502476
\n",
- "
0.495842
\n",
+ "
0.358511
\n",
+ "
0.425239
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
@@ -206,22 +207,22 @@
"text/plain": [
" fold_name epoch_index step lr-Adam train/ce_loss_step \\\n",
"0 fold_P041,P060,P069 0 0 0.00100 NaN \n",
- "1 fold_P041,P060,P069 0 49 NaN 0.925478 \n",
- "2 fold_P041,P060,P069 0 99 NaN 0.677840 \n",
+ "1 fold_P041,P060,P069 0 49 NaN 0.814283 \n",
+ "2 fold_P041,P060,P069 0 99 NaN 0.610165 \n",
"3 fold_P041,P060,P069 0 100 0.00099 NaN \n",
- "4 fold_P041,P060,P069 1 149 NaN 0.502476 \n",
+ "4 fold_P041,P060,P069 1 149 NaN 0.358511 \n",
"\n",
" train/dice_loss_step dice_metric train/ce_loss_epoch \\\n",
"0 NaN NaN NaN \n",
- "1 0.716378 NaN NaN \n",
- "2 0.642946 0.151312 1.192136 \n",
+ "1 0.676449 NaN NaN \n",
+ "2 0.570003 0.101304 1.109594 \n",
"3 NaN NaN NaN \n",
- "4 0.495842 NaN NaN \n",
+ "4 0.425239 NaN NaN \n",
"\n",
" train/dice_loss_epoch \n",
"0 NaN \n",
"1 NaN \n",
- "2 0.770478 \n",
+ "2 0.730523 \n",
"3 NaN \n",
"4 NaN "
]
@@ -267,83 +268,107 @@
"
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "rows = []\n",
+ "for fold_name, splits in spec:\n",
+ " for name, paths in splits.items():\n",
+ " r = {\n",
+ " \"fold_name\": fold_name,\n",
+ " \"split_name\": name,\n",
+ " }\n",
+ " labels = []\n",
+ " for p in paths:\n",
+ " labels += p.annotated_labels()\n",
+ " label_names, counts = np.unique(labels, return_counts=True)\n",
+ " r[\"label_name\"] = label_names\n",
+ " r[\"# images\"] = counts\n",
+ "\n",
+ " rows.append(r)\n",
+ "\n",
+ "df_labels = pd.DataFrame(rows)\n",
+ "df_labels = df_labels.explode([\"label_name\", \"# images\"])\n",
+ "\n",
+ "fig = px.bar(df_labels, x=\"label_name\", y=\"# images\", color=\"split_name\", facet_col=\"fold_name\", facet_col_wrap=2)\n",
+ "fig.update_layout(height=800, width=1200, template=\"plotly_white\")\n",
+ "fig.update_layout(\n",
+ " title_x=0.5, title_text=f\"image-level label distribution for each fold and split for the spec {path.stem}\"\n",
+ ")\n",
+ "\n",
+ "compress_html(target_dir / f\"{path.stem}_labels.html\", fig)\n",
+ "fig"
+ ]
}
],
"metadata": {
@@ -240,7 +3264,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.12"
+ "version": "3.11.9"
}
},
"nbformat": 4,
diff --git a/htc/models/data/SpecsGeneration.py b/htc/models/data/SpecsGeneration.py
index 6d26956..6098a40 100644
--- a/htc/models/data/SpecsGeneration.py
+++ b/htc/models/data/SpecsGeneration.py
@@ -27,6 +27,7 @@ def generate_dataset(self, target_folder: Path = None):
specs_path = target_folder / f"{self.name}.json"
with specs_path.open("w") as f:
json.dump(folds, f, indent=4)
+ f.write("\n") # Add newline at the end of the file so that all JSON files are formatted the same way
# Make sure we can read the specs file
DataSpecification(specs_path)
diff --git a/htc/models/data/data_spec.schema b/htc/models/data/data_spec.schema
new file mode 100644
index 0000000..a0e43d7
--- /dev/null
+++ b/htc/models/data/data_spec.schema
@@ -0,0 +1,38 @@
+{
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "$comment": "This schema file defines the common structure of our data specification files used by the htc framework to define the training folds and splits. We are storing the data specification files in this repository because they are fundamental for every training and we want to ensure that everyone uses the same splits. There are also several tests running against each data specification file (e.g., that they adhere to this schema definition).",
+ "type": "array",
+ "items": {
+ "description": "Each object in this list defines the training setup for one fold. The folds are trained in the order defined in the data specification.",
+ "type": "object",
+ "properties": {
+ "fold_name": {
+ "description": "The name of the fold (will be used as folder name for the training run).",
+ "type": "string"
+ }
+ },
+ "patternProperties": {
+ "^(?:train|val|test)": {
+ "description": "The configuration of one split for this fold. Usually, there is a training, a validation and a testing split. The test split is usually the same for all folds. There may be more than one split per split type, e.g., two validation splits. However, for train and test splits, all paths from all respective splits will be combined (e.g., paths from train_1 and train_2 become the training paths) and sorted by image name (not relevant for training but for testing). Only for validation splits, a list of datasets will be used (e.g., in HTCLightning.datasets_val) with the index being defined by the order of the splits in the spec.",
+ "type": "object",
+ "properties": {
+ "image_names": {
+ "description": "List of unique image names which should be included in this split. The name may also include the desired annotation name as defined by the DataPath class.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "data_path_class": {
+ "description": "By default, our htc.tivita.DataPath class will be used to load the images. However, with this key, it is also possible to specify a custom class which should be used to load the images. The custom class must have a from_image_name() method which gets the name of an image and should return an instance of the class. Specify the class in the format module>class (e.g., htc.bias.bias_aware_ML.SimulationPath>SimulationPath) or refer to the type_from_string() function for more details.",
+ "type": "string"
+ }
+ },
+ "required": ["image_names"],
+ "additionalProperties": false
+ }
+ },
+ "required": ["fold_name"],
+ "additionalProperties": false
+ }
+}
diff --git a/htc/models/data/pigs_semantic-only_5foldsV2.json b/htc/models/data/pigs_semantic-only_5foldsV2.json
index 4807bdc..b926ddb 100644
--- a/htc/models/data/pigs_semantic-only_5foldsV2.json
+++ b/htc/models/data/pigs_semantic-only_5foldsV2.json
@@ -2,8 +2,6 @@
{
"fold_name": "fold_P041,P060,P069",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -262,8 +260,6 @@
]
},
"val_semantic_unknown": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -342,8 +338,6 @@
]
},
"val_semantic_known": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_57_47",
"P045#2020_02_05_11_01_20",
@@ -360,8 +354,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -535,8 +527,6 @@
{
"fold_name": "fold_P044,P050,P059",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_39",
@@ -819,8 +809,6 @@
]
},
"val_semantic_unknown": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -875,8 +863,6 @@
]
},
"val_semantic_known": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_01_09",
"P045#2020_02_05_10_59_32",
@@ -893,8 +879,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -1068,8 +1052,6 @@
{
"fold_name": "fold_P045,P061,P071",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_01_09",
"P041#2019_12_14_12_01_39",
@@ -1343,8 +1325,6 @@
]
},
"val_semantic_unknown": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P045#2020_02_05_10_54_19",
"P045#2020_02_05_10_55_07",
@@ -1408,8 +1388,6 @@
]
},
"val_semantic_known": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P044#2020_02_01_09_58_48",
@@ -1426,8 +1404,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -1601,8 +1577,6 @@
{
"fold_name": "fold_P047,P049,P070",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -1870,8 +1844,6 @@
]
},
"val_semantic_unknown": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P047#2020_02_07_17_28_15",
"P047#2020_02_07_17_28_38",
@@ -1941,8 +1913,6 @@
]
},
"val_semantic_known": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_01_39",
"P044#2020_02_01_09_52_12",
@@ -1959,8 +1929,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -2134,8 +2102,6 @@
{
"fold_name": "fold_P048,P057,P058",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -2376,8 +2342,6 @@
]
},
"val_semantic_unknown": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P048#2020_02_08_10_34_35",
"P048#2020_02_08_10_35_01",
@@ -2474,8 +2438,6 @@
]
},
"val_semantic_known": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_01_39",
"P044#2020_02_01_09_58_31",
@@ -2492,8 +2454,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -2664,4 +2624,4 @@
]
}
}
-]
\ No newline at end of file
+]
diff --git a/htc/models/data/pigs_semantic-only_dataset-size_repetitions=5V2.json b/htc/models/data/pigs_semantic-only_dataset-size_repetitions=5V2.json
index 1efcaa3..386b68d 100644
--- a/htc/models/data/pigs_semantic-only_dataset-size_repetitions=5V2.json
+++ b/htc/models/data/pigs_semantic-only_dataset-size_repetitions=5V2.json
@@ -2,8 +2,6 @@
{
"fold_name": "fold_pigs=1_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -26,8 +24,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -191,8 +187,6 @@
{
"fold_name": "fold_pigs=1_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P047#2020_02_07_17_28_15",
"P047#2020_02_07_17_28_38",
@@ -207,8 +201,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -372,8 +364,6 @@
{
"fold_name": "fold_pigs=1_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P061#2020_05_15_10_26_26",
"P061#2020_05_15_10_26_51",
@@ -396,8 +386,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -561,8 +549,6 @@
{
"fold_name": "fold_pigs=1_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P069#2020_07_23_09_55_15",
"P069#2020_07_23_09_55_42",
@@ -587,8 +573,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -752,8 +736,6 @@
{
"fold_name": "fold_pigs=1_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P049#2020_02_11_19_09_49",
"P049#2020_02_11_19_10_20",
@@ -774,8 +756,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -939,8 +919,6 @@
{
"fold_name": "fold_pigs=2_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -968,8 +946,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -1133,8 +1109,6 @@
{
"fold_name": "fold_pigs=2_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P047#2020_02_07_17_28_15",
"P047#2020_02_07_17_28_38",
@@ -1173,8 +1147,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -1338,8 +1310,6 @@
{
"fold_name": "fold_pigs=2_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P048#2020_02_08_10_34_35",
"P048#2020_02_08_10_35_01",
@@ -1377,8 +1347,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -1542,8 +1510,6 @@
{
"fold_name": "fold_pigs=2_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P048#2020_02_08_10_34_35",
"P048#2020_02_08_10_35_01",
@@ -1583,8 +1549,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -1748,8 +1712,6 @@
{
"fold_name": "fold_pigs=2_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -1778,8 +1740,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -1943,8 +1903,6 @@
{
"fold_name": "fold_pigs=3_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -2012,8 +1970,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -2177,8 +2133,6 @@
{
"fold_name": "fold_pigs=3_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P047#2020_02_07_17_28_15",
"P047#2020_02_07_17_28_38",
@@ -2222,8 +2176,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -2387,8 +2339,6 @@
{
"fold_name": "fold_pigs=3_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P048#2020_02_08_10_34_35",
"P048#2020_02_08_10_35_01",
@@ -2442,8 +2392,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -2607,8 +2555,6 @@
{
"fold_name": "fold_pigs=3_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -2666,8 +2612,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -2831,8 +2775,6 @@
{
"fold_name": "fold_pigs=3_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -2866,8 +2808,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -3031,8 +2971,6 @@
{
"fold_name": "fold_pigs=4_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -3122,8 +3060,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -3287,8 +3223,6 @@
{
"fold_name": "fold_pigs=4_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P045#2020_02_05_10_54_19",
"P045#2020_02_05_10_55_07",
@@ -3350,8 +3284,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -3515,8 +3447,6 @@
{
"fold_name": "fold_pigs=4_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -3574,8 +3504,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -3739,8 +3667,6 @@
{
"fold_name": "fold_pigs=4_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -3803,8 +3729,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -3968,8 +3892,6 @@
{
"fold_name": "fold_pigs=4_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -4013,8 +3935,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -4178,8 +4098,6 @@
{
"fold_name": "fold_pigs=5_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -4288,8 +4206,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -4453,8 +4369,6 @@
{
"fold_name": "fold_pigs=5_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P045#2020_02_05_10_54_19",
"P045#2020_02_05_10_55_07",
@@ -4554,8 +4468,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -4719,8 +4631,6 @@
{
"fold_name": "fold_pigs=5_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -4800,8 +4710,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -4965,8 +4873,6 @@
{
"fold_name": "fold_pigs=5_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -5047,8 +4953,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -5212,8 +5116,6 @@
{
"fold_name": "fold_pigs=5_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -5272,8 +5174,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -5437,8 +5337,6 @@
{
"fold_name": "fold_pigs=6_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -5562,8 +5460,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -5727,8 +5623,6 @@
{
"fold_name": "fold_pigs=6_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P045#2020_02_05_10_54_19",
"P045#2020_02_05_10_55_07",
@@ -5843,8 +5737,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -6008,8 +5900,6 @@
{
"fold_name": "fold_pigs=6_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -6099,8 +5989,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -6264,8 +6152,6 @@
{
"fold_name": "fold_pigs=6_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -6364,8 +6250,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -6529,8 +6413,6 @@
{
"fold_name": "fold_pigs=6_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -6611,8 +6493,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -6776,8 +6656,6 @@
{
"fold_name": "fold_pigs=7_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -6919,8 +6797,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -7084,8 +6960,6 @@
{
"fold_name": "fold_pigs=7_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -7218,8 +7092,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -7383,8 +7255,6 @@
{
"fold_name": "fold_pigs=7_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -7492,8 +7362,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -7657,8 +7525,6 @@
{
"fold_name": "fold_pigs=7_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -7781,8 +7647,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -7946,8 +7810,6 @@
{
"fold_name": "fold_pigs=7_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -8046,8 +7908,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -8211,8 +8071,6 @@
{
"fold_name": "fold_pigs=8_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -8382,8 +8240,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -8547,8 +8403,6 @@
{
"fold_name": "fold_pigs=8_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -8709,8 +8563,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -8874,8 +8726,6 @@
{
"fold_name": "fold_pigs=8_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -9021,8 +8871,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -9186,8 +9034,6 @@
{
"fold_name": "fold_pigs=8_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -9326,8 +9172,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -9491,8 +9335,6 @@
{
"fold_name": "fold_pigs=8_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -9609,8 +9451,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -9774,8 +9614,6 @@
{
"fold_name": "fold_pigs=9_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -9983,8 +9821,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -10148,8 +9984,6 @@
{
"fold_name": "fold_pigs=9_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -10314,8 +10148,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -10479,8 +10311,6 @@
{
"fold_name": "fold_pigs=9_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -10650,8 +10480,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -10815,8 +10643,6 @@
{
"fold_name": "fold_pigs=9_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -10959,8 +10785,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -11124,8 +10948,6 @@
{
"fold_name": "fold_pigs=9_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -11261,8 +11083,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -11426,8 +11246,6 @@
{
"fold_name": "fold_pigs=10_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -11659,8 +11477,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -11824,8 +11640,6 @@
{
"fold_name": "fold_pigs=10_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -12009,8 +11823,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -12174,8 +11986,6 @@
{
"fold_name": "fold_pigs=10_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -12365,8 +12175,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -12530,8 +12338,6 @@
{
"fold_name": "fold_pigs=10_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -12693,8 +12499,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -12858,8 +12662,6 @@
{
"fold_name": "fold_pigs=10_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -13023,8 +12825,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -13188,8 +12988,6 @@
{
"fold_name": "fold_pigs=11_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -13439,8 +13237,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -13604,8 +13400,6 @@
{
"fold_name": "fold_pigs=11_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -13811,8 +13605,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -13976,8 +13768,6 @@
{
"fold_name": "fold_pigs=11_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -14185,8 +13975,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -14350,8 +14138,6 @@
{
"fold_name": "fold_pigs=11_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -14541,8 +14327,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -14706,8 +14490,6 @@
{
"fold_name": "fold_pigs=11_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -14911,8 +14693,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -15076,8 +14856,6 @@
{
"fold_name": "fold_pigs=12_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_09_51_15",
"P044#2020_02_01_09_51_31",
@@ -15337,8 +15115,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -15502,8 +15278,6 @@
{
"fold_name": "fold_pigs=12_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -15749,8 +15523,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -15914,8 +15686,6 @@
{
"fold_name": "fold_pigs=12_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -16128,8 +15898,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -16293,8 +16061,6 @@
{
"fold_name": "fold_pigs=12_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -16494,8 +16260,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -16659,8 +16423,6 @@
{
"fold_name": "fold_pigs=12_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -16882,8 +16644,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -17047,8 +16807,6 @@
{
"fold_name": "fold_pigs=13_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -17312,8 +17070,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -17477,8 +17233,6 @@
{
"fold_name": "fold_pigs=13_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -17744,8 +17498,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -17909,8 +17661,6 @@
{
"fold_name": "fold_pigs=13_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -18142,8 +17892,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -18307,8 +18055,6 @@
{
"fold_name": "fold_pigs=13_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -18530,8 +18276,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -18695,8 +18439,6 @@
{
"fold_name": "fold_pigs=13_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -18934,8 +18676,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -19099,8 +18839,6 @@
{
"fold_name": "fold_pigs=14_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -19380,8 +19118,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -19545,8 +19281,6 @@
{
"fold_name": "fold_pigs=14_seed=1",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -19830,8 +19564,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -19995,8 +19727,6 @@
{
"fold_name": "fold_pigs=14_seed=2",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -20256,8 +19986,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -20421,8 +20149,6 @@
{
"fold_name": "fold_pigs=14_seed=3",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -20684,8 +20410,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -20849,8 +20573,6 @@
{
"fold_name": "fold_pigs=14_seed=4",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -21126,8 +20848,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -21291,8 +21011,6 @@
{
"fold_name": "fold_pigs=15_seed=0",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_12_00_16",
"P041#2019_12_14_12_01_09",
@@ -21592,8 +21310,6 @@
]
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27",
"P043#2019_12_20_10_05_48",
@@ -21754,4 +21470,4 @@
]
}
}
-]
\ No newline at end of file
+]
diff --git a/htc/models/data/run_pig_dataset.py b/htc/models/data/run_pig_dataset.py
index fbf89d9..1837328 100644
--- a/htc/models/data/run_pig_dataset.py
+++ b/htc/models/data/run_pig_dataset.py
@@ -90,23 +90,15 @@ def generate_folds(self) -> list[dict]:
fold_specs = {
"fold_name": f"fold_{subject_name}",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_train],
},
"val_semantic_unknown": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_val_unknown],
},
"val_semantic_known": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_val_known],
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": imgs_test,
},
}
@@ -134,23 +126,15 @@ def generate_folds(self) -> list[dict]:
fold_specs = {
"fold_name": "fold_" + ",".join(fold),
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_train],
},
"val_semantic_unknown": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_val_unknown],
},
"val_semantic_known": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_val_known],
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": imgs_test,
},
}
@@ -182,28 +166,18 @@ def generate_folds(self) -> list[dict]:
fold_specs = {
"fold_name": "fold_" + ",".join(fold),
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_train],
},
"train_masks": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_train_masks],
},
"val_semantic_unknown": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_val_unknown],
},
"val_semantic_known": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_val_known],
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": imgs_test,
},
}
diff --git a/htc/models/data/run_size_dataset.py b/htc/models/data/run_size_dataset.py
index d3d4ba0..7830077 100644
--- a/htc/models/data/run_size_dataset.py
+++ b/htc/models/data/run_size_dataset.py
@@ -131,13 +131,9 @@ def generate_folds(self) -> list[dict]:
fold_specs = {
"fold_name": f"fold_pigs={n_pigs}_seed={seed}",
"train_semantic": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_fold],
},
"val_semantic_test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [p.image_name() for p in paths_test],
},
}
diff --git a/htc/models/image/DatasetImage.py b/htc/models/image/DatasetImage.py
index ab337b9..b0ad48d 100644
--- a/htc/models/image/DatasetImage.py
+++ b/htc/models/image/DatasetImage.py
@@ -5,6 +5,7 @@
from htc.models.common.HTCDataset import HTCDataset
from htc.models.data.DataSpecification import DataSpecification
+from htc.utils.Config import Config
from htc.utils.DomainMapper import DomainMapper
from htc.utils.SLICWrapper import SLICWrapper
@@ -63,11 +64,19 @@ def __getitem__(self, index: int, start_pointers: dict[str, int] = None) -> dict
sample = self.read_experiment(self.paths[index], start_pointers=start_pointers)
sample["image_index"] = index
- if self.config["input/superpixels"]:
+ if self.config["input/superpixels"] and not self.config["input/no_features"]:
if self.config["input/n_channels"] != 3:
- # # We always calculate the superpixels on the RGB image since we only want to compare the features and not the shape
- rgb = self.paths[index].read_rgb_reconstructed() / 255
- sample["features_rgb"] = torch.from_numpy(rgb).float()
+ # We always calculate the superpixels on the RGB image since we only want to compare the features and not the shape
+
+ # Load the RGB image via the dataset so that potential transformations may be applied
+ config_rgb = Config({"input/n_channels": 3})
+ if "label_mapping" in self.config:
+ config_rgb["label_mapping"] = self.config["label_mapping"]
+ if "input/test_time_transforms_cpu" in self.config:
+ config_rgb["input/test_time_transforms_cpu"] = self.config["input/test_time_transforms_cpu"]
+ sample_rgb = DatasetImage([self.paths[index]], train=False, config=config_rgb)[0]
+
+ sample["features_rgb"] = sample_rgb["features"]
spx_features_name = "features_rgb"
else:
# We already have the RGB data so we can directly use it for the superpixels
@@ -77,7 +86,7 @@ def __getitem__(self, index: int, start_pointers: dict[str, int] = None) -> dict
# The main problem is that the border values get mirrored leading to duplicate superpixel indices or missing indices
sample = self.apply_transforms(sample) # e.g. features.shape = [480, 640, 100]
- if self.config["input/superpixels"]:
+ if self.config["input/superpixels"] and not self.config["input/no_features"]:
fast_slic = SLICWrapper(**self.config["input/superpixels"])
sample["spxs"] = fast_slic.apply_slic(sample[spx_features_name])
diff --git a/htc/models/image/DatasetImageBatch.py b/htc/models/image/DatasetImageBatch.py
index e5f9866..2f6ca97 100644
--- a/htc/models/image/DatasetImageBatch.py
+++ b/htc/models/image/DatasetImageBatch.py
@@ -1,8 +1,6 @@
# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
# SPDX-License-Identifier: MIT
-import torch
-
from htc.models.common.SharedMemoryDatasetMixin import SharedMemoryDatasetMixin
from htc.models.image.DatasetImage import DatasetImage
@@ -74,24 +72,3 @@ def __len__(self) -> int:
@property
def buffer_size(self) -> int:
return self.worker_buffer_size * self.config["dataloader_kwargs/num_workers"]
-
- def _add_shared_resources(self) -> None:
- self._add_image_index_shared()
- spatial_shape = self.paths[0].dataset_settings["spatial_shape"]
-
- if not self.config["input/no_features"]:
- self._add_tensor_shared("features", self.features_dtype, *spatial_shape, self.config["input/n_channels"])
- if not self.config["input/no_labels"]:
- if self.config["input/annotation_name"] and not self.config["input/merge_annotations"]:
- for name in self._possible_annotation_names():
- self._add_tensor_shared(f"labels_{name}", torch.int64, *spatial_shape)
- self._add_tensor_shared(f"valid_pixels_{name}", torch.bool, *spatial_shape)
- else:
- self._add_tensor_shared("labels", torch.int64, *spatial_shape)
- self._add_tensor_shared("valid_pixels", torch.bool, *spatial_shape)
-
- if self.config["input/superpixels"]:
- self._add_tensor_shared("spxs", torch.int64, *spatial_shape)
-
- for domain in self.target_domains:
- self._add_tensor_shared(domain, torch.int64)
diff --git a/htc/models/image/DatasetImageStream.py b/htc/models/image/DatasetImageStream.py
index 555a5f2..34c6323 100644
--- a/htc/models/image/DatasetImageStream.py
+++ b/htc/models/image/DatasetImageStream.py
@@ -54,27 +54,6 @@ def iter_samples(self) -> Iterator[dict[str, torch.Tensor]]:
yield sample
- def _add_shared_resources(self) -> None:
- self._add_image_index_shared()
- spatial_shape = self.paths[0].dataset_settings["spatial_shape"]
-
- if not self.config["input/no_features"]:
- self._add_tensor_shared("features", self.features_dtype, *spatial_shape, self.config["input/n_channels"])
- if not self.config["input/no_labels"]:
- if self.config["input/annotation_name"] and not self.config["input/merge_annotations"]:
- for name in self._possible_annotation_names():
- self._add_tensor_shared(f"labels_{name}", torch.int64, *spatial_shape)
- self._add_tensor_shared(f"valid_pixels_{name}", torch.bool, *spatial_shape)
- else:
- self._add_tensor_shared("labels", torch.int64, *spatial_shape)
- self._add_tensor_shared("valid_pixels", torch.bool, *spatial_shape)
-
- if self.config["input/superpixels"]:
- self._add_tensor_shared("spxs", torch.int64, *spatial_shape)
-
- for domain in self.target_domains:
- self._add_tensor_shared(domain, torch.int64)
-
def n_image_elements(self) -> int:
return 1
diff --git a/htc/models/image/LightningImage.py b/htc/models/image/LightningImage.py
index cb34e39..9f84346 100644
--- a/htc/models/image/LightningImage.py
+++ b/htc/models/image/LightningImage.py
@@ -53,7 +53,7 @@ def __init__(self, *args, **kwargs):
if hasattr(self.dice_loss, "class_weight"):
# MONAI >=1.3.0 uses a class weight buffer which breaks loading of old checkpoints
# Since we have our own class weighting anyway, we simple remove the buffer
- del self.dice_loss.class_weight
+ self.dice_loss.class_weight = None
if "optimization/spx_loss_weight" in self.config:
assert (
diff --git a/htc/models/image/ModelImage.py b/htc/models/image/ModelImage.py
index 13f943b..48c159b 100644
--- a/htc/models/image/ModelImage.py
+++ b/htc/models/image/ModelImage.py
@@ -7,7 +7,7 @@
from htc.models.common.HSI3dChannel import HSI3dChannel
from htc.models.common.HTCModel import HTCModel
-from htc.models.common.utils import get_n_classes
+from htc.models.common.utils import get_n_classes, model_input_channels
from htc.utils.Config import Config
@@ -23,7 +23,7 @@ def __init__(self, config: Config, channels: int = None):
channels = self.channel_preprocessing.output_channels()
else:
self.channel_preprocessing = nn.Identity()
- channels = self.config["input/n_channels"] if channels is None else channels
+ channels = model_input_channels(self.config) if channels is None else channels
ArchitectureClass = getattr(smp, self.config["model/architecture_name"])
self.architecture = ArchitectureClass(
diff --git a/htc/models/image/configs/default.json b/htc/models/image/configs/default.json
index 3125e7e..d439116 100644
--- a/htc/models/image/configs/default.json
+++ b/htc/models/image/configs/default.json
@@ -5,23 +5,27 @@
"preprocessing": "L1",
"n_channels": 100,
"epoch_size": 500,
- "transforms_gpu": [{
- "class": "KorniaTransform",
- "transformation_name": "RandomAffine",
- "translate": [0.0625, 0.0625],
- "scale": [0.9, 1.1],
- "degrees": 45,
- "padding_mode": "reflection",
- "p": 0.5
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomHorizontalFlip",
- "p": 0.25
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomVerticalFlip",
- "p": 0.25
- }]
+ "transforms_gpu": [
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomAffine",
+ "translate": [0.0625, 0.0625],
+ "scale": [0.9, 1.1],
+ "degrees": 45,
+ "padding_mode": "reflection",
+ "p": 0.5
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomHorizontalFlip",
+ "p": 0.25
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomVerticalFlip",
+ "p": 0.25
+ }
+ ]
},
"label_mapping": "htc.settings_seg>label_mapping",
"optimization": {
diff --git a/htc/models/patch/configs/default.json b/htc/models/patch/configs/default.json
index f869efe..f54d464 100644
--- a/htc/models/patch/configs/default.json
+++ b/htc/models/patch/configs/default.json
@@ -8,23 +8,27 @@
"patch_sampling": "uniform",
"patch_size": [32, 32],
"background_undersampling": false,
- "transforms_cpu": [{
- "class": "KorniaTransform",
- "transformation_name": "RandomAffine",
- "translate": [0.0625, 0.0625],
- "scale": [0.9, 1.1],
- "degrees": 45,
- "padding_mode": "reflection",
- "p": 0.5
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomHorizontalFlip",
- "p": 0.25
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomVerticalFlip",
- "p": 0.25
- }]
+ "transforms_cpu": [
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomAffine",
+ "translate": [0.0625, 0.0625],
+ "scale": [0.9, 1.1],
+ "degrees": 45,
+ "padding_mode": "reflection",
+ "p": 0.5
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomHorizontalFlip",
+ "p": 0.25
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomVerticalFlip",
+ "p": 0.25
+ }
+ ]
},
"label_mapping": "htc.settings_seg>label_mapping",
"optimization": {
diff --git a/htc/models/patch/configs/default_64.json b/htc/models/patch/configs/default_64.json
index a0bdc33..816378d 100644
--- a/htc/models/patch/configs/default_64.json
+++ b/htc/models/patch/configs/default_64.json
@@ -8,23 +8,27 @@
"patch_sampling": "uniform",
"patch_size": [64, 64],
"background_undersampling": false,
- "transforms_cpu": [{
- "class": "KorniaTransform",
- "transformation_name": "RandomAffine",
- "translate": [0.0625, 0.0625],
- "scale": [0.9, 1.1],
- "degrees": 45,
- "padding_mode": "reflection",
- "p": 0.5
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomHorizontalFlip",
- "p": 0.25
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomVerticalFlip",
- "p": 0.25
- }]
+ "transforms_cpu": [
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomAffine",
+ "translate": [0.0625, 0.0625],
+ "scale": [0.9, 1.1],
+ "degrees": 45,
+ "padding_mode": "reflection",
+ "p": 0.5
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomHorizontalFlip",
+ "p": 0.25
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomVerticalFlip",
+ "p": 0.25
+ }
+ ]
},
"label_mapping": "htc.settings_seg>label_mapping",
"optimization": {
diff --git a/htc/models/pixel/ModelPixel.py b/htc/models/pixel/ModelPixel.py
index c3bc490..482a464 100644
--- a/htc/models/pixel/ModelPixel.py
+++ b/htc/models/pixel/ModelPixel.py
@@ -7,6 +7,7 @@
import htc.models.common.functions
from htc.models.common.Heads import Heads
from htc.models.common.HTCModel import HTCModel
+from htc.models.common.utils import model_input_channels
from htc.utils.Config import Config
@@ -45,7 +46,7 @@ def __init__(self, config: Config):
# The adaptive pooling layer ensures that the output of the conv layers always has the same length
# This allows a different number of channels to be used as input which could be helpful for pretraining
# If the input already has the correct input size, the adaptive layer does not change the conv output
- in_dim = self._conv_output_features(self.config["input/n_channels"])
+ in_dim = self._conv_output_features(model_input_channels(self.config))
self.adaptive_conv_reduction = nn.AdaptiveAvgPool1d(in_dim)
self.fc1 = nn.Linear(in_features=in_dim, out_features=100)
diff --git a/htc/models/pixel/ModelPixelRGB.py b/htc/models/pixel/ModelPixelRGB.py
index 132cd13..353e21e 100644
--- a/htc/models/pixel/ModelPixelRGB.py
+++ b/htc/models/pixel/ModelPixelRGB.py
@@ -7,6 +7,7 @@
import htc.models.common.functions
from htc.models.common.Heads import Heads
from htc.models.common.HTCModel import HTCModel
+from htc.models.common.utils import model_input_channels
from htc.utils.Config import Config
@@ -27,7 +28,7 @@ def __init__(self, config: Config):
DropoutLayer = nn.Identity
# FNN
- self.fc1 = nn.Linear(in_features=self.config["input/n_channels"], out_features=200)
+ self.fc1 = nn.Linear(in_features=model_input_channels(self.config), out_features=200)
self.fc1_norm = NormalizationLayer(num_features=self.fc1.out_features)
self.fc1_dropout = DropoutLayer(self.config["model/dropout"])
diff --git a/htc/models/pixel/configs/default.json b/htc/models/pixel/configs/default.json
index 74a9e67..0184da4 100644
--- a/htc/models/pixel/configs/default.json
+++ b/htc/models/pixel/configs/default.json
@@ -6,23 +6,27 @@
"n_channels": 100,
"epoch_size": "500 images",
"oversampling": false,
- "transforms_cpu": [{
- "class": "KorniaTransform",
- "transformation_name": "RandomAffine",
- "translate": [0.0625, 0.0625],
- "scale": [0.9, 1.1],
- "degrees": 45,
- "padding_mode": "reflection",
- "p": 0.5
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomHorizontalFlip",
- "p": 0.25
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomVerticalFlip",
- "p": 0.25
- }]
+ "transforms_cpu": [
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomAffine",
+ "translate": [0.0625, 0.0625],
+ "scale": [0.9, 1.1],
+ "degrees": 45,
+ "padding_mode": "reflection",
+ "p": 0.5
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomHorizontalFlip",
+ "p": 0.25
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomVerticalFlip",
+ "p": 0.25
+ }
+ ]
},
"label_mapping": "htc.settings_seg>label_mapping",
"optimization": {
diff --git a/htc/models/run_generate_configs.py b/htc/models/run_generate_configs.py
index 2f9e849..667b5a9 100644
--- a/htc/models/run_generate_configs.py
+++ b/htc/models/run_generate_configs.py
@@ -28,7 +28,7 @@ def generate_configs(
if params is None:
params = {
- "seed": [settings.default_seed],
+ "seed": [0, 1, 2, 3, 4],
# 'model/architecture_name': ["Model3D2DSeg", "DynUNet"],
# 'dataloader_kwargs/batch_size': [8],
# 'dataloader_kwargs/num_workers': [8],
diff --git a/htc/models/run_inference_timeit.py b/htc/models/run_inference_timeit.py
index 736c0b1..80dbdbd 100644
--- a/htc/models/run_inference_timeit.py
+++ b/htc/models/run_inference_timeit.py
@@ -4,7 +4,6 @@
import pandas as pd
import torch
from rich.progress import track
-from torch.cuda.amp import autocast
from htc.models.common.HTCLightning import HTCLightning
from htc.models.common.torch_helpers import move_batch_gpu
@@ -44,7 +43,7 @@ def __init__(self):
self.batch = move_batch_gpu(sample)
self.batch["features"] = self.batch["features"].unsqueeze(dim=0)
- @autocast()
+ @torch.autocast("cuda")
@torch.no_grad()
def inference_image(self) -> None:
fold_predictions = []
diff --git a/htc/models/run_training.py b/htc/models/run_training.py
index 23cc087..648a2f8 100644
--- a/htc/models/run_training.py
+++ b/htc/models/run_training.py
@@ -28,6 +28,7 @@
from htc.utils.DelayedFileHandler import DelayedFileHandler
from htc.utils.DuplicateFilter import DuplicateFilter
from htc.utils.MeasureTime import MeasureTime
+from htc.utils.Task import Task
class FoldTrainer:
@@ -40,14 +41,15 @@ def __init__(self, model_name: str, config_name: str, config_extends: Union[str,
adjust_num_workers(self.config)
# There must be a label mapping defined (class names to label ids)
- if not self.config["input/no_labels"] and "label_mapping" not in self.config:
+ labels_requested = not self.config["input/no_labels"] and Task.from_config(self.config) == Task.SEGMENTATION
+ if labels_requested and "label_mapping" not in self.config:
settings.log.warning(
"No label mapping specified in the config file. The default mapping from the images will be used which"
" may not be what you want (e.g. it is different across datasets). Best practice is to explicitly"
" specify the label mapping in the config"
)
- self.data_specs = DataSpecification.from_config(self.config)
+ self.spec = DataSpecification.from_config(self.config)
self.LightningClass = HTCLightning.class_from_config(self.config)
def train_fold(self, run_folder: Union[str, None], fold_name: str, *args) -> None:
@@ -111,14 +113,14 @@ def _train_fold(self, model_dir: str, fold_name: str, test: bool, file_log_handl
)
# Create datasets based on the paths in the data specs
- train_paths = []
+ train_paths = self.spec.fold_paths(fold_name, "^train")
test_paths = []
datasets_val = []
- for name, paths in self.data_specs.folds[fold_name].items():
+ for name, paths in self.spec.folds[fold_name].items():
assert not name.startswith("test"), "The test set should not be available at this point"
if name.startswith("train"):
- train_paths += paths
+ continue
elif name.startswith("val"):
dataset = self.LightningClass.dataset(paths=paths, train=False, config=self.config, fold_name=fold_name)
datasets_val.append(dataset)
@@ -128,8 +130,8 @@ def _train_fold(self, model_dir: str, fold_name: str, test: bool, file_log_handl
if test:
# To avoid potential errors, we activate the test set only temporarily to get the paths
# If other classes access the specs, they cannot accidentally access the test set
- with self.data_specs.activated_test_set():
- test_paths = self.data_specs.fold_paths(fold_name, "^test")
+ with self.spec.activated_test_set():
+ test_paths = self.spec.fold_paths(fold_name, "^test")
# We use only one training dataset which uses all available images. Oversampling of images from one dataset can be implemented in the lightning class
dataset_train = self.LightningClass.dataset(
@@ -145,13 +147,6 @@ def _train_fold(self, model_dir: str, fold_name: str, test: bool, file_log_handl
" calculation of the metric but just the name of the metric (e.g. used in the checkpoint filename)."
f" Defaulting to \"{self.config['validation/checkpoint_metric']}\""
)
- if "validation/dataset_index" not in self.config:
- self.config["validation/dataset_index"] = 0
- settings.log.warning(
- "No value set for validation/dataset_index in the config. This specifies the main validation dataset,"
- " e.g. used for checkpointing. Currently, only one validation dataset can be used. Defaulting to"
- f" \"{self.config['validation/dataset_index']}\""
- )
# Optional test dataset
lightning_kwargs = {}
@@ -226,6 +221,13 @@ def _train_fold(self, model_dir: str, fold_name: str, test: bool, file_log_handl
),
category=UserWarning,
)
+ warnings.filterwarnings(
+ "ignore",
+ message=(
+ ".*Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0.*"
+ ),
+ category=UserWarning,
+ )
if self.config["wandb_kwargs"]:
wandb_logger = WandbLogger(save_dir=model_dir, **self.config["wandb_kwargs"])
@@ -247,7 +249,7 @@ def _train_fold(self, model_dir: str, fold_name: str, test: bool, file_log_handl
settings.log.warning(key)
self.config.save_config(model_dir / "config.json")
- shutil.copy2(self.data_specs.path, model_dir / "data.json")
+ shutil.copy2(self.spec.path, model_dir / "data.json")
# Inform the system monitor that the training is finished
monitor_handle.send_signal(signal.SIGINT)
diff --git a/htc/models/superpixel_classification/DatasetSuperpixelImage.py b/htc/models/superpixel_classification/DatasetSuperpixelImage.py
index bda515e..d0057cb 100644
--- a/htc/models/superpixel_classification/DatasetSuperpixelImage.py
+++ b/htc/models/superpixel_classification/DatasetSuperpixelImage.py
@@ -38,7 +38,6 @@ def __getitem__(self, index: int) -> dict[str, torch.Tensor]:
x_image = F.interpolate(
x_image.float(), size=self.config["input/resize_shape"], mode="bilinear", align_corners=False
).squeeze(dim=0)
- x_image = self.apply_transforms(x_image) # [100, 32, 32]
features.append(x_image)
@@ -48,9 +47,8 @@ def __getitem__(self, index: int) -> dict[str, torch.Tensor]:
"image_name": sample_img["image_name"],
"features": torch.stack(features),
"spxs_sizes": torch.tensor(spxs_sizes),
- "spxs_indices_rows": torch.cat(
- spxs_indices_rows
- ), # We already concatentate the ids since we make only full image assignments later
+ # We already concatenate the ids since we make only full image assignments later
+ "spxs_indices_rows": torch.cat(spxs_indices_rows),
"spxs_indices_cols": torch.cat(spxs_indices_cols),
}
diff --git a/htc/models/superpixel_classification/ModelSuperpixelClassification.py b/htc/models/superpixel_classification/ModelSuperpixelClassification.py
index 87a9e13..51bff45 100644
--- a/htc/models/superpixel_classification/ModelSuperpixelClassification.py
+++ b/htc/models/superpixel_classification/ModelSuperpixelClassification.py
@@ -7,7 +7,7 @@
from htc.models.common.HSI3dChannel import HSI3dChannel
from htc.models.common.HTCModel import HTCModel
-from htc.models.common.utils import get_n_classes
+from htc.models.common.utils import get_n_classes, model_input_channels
from htc.utils.Config import Config
@@ -27,16 +27,17 @@ def forward(self, x):
class ModelSuperpixelClassification(HTCModel):
- def __init__(self, config: Config):
- super().__init__(config)
- n_classes = get_n_classes(self.config)
+ def __init__(self, config: Config, n_classes: int = None, **kwargs):
+ super().__init__(config, **kwargs)
+ if n_classes is None:
+ n_classes = get_n_classes(self.config)
if self.config["model/channel_preprocessing"]:
self.channel_preprocessing = HSI3dChannel(self.config)
channels = self.channel_preprocessing.output_channels()
else:
self.channel_preprocessing = nn.Identity()
- channels = self.config["input/n_channels"]
+ channels = model_input_channels(self.config)
self.architecture = UNetClassification(
self.config["model/encoder"],
diff --git a/htc/models/superpixel_classification/configs/default.json b/htc/models/superpixel_classification/configs/default.json
index fe0b701..81f6ffb 100644
--- a/htc/models/superpixel_classification/configs/default.json
+++ b/htc/models/superpixel_classification/configs/default.json
@@ -10,23 +10,27 @@
},
"resize_shape": [32, 32],
"epoch_size": "500 images",
- "transforms_cpu": [{
- "class": "KorniaTransform",
- "transformation_name": "RandomAffine",
- "translate": [0.0625, 0.0625],
- "scale": [0.9, 1.1],
- "degrees": 45,
- "padding_mode": "reflection",
- "p": 0.5
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomHorizontalFlip",
- "p": 0.25
- }, {
- "class": "KorniaTransform",
- "transformation_name": "RandomVerticalFlip",
- "p": 0.25
- }]
+ "transforms_cpu": [
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomAffine",
+ "translate": [0.0625, 0.0625],
+ "scale": [0.9, 1.1],
+ "degrees": 45,
+ "padding_mode": "reflection",
+ "p": 0.5
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomHorizontalFlip",
+ "p": 0.25
+ },
+ {
+ "class": "KorniaTransform",
+ "transformation_name": "RandomVerticalFlip",
+ "p": 0.25
+ }
+ ]
},
"label_mapping": "htc.settings_seg>label_mapping",
"optimization": {
diff --git a/htc/settings.py b/htc/settings.py
index 7af4fe0..b8aef6e 100644
--- a/htc/settings.py
+++ b/htc/settings.py
@@ -19,7 +19,7 @@
class ColoredFormatter(logging.Formatter):
- def format(self, record):
+ def format(self, record): # noqa: A003
# Apply level-specific color
levelname_prev = record.levelname
record.levelname = f"[logging.level.{record.levelname.lower()}]{record.levelname}[/]"
@@ -97,7 +97,7 @@ class Settings:
- `PATH_HTC_DOCKER_RESULTS`: If you compute something in our Docker container, results will only be stored in the container and deleted as soon as the container exits (since the container is only intended for testing). Let this variable point to a directory of your choice to keep your Docker results. Example: `PATH_HTC_DOCKER_RESULTS="/my/results/folder"`
- `HTC_ADD_NETWORK_ALTERNATIVES`: If set to the string `true`, will include results and intermediate directories on the network drive (default `false`). This is usually only required for testing. Example: `HTC_ADD_NETWORK_ALTERNATIVES="true"`
- `HTC_ENV_OVERRIDE`: Whether environment variables defined in the .env file or in your user settings override existing variables (default `true`). Set this to `false` if you want that variables defined elsewhere (e.g. before the command: `ENV_NAME htc command`) have precedence. Example: `HTC_ENV_OVERRIDE="false"`
- - `HTC_MODEL_COMPARISON_TIMESTAMP`: Variable is read in settings_seg and can be used to overwrite the default comparison timestamp (e.g. used for the reproducibility of our MIA2021 paper). Example: `HTC_MODEL_COMPARISON_TIMESTAMP="2022-02-03_22-58-44"`
+ - `HTC_MODEL_COMPARISON_TIMESTAMP`: Variable is read in settings_seg and can be used to overwrite the default comparison timestamp (e.g. used for the reproducibility of our MIA2022 paper). Example: `HTC_MODEL_COMPARISON_TIMESTAMP="2022-02-03_22-58-44"`
- `HTC_BENCHMARKING_TIMESTAMP`: Variable is read in settings_bench and can be used to overwrite the default timestamp for the benchmarking networks (e.g. used for the reproducibility of our PyTorchConf2023 poster). Example: `HTC_BENCHMARKING_TIMESTAMP="2023-09-03_22-48-13"`
- `HTC_CUDA_MEM_FRACTION`: Used in run_training.py to limit the GPU memory to a fraction of the available GPU memory (e.g. to simulate GPUs with less memory). Example: `HTC_CUDA_MEM_FRACTION="0.5"`
- `HTC_SYSTEM_MONITOR_REFRESH_RATE`: Refresh rate x in seconds for the system monitor (an event will be logged every x seconds). Example: `HTC_SYSTEM_MONITOR_REFRESH="0.15"`
@@ -285,6 +285,7 @@ def uuid4_seeded():
"tag_blood": "#f51505",
"tag_cauterization": "#9d9e9e",
"tag_malperfused": "#03ffff",
+ "tag_tumor": "#ff5100",
"instrument": "#636363",
"fur": "#FF7830",
"ligament_pat": "#FFB46D",
@@ -299,6 +300,7 @@ def uuid4_seeded():
"vesic_gland": "#00469C",
"Exterior": "#00000000", # Unlabeled parts in MITK
"network_unsure": "#AAAAAA",
+ "not_suitable_for_semantic": "#AAAAAA",
}
self.known_envs = (
diff --git a/htc/tissue_atlas/data/run_tissue_atlas_dataset.py b/htc/tissue_atlas/data/run_tissue_atlas_dataset.py
index 1607c2e..ea81a00 100644
--- a/htc/tissue_atlas/data/run_tissue_atlas_dataset.py
+++ b/htc/tissue_atlas/data/run_tissue_atlas_dataset.py
@@ -46,18 +46,12 @@ def generate_folds(self) -> list[dict]:
fold_specs = {
"fold_name": f"fold_{subject_name}",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": sorted(imgs_train),
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": sorted(imgs_val),
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": sorted(imgs_test),
},
}
diff --git a/htc/tissue_atlas/data/tissue-atlas_loocv_test-8_seed-0_cam-118.json b/htc/tissue_atlas/data/tissue-atlas_loocv_test-8_seed-0_cam-118.json
index dded86f..33f1d8d 100644
--- a/htc/tissue_atlas/data/tissue-atlas_loocv_test-8_seed-0_cam-118.json
+++ b/htc/tissue_atlas/data/tissue-atlas_loocv_test-8_seed-0_cam-118.json
@@ -2,8 +2,6 @@
{
"fold_name": "fold_P041",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P042#2019_12_15_10_14_19",
"P042#2019_12_15_10_14_44",
@@ -3739,8 +3737,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -3779,8 +3775,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -4488,7 +4482,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -9081,8 +9075,6 @@
{
"fold_name": "fold_P042",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -12814,8 +12806,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P042#2019_12_15_10_14_19",
"P042#2019_12_15_10_14_44",
@@ -12858,8 +12848,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -13567,7 +13555,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -18160,8 +18148,6 @@
{
"fold_name": "fold_P043",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -21834,8 +21820,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P043#2019_12_20_10_05_27#overlap",
"P043#2019_12_20_10_05_48#overlap",
@@ -21937,8 +21921,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -22646,7 +22628,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -27239,8 +27221,6 @@
{
"fold_name": "fold_P044",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -30965,8 +30945,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P044#2020_02_01_08_52_55",
"P044#2020_02_01_08_55_38",
@@ -31016,8 +30994,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -31725,7 +31701,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -36318,8 +36294,6 @@
{
"fold_name": "fold_P045",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -40043,8 +40017,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P045#2020_02_05_10_16_23",
"P045#2020_02_05_10_18_37",
@@ -40095,8 +40067,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -40804,7 +40774,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -45397,8 +45367,6 @@
{
"fold_name": "fold_P046",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -49140,8 +49108,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P046#2020_02_07_08_43_49",
"P046#2020_02_07_08_46_51",
@@ -49174,8 +49140,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -49883,7 +49847,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -54476,8 +54440,6 @@
{
"fold_name": "fold_P047",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -58221,8 +58183,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P047#2020_02_07_17_03_35",
"P047#2020_02_07_17_09_00",
@@ -58253,8 +58213,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -58962,7 +58920,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -63555,8 +63513,6 @@
{
"fold_name": "fold_P048",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -67301,8 +67257,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P048#2020_02_08_10_03_45",
"P048#2020_02_08_10_07_50",
@@ -67332,8 +67286,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -68041,7 +67993,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -72634,8 +72586,6 @@
{
"fold_name": "fold_P049",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -76367,8 +76317,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P049#2020_02_11_18_39_13",
"P049#2020_02_11_18_47_14",
@@ -76411,8 +76359,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -77120,7 +77066,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -81713,8 +81659,6 @@
{
"fold_name": "fold_P050",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -85451,8 +85395,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P050#2020_02_18_17_31_22",
"P050#2020_02_18_17_32_22",
@@ -85490,8 +85432,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -86199,7 +86139,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -90792,8 +90732,6 @@
{
"fold_name": "fold_P051",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -94536,8 +94474,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P051#2020_03_03_19_02_24",
"P051#2020_03_03_19_04_06",
@@ -94569,8 +94505,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -95278,7 +95212,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -99871,8 +99805,6 @@
{
"fold_name": "fold_P052",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -103612,8 +103544,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P052#2020_03_04_12_22_54",
"P052#2020_03_04_12_31_04",
@@ -103648,8 +103578,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -104357,7 +104285,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -108950,8 +108878,6 @@
{
"fold_name": "fold_P053",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -112686,8 +112612,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P053#2020_03_06_11_09_56",
"P053#2020_03_06_11_13_21",
@@ -112727,8 +112651,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -113436,7 +113358,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -118029,8 +117951,6 @@
{
"fold_name": "fold_P054",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -121790,8 +121710,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P054#2020_03_10_17_50_00",
"P054#2020_03_10_18_06_00",
@@ -121806,8 +121724,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -122515,7 +122431,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -127108,8 +127024,6 @@
{
"fold_name": "fold_P055",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -130847,8 +130761,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P055#2020_03_11_10_35_25",
"P055#2020_03_11_10_35_55",
@@ -130885,8 +130797,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -131594,7 +131504,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -136187,8 +136097,6 @@
{
"fold_name": "fold_P056",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -139952,8 +139860,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P056#2020_03_12_13_02_18",
"P056#2020_03_12_13_02_44",
@@ -139964,8 +139870,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -140673,7 +140577,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -145266,8 +145170,6 @@
{
"fold_name": "fold_P057",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -149000,8 +148902,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P057#2020_03_13_17_58_15",
"P057#2020_03_13_17_59_41",
@@ -149043,8 +148943,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -149752,7 +149650,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -154345,8 +154243,6 @@
{
"fold_name": "fold_P058",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -157898,8 +157794,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P058#2020_05_13_17_26_08#overlap",
"P058#2020_05_13_17_26_32#overlap",
@@ -158122,8 +158016,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -158831,7 +158723,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -163424,8 +163316,6 @@
{
"fold_name": "fold_P059",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -167142,8 +167032,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P059#2020_05_14_11_19_00",
"P059#2020_05_14_11_20_31",
@@ -167201,8 +167089,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -167910,7 +167796,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -172503,8 +172389,6 @@
{
"fold_name": "fold_P060",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -176152,8 +176036,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P060#2020_05_14_19_14_12#overlap",
"P060#2020_05_14_19_14_44#overlap",
@@ -176280,8 +176162,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -176989,7 +176869,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -181582,8 +181462,6 @@
{
"fold_name": "fold_P061",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -185317,8 +185195,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P061#2020_05_15_09_52_02",
"P061#2020_05_15_09_52_37",
@@ -185359,8 +185235,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -186068,7 +185942,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -190661,8 +190535,6 @@
{
"fold_name": "fold_P062",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -194329,8 +194201,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P062#2020_05_15_18_36_47#overlap",
"P062#2020_05_15_18_37_18#overlap",
@@ -194438,8 +194308,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -195147,7 +195015,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -199740,8 +199608,6 @@
{
"fold_name": "fold_P063",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -203481,8 +203347,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P063#2020_05_28_15_48_10",
"P063#2020_05_28_15_50_51",
@@ -203517,8 +203381,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -204226,7 +204088,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -208819,8 +208681,6 @@
{
"fold_name": "fold_P064",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -212548,8 +212408,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P064#2020_05_29_10_08_39",
"P064#2020_05_29_10_09_28",
@@ -212596,8 +212454,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -213305,7 +213161,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -217898,8 +217754,6 @@
{
"fold_name": "fold_P065",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -221654,8 +221508,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P065#2020_06_19_18_49_59",
"P065#2020_06_19_18_51_28",
@@ -221675,8 +221527,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -222384,7 +222234,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -226977,8 +226827,6 @@
{
"fold_name": "fold_P066",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -230736,8 +230584,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P066#2020_07_07_08_47_39",
"P066#2020_07_07_08_49_35",
@@ -230754,8 +230600,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -231463,7 +231307,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -236056,8 +235900,6 @@
{
"fold_name": "fold_P067",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -239780,8 +239622,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P067#2020_07_09_17_42_35",
"P067#2020_07_09_17_44_42",
@@ -239833,8 +239673,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -240542,7 +240380,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -245135,8 +244973,6 @@
{
"fold_name": "fold_P068",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -248872,8 +248708,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P068#2020_07_20_17_18_47",
"P068#2020_07_20_17_23_35",
@@ -248912,8 +248746,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -249621,7 +249453,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -254214,8 +254046,6 @@
{
"fold_name": "fold_P069",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -257960,8 +257790,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P069#2020_07_23_09_07_28",
"P069#2020_07_23_09_41_16",
@@ -257991,8 +257819,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -258700,7 +258526,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -263293,8 +263119,6 @@
{
"fold_name": "fold_P070",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -267009,8 +266833,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P070#2020_07_24_19_16_14#overlap",
"P070#2020_07_24_19_16_33#overlap",
@@ -267070,8 +266892,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -267779,7 +267599,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -272372,8 +272192,6 @@
{
"fold_name": "fold_P071",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -276107,8 +275925,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P071#2020_08_05_11_04_57",
"P071#2020_08_05_11_07_56",
@@ -276149,8 +275965,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -276858,7 +276672,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -281451,8 +281265,6 @@
{
"fold_name": "fold_P072",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -285163,8 +284975,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P072#2020_08_08_12_17_19",
"P072#2020_08_08_12_18_25",
@@ -285228,8 +285038,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -285937,7 +285745,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -290530,8 +290338,6 @@
{
"fold_name": "fold_P074",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -294299,16 +294105,9 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
- "image_names": [
- "P074#2020_08_19_18_40_44",
- "P074#2020_08_19_18_41_23"
- ]
+ "image_names": ["P074#2020_08_19_18_40_44", "P074#2020_08_19_18_41_23"]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -295016,7 +294815,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -299609,8 +299408,6 @@
{
"fold_name": "fold_P076",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -303325,8 +303122,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P076#2020_08_24_10_01_46",
"P076#2020_08_24_10_02_16",
@@ -303386,8 +303181,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -304095,7 +303888,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -308688,8 +308481,6 @@
{
"fold_name": "fold_P085",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -312353,8 +312144,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P085#2021_04_10_10_47_08",
"P085#2021_04_10_10_47_38",
@@ -312465,8 +312254,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -313174,7 +312961,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -317767,8 +317554,6 @@
{
"fold_name": "fold_P088",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -320763,8 +320548,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P088#2021_04_19_08_31_51",
"P088#2021_04_19_08_32_10",
@@ -321544,8 +321327,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -322253,7 +322034,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -326846,8 +326627,6 @@
{
"fold_name": "fold_P090",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -330041,8 +329820,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P090#2021_04_22_08_35_05",
"P090#2021_04_22_08_35_25",
@@ -330623,8 +330400,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -331332,7 +331107,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -335925,8 +335700,6 @@
{
"fold_name": "fold_P094",
"train": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P041#2019_12_14_10_50_54",
"P041#2019_12_14_10_51_18",
@@ -338919,8 +338692,6 @@
]
},
"val": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P094#2021_04_30_08_36_33",
"P094#2021_04_30_08_36_50",
@@ -339702,8 +339473,6 @@
]
},
"test": {
- "data_path_module": "htc.tivita.DataPath",
- "data_path_class": "DataPath",
"image_names": [
"P086#2021_04_15_09_22_02",
"P086#2021_04_15_09_22_20",
@@ -340411,7 +340180,7 @@
"P086#2021_04_15_19_44_11",
"P086#2021_04_15_19_44_30",
"P086#2021_04_15_19_44_49",
-
+
"P086#2021_04_15_19_55_01",
"P086#2021_04_15_19_55_24",
"P086#2021_04_15_19_55_48",
@@ -345001,4 +344770,4 @@
]
}
}
-]
\ No newline at end of file
+]
diff --git a/htc/tissue_atlas/median_pixel/DatasetMedianPixel.py b/htc/tissue_atlas/median_pixel/DatasetMedianPixel.py
index 0237413..809cb9d 100644
--- a/htc/tissue_atlas/median_pixel/DatasetMedianPixel.py
+++ b/htc/tissue_atlas/median_pixel/DatasetMedianPixel.py
@@ -7,7 +7,7 @@
from htc.models.common.HTCDataset import HTCDataset
from htc.tivita.DataPath import DataPath
from htc.utils.helper_functions import median_table
-from htc.utils.LabelMapping import LabelMapping
+from htc.utils.Task import Task
class DatasetMedianPixel(HTCDataset):
@@ -15,42 +15,68 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Load precomputed spectra
- label_mapping = LabelMapping.from_config(self.config)
+ df = median_table(image_names=self.image_names, config=self.config)
- df = median_table(image_names=self.image_names, label_mapping=label_mapping)
- assert not df.duplicated(["image_name", "label_index_mapped"]).any(), (
- "Found duplicated rows (same (image_name, label_index_mapped) combination found more than once). Cannot use"
- " this table because it is unclear which median spectra should be used in this case"
+ self.labels = torch.from_numpy(df["label_index_mapped"].values) if self.config["label_mapping"] else None
+ self.image_labels = (
+ torch.from_numpy(np.stack(df["image_labels"])) if self.config["input/image_labels"] else None
)
+ if self.labels is not None:
+ assert not df.duplicated(["image_name", "label_index_mapped"]).any(), (
+ "Found duplicated rows (same (image_name, label_index_mapped) combination found more than once). Cannot"
+ " use this table because it is unclear which median spectra should be used in this case"
+ )
+
# We need to set these variables again because an image may contain more than one median spectra and we want to use all (and find the corresponding path to each annotation)
self.image_names = df["image_name"].tolist()
self.paths = [DataPath.from_image_name(image_name) for image_name in self.image_names]
- if self.config["input/normalization"] == "L1":
+ if self.config["input/normalization"] == "L1" or "L1" in self.config["input/preprocessing"]:
self.features = df["median_normalized_spectrum"].values
else:
self.features = df["median_spectrum"].values
- self.labels = torch.from_numpy(df["label_index_mapped"].values)
self.features = torch.from_numpy(np.stack(self.features))
self.features = self.apply_transforms(self.features)
- assert (
- len(self.features) == len(self.labels) == len(self.paths) == len(self.image_names)
- ), "All arrays must have the same length"
+ if self.config["input/meta"]:
+ self.meta = torch.stack([self.read_meta(path) for path in self.paths])
+ assert len(self.meta) == len(self.features), "Meta and features must have the same length"
+ else:
+ self.meta = None
+
+ assert len(self.features) == len(self.paths) == len(self.image_names), "All arrays must have the same length"
+ if self.labels is not None:
+ assert len(self.labels) == len(self.features), "Labels and features must have the same length"
+ if self.image_labels is not None:
+ assert len(self.image_labels) == len(self.features), "Image labels and features must have the same length"
def label_counts(self) -> tuple[torch.Tensor, torch.Tensor]:
- return self.labels.unique(return_counts=True)
+ """
+ Calculates for each unique label in the dataset the number of occurrences based on the task, i.e. either based on the labels or image_labels attribute.
+
+ Compared to the parent class, this method counts the number of annotations and not pixels.
+
+ Returns: Tuple with label values and corresponding counts.
+ """
+ task = Task.from_config(self.config)
+ return getattr(self, task.labels_name()).unique(return_counts=True)
def __len__(self) -> int:
- return len(self.labels)
+ task = Task.from_config(self.config)
+ return len(getattr(self, task.labels_name()))
def __getitem__(self, index: int) -> dict[str, torch.Tensor]:
- sample = {
- "features": self.features[index, :],
- "labels": self.labels[index],
- }
+ sample = {"features": self.features[index, :]}
+
+ if self.labels is not None:
+ sample["labels"] = self.labels[index]
+ if self.image_labels is not None:
+ sample["image_labels"] = self.image_labels[index]
+
+ if self.meta is not None:
+ sample["meta"] = self.meta[index, :]
if not self.train:
sample["image_name"] = self.image_names[index]
diff --git a/htc/tissue_atlas/median_pixel/LightningMedianPixel.py b/htc/tissue_atlas/median_pixel/LightningMedianPixel.py
index 60ad64a..269fd4f 100644
--- a/htc/tissue_atlas/median_pixel/LightningMedianPixel.py
+++ b/htc/tissue_atlas/median_pixel/LightningMedianPixel.py
@@ -18,6 +18,7 @@
from htc.models.common.utils import get_n_classes
from htc.models.pixel.ModelPixel import ModelPixel
from htc.tissue_atlas.median_pixel.DatasetMedianPixel import DatasetMedianPixel
+from htc.utils.Task import Task
class LightningMedianPixel(HTCLightning):
@@ -48,7 +49,8 @@ def train_dataloader(self) -> DataLoader:
)
weights = calculate_class_weights(config, *self.dataset_train.label_counts())
- sample_weights = weights[self.dataset_train.labels]
+ task = Task.from_config(self.config)
+ sample_weights = weights[getattr(self.dataset_train, task.labels_name())]
sampler = WeightedRandomSampler(sample_weights, num_samples=self.config["input/epoch_size"])
else:
sampler = RandomSampler(self.dataset_train, replacement=True, num_samples=self.config["input/epoch_size"])
@@ -57,14 +59,13 @@ def train_dataloader(self) -> DataLoader:
self.dataset_train, sampler=sampler, persistent_workers=True, **self.config["dataloader_kwargs"]
)
- def forward(self, x: torch.Tensor) -> torch.Tensor:
- return self.model(x)["class"]
+ def forward(self, batch: dict[str, torch.Tensor]) -> torch.Tensor:
+ return self.model(batch["features"])["class"]
def training_step(self, batch: dict[str, torch.Tensor], batch_idx: int) -> dict:
+ predictions = self(batch)
labels = batch["labels"]
- features = batch["features"]
- predictions = self(features)
ce_loss = self.ce_loss_weighted(predictions, labels)
self.log("train/ce_loss", ce_loss, on_epoch=True)
@@ -76,7 +77,7 @@ def validation_step(self, batch: dict[str, torch.Tensor], batch_idx: int) -> Non
len(values) == 0 for values in self.validation_results_epoch.values()
), "Validation results are not properly cleared"
- predictions = self(batch["features"]).argmax(dim=1)
+ predictions = self(batch).argmax(dim=1)
self.validation_results_epoch["labels"].append(batch["labels"])
self.validation_results_epoch["predictions"].append(predictions)
@@ -116,10 +117,9 @@ def test_step(self, batch: dict[str, torch.Tensor], batch_idx: int) -> None:
), "Test results are not properly cleared"
labels = batch["labels"]
- features = batch["features"]
image_names = batch["image_name"]
- logits = self(features)
+ logits = self(batch)
self.test_results_epoch["labels"].append(labels)
self.test_results_epoch["logits"].append(logits)
@@ -133,6 +133,3 @@ def on_test_epoch_end(self) -> None:
np.savez_compressed(Path(self.logger.save_dir) / "test_results.npz", **results)
self.test_results_epoch = {"labels": [], "logits": [], "image_names": []}
-
- def _predict_images(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
- return {"class": self(batch["features"])}
diff --git a/htc/tissue_atlas/median_pixel/configs/default.json b/htc/tissue_atlas/median_pixel/configs/default.json
index 7030d0d..410d07b 100644
--- a/htc/tissue_atlas/median_pixel/configs/default.json
+++ b/htc/tissue_atlas/median_pixel/configs/default.json
@@ -40,7 +40,6 @@
"annealing_epochs": 0
},
"validation": {
- "checkpoint_metric": "accuracy",
- "dataset_index": 0
+ "checkpoint_metric": "accuracy"
}
}
diff --git a/htc/tissue_atlas/model_processing/run_median_test_table.py b/htc/tissue_atlas/model_processing/run_median_test_table.py
index e55f214..974677a 100644
--- a/htc/tissue_atlas/model_processing/run_median_test_table.py
+++ b/htc/tissue_atlas/model_processing/run_median_test_table.py
@@ -11,7 +11,7 @@
from htc.tissue_atlas.median_pixel.DatasetMedianPixel import DatasetMedianPixel
if __name__ == "__main__":
- # htc median_test_table --model median_pixel --run-folder 2024-01-10_15-45-57_median_18classes --spec tissue-atlas_loocv_test-8_seed-0_cam-118.json --table-name test_table_pigs
+ # htc median_test_table --model median_pixel --run-folder 2024-02-23_14-08-16_median_18classes --spec tissue-atlas_loocv_test-8_seed-0_cam-118.json --table-name test_table_pigs
runner = Runner(description="Create a test table based on a trained median spectra model for a new set of paths.")
runner.add_argument("--input-dir")
runner.add_argument("--spec")
diff --git a/htc/tivita/DataPath.py b/htc/tivita/DataPath.py
index dfb9976..1164e23 100644
--- a/htc/tivita/DataPath.py
+++ b/htc/tivita/DataPath.py
@@ -11,6 +11,7 @@
import numpy as np
import pandas as pd
+import torch
from PIL import Image
from typing_extensions import Self
@@ -89,7 +90,7 @@ def __init__(
Args:
image_dir: Path (or string) to the image directory (timestamp folder).
- data_dir: Path (or string) to the data directory of the dataset (it should contain a dataset_settings.json file).
+ data_dir: Path (or string) to the data directory of the dataset (it should contain a dataset_settings.json file). In case of a subdataset, data_dir should point to the subdataset folder instead of the root dataset folder.
intermediates_dir: Path (or string) to the intermediates directory of the dataset.
dataset_settings: Reference to the settings of the dataset. If None and no settings could be found in the image directory, the parents of the image directory are searched. If available, the closest dataset_settings.json is used. Otherwise, the data path gets an empty dataset settings assigned.
annotation_name_default: Default annotation_name(s) which will be used when reading the segmentation with read_segmentation() with no arguments.
@@ -148,15 +149,10 @@ def __lt__(self, other: Self) -> bool:
@property
def dataset_settings(self) -> DatasetSettings:
if self._dataset_settings is None:
- if self.image_dir is not None and (path := self.image_dir / "dataset_settings.json").exists():
- self._dataset_settings = DatasetSettings(path)
+ if self.image_dir is not None:
+ self._dataset_settings = DatasetSettings(self.image_dir / "dataset_settings.json")
else:
self._dataset_settings = DatasetSettings(path_or_data={})
- parent_paths = list(self.image_dir.parents)
- for p in parent_paths:
- if (path := p / "dataset_settings.json").exists():
- self._dataset_settings = DatasetSettings(path)
- break
return self._dataset_settings
@@ -172,12 +168,11 @@ def cube_path(self) -> Path:
"""
return self() / f"{self.timestamp}_SpecCube.dat"
- def read_cube(self, *reading_args, **reading_kwargs) -> np.ndarray:
+ def read_cube(self, **reading_kwargs) -> np.ndarray:
"""
Read the Tivita HSI cube (see read_tivita_hsi()).
Args:
- reading_args: Positional arguments to be passed to read_tivita_hsi function.
reading_kwargs: Keyword arguments to be passed to read_tivita_hsi function.
Returns: HSI data cube.
@@ -185,7 +180,24 @@ def read_cube(self, *reading_args, **reading_kwargs) -> np.ndarray:
from htc.tivita.hsi import read_tivita_hsi
cube_path = self.cube_path()
- return read_tivita_hsi(cube_path, *reading_args, **reading_kwargs)
+
+ if getattr(self, "calibration_target", None) is not None:
+ from htc.cameras.calibration.CalibrationSwap import CalibrationSwap
+
+ cube = read_tivita_hsi(self.cube_path()) # We need unnormalized cubes
+
+ t = CalibrationSwap()
+ cube = t.transform_image(
+ self, image=torch.from_numpy(cube), calibration_target=self.calibration_target
+ ).numpy()
+
+ if reading_kwargs.get("normalization") is not None:
+ cube = cube / np.linalg.norm(cube, ord=reading_kwargs["normalization"], axis=2, keepdims=True)
+ cube = np.nan_to_num(cube, copy=False)
+ else:
+ cube = read_tivita_hsi(cube_path, **reading_kwargs)
+
+ return cube
def read_cube_raw(self, calibration_original: Union["CalibrationFiles", None] = None) -> np.ndarray:
"""
@@ -204,6 +216,7 @@ def read_cube_raw(self, calibration_original: Union["CalibrationFiles", None] =
t = CalibrationSwap()
calibration_original = t.original_calibration_files(self)
+
return cube * calibration_original.white_image.numpy() + calibration_original.dark_image.numpy()
def compute_oversaturation_mask(self, threshold: int = 1000) -> np.ndarray:
@@ -213,12 +226,12 @@ def compute_oversaturation_mask(self, threshold: int = 1000) -> np.ndarray:
Args:
threshold: Threshold to consider a camera count being oversaturated. The maximum count of the camera is 1023, therefore a value of 1000 is chosen as default to account for noise and slight miscalibrations (e.g. due to the corresponding calibration files not being available).
- Returns: Oversaturation mask of the image.
+ Returns: Oversaturation mask of the image (True indicates oversaturated pixels).
"""
cube_raw = self.read_cube_raw()
return np.any(cube_raw > threshold, axis=-1)
- def is_cube_valid(self) -> bool:
+ def is_cube_valid(self, strict: bool = False) -> bool:
"""
Checks whether the HSI cube is valid, i.e. not broken. Unfortunately, the Tivita camera may produce broken images due to unknown reasons. Here, we basically check whether we can read the cube and whether it contains invalid values (zero, negative pixels, infinite numbers).
@@ -226,6 +239,9 @@ def is_cube_valid(self) -> bool:
>>> path.is_cube_valid()
True
+ Args:
+ strict: If True, will also mark cubes as invalid if any value is zero or negative. Otherwise, only a warning is issued.
+
Returns: True if all checks pass. If False, then the image should be excluded from the analysis as the spectra may be completely wrong. R.I.P.
"""
is_valid = True
@@ -238,7 +254,10 @@ def is_cube_valid(self) -> bool:
is_valid = False
if cube.shape != self.dataset_settings["shape"]:
- settings.log.error(f"The cube {self} does not have the correct shape ({cube.shape = })")
+ settings.log.error(
+ f"The cube {self} does not have the correct shape ({cube.shape = } !="
+ f" {self.dataset_settings['shape'] = })"
+ )
is_valid = False
infinite_values = ~np.isfinite(cube)
@@ -255,6 +274,8 @@ def is_cube_valid(self) -> bool:
settings.log.warning(
f"The cube {self} has {np.sum(cube == 0)} zero values (the cube is still used)"
)
+ if strict:
+ is_valid = False
if np.all(cube < 0):
settings.log.error(f"The cube {self} contains only negative values")
@@ -265,6 +286,8 @@ def is_cube_valid(self) -> bool:
settings.log.warning(
f"The cube {self} contains {np.sum(negative_pixels)} negative pixels (the cube is still used)"
)
+ if strict:
+ is_valid = False
except Exception as e:
settings.log.error(f"Cannot read the cube {self}: {e}")
is_valid = False
@@ -332,6 +355,31 @@ def read_rgb_sensor(self, *reading_args, **reading_kwargs) -> np.ndarray:
rgb_path = self.rgb_path_sensor()
return read_tivita_rgb(rgb_path, *reading_args, **reading_kwargs)
+ def align_rgb_sensor(self, *args, recompute: bool = False, **kwargs) -> np.ndarray:
+ """
+ Align the RGB image from the RGB sensor to the reconstructed RGB image of the HSI cube.
+
+ See the function `align_rgb_sensor()` for more details.
+
+ Args:
+ recompute: If True, the alignment will be recomputed even if a precomputed file exists.
+ args: Positional arguments to be passed to `align_rgb_sensor()` function.
+ kwargs: Keyword arguments to pass to `align_rgb_sensor()` function.
+
+ Returns: Aligned RGB sensor image.
+ """
+ if not recompute:
+ precomputed_path = (
+ self.intermediates_dir / "preprocessing" / "rgb_sensor_aligned" / f"{self.image_name()}.blosc"
+ )
+ if precomputed_path.exists():
+ data = decompress_file(precomputed_path)
+ return np.ma.MaskedArray(data["data"], data["mask"])
+
+ from htc.tivita.rgb import align_rgb_sensor
+
+ return align_rgb_sensor(self.rgb_path_reconstructed(), self.rgb_path_sensor(), *args, **kwargs)
+
def segmentation_path(self) -> Union[Path, None]:
"""
Path to the file which stores the segmentation image(s). These are not the raw annotations but the processed images, i.e. numpy array with the same shape as the image and annotations for all labels merged in one file.
@@ -407,6 +455,29 @@ def read_segmentation(
else:
return None
+ def colorchecker_annotation_path(self) -> Union[Path, None]:
+ """
+ Path to the colorchecker annotation file (automatically or manually created).
+
+ Returns: Path to the existing colorchecker annotation file or None if it could not be found (e.g., if the data path does not point to a colorchecker image).
+ """
+ annotations_dir = self.image_dir / "annotations"
+ if not annotations_dir.exists():
+ return None
+ else:
+ mask_paths = list(
+ annotations_dir.glob(f"{self.timestamp}#squares#automask#*.png")
+ ) # searching for automasks
+ if len(mask_paths) == 0:
+ mask_paths = list(annotations_dir.glob(f"{self.timestamp}#polygon#*.nrrd")) # searching for MITK masks
+
+ if len(mask_paths) == 0:
+ return None
+ elif len(mask_paths) > 1:
+ raise ValueError(f"Too many colorchecker masks available for {self.image_dir}")
+ else:
+ return mask_paths[0] if mask_paths[0].exists() else None
+
def read_colorchecker_mask(
self, return_spectra: bool = False, normalization: int = None
) -> Union[dict[str, Union[np.ndarray, pd.DataFrame, LabelMapping]], None]:
@@ -436,22 +507,15 @@ def read_colorchecker_mask(
- median_table: Table with median spectra (unnormalized and L1-normalized) for each color chip.
- label_mapping: The label mapping object to interpret the values of the mask array.
"""
- mask_dir = self.image_dir / "annotations"
- mask_paths = list(mask_dir.glob(f"{self.timestamp}#squares#automask#*.png")) # searching for automasks
- if len(mask_paths) == 0:
- mask_paths = list(mask_dir.glob(f"{self.timestamp}#polygon#*.nrrd")) # searching for MITK masks
- assert len(mask_paths) <= 1, f"Too many colorchecker masks available for {self.image_dir}"
+ mask_path = self.colorchecker_annotation_path()
- if len(mask_paths) == 0:
+ if mask_path is None:
settings.log.warning(
f"Colorchecker mask cannot be found for {self.image_dir}. Please refer to"
" ColorcheckerMaskCreation.ipynb or use MITK to generate the corresponding colorchecker mask!"
)
return None
-
else:
- mask_path = mask_paths[0]
-
from htc.utils.ColorcheckerReader import ColorcheckerReader
if mask_path.suffix == ".png":
@@ -643,7 +707,7 @@ def _load_precomputed_parameters(self) -> Union[Union[np.ndarray, int], dict[Any
return decompress_file(params_path)
- def compute_sto2(self, cube: np.ndarray = None) -> np.ndarray:
+ def compute_sto2(self, cube: np.ndarray = None, version: str = None) -> np.ndarray:
"""
Computes the Tissue oxygen saturation (StO2) for the image.
@@ -666,29 +730,34 @@ def compute_sto2(self, cube: np.ndarray = None) -> np.ndarray:
Args:
cube: If not None, will use this cube instead of loading it.
+ version: Name of the function to use for computing the StO2 parameter. If None, the official function will be chosen based on the Camera_CamID. Currently, `calc_sto2` is used for the Halogen formula and `calc_sto2_2_helper` for the LED formula.
Returns: The StO2 parameter image (as numpy masked array) with values in the range [0;1].
"""
- try:
- from htc.tivita.functions_official import calc_sto2, calc_sto2_2_helper, detect_background
+ detect_background = self._code_from_official("detect_background")
+ if version is not None:
+ calc_sto2 = self._code_from_official(version)
+ else:
+ if self.meta("Camera_CamID") is None or self.meta("Camera_CamID") in [
+ "0102-00057",
+ "0102-00085",
+ "0102-00098",
+ "0202-00113",
+ "0202-00118",
+ ]:
+ calc_sto2 = self._code_from_official("calc_sto2") # Halogen formula should be used
+ else:
+ calc_sto2 = self._code_from_official("calc_sto2_2_helper") # LED formula should be used
+ if calc_sto2 is not None and detect_background is not None:
with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
cube = self.read_cube() if cube is None else cube
- if self.meta("Camera_CamID") is None or self.meta("Camera_CamID") in [
- "0102-00057",
- "0102-00085",
- "0102-00098",
- "0202-00113",
- "0202-00118",
- ]:
- sto2_img = calc_sto2(cube) # Halogen formula should be used
- else:
- sto2_img = calc_sto2_2_helper(cube) # LED formula should be used
+ sto2_img = calc_sto2(cube)
param = np.nan_to_num(np.rot90(sto2_img, k=-1), copy=False)
background = np.rot90(detect_background(cube), k=-1)
return np.ma.MaskedArray(param, background == 0, fill_value=0)
- except ImportError:
+ else:
params = self._load_precomputed_parameters()
return np.ma.MaskedArray(params["StO2"], params["background"], fill_value=0)
@@ -1102,6 +1171,18 @@ def image_name_annotations(self) -> str:
def datetime(self) -> datetime:
return datetime.strptime(self.timestamp, "%Y_%m_%d_%H_%M_%S")
+ def is_timestamp_folder(self) -> bool:
+ """
+ Check if this data path points to a timestamp folder (as it is usually the case for image folders).
+
+ Returns: True if this data path points to a valid timestamp, False otherwise.
+ """
+ try:
+ self.datetime()
+ return True
+ except ValueError:
+ return False
+
def annotation_names(self) -> list[str]:
"""
Returns the names of all associated annotations for this image.
@@ -1158,11 +1239,26 @@ def _build_cache(local: bool) -> dict[str, Any]:
assert len(table_path) == 1, f"More than one meta table found for {entry}"
table_path = table_path[0]
- dsettings = DatasetSettings(entry["path_data"] / "dataset_settings.json")
df = pd.read_feather(table_path)
- df["dsettings"] = dsettings
+
+ if "dataset_settings_path" in df.columns:
+ # Subdatasets may have their own path to the dataset settings
+ dsettings_mapping = {
+ f: DatasetSettings(entry["path_data"] / f) for f in df.dataset_settings_path.unique()
+ }
+ df["dsettings"] = df.dataset_settings_path.map(dsettings_mapping)
+
+ # The data directory always points to the folder which contains the dataset settings (may be the subdataset instead of the root dataset)
+ data_dir_mapping = {
+ f: (entry["path_data"] / f).parent for f in df.dataset_settings_path.unique()
+ }
+ df["data_dir"] = df.dataset_settings_path.map(data_dir_mapping)
+ else:
+ dsettings = DatasetSettings(entry["path_data"] / "dataset_settings.json")
+ df["dsettings"] = dsettings
+
df["dataset_env_name"] = env_key
- df["data_dir"] = entry["path_data"]
+ df["root_data_dir"] = entry["path_data"]
df["intermediates_dir"] = entry["path_intermediates"]
# Append the metadata for the current dataset to the global cache
@@ -1266,8 +1362,8 @@ def from_image_name(image_name: str) -> Self:
)
DataPath._data_paths_cache[cache_name] = DataPathClass(
- match["data_dir"] / match["path"],
- match["data_dir"],
+ match["root_data_dir"] / match["path"],
+ match["data_dir"] if "data_dir" in match else match["root_data_dir"],
match["intermediates_dir"],
match["dsettings"],
annotation_name,
@@ -1277,7 +1373,7 @@ def from_image_name(image_name: str) -> Self:
@staticmethod
def iterate(
- data_dir: Path,
+ data_dir: Union[str, Path],
filters: Union[list[Callable[[Self], bool]], None] = None,
annotation_name: Union[str, list[str]] = None,
) -> Iterator[Self]:
@@ -1305,6 +1401,8 @@ def iterate(
Returns: Generator with all path objects.
"""
+ if type(data_dir) == str:
+ data_dir = Path(data_dir)
if filters is None:
filters = []
@@ -1340,7 +1438,11 @@ def iterate(
parts.pop()
if DataPathClass is None:
- if not (data_dir / "dataset_settings.json").exists() and (data_dir / "data").exists():
+ if (
+ not (data_dir / "dataset_settings.json").exists()
+ and (data_dir / "data").exists()
+ and not data_dir.name.startswith("Cat_")
+ ):
settings.log.warning(
f"No dataset_settings.json file found in the data directory {data_dir} but the subdirectory data"
" exists in this directory. For the default datasets, please point data_dir to the data"
diff --git a/htc/tivita/DataPathMultiorgan.py b/htc/tivita/DataPathMultiorgan.py
index 832ee5e..49382f2 100644
--- a/htc/tivita/DataPathMultiorgan.py
+++ b/htc/tivita/DataPathMultiorgan.py
@@ -13,17 +13,29 @@
# We use a decorator to wrap some of the path functions. This is important for the files
-# which are stored in the overlap folder because then the image data is stored in the semantic
+# which are stored in the overlap folder because then the image data is stored in a different
# dataset (due to multiple annotations)
-def use_semantic_path(method: Callable) -> Callable:
+def use_overlap_path(method: Callable) -> Callable:
@functools.wraps(method)
- def _use_semantic_path(self):
+ def _use_overlap_path(self):
if self.is_overlap:
image_dir_old = self.image_dir
- image_dir_new = (
- settings.data_dirs["PATH_Tivita_multiorgan_semantic"] / "subjects" / self.subject_name / self.timestamp
- )
- assert image_dir_new.exists(), f"Cannot find the path {image_dir_new}"
+ potential_data_dirs = [
+ settings.data_dirs["PATH_Tivita_multiorgan_semantic"],
+ settings.data_dirs["PATH_Tivita_multiorgan_masks"],
+ ]
+ image_dir_new_found = False
+
+ for potential_data_dir in potential_data_dirs:
+ image_dir_new = potential_data_dir / "subjects" / self.subject_name / self.timestamp
+
+ if image_dir_new.exists():
+ image_dir_new_found = True
+ break
+
+ assert (
+ image_dir_new_found
+ ), f"Cannot find the overlap image name in any of the potential dataset dirs {potential_data_dirs}"
self.image_dir = image_dir_new
res = method(self)
@@ -33,7 +45,7 @@ def _use_semantic_path(self):
return res
- return _use_semantic_path
+ return _use_overlap_path
class DataPathMultiorgan(DataPath):
@@ -78,15 +90,15 @@ def image_name_parts(self) -> list[str]:
return parts
- @use_semantic_path
+ @use_overlap_path
def cube_path(self) -> Path:
return super().cube_path()
- @use_semantic_path
+ @use_overlap_path
def camera_meta_path(self) -> Path:
return super().camera_meta_path()
- @use_semantic_path
+ @use_overlap_path
def rgb_path_reconstructed(self) -> Path:
return super().rgb_path_reconstructed()
diff --git a/htc/tivita/DatasetSettings.py b/htc/tivita/DatasetSettings.py
index 014a329..bd9bcf3 100644
--- a/htc/tivita/DatasetSettings.py
+++ b/htc/tivita/DatasetSettings.py
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: MIT
import json
+import threading
from pathlib import Path
from typing import Any, Union
@@ -43,6 +44,21 @@ def __init__(self, path_or_data: Union[str, Path, dict]):
self._data = None
self._path = path_or_data
+ self._mutex = threading.Lock()
+
+ def __getstate__(self):
+ state = self.__dict__.copy()
+
+ # The lock cannot be pickled, but this is not a problem: it is only needed to synchronize threads so that the data is only modified once inside one process
+ del state["_mutex"]
+ return state
+
+ def __setstate__(self, state):
+ self.__dict__.update(state)
+
+ # Just create a new lock for every process
+ self._mutex = threading.Lock()
+
def __repr__(self) -> str:
res = (
"Settings for the dataset"
@@ -82,18 +98,19 @@ def __contains__(self, key: str) -> bool:
@property
def settings_path(self) -> Union[None, Path]:
"""
- Returns: The Path to the dataset_settings.json file if it exists or None if not.
+ Returns: The Path to the dataset_settings.json file if it exists (either at the specified path or any parent directory) or None if not.
"""
if self._path is None:
return None
else:
- if self._path.exists():
- p = self._path
- if self._path.is_dir():
- p /= "dataset_settings.json"
-
- return p if p.exists() else None
+ if self._path.is_file():
+ return self._path
else:
+ possible_locations = [self._path] + list(self._path.parents)
+ for p in possible_locations:
+ if (path := p / "dataset_settings.json").is_file():
+ return path
+
return None
@property
@@ -102,10 +119,14 @@ def data(self) -> dict:
if self.settings_path is None:
self._data = {}
else:
- with self.settings_path.open(encoding="utf-8") as f:
- self._data = json.load(f)
-
- self._data_conversions()
+ # The data should only be loaded and converted by one thread at a time
+ with self._mutex:
+ # By now, another thread might have already loaded the data
+ if self._data is None:
+ with self.settings_path.open(encoding="utf-8") as f:
+ self._data = json.load(f)
+
+ self._data_conversions()
return self._data
diff --git a/htc/tivita/rgb.py b/htc/tivita/rgb.py
index d316ddf..4cd079c 100644
--- a/htc/tivita/rgb.py
+++ b/htc/tivita/rgb.py
@@ -1,10 +1,12 @@
# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
# SPDX-License-Identifier: MIT
+from functools import partial
from pathlib import Path
import numpy as np
import torch
+from kornia.geometry import HomographyWarper, ImageRegistrator, Similarity
from PIL import Image
diff --git a/htc/utils/Config.py b/htc/utils/Config.py
index c36adfa..88e6dba 100644
--- a/htc/utils/Config.py
+++ b/htc/utils/Config.py
@@ -123,30 +123,45 @@ def __init__(self, path_or_dict: Union[str, Path, dict], use_shared_dict=False):
self.data[k] = v
if self["inherits"]:
- extension = "" if self["inherits"].endswith(".json") else ".json"
- inherits = Path(self["inherits"] + extension)
-
- # We try several locations to find the parent config file
- possible_paths = Config._get_possible_paths(inherits)
- if self.path_config is not None:
- possible_paths.append(self.path_config.with_name(inherits.name)) # Same directory as the child config
-
- parent_path = None
- for path in possible_paths:
- if path.exists():
- parent_path = path
- break
-
- assert parent_path is not None, (
- f"Cannot find the path to the parent configuration file {inherits}. Tried at the following locations:"
- f" {possible_paths}"
- )
- data_parent = Config(parent_path).data
-
- # The existing data (=data from the child) has precedence over the parent data
- self.data = dict(merge_dicts_deep(data_parent, self.data))
+ if type(self["inherits"]) == str:
+ self["inherits"] = [self["inherits"]]
+
+ for parent in self["inherits"]:
+ extension = "" if parent.endswith(".json") else ".json"
+ inherits = Path(parent + extension)
+
+ # We try several locations to find the parent config file
+ possible_paths = Config._get_possible_paths(inherits)
+ if self.path_config is not None:
+ possible_paths.append(
+ self.path_config.with_name(inherits.name)
+ ) # Same directory as the child config
+
+ parent_path = None
+ for path in possible_paths:
+ if path.exists():
+ parent_path = path
+ break
+
+ assert parent_path is not None, (
+ f"Cannot find the path to the parent configuration file {inherits}. Tried at the following"
+ f" locations: {possible_paths}"
+ )
+
+ config_parent = Config(parent_path)
+ if self["inherits_skip"]:
+ for key in self["inherits_skip"]:
+ del config_parent[key]
+ data_parent = config_parent.data
+
+ # The existing data (=data from the child) has precedence over the parent data
+ self.data = dict(merge_dicts_deep(data_parent, self.data))
+
+ # Extend all config keys from the parent (but not the keys of this config itself due to the possibility of multiple inheritance)
+ self._extend_lists(config_parent)
del self["inherits"]
+ del self["inherits_skip"]
self._extend_lists()
@@ -157,9 +172,12 @@ def __init__(self, path_or_dict: Union[str, Path, dict], use_shared_dict=False):
else:
self._used_keys = {}
- def _extend_lists(self) -> None:
+ def _extend_lists(self, base_config: "Config" = None) -> None:
+ if base_config is None:
+ base_config = self
+
# Users can extend additional lists by adding the same key with _extends appended
- for k, v in self.items():
+ for k, v in base_config.items():
if k.endswith("_extends") and type(v) == list:
k_original = k.removesuffix("_extends")
if k_original in self:
@@ -168,7 +186,7 @@ def _extend_lists(self) -> None:
" supported for the extends feature"
)
self[k_original] = self[k_original] + v
- del self[k]
+ del base_config[k]
def _copy_data(self, dict_data: dict) -> dict:
new_data = {}
diff --git a/htc/utils/DelayedFileHandler.py b/htc/utils/DelayedFileHandler.py
index ae06226..2d6c26d 100644
--- a/htc/utils/DelayedFileHandler.py
+++ b/htc/utils/DelayedFileHandler.py
@@ -29,8 +29,8 @@ def set_filename(self, filename: Path, **kwargs) -> None:
self.file_handler = logging.FileHandler(filename, **kwargs)
# Apply existing settings to the new file handler
- for filter in self.filters:
- self.file_handler.addFilter(filter)
+ for f in self.filters:
+ self.file_handler.addFilter(f)
self.file_handler.setFormatter(self.formatter)
self.file_handler.setLevel(self.level)
@@ -39,11 +39,11 @@ def set_filename(self, filename: Path, **kwargs) -> None:
self.file_handler.emit(record)
self.cached_records = []
- def addFilter(self, filter: Union[logging.Filter, Callable]) -> None:
+ def addFilter(self, filter_func: Union[logging.Filter, Callable]) -> None:
if self.file_handler is None:
- super().addFilter(filter)
+ super().addFilter(filter_func)
else:
- self.file_handler.addFilter(filter)
+ self.file_handler.addFilter(filter_func)
def setFormatter(self, fmt: str) -> None:
if self.file_handler is None:
diff --git a/htc/utils/DomainMapper.py b/htc/utils/DomainMapper.py
index 19af53c..132d8cb 100644
--- a/htc/utils/DomainMapper.py
+++ b/htc/utils/DomainMapper.py
@@ -75,7 +75,7 @@ def _init_attributes(self) -> tuple[Union[list, list[str]], dict, Any]:
elif "subject_index" == self.target_domain:
domains, domain_mapping = self._pig_domains(dataset, paths)
elif "species_index" == self.target_domain:
- domains, domain_mapping = self._species_domains(["Pig", "Human"], paths)
+ domains, domain_mapping = self._species_domains(paths)
try:
from htc.human.settings_human import settings_human
@@ -124,8 +124,23 @@ def _pig_domains(dataset: list[str], paths: list[DataPath]) -> tuple[list, dict]
return dataset, {x.image_name(): x.subject_name for x in paths}
@staticmethod
- def _species_domains(domains: list, paths: list[DataPath]) -> tuple[list, dict]:
- return domains, {x.image_name(): domains[1] if "SPACE_" in x.subject_name else domains[0] for x in paths}
+ def _species_domains(paths: list[DataPath]) -> tuple[list, dict]:
+ domains = set()
+ domain_mapping = {}
+ for p in paths:
+ if p.subject_name.startswith("SPACE_"):
+ domain_mapping[p.image_name()] = "human"
+ domains.add("human")
+ elif p.subject_name.startswith("P"):
+ domain_mapping[p.image_name()] = "pig"
+ domains.add("pig")
+ elif p.subject_name.startswith("R"):
+ domain_mapping[p.image_name()] = "rat"
+ domains.add("rat")
+ else:
+ raise ValueError(f"Unknown species for path: {p}")
+
+ return sorted(domains), domain_mapping
def domain_name(self, image_name: str) -> str:
"""
diff --git a/htc/utils/DuplicateFilter.py b/htc/utils/DuplicateFilter.py
index 6dfaf75..1b32fe2 100644
--- a/htc/utils/DuplicateFilter.py
+++ b/htc/utils/DuplicateFilter.py
@@ -10,7 +10,7 @@ def __init__(self):
super().__init__()
self.msgs = set()
- def filter(self, record):
+ def filter(self, record): # noqa: A003
rv = record.msg not in self.msgs
self.msgs.add(record.msg)
return rv
diff --git a/htc/utils/LDA.py b/htc/utils/LDA.py
index f60c555..2006e9d 100644
--- a/htc/utils/LDA.py
+++ b/htc/utils/LDA.py
@@ -74,9 +74,9 @@ def LDA(data: np.ndarray, labels: np.ndarray) -> tuple[np.ndarray, np.ndarray, n
) # Using the pseudo-inverse matrix gives stabler results
# Sort the eigenvalues descendingly (https://stackoverflow.com/questions/8092920/sort-eigenvalues-and-associated-eigenvectors-after-using-numpy-linalg-eig-in-pyt)
- eval, evec = np.linalg.eig(scatter)
- idx = eval.argsort()[::-1]
- eval = eval[idx]
- evec = evec[:, idx]
+ eigenvalues, eigenvectors = np.linalg.eig(scatter)
+ idx = eigenvalues.argsort()[::-1]
+ eigenvalues = eigenvalues[idx]
+ eigenvectors = eigenvectors[:, idx]
- return evec, np.matmul(data, evec), eval
+ return eigenvectors, np.matmul(data, eigenvectors), eigenvalues
diff --git a/htc/utils/LabelMapping.py b/htc/utils/LabelMapping.py
index 29725bb..e436f6b 100644
--- a/htc/utils/LabelMapping.py
+++ b/htc/utils/LabelMapping.py
@@ -1,9 +1,7 @@
# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
# SPDX-License-Identifier: MIT
-import importlib
import itertools
-import re
from pathlib import Path
from typing import TYPE_CHECKING, Union
@@ -15,6 +13,8 @@
from htc.settings import settings
from htc.tivita.DatasetSettings import DatasetSettings
from htc.utils.Config import Config
+from htc.utils.Task import Task
+from htc.utils.type_from_string import variable_from_string
if TYPE_CHECKING:
from htc.tivita.DataPath import DataPath
@@ -243,22 +243,22 @@ def map_tensor(self, tensor: Union[torch.Tensor, np.ndarray], old_mapping: Self)
return tensor_mapping(tensor, old_new_mapping)
- def rename(self, rename_dict: dict[str, str]) -> None:
+ def rename(self, rename_mapping: dict[str, str]) -> None:
"""
Rename existing label names to new label names.
Args:
- rename_dict: dict with key being what label should be renamed and value being the new label name.
+ rename_mapping: Mapping with key being what label should be renamed and value being the new label name.
"""
self.mapping_name_index = {
- rename_dict.get(label_name, label_name): label_index
+ rename_mapping.get(label_name, label_name): label_index
for label_name, label_index in self.mapping_name_index.items()
}
self.label_colors = {
- rename_dict.get(label_name, label_name): color for label_name, color in self.label_colors.items()
+ rename_mapping.get(label_name, label_name): color for label_name, color in self.label_colors.items()
}
self.mapping_index_name = {
- label_index: rename_dict.get(label_name, label_name)
+ label_index: rename_mapping.get(label_name, label_name)
for label_index, label_name in self.mapping_index_name.items()
}
@@ -315,6 +315,9 @@ def from_path(cls, path: "DataPath") -> Self:
Constructs a label mapping based on the default labels of the dataset accessed via the path object.
These are the labels as defined by the clinicians.
+
+ Args:
+ path: Data path to the image.
"""
label_colors = path.dataset_settings["label_colors"] if "label_colors" in path.dataset_settings else None
return cls(
@@ -335,31 +338,34 @@ def from_data_dir(cls, data_dir: Path) -> Self:
return cls(dsettings["label_mapping"], dsettings["last_valid_label_index"])
@classmethod
- def from_config(cls, config: Config) -> Self:
+ def from_config(cls, config: Config, task: Task = None, image_label_entry_index: int = 0) -> Self:
"""
- Constructs a label mapping as defined in the config file. config['label_mapping'] can be defined as:
+ Constructs a label mapping as defined in the config file. For example, `config['label_mapping']` can be defined as:
* a LabelMapping instance.
- * a config definition string in the format module>variable (e.g. htc.settings_seg>label_mapping). module must be importable and variable must exist in the module.
- * a dict from a JSON file (as saved via to_class_dict()).
- * a dict with label_name:label_index definitions (like settings_seg.label_mapping) in which case settings.label_index_thresh will be used to determine invalid labels.
+ * a config definition string in the format module>variable (e.g. `htc.settings_seg>label_mapping`). module must be importable and variable must exist in the module.
+ * a dict from a JSON file (as saved via `to_class_dict()`).
+ * a dict with label_name:label_index definitions (like `settings_seg.label_mapping`) in which case `settings.label_index_thresh` will be used to determine invalid labels.
+
+ Args:
+ config: The config object.
+ task: The task for which the mapping should be constructed. For segmentation tasks, the mapping must be defined in `config['label_mapping']` and for classification tasks it must be defined in `config['input/image_labels'][image_label_entry_index]['image_label_mapping']`. If None, the task will be determined from the config.
+ image_label_entry_index: Index of the entry in the config['input/image_labels'] list of the config file (used only for classification tasks).
"""
- assert "label_mapping" in config, "There is no label mapping in the config file"
- mapping = config["label_mapping"]
+ if task is None:
+ task = Task.from_config(config)
+
+ if task == Task.SEGMENTATION:
+ assert "label_mapping" in config, "There is no label mapping in the config file"
+ mapping = config["label_mapping"]
+ elif task == Task.CLASSIFICATION:
+ assert "input/image_labels" in config, "There must be image labels defined for classification tasks"
+ mapping = config["input/image_labels"][image_label_entry_index]["image_label_mapping"]
+ else:
+ raise ValueError(f"Invalid task: {task}")
if type(mapping) == str:
- match = re.search(r"^([\w.]+)>(\w+)$", mapping)
- assert match is not None, (
- f"Could not parse the string {mapping} as a valid config definition. It must be in the format"
- " module>variable (e.g. htc.settings_seg>label_mapping) and must refer to a valid Python script"
- )
-
- module = importlib.import_module(match.group(1))
- if not hasattr(module, match.group(2)):
- # In case settings is an object
- module = getattr(module, match.group(1).split(".")[-1])
- mapping = getattr(module, match.group(2))
- # Now load as usual
+ mapping = variable_from_string(mapping)
if isinstance(mapping, LabelMapping):
mapping_obj = mapping
@@ -389,5 +395,12 @@ def from_config(cls, config: Config) -> Self:
mapping_obj = cls(label_mapping)
- config["label_mapping"] = mapping_obj # Cache for future use
+ # Cache for future use
+ if task == Task.SEGMENTATION:
+ config["label_mapping"] = mapping_obj
+ elif task == Task.CLASSIFICATION:
+ config["input/image_labels"][image_label_entry_index]["image_label_mapping"] = mapping_obj
+ else:
+ raise ValueError(f"Invalid task: {task}")
+
return mapping_obj
diff --git a/htc/utils/MultiPath.py b/htc/utils/MultiPath.py
index e30e704..2891a60 100644
--- a/htc/utils/MultiPath.py
+++ b/htc/utils/MultiPath.py
@@ -124,22 +124,22 @@ def __repr__(self):
/y (exists=False)
/x/y (exists=False)
"""
- repr = f"Class: {self.__class__.__name__}\n"
+ text = f"Class: {self.__class__.__name__}\n"
root_location = Path(super().__str__())
- repr += f"Root location: {root_location} (exists={root_location.exists()})\n"
+ text += f"Root location: {root_location} (exists={root_location.exists()})\n"
if self._default_needle is not None:
repr_needle = f" (considering needle {self._default_needle})"
else:
repr_needle = ""
best_location = self.find_best_location()
- repr += f"Best location{repr_needle}: {best_location} (exists={best_location.exists()})\n"
+ text += f"Best location{repr_needle}: {best_location} (exists={best_location.exists()})\n"
- repr += "All locations:\n"
- repr += "\n".join([str(a) + f" (exists={a.exists()})" for a in self.possible_locations()])
+ text += "All locations:\n"
+ text += "\n".join([str(a) + f" (exists={a.exists()})" for a in self.possible_locations()])
- return repr
+ return text
def __reduce__(self):
# Called when pickling path objects (e.g. multiprocessing)
@@ -181,17 +181,17 @@ def name(self) -> str:
# Some methods also rely on this property
return self.find_best_location().name
- def iterdir(self, filter: Callable[[Path], bool] = None):
+ def iterdir(self, filter_func: Callable[[Path], bool] = None):
# We also need to override the iterate methods to return paths from all alternatives
- for location in self.possible_locations(only_existing=True, filter=filter):
+ for location in self.possible_locations(only_existing=True, filter_func=filter_func):
yield from location.iterdir()
- def glob(self, pattern, filter: Callable[[Path], bool] = None):
- for location in self.possible_locations(only_existing=True, filter=filter):
+ def glob(self, pattern, filter_func: Callable[[Path], bool] = None):
+ for location in self.possible_locations(only_existing=True, filter_func=filter_func):
yield from location.glob(pattern)
- def rglob(self, pattern, filter: Callable[[Path], bool] = None):
- for location in self.possible_locations(only_existing=True, filter=filter):
+ def rglob(self, pattern, filter_func: Callable[[Path], bool] = None):
+ for location in self.possible_locations(only_existing=True, filter_func=filter_func):
yield from location.rglob(pattern)
def mkdir(self, *args, **kwargs):
@@ -330,7 +330,7 @@ def find_best_location(self, writing: bool = False) -> Path:
# There was a match, but the path does not exist, still better than the root location
return matched_location
- def possible_locations(self, only_existing=False, filter: Callable[[Path], bool] = None) -> list[Path]:
+ def possible_locations(self, only_existing=False, filter_func: Callable[[Path], bool] = None) -> list[Path]:
"""
Lists all locations which can be accessed by this multi path.
@@ -341,7 +341,7 @@ def possible_locations(self, only_existing=False, filter: Callable[[Path], bool]
Args:
only_existing: Include only locations which exist.
- filter: Filter function to select locations. The function receives a paths and must return True if the path should be used.
+ filter_func: Filter function to select locations. The function receives a path and must return True if the path should be used.
Returns: All possible locations for the current path.
"""
@@ -364,8 +364,8 @@ def possible_locations(self, only_existing=False, filter: Callable[[Path], bool]
new = unify_path(new, resolve_symlinks=False)
locations.append(new)
- if filter is not None:
- locations = [l for l in locations if filter(l)]
+ if filter_func is not None:
+ locations = [l for l in locations if filter_func(l)]
if only_existing:
locations = [l for l in locations if l.exists()]
diff --git a/htc/utils/Task.py b/htc/utils/Task.py
new file mode 100644
index 0000000..9b3197d
--- /dev/null
+++ b/htc/utils/Task.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
+# SPDX-License-Identifier: MIT
+
+from enum import Enum, unique
+from typing_extensions import Self
+
+
+@unique
+class Task(Enum):
+ """This enum can be used to distinguish between a segmentation task (with pixel-level labels) and a classification task (with image-level labels)."""
+
+ SEGMENTATION = "segmentation"
+ CLASSIFICATION = "classification"
+
+ def labels_name(self) -> str:
+ """Returns the name of the labels attribute (e.g., used in DatasetMedianPixel) or the name of the key in the batch which stores the labels."""
+ if self == Task.SEGMENTATION:
+ return "labels"
+ elif self == Task.CLASSIFICATION:
+ return "image_labels"
+ else:
+ raise ValueError(f"Unknown task: {self}")
+
+ @classmethod
+ def from_config(cls, config) -> Self:
+ return cls(config.get("task", "segmentation"))
diff --git a/htc/utils/blosc_compression.py b/htc/utils/blosc_compression.py
index 7136ba5..c69ca79 100644
--- a/htc/utils/blosc_compression.py
+++ b/htc/utils/blosc_compression.py
@@ -61,24 +61,36 @@ def compress_file(path: Path, data: Union[np.ndarray, dict[Any, np.ndarray]]) ->
def decompress_file(
- path: Path, start_pointer: Union[int, dict[str, int]] = None
-) -> Union[Union[np.ndarray, int], dict[Any, Union[np.ndarray, int]]]:
+ path: Path, start_pointer: Union[int, dict[str, int]] = None, load_keys: list[str] = None, return_meta: bool = False
+) -> Union[
+ Union[np.ndarray, int],
+ dict[str, Union[np.ndarray, int]],
+ tuple[
+ Union[np.ndarray, int],
+ dict[str, Union[np.ndarray, int]],
+ Union[tuple[tuple[int, ...], np.dtype], dict[str, tuple[tuple[int, ...], np.dtype]]],
+ ],
+]:
"""
Decompresses a blosc file.
Args:
path: File to the blosc data.
start_pointer: If not None must be a valid memory address. It will be used to store the decompressed data directly into the provided memory location. This is, for example, useful if the data should be directly loaded into a shared memory buffer. If the compressed data contains a dictionary, the pointers must also be a dictionary with the keys corresponding to the (expected) keys in the compressed data. A pointer can only be used if the size and dtype of the decompressed data is known in advance.
+ load_keys: If not None and the compressed data contains a dictionary, only the keys in this list will be loaded. The other keys will be skipped.
+ return_meta: If True, additionally returns the array metadata, i.e. the result is a tuple (data, meta) where meta contains the (shape, dtype) information for each decompressed array.
Returns: Decompressed array data or the given pointer address. Depending on the file, this will either be directly the numpy array or a dict with all numpy arrays.
"""
res = {}
+ array_meta = {}
with path.open("rb") as f:
meta = pickle.load(f)
if type(meta) == tuple:
shape, dtype = meta
data = f.read()
+ array_meta = meta
if start_pointer is not None:
blosc.decompress_ptr(data, start_pointer)
@@ -89,7 +101,12 @@ def decompress_file(
res = array
else:
for name, (shape, dtype, size) in meta.items():
+ if load_keys is not None and name not in load_keys:
+ f.seek(size, 1)
+ continue
+
data = f.read(size)
+ array_meta[name] = (shape, dtype)
if start_pointer is not None:
blosc.decompress_ptr(data, start_pointer[name])
@@ -99,4 +116,7 @@ def decompress_file(
blosc.decompress_ptr(data, array.__array_interface__["data"][0])
res[name] = array
- return res
+ if return_meta:
+ return res, array_meta
+ else:
+ return res
diff --git a/htc/utils/colorchecker_mask_sketch.svg b/htc/utils/colorchecker_mask_sketch.svg
index 4be4d2c..f7a0505 100644
--- a/htc/utils/colorchecker_mask_sketch.svg
+++ b/htc/utils/colorchecker_mask_sketch.svg
@@ -1 +1 @@
-
\ No newline at end of file
+
diff --git a/htc/utils/colors.py b/htc/utils/colors.py
index 5b73449..9886f12 100644
--- a/htc/utils/colors.py
+++ b/htc/utils/colors.py
@@ -5,7 +5,7 @@
from pprint import pprint
import numpy as np
-from matplotlib.colors import to_hex, to_rgb
+from matplotlib.colors import LinearSegmentedColormap, to_hex, to_rgb
from scipy.spatial import distance
from htc.settings import settings
@@ -13,6 +13,27 @@
from htc.utils.helper_functions import sort_labels
+def lighten_color(color: str, amount: float) -> str:
+ """
+ Lightens the given color by the specified amount.
+
+ The color is interpolated with white so that this function has a similar effect as if a transparency is added to the color on a white background.
+
+ >>> lighten_color("#FF0000", 0.5)
+ '#ff8080'
+
+ Args:
+ color: The color to be lightened as hex string.
+ amount: The amount by which to lighten the color. Must be between 0 and 1.
+
+ Returns: The lightened color as hex string.
+ """
+ assert 0 <= amount <= 1, "Amount must be between 0 and 1"
+ cmap = LinearSegmentedColormap.from_list("lighten", [color, (1, 1, 1)])
+
+ return to_hex(cmap(amount))
+
+
def generate_distinct_colors(n_colors: int, existing_colors: list[tuple] = None) -> list[tuple]:
"""
Generates distinct random colors by maximizing the distance between the colors.
diff --git a/htc/utils/config.schema b/htc/utils/config.schema
index 754902a..8223d62 100644
--- a/htc/utils/config.schema
+++ b/htc/utils/config.schema
@@ -3,11 +3,36 @@
"$comment": "This schema file defines the common structure of the config files used in this repository. It is not a complete list but describes the most important properties.",
"type": "object",
"properties": {
+ "inherits": {
+ "description": "Path to a parent config file from which this config should inherit. Absolute, relative or package-relative paths are supported. Properties of the parent config are available as well. Properties of the child always have precedence over properties defined in one of the parents. Multiple inheritance is possible by passing an array of paths.",
+ "type": ["string", "array", "null"],
+ "items": {
+ "type": "string"
+ }
+ },
+ "inherits_skip": {
+ "description": "List of keys which should be excluded from inheritance (via full names, e.g., input/hierarchical_sampling).",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
"lightning_class": {
- "description": "Specification of the lightning class used for training. It must be in the format module>class (e.g. htc.models.image.LightningImage>LightningImage) and must refer to a valid Python class.",
+ "description": "Specification of the lightning class used for training. It must be in the format module>class (e.g. htc.models.image.LightningImage>LightningImage) and must refer to a valid Python class (see the type_from_string() function for more details).",
"type": "string"
},
+ "label_mapping": {
+ "description": "Mapping of label names to label indices. This will be used to remap the original labels of the dataset to the new labels for the current training. Can either be a dict with label_name:label_index mappings or a string in the format module>variable (e.g. htc.settings_seg>label_mapping) in which case it must refer to a variable inside a Python script.",
+ "type": ["object", "string"]
+ },
+ "task": {
+ "description": "Sets the main network task. Can either be set to segmentation or classification. Segmentation tasks use pixel-level labels whereas classification tasks use image-level labels. This is for example used to determine which labels should be considered for class weighting.",
+ "type": "string",
+ "enum": ["segmentation", "classification"],
+ "default": "segmentation"
+ },
"input": {
+ "description": "Common attributes which affect the loading of the data.",
"type": "object",
"properties": {
"data_spec": {
@@ -16,10 +41,14 @@
},
"preprocessing": {
"description": "Name of the folder inside the intermediates/preprocessing directory which contains preprocessed images (e.g. L1). It is also possible to specify the folder relative to the results_dir or results_dir/preprocessing. This may be useful for preprocessed files which are only needed for specific projects or on the cluster. Finally, the relative or absolute path to the folder can be specified as well.",
- "type": [
- "string",
- "null"
- ]
+ "type": ["string", "null"]
+ },
+ "spatial_shape": {
+ "description": "Explicitly set the shape of the input data. Useful for cases where it is different to the image shape (e.g., cropped images). If not set, the input shape will be inferred from the dataset settings.",
+ "type": ["array", "null"],
+ "items": {
+ "type": "integer"
+ }
},
"features_dtype": {
"description": "Explicitly set the dtype for the features. This determines with which dtype the features are transferred to the GPU. Usually, this is automatically inferred from the training precision (e.g. 16-mixed leads to float16) but in some cases you may want to have control over this parameter (e.g. for benchmarking).",
@@ -36,12 +65,83 @@
},
"preprocessing_additional": {
"description": "Additional preprocessing folder names which will be added to the batch as data_NAME. For example, if L1 is in the list, it will be added as data_L1.",
- "type": [
- "array",
- "null"
- ],
+ "type": ["array", "null"],
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "description": "Name of the preprocessing folder.",
+ "type": "string"
+ },
+ "parameter_names": {
+ "description": "Names of the parameter images which are concatenated along the channel dimension (see input/parameter_names).",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": ["StO2", "NIR", "TWI", "OHI", "TLI", "THI"]
+ }
+ },
+ "n_channels": {
+ "description": "Number of input channels for the additional input.",
+ "type": "integer"
+ }
+ },
+ "required": ["name"]
+ }
+ },
+ "meta": {
+ "type": "object",
+ "properties": {
+ "attributes": {
+ "description": "List of meta attributes to load.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "description": "Name of the attribute. This name will be passed on to path.meta().",
+ "type": "string"
+ },
+ "mapping": {
+ "description": "Mapping which is applied on the loaded metadata. This is useful to map strings to numbers.",
+ "type": "object"
+ }
+ },
+ "required": ["name"]
+ }
+ },
+ "dtype": {
+ "description": "Data type of the metadata table (also used for GPU transfer).",
+ "type": "string",
+ "default": "float32"
+ },
+ "missing_replacement": {
+ "description": "Value which will be used to replace missing values (nan values).",
+ "type": "number",
+ "default": -1
+ }
+ },
+ "required": ["attributes"]
+ },
+ "image_labels": {
+ "description": "Specifies how the image labels should be constructed from the metadata of the images. Each entry in this list results in one image label which can be used as classification target. The resulting image_labels entry (e.g. in the loaded sample or batch) can be a scalar (if only one image label is requested) or a two-dimensional tensor (if more than one image label is requested).",
+ "type": "array",
"items": {
- "type": "string"
+ "type": "object",
+ "properties": {
+ "meta_attributes": {
+ "description": "List of names for the metadata columns where the label should be extracted from (via DataPath.meta()). Specify more than one name if metadata from different datasets should be combined but the corresponding columns have different names.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "image_label_mapping": {
+ "description": "Defines an optional mapping to map the string meta values to indices. The format is the same as for the label_mapping attribute.",
+ "type": ["object", "string"]
+ }
+ },
+ "required": ["meta_attributes"]
}
},
"no_features": {
@@ -53,48 +153,44 @@
"type": "boolean"
},
"n_channels": {
- "description": "Specifies which data should be loaded. 100 = HSI data, 4 = TPI data, 3 = RGB data.",
- "type": "integer",
- "enum": [3, 4, 100]
+ "description": "Specifies the number of input channels. For example, 100 = HSI data, 4 = TPI data, 3 = RGB data.",
+ "type": "integer"
},
"n_classes": {
- "description": "Number of classes which should be used for training. This key is only required if a label mapping cannot be specified ( usually the number of classes is inferred from the label mapping).",
+ "description": "Number of classes which should be used for training. This key is only required if a label mapping cannot be specified (usually the number of classes is inferred from the label mapping).",
"type": "integer"
},
"epoch_size": {
"description": "Length of one training epoch in terms of number of images. Can also be a string like '500 images' and then it will translate automatically for non-image based models (like the pixel model) to the appropriate number depending on the image size.",
- "type": [
- "integer",
- "string"
- ]
+ "type": ["integer", "string"]
+ },
+ "target_domain": {
+ "description": "Specifies the target domain which should be taken into account in the model or for the sampling. If set to \"no_domain\", assigns each image to the same domain.",
+ "type": ["array", "null"],
+ "items": {
+ "type": "string",
+ "enum": ["camera_index", "subject_index", "species_index", "no_domain"]
+ }
+ },
+ "hierarchical_sampling": {
+ "description": "Use a batch sampling strategy which takes the hierarchy of the data into account. The first hierarchy level is defined by input/target_domain and the second by the subjects. If set to true, each batch contains images from each input/target_domain domain while maximizing diversity between subjects (it is preferred to take images from different subjects over images from the same subject). It can also be set to label or image_label to additionally ensure an equal label distribution in the batches (label uses the labels from the segmentation masks and image_label the metadata defined by input/image_labels). For example, with a batch size of 6 and two cameras (first hierarchical level), there might be two colon, two liver and two kidney images (or more precisely: images which contain at least colon, liver and kidney), one from each camera and from 6 different subjects. You can also add +oversampling to the string to enforce selecting images which contain underrepresented classes.",
+ "type": ["boolean", "string", "null"]
},
"transforms_cpu": {
"description": "Data augmentation specification as list of dicts (each entry denotes one augmentation step). Will be executed on the CPU (by the workers).",
- "type": [
- "array",
- "null"
- ]
+ "type": ["array", "null"]
},
"transforms_gpu": {
"description": "Data augmentation specification as list of dicts (each entry denotes one augmentation step). Will be executed on the GPU.",
- "type": [
- "array",
- "null"
- ]
+ "type": ["array", "null"]
},
"test_time_transforms_cpu": {
"description": "Similar to transforms_cpu but the transforms will also be applied during inference. This is for example useful for context analysis (e.g. removing organs in an image).",
- "type": [
- "array",
- "null"
- ]
+ "type": ["array", "null"]
},
"test_time_transforms_gpu": {
"description": "Similar to transforms_gpu but the transforms will also be applied during inference. This is for example useful for applying normalization.",
- "type": [
- "array",
- "null"
- ]
+ "type": ["array", "null"]
},
"patch_sampling": {
"description": "The strategy to extract patches from an image. `uniform` yields so many patches as a grid-based tiling would yield, i.e. the number of patches are simply a function of the patch and image size. `proportional` constraints the number of patches to the number of valid pixels, i.e. so many patches will be sampled until theoretically (!) all pixels are used. However, this it is not enforced that really all valid pixels are sampled. `all_valid` is similar to `proportional` but now makes sure that all valid pixels are part of a patch at least once. This is especially useful to ensure that smaller classes are sampled as well.",
@@ -107,10 +203,7 @@
},
"annotation_name": {
"description": "The annotations which should be loaded. Either a list of annotation names or 'all' if all available annotation names should be included in the batch. If no merge strategy is set (see merge_annotations), the annotations will appear as separate tensors with the name labels_annotation_name and valid_pixels_annotation_name. Please note that it is also possible to define the annotations you want to use on a per image bases by using the format image_name@name1&name.",
- "type": [
- "array",
- "string"
- ]
+ "type": ["array", "string"]
},
"merge_annotations": {
"description": "Merge strategy in case there is more than one annotation per image. 'union' merges all annotations in one image. It assumes that the annotations are conflict-free, i.e. that there will be no pixel with more than one class label (overlap on the same class label is fine). Later annotator names overwrite previous ones.",
@@ -119,15 +212,44 @@
}
}
},
- "label_mapping": {
- "description": "Mapping of label names to label indices. This will be used to remap the original labels of the dataset to the new labels for the current training. Can either be a dict with label_name:label_index mappings or a string in the format module>variable (e.g. htc.settings_seg>label_mapping) in which case it must refer to a variable inside a Python script.",
- "type": [
- "object",
- "string"
- ]
+ "optimization": {
+ "description": "Settings for the optimizer and the learning rate scheduler.",
+ "type": "object",
+ "properties": {
+ "optimizer": {
+ "description": "Settings for the optimizer. Except for the name, all attributes are passed on as arguments to the optimizer.",
+ "type": "object",
+ "properties": {
+ "name": {
+ "description": "Name of the optimizer inside the torch.optim module.",
+ "type": "string"
+ }
+ }
+ },
+ "optimizer_layer_settings": {
+ "description": "Layer-specific settings for the optimizer. This can be used to specify separate learning rates for different layers.",
+ "type": "object",
+ "patternProperties": {
+ ".*": {
+ "description": "The name of the property is interpreted as a regular expression which is matched against all layers of the model. The value must be an object with the layer-specific settings (e.g., learning rate).",
+ "type": "object"
+ }
+ }
+ },
+ "lr_scheduler": {
+ "description": "Settings for the learning rate scheduler. Except for the name, all attributes are passed on as arguments to the scheduler.",
+ "type": "object",
+ "properties": {
+ "name": {
+ "description": "Name of the learning rate scheduler inside the torch.optim.lr_scheduler module.",
+ "type": "string"
+ }
+ }
+ }
+ }
},
"model": {
- "description": "Settings to configure a neural network.",
+ "description": "Settings to configure a neural network. Most settings depend on the lightning class.",
"type": "object",
"properties": {
"pretrained_model": {
@@ -152,18 +274,22 @@
},
"dataloader_kwargs": {
"description": "Keyword arguments which are passed to the PyTorch dataloader.",
- "type": "object",
- "properties": {
- "batch_size": {
- "type": "integer"
- }
- }
+ "type": "object"
+ },
+ "trainer_kwargs": {
+ "description": "Keyword arguments which are passed to the PyTorch Lightning trainer.",
+ "type": "object"
+ },
+ "swa_kwargs": {
+ "description": "Keyword arguments which are passed to the SWA scheduler. If this attribute is present (and not null), SWA will be activated.",
+ "type": ["object", "null"]
},
"validation": {
+ "description": "Arguments which define how the validation is carried out (metric, checkpointing, etc.).",
"type": "object",
"properties": {
"dataset_index": {
- "description": "Index of the dataset which should be used for checkpointing (relevant if there is more than one validation dataset).",
+ "description": "Index of the dataset which should be used for checkpointing (relevant if there is more than one validation dataset). The index is defined by the order of the validation splits in the data spec. If not set, the checkpoint metric will be calculated based on the results from all validation datasets.",
"type": "integer"
},
"checkpoint_metric": {
@@ -176,15 +302,6 @@
"enum": ["best", "last", false]
}
}
- },
- "trainer_kwargs": {
- "description": "Keyword arguments which are passed to the PyTorch Lightning trainer.",
- "type": "object",
- "properties": {
- "max_epochs": {
- "type": "integer"
- }
- }
}
}
-}
\ No newline at end of file
+}
diff --git a/htc/utils/helper_functions.py b/htc/utils/helper_functions.py
index cfd71ea..4c9ad41 100644
--- a/htc/utils/helper_functions.py
+++ b/htc/utils/helper_functions.py
@@ -24,6 +24,7 @@
from htc.tivita.DatasetSettings import DatasetSettings
from htc.utils.Config import Config
from htc.utils.LabelMapping import LabelMapping
+from htc.utils.Task import Task
def basic_statistics(
@@ -92,11 +93,8 @@ def basic_statistics(
df["label_name"] = df_median["label_name_mapped"]
df["label_valid"] = [label_mapping.is_index_valid(i) for i in df["label_index"]]
- # Only include valid labels in the statistics
- df = df[df["label_valid"]]
-
# Sum together the pixels for labels with the same name
- df = df.groupby(sorted(set(df.columns.to_list()) - {"n_pixels"}), as_index=False, observed=True)[
+ df = df.groupby(sorted(set(df.columns.to_list()) - {"n_pixels"}), as_index=False, observed=True, dropna=False)[
"n_pixels"
].sum()
df = df.sort_values(by=["image_name", "label_index"])
@@ -111,6 +109,9 @@ def median_table(
image_names: list[str] = None,
label_mapping: LabelMapping = None,
annotation_name: Union[str, list[str]] = None,
+ additional_mappings: dict[str, LabelMapping] = None,
+ image_labels_column: list[dict[str, Union[list[str], LabelMapping]]] = None,
+ config: Config = None,
) -> pd.DataFrame:
"""
This function is the general entry point for reading the median spectra tables. You can either read the table from a specific dataset or provide image names for which you want to have the spectra (also works if the names come from different datasets).
@@ -123,7 +124,7 @@ def median_table(
Besides basic info about the image and the median spectra (`median_normalized_spectrum`), all available metadata is included in the table as well:
>>> df.columns.to_list()
- ['image_name', 'subject_name', 'timestamp', 'label_index', 'label_name', 'median_spectrum', 'std_spectrum', 'median_normalized_spectrum', 'std_normalized_spectrum', 'n_pixels', 'median_sto2', 'std_sto2', 'median_nir', 'std_nir', 'median_twi', 'std_twi', 'median_ohi', 'std_ohi', 'median_thi', 'std_thi', 'median_tli', 'std_tli', 'image_labels', 'Camera_CamID', 'Camera_Exposure', 'Camera_analoger Gain', 'Camera_digitaler Gain', 'Camera_Speed', 'SW_Name', 'SW_Version', 'Fremdlichterkennung_Fremdlicht erkannt?', 'Fremdlichterkennung_PixelmitFremdlicht', 'Fremdlichterkennung_Breite LED Rot', 'Fremdlichterkennung_Breite LED Gruen', 'Fremdlichterkennung_Grenzwert Pixelanzahl', 'Fremdlichterkennung_Intensity Grenzwert', 'Aufnahme_Aufnahmemodus', 'camera_name', 'path', 'annotation_name']
+ ['image_name', 'subject_name', 'timestamp', 'label_index', 'label_name', 'median_spectrum', 'std_spectrum', 'median_normalized_spectrum', 'std_normalized_spectrum', 'n_pixels', 'median_sto2', 'std_sto2', 'median_nir', 'std_nir', 'median_twi', 'std_twi', 'median_ohi', 'std_ohi', 'median_thi', 'std_thi', 'median_tli', 'std_tli', 'image_labels', 'Camera_CamID', 'Camera_Exposure', 'Camera_analoger Gain', 'Camera_digitaler Gain', 'Camera_Speed', 'SW_Name', 'SW_Version', 'Fremdlichterkennung_Fremdlicht erkannt?', 'Fremdlichterkennung_PixelmitFremdlicht', 'Fremdlichterkennung_Breite LED Rot', 'Fremdlichterkennung_Breite LED Gruen', 'Fremdlichterkennung_Grenzwert Pixelanzahl', 'Fremdlichterkennung_Intensity Grenzwert', 'Aufnahme_Aufnahmemodus', 'camera_name', 'path', 'dataset_settings_path', 'annotation_name']
This function can also be used to select specific annotations, either globally per dataset:
>>> df = median_table(dataset_name="2021_02_05_Tivita_multiorgan_semantic", annotation_name="semantic#intra1")
@@ -138,15 +139,40 @@ def median_table(
Note: In the original table, one row denotes one label of one image from one annotator which also corresponds to the default of this function since the default annotation is used (similar to DataPath.read_segmentation()). If more than one annotation name is requested, a row is unique by its image_name, label_name and annotation_name.
Args:
- dataset_name: Name of the dataset from which you want to have the median spectra table. The name may include a # to specify a subdataset, e.g. `2021_02_05_Tivita_multiorgan_semantic#context_experiments` for the context_experiments folder inside the semantic data directory.
+ dataset_name: Name of the dataset from which you want to have the median spectra table. The name may include a # to specify a subdataset, e.g. `2021_02_05_Tivita_multiorgan_semantic#context_experiments` for the context_experiments folder inside the semantic data directory. If a dataset consists only of subdatasets (e.g., 2022_10_24_Tivita_sepsis_ICU), it is also possible to use the name of the main dataset to get all tables from the subdatasets (e.g., 2022_10_24_Tivita_sepsis_ICU to get 2022_10_24_Tivita_sepsis_ICU#calibrations + 2022_10_24_Tivita_sepsis_ICU#subjects).
table_name: For each dataset, there may be multiple tables for different purposes (e.g. tables with recalibrated data). With this switch, you specify which table should be loaded. The format of these tables on disk is `dataset_name@table_name@median_spectra@annotation_name.feather`. Per default, the normal table with the original data is loaded corresponding to tables on disk with the format `dataset_name@median_spectra@annotation_name.feather`, i.e. without the optional `@table_name`. Requested image names (`image_names` argument) are only considered from the tables matching the given `table_name`. It is not possible to select images from tables with different table names with this function since they may contain the same images.
paths: List of DataPath objects from which you want to have the median spectra. If annotation names are specified with a data path object, those names will be used. If specified, image_names must be None.
image_names: List of image names to search for (similar to the paths parameter). Image names may also include annotation names (e.g. subject#timestamp@name1&name2). It is not ensured that the resulting table contains all requested images because some images may lack annotations or are filtered out by the label_mapping. If specified, paths must be None.
- label_mapping: The target label mapping. There will be a new label_index_mapped column (and a new label_name_mapped column with the new names defined by the mapping) and the old label_index column will be removed (since the label_index is not unique across datasets). If set to None, then mapping is not carried out.
+ label_mapping: The target label mapping. There will be a new label_index_mapped column (and a new label_name_mapped column with the new names defined by the mapping) and the old label_index column will be removed (since the label_index is not unique across datasets). Only valid labels will be included in the resulting table. If set to None, then mapping is not carried out.
annotation_name: Unique name of the annotation(s) for cases where multiple annotations exist (e.g. inter-rater variability). If None, will use the default from the dataset. If the dataset does not have a default (i.e. the annotation_name_default is missing in the dataset_settings.json file), all annotations are returned. It is also possible to explicitly retrieve all annotations by setting this parameter to 'all'.
+ additional_mappings: Additional label mappings for other columns. The keys are the column names and the values are the LabelMapping objects for the respective columns. For each specified column, a new column with _index appended will be added.
+ image_labels_column: Specify how multiple columns should be mapped into one `image_labels` column indicating one or more image labels. Each entry in the list specifies one dimension in the `image_labels` column and the dictionary specifies the columns from which the values should be mapped. It is possible to map values from different columns to one image label and to have multiple image labels. The specification is similar to `input/image_labels` in the config file. See tests for examples.
+ config: Load median spectra based on the settings of the config. This can be used to automatically retrieve common options (e.g., label_mapping) which otherwise have to be passed to this function. If no dataset_name, paths or image_names is given, the data specification is loaded from the config object and all non-test paths are used. Options passed as arguments have precedence over the config options.
Returns: Median spectra data frame. The table is either sorted by image names (if image_names is not None) or by the sort_labels() function (if dataset_name is used).
"""
+ if additional_mappings is None:
+ additional_mappings = {}
+
+ if config is not None:
+ if table_name == "":
+ table_name = config.get("input/table_name", "")
+ if dataset_name is None and paths is None and image_names is None and config["input/data_spec"]:
+ spec = DataSpecification.from_config(config)
+ paths = spec.paths()
+ if label_mapping is None and config["label_mapping"]:
+ label_mapping = LabelMapping.from_config(config, task=Task.SEGMENTATION)
+ if annotation_name is None:
+ annotation_name = config.get("input/annotation_name", None)
+ if image_labels_column is None and config["input/image_labels"]:
+ image_labels_column = config["input/image_labels"]
+
+ # Make sure the label mapping objects are created
+ for image_label_entry_index, data in enumerate(image_labels_column):
+ data["image_label_mapping"] = LabelMapping.from_config(
+ config, task=Task.CLASSIFICATION, image_label_entry_index=image_label_entry_index
+ )
+
# Collect all available tables
tables = {}
for path in sorted((settings.intermediates_dir_all / "tables").glob("*median_spectra*.feather")):
@@ -169,7 +195,8 @@ def median_table(
assert _table_type == "median_spectra", (
f"Invalid table name for median spectra table ({_table_type} instead of median_spectra, the general format"
- f" should be dataset_name@median_spectra@annotation_name.feather): {path}"
+ " should be <dataset_name>@median_spectra@<annotation_name>.feather or"
+ f" <dataset_name>@<table_name>@median_spectra@<annotation_name>.feather): {path}"
)
_table_identifier = (_dataset_name, _table_name)
@@ -178,7 +205,12 @@ def median_table(
tables[_table_identifier] = {}
tables[_table_identifier][_annotation_name] = path
- def read_table(dataset_name: str, table_name: str, annotation_name: Union[str, list[str], None]) -> pd.DataFrame:
+ def read_table(
+ dataset_name: str,
+ table_name: str,
+ annotation_name: Union[str, list[str], None],
+ requested_image_names: list[str] = None,
+ ) -> pd.DataFrame:
table_identifier = (dataset_name, table_name)
# Find the default annotation_name
@@ -189,6 +221,9 @@ def read_table(dataset_name: str, table_name: str, annotation_name: Union[str, l
annotation_name = dsettings.get("annotation_name_default")
if annotation_name is None or annotation_name == "all":
+ assert (
+ table_identifier in tables
+ ), f"Could not find the table {table_identifier} in the tables\n{tables.keys()}"
annotation_name = list(tables[table_identifier].keys())
if type(annotation_name) == str:
@@ -196,6 +231,9 @@ def read_table(dataset_name: str, table_name: str, annotation_name: Union[str, l
df = []
for name in annotation_name:
+ if name not in tables[table_identifier]:
+ continue
+
df_a = pd.read_feather(tables[table_identifier][name])
if name is not None:
df_a["annotation_name"] = name
@@ -203,21 +241,34 @@ def read_table(dataset_name: str, table_name: str, annotation_name: Union[str, l
assert len(annotation_name) == 1
df.append(df_a)
+ if len(df) == 0:
+ return pd.DataFrame()
+
needs_sorting = len(df) > 1
df = pd.concat(df)
- if len(df) > 0 and label_mapping is not None:
- # Mapping from path to config (the mapping depends on the dataset and must be done separately)
- df = df.query("label_name in @label_mapping.label_names(all_names=True)").copy()
- if len(df) > 0:
- label_indices = torch.from_numpy(df["label_index"].values)
- assert (
- settings.data_dirs[dataset_name] is not None
- ), f"Cannot find the path to the dataset {dataset_name} but this is required for remapping the labels"
- original_mapping = LabelMapping.from_data_dir(settings.data_dirs[dataset_name])
- label_mapping.map_tensor(label_indices, original_mapping)
- df["label_index_mapped"] = label_indices
- df["label_name_mapped"] = [label_mapping.index_to_name(i) for i in df["label_index_mapped"]]
+ if requested_image_names is not None:
+ # Select relevant images so that we don't change the labels if we don't need to
+ df = df[df["image_name"].isin(requested_image_names)]
+
+ if len(df) > 0:
+ if label_mapping is not None:
+ # Mapping from path to config (the mapping depends on the dataset and must be done separately)
+ df = df.query("label_name in @label_mapping.label_names(all_names=True)").copy()
+ if len(df) > 0:
+ assert settings.data_dirs[dataset_name] is not None, (
+ f"Cannot find the path to the dataset {dataset_name} but this is required for remapping the"
+ " labels"
+ )
+
+ original_mapping = LabelMapping.from_data_dir(settings.data_dirs[dataset_name])
+ label_indices = df["label_index"].values.astype(np.int64, copy=True)
+ label_mapping.map_tensor(label_indices, original_mapping) # Operates in-place
+ df["label_index_mapped"] = label_indices
+ df["label_name_mapped"] = [label_mapping.index_to_name(i) for i in df["label_index_mapped"]]
+
+ for name, mapping in additional_mappings.items():
+ df[f"{name}_index"] = [mapping.name_to_index(x) for x in df[name]]
if needs_sorting:
df = sort_labels(df, dataset_name=dataset_name)
@@ -225,150 +276,232 @@ def read_table(dataset_name: str, table_name: str, annotation_name: Union[str, l
return df.reset_index(drop=True)
if dataset_name is not None:
- return read_table(dataset_name, table_name, annotation_name)
-
- def parse_paths(paths: list[DataPath]) -> tuple[list[str], dict[str, list[str]], list[str]]:
- image_names_ordering = []
- image_names_only = []
- annotation_images = {}
- for p in paths:
- image_names_ordering.append(p.image_name())
- names = p.annotation_names()
-
- if len(names) > 0:
- for a in names:
- if a not in annotation_images:
- annotation_images[a] = []
- annotation_images[a].append(p.image_name())
- else:
- image_names_only.append(p.image_name())
-
- return image_names_only, annotation_images, image_names_ordering
-
- def parse_image_names(names: list[str]) -> tuple[list[str], dict[str, list[str]], list[str]]:
- image_names_ordering = []
- image_names_only = []
- annotation_images = {}
- for name in names:
- if "@" in name:
- image_name, annotation_names = name.split("@")
- annotation_names = annotation_names.split("&")
- for a in annotation_names:
- if a not in annotation_images:
- annotation_images[a] = []
- annotation_images[a].append(image_name)
- image_names_ordering.append(image_name)
+ if (dataset_name, table_name) not in tables:
+ error_message = (
+ f"Could not find the table {dataset_name}@{table_name} in the registered median tables (from all"
+ f" available datasets):\n{tables.keys()}"
+ )
+ if "#" not in dataset_name:
+ # If the dataset consists only of subdatasets but the main dataset is requested, collect all tables and merge them, e.g.
+ # 2022_10_24_Tivita_sepsis_ICU = 2022_10_24_Tivita_sepsis_ICU#calibrations + 2022_10_24_Tivita_sepsis_ICU#subjects
+ parent_tables = []
+ for _dataset_name, _table_name in tables.keys():
+ if _dataset_name.startswith(dataset_name) and table_name == _table_name:
+ parent_tables.append(read_table(_dataset_name, _table_name, annotation_name))
+ if len(parent_tables) > 0:
+ return pd.concat(parent_tables, ignore_index=True)
+ else:
+ raise ValueError(error_message)
else:
- image_names_only.append(name)
- image_names_ordering.append(name)
-
- return image_names_only, annotation_images, image_names_ordering
-
- if paths is not None:
- assert image_names is None, "image_names must be None if paths is specified"
- image_names_only, annotation_images, image_names_ordering = parse_paths(paths)
- elif image_names is not None:
- assert paths is None, "paths must be None if image_names is specified"
- # Theoretically, we could also parse the image names to paths and only use the paths function
- # However, it is faster to use the image names directly if available (and we need image names anyway for the table)
- image_names_only, annotation_images, image_names_ordering = parse_image_names(image_names)
+ raise ValueError(error_message)
+ else:
+ df = read_table(dataset_name, table_name, annotation_name)
+ if len(df) == 0:
+ settings.log.warning(
+ f"Could not find a table for the dataset {dataset_name}, the table name {table_name} and the"
+ f" annotation name {annotation_name}"
+ )
else:
- raise ValueError("image_names or paths must be supplied if dataset_names is None")
-
- image_names = image_names_only + list(itertools.chain.from_iterable(annotation_images.values()))
- image_names = pd.unique(np.asarray(image_names)) # Unique without sorting
- image_names_ordering = pd.unique(np.asarray(image_names_ordering))
-
- # First all the images without annotation name requirements
- dfs = []
- remaining_images = set(image_names_only)
- considered_datasets = set()
- for _dataset_name, _table_name in tables.keys():
- if _table_name != table_name:
- continue
- df = read_table(_dataset_name, _table_name, annotation_name)
- df = df.query("image_name in @remaining_images")
-
- if len(df) > 0:
- dfs.append(df)
- remaining_images = remaining_images - set(df["image_name"].values)
- considered_datasets.add(_dataset_name)
+ def parse_paths(paths: list[DataPath]) -> tuple[list[str], dict[str, list[str]], list[str]]:
+ image_names_ordering = []
+ image_names_only = []
+ annotation_images = {}
+ for p in paths:
+ image_names_ordering.append(p.image_name())
+ names = p.annotation_names()
+
+ if len(names) > 0:
+ for a in names:
+ if a not in annotation_images:
+ annotation_images[a] = []
+ annotation_images[a].append(p.image_name())
+ else:
+ image_names_only.append(p.image_name())
+
+ return image_names_only, annotation_images, image_names_ordering
+
+ def parse_image_names(names: list[str]) -> tuple[list[str], dict[str, list[str]], list[str]]:
+ image_names_ordering = []
+ image_names_only = []
+ annotation_images = {}
+ for name in names:
+ if "@" in name:
+ image_name, annotation_names = name.split("@")
+ annotation_names = annotation_names.split("&")
+ for a in annotation_names:
+ if a not in annotation_images:
+ annotation_images[a] = []
+ annotation_images[a].append(image_name)
+ image_names_ordering.append(image_name)
+ else:
+ image_names_only.append(name)
+ image_names_ordering.append(name)
+
+ return image_names_only, annotation_images, image_names_ordering
+
+ if paths is not None:
+ assert image_names is None, "image_names must be None if paths is specified"
+ image_names_only, annotation_images, image_names_ordering = parse_paths(paths)
+ elif image_names is not None:
+ assert paths is None, "paths must be None if image_names is specified"
+ # Theoretically, we could also parse the image names to paths and only use the paths function
+ # However, it is faster to use the image names directly if available (and we need image names anyway for the table)
+ image_names_only, annotation_images, image_names_ordering = parse_image_names(image_names)
+ else:
+ raise ValueError("image_names or paths must be supplied if dataset_names is None")
- if len(remaining_images) == 0:
- # We already have all image_names, we can stop looping over the tables
- break
+ image_names = image_names_only + list(itertools.chain.from_iterable(annotation_images.values()))
+ image_names = pd.unique(np.asarray(image_names)) # Unique without sorting
+ image_names_ordering = pd.unique(np.asarray(image_names_ordering))
- # Then all images with annotation names
- if len(annotation_images) > 0:
- remaining_images = {name: set(images) for name, images in annotation_images.items()}
- is_done = False
+ # First all the images without annotation name requirements
+ dfs = []
+ remaining_images = set(image_names_only)
+ considered_datasets = set()
for _dataset_name, _table_name in tables.keys():
if _table_name != table_name:
continue
- if is_done:
- break
- for table_annotation_name in tables[(_dataset_name, _table_name)].keys():
- if table_annotation_name not in annotation_images.keys():
- # If the table does not contain any of the requested annotations, we can skip it
+ df = read_table(_dataset_name, _table_name, annotation_name, requested_image_names=remaining_images)
+ if len(df) > 0:
+ dfs.append(df)
+ remaining_images = remaining_images - set(df["image_name"].values)
+ considered_datasets.add(_dataset_name)
+
+ if len(remaining_images) == 0:
+ # We already have all image_names, we can stop looping over the tables
+ break
+
+ # Then all images with annotation names
+ if len(annotation_images) > 0:
+ remaining_images = {name: set(images) for name, images in annotation_images.items()}
+ is_done = False
+ for _dataset_name, _table_name in tables.keys():
+ if _table_name != table_name:
continue
-
- df = read_table(_dataset_name, _table_name, table_annotation_name)
- df = df.query("image_name in @remaining_images[@table_annotation_name]")
-
- if len(df) > 0:
- dfs.append(df)
- remaining_images[table_annotation_name] = remaining_images[table_annotation_name] - set(
- df["image_name"].values
+ if is_done:
+ break
+
+ for table_annotation_name in tables[(_dataset_name, _table_name)].keys():
+ if table_annotation_name not in annotation_images.keys():
+ # If the table does not contain any of the requested annotations, we can skip it
+ continue
+
+ df = read_table(
+ _dataset_name,
+ _table_name,
+ table_annotation_name,
+ requested_image_names=remaining_images[table_annotation_name],
)
- considered_datasets.add(_dataset_name)
+ if len(df) > 0:
+ dfs.append(df)
+ remaining_images[table_annotation_name] = remaining_images[table_annotation_name] - set(
+ df["image_name"].values
+ )
+ considered_datasets.add(_dataset_name)
+
+ if all(len(r) == 0 for r in remaining_images.values()):
+ is_done = True
+ # We already have all image_names, we can stop looping over the tables
+ break
+
+ # We cannot assert that there are no remaining images anymore because some images may get excluded due to the label mapping or some images may not even have annotations (so they can't be included)
+ if len(dfs) == 0:
+ error_message = (
+ f"Could not find any of the requested images (first image: {image_names[0]}) in the tables"
+ f" ({considered_datasets = }). This could mean that some of the intermediate files are missing or that"
+ " you do not have access to them (e.g. human data)."
+ )
+ if label_mapping is not None:
+ error_message += (
+ f" Please make also sure that the label mapping ({label_mapping}) is correct and does not exclude"
+ " all images."
+ )
+ raise ValueError(error_message)
+
+ with warnings.catch_warnings():
+ # The same columns might have different dtypes in the dataframes depending on missing values
+ warnings.filterwarnings(
+ "ignore", message=".*object-dtype columns with all-bool values", category=FutureWarning
+ )
+ df = pd.concat(dfs)
+ if len(dfs) > 1 and "label_index" in df.columns:
+ # label_index is potentially incorrect when paths from multiple datasets are used, so it is safer to remove it
+ df.drop(columns="label_index", inplace=True)
+
+ # Same order as defined by the paths
+ df["image_name"] = df["image_name"].astype("category")
+ df["image_name"] = df["image_name"].cat.set_categories(image_names_ordering)
+ df.sort_values("image_name", inplace=True, ignore_index=True)
+
+ # Make sure we have all requested image_names (it is possible that some image_names are missing if they contain only labels which were filtered out by the label mapping)
+ image_names_df = set(df["image_name"].unique())
+ assert image_names_df.issubset(image_names), (
+ "Could not find all image_names in the median spectra tables. Please make sure that the median table exists"
+ " for every dataset where the image_names come from"
+ )
- if all(len(r) == 0 for r in remaining_images.values()):
- is_done = True
- # We already have all image_names, we can stop looping over the tables
- break
+ if label_mapping is not None:
+ assert set(df["label_index_mapped"].values).issubset(
+ set(label_mapping.label_indices())
+ ), "Found at least one label_index which is not part of the mapping"
+ if len(image_names_df) < len(image_names):
+ settings.log.warning(
+ f"{len(image_names) - len(image_names_df)} image_names are not used because they were filtered out"
+ f" (e.g. by the label mapping). The following tables were considered: {considered_datasets}"
+ )
- # We cannot assert that there are no remaining images anymore because some images may get excluded due to the label mapping or some images maybe don't even have annotations (so they can't be included)
- assert len(dfs) > 0, (
- f"Could not find any of the requested images ({image_names = }, {annotation_images = }) in the tables"
- f" ({considered_datasets = }). This could mean that some of the intermediate files are missing or that you do"
- " not have access to them (e.g. human data)"
- )
+ if image_labels_column is not None:
+ image_labels = []
+ for _, row in df.iterrows():
+ row_label = []
+ # There may be more than one image label to predict (e.g., sepsis_status and shock)
+ for level_data in image_labels_column:
+ # Multiple attributes can be mapped to the same image label (e.g., sepsis_status (new sepsis study) and health_status (old sepsis study))
+ for attribute in level_data["meta_attributes"]:
+ if attribute in row and not pd.isna(row[attribute]):
+ value = row[attribute]
+ if "image_label_mapping" in level_data:
+ mapping = level_data["image_label_mapping"]
+ value = mapping.name_to_index(value)
+ else:
+ value = int(value)
+ row_label.append(value)
+ break
- with warnings.catch_warnings():
- # The same columns might have different dtypes in the dataframes depending on missing values
- warnings.filterwarnings("ignore", message=".*object-dtype columns with all-bool values", category=FutureWarning)
- df = pd.concat(dfs)
- if len(dfs) > 1:
- # label_index is potentially incorrect when paths from multiple datasets are used, so it is safer to remove it
- df.drop(columns="label_index", inplace=True)
-
- # Same order as defined by the paths
- df["image_name"] = df["image_name"].astype("category")
- df["image_name"] = df["image_name"].cat.set_categories(image_names_ordering)
- df.sort_values("image_name", inplace=True, ignore_index=True)
-
- # Make sure we have all requested image_names (it is possible that some image_names are missing if they contain only labels which were filtered out by the label mapping)
- image_names_df = set(df["image_name"].unique())
- assert image_names_df.issubset(image_names), (
- "Could not find all image_names in the median spectra tables. Please make sure that the median table exists for"
- " every dataset where the image_names come from"
- )
+ assert len(row_label) >= 1, f"Could not map the row\n{row}\nto any image label"
+ if len(row_label) == 1:
+ row_label = row_label[0]
+ image_labels.append(row_label)
- if label_mapping is not None:
- assert set(df["label_index_mapped"].values).issubset(
- set(label_mapping.label_indices())
- ), "Found at least one label_index which is not part of the mapping"
- if len(image_names_df) < len(image_names):
- settings.log.warning(
- f"{len(image_names) - len(image_names_df)} image_names are not used because they were filtered out (e.g. by"
- f" the label mapping). The following tables were considered: {considered_datasets}"
- )
+ df["image_labels"] = image_labels
return df
+def add_times_table(df: pd.DataFrame, groups: list[str] = None) -> None:
+    """
+    Adds a column "time" to the table with the timestamp converted to a datetime object. Additionally, a "rel_time" column is added which contains the time elapsed since the earliest image of the same group (e.g. time for all images of one subject relative to the first image `groups=["subject_name"]`). The relative time is stored as timedelta values (difference of two datetime objects).
+
+    Args:
+        df: The table to add the columns to (in-place). It must contain a "timestamp" column and every column listed in groups.
+        groups: A list of column names for grouping of the relative time. If None, the rows are grouped by "subject_name".
+    """
+    if groups is None:
+        groups = ["subject_name"]
+
+    df["time"] = pd.to_datetime(df["timestamp"], format=settings.tivita_timestamp_format)
+
+    # transform("min") broadcasts the earliest time of each group back to every row of that group, so the relative
+    # time can be computed for the whole column at once instead of looking up the group minimum row by row
+    df["rel_time"] = df["time"] - df.groupby(groups)["time"].transform("min")
+
+
def group_median_spectra(df: pd.DataFrame, additional_columns: list[str] = None) -> pd.DataFrame:
"""
Groups median spectra per subject by averaging all median spectra from that subject.
@@ -514,7 +647,7 @@ def utilization_table(run_dir: Path) -> pd.DataFrame:
def sort_labels(
storage: Union[np.ndarray, list, set, dict, pd.DataFrame],
- label_ordering: dict[str, Union[str, int]] = None,
+ label_ordering: Union[dict[str, Union[str, int]], list[str]] = None,
sorting_cols: list[str] = None,
dataset_name: str = None,
) -> Union[np.ndarray, list, dict, pd.DataFrame]:
@@ -532,12 +665,15 @@ def sort_labels(
Args:
storage: The storage to sort: numpy array, list, dict or dataframe. If dataframe, it will sort by label_name, image_name and annotation_name (if available).
- label_ordering: Alternative sort order for the labels. The mapping must define a key for each label and something sortable as values (e.g. integer values).
+ label_ordering: Alternative sort order for the labels. Either a mapping which defines a key for each label and something sortable as values (e.g. integer values) or a list of label names in the sorting order.
sorting_cols: Explicit list of columns which should be used to sort the dataframe. If None, will sort by label_name, image_name (if available) and annotation_name (if available).
dataset_name: Name of a dataset which is accessible via settings.data_dirs and which contains a dataset settings with a defined label ordering.
Returns: The sorted storage.
"""
+ if type(label_ordering) == list:
+ label_ordering = {label: i for i, label in enumerate(label_ordering)}
+
if label_ordering is None and dataset_name is not None:
dsettings = DatasetSettings(settings.data_dirs[dataset_name])
label_ordering = dsettings.get("label_ordering", None)
@@ -610,13 +746,13 @@ def sort_labels_cm(
# Swap rows
switched_cm = torch.zeros_like(cm)
ordering_indices = [cm_order.index(l) for l in target_order]
- for i, id in enumerate(ordering_indices):
- switched_cm[i, :] = cm[id, :]
+ for i, idx in enumerate(ordering_indices):
+ switched_cm[i, :] = cm[idx, :]
# Swap columns
switched_cm_final = torch.zeros_like(cm)
- for j, id in enumerate(ordering_indices):
- switched_cm_final[:, j] = switched_cm[:, id]
+ for j, idx in enumerate(ordering_indices):
+ switched_cm_final[:, j] = switched_cm[:, idx]
return switched_cm_final
@@ -740,7 +876,7 @@ def get_nsd_thresholds(mapping: LabelMapping, aggregation_method: str = None, na
Args:
mapping: Label mapping of the training run which is used to make a selection of labels.
aggregation_method: Aggregation method (e.g. mean). Must correspond to a column name in the table.
- name: Name of the table (e.g. semantic for the MIA2021 thresholds).
+ name: Name of the table (e.g. semantic for the MIA2022 thresholds).
Returns: Tolerance value for each class in the order defined in the label mapping.
"""
diff --git a/htc/utils/mitk/__init__.py b/htc/utils/mitk/__init__.py
new file mode 100644
index 0000000..17e71a8
--- /dev/null
+++ b/htc/utils/mitk/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
+# SPDX-License-Identifier: MIT
+
diff --git a/htc/utils/mitk/mitk_masks.py b/htc/utils/mitk/mitk_masks.py
new file mode 100644
index 0000000..913ea43
--- /dev/null
+++ b/htc/utils/mitk/mitk_masks.py
@@ -0,0 +1,343 @@
+# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
+# SPDX-License-Identifier: MIT
+
+import json
+import re
+import xml.etree.ElementTree as ET
+from copy import deepcopy
+from pathlib import Path
+from typing import Union
+
+import numpy as np
+from matplotlib.colors import to_rgb
+
+from htc.cpp import nunique
+from htc.utils.import_extra import requires_extra
+from htc.utils.LabelMapping import LabelMapping
+
+try:
+ import nrrd
+
+ _missing_library = ""
+except ImportError:
+ _missing_library = "nrrd"
+
+
+@requires_extra(_missing_library)
+def nrrd_mask(nrrd_file: Path) -> dict[str, Union[np.ndarray, LabelMapping]]:
+ """
+ Read an nrrd mask file from MITK. This file contains all the information from the annotation process.
+
+ >>> from htc.tivita.DataPath import DataPath
+ >>> path = DataPath.from_image_name("P065#2020_06_19_21_02_33")
+ >>> mitk_data = nrrd_mask(path() / "annotations/2020_06_19_21_02_33#semantic#annotator5.nrrd")
+ >>> np.unique(mitk_data["mask"])
+ array([1, 2], dtype=uint8)
+ >>> mitk_data["label_mapping"].index_to_name(1)
+ 'stomach'
+
+ The "Exterior" in MITK always has the label index 0 and means that pixels are not labelled and are always considered invalid:
+ >>> mitk_data["label_mapping"].name_to_index("unlabeled")
+ 0
+ >>> mitk_data["label_mapping"].is_index_valid(0)
+ False
+
+ With the mitk_data you can easily map the segmentation to the desired target mapping:
+ >>> from htc.settings_seg import settings_seg
+ >>> mask = settings_seg.label_mapping.map_tensor(mitk_data["mask"], mitk_data["label_mapping"])
+ >>> np.unique(mask)
+ array([0, 6], dtype=uint8)
+ >>> settings_seg.label_mapping.index_to_name(6)
+ 'stomach'
+
+ In case of multi-layer NRRD files, an additional dimension is inserted at the front corresponding to the MITK layers:
+ >>> path = DataPath.from_image_name("SPACE_000069#2020_11_05_11_43_51")
+ >>> mitk_data = nrrd_mask(path() / "annotations/2020_11_05_11_43_51#semantic#primary.nrrd")
+ >>> mitk_data["mask"].shape
+ (4, 480, 640)
+
+ Args:
+ nrrd_file: Path to the nrrd file.
+
+ Returns: Dictionary with the following content:
+ - mask: Array (uint8) with the label indices per pixel. The indices stored by MITK are remapped so that the same label name has the same index in every layer.
+ - label_mapping: Label mapping to interpret the label indices of the returned mask.
+ """
+ data, header = nrrd.read(nrrd_file)
+
+ total_n_labels = 0 # to be populated with number of labels across the layers
+
+ # Remove singleton axes and reverse the axis order of the array stored in the file
+ # NOTE(review): the cast to uint8 wraps around for label values above 255 — confirm that such values cannot occur
+ mask = data.squeeze()
+ mask = mask.T.astype(np.uint8)
+
+ if mask.ndim == 3:
+ # Move the last axis to the front so that the first axis enumerates the MITK layers (see the docstring)
+ mask = mask.transpose(2, 0, 1)
+
+ # Maps label names to the label indices used in the returned mask (shared across all layers)
+ mapping_nrrd = {}
+ max_label_index = 0 # used to keep track of iterating label index in different layers. MITK assigns labels starting from 0 in each layer.
+
+ # new MITK version NRRD files have to be handled separately as they contain JSON meta data
+ if "org.mitk.multilabel.segmentation.version" in header:
+ label_groups = json.loads(header["org.mitk.multilabel.segmentation.labelgroups"])
+ n_layers = len(label_groups)
+
+ # in the new format there is no exterior label, so the total n labels are incremented here
+ total_n_labels += 1
+ mapping_nrrd["unlabeled"] = 0
+
+ for layer in range(n_layers):
+
+ # Layers without labels are normalised to an empty list so that the label loop below is simply skipped
+ if label_groups[layer]["labels"] is not None:
+ total_n_labels += len(label_groups[layer]["labels"])
+ else:
+ label_groups[layer]["labels"] = []
+
+ if mask.ndim == 3:
+ # MITK assigns the label index for each layer individually according to the order in which the annotation was performed. This leads to different label indices for the same class in different layers. Therefore, a remapping is performed using the label_index of the previous layer(s).
+ layer_mask = deepcopy(mask[layer, :, :]) # needed for remapping
+ else:
+ # NOTE(review): no copy is made here, so the comparisons below see values which were already remapped — confirm that this cannot lead to chained remappings for single-layer files
+ layer_mask = mask
+
+ for label in label_groups[layer]["labels"]:
+ label_name = label["name"]
+ label_index = label["value"]
+
+ # in case the label name has the label order number as a prefix e.g. 12_kidney, then extract the label name
+ match = re.search(r"^\d+_", label_name)
+ if match is not None:
+ label_name = label_name.removeprefix(match.group(0))
+
+ if label_name not in mapping_nrrd:
+ mapping_nrrd[label_name] = (
+ max(mapping_nrrd.values()) + 1
+ ) # remapping to the smallest unassigned label_index
+
+ # Replace the index stored by MITK with the index shared across the layers
+ if mask.ndim == 3:
+ mask[layer, layer_mask == label_index] = mapping_nrrd[label_name]
+ else:
+ mask[layer_mask == label_index] = mapping_nrrd[label_name]
+
+ max_label_index = max(mapping_nrrd.values())
+ else:
+ # Old format: the number of layers, the number of labels per layer and one XML snippet per label are stored as separate header entries
+ n_layers = int(header["layers"])
+
+ for layer in range(n_layers):
+ # NOTE(review): the layer number is appended to a fixed "00" prefix here and in the label key below, so the keys only have three digits for the layers 0-9 — confirm whether files with ten or more layers can occur
+ n_labels = int(header[f"layer_00{layer}"])
+ total_n_labels += n_labels
+
+ if mask.ndim == 3:
+ layer_mask = deepcopy(mask[layer, :, :])
+ else:
+ layer_mask = mask
+
+ for i in range(n_labels):
+ # The header stores the XML with escaped newlines which are restored before parsing
+ root = ET.fromstring(header[f"org.mitk.label_00{layer}_{i:05d}"].replace("\\n", "\n"))
+ label_index = int(root.find("property[@key='value']/unsigned").attrib["value"])
+
+ label_name = root.find("property[@key='name']/string").attrib["value"]
+ # Same prefix handling as for the new format (e.g. 12_kidney becomes kidney)
+ match = re.search(r"^\d+_", label_name)
+ if match is not None:
+ label_name = label_name.removeprefix(match.group(0))
+
+ # The first label of every layer is registered as unlabeled (presumably the MITK "Exterior" label mentioned in the docstring — TODO confirm)
+ if i == 0:
+ mapping_nrrd["unlabeled"] = label_index
+ else:
+ if label_name not in mapping_nrrd:
+ mapping_nrrd[label_name] = max(mapping_nrrd.values()) + 1
+
+ if mask.ndim == 3:
+ mask[layer, layer_mask == label_index] = mapping_nrrd[label_name]
+ else:
+ mask[layer_mask == label_index] = mapping_nrrd[label_name]
+
+ max_label_index = max(mapping_nrrd.values())
+
+ mappings_nrrd = LabelMapping(mapping_nrrd, last_valid_label_index=max_label_index, zero_is_invalid=True)
+
+ # Sanity check: the mask must not contain more distinct values than labels were counted in the header
+ assert nunique(mask) <= total_n_labels
+
+ return {"mask": mask, "label_mapping": mappings_nrrd}
+
+
+@requires_extra(_missing_library)
+def segmentation_to_nrrd(
+    nrrd_file: Path,
+    mask: np.ndarray,
+    mapping_mask: LabelMapping,
+) -> None:
+    """
+    Converts an existing segmentation mask to an nrrd file which can be read by MITK. This is useful if existing masks should be loaded into MITK for visualization or adaptations.
+
+    >>> from htc.tivita.DataPath import DataPath
+    >>> import tempfile
+    >>> with tempfile.NamedTemporaryFile() as tmpfile:
+    ...     tmpfile = Path(tmpfile.name)
+    ...     path = DataPath.from_image_name("SPACE_000001#2020_08_14_11_11_22")
+    ...     segmentation_dict = path.read_segmentation(annotation_name="all")
+    ...     mask = np.stack(list(segmentation_dict.values()))
+    ...     segmentation_to_nrrd(nrrd_file=tmpfile, mask=mask, mapping_mask=LabelMapping.from_path(path))
+    ...     labels = nrrd_mask(nrrd_file=tmpfile)['label_mapping'].label_names()
+    >>> labels
+    ['colon', 'omentum', 'small_bowel', 'fat', 'instrument', 'background', 'blue_cloth', 'unclear_organic', 'tag_blood']
+
+    Args:
+        nrrd_file: Path where the nrrd file should be stored.
+        mask: Segmentation mask with the label indices per pixel. Either a single two-dimensional mask or a three-dimensional array with several masks stacked along the first axis (each of them becomes one layer in MITK). The array of the caller is not modified.
+        mapping_mask: Label mapping for the segmentation mask which gives every label index in the segmentation mask a name.
+    """
+    # Work on a copy because the label indices are remapped below
+    mask = deepcopy(mask)
+
+    invalid_pixels = ~mapping_mask.is_index_valid(mask)
+
+    # We need to remap the labels to consecutive values starting from 1 because 0 will be the unlabeled pixels in MITK
+    mapping_mitk = {"Exterior": 0}
+    i = 1
+    for label_index in np.unique(mask):
+        if mapping_mask.is_index_valid(label_index):
+            mapping_mitk[mapping_mask.index_to_name(label_index)] = i
+            i += 1
+        else:
+            # All invalid labels end up as unlabeled pixels in MITK
+            mapping_mitk[mapping_mask.index_to_name(label_index)] = 0
+
+    mapping_mitk = LabelMapping(mapping_mitk, zero_is_invalid=True)
+    assert mapping_mitk.last_valid_label_index == i - 1
+    assert len(mapping_mitk) <= len(mapping_mask)
+
+    # Remap segmentation to a valid MITK mask
+    # NOTE(review): the return value is not used, so this relies on map_tensor changing the array in-place — confirm
+    mapping_mitk.map_tensor(mask, mapping_mask)
+    n_labels = len(mapping_mitk.label_names(include_invalid=True))
+    assert nunique(mask) <= n_labels
+    assert np.all(mask[invalid_pixels] == 0), "All invalid pixels should have been mapped to 0"
+
+    # MITK/nrrd loads the image as (width, height)
+    multi_layer = mask.ndim == 3
+    if multi_layer:
+        n_layers = mask.shape[0]
+        mask = np.expand_dims(np.transpose(mask, axes=(0, 2, 1)), -1)
+    else:
+        n_layers = 1
+        mask = np.expand_dims(mask.T, -1)
+
+    def mitk_label_header(label_index: int, label_name: str, label_color: str) -> dict:
+        # 0 = background/invalid in MITK
+        opacity = 0.600000024 if label_index != 0 else 0
+        locked = bool(label_index != 0)
+        r, g, b = to_rgb(label_color)
+
+        return {
+            "color": {"type": "ColorProperty", "value": [float(r), float(g), float(b)]},
+            "locked": locked,
+            "name": label_name,
+            "opacity": opacity,
+            "value": int(label_index),
+            "visible": True,
+        }
+
+    header = {
+        "modality": "org.mitk.multilabel.segmentation",
+        "DICOM_0008_0060": '{"values":[{"z":0, "t":0, "value":"SEG"}]}',
+        "DICOM_0008_103E": '{"values":[{"z":0, "t":0, "value":"MITK Segmentation"}]}',
+        "org.mitk.multilabel.segmentation.labelgroups": [],
+        "org.mitk.multilabel.segmentation.unlabeledlabellock": "0",
+        "org.mitk.multilabel.segmentation.version": "1",
+        "type": "unsigned short",
+        "space": "left-posterior-superior",
+        "space origin": [0, 0, 0],
+    }
+
+    if n_layers == 1:
+        header["space directions"] = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
+        header["kinds"] = ["domain", "domain", "domain"]
+    else:
+        # The additional first axis enumerates the layers
+        header["space directions"] = [[np.nan, np.nan, np.nan], [1, 0, 0], [0, 1, 0], [0, 0, 1]]
+        header["kinds"] = ["vector", "domain", "domain", "domain"]
+
+    # switching back to MITK increasing order of label index
+    curr_label_index = 1
+
+    # The new indices are written to a copy so that the comparisons below always see the unmodified values
+    mask_copy = deepcopy(mask)
+
+    for layer_index in range(n_layers):
+        labelgroup = {"labels": []}
+
+        if multi_layer:
+            # Basic indexing returns views, so writing to layer_target changes mask_copy
+            layer_source = mask[layer_index, ...]
+            layer_target = mask_copy[layer_index, ...]
+            label_indices = np.unique(layer_source)
+        else:
+            # A single layer has no leading layer axis: the whole array is the layer (indexing with the layer index
+            # would only select the first row of the image)
+            layer_source = mask
+            layer_target = mask_copy
+            label_indices = mapping_mitk.label_indices(include_invalid=True)
+
+        for label_index in label_indices:
+            if label_index == 0:
+                # Unlabeled pixels do not get an entry in the label group
+                continue
+
+            layer_target[layer_source == label_index] = curr_label_index
+
+            labelgroup["labels"].append(
+                mitk_label_header(
+                    curr_label_index, mapping_mitk.index_to_name(label_index), mapping_mitk.index_to_color(label_index)
+                )
+            )
+
+            # Label values keep increasing across the layers
+            curr_label_index += 1
+        header["org.mitk.multilabel.segmentation.labelgroups"].append(labelgroup)
+
+    # The label groups are stored as JSON string in the header
+    header["org.mitk.multilabel.segmentation.labelgroups"] = json.dumps(
+        header["org.mitk.multilabel.segmentation.labelgroups"]
+    )
+
+    nrrd.write(str(nrrd_file), data=mask_copy.astype(np.ushort), header=header)
+
+
+def segmentation_to_nrrd_annotation_name(
+    nrrd_file: Path,
+    mask: dict[str, np.ndarray],
+    mapping_mask: LabelMapping,
+    annotation_name_to_layer: dict[str, int] = None,
+) -> None:
+    """
+    Converts an existing segmentation mask to an nrrd file which can be read by MITK. This is useful if existing masks should be loaded into MITK for visualization or adaptations.
+    This function can be used to directly convert a dictionary of masks read from the path.
+
+    >>> from htc.tivita.DataPath import DataPath
+    >>> import tempfile
+    >>> with tempfile.NamedTemporaryFile() as tmpfile:
+    ...     tmpfile = Path(tmpfile.name)
+    ...     path = DataPath.from_image_name("SPACE_000001#2020_08_14_11_11_22")
+    ...     mask = path.read_segmentation(annotation_name="all")
+    ...     segmentation_to_nrrd_annotation_name(nrrd_file=tmpfile, mask=mask, mapping_mask=LabelMapping.from_path(path), annotation_name_to_layer={"semantic#primary": 0, "polygon#annotator1": 1})
+    ...     labels = nrrd_mask(nrrd_file=tmpfile)['label_mapping'].label_names()
+    >>> labels
+    ['background', 'blue_cloth', 'colon', 'omentum', 'small_bowel', 'unclear_organic']
+
+    Args:
+        nrrd_file: Path where the nrrd file should be stored.
+        mask: A dict of masks, each key representing an annotation name e.g. `{annotation_name1: mask, annotation_name2: mask, ...}`. The masks are not modified.
+        mapping_mask: Label mapping for the segmentation masks which gives every label index in the segmentation masks a name.
+        annotation_name_to_layer: Maps annotation names to layers in MITK. Layers must be integers and define the order of the segmentation masks in MITK. The dictionary has the form: `{annotation_name: layer_index}`. If None, all annotations from mask are used in the order of the dictionary.
+    """
+    # Check the type before anything else so that a wrong input leads to this message and not to an error further down
+    assert isinstance(
+        mask, dict
+    ), "The mask has to be a dict containing all annotations, of the form: `{annotation_name: mask}`"
+
+    if annotation_name_to_layer is not None:
+        # The layer indices (and not the insertion order of the dictionary) define the order of the masks. sorted() is
+        # stable, so annotation names with the same layer index keep their relative order
+        annotation_names = sorted(annotation_name_to_layer, key=annotation_name_to_layer.__getitem__)
+    else:
+        annotation_names = list(mask.keys())
+
+    stacked_masks = []
+    for annotation_name in annotation_names:
+        assert annotation_name in mask, f"Request annotation name {annotation_name} not present in mask"
+        stacked_masks.append(mask[annotation_name])
+
+    # np.stack creates a new array, so the masks of the caller remain untouched without an explicit copy
+    segmentation_to_nrrd(nrrd_file=nrrd_file, mask=np.stack(stacked_masks), mapping_mask=mapping_mask)
diff --git a/htc/utils/mitk/run_mitk_dataset.py b/htc/utils/mitk/run_mitk_dataset.py
new file mode 100644
index 0000000..24895d7
--- /dev/null
+++ b/htc/utils/mitk/run_mitk_dataset.py
@@ -0,0 +1,89 @@
+# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
+# SPDX-License-Identifier: MIT
+
+import argparse
+import json
+from pathlib import Path
+
+import numpy as np
+from PIL import Image
+
+from htc import LabelMapping
+from htc.tivita.DataPath import DataPath
+from htc.utils.mitk.mitk_masks import segmentation_to_nrrd
+from htc.utils.parallel import p_map
+
+if __name__ == "__main__":
+    # Command line interface: the dataset to read from and the directory where the MITK files are written to
+    parser = argparse.ArgumentParser(
+        description="Collects all images from a dataset and converts the existing annotations to MITK nrrd files.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--input-dir",
+        type=Path,
+        required=True,
+        help="Path to a dataset where data paths should be collected.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        required=True,
+        help=(
+            "Path to the output directory where the MITK files (images and results directory and task_list.json) should"
+            " be stored."
+        ),
+    )
+    args = parser.parse_args()
+
+    input_dir = args.input_dir
+    output_dir = args.output_dir
+    assert input_dir.exists(), f"The input directory {input_dir} does not exist"
+
+    images_dir = output_dir / "images"
+    results_dir = output_dir / "results"
+    task_list_file = output_dir / "task_list.json"
+
+    # Existing results are never overwritten
+    for target in (images_dir, results_dir, task_list_file):
+        assert not target.exists(), (
+            f"The output directory {output_dir} already contains {target}. Please select a different output directory or"
+            " clear it first"
+        )
+
+    for directory in (images_dir, results_dir):
+        directory.mkdir(exist_ok=True, parents=True)
+
+    paths = list(DataPath.iterate(input_dir))
+    assert len(paths) > 0, f"Could not find any images in {input_dir}"
+
+    def handle_path(path: DataPath) -> dict[str, str]:
+        # Stores the RGB image and the nrrd file of one image and returns the corresponding entry for the task list
+        image_name = path.image_name()
+
+        Image.fromarray(path.read_rgb_reconstructed()).save(images_dir / f"{image_name}.png", optimize=True)
+
+        mask = path.read_segmentation(annotation_name="all")
+        if type(mask) == dict:
+            # One layer per annotation name
+            mask = np.stack(list(mask.values()))
+
+        segmentation_to_nrrd(
+            nrrd_file=results_dir / f"{image_name}.nrrd",
+            mask=mask,
+            mapping_mask=LabelMapping.from_path(path),
+        )
+
+        return {
+            "Name": f"{image_name}",
+            "Image": f"images/{image_name}.png",
+            "Result": f"results/{image_name}.nrrd",
+        }
+
+    # The task list references the stored files relative to the output directory
+    tasks = {
+        "FileFormat": "MITK Segmentation Task List",
+        "Version": 1,
+        "Name": "Segmentation",
+        "Defaults": {"LabelNameSuggestions": "dataset_labels.json"},
+        "Tasks": p_map(handle_path, paths),
+    }
+
+    with task_list_file.open("w") as f:
+        json.dump(tasks, f, indent=4)
diff --git a/htc/utils/mitk/run_mitk_task_list.py b/htc/utils/mitk/run_mitk_task_list.py
new file mode 100644
index 0000000..9545188
--- /dev/null
+++ b/htc/utils/mitk/run_mitk_task_list.py
@@ -0,0 +1,118 @@
+# SPDX-FileCopyrightText: 2022 Division of Intelligent Medical Systems, DKFZ
+# SPDX-License-Identifier: MIT
+
+import argparse
+import json
+import math
+from pathlib import Path
+from zipfile import ZipFile
+
+from PIL import Image
+from rich.progress import track
+
+from htc import read_tivita_rgb, safe_copy, settings
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description=(
+            "Searches for all Tivita images in a folder and creates a task list for MITK to annotated those images."
+        ),
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--input-dir",
+        type=Path,
+        required=True,
+        help="Path to the folder with the images which should be annotated.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        required=True,
+        help=(
+            "Path to the output directory where the MITK files (images directory and task_list.json) should be stored."
+        ),
+    )
+    parser.add_argument(
+        "--wildcard",
+        type=str,
+        default="*_RGB-Image.png",
+        required=False,
+        help="Wildcard file pattern which should be used to select RGB files.",
+    )
+    args = parser.parse_args()
+
+    input_dir = args.input_dir
+    output_dir = args.output_dir
+    assert input_dir.exists(), f"The input directory {input_dir} does not exist"
+
+    images_dir = output_dir / "images"
+    task_list_file = output_dir / "task_list.json"
+    zip_file = output_dir / "mitk.zip"
+
+    # Existing results are never overwritten
+    assert not images_dir.exists(), (
+        f"The output directory {output_dir} already contains an images folder. Please delete it or select a different"
+        " output directory"
+    )
+    assert not task_list_file.exists(), (
+        f"The output directory {output_dir} already contains a task_list.json. Please delete it or select a different"
+        " output directory"
+    )
+    assert not zip_file.exists(), (
+        f"The zip file {zip_file} already exists in the output directory. Please delete it or select a different output"
+        " directory"
+    )
+
+    # Find all images in the input directory
+    # We use a dict to get a sorted list of unique images
+    paths = dict.fromkeys(sorted(input_dir.rglob(args.wildcard)), True)
+
+    assert len(paths) > 0, f"Could not find any images in {input_dir}"
+    print(f"Found {len(paths)} images in {input_dir}")
+
+    # The images directory is only created after we know that there is something to store. Otherwise, a failed search
+    # would leave an empty folder behind which blocks the next run (see the assertions above)
+    images_dir.mkdir(exist_ok=True, parents=True)
+
+    # Create task list and copy RGB images
+    tasks = {
+        "FileFormat": "MITK Segmentation Task List",
+        "Version": 1,
+        "Name": "Segmentation",
+        "Defaults": {"LabelNameSuggestions": "dataset_labels.json"},
+        "Tasks": [],
+    }
+
+    # Number of digits of the largest image number (ceil(log10(n)) is one digit short if n is a power of ten)
+    n_digits = len(str(len(paths)))
+    # Fixed part of the file names which follows after the wildcard character (e.g. _RGB-Image.png)
+    name_suffix = args.wildcard.removeprefix("*")
+    for i, p in track(enumerate(paths.keys()), total=len(paths)):
+        # The suffix contains the file extension, so it must be removed from the full file name (it can never match
+        # the stem). If the suffix cannot be removed or nothing remains, we fall back to the stem
+        timestamp = p.name.removesuffix(name_suffix) if name_suffix != "" else ""
+        if timestamp in ("", p.name):
+            timestamp = p.stem
+        image_name = str(i + 1).rjust(n_digits, "0") + f"_{timestamp}"
+
+        try:
+            rgb = read_tivita_rgb(p)
+            rgb = Image.fromarray(rgb)
+            rgb.save(images_dir / f"{image_name}.png", optimize=True)
+        except Exception:
+            # Best effort: images which cannot be read as Tivita RGB are used as they are
+            settings.log_once.info(
+                "Could not read the Tivita RGB image. The RGB file will be copied instead. This is fine if the image"
+                " does not contain black borders"
+            )
+            safe_copy(p, images_dir / f"{image_name}.png")
+
+        tasks["Tasks"].append({
+            "Name": f"{image_name}",
+            "Image": f"images/{image_name}.png",
+            "Result": f"results/{image_name}.nrrd",
+        })
+
+    with task_list_file.open("w") as f:
+        json.dump(tasks, f, indent=4)
+
+    # Create zip file of the task list and the images
+    with ZipFile(zip_file, mode="w") as archive:
+        archive.write(task_list_file, task_list_file.name)
+        for p in sorted(images_dir.iterdir()):
+            archive.write(p, f"images/{p.name}")
+
+    print(f"Stored the images folder at {images_dir}")
+    print(f"Stored the task_list.json at {task_list_file}")
+    print(f"Stored zip file at {zip_file}")
diff --git a/htc/utils/paths.py b/htc/utils/paths.py
index 07e08be..61d9232 100644
--- a/htc/utils/paths.py
+++ b/htc/utils/paths.py
@@ -36,7 +36,15 @@ def all_masks_paths() -> list[DataPath]:
class ParserPreprocessing:
- def __init__(self, description: str):
+ def __init__(self, description: str, inplace: bool = False):
+ """
+ Helper class for the preprocessing scripts.
+
+ Args:
+ description: A short description of what the preprocessing script does.
+ inplace: Set this to true if your preprocessing scripts operates in-place and hence does not need an output path.
+ """
+ self.inplace = inplace
self.parser = argparse.ArgumentParser(
description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
@@ -121,7 +129,7 @@ def get_paths(self, filters: Union[list[Callable[["DataPath"], bool]], None] = N
# From now on, we write to the intermediates directory of the selected dataset
settings.intermediates_dir_all.set_default_location(self.args.dataset_name)
else:
- assert self.args.output_path is not None, (
+ assert self.inplace or self.args.output_path is not None, (
"Either --dataset-name or --output-path must be given (we need to know where the generated files should"
" be stored)"
)
diff --git a/htc/utils/renderjson.js b/htc/utils/renderjson.js
index f8905a3..4304184 100644
--- a/htc/utils/renderjson.js
+++ b/htc/utils/renderjson.js
@@ -8,154 +8,212 @@
// Code from: https://github.com/caldwell/renderjson
// Code is adapted to avoid conflicts with Plotly (the originally returned define object from renderjson causes problems)
-var renderjson = (function() {
- var themetext = function(/* [class, text]+ */) {
+var renderjson = (function () {
+ var themetext = function (/* [class, text]+ */) {
var spans = [];
while (arguments.length)
- spans.push(append(span(Array.prototype.shift.call(arguments)),
- text(Array.prototype.shift.call(arguments))));
+ spans.push(
+ append(span(Array.prototype.shift.call(arguments)), text(Array.prototype.shift.call(arguments))),
+ );
return spans;
};
- var append = function(/* el, ... */) {
+ var append = function (/* el, ... */) {
var el = Array.prototype.shift.call(arguments);
- for (var a=0; a 0 && type != "string")
- show();
+ var show = function () {
+ if (!content)
+ append(
+ empty.parentNode,
+ (content = prepend(
+ builder(),
+ A(options.hide, "disclosure", function () {
+ content.style.display = "none";
+ empty.style.display = "inline";
+ }),
+ )),
+ );
+ content.style.display = "inline";
+ empty.style.display = "none";
+ };
+ append(
+ empty,
+ A(options.show, "disclosure", show),
+ themetext(type + " syntax", open),
+ A(placeholder, null, show),
+ themetext(type + " syntax", close),
+ );
+
+ var el = append(span(), text(my_indent.slice(0, -1)), empty);
+ if (show_level > 0 && type != "string") show();
return el;
};
if (json === null) return themetext(null, my_indent, "keyword", "null");
if (json === void 0) return themetext(null, my_indent, "keyword", "undefined");
- if (typeof(json) == "string" && json.length > options.max_string_length)
- return disclosure('"', json.substr(0,options.max_string_length)+" ...", '"', "string", function () {
+ if (typeof json == "string" && json.length > options.max_string_length)
+ return disclosure('"', json.substr(0, options.max_string_length) + " ...", '"', "string", function () {
return append(span("string"), themetext(null, my_indent, "string", JSON.stringify(json)));
});
- if (typeof(json) != "object" || [Number, String, Boolean, Date].indexOf(json.constructor) >= 0) // Strings, numbers and bools
- return themetext(null, my_indent, typeof(json), JSON.stringify(json));
+ if (typeof json != "object" || [Number, String, Boolean, Date].indexOf(json.constructor) >= 0)
+ // Strings, numbers and bools
+ return themetext(null, my_indent, typeof json, JSON.stringify(json));
if (json.constructor == Array) {
if (json.length == 0) return themetext(null, my_indent, "array syntax", "[]");
return disclosure("[", options.collapse_msg(json.length), "]", "array", function () {
var as = append(span("array"), themetext("array syntax", "[", null, "\n"));
- for (var i=0; i type:
_type_cache[class_definition] = getattr(module, match.group(2))
return _type_cache[class_definition]
+
+
+def variable_from_string(definition: str) -> Any:
+    """
+    Parses a string for a variable definition and imports the variable.
+
+    This works for any variable which can be imported
+    >>> mapping = variable_from_string("htc.settings_seg>label_mapping")
+    >>> len(mapping)
+    19
+
+    It is also possible to import a variable via the path to the script
+    >>> from htc.settings import settings
+    >>> mapping = variable_from_string(str(settings.htc_package_dir / "settings_seg.py") + ">label_mapping")
+    >>> len(mapping)
+    19
+
+    If the module itself does not have the requested variable, the variable is looked up on an object inside the module which has the same name as the module (last part of the module name or stem of the script path).
+
+    Args:
+        definition: Variable definition in the form module>variable (e.g. htc.settings_seg>label_mapping). The first part (module) may also be the full path to the Python file.
+
+    Returns: The imported variable.
+
+    Raises:
+        AssertionError: If the definition does not have the form module>variable.
+        ImportError: If the first part can neither be imported as module nor be loaded as Python script (e.g. because the path does not have a Python source suffix).
+        AttributeError: If the variable cannot be found in the module (or in the object named like the module).
+
+    Errors which occur while executing a script given by its path (e.g. FileNotFoundError for a non-existing file) are passed on unchanged.
+    """
+    match = re.search(r"^([^>]+)>(\w+)$", definition)
+    assert match is not None, (
+        f"Could not parse the string {definition} as a valid variable definition. It must be in the format"
+        " module>variable (e.g. htc.settings_seg>label_mapping) and must refer to a valid Python script"
+    )
+    module_ref, variable_name = match.group(1), match.group(2)
+
+    try:
+        module = importlib.import_module(module_ref)
+        is_path = False
+    except ModuleNotFoundError as error:
+        # Try path importing (https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly)
+        spec = importlib.util.spec_from_file_location(variable_name, module_ref)
+        if spec is None or spec.loader is None:
+            # No loader is available for this location (e.g. no Python source suffix). Without this check, the
+            # next call would fail with an AttributeError on None which hides the real problem
+            raise ImportError(
+                f"Could not import {module_ref} either as a module or as a path to a Python script"
+            ) from error
+
+        module = importlib.util.module_from_spec(spec)
+
+        # The module is registered under the variable name (unchanged behavior). Remember what was registered
+        # before so that a failed import does not leave a partially initialized module behind
+        had_previous = variable_name in sys.modules
+        previous_module = sys.modules.get(variable_name)
+        sys.modules[variable_name] = module
+        try:
+            spec.loader.exec_module(module)
+        except BaseException:
+            if had_previous:
+                sys.modules[variable_name] = previous_module
+            else:
+                sys.modules.pop(variable_name, None)
+            raise
+        is_path = True
+
+    if not hasattr(module, variable_name):
+        # For example, if settings is an object inside settings.py, then the variable is an attribute of this object
+        name = Path(module_ref).stem if is_path else module_ref.split(".")[-1]
+        module = getattr(module, name)
+
+    return getattr(module, variable_name)
diff --git a/htc/utils/visualization.py b/htc/utils/visualization.py
index 430b83d..dcff46f 100644
--- a/htc/utils/visualization.py
+++ b/htc/utils/visualization.py
@@ -5,6 +5,7 @@
import gzip
import json
import math
+import re
import uuid
from pathlib import Path
from typing import Callable, Union
@@ -14,12 +15,9 @@
import plotly
import plotly.express as px
import plotly.graph_objects as go
-import torch
-import torch.nn.functional as F
from IPython.display import HTML, display
from matplotlib.colors import to_rgba
from PIL import Image
-from plotly.colors import n_colors as generate_n_colors
from plotly.subplots import make_subplots
from scipy import stats
@@ -38,9 +36,10 @@
from htc.utils.ColorcheckerReader import ColorcheckerReader
from htc.utils.colors import generate_distinct_colors
from htc.utils.Config import Config
-from htc.utils.helper_functions import median_table, sort_labels
+from htc.utils.helper_functions import basic_statistics, median_table, sort_labels
from htc.utils.JSONSchemaMeta import JSONSchemaMeta
from htc.utils.LabelMapping import LabelMapping
+from htc.utils.Task import Task
def compress_html(file: Union[Path, None], fig_or_html: Union[go.Figure, str]) -> Union[str, None]:
@@ -358,89 +357,6 @@ def show_loss_chart(df_train: pd.DataFrame, df_val: pd.DataFrame = None) -> None
fig.show()
-def show_activation_image(df: pd.DataFrame, hist_config: dict, dataset_index: int, epoch: int = None) -> None:
- # Combine activations from all images
- if epoch is None:
- activations = df[(df["dataset_index"] == dataset_index)]["val/activations"].values
- else:
- activations = df[(df["epoch_index"] == epoch - 1) & (df["dataset_index"] == dataset_index)][
- "val/activations"
- ].values
- layer_counts = {}
-
- for key in activations[0].keys():
- layer_counts[key] = np.sum(
- [a[key]["counts"] for a in activations], axis=0
- ) # Sum over the activations from all images
-
- fig = make_subplots(
- rows=2,
- cols=1,
- subplot_titles=("Activation Distribution", r"$\mu \pm \sigma$"),
- row_heights=[0.7, 0.3],
- vertical_spacing=0.1,
- )
-
- values_range = np.arange(hist_config["min"], hist_config["max"], hist_config["step"]) + (
- hist_config["step"] / 2
- ) # The values of the histogram are predefined in the training config
- colors = generate_n_colors("rgb(5, 200, 200)", "rgb(200, 10, 10)", 16, colortype="rgb")
-
- # Calculate mean and std during the sample process (not perfect and this information could also be calculated from the histogram, but it is simple ;-)
- layer_mean = {}
- layer_std = {}
-
- for (name, counts), color in zip(layer_counts.items(), colors):
- # It is a bit stupid but in order to generate the Violin plots we need the original activations instead of the counts
- # The approach here is to use the counts and sample n values according to the distribution and then generate the Violin plot (of course, this is only an approximation)
- counts = counts / np.sum(counts) # Normalize to probabilities
- samples = np.repeat(
- values_range, np.ceil(counts * 5000).astype(np.int)
- ) # ceil ensures that every value with a probability > 0 gets sampled at least once
- layer_mean[name] = np.mean(samples)
- layer_std[name] = np.std(samples)
-
- fig.add_trace(go.Violin(x=samples, line_color=color, bandwidth=hist_config["step"], name=name), row=1, col=1)
-
- if all(excluded not in name for excluded in ["pool", "logits", "input", "Model"]):
- samples = F.elu(torch.from_numpy(samples))
- fig.add_trace(go.Violin(x=samples, line_color=color, name=f"elu({name})"), row=1, col=1)
-
- fig.update_traces(orientation="h", side="positive", width=3, points=False, row=1, col=1)
- fig.update_xaxes(title_text="Activations", row=1, col=1)
- fig.update_yaxes(title_text="Layer", row=1, col=1)
-
- # Mean/Std graph
- means = np.array(list(layer_mean.values()))
- stds = np.array(list(layer_std.values()))
- line = {"color": plotly.colors.DEFAULT_PLOTLY_COLORS[0]}
- x = list(layer_mean.keys())
- fig.add_trace(
- go.Scatter(x=x, y=means, mode="lines+markers", line=line, legendgroup="stat", name="stats"), row=2, col=1
- )
- fig.add_trace(
- go.Scatter(x=x, y=means + stds, mode="lines+markers", line=line, legendgroup="stat", showlegend=False),
- row=2,
- col=1,
- )
- fig.add_trace(
- go.Scatter(
- x=x, y=means - stds, mode="lines+markers", line=line, legendgroup="stat", showlegend=False, fill="tonexty"
- ),
- row=2,
- col=1,
- )
-
- fig.update_xaxes(title_text="Layer", row=2, col=1)
- fig.update_yaxes(title_text="Mean/Std", row=2, col=1)
-
- # General settings
- fig.layout.title = f"Activation distribution throughout the network (epoch {epoch})"
- fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False, title_x=0.5)
- fig.layout.height = 1200
- fig.show()
-
-
def create_class_scores_figure(agg: MetricAggregation) -> None:
df = agg.df
mapping = LabelMapping.from_config(agg.config)
@@ -604,9 +520,11 @@ def show_class_scores_epoch(df: pd.DataFrame, mapping: LabelMapping) -> None:
line_ids.append(f)
- button_states.append(
- {"label": fold_name, "method": "update", "args": [{"title": f"Dice over training time ({fold_name})"}]}
- )
+ button_states.append({
+ "label": fold_name,
+ "method": "update",
+ "args": [{"title": f"Statistics for the validation set ({fold_name})"}],
+ })
# Calculate the visible states (find out which lines have to be activated for which fold)
line_ids = np.array(line_ids)
@@ -642,6 +560,8 @@ def create_confusion_figure(confusion_matrix: np.ndarray, labels: list[str] = No
y=labels,
x=labels,
text=hover_text,
+ colorscale="Teal",
+ colorbar={"title": "%", "thickness": 10},
hovertemplate="true: %{y} predicted: %{x} row-ratio: %{z:.3f} % pixels: %{text}",
)
annotations = []
@@ -650,8 +570,8 @@ def create_confusion_figure(confusion_matrix: np.ndarray, labels: list[str] = No
annotations.append({
"x": labels[j],
"y": labels[i],
- "font": {"color": "white"},
- "text": f"{value:.1f}",
+ "font": {"color": "black" if value < 0.5 else "white"},
+ "text": f"{value*100:.1f}",
"xref": "x1",
"yref": "y1",
"showarrow": False,
@@ -659,8 +579,9 @@ def create_confusion_figure(confusion_matrix: np.ndarray, labels: list[str] = No
layout = {"xaxis": {"title": "Predicted value"}, "yaxis": {"title": "Real value"}, "annotations": annotations}
fig = go.Figure(data=data, layout=layout)
+ fig.update_yaxes(autorange="reversed")
fig.update_layout(height=max(len(confusion_matrix) * 50, 300), width=max(len(confusion_matrix) * 50, 300) + 100)
- fig.update_layout(title_x=0.5, title_text="Confusion matrix of misclassification (row-wise normalized)")
+ fig.update_layout(title_x=0.5, title_text="Confusion matrix (row-wise normalized)")
return fig
@@ -838,7 +759,7 @@ def create_median_spectra_figure(path: DataPath) -> go.Figure:
df = median_table(image_names=[path.image_name()], annotation_name="all")
df = sort_labels(df)
df = df.query("label_name in @path.annotated_labels('all')")
- line_options = ["solid", "dot", "dash", "longdash", "dashdot", "longdashdot"]
+ line_options = ["solid", "dot", "dash", "longdash", "dashdot", "longdashdot", "5, 10, 5", "2, 10, 2", "5, 2, 5"]
annotator_mapping = {a: line_options[i] for i, a in enumerate(df["annotation_name"].unique())}
fig = go.Figure()
@@ -882,11 +803,12 @@ def create_median_spectra_comparison_figure(
"""
n_cols = 4
n_rows = math.ceil(df["label_name"].nunique() / n_cols)
+ n_missing = n_cols * n_rows - df["label_name"].nunique()
labels = df["label_name"].unique()
fig = make_subplots(
rows=n_rows,
cols=n_cols,
- shared_xaxes="all",
+ shared_xaxes="all" if n_missing == 0 else False,
shared_yaxes="all",
subplot_titles=labels,
vertical_spacing=0.05,
@@ -923,10 +845,60 @@ def create_median_spectra_comparison_figure(
)
if col == 0:
- fig.update_yaxes(title="L1 normalized reflectance", row=row + 1, col=col + 1)
+ fig.update_yaxes(title="L1 normalized reflectance [a.u.]", row=row + 1, col=col + 1)
if row == n_rows - 1:
fig.update_xaxes(title="wavelength [nm]", row=row + 1, col=col + 1)
+ if n_missing != 0:
+ # Manually add the x-axis ticks to the plots in the last rows
+ fig.update_xaxes(showticklabels=False)
+ ticks = [600, 700, 800, 900]
+ for i in range(n_missing):
+ fig.update_xaxes(
+ tickmode="array",
+ tickvals=ticks,
+ ticktext=ticks,
+ showticklabels=True,
+ title="wavelength [nm]",
+ row=n_rows - 1,
+ col=n_cols - i,
+ )
+ for i in range(n_cols - n_missing):
+ fig.update_xaxes(
+ tickmode="array",
+ tickvals=ticks,
+ ticktext=ticks,
+ showticklabels=True,
+ title="wavelength [nm]",
+ row=n_rows,
+ col=i + 1,
+ )
+
+ if n_missing != 0:
+ # Manually add the x-axis ticks to the plots in the last rows
+ fig.update_xaxes(showticklabels=False)
+ ticks = [600, 700, 800, 900]
+ for i in range(n_missing):
+ fig.update_xaxes(
+ tickmode="array",
+ tickvals=ticks,
+ ticktext=ticks,
+ showticklabels=True,
+ title="wavelength [nm]",
+ row=n_rows - 1,
+ col=n_cols - i,
+ )
+ for i in range(n_cols - n_missing):
+ fig.update_xaxes(
+ tickmode="array",
+ tickvals=ticks,
+ ticktext=ticks,
+ showticklabels=True,
+ title="wavelength [nm]",
+ row=n_rows,
+ col=i + 1,
+ )
+
fig.update_layout(
title="Spectra for organs and cameras (with inter-pig deviation)",
title_x=0.5,
@@ -944,6 +916,7 @@ def create_overview_document(
navigation_paths: list[DataPath] = None,
navigation_link_callback: Callable[[str, str, DataPath], str] = None,
nav_width: str = "23em",
+ searchable_meta_attributes: list[str] = None,
) -> str:
"""
Create an overview figure for the given image. It will show the RGB image with all the available annotations plus the tissue parameter images.
@@ -954,9 +927,13 @@ def create_overview_document(
navigation_paths: If not None, will add a navigation bar with all links sorted by organ. The user can use this navigation bar to easily switch between images.
navigation_link_callback: Callback which receives the label name, number and data path of the target image and should create a relative link where the corresponding local html file for the target image can be found. If parts of the link contain invalid URL characters (e.g. # in image name), then please wrap it in quote_plus before (e.g. quote_plus(p.image_name())). For example, ('spleen', '08', DataPath) --> '../08_spleen/P086%232021_04_15_09_22_20.html'.
nav_width: Width of the navigation bar (in CSS units).
+ searchable_meta_attributes: List of meta attributes which should be searchable. If None, the annotation_name will be searchable per default. You need to include the annotation_name yourself if you change this parameter.
Returns: HTML string which is best saved with the `compress_html()` function.
"""
+ if searchable_meta_attributes is None:
+ searchable_meta_attributes = ["annotation_name"]
+
seg = path.read_segmentation(annotation_name="all")
if seg is None or len(path.annotated_labels(annotation_name="all")) == 0:
# No annotations available, only show the RGB image
@@ -965,15 +942,17 @@ def create_overview_document(
# Similar size as create_segmentation_overlay()
img_height, img_width = rgb_image.shape[:2]
- fig_seg.update_layout(
- height=img_height * 1.5, width=img_width * 1.53, template="plotly_white", margin=dict(t=40)
- )
+ fig_seg.update_layout(height=img_height * 1.5, width=img_width * 1.53, template="plotly_white")
fig_seg.update_layout(title_x=0.5, title_text=path.image_name())
fig_median = None
else:
fig_seg = create_segmentation_overlay(seg, path)
fig_median = create_median_spectra_figure(path)
+ # Remove the Plotly title because it cannot be selected
+ # We'll add the title manually via HTML below
+ fig_seg.update_layout(margin=dict(t=0), title=None)
+
if include_tpi:
images = [path.compute_sto2().data, path.compute_nir().data, path.compute_ohi().data, path.compute_twi().data]
names = [
@@ -1002,10 +981,11 @@ def create_overview_document(
fig_tpi.update_layout(yaxis_autorange="reversed")
fig_tpi.update_layout(width=1000, height=800)
- annotation_meta = path.read_annotation_meta()
- if annotation_meta is not None:
- meta_html = "
Meta annotation for this image:
\n"
- meta_html += dict_to_html_list(annotation_meta, schema=path.annotation_meta_schema())
+ skip_keys = {"dsettings", "dataset_env_name", "data_dir", "intermediates_dir"}
+ image_meta = {k: v for k, v in path.meta().items() if k not in skip_keys}
+ if len(image_meta) > 0:
+ meta_html = f"
Meta annotation for the image {path.image_name()}:
\n"
+ meta_html += dict_to_html_list(image_meta, schema=path.annotation_meta_schema())
else:
meta_html = ""
@@ -1084,12 +1064,13 @@ def create_overview_document(
else:
meta = ""
- invisible_meta = p.meta("annotation_name")
- if invisible_meta is not None:
- invisible_meta = " ".join(invisible_meta)
- invisible_meta = f'{invisible_meta}'
- else:
- invisible_meta = ""
+ invisible_meta = ""
+ for attribute in searchable_meta_attributes:
+ attribute_meta = p.meta(attribute)
+ if attribute_meta is not None:
+ if type(attribute_meta) == list:
+ attribute_meta = " ".join(attribute_meta)
+ invisible_meta += f'{attribute}={attribute_meta} '
link = navigation_link_callback(l, label_number, p) + f"?nav=show&link_index={link_index}"
paths_html += (
@@ -1099,10 +1080,17 @@ def create_overview_document(
link_index += 1
# Add an image for the current label if available
- if (path.data_dir / "extra_label_symbols").exists():
- svg_path = path.data_dir / "extra_label_symbols" / f"Cat_{label_number}_{l}.svg"
- else:
+ svg_path = path.data_dir / "extra_label_symbols" / f"Cat_{label_number}_{l}.svg"
+ if not svg_path.exists():
+ # Try to find the label symbol in the masks dataset
svg_path = settings.data_dirs.masks / "extra_label_symbols" / f"Cat_{label_ordering.get(l, '')}_{l}.svg"
+ if not svg_path.exists():
+ # In case the label ordering is different, try to find the symbol by name in the masks dataset
+ svg_files = sorted((settings.data_dirs.masks / "extra_label_symbols").glob("*.svg"))
+ for f in svg_files:
+ if re.search(r"Cat_\d+_" + l, f.stem) is not None:
+ svg_path = f
+ break
if svg_path.exists():
with svg_path.open("r") as f:
@@ -1199,19 +1187,19 @@ def create_overview_document(
}
}
"""
- nav_html = """
-☰ Image selection{}{}
+ nav_html = f"""
+☰ Image selection{prev_link}{next_link}
-""".format(prev_link, next_link, details_html, search_function, nav_width)
+"""
nav_css = """
"""
@@ -1437,7 +1436,10 @@ def create_overview_document(
{nav_html}
- {fig_seg.to_html(full_html=False, include_plotlyjs='cdn', div_id='segmentation')}
+
"
+ ],
+ "text/plain": [
+ " set_type n_pigs n_images\n",
+ "0 test 5 166\n",
+ "1 train 15 340"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sqldf(\"\"\"\n",
+ " SELECT set_type, COUNT(DISTINCT subject_name) AS n_pigs, COUNT(DISTINCT timestamp) AS n_images\n",
+ " FROM df\n",
+ " GROUP BY set_type\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "658c79e0-0a04-4a2d-b880-37c6ba4a2fa4",
+ "metadata": {},
+ "source": [
+ "Ratio of background pixels"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "1d7df6f5-9111-49e1-b7db-4912fb903cd7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.48073018311511867"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_background = df.query(\"label_name not in @labels\")\n",
+ "sqldf(\"\"\"\n",
+ " SELECT timestamp, CAST(SUM(n_pixels) AS FLOAT) / 307200 AS pixel_ratio\n",
+ " FROM df_background\n",
+ " GROUP BY timestamp\n",
+ "\"\"\")[\"pixel_ratio\"].mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1f096dff-9b2f-4054-a215-77ebf9abb823",
+ "metadata": {},
+ "source": [
+ "Labels which are available for all pigs in the training set and are hence suitable for the dataset size experiment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "455d7d04-4741-4146-9e58-5d211db36b73",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
label_name
\n",
+ "
n_pigs
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
1
\n",
+ "
kidney
\n",
+ "
3
\n",
+ "
\n",
+ "
\n",
+ "
2
\n",
+ "
kidney_with_Gerotas_fascia
\n",
+ "
3
\n",
+ "
\n",
+ "
\n",
+ "
3
\n",
+ "
major_vein
\n",
+ "
3
\n",
+ "
\n",
+ "
\n",
+ "
5
\n",
+ "
muscle
\n",
+ "
4
\n",
+ "
\n",
+ "
\n",
+ "
6
\n",
+ "
heart
\n",
+ "
5
\n",
+ "
\n",
+ "
\n",
+ "
7
\n",
+ "
lung
\n",
+ "
5
\n",
+ "
\n",
+ "
\n",
+ "
10
\n",
+ "
pancreas
\n",
+ "
7
\n",
+ "
\n",
+ "
\n",
+ "
12
\n",
+ "
bladder
\n",
+ "
9
\n",
+ "
\n",
+ "
\n",
+ "
13
\n",
+ "
gallbladder
\n",
+ "
9
\n",
+ "
\n",
+ "
\n",
+ "
18
\n",
+ "
omentum
\n",
+ "
13
\n",
+ "
\n",
+ "
\n",
+ "
19
\n",
+ "
fat_subcutaneous
\n",
+ "
14
\n",
+ "
\n",
+ "
\n",
+ "
22
\n",
+ "
colon
\n",
+ "
15
\n",
+ "
\n",
+ "
\n",
+ "
23
\n",
+ "
liver
\n",
+ "
15
\n",
+ "
\n",
+ "
\n",
+ "
24
\n",
+ "
peritoneum
\n",
+ "
15
\n",
+ "
\n",
+ "
\n",
+ "
25
\n",
+ "
skin
\n",
+ "
15
\n",
+ "
\n",
+ "
\n",
+ "
26
\n",
+ "
small_bowel
\n",
+ "
15
\n",
+ "
\n",
+ "
\n",
+ "
27
\n",
+ "
spleen
\n",
+ "
15
\n",
+ "
\n",
+ "
\n",
+ "
28
\n",
+ "
stomach
\n",
+ "
15
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " label_name n_pigs\n",
+ "1 kidney 3\n",
+ "2 kidney_with_Gerotas_fascia 3\n",
+ "3 major_vein 3\n",
+ "5 muscle 4\n",
+ "6 heart 5\n",
+ "7 lung 5\n",
+ "10 pancreas 7\n",
+ "12 bladder 9\n",
+ "13 gallbladder 9\n",
+ "18 omentum 13\n",
+ "19 fat_subcutaneous 14\n",
+ "22 colon 15\n",
+ "23 liver 15\n",
+ "24 peritoneum 15\n",
+ "25 skin 15\n",
+ "26 small_bowel 15\n",
+ "27 spleen 15\n",
+ "28 stomach 15"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sqldf(\"\"\"\n",
+ " SELECT label_name, COUNT(DISTINCT subject_name) as n_pigs\n",
+ " FROM df\n",
+ " WHERE set_type = 'train'\n",
+ " GROUP BY label_name\n",
+ " ORDER BY n_pigs\n",
+ "\"\"\").query(\"label_name in @labels\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "42c5f742-8b1f-4640-b1bc-a6f1ffc11df7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4192.15625"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.query('label_name == \"major_vein\"')[\"n_pixels\"].mean()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/paper/MIA2021/DatasetSize.ipynb b/paper/MIA2022/DatasetSize.ipynb
similarity index 100%
rename from paper/MIA2021/DatasetSize.ipynb
rename to paper/MIA2022/DatasetSize.ipynb
diff --git a/paper/MIA2021/GeneralizationError.ipynb b/paper/MIA2022/GeneralizationError.ipynb
similarity index 100%
rename from paper/MIA2021/GeneralizationError.ipynb
rename to paper/MIA2022/GeneralizationError.ipynb
diff --git a/paper/MIA2021/ImageExamples.ipynb b/paper/MIA2022/ImageExamples.ipynb
similarity index 100%
rename from paper/MIA2021/ImageExamples.ipynb
rename to paper/MIA2022/ImageExamples.ipynb
diff --git a/paper/MIA2021/Intro.ipynb b/paper/MIA2022/Intro.ipynb
similarity index 100%
rename from paper/MIA2021/Intro.ipynb
rename to paper/MIA2022/Intro.ipynb
diff --git a/paper/MIA2021/NSDThresholds.ipynb b/paper/MIA2022/NSDThresholds.ipynb
similarity index 100%
rename from paper/MIA2021/NSDThresholds.ipynb
rename to paper/MIA2022/NSDThresholds.ipynb
diff --git a/paper/MIA2021/RankingDifferenceLR.ipynb b/paper/MIA2022/RankingDifferenceLR.ipynb
similarity index 100%
rename from paper/MIA2021/RankingDifferenceLR.ipynb
rename to paper/MIA2022/RankingDifferenceLR.ipynb
diff --git a/paper/MIA2021/SpectraVisualization.ipynb b/paper/MIA2022/SpectraVisualization.ipynb
similarity index 100%
rename from paper/MIA2021/SpectraVisualization.ipynb
rename to paper/MIA2022/SpectraVisualization.ipynb
diff --git a/paper/MIA2021/SuperpixelReference.ipynb b/paper/MIA2022/SuperpixelReference.ipynb
similarity index 100%
rename from paper/MIA2021/SuperpixelReference.ipynb
rename to paper/MIA2022/SuperpixelReference.ipynb
diff --git a/paper/MIA2021/annotation_protocol.ipynb b/paper/MIA2022/annotation_protocol.ipynb
similarity index 100%
rename from paper/MIA2021/annotation_protocol.ipynb
rename to paper/MIA2022/annotation_protocol.ipynb
diff --git a/paper/MIA2021/interactive_example_spectra.ipynb b/paper/MIA2022/interactive_example_spectra.ipynb
similarity index 100%
rename from paper/MIA2021/interactive_example_spectra.ipynb
rename to paper/MIA2022/interactive_example_spectra.ipynb
diff --git a/paper/MIA2021/model_overview_samples.ipynb b/paper/MIA2022/model_overview_samples.ipynb
similarity index 100%
rename from paper/MIA2021/model_overview_samples.ipynb
rename to paper/MIA2022/model_overview_samples.ipynb
diff --git a/paper/MIA2021/reproducibility.md b/paper/MIA2022/reproducibility.md
similarity index 99%
rename from paper/MIA2021/reproducibility.md
rename to paper/MIA2022/reproducibility.md
index 8af7c3d..388165c 100644
--- a/paper/MIA2021/reproducibility.md
+++ b/paper/MIA2022/reproducibility.md
@@ -1,7 +1,9 @@
# Reproducibility (DKFZ internal only)
+
This document will guide you through the process of reproducing the main results for our [semantic organ segmentation paper](https://doi.org/10.1016/j.media.2022.102488). To reduce the number of required training runs, we are only reproducing the results for the spatial-spectral comparison (Fig. 5).
## Setup
+
Start by installing the [repository](https://git.dkfz.de/imsy/issi/htc) according to the [README](../../README.md).
> These instructions were tested on the `paper_semantic_v3` tag. However, for reproducing, we recommend using the latest master instead, as there are some general dependencies (e.g. dataset version, cluster access) which are not guaranteed to work on an old tag in the future.
@@ -9,36 +11,47 @@ Start by installing the [repository](https://git.dkfz.de/imsy/issi/htc) accordin
> Please use a [`screen`](https://linuxize.com/post/how-to-use-linux-screen/) environment for all of the following commands since they may take a while to complete.
## (Optional) run tests
+
If you like, you can run all the tests (some tests may be skipped) [≈ 1 hour]
+
```bash
htc tests --slow --parallel 4
```
+
With the test `test_paper_semantic_files` (usually one of the longest running tests), you have already reproduced all paper figures based on the trained models. We will now re-train the networks to see whether we can still reproduce the main results.
## Start fresh
+
We want to train our networks again based on the raw data, so please delete the intermediate files
+
```bash
rmd ~/htc/2021_02_05_Tivita_multiorgan_semantic/intermediates
```
There are 20 pigs in total in the semantic dataset and the pigs `['P043', 'P046', 'P062', 'P068', 'P072']` are used as test set. Please move the corresponding pig folders (located in `~/htc/2021_02_05_Tivita_multiorgan_semantic/data/subjects`) somewhere else to a location only you know (but outside the repository). This ensures that the following training steps cannot accidentally access the test set.
+
```bash
for subject_name in P043 P046 P062 P068 P072; do mv ~/htc/2021_02_05_Tivita_multiorgan_semantic/data/subjects/$subject_name YOUR_SECRET_FOLDER/$subject_name; done
```
## Preprocessing
+
Create the preprocessed files by running the following scripts (this basically re-creates the intermediates) [≈ 10 minutes]
+
```bash
htc l1_normalization && htc median_spectra && htc parameter_images
```
## Training
+
Start the training runs with the following script. This will create and submit 75 cluster jobs. It is recommended that you have set up filters in your mailbox to ensure that mails from the cluster get sorted into their own folder. [≈ 1–2 days (depending on the cluster utilization)]
+
```bash
htc model_comparison
```
Once all jobs have finished successfully, copy the trained models from the cluster and combine the results from the different folds (some unimportant warnings may be raised) [≈ 20 minutes]
+
```bash
htc move_results
htc table_generation
@@ -47,25 +60,32 @@ htc table_generation
All run folders are stored in `~/htc/results/training/(image|patch|superpixel_classification|pixel)` and there will be a `validation_table.pkl.xz` with all the validation results and an `ExperimentAnalysis.html` notebook with visualizations for each run. You also need the timestamp which was used for the runs later (e.g. `2022-02-03_22-58-44`). Every algorithm is prefixed with the same timestamp.
## Test inference
+
During training, we computed only validation results. It is now time to move the previously hidden test pigs back to the data folder and re-run the preprocessing steps from above [≈ 10 minutes]
+
```bash
for subject_name in P043 P046 P062 P068 P072; do mv YOUR_SECRET_FOLDER/$subject_name ~/htc/2021_02_05_Tivita_multiorgan_semantic/data/subjects/$subject_name; done
htc l1_normalization && htc median_spectra && htc parameter_images
```
For the NSD, we need to make the inter-rater results available (they are also shown in Fig. 5) [≈ 5 minutes]
+
```bash
htc nsd_thresholds
```
After this, it is finally time for the test predictions and validation [≈ 4 hours]
+
```bash
htc multiple --filter "" --script "run_tables.py"
```
## Main results
+
It is now time to take a look at the final results. The main figures are produced by a notebook and you can generate an HTML version via [≈ 5 minutes]
+
```bash
-HTC_MODEL_COMPARISON_TIMESTAMP="" jupyter nbconvert --to html --execute --output-dir=~/htc ~/htc/src/paper/MIA2021/Benchmarking.ipynb
+HTC_MODEL_COMPARISON_TIMESTAMP="" jupyter nbconvert --to html --execute --output-dir=~/htc ~/htc/src/paper/MIA2022/Benchmarking.ipynb
```
+
Fig. 5, Fig. 7 and Fig. 11 are directly shown in the notebook. Fig. 6 is stored in `~/htc/results/paper/ranking_bootstrapped_test_dice_metric_image.pdf`. Due to non-determinism in our machine learning, the results cannot be expected to be exactly the same, but as long as the results are roughly similar to the paper, everything is good :-)
diff --git a/paper/MIA2021/run_generate_variables.py b/paper/MIA2022/run_generate_variables.py
similarity index 100%
rename from paper/MIA2021/run_generate_variables.py
rename to paper/MIA2022/run_generate_variables.py
diff --git a/paper/MIA2024/BootstrapRanking.ipynb b/paper/MIA2024/BootstrapRanking.ipynb
new file mode 100644
index 0000000..83e45ee
--- /dev/null
+++ b/paper/MIA2024/BootstrapRanking.ipynb
@@ -0,0 +1,477 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "import copy\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import torch\n",
+ "from torch.utils.data import DataLoader\n",
+ "\n",
+ "from htc.context.models.context_evaluation import compare_context_runs, glove_runs\n",
+ "from htc.context.models.visualization import ranking_figure, ranking_legend\n",
+ "from htc.context.settings_context import settings_context\n",
+ "from htc.evaluation.ranking import BootstrapRankingSubjects\n",
+ "from htc.fonts.set_font import set_font\n",
+ "from htc.models.common.torch_helpers import move_batch_gpu\n",
+ "from htc.models.common.transforms import HTCTransformation\n",
+ "from htc.models.image.DatasetImage import DatasetImage\n",
+ "from htc.settings_seg import settings_seg\n",
+ "from htc.tivita.DataPath import DataPath\n",
+ "from htc.utils.Config import Config\n",
+ "\n",
+ "set_font(24)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/image/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_model_comparison\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[WARNING][py.warnings]warnings.py:110\n",
+ "/home/j562r/miniconda3/envs/htc2/lib/python3.11/site-packages/torch/nn/functional.py:4343: UserWarning: \n",
+ "Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please \n",
+ "specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for \n",
+ "details. \n",
+ " warnings.warn(\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[33mWARNING\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mpy.warnings\u001b[0m\u001b[1m]\u001b[0m \u001b[2mwarnings.py:110\u001b[0m\n",
+ "\u001b[35m/home/j562r/miniconda3/envs/htc2/lib/python3.11/site-packages/torch/nn/\u001b[0m\u001b[95mfunctional.py\u001b[0m:\u001b[38;5;145m4343\u001b[0m: UserWarning: \u001b[2m \u001b[0m\n",
+ "Default grid_sample and affine_grid behavior has changed to \u001b[33malign_corners\u001b[0m=\u001b[3;91mFalse\u001b[0m since \u001b[38;5;145m1.3\u001b[0m.\u001b[38;5;145m0\u001b[0m. Please \u001b[2m \u001b[0m\n",
+ "specify \u001b[33malign_corners\u001b[0m=\u001b[3;92mTrue\u001b[0m if the old behavior is desired. See the documentation of grid_sample for \u001b[2m \u001b[0m\n",
+ "details. \u001b[2m \u001b[0m\n",
+ " \u001b[1;35mwarnings.warn\u001b[0m\u001b[1m(\u001b[0m \u001b[2m \u001b[0m\n",
+ " \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/image/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_model_comparison\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
ce_loss
\n",
+ "
ece
\n",
+ "
dice_metric
\n",
+ "
used_labels
\n",
+ "
dice_metric_image
\n",
+ "
confusion_matrix
\n",
+ "
surface_distance_metric
\n",
+ "
surface_distance_metric_image
\n",
+ "
surface_dice_metric_mean
\n",
+ "
surface_dice_metric_image_mean
\n",
+ "
image_name
\n",
+ "
network
\n",
+ "
dataset
\n",
+ "
dataset_index
\n",
+ "
subject_name
\n",
+ "
timestamp
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
0
\n",
+ "
2.040663
\n",
+ "
{'error': 0.8628411889076233, 'accuracies': [1...
\n",
+ "
[0.9966934, 0.97932774, 0.9884513, 0.99261785,...
\n",
+ "
[0, 3, 4, 5, 9]
\n",
+ "
0.986499
\n",
+ "
[[146642, 0, 2, 23, 315, 483, 0, 0, 0, 65, 0, ...
\n",
+ "
[0.30972793166884083, 0.5907705154388064, 1.14...
\n",
+ "
0.757825
\n",
+ "
[0.9855702364394993, 0.8432470258922323, 0.986...
\n",
+ "
0.929099
\n",
+ "
P043#2019_12_20_10_05_27
\n",
+ "
baseline
\n",
+ "
semantic
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
\n",
+ "
\n",
+ "
1
\n",
+ "
2.040144
\n",
+ "
{'error': 0.8638126254081726, 'accuracies': [1...
\n",
+ "
[0.99715513, 0.9796044, 0.9904817, 0.9939147, ...
\n",
+ "
[0, 3, 4, 5, 9]
\n",
+ "
0.987200
\n",
+ "
[[145637, 0, 0, 27, 222, 465, 0, 0, 0, 41, 0, ...
\n",
+ "
[0.26621675778031273, 0.6333655949351811, 0.94...
\n",
+ "
0.678203
\n",
+ "
[0.9952887803175711, 0.8712871287128713, 0.995...
\n",
+ "
0.924438
\n",
+ "
P043#2019_12_20_10_05_48
\n",
+ "
baseline
\n",
+ "
semantic
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
\n",
+ "
\n",
+ "
2
\n",
+ "
2.038484
\n",
+ "
{'error': 0.8648071885108948, 'accuracies': [1...
\n",
+ "
[0.9983746, 0.9917354, 0.9931921, 0.9908809, 0...
\n",
+ "
[0, 3, 4, 5, 8]
\n",
+ "
0.906800
\n",
+ "
[[180891, 0, 0, 333, 37, 64, 0, 0, 0, 0, 0, 0,...
\n",
+ "
[0.2174811256048084, 1.200060671473143, 0.3961...
\n",
+ "
1.401052
\n",
+ "
[0.9935509087355873, 0.7235883424408015, 0.999...
\n",
+ "
0.795433
\n",
+ "
P043#2019_12_20_10_06_32
\n",
+ "
baseline
\n",
+ "
semantic
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
\n",
+ "
\n",
+ "
3
\n",
+ "
2.041420
\n",
+ "
{'error': 0.8622098565101624, 'accuracies': [1...
\n",
+ "
[0.995851, 0.9913923, 0.9891495, 0.9930062, 0.0]
\n",
+ "
[0, 3, 4, 5, 8]
\n",
+ "
0.793880
\n",
+ "
[[99849, 0, 0, 330, 141, 254, 0, 0, 0, 0, 0, 0...
\n",
+ "
[0.2822919895982137, 3.982988025147015, 1.0842...
\n",
+ "
2.028823
\n",
+ "
[0.9896065330363771, 0.7454203851573509, 0.996...
\n",
+ "
0.727201
\n",
+ "
P043#2019_12_20_10_07_03
\n",
+ "
baseline
\n",
+ "
semantic
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
\n",
+ "
\n",
+ "
4
\n",
+ "
2.041602
\n",
+ "
{'error': 0.8628288507461548, 'accuracies': [1...
\n",
+ "
[0.995935, 0.9920827, 0.9908885, 0.99351233, 0.0]
\n",
+ "
[0, 3, 4, 5, 8]
\n",
+ "
0.794484
\n",
+ "
[[99594, 0, 0, 256, 183, 303, 0, 0, 0, 0, 0, 0...
\n",
+ "
[0.27734278592477596, 3.6474446861401466, 0.92...
\n",
+ "
1.854296
\n",
+ "
[0.9873793615441723, 0.7827311121539183, 1.0, ...
\n",
+ "
0.734515
\n",
+ "
P043#2019_12_20_10_07_28
\n",
+ "
baseline
\n",
+ "
semantic
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ce_loss ece \\\n",
+ "0 2.040663 {'error': 0.8628411889076233, 'accuracies': [1... \n",
+ "1 2.040144 {'error': 0.8638126254081726, 'accuracies': [1... \n",
+ "2 2.038484 {'error': 0.8648071885108948, 'accuracies': [1... \n",
+ "3 2.041420 {'error': 0.8622098565101624, 'accuracies': [1... \n",
+ "4 2.041602 {'error': 0.8628288507461548, 'accuracies': [1... \n",
+ "\n",
+ " dice_metric used_labels \\\n",
+ "0 [0.9966934, 0.97932774, 0.9884513, 0.99261785,... [0, 3, 4, 5, 9] \n",
+ "1 [0.99715513, 0.9796044, 0.9904817, 0.9939147, ... [0, 3, 4, 5, 9] \n",
+ "2 [0.9983746, 0.9917354, 0.9931921, 0.9908809, 0... [0, 3, 4, 5, 8] \n",
+ "3 [0.995851, 0.9913923, 0.9891495, 0.9930062, 0.0] [0, 3, 4, 5, 8] \n",
+ "4 [0.995935, 0.9920827, 0.9908885, 0.99351233, 0.0] [0, 3, 4, 5, 8] \n",
+ "\n",
+ " dice_metric_image confusion_matrix \\\n",
+ "0 0.986499 [[146642, 0, 2, 23, 315, 483, 0, 0, 0, 65, 0, ... \n",
+ "1 0.987200 [[145637, 0, 0, 27, 222, 465, 0, 0, 0, 41, 0, ... \n",
+ "2 0.906800 [[180891, 0, 0, 333, 37, 64, 0, 0, 0, 0, 0, 0,... \n",
+ "3 0.793880 [[99849, 0, 0, 330, 141, 254, 0, 0, 0, 0, 0, 0... \n",
+ "4 0.794484 [[99594, 0, 0, 256, 183, 303, 0, 0, 0, 0, 0, 0... \n",
+ "\n",
+ " surface_distance_metric \\\n",
+ "0 [0.30972793166884083, 0.5907705154388064, 1.14... \n",
+ "1 [0.26621675778031273, 0.6333655949351811, 0.94... \n",
+ "2 [0.2174811256048084, 1.200060671473143, 0.3961... \n",
+ "3 [0.2822919895982137, 3.982988025147015, 1.0842... \n",
+ "4 [0.27734278592477596, 3.6474446861401466, 0.92... \n",
+ "\n",
+ " surface_distance_metric_image \\\n",
+ "0 0.757825 \n",
+ "1 0.678203 \n",
+ "2 1.401052 \n",
+ "3 2.028823 \n",
+ "4 1.854296 \n",
+ "\n",
+ " surface_dice_metric_mean \\\n",
+ "0 [0.9855702364394993, 0.8432470258922323, 0.986... \n",
+ "1 [0.9952887803175711, 0.8712871287128713, 0.995... \n",
+ "2 [0.9935509087355873, 0.7235883424408015, 0.999... \n",
+ "3 [0.9896065330363771, 0.7454203851573509, 0.996... \n",
+ "4 [0.9873793615441723, 0.7827311121539183, 1.0, ... \n",
+ "\n",
+ " surface_dice_metric_image_mean image_name network \\\n",
+ "0 0.929099 P043#2019_12_20_10_05_27 baseline \n",
+ "1 0.924438 P043#2019_12_20_10_05_48 baseline \n",
+ "2 0.795433 P043#2019_12_20_10_06_32 baseline \n",
+ "3 0.727201 P043#2019_12_20_10_07_03 baseline \n",
+ "4 0.734515 P043#2019_12_20_10_07_28 baseline \n",
+ "\n",
+ " dataset dataset_index subject_name timestamp \n",
+ "0 semantic NaN NaN NaN \n",
+ "1 semantic NaN NaN NaN \n",
+ "2 semantic NaN NaN NaN \n",
+ "3 semantic NaN NaN NaN \n",
+ "4 semantic NaN NaN NaN "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "run_context = settings_context.best_transform_runs[\"organ_transplantation\"]\n",
+ "df1 = context_evaluation_table(run_context, test=True, aggregate=False)\n",
+ "df2 = glove_runs(\n",
+ " {\n",
+ " \"baseline\": settings_context.glove_runs[\"baseline\"],\n",
+ " \"organ_transplantation\": settings_context.glove_runs[\"organ_transplantation\"],\n",
+ " },\n",
+ " aggregate=False,\n",
+ ")\n",
+ "df = pd.concat([df1, df2])\n",
+ "df.replace({\"network\": {\"context\": \"organ_transplantation\"}}, inplace=True)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Lightning automatically upgraded your loaded checkpoint from v1.5.8 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison/fold_P041,P060,P069/epoch=46-dice_metric=0.87.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.5.8 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison/fold_P044,P050,P059/epoch=70-dice_metric=0.90.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.5.8 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison/fold_P045,P061,P071/epoch=75-dice_metric=0.84.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.5.8 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison/fold_P047,P049,P070/epoch=52-dice_metric=0.85.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.5.8 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison/fold_P048,P057,P058/epoch=79-dice_metric=0.86.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-08_14-48-02_organ_transplantation_0.8/fold_P041,P060,P069/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-08_14-48-02_organ_transplantation_0.8/fold_P044,P050,P059/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-08_14-48-02_organ_transplantation_0.8/fold_P045,P061,P071/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-08_14-48-02_organ_transplantation_0.8/fold_P047,P049,P070/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-08_14-48-02_organ_transplantation_0.8/fold_P048,P057,P058/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-44_glove_baseline/fold_P041,P060,P069/epoch=77-dice_metric=0.89.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-44_glove_baseline/fold_P044,P050,P059/epoch=82-dice_metric=0.92.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-44_glove_baseline/fold_P045,P061,P071/epoch=41-dice_metric=0.86.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-44_glove_baseline/fold_P047,P049,P070/epoch=48-dice_metric=0.85.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-44_glove_baseline/fold_P048,P057,P058/epoch=57-dice_metric=0.81.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-55_glove_organ_transplantation_0.8/fold_P041,P060,P069/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-55_glove_organ_transplantation_0.8/fold_P044,P050,P059/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-55_glove_organ_transplantation_0.8/fold_P045,P061,P071/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-55_glove_organ_transplantation_0.8/fold_P047,P049,P070/last.ckpt`\n",
+ "Lightning automatically upgraded your loaded checkpoint from v1.9.0 to v2.3.2. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint ../../../../../../mnt/ssd_8tb/htc/results_context/training/image/2023-02-21_23-14-55_glove_organ_transplantation_0.8/fold_P048,P057,P058/last.ckpt`\n"
+ ]
+ }
+ ],
+ "source": [
+ "networks = {\n",
+ " \"baseline\": SinglePredictor(\n",
+ " model=\"image\",\n",
+ " run_folder=f\"{settings_seg.model_comparison_timestamp}_generated_default_model_comparison\",\n",
+ " test=True,\n",
+ " ),\n",
+ " \"organ_transplantation\": SinglePredictor(path=run_context, test=True),\n",
+ " \"baseline_occlusions\": SinglePredictor(path=settings_context.glove_runs[\"baseline\"], test=True),\n",
+ " \"organ_transplantation_occlusions\": SinglePredictor(\n",
+ " path=settings_context.glove_runs[\"organ_transplantation\"], test=True\n",
+ " ),\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Seed set to 42\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "P043#2019_12_20_10_10_19\n",
+ "isolated label: stomach\n",
+ "P072#2020_08_08_13_14_14\n",
+ "isolated label: stomach\n",
+ "P042#2019_12_15_11_15_55@semantic#annotator5\n",
+ "P043#2019_12_20_10_11_20\n",
+ "most important neighbour of stomach is liver\n",
+ "P043#2019_12_20_10_11_20\n",
+ "most important neighbour of stomach is liver\n",
+ "P062#2020_05_15_18_46_30\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/pixel/2022-02-03_22-58-44_generated_default_model_comparison\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/pixel/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_model_comparison\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/pixel/2022-02-03_22-58-44_generated_default_rgb_model_compari\n",
+ "son\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/pixel/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_rgb_model_compari\u001b[0m \u001b[2m \u001b[0m\n",
+ "\u001b[95mson\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_context/training/superpixel_classification/2024-07-24_15-20-46_default\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_context/training/superpixel_classification/\u001b[0m\u001b[95m2024-07-24_15-20-46_default\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_context/training/superpixel_classification/2024-07-24_15-20-46_default_rgb\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_context/training/superpixel_classification/\u001b[0m\u001b[95m2024-07-24_15-20-46_default_rgb\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/patch/2022-02-03_22-58-44_generated_default_model_comparison\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/patch/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_model_comparison\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/patch/2022-02-03_22-58-44_generated_default_rgb_model_compari\n",
+ "son\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/patch/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_rgb_model_compari\u001b[0m \u001b[2m \u001b[0m\n",
+ "\u001b[95mson\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/patch/2022-02-03_22-58-44_generated_default_64_model_comparis\n",
+ "on\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/patch/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_64_model_comparis\u001b[0m \u001b[2m \u001b[0m\n",
+ "\u001b[95mon\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/patch/2022-02-03_22-58-44_generated_default_64_rgb_model_comp\n",
+ "arison\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/patch/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_64_rgb_model_comp\u001b[0m \u001b[2m \u001b[0m\n",
+ "\u001b[95marison\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/image/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_model_comparison\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_rgb_model_compari\n",
+ "son\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/image/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_rgb_model_compari\u001b[0m \u001b[2m \u001b[0m\n",
+ "\u001b[95mson\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/image/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_model_comparison\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
+ "/mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_rgb_model_compari\n",
+ "son\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
+ "\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/image/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_rgb_model_compari\u001b[0m \u001b[2m \u001b[0m\n",
+ "\u001b[95mson\u001b[0m \u001b[2m \u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:510\n",
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
"/mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_model_comparison\n",
"
\n"
],
"text/plain": [
- "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:510\u001b[0m\n",
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
"\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/image/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_model_comparison\u001b[0m \u001b[2m \u001b[0m\n"
]
},
@@ -53,38 +53,13 @@
{
"data": {
"text/html": [
- "
[WARNING][py.warnings]/home/j562r/htc/src/htc/context/models/context_evaluation.py:44: FutureWarning: warnings.py:109\n",
- " \n",
- "The provided callable <built-in function min> is currently using SeriesGroupBy.min. In a future version \n",
- "of pandas, the provided callable will be used directly. To keep current behavior pass the string \"min\"\n",
- "instead. \n",
- " \n",
- " \n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[1m[\u001b[0m\u001b[33mWARNING\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mpy.warnings\u001b[0m\u001b[1m]\u001b[0m \u001b[35m/home/j562r/htc/src/htc/context/models/\u001b[0m\u001b[95mcontext_evaluation.py\u001b[0m:\u001b[38;5;145m44\u001b[0m: FutureWarning: \u001b[2mwarnings.py:109\u001b[0m\n",
- " \u001b[2m \u001b[0m\n",
- "The provided callable \u001b[1m<\u001b[0m\u001b[1;95mbuilt-in\u001b[0m\u001b[39m function min\u001b[0m\u001b[1m>\u001b[0m is currently using SeriesGroupBy.min. In a future version \u001b[2m \u001b[0m\n",
- "of pandas, the provided callable will be used directly. To keep current behavior pass the string \u001b[90m\"min\"\u001b[0m \u001b[2m \u001b[0m\n",
- "instead. \u001b[2m \u001b[0m\n",
- " \u001b[2m \u001b[0m\n",
- " \u001b[2m \u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:510\n",
+ "
[INFO][htc.no_duplicates] Found pretrained run in the local results dir at HTCModel.py:481\n",
"/mnt/ssd_8tb/htc/results_semantic/training/image/2022-02-03_22-58-44_generated_default_rgb_model_compari\n",
"son\n",
"
\n"
],
"text/plain": [
- "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:510\u001b[0m\n",
+ "\u001b[1m[\u001b[0m\u001b[38;5;28mINFO\u001b[0m\u001b[1m]\u001b[0m\u001b[1m[\u001b[0m\u001b[3mhtc.no_duplicates\u001b[0m\u001b[1m]\u001b[0m Found pretrained run in the local results dir at \u001b[2mHTCModel.py:481\u001b[0m\n",
"\u001b[35m/mnt/ssd_8tb/htc/results_semantic/training/image/\u001b[0m\u001b[95m2022-02-03_22-58-44_generated_default_rgb_model_compari\u001b[0m \u001b[2m \u001b[0m\n",
"\u001b[95mson\u001b[0m \u001b[2m \u001b[0m\n"
]
@@ -194,11 +169,11 @@
"