Merge pull request #164 from AustralianCancerDataNetwork/static-typing
Added static typing to `pydicer` modules
pchlap authored Feb 2, 2024
2 parents 8a371f2 + 27209d5 commit 9836a54
Showing 22 changed files with 264 additions and 126 deletions.
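The annotations follow one pattern throughout: parameters and return values gain type hints, with typing.Union covering arguments that accept several types. A minimal sketch of the style (the function below is hypothetical, invented only to illustrate the convention):

from pathlib import Path
from typing import Union

# Before this PR a signature such as this carried no annotations:
#     def load_series(series_path, force=True):
# After, each parameter and the return value are annotated, with Union
# used where an argument accepts more than one type:
def load_series(series_path: Union[str, Path], force: bool = True) -> dict:
    # Hypothetical helper; shows the annotation style only.
    return {"path": str(series_path), "force": force}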
16 changes: 11 additions & 5 deletions pydicer/config.py
@@ -15,7 +15,13 @@
"available in the .pydicer directory.",
"type": int,
"default": 0,
"choices": [logging.NOTSET, logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR],
"choices": [
logging.NOTSET,
logging.DEBUG,
logging.INFO,
logging.WARNING,
logging.ERROR,
],
},
"for_fallback_linkage": {
"module": "general",
@@ -80,7 +86,6 @@
class PyDicerConfig:
class __PyDicerConfig: # pylint: disable=invalid-name
def __init__(self, working_dir=None):

if working_dir is None:
raise ValueError("working_dir must be set on config init")
self.working_dir = Path(working_dir)
@@ -128,7 +133,7 @@ def get_working_dir(self):
"""
return self.instance.working_dir

def get_config(self, name):
def get_config(self, name: str) -> object:
"""Get the value of the config item with the specified name
Args:
@@ -146,7 +151,7 @@ def get_config(self, name):

return self.instance.pydicer_config[name]

def set_config(self, name, value):
def set_config(self, name: str, value: object):
"""Set the value for the config with the given name
Args:
@@ -163,7 +168,8 @@ def set_config(self, name, value):

if not isinstance(value, PYDICER_CONFIG[name]["type"]) and not value is None:
raise ValueError(
f"Config {name} must be of type " f"{type(self.instance.pydicer_config[name])}"
f"Config {name} must be of type "
f"{type(self.instance.pydicer_config[name])}"
)

self.instance.pydicer_config[name] = value
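A short usage sketch of the typed accessors above (the working directory path and the bool type of for_fallback_linkage are assumptions; the key name itself appears in this diff):

from pydicer.config import PyDicerConfig

config = PyDicerConfig(working_dir="./my_project")  # raises ValueError if None

# get_config is now typed (name: str) -> object, and set_config
# (name: str, value: object) validates the value against
# PYDICER_CONFIG[name]["type"] as shown in the hunk above.
linkage = config.get_config("for_fallback_linkage")
config.set_config("for_fallback_linkage", True)  # assumes this key holds a bool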
16 changes: 11 additions & 5 deletions pydicer/convert/data.py
@@ -3,6 +3,8 @@
import copy
import shutil
from pathlib import Path
from typing import Union

import pandas as pd
import numpy as np
import SimpleITK as sitk
@@ -51,7 +53,7 @@
]


def get_object_type(sop_class_uid):
def get_object_type(sop_class_uid: str) -> str:
"""Get the type of the object (used for the output path)
Args:
@@ -69,7 +71,9 @@ def get_object_type(sop_class_uid):
return object_type


def handle_missing_slice(files, ignore_duplicates=False):
def handle_missing_slice(
files: Union[pd.DataFrame, list], ignore_duplicates: bool = False
) -> list:
"""function to interpolate missing slices in an image
Example usage:
@@ -98,6 +102,8 @@ def handle_missing_slice(files, ignore_duplicates=False):
Args:
df_files (pd.DataFrame|list): the DataFrame which was produced by PreprocessData
or list of filepaths to dicom slices
ignore_duplicates (bool, optional): specifies whether the function should ignore
duplicate slices when handling missing ones
Returns:
file_paths(list): a list of the interpolated file paths
@@ -231,7 +237,7 @@ def handle_missing_slice(files, ignore_duplicates=False):
return df_files.file_path.tolist()


def link_via_frame_of_reference(for_uid, df_preprocess):
def link_via_frame_of_reference(for_uid: str, df_preprocess: pd.DataFrame) -> pd.DataFrame:
"""Find the image series linked to this FOR
Args:
@@ -271,7 +277,7 @@ def __init__(self, working_directory="."):
self.pydicer_directory = working_directory.joinpath(PYDICER_DIR_NAME)
self.output_directory = working_directory.joinpath(CONVERTED_DIR_NAME)

def add_entry(self, entry):
def add_entry(self, entry: dict):
"""Add an entry of a converted data object to the patient's converted dataframe.
Args:
@@ -308,7 +314,7 @@ def add_entry(self, entry):
df_pat_data = df_pat_data.reset_index(drop=True)
df_pat_data.to_csv(converted_df_path)

def convert(self, patient=None, force=True):
def convert(self, patient: Union[str, list] = None, force: bool = True):
"""Converts the DICOM which was preprocessed into the pydicer output directory.
Args:
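A minimal sketch of calling the typed handle_missing_slice above (paths are placeholders; per the signature it accepts the preprocessed DataFrame or a list of DICOM slice paths and returns a list of file paths with missing slices interpolated):

from pydicer.convert.data import handle_missing_slice

# Placeholder paths for a CT series in which slice 003 is absent.
slice_paths = [
    "data/pat1/ct/slice_001.dcm",
    "data/pat1/ct/slice_002.dcm",
    "data/pat1/ct/slice_004.dcm",
]
# Interpolates the missing slice per the docstring; the exact output
# location of the generated file is not shown in this diff.
complete_paths = handle_missing_slice(slice_paths, ignore_duplicates=True)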
7 changes: 6 additions & 1 deletion pydicer/convert/headers.py
@@ -1,11 +1,16 @@
import logging
import json
from typing import Union
from pathlib import Path

import pydicom

logger = logging.getLogger(__name__)


def convert_dicom_headers(dcm_file, binary_path, json_file):
def convert_dicom_headers(
dcm_file: Union[str, Path], binary_path: str, json_file: Union[str, Path]
):
"""Save the DICOM Headers as a JSON file
Args:
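A sketch of the typed convert_dicom_headers above (paths are placeholders; the reading of binary_path as the value recorded in place of binary data is an assumption based on the docstring):

from pathlib import Path
from pydicer.convert.headers import convert_dicom_headers

convert_dicom_headers(
    dcm_file=Path("data/image.dcm"),      # source DICOM file
    binary_path="image.nii.gz",           # assumed: path recorded for binary data
    json_file=Path("data/headers.json"),  # destination for the JSON headers
)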
44 changes: 32 additions & 12 deletions pydicer/dataset/functions.py
@@ -7,7 +7,7 @@
logger = logging.getLogger(__name__)


def rt_latest_struct(df, **kwargs):
def rt_latest_struct(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
"""Select the latest Structure set and the image which it is linked to. You can specify keyword
arguments to for a match on any top level DICOM attributes. You may also supply lists of values
to these, one of which should match to select that series.
@@ -91,18 +91,24 @@ def rt_latest_struct(df, **kwargs):
keep_rows.append(struct_row.name) # Track index of row to keep

# Find the linked image
df_linked_img = df[df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid]
df_linked_img = df[
df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid
]

if len(df_linked_img) == 0:
logger.warning("No linked images found for structure: %s", struct_row.hashed_uid)
logger.warning(
"No linked images found for structure: %s", struct_row.hashed_uid
)
continue

keep_rows.append(df_linked_img.iloc[0].name) # Keep the index of the row of the image too
keep_rows.append(
df_linked_img.iloc[0].name
) # Keep the index of the row of the image too

return df.loc[keep_rows]


def rt_latest_dose(df, **kwargs):
def rt_latest_dose(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
"""Select the latest RTDOSE and the image, structure and plan which it is linked to. You can
specify keyword arguments to for a match on any top level DICOM attributes. You may also supply
lists of values to these, one of which should match to select that series.
@@ -191,16 +197,22 @@ def rt_latest_dose(df, **kwargs):
keep_rows.append(dose_row.name) # Track index of row of dose to keep

# Find the linked plan
df_linked_plan = df[df["sop_instance_uid"] == dose_row.referenced_sop_instance_uid]
df_linked_plan = df[
df["sop_instance_uid"] == dose_row.referenced_sop_instance_uid
]

if len(df_linked_plan) == 0:
logger.warning("No linked plans found for dose: %s", dose_row.sop_instance_uid)
logger.warning(
"No linked plans found for dose: %s", dose_row.sop_instance_uid
)
continue

# Find the linked structure set
plan_row = df_linked_plan.iloc[0]
keep_rows.append(plan_row.name) # Keep the index of the row of the plan
df_linked_struct = df[df["sop_instance_uid"] == plan_row.referenced_sop_instance_uid]
df_linked_struct = df[
df["sop_instance_uid"] == plan_row.referenced_sop_instance_uid
]

if len(df_linked_struct) == 0:
# Try to link via Frame of Reference instead
@@ -209,18 +221,26 @@
]

if len(df_linked_struct) == 0:
logger.warning("No structures found for plan: %s", plan_row.sop_instance_uid)
logger.warning(
"No structures found for plan: %s", plan_row.sop_instance_uid
)
continue

# Find the linked image
struct_row = df_linked_struct.iloc[0]
keep_rows.append(struct_row.name) # Keep the index of the row of the structure
df_linked_img = df[df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid]
df_linked_img = df[
df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid
]

if len(df_linked_img) == 0:
logger.warning("No linked images found for structure: %s", struct_row.hashed_uid)
logger.warning(
"No linked images found for structure: %s", struct_row.hashed_uid
)
continue

keep_rows.append(df_linked_img.iloc[0].name) # Keep the index of the row of the image too
keep_rows.append(
df_linked_img.iloc[0].name
) # Keep the index of the row of the image too

return df.loc[keep_rows]
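A usage sketch for the selection functions above (the input DataFrame is assumed to be pydicer's converted-data frame containing the columns referenced in the code, such as sop_instance_uid and referenced_sop_instance_uid; keyword filters match top-level DICOM attributes per the docstrings):

import pandas as pd
from pydicer.dataset.functions import rt_latest_struct, rt_latest_dose

df = pd.read_csv("working/.pydicer/converted.csv")  # placeholder source

# Keep only the latest structure set (optionally filtered on a DICOM
# attribute) together with the image it references.
df_structs = rt_latest_struct(df, StudyDescription="Planning CT")

# Keep the latest dose plus its linked plan, structure set and image.
df_doses = rt_latest_dose(df)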
33 changes: 24 additions & 9 deletions pydicer/dataset/nnunet.py
@@ -45,7 +45,7 @@ def __init__(
nnunet_description: str = "",
dataset_name: str = CONVERTED_DIR_NAME,
image_modality: str = "CT",
mapping_id=DEFAULT_MAPPING_ID,
mapping_id: str = DEFAULT_MAPPING_ID,
):
"""Prepare a dataset to train models using nnUNet.
@@ -219,12 +219,16 @@ def check_duplicates_train_test(self):
"""

if len(self.training_cases) == 0:
raise SystemError("training_cases are empty, run split_dataset function first.")
raise SystemError(
"training_cases are empty, run split_dataset function first."
)

img_stats = []

df = read_converted_data(self.working_directory, dataset_name=self.dataset_name)
df_images = df[(df.modality == "CT") | (df.modality == "MR") | (df.modality == "PT")]
df_images = df[
(df.modality == "CT") | (df.modality == "MR") | (df.modality == "PT")
]

for case in self.training_cases + self.testing_cases:
df_pat = df_images[df_images.patient_id == case]
@@ -252,7 +256,9 @@ def check_duplicates_train_test(self):

# Check to see if we have any duplicate image spacing and sizes, if so inspect these
# further
duplicated_rows = df_img_stats.duplicated(subset=["spacing", "size"], keep=False)
duplicated_rows = df_img_stats.duplicated(
subset=["spacing", "size"], keep=False
)
df_img_stats["voxel_sum"] = df_img_stats.apply(
lambda row: sitk.GetArrayFromImage(sitk.ReadImage(row.img_path)).sum()
if row.name in duplicated_rows.index
@@ -342,7 +348,9 @@ def check_structure_names(self) -> pd.DataFrame:
print(f"Structure {s} is missing for patients: {missing_pats}")

incomplete_structures.append(s)
incomplete_patients += [p for p in missing_pats if not p in incomplete_patients]
incomplete_patients += [
p for p in missing_pats if not p in incomplete_patients
]

if incomplete_structures:
print(
@@ -383,7 +391,8 @@ def check_overlapping_structures(self):
structure_name_j = structure_names[sj]

structure_sum = (
structure_set[structure_name_i] + structure_set[structure_name_j]
structure_set[structure_name_i]
+ structure_set[structure_name_j]
)
arr = sitk.GetArrayFromImage(structure_sum)
if arr.max() > 1:
@@ -444,7 +453,9 @@ def prepare_dataset(self) -> Path:
"""

if len(self.training_cases) == 0:
raise SystemError("training_cases are empty, run split_dataset function first.")
raise SystemError(
"training_cases are empty, run split_dataset function first."
)

# First check that all cases (in training set) have the structures which are to be learnt
df_structures = self.check_structure_names()
@@ -571,7 +582,9 @@ def generate_training_scripts(
raise FileNotFoundError(
"Ensure that the folder in which to generate the script exists."
)
script_path = script_directory.joinpath(f"train_{self.nnunet_id}_{self.nnunet_name}.sh")
script_path = script_directory.joinpath(
f"train_{self.nnunet_id}_{self.nnunet_name}.sh"
)

if isinstance(folds, str):
folds = [folds]
@@ -637,7 +650,9 @@ def train(self, script_directory: Union[str, Path] = ".", in_screen: bool = True
"""
# Make sure the script folder exists
script_directory = Path(script_directory)
script_path = script_directory.joinpath(f"train_{self.nnunet_id}_{self.nnunet_name}.sh")
script_path = script_directory.joinpath(
f"train_{self.nnunet_id}_{self.nnunet_name}.sh"
)

if not script_path.exists():
raise FileNotFoundError(
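The methods touched in this file form a dataset-preparation workflow; a hedged end-to-end sketch follows (the class name NNUNetDataset and the constructor arguments working_directory, nnunet_id and nnunet_name are assumptions inferred from the attributes used in the hunks above):

from pydicer.dataset.nnunet import NNUNetDataset  # class name assumed

prep = NNUNetDataset(
    working_directory="./working",  # assumed; read_converted_data uses it above
    nnunet_id=101,                  # assumed; appears in the script file names
    nnunet_name="heart",            # assumed; appears in the script file names
    nnunet_description="Cardiac substructures",
    image_modality="CT",
)
prep.split_dataset()  # must run first, or the checks raise SystemError
prep.check_duplicates_train_test()
prep.check_structure_names()
prep.check_overlapping_structures()
dataset_path = prep.prepare_dataset()
prep.generate_training_scripts(script_directory=".")
prep.train(script_directory=".", in_screen=True)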
4 changes: 2 additions & 2 deletions pydicer/dataset/preparation.py
@@ -1,7 +1,7 @@
import logging
import os
from pathlib import Path
from typing import Callable
from typing import Callable, Union

import pandas as pd

@@ -22,7 +22,7 @@ class PrepareDataset:
Defaults to ".".
"""

def __init__(self, working_directory="."):
def __init__(self, working_directory: Union[str, Path] = "."):
self.working_directory = Path(working_directory)

def add_object_to_dataset(self, dataset_name: str, data_object_row: pd.Series):
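A minimal sketch of the now-typed PrepareDataset (the prepare method name and its signature are assumptions; only __init__ and add_object_to_dataset appear in this hunk, though the Callable import above suggests a selection function is passed in):

from pydicer.dataset.preparation import PrepareDataset
from pydicer.dataset.functions import rt_latest_dose

prep = PrepareDataset(working_directory="./working")
# Assumed API: build a named subset using a selection callable such as
# rt_latest_dose from pydicer/dataset/functions.py above.
prep.prepare("latest_doses", rt_latest_dose)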