diff --git a/pydicer/dataset/nnunet.py b/pydicer/dataset/nnunet.py
index 9dec245..2befca5 100644
--- a/pydicer/dataset/nnunet.py
+++ b/pydicer/dataset/nnunet.py
@@ -293,6 +293,7 @@ def check_structure_names(self) -> pd.DataFrame:
         """
 
         df = read_converted_data(self.working_directory, dataset_name=self.dataset_name)
+        df = df[df.patient_id.isin(self.testing_cases + self.training_cases)]
         df_structure_sets = df[df.modality == "RTSTRUCT"]
 
         # First get a set of all unique structure names available
@@ -369,6 +370,7 @@ def check_overlapping_structures(self):
             "overlapping structures."
         )
 
         df = read_converted_data(self.working_directory, dataset_name=self.dataset_name)
+        df = df[df.patient_id.isin(self.testing_cases + self.training_cases)]
         df_structure_sets = df[df.modality == "RTSTRUCT"]
         has_overlapping_structures = False
diff --git a/tests/test_compare.py b/tests/test_compare.py
new file mode 100644
index 0000000..3e86a37
--- /dev/null
+++ b/tests/test_compare.py
@@ -0,0 +1,97 @@
+# pylint: disable=redefined-outer-name,missing-function-docstring
+
+import tempfile
+
+from pathlib import Path
+import pandas as pd
+
+from pydicer import PyDicer
+from pydicer.analyse.compare import (
+    compute_contour_similarity_metrics,
+    get_all_similarity_metrics_for_dataset,
+    prepare_similarity_metric_analysis,
+)
+from pydicer.utils import read_converted_data
+
+
+def test_compare_auto_segmentations(test_data_autoseg):
+    working_directory = test_data_autoseg
+    df = read_converted_data(working_directory=working_directory)
+
+    # We'll test this by comparing the structures against themselves,
+    # hence we expect perfect metrics
+    df_target = df[df.modality == "RTSTRUCT"]
+    df_reference = df[df.modality == "RTSTRUCT"]
+
+    PyDicer(working_directory)
+    segment_id = "test_seg"
+    compute_contour_similarity_metrics(df_target, df_reference, segment_id)
+
+    df_stats = get_all_similarity_metrics_for_dataset(working_directory)
+
+    assert len(df_stats) == 200
+
+    df_dsc = df_stats[df_stats["metric"] == "DSC"]
+    assert df_dsc.value.min() == 1.0
+    assert df_dsc.value.max() == 1.0
+
+
+def test_compare_metrics_analysis(test_data_autoseg):
+    working_directory = test_data_autoseg
+    df = read_converted_data(working_directory=working_directory)
+
+    # We'll test this by comparing the structures against themselves,
+    # hence we expect perfect metrics
+    df_target = df[df.modality == "RTSTRUCT"]
+    df_reference = df[df.modality == "RTSTRUCT"]
+
+    PyDicer(working_directory)
+    segment_id = "test_seg"
+    compute_contour_similarity_metrics(df_target, df_reference, segment_id)
+
+    with tempfile.TemporaryDirectory() as analysis_dir:
+        analysis_dir = Path(analysis_dir)
+
+        prepare_similarity_metric_analysis(
+            working_directory=working_directory,
+            analysis_output_directory=analysis_dir,
+            segment_id=segment_id,
+        )
+
+        # Check that the output files exist
+        raw_metrics_file = analysis_dir.joinpath("raw_test_seg_default.csv")
+        assert raw_metrics_file.exists()
+        stats_metrics_file = analysis_dir.joinpath("stats_test_seg_default.csv")
+        assert stats_metrics_file.exists()
+        plot_dsc_file = analysis_dir.joinpath("plot_DSC_test_seg_default.png")
+        assert plot_dsc_file.exists()
+        plot_hd_file = analysis_dir.joinpath("plot_hausdorffDistance_test_seg_default.png")
+        assert plot_hd_file.exists()
+        plot_msd_file = analysis_dir.joinpath("plot_meanSurfaceDistance_test_seg_default.png")
+        assert plot_msd_file.exists()
+        plot_sdsc_file = analysis_dir.joinpath("plot_surfaceDSC_test_seg_default.png")
+        assert plot_sdsc_file.exists()
+
+        # Read in the raw metrics file and do some checks
+        df_raw = pd.read_csv(raw_metrics_file, index_col=0)
+        assert len(df_raw) == 200
+
+        # Since these structures were compared against themselves, we expect perfect metrics
+        assert df_raw[df_raw.metric == "DSC"].value.min() == 1.0
+        assert df_raw[df_raw.metric == "surfaceDSC"].value.min() == 1.0
+        assert df_raw[df_raw.metric == "hausdorffDistance"].value.max() == 0.0
+        assert df_raw[df_raw.metric == "meanSurfaceDistance"].value.max() == 0.0
+
+        # Read in the stats metrics file and do some checks
+        df_stats = pd.read_csv(stats_metrics_file, index_col=0)
+        assert len(df_stats) == 36
+
+        # Check one of the rows
+        row_check = df_stats[
+            (df_stats.structure == "Esophagus") & (df_stats.metric == "surfaceDSC")
+        ].iloc[0]
+        assert row_check["mean"] == 1.0
+        assert row_check["std"] == 0.0
+        assert row_check["max"] == 1.0
+        assert row_check["min"] == 1.0
+        assert row_check["count"] == 10
diff --git a/tests/test_nnunet.py b/tests/test_nnunet.py
new file mode 100644
index 0000000..eba6c78
--- /dev/null
+++ b/tests/test_nnunet.py
@@ -0,0 +1,488 @@
+# pylint: disable=redefined-outer-name,missing-function-docstring
+
+import os
+import logging
+import shutil
+import json
+
+from pathlib import Path
+import SimpleITK as sitk
+import pytest
+
+from pydicer import PyDicer
+from pydicer.dataset.nnunet import NNUNetDataset
+from pydicer.utils import add_structure_name_mapping
+
+
+def test_nnunet_env_error(test_data_autoseg):
+    if "nnUNet_raw_data_base" in os.environ:
+        del os.environ["nnUNet_raw_data_base"]
+
+    working_directory = test_data_autoseg
+
+    # Expect SystemError due to nnUNet_raw_data_base not being set
+    with pytest.raises(SystemError):
+        NNUNetDataset(
+            working_directory=working_directory,
+            nnunet_id=100,
+            nnunet_name="TestTask",
+            nnunet_description="A test nnUNet task.",
+        )
+
+
+def test_nnunet_env_ok(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    # With nnUNet_raw_data_base set, this should succeed without raising an exception
+    NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+
+def test_nnunet_check_dataset_ok(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+    # This dataset should be ready to go for nnUNet, so no exception is expected here
+    nnunet.check_dataset()
+
+
+def test_nnunet_check_dataset_not_ok(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+    pyd = PyDicer(working_directory)
+
+    # Prepare a subset of data we know is invalid for the purpose of this test
+    def pick_ct_only(df):
+        return df[df.modality == "CT"]
+
+    dataset_name = "rubbish"
+    pyd.dataset.prepare(dataset_name, pick_ct_only)
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+        dataset_name=dataset_name,
+    )
+
+    # This should raise an error now since our dataset is invalid
+    with pytest.raises(SystemError):
+        nnunet.check_dataset()
+
+
+def test_nnunet_split_dataset_random(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
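+    # NNUNetDataset requires nnUNet_raw_data_base to be set (see
+    # test_nnunet_env_error above); pointing it at the current directory
+    # keeps this test self-contained.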
+
+    working_directory = test_data_autoseg
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+    nnunet.split_dataset(random_state=42)
+
+    assert len(nnunet.training_cases) == 7
+    assert len(nnunet.testing_cases) == 3
+
+
+def test_nnunet_split_dataset_specify(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007", "LCTSC-Train-S1-002"],
+        testing_cases=["LCTSC-Test-S1-101"],
+    )
+
+    assert "LCTSC-Train-S1-007" in nnunet.training_cases
+    assert "LCTSC-Train-S1-002" in nnunet.training_cases
+    assert "LCTSC-Test-S1-101" in nnunet.testing_cases
+
+
+def test_nnunet_split_dataset_invalid(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+    with pytest.raises(ValueError):
+        nnunet.split_dataset(training_cases=["invalid_id1", "invalid_2"])
+
+
+def test_nnunet_add_testing_case(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+    nnunet.add_testing_cases(testing_cases=["LCTSC-Test-S1-101"])
+
+    assert "LCTSC-Test-S1-101" in nnunet.testing_cases
+
+
+def test_nnunet_add_testing_case_invalid(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+    with pytest.raises(ValueError):
+        nnunet.add_testing_cases(testing_cases=["invalid_id1"])
+
+
+def test_nnunet_check_duplicates_ok(test_data_autoseg, caplog):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    caplog.set_level(logging.INFO)
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007", "LCTSC-Train-S1-002"],
+        testing_cases=["LCTSC-Test-S1-101"],
+    )
+
+    nnunet.check_duplicates_train_test()
+
+    assert "No duplicate images found" in caplog.text
+
+
+def test_nnunet_check_duplicates_dup(test_data_autoseg, caplog):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    caplog.set_level(logging.INFO)
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+    )
+
+    # Deliberately assign the same case to both training and testing
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007", "LCTSC-Train-S1-002"],
+        testing_cases=["LCTSC-Train-S1-007"],
+    )
+
+    nnunet.check_duplicates_train_test()
+
+    assert "is likely a duplicate of" in caplog.text
+
+
+def test_nnunet_check_structure_names_missing(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
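+    # The mapping below deliberately provides no alternative structure names,
+    # so structures stored under different names in some cases should be
+    # reported as missing by check_structure_names.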
+
+    working_directory = test_data_autoseg
+
+    mapping = {
+        "Heart": [],
+        "Lung_L": [],
+        "Lung_R": [],
+    }
+    mapping_id = "no_mapping"
+    add_structure_name_mapping(
+        mapping_dict=mapping, mapping_id=mapping_id, working_directory=working_directory
+    )
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+        mapping_id=mapping_id,
+    )
+
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007", "LCTSC-Train-S1-002"],
+        testing_cases=["LCTSC-Test-S1-101"],
+    )
+
+    df_results = nnunet.check_structure_names()
+
+    assert df_results.data["Heart"].sum() == 3
+    assert df_results.data["Lung_L"].sum() == 2
+    assert df_results.data["Lung_R"].sum() == 2
+
+
+def test_nnunet_check_structure_names_mapped(test_data_autoseg):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+
+    mapping = {
+        "Heart": [],
+        "Lung_L": ["Lung_Left"],
+        "Lung_R": ["Lung_Right"],
+    }
+    mapping_id = "mapping_ok"
+    add_structure_name_mapping(
+        mapping_dict=mapping, mapping_id=mapping_id, working_directory=working_directory
+    )
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+        mapping_id=mapping_id,
+    )
+
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007", "LCTSC-Train-S1-002"],
+        testing_cases=["LCTSC-Test-S1-101"],
+    )
+
+    df_results = nnunet.check_structure_names()
+
+    assert df_results.data["Heart"].sum() == 3
+    assert df_results.data["Lung_L"].sum() == 3
+    assert df_results.data["Lung_R"].sum() == 3
+
+
+def test_nnunet_check_overlapping_structures_ok(test_data_autoseg, caplog):
+    os.environ["nnUNet_raw_data_base"] = "."
+
+    working_directory = test_data_autoseg
+    caplog.set_level(logging.INFO)
+
+    mapping = {
+        "Lung_L": ["Lung_Left"],
+        "Lung_R": ["Lung_Right"],
+    }
+    mapping_id = "mapping_ok"
+    add_structure_name_mapping(
+        mapping_dict=mapping, mapping_id=mapping_id, working_directory=working_directory
+    )
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+        mapping_id=mapping_id,
+    )
+
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007"],
+        testing_cases=["LCTSC-Test-S1-101"],
+    )
+
+    nnunet.check_structure_names()
+
+    nnunet.check_overlapping_structures()
+
+    assert "No overlapping structures detected" in caplog.text
+
+
+def test_nnunet_check_overlapping_structures_overlap(test_data_autoseg, caplog):
+    os.environ["nnUNet_raw_data_base"] = "."
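+    # With Heart mapped in addition to both lungs across these cases, overlapping
+    # structures are expected to be present (asserted on the log output below).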
+
+    working_directory = test_data_autoseg
+    caplog.set_level(logging.INFO)
+
+    mapping = {
+        "Heart": [],
+        "Lung_L": ["Lung_Left"],
+        "Lung_R": ["Lung_Right"],
+    }
+    mapping_id = "mapping_ok"
+    add_structure_name_mapping(
+        mapping_dict=mapping, mapping_id=mapping_id, working_directory=working_directory
+    )
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+        mapping_id=mapping_id,
+    )
+
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007", "LCTSC-Train-S1-002"],
+        testing_cases=["LCTSC-Test-S1-101"],
+    )
+
+    nnunet.check_structure_names()
+
+    nnunet.check_overlapping_structures()
+
+    assert "Overlapping structures were detected" in caplog.text
+
+
+def test_nnunet_prepare_dataset(test_data_autoseg):
+    raw_path = Path("./testdata_nnunet")
+    os.environ["nnUNet_raw_data_base"] = str(raw_path)
+
+    # Remove raw path if it was left over from a previous test
+    if raw_path.exists():
+        shutil.rmtree(raw_path)
+
+    raw_path.mkdir()
+
+    working_directory = test_data_autoseg
+
+    mapping = {
+        "Heart": [],
+        "Lung_L": ["Lung_Left"],
+        "Lung_R": ["Lung_Right"],
+    }
+    mapping_id = "mapping_ok"
+    add_structure_name_mapping(
+        mapping_dict=mapping, mapping_id=mapping_id, working_directory=working_directory
+    )
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+        mapping_id=mapping_id,
+    )
+
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007", "LCTSC-Train-S1-002"],
+        testing_cases=["LCTSC-Test-S1-101"],
+    )
+
+    nnunet.prepare_dataset()
+
+    # Check that the folders have been created
+    task_path = raw_path.joinpath("nnUNet_raw_data", "Task100_TestTask")
+    assert task_path.exists()
+    assert task_path.joinpath("imagesTr").exists()
+    assert task_path.joinpath("imagesTs").exists()
+    assert task_path.joinpath("labelsTr").exists()
+    assert task_path.joinpath("labelsTs").exists()
+
+    # Check the dataset file
+    dataset_file = task_path.joinpath("dataset.json")
+    assert dataset_file.exists()
+
+    with open(dataset_file, "r", encoding="utf-8") as fp:
+        ds = json.load(fp)
+    assert len(ds.keys()) == 12
+    assert ds["labels"] == {"0": "background", "1": "Heart", "2": "Lung_L", "3": "Lung_R"}
+
+    # Open a label map file for sanity checks
+    label_map_path = task_path.joinpath("labelsTr", "LCTSC-Train-S1-007.nii.gz")
+    assert label_map_path.exists()
+
+    label_map = sitk.ReadImage(str(label_map_path))
+    lsif = sitk.LabelStatisticsImageFilter()
+    # Passing the label map as both intensity and label image yields per-label voxel counts
+    lsif.Execute(label_map, label_map)
+    labels = lsif.GetLabels()
+    assert len(labels) == 4
+    assert 0 in labels
+    assert 1 in labels
+    assert 2 in labels
+    assert 3 in labels
+
+    assert lsif.GetCount(1) == 24336
+    assert lsif.GetCount(2) == 52307
+    assert lsif.GetCount(3) == 67702
+
+
+def test_nnunet_generate_training_scripts(test_data_autoseg):
+    raw_path = Path("./testdata_nnunet")
+    os.environ["nnUNet_raw_data_base"] = str(raw_path)
+
+    # Remove raw path if it was left over from a previous test
+    if raw_path.exists():
+        shutil.rmtree(raw_path)
+
+    raw_path.mkdir()
+
+    working_directory = test_data_autoseg
+
+    mapping = {
+        "Heart": [],
+        "Lung_L": ["Lung_Left"],
+        "Lung_R": ["Lung_Right"],
+    }
+    mapping_id = "mapping_ok"
+    add_structure_name_mapping(
+        mapping_dict=mapping, mapping_id=mapping_id, working_directory=working_directory
+    )
+
+    nnunet = NNUNetDataset(
+        working_directory=working_directory,
+        nnunet_id=100,
+        nnunet_name="TestTask",
+        nnunet_description="A test nnUNet task.",
+        mapping_id=mapping_id,
+    )
+
+    nnunet.split_dataset(
+        training_cases=["LCTSC-Train-S1-007", "LCTSC-Train-S1-002"],
+        testing_cases=["LCTSC-Test-S1-101"],
+    )
+
+    nnunet.prepare_dataset()
+
+    nnunet.generate_training_scripts(raw_path)
+
+    script_file = raw_path.joinpath("train_100_TestTask.sh")
+    assert script_file.exists()
+
+    with open(script_file, "r", encoding="utf-8") as fp:
+        script_contents = fp.read()
+
+    assert "#!/bin/bash" in script_contents
+    assert "nnUNet_plan_and_preprocess -t 100 --verify_dataset_integrity;" in script_contents
+    assert "nnUNet_train 2d nnUNetTrainerV2 Task100_TestTask all;" in script_contents
diff --git a/tests/test_structure_set.py b/tests/test_structure_set.py
index e6f16c0..e9fc342 100644
--- a/tests/test_structure_set.py
+++ b/tests/test_structure_set.py
@@ -1,3 +1,5 @@
+# pylint: disable=redefined-outer-name,missing-function-docstring
+
 import json
 
 import SimpleITK as sitk