From 27d4d29f9470b3144902fbf58c12f5363fdca976 Mon Sep 17 00:00:00 2001 From: sjoerdk Date: Thu, 18 Apr 2024 22:11:18 +0200 Subject: [PATCH] Rewrites caching, improves dicom level reasoning Rewrites caching: * Trolley no longer caches results by default, cleans up Trolley. * Completely rewrites CachedSearcher to implement Searcher interface * Caching is now optional by wrapping any searcher instance * Caching is now time-based and done per DICOM object * Adds query caching (previously only get_study_for_id) Makes reasoning about DICOM levels (study/series/instance) easier: * Adds numeric value and comparison to DICOMObjectLevels * Introduces DICOMObject.contained_references() * Merges to_series_level_refs, to_instance_refs into to_refs(level) --- conftest.py | 2 +- dicomtrolley/caching.py | 303 ++++++++++++++++++++++++++ dicomtrolley/core.py | 364 ++++++++++++++++++------------- dicomtrolley/datastructures.py | 379 +++++++++++++++++++++++++++++++++ dicomtrolley/exceptions.py | 8 + dicomtrolley/parsing.py | 48 +---- dicomtrolley/qido_rs.py | 24 +-- dicomtrolley/trolley.py | 308 +++++---------------------- docs/index.md | 2 +- docs/usage.md | 20 +- tests/mock_responses.py | 79 ++++++- tests/test_caching.py | 211 ++++++++++++++++++ tests/test_core.py | 92 ++++++++ tests/test_datastructures.py | 211 ++++++++++++++++++ tests/test_parsing.py | 23 +- tests/test_qido_rs.py | 29 ++- tests/test_trolley.py | 104 +-------- 17 files changed, 1611 insertions(+), 596 deletions(-) create mode 100644 dicomtrolley/caching.py create mode 100644 dicomtrolley/datastructures.py create mode 100644 tests/test_caching.py create mode 100644 tests/test_datastructures.py diff --git a/conftest.py b/conftest.py index 95e47c3..596885f 100644 --- a/conftest.py +++ b/conftest.py @@ -29,7 +29,7 @@ def a_wado(a_session): @pytest.fixture def a_trolley(a_mint, a_wado) -> Trolley: """Trolley instance that will not hit any server""" - return Trolley(searcher=a_mint, downloader=a_wado, query_missing=True) 
+ return Trolley(searcher=a_mint, downloader=a_wado) class NoSaveStorageDir(StorageDir): diff --git a/dicomtrolley/caching.py b/dicomtrolley/caching.py new file mode 100644 index 0000000..9711acf --- /dev/null +++ b/dicomtrolley/caching.py @@ -0,0 +1,303 @@ +"""Storing DICOM query results locally to avoid unneeded calls to server""" + +from typing import Dict, Iterable, List, Optional, Sequence, Tuple + +from dicomtrolley.core import ( + DICOMObject, + DICOMObjectLevels, + DICOMObjectReference, + InstanceReference, + Query, + QueryLevels, + Searcher, + SeriesReference, + Study, + StudyReference, +) +from dicomtrolley.datastructures import ( + ExpiringCollection, + TreeAddress, + TreeNode, +) +from dicomtrolley.exceptions import DICOMTrolleyError +from dicomtrolley.logs import get_module_logger + +logger = get_module_logger("caching") + + +class DICOMObjectCache: + def __init__( + self, + initial_objects: Optional[List[DICOMObject]] = None, + expiry_seconds: Optional[int] = 600, + ): + """A tree holding expiring DICOM objects. Objects can be retrieved by + study/series/instance UID tuples: + + >>>cache = DICOMObjectCache() + >>>cache.add(a_study) # with uid 'study1' and full series and instance info + >>>cache.retrieve(reference=('study1','series')) + + >>>cache.retrieve(reference=('study1','series','instance')) + + # 20 minutes later... study has expired + >>>cache.retrieve(reference=('study1','series')) + + + Parameters + ---------- + initial_objects, optional + Shorthand for add() on all objects in this list. Defaults to empty + + expiry_seconds, optional + Expire objects after this many seconds. If set to None, will disable + expiry. 
Defaults to 600 (10 minutes) + + Notes + ----- + The functionality here is similar to dicomtrolley.parsing.TreeNode, but the + use case is different enough to warrant a separate class I think + + """ + if expiry_seconds is None: + self.expiry = None + else: + self.expiry = ExpiringCollection( + expire_after_seconds=expiry_seconds + ) + self.root = TreeNode() + self._awaiting_prune: List[TreeAddress] = [] + if initial_objects: # not none or empty: + if not isinstance(initial_objects, list): + raise ValueError( + f"Expected list but got {initial_objects}. Did you" + f"forget [braces] for initial_objects value?" + ) + for x in initial_objects: + self.add(x) + + def add_all(self, objects: Iterable[DICOMObject]): + if isinstance(objects, DICOMObject): + # This mistake is common (for me) and causes unreadable errors. Avoid. + raise ValueError( + "parameter 'objects' should be an iterable of " + "DICOMObjects, not a single object" + ) + self.prune_expired() + for obj in objects: + self.add(obj, prune=False) + + def add(self, obj: DICOMObject, prune=True): + """Add this object to cache + + Returns + ------- + The input DICOMObject. Just so you can add and return a value in a single + line in calling code. 
Like new_dicom = cache.add(get_new_dicom()) + """ + logger.debug(f"Adding to cache: {obj}") + if prune: + self.prune_expired() + address = self.to_address(obj.reference()) + self.root.add(obj, address=address) + if self.expiry: + self.expiry.add(address) + + if obj.children(): + for x in obj.children(): + self.add( + x, prune=False + ) # avoid too many calls to prune_expired + + def retrieve(self, reference: DICOMObjectReference): + """Try to retrieve object from cache + + Parameters + ---------- + reference + The dicom object you would like to retrieve + + Raises + ------ + NodeNotFound + If the object does not exist in cache or has expired + + Returns + ------- + DICOMObject + The cached object + """ + self.prune_expired() + try: + data = self.root.get_node( + self.to_address(reference), create=False + ).data + if data: + return data + else: + raise NodeNotFound( + f"Node found in cache, but no data for reference " + f"{reference}" + ) + except KeyError as e: + raise NodeNotFound( + f"No node found in cache for reference {reference}" + ) from e + + def prune_expired(self): + """Remove all expired nodes""" + if not self.expiry: + logger.debug("prune: not pruning as self.expiry = False") + return # don't do anything + expired = self.expiry.collect_expired() + self._awaiting_prune = self._awaiting_prune + expired + self._awaiting_prune.sort(key=lambda x: len(x)) + prune_later = [] + pruned = [] + while self._awaiting_prune: + address = self._awaiting_prune.pop() # work from last + try: + self.root.prune_leaf(address) + pruned.append(address) + except ValueError: + # was not a leaf. Make empty and save for later + self.root.get_node(address).data = None + prune_later.append(address) + if pruned: + msg = f"prune: Pruned away {len(pruned)} leaves: ({pruned})" + if prune_later: + msg += f"could not prune {len(prune_later)}. 
Leaving those for later" + logger.debug(msg) + + self._awaiting_prune = prune_later + + @staticmethod + def to_address(ref: DICOMObjectReference) -> TreeAddress: + """Convert reference to address that can be used in TreeNode""" + if isinstance(ref, StudyReference): + return (ref.study_uid,) + elif isinstance(ref, SeriesReference): + return ref.study_uid, ref.series_uid + elif isinstance(ref, InstanceReference): + return ref.study_uid, ref.series_uid, ref.instance_uid + else: + raise ValueError(f"Expected DICOM object reference, but got {ref}") + + +class QueryCache: + """Caches the response to DICOM queries""" + + def __init__(self, cache: DICOMObjectCache): + self.cache = cache + self.queries: Dict[Query, Tuple[DICOMObjectReference, ...]] = {} + + def add_response(self, query: Query, response: Sequence[DICOMObject]): + """Cache response for this query""" + self.cache.add_all(response) + references = tuple(x.reference() for x in response) + self.queries[query.json()] = references + + def get_response(self, query: Query) -> List[Study]: + """Obtain cached response for this query + + Raises + ------ + NodeNotFound + If any of the results of query are not in cache or have expired + """ + try: + references = self.queries[query.json()] + except KeyError as e: + raise NodeNotFound( + f"Query {query.to_short_string()} not found in cache" + ) from e + + try: + retrieved = [self.cache.retrieve(x) for x in references] + logger.debug( + f"Found all ({len(retrieved)}) objects in cache for " + f"{query.to_short_string()}. Returning." + ) + return retrieved + except NodeNotFound as e: + # This query response is not (fully) cached anymore. Remove + self.queries.pop(query.json()) + raise NodeNotFound( + f"One or more response to {query.to_short_string()} " + f"was not in cache" + ) from e + + +class CachedSearcher(Searcher): + """A cache wrapped around a Searcher instance. Serves search responses from + cache first. Calls searcher if needed. 
+ + Caches two types of searcher method calls: + find_studies(Query): + Caches each returned DICOM object individually and then associates this + with the incoming query. Only if the query matches exactly is a cached + response returned. + Associates DICOM tree address (study/series/instance). Not DICOM object + identities. This means that an underlying DICOM object can be updated without + invalidating the cache response. The cached response to a query will be + returned as long as there are non-expired cached objects at each associated + tree address + + find_study_by_id(study_id, level): + Retrieves study_id from cache and checks whether it has children up to + the required level (depth). + + """ + + def __init__(self, searcher: Searcher, cache: DICOMObjectCache): + self.searcher = searcher + self.cache = cache + self.query_cache = QueryCache(cache=cache) + + def __str__(self): + return f"CachedSearcher for {self.searcher}" + + def find_studies(self, query: Query) -> Sequence[Study]: + """Try to return from cache, otherwise call searcher.""" + try: + return self.query_cache.get_response(query) + except NodeNotFound: + logger.debug( + f"No cache for {query.to_short_string()}." + f"Performing query with {self.searcher}" + ) + response = self.searcher.find_studies(query) + self.query_cache.add_response(query, response) + return response + + def find_study_by_id( + self, study_uid: str, query_level: QueryLevels = QueryLevels.STUDY + ) -> Study: + """Find a single study at the given depth""" + try: + from_cache: Study = self.cache.retrieve( + StudyReference(study_uid=study_uid) + ) + if ( + from_cache.max_object_depth() + > DICOMObjectLevels.from_query_level(query_level) + ): + raise NodeNotFound( + f"{from_cache} found in cache, but did not contain " + f"objects up to '{query_level}' level" + ) + return from_cache + except NodeNotFound as e: + logger.debug( + f"Could not find study in cache ({e}). 
Launching query to find" + f"additional info" + ) + study = self.searcher.find_study_by_id( + study_uid, query_level=query_level + ) + self.cache.add(study) + return study + + +class NodeNotFound(DICOMTrolleyError): + pass diff --git a/dicomtrolley/core.py b/dicomtrolley/core.py index 5bed543..4d72d46 100644 --- a/dicomtrolley/core.py +++ b/dicomtrolley/core.py @@ -1,9 +1,17 @@ """Provides common base classes that allow modules to talk to each other.""" from dataclasses import dataclass from datetime import date, datetime -from enum import Enum +from enum import Enum, IntEnum from itertools import chain -from typing import List, Optional, Sequence, Type, TypeVar, Union +from typing import ( + Iterable, + List, + Optional, + Sequence, + Tuple, + Type, + TypeVar, +) from pydantic import Field, ValidationError from pydantic.class_validators import validator @@ -13,28 +21,78 @@ from dicomtrolley.exceptions import ( DICOMTrolleyError, + NoReferencesFoundError, UnSupportedParameterError, ) -class DICOMObjectLevels: - """DICOM uses a study->series->instance structure""" +class DICOMObjectLevels(IntEnum): + """Represents DICOM study->series->instance structure + + Level is ordered. Study is highest, instance the lowest level. + + Notes + ----- + Similar to core.QueryLevels, but different enough to warrant separate classes. + DICOMObjectLevel is a property of a DICOM object. QueryLevel is a property of + a Query. 
+ """ - STUDY = "Study" - SERIES = "Series" - INSTANCE = "Instance" + STUDY = 2 + SERIES = 1 + INSTANCE = 0 - all = [STUDY, SERIES, INSTANCE] + @classmethod + def from_query_level(cls, level: "QueryLevels"): + if level == QueryLevels.STUDY: + return cls.STUDY + elif level == QueryLevels.SERIES: + return cls.SERIES + elif level == QueryLevels.INSTANCE: + return cls.INSTANCE + else: + raise ValueError(f"Unknown DICOMObjectLevels {level}") class DICOMDownloadable: """An object that can be downloaded by a Downloader""" def reference(self) -> "DICOMObjectReference": - raise NotImplementedError + raise NotImplementedError() + + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + """DICOM object references contained in this object at the given level. + + For example, a Study might contain information for all its Instances. In + this case sub_references(max_level=Instance) will return InstanceReferences + + Parameters + ---------- + max_level: + The returned references will be this level or lower. For example, an + instance will return a reference to itself when max_level is Series: + >>> Instance.contained_references(max_level=Series) + (InstanceReference,) + A study will be broken up: + >>> Study.contained_references(max_level=Series) + (SeriesReference, SeriesReference, ...) + + Returns + ------- + Tuple of references. Of max_level or lower + + Raises + ------ + NoReferencesFoundError + If sub-references cannot be obtained from this downloadable. + + """ + raise NotImplementedError() -@dataclass +@dataclass(frozen=True) # make this hashable and non-modifyable class DICOMObjectReference(DICOMDownloadable): """Points to a study, series, or instance by uid @@ -47,20 +105,31 @@ class DICOMObjectReference(DICOMDownloadable): study_uid: str @property - def level(self) -> str: + def level(self): """Does this reference point to Study, Series or Instance? 
Returns ------- str One of the values in DICOMObjectLevels.all """ - return "" + return NotImplementedError() def reference(self): return self + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + """DICOM object references either return themselves or an error""" + if max_level < self.level: + raise NoReferencesFoundError( + f"{self} does not contain references of {str(max_level)} or lower" + ) + else: + return (self.reference(),) + -@dataclass +@dataclass(frozen=True) class InstanceReference(DICOMObjectReference): """All information needed to download a single slice (SOPInstance)""" @@ -79,7 +148,7 @@ def level(self): return DICOMObjectLevels.INSTANCE -@dataclass +@dataclass(frozen=True) class SeriesReference(DICOMObjectReference): """Reference to a single Series, part of a study""" @@ -94,7 +163,7 @@ def level(self): return DICOMObjectLevels.SERIES -@dataclass +@dataclass(frozen=True) class StudyReference(DICOMObjectReference): """Reference to a single study""" @@ -167,6 +236,28 @@ def all_series(self) -> List["Series"]: """ raise NotImplementedError() + def max_object_depth(self) -> DICOMObjectLevels: + """What is the deepest object level? Does contain only Study or also Series, + Instances? + """ + raise NotImplementedError() + + @staticmethod + def extract_refs( + objects: Iterable["DICOMObject"], + ) -> Tuple[DICOMObjectReference, ...]: + """Extract reference from each object and put in a tuple, raise error for + empty objects. 
For cleaner code in DICOMObject.contained_references() + + Raises + ------ + NoReferencesFoundError + if objects is empty + """ + if not objects: + raise NoReferencesFoundError() + return tuple(x.reference() for x in objects) + class Instance(DICOMObject): parent: "Series" @@ -192,6 +283,17 @@ def reference(self) -> InstanceReference: instance_uid=self.uid, ) + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + return (self.reference(),) # instance is deepest level so always OK + + def max_object_depth(self) -> DICOMObjectLevels: + """What is the deepest object level? Does contain only Study or also Series, + Instances? + """ + return DICOMObjectLevels.INSTANCE + class Series(DICOMObject): instances: Sequence[Instance] @@ -226,6 +328,28 @@ def reference(self) -> SeriesReference: """Return a Reference to this object using uids""" return SeriesReference(study_uid=self.parent.uid, series_uid=self.uid) + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + + if max_level == DICOMObjectLevels.STUDY: + return self.extract_refs([self]) + elif max_level == DICOMObjectLevels.SERIES: + return self.extract_refs([self]) + elif max_level == DICOMObjectLevels.INSTANCE: + return self.extract_refs(self.all_instances()) + else: + raise ValueError(f'unknown DICOMObjectLevel "{str(max_level)}"') + + def max_object_depth(self) -> DICOMObjectLevels: + """What is the deepest object level? Does contain only Study or also Series, + Instances? 
+ """ + if self.instances: + return DICOMObjectLevels.INSTANCE + else: + return DICOMObjectLevels.SERIES + class Study(DICOMObject): series: Sequence[Series] @@ -259,6 +383,28 @@ def reference(self) -> StudyReference: """Return a Reference to this object using uids""" return StudyReference(study_uid=self.uid) + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + + if max_level == DICOMObjectLevels.STUDY: + return self.extract_refs([self]) + elif max_level == DICOMObjectLevels.SERIES: + return self.extract_refs(self.all_series()) + elif max_level == DICOMObjectLevels.INSTANCE: + return self.extract_refs(self.all_instances()) + else: + raise ValueError(f'unknown DICOMObjectLevel "{str(max_level)}"') + + def max_object_depth(self) -> DICOMObjectLevels: + """What is the deepest object level? Does contain only Study or also Series, + Instances? + """ + if self.series: + return self.series[0].max_object_depth() + else: + return DICOMObjectLevels.STUDY + class NonInstanceParameterError(DICOMTrolleyError): """A DICOMDownloadable could not be converted into its constituent instances. @@ -282,106 +428,65 @@ class NonSeriesParameterError(DICOMTrolleyError): """ -def to_instance_refs( - objects: Sequence[DICOMDownloadable], -) -> List[InstanceReference]: - """Convert all input to instance references. Raise informative errors. +def to_instance_refs(objects: Sequence[DICOMDownloadable]): + """Convert all to instance references. See to_refs()""" + try: + return to_references(objects, max_level=DICOMObjectLevels.INSTANCE) + except NoReferencesFoundError as e: + raise NonInstanceParameterError( + f"Cannot obtain instance references: {e}" + ) from e - This is a common pre-processing step used by Downloader classes. Many cannot - download higher-level objects like 'study 123' directly, but instead require - you to query all instances contained in 'study 123' first and pass those to - the downloader. 
- Parameters - ---------- - objects: - Convert these objects +def to_series_level_refs(objects: Sequence[DICOMDownloadable]): + """Convert all to at least series references.""" + try: + return to_references(objects, max_level=DICOMObjectLevels.SERIES) + except NoReferencesFoundError as e: + raise NonSeriesParameterError( + f"Cannot obtain series references: {e}" + ) from e - Returns - ------- - List[InstanceReference] - References to all instances contained in objects - Raises - ------ - NonInstanceParameterError - If any input object could not be converted into instances. This is the - case for higher-level references like StudyReference, or for DICOMObjects - that do not contain any deeper-level information, such as a Study that - contains no Series (which is the correct result of Study-level queries). - - """ - output: List[InstanceReference] = [] - for obj in objects: - if isinstance(obj, InstanceReference): - output.append(obj) # Already an instance reference. Just add - elif isinstance(obj, DICOMObjectReference): - raise NonInstanceParameterError( - f"Cannot extract instances from '{obj}' " - ) - elif isinstance(obj, DICOMObject): - instances = obj.all_instances() # Extract instances - if instances: - output = output + [x.reference() for x in instances] - else: - raise NonInstanceParameterError( - f"{obj} contains no instances. " - f"Was this information queried for?" - ) - - return output - - -def to_series_level_refs( - objects: Sequence[DICOMDownloadable], -) -> List[Union[SeriesReference, InstanceReference]]: - """Make sure all input objects are Series or Instance references. - Convert if possible, raise exceptions if not. - - Weaker version of to_instance_refs(). Weaker because series references without - instance info are also allowed here. See that function for more info +def to_references( + objects: Sequence[DICOMDownloadable], max_level: DICOMObjectLevels +) -> List[DICOMObjectReference]: + """Make sure all input objects are of max_level or lower. 
Extract lower level + references if possible. For example, if max_level is series, try to extract + series references from a study. Raise exceptions if not possible. Parameters ---------- objects: Convert these objects + max_level: + Return references of this level or lower, otherwise raise exception. Returns ------- List[Union[SeriesReference, InstanceReference]] References to series and instances contained in input objects + Notes + ----- + This is a common pre-processing step used by Downloader classes. Many cannot + download higher-level objects like 'study 123' directly, but instead require + you to query all instances contained in 'study 123' first and pass those to + the downloader. + Raises ------ - NonSeriesParameterError + NoReferencesFoundError If any input object could not be converted into instances. This is the case for higher-level references like StudyReference, or for DICOMObjects that do not contain any deeper-level information, such as a Study that - contains no Series (which is the correct result of Study-level queries). + contains no Series (which is a correct result for Study-level queries). """ - output: List[Union[InstanceReference, SeriesReference]] = [] + refs: List[DICOMObjectReference] = [] for obj in objects: - if isinstance( - obj, (InstanceReference, SeriesReference, Instance, Series) - ): - # already OK, just add - output.append(obj.reference()) - elif isinstance(obj, StudyReference): - # no series in here, I don't have enough info - raise NonSeriesParameterError( - f"Cannot extract series from '{obj}' " - ) - elif isinstance(obj, Study): - if obj.series: - for series in obj.series: - output.append(series.reference()) - else: - raise NonSeriesParameterError( - f"Cannot extract series from '{obj}'" - f". 
It contains no series " - ) - return output + refs.extend(obj.contained_references(max_level=max_level)) + return refs class Downloader: @@ -416,7 +521,7 @@ def datasets(self, objects: Sequence[DICOMDownloadable]): download can only process Instance targets. See Exception docstring for rationale """ - raise NotImplementedError + raise NotImplementedError() def datasets_async( self, instances: Sequence[InstanceReference], max_workers=None @@ -440,7 +545,7 @@ def datasets_async( ------- Iterator[Dataset, None, None] """ - raise NotImplementedError + raise NotImplementedError() class QueryLevels(str, Enum): @@ -450,6 +555,18 @@ class QueryLevels(str, Enum): SERIES = "SERIES" INSTANCE = "INSTANCE" + @classmethod + def from_object_level(cls, level: DICOMObjectLevels): + """Convert from DICOMObjectLevel enum. See docstring there for reason""" + if level == DICOMObjectLevels.STUDY: + return cls.STUDY + elif level == DICOMObjectLevels.SERIES: + return cls.SERIES + elif level == DICOMObjectLevels.INSTANCE: + return cls.INSTANCE + else: + raise ValueError(f"Unknown DICOMObjectLevels {level}") + # Used in Query.init_from_query() type annotation QuerySubType = TypeVar("QuerySubType", bound="Query") @@ -619,8 +736,10 @@ def find_study(self, query: Query) -> Study: ) return results[0] - def find_full_study_by_id(self, study_uid: str) -> Study: - """Find a single study at image level + def find_study_by_id( + self, study_uid: str, query_level: QueryLevels = QueryLevels.STUDY + ) -> Study: + """Find a single study at the given depth Useful for automatically finding all instances for a study. Meant to be implemented in child classes @@ -629,57 +748,16 @@ def find_full_study_by_id(self, study_uid: str) -> Study: ---------- study_uid: str Study to search for + query_level: QueryLevels + Depth of search. Whether to find only study level info or go deeper into + series or instances. 
Deeper levels will result in slower, more extensive + searches Returns ------- Study + Potentially containing series or instances, dependent on query_level - Raises - ------ - DICOMTrolleyError - If no results or more than one result is returned by query - """ - raise NotImplementedError() - - def find_study_at_instance_level(self, study_uid: str) -> Study: - """Find a single study at image level - - Useful for automatically finding all instances for a study. - - Parameters - ---------- - study_uid: str - Study to search for - - Returns - ------- - Study - Containing full DICOM object information, series and instances - - Raises - ------ - DICOMTrolleyError - If no results or more than one result is returned by query - """ - return self.find_study( - Query(StudyInstanceUID=study_uid, query_level=QueryLevels.INSTANCE) - ) - - def find_study_at_series_level(self, study_uid: str) -> Study: - """Find a single study at series level - - Meant to be - implemented in child classes - - Parameters - ---------- - study_uid: str - Study to search for - - Returns - ------- - Study - Containing series, but not instances Raises ------ @@ -687,7 +765,7 @@ def find_study_at_series_level(self, study_uid: str) -> Study: If no results or more than one result is returned by query """ return self.find_study( - Query(StudyInstanceUID=study_uid, query_level=QueryLevels.SERIES) + Query(StudyInstanceUID=study_uid, query_level=query_level) ) diff --git a/dicomtrolley/datastructures.py b/dicomtrolley/datastructures.py new file mode 100644 index 0000000..ffda44f --- /dev/null +++ b/dicomtrolley/datastructures.py @@ -0,0 +1,379 @@ +"""Additional datastructures that do not belong to any specific dicomtrolley +class or function + +""" +from datetime import datetime +from enum import Enum +from typing import ( + Any, + DefaultDict, + Hashable, + Iterable, + Iterator, + List, + Sequence, + Tuple, +) +from typing import OrderedDict as OrderedDictType + +# Used in TreeNode functions +TreeAddress = 
Tuple[str, ...] + + +def addr(dot_address: str): + """Convenience method for creating an address with dot notation + + Works quick and readable as long as you don't have dots in your address keys. + If you do, just use a ['list','of','strings','with.dots.if.you.have.to'] + """ + return tuple(dot_address.split(".")) + + +class PruneStrategy(str, Enum): + """How to handle pruning multiple nodes at once. + Consider this tree with five nodes: + A + / \ + B C + / \ + D E + + I want to prune node B and E. + There are two choices to be made with regard to this. + Choice 1: + Pruning node B will orphan node D, and make it inaccessible. Is this OK? + Choice 2: + If orphaning node D is not OK, is it still ok to remove E? Or should we just + disallow the whole pruning? + + These two choices inform tree strategies: + FORCE + Just prune. Yes you can prune B. D and E will disappear and that's fine + + WHERE_POSSIBLE + Let's not get overzealous. You can't prune B, but you can still prune E. + + CHECK_FIRST + Let's be very careful. Only prune if ALL nodes can be pruned. This + assures that a successful prune removes all nodes + + """ + + FORCE = "Force" # prune all nodes, don't check for anything + + WHERE_POSSIBLE = ( + "Where_possible" # prune nodes without children where you can + ) + + CHECK_FIRST = "Check_first" # only prune if all targets can be pruned + + +class TreeNode(DefaultDict[str, "TreeNode"]): + """Recursive defaultdict with a 'data' property. Helps parse to tree structures. + + There is no native tree datastructure in python as far as I know. 
+ + Some properties: + * Inherits 'access-means-creation' functionality from defaultdict: + ``` + >>> root = TreeNode() + >>> root['study1']['series1']['instance1'].data = 'some instance info' + >>> 'study1' in root + True + ``` + * Address nodes by nested dict notation or by list of strings: + ``` + >>> root = TreeNode() + >>> root['study1']['series1']['instance1'].data = 'some instance info' + >>> 'study1' in root + True + ``` + + Examples + -------- + >>> root = TreeNode() + >>> root['study1']['series1']['instance1'].data = 'some instance info' + >>> 'study1' in root + True + >>> root['study1']['series2'].data = 'some series data' + >>> list(root['study1'].keys) + ['series1', 'series2'] + """ + + def __init__(self, data=None, allow_overwrite=True): + """ + + Parameters + ---------- + data: + Optional data to associate with this node + allow_overwrite: bool, optional + If False, will raise exception when overwriting data attribute + """ + super().__init__(lambda: TreeNode(allow_overwrite=allow_overwrite)) + self._data = data + self.allow_overwrite = allow_overwrite + + @property + def data(self): + return self._data + + @data.setter + def data(self, value): + if self.allow_overwrite or not self.data: + self._data = value + else: + raise ValueError("Overwriting data is not allowed") + + @classmethod + def init_from_addresses(cls, addresses: Sequence[TreeAddress]): + """Create TreeNode and make sure all given addresses exist""" + root = cls() + for address in addresses: + root.get_node(address) + return root + + def iter_leaf_addresses( + self, + address_so_far: Tuple[str, ...] 
= (), + ) -> Iterator[TreeAddress]: + """Generate addresses for all this nodes' children, depth-first""" + + if not self.keys(): + yield address_so_far + else: + for key in self.keys(): + yield from self[key].iter_leaf_addresses( + address_so_far=address_so_far + tuple(key) + ) + + def exists(self, address: TreeAddress) -> bool: + """Check whether a node exists at address without creating any new nodes + + Parameters + ---------- + address: + subsequent keys. To check TreeNode()['a']['b']['c'], use ['a', 'b', 'c'], + for example + """ + if not address: + return True # required for recursive calls below to work + else: + key, address = address[0], address[1:] + child = self.get(key) + if child is not None: + return child.exists(address) # recurse + else: + return False + + def add(self, object_in: Any, address: TreeAddress): + self.get_node(address).data = object_in + + def copy(self) -> "TreeNode": + """Create a copy of this node and all children""" + copied = TreeNode(data=self.data, allow_overwrite=self.allow_overwrite) + for key, item in self.items(): + copied[key] = item.copy() + + return copied + + def pop_address(self, address: TreeAddress): + """Pop off the leaf node at this address. Leave stem nodes. + + Raises + ------ + KeyError + If node at address does not exist. Follows dict.pop() behaviour. + """ + if not address: + raise KeyError("Address was empty. Cannot pop self!") + + key, address = address[0], address[1:] + try: + if not address: # last key in address has been reached. 
We can pop + return self.pop(key) + else: # + return self.get_node(tuple(key), create=False).pop_address( + address + ) + except KeyError as e: + raise KeyError(f"No node found at {address}.{key}") from e + + def prune(self, address: TreeAddress): + """Remove the node at this address, including any child nodes + + Raises + ------ + KeyError + If node at address does not exist + """ + _ = self.pop_address(address) + + def is_leaf(self): + return not self.keys() + + def prune_leaf(self, address: TreeAddress): + """Prune node at this address only if it has no children + + Raises + ------ + KeyError + When the referenced node does not exist + ValueError + When referenced node is not a leaf (still has child nodes) + """ + + if not address: + raise KeyError("Address was empty. Cannot prune self!") + + key, address_rest = address[0], address[1:] + try: + child = self.get_node((key,), create=False) + except KeyError as e: + raise KeyError( + f"Cannot prune non-existing node at {address}" + ) from e + + if address_rest: # more address to traverse. Recurse + child.prune_leaf(address_rest) + else: # no more address. Child should be a leaf + if child.is_leaf(): + self.pop(key) + else: + raise ValueError(f"Node at {address} is not a child node") + + def prune_all( # noqa: C901 # not too complex I think. Just elifs + self, + addresses: List[TreeAddress], + strategy: PruneStrategy = PruneStrategy.WHERE_POSSIBLE, + ): + """Prune the leaf at each address. You can pass non-leaf addresses as well + provided that all children of this leaf are passed as well. + + Parameters + ---------- + addresses + Prune nodes at these locations + strategy + Indicates how to handle child nodes and invalid targets. + See PruneStrategy docstring + + Raises + ------ + KeyError + If strategy is PruneStrategy.CHECK_FIRST and illegal addresses are passed. 
+ Illegal meaning 'would remove unlisted child nodes' + """ + if strategy == PruneStrategy.FORCE: + for address in addresses: + try: + self.prune(address) + except KeyError: + continue + elif strategy == PruneStrategy.WHERE_POSSIBLE: + for address in addresses: + try: + self.prune_leaf(address) + except ValueError: + continue + elif strategy == PruneStrategy.CHECK_FIRST: + # simulate subtree first to avoid leaving tree in half-pruned state after + # error + simtree = self.copy() + addresses.sort(key=lambda x: len(x), reverse=True) + try: + for address in addresses: + simtree.prune_leaf(address=address) + except ValueError as e: + raise KeyError( + f"Cannot prune all addresses: {e}. Pruning cancelled" + ) from e + # No exceptions. We can safely remove + for address in addresses: + self.pop_address(address) + else: + raise ValueError(f"Unknown strategy '{strategy}'") + + def get_node(self, address: TreeAddress, create=True): + """Get node at given address, creating if it does not exist + + Parameters + ---------- + address + Address to get + create: Bool, optional + If true, create non-existing address. Else, raise exception + Defaults to True + + Raises + ------ + KeyError + If address does not exist and create is False + + Returns + ------- + TreeNode + The treenode at this address + """ + if not address: + return self + else: + key = address[0] + if not create and key not in self.keys(): + raise KeyError(f"Key {key} not found") + return self[key].get_node(address[1:], create=create) + + +class ExpiringCollection: + """A collection of objects that expires after a set time + + Collects expired items in .expired_items list. 
+ """ + + def __init__(self, expire_after_seconds: int): + self.expire_after_seconds = expire_after_seconds + self.stamped_items = LastUpdatedOrderedDict() + self.expired_items: List[Any] = [] + + @staticmethod + def _now(): + return datetime.now() + + def add(self, item): + self.stamped_items[item] = self._now() + + def add_all(self, items: Iterable[Hashable]): + for item in items: + self.add(item) + + @property + def items(self): + self.check_expired() + return list(self.stamped_items.keys()) + + def check_expired(self): + """Move expired to expired_items list""" + expired = [] + for item, timestamp in self.stamped_items.items(): + if (self._now() - timestamp).seconds > self.expire_after_seconds: + expired.append(item) + else: + break # timestamps should be ordered so we can stop checking + + self.expired_items = self.expired_items + expired + [self.stamped_items.pop(x) for x in expired] + + def collect_expired(self) -> List[Hashable]: + """Returns all expired items and removes them from local list""" + self.check_expired() + expired = self.expired_items + self.expired_items = [] + return expired + + +class LastUpdatedOrderedDict(OrderedDictType[Any, Any]): + """Store items in the order the keys were last added""" + + def __setitem__(self, key, value): + super().__setitem__(key, value) + self.move_to_end(key) diff --git a/dicomtrolley/exceptions.py b/dicomtrolley/exceptions.py index 3074a9f..2dcc611 100644 --- a/dicomtrolley/exceptions.py +++ b/dicomtrolley/exceptions.py @@ -10,3 +10,11 @@ class UnSupportedParameterError(DICOMTrolleyError): """ pass + + +class NoReferencesFoundError(DICOMTrolleyError): + """Cannot find any references for this object at the given level. 
Used in + DICOMDownloadable + """ + + pass diff --git a/dicomtrolley/parsing.py b/dicomtrolley/parsing.py index 950855b..6a05740 100644 --- a/dicomtrolley/parsing.py +++ b/dicomtrolley/parsing.py @@ -1,5 +1,5 @@ """Models parsing things into study/series/instance structure""" -from typing import Any, DefaultDict, Dict, List, Sequence +from typing import Dict, List, Sequence from pydicom.dataset import Dataset @@ -13,6 +13,7 @@ Study, StudyReference, ) +from dicomtrolley.datastructures import TreeNode from dicomtrolley.exceptions import DICOMTrolleyError @@ -24,46 +25,6 @@ def flatten(dicom_object) -> List[DICOMObject]: return nodes -class TreeNode(DefaultDict[Any, "TreeNode"]): - """Recursive defaultdict with a 'data' property. Helps parse to tree structures. - - Examples - -------- - >>> root = TreeNode() - >>> root['study1']['series1']['instance1'].data = 'some instance info' - >>> 'study1' in root - True - >>> root['study1']['series2'].data = 'some series data' - >>> list(root['study1'].keys) - ['series1', 'series2'] - """ - - def __init__(self, data=None, allow_overwrite=True): - """ - - Parameters - ---------- - data: - Optional data to associate with this node - allow_overwrite: bool, optional - If False, will raise exception when overwriting data attribute - """ - super().__init__(lambda: TreeNode(allow_overwrite=allow_overwrite)) - self._data = data - self.allow_overwrite = allow_overwrite - - @property - def data(self): - return self._data - - @data.setter - def data(self, value): - if self.allow_overwrite or not self.data: - self._data = value - else: - raise ValueError("Overwriting data is not allowed") - - class DICOMParseTree: """Models study/series/instance as a tree. 
Allows arbitrary branch insertions""" @@ -87,7 +48,7 @@ def init_from_objects(cls, objects: Sequence[DICOMObject]): Notes ----- - Parent objects for instances and series will be created to maintain to + Parent objects for instances and series will be created to ensure all nodes are connected to the tree root. For example, running this method with only a single instance as input will result in a regular root->study->series->instance tree. This is possible because all DICOMObjects @@ -189,7 +150,7 @@ def value_or_dataset(val): study_instance_uid, study_node = study_node_in study = Study( - uid=study_instance_uid, + uid=str(study_instance_uid), data=value_or_dataset(study_node.data), series=tuple(), ) @@ -263,7 +224,6 @@ def retrieve(self, reference: DICOMObjectReference) -> DICOMObject: DICOMObjectNotFound If data for the given parameters does not exist in tree """ - try: if isinstance(reference, StudyReference): return self[reference.study_uid] diff --git a/dicomtrolley/qido_rs.py b/dicomtrolley/qido_rs.py index 38213ac..2d1ee64 100644 --- a/dicomtrolley/qido_rs.py +++ b/dicomtrolley/qido_rs.py @@ -91,7 +91,7 @@ def uri_base(self) -> str: The non-query part of the URI as defined in DICOM PS3.18 section 10.6 table 10.6.1-2 """ - raise NotImplementedError + raise NotImplementedError() def uri_search_params(self) -> Dict[str, Union[str, List[str]]]: """The search parameter part of the URI as defined in @@ -301,7 +301,7 @@ def query_level_should_be_series_or_instance( """A relational query only makes sense for the instance and series levels. If you want to look for studies, us a hierarchical query """ - if values["query_level"] == QueryLevels.STUDY: + if values.get("query_level") == QueryLevels.STUDY: raise ValueError(STUDY_VALUE_ERROR_TEXT) return values @@ -312,30 +312,22 @@ def uri_base(self) -> str: The non-query part of the URI as defined in DICOM PS3.18 section 10.6 table 10.6.1-2 - - Raises - ------ - ValueError - if query_level is STUDY. 
This makes no sense for a relational query """ - if self.query_level == QueryLevels.STUDY: - raise ValueError(STUDY_VALUE_ERROR_TEXT) - elif self.query_level == QueryLevels.SERIES: + # QueryLevels.Study is checked in query_level_should_be_series_or_instance() + if self.query_level == QueryLevels.SERIES: return "/series" elif self.query_level == QueryLevels.INSTANCE: if self.StudyInstanceUID: # all instances for this study return f"/studies/{self.StudyInstanceUID}/instances" else: - # all instances on the intire server (might be slow) + # all instances on the entire server (might be slow) return "/instances" - else: - raise ValueError( - f'Unknown querylevel "{self.query_level}". ' - f'Should be one of "{QueryLevels}"' - ) + # Unreachable due to pydantic validation and root validator. But I + # get uncomfortable from open elifs. + raise ValueError(f"Unknown query level {self.query_level}") class QidoRS(Searcher): diff --git a/dicomtrolley/trolley.py b/dicomtrolley/trolley.py index 605bc4d..fc27ee4 100644 --- a/dicomtrolley/trolley.py +++ b/dicomtrolley/trolley.py @@ -7,27 +7,23 @@ Searcher and Downloader classes should be stand-alone. They are not allowed to communicate directly. Trolley has knowledge of both and is in control. 
""" -import itertools + import tempfile from typing import List, Optional, Sequence, Union from dicomtrolley.core import ( DICOMDownloadable, - DICOMObject, + DICOMObjectLevels, DICOMObjectReference, Downloader, - Instance, - InstanceReference, NonInstanceParameterError, NonSeriesParameterError, + QueryLevels, Searcher, - Series, - SeriesReference, Study, ) -from dicomtrolley.exceptions import DICOMTrolleyError +from dicomtrolley.exceptions import NoReferencesFoundError from dicomtrolley.logs import get_module_logger -from dicomtrolley.parsing import DICOMObjectNotFound, DICOMObjectTree from dicomtrolley.storage import DICOMDiskStorage, StorageDir @@ -51,7 +47,6 @@ def __init__( self, downloader: Downloader, searcher: Searcher, - query_missing=True, storage: Optional[DICOMDiskStorage] = None, ): """ @@ -62,20 +57,13 @@ def __init__( The module to use for downloads searcher: Searcher The module to use for queries - query_missing: bool, optional - if True, Trolley.download() will query for missing DICOM instances. For - example when passing a Study obtained from a study-level query, which does - not contain any information on instances - if False, missing instances will raise MissingObjectInformationError storage: DICOMDiskStorage instance, optional All downloads are saved to disk by calling this objects' save() method. Defaults to basic StorageDir (saves as /studyid/seriesid/instanceid) """ self.downloader = downloader self.searcher = searcher - self._searcher_cache = CachedSearcher( - searcher, query_missing=query_missing - ) + if storage: self.storage = storage else: @@ -135,58 +123,64 @@ def download( def fetch_all_datasets(self, objects: Sequence[DICOMDownloadable]): """Get full DICOM dataset for all instances contained in objects. + Some downloaders require explicit series- or instance level information to be + able to download. Additional queries might be fired to obtain this + information. 
+ Returns ------- - Iterator[Dataset] - The downloaded dataset and the context that was used to download it + Iterator[Dataset, None, None] + All datasets belonging to input objects """ try: yield from self.downloader.datasets(objects) except NonSeriesParameterError: # downloader wants at least series level information. Do extra work. - series_lvl_objects = self.ensure_to_series_level(objects) - yield from self.downloader.datasets(series_lvl_objects) + series_lvl_refs = self.obtain_references( + objects=objects, max_level=DICOMObjectLevels.SERIES + ) + yield from self.downloader.datasets(series_lvl_refs) except NonInstanceParameterError: # downloader wants only instance input. Do extra work. - instances = self.convert_to_instances(objects) - yield from self.downloader.datasets(instances) - - def convert_to_instances( - self, objects_in: Sequence[DICOMDownloadable] - ) -> List[InstanceReference]: - """Find all instances contained in objects, running additional image-level - queries with searcher if needed. - - Holds DICOMObjects in an internal cache, avoiding queries as much as - possible. - - Note - ---- - This method can fire additional search queries and might take a long time to - return depending on the number of objects and missing instances - """ - return list( - itertools.chain.from_iterable( - self._searcher_cache.retrieve_instance_references(x) - for x in objects_in + instance_refs = self.obtain_references( + objects=objects, max_level=DICOMObjectLevels.INSTANCE ) - ) + yield from self.downloader.datasets(instance_refs) - def ensure_to_series_level( - self, objects_in: Sequence[DICOMDownloadable] - ) -> Sequence[Union[InstanceReference, SeriesReference]]: - """Make sure all input is converted to instance or series + def obtain_references( + self, + objects: Sequence[DICOMDownloadable], + max_level: DICOMObjectLevels, + ) -> List[DICOMObjectReference]: + """Get download references for all downloadable objects, at max_level or + lower. query if needed. 
- ---- - This method can fire additional search queries and might take a long time to - return depending on the number of objects and missing instances or series + For example, if level is QueryLevels.Instance and a Study object is given, + try to extract instances from this study. If those instances or not in the + study, ask searcher to obtain them + + Returns + ------- + List[DICOMObjectReference] of the level given or deeper """ - return list( - itertools.chain.from_iterable( - self._searcher_cache.ensure_series_level_references(x) - for x in objects_in - ) - ) + references: List[DICOMObjectReference] = [] + for downloadable in objects: + try: + references += downloadable.contained_references( + max_level=max_level + ) + except NoReferencesFoundError: + # Not enough info in object itself. We need searcher + logger.debug( + f"Not enough info to extract '{str(max_level)}-level' " + f"references from {downloadable}. Asking searcher." + ) + study = self.searcher.find_study_by_id( + study_uid=downloadable.reference().study_uid, + query_level=QueryLevels.from_object_level(max_level), + ) + references += study.contained_references(max_level=max_level) + return references def fetch_all_datasets_async(self, objects, max_workers=None): """Get DICOM dataset for each instance given objects using multiple threads. @@ -216,204 +210,8 @@ def fetch_all_datasets_async(self, objects, max_workers=None): ) except NonInstanceParameterError: yield from self.downloader.datasets_async( - instances=self.convert_to_instances(objects), + instances=self.obtain_references( + objects=objects, max_level=DICOMObjectLevels.INSTANCE + ), max_workers=max_workers, ) - - -class CachedSearcher: - def __init__( - self, - searcher: Searcher, - cache: Optional[DICOMObjectTree] = None, - query_missing: Optional[bool] = True, - ): - """A DICOMObject tree (study/series/instances) that will launch - queries to expand itself if needed. Tries to query as little as - possible. 
- - Created this to efficiently get all instances for download based on a - variable collection of DICOMDownloadable objects. - - Parameters - ---------- - searcher: Searcher - Use this searcher to search for missing elements in cache - cache: DICOMObjectTree, Optional - Use this tree as cache. Defaults to an empty tree - query_missing: Bool, optional - Launch queries to find missing information. If False, will raise - MissingObjectInformationError. Defaults to True - """ - self.searcher = searcher - if not cache: - cache = DICOMObjectTree([]) - self.cache = cache - self.query_missing = query_missing - - def retrieve_instance_references( - self, downloadable: DICOMDownloadable - ) -> List[InstanceReference]: - """Get references for all instances contained in downloadable, performing - additional queries if needed - - Raises - ------ - MissingObjectInformationError - If Trolley.query_missing is False and queries would be required to - retrieve all instances. - - """ - # DICOMObject might already contain all required info - if isinstance(downloadable, DICOMObject): - instances = downloadable.all_instances() - if instances: - return [x.reference() for x in instances] - - # No instances in downloadable. Do we have instances in cache for this? - reference = downloadable.reference() - try: - return [ - x.reference() for x in self.get_instances_from_cache(reference) - ] - except MissingObjectInformationError as e: # not found, we'll have to query - if not self.query_missing: - raise MissingObjectInformationError( - f"No instances cached for {reference} " - f"and query_missing was False" - ) from e - - logger.debug(f"No instances cached for {reference}. Querying") - self.query_study_instances(reference) - return [ - x.reference() for x in self.get_instances_from_cache(reference) - ] - - def ensure_series_level_references( - self, downloadable: DICOMDownloadable - ) -> List[Union[InstanceReference, SeriesReference]]: - """Extract references of at least series level. 
Query if not found - - Raises - ------ - MissingObjectInformationError - If Trolley.query_missing is False and queries would be required to - retrieve Series. - - - TODO: Strong whiffs of code smell here. This method should not be this - long. Some pointers for refactoring: Get rid of explicit type checking. - possibly move logic to DICOMDownloadable classes. Why does this method - take a DICOMDownloadable and not just a reference? It seems conversion logic - is shoehorned into CachedSearcher and Trolley classes. CachedSearcher should - govern a cache of DICOM information, Trolley should handle high level - requests. Neither are quite right for conversion. Possibly create separate - class for this. - """ - # Instance and Series should just be left as is - if isinstance( - downloadable, - (Instance, Series, InstanceReference, SeriesReference), - ): - return [downloadable.reference()] - # If its a study, it might need work - - elif isinstance(downloadable, Study): - series: Sequence[ - Union[Instance, Series] - ] = downloadable.all_series() - if series: # there were series inside Study already - return [x.reference() for x in series] - - # no studies. Maybe they are in cache? - reference = downloadable.reference() - try: - series = self.get_series_level_from_cache(reference) - if series: - return [x.reference() for x in series] - else: - raise MissingObjectInformationError() # not expected. Being careful. - - except MissingObjectInformationError as e: # not found, we'll have to query - if not self.query_missing: - raise MissingObjectInformationError( - f"No series cached for {reference} " - f"and query_missing was False" - ) from e - - logger.debug(f"No series cached for {reference}. Querying") - self.query_study_series(reference) - return [ - x.reference() - for x in self.get_series_level_from_cache(reference) - ] - - def get_instances_from_cache( - self, reference: DICOMObjectReference - ) -> List[Instance]: - """Retrieve all instances for this reference. 
Raise exception if not found - - Raises - ------ - MissingObjectInformationError - If no instances are found in cache - """ - try: - instances = self.cache.retrieve(reference).all_instances() - if instances: - return instances - else: - # reference was in cache, but there are no instances - raise MissingObjectInformationError( - f"No instances found for {reference}" - ) - except DICOMObjectNotFound as e: - # reference was not in cache - raise MissingObjectInformationError( - f"No instances found for {reference}" - ) from e - - def get_series_level_from_cache( - self, reference: DICOMObjectReference - ) -> Sequence[Union[Instance, Series]]: - """Retrieve series this reference, or instance if required. - Raise exception if not found - - Raises - ------ - MissingObjectInformationError - If no series or instance can be found for this reference - """ - try: - series: Sequence[Union[Instance, Series]] = self.cache.retrieve( - reference - ).all_series() - if series: - return series - else: - # reference was in cache, but there are no instances - raise MissingObjectInformationError( - f"No series found for {reference}" - ) - except DICOMObjectNotFound as e: - # reference was not in cache - raise MissingObjectInformationError( - f"No series found for {reference}" - ) from e - - def query_study_instances(self, reference: DICOMObjectReference): - study = self.searcher.find_study_at_instance_level(reference.study_uid) - self.cache.add_study(study) - - def query_study_series(self, reference: DICOMObjectReference): - study = self.searcher.find_study_at_series_level(reference.study_uid) - self.cache.add_study(study) - - -class MissingObjectInformationError(DICOMTrolleyError): - """An operation on a DICOM object cannot complete because the required information - is not available. 
For example, trying to extract all instances from a Study that - was retrieved without instance information - """ - - pass diff --git a/docs/index.md b/docs/index.md index dfbf749..ef6d5e9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -37,7 +37,7 @@ studies = trolley.find_studies(Query(PatientName='B*')) trolley.download(studies[0], output_dir='/tmp/trolley') ``` ## Documentation -see [dicomtrolley github docs](https://github.com/sjoerdk/dicomtrolley/tree/master/docs) +see [dicomtrolley docs on readthedocs.io](https://dicomtrolley.readthedocs.io) ## Examples Example code can be found [on github](https://github.com/sjoerdk/dicomtrolley/tree/master/examples) diff --git a/docs/usage.md b/docs/usage.md index d171aac..9464e46 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -125,6 +125,22 @@ class MyStorage(DICOMDiskStorage): trolley = Trolley(searcher=a_searcher, downloader=a_downloader, storage=MyStorage()) ``` +## Caching +You can add caching to any [`Searcher`](concepts.md#searcher) by wrapping it with +a [CachedSearcher][dicomtrolley.caching.CachedSearcher] instance: + +```python +from dicomtrolley.caching import CachedSearcher, DICOMObjectCache + +searcher = CachedSearcher(searcher=a_searcher, + cache=DICOMObjectCache(expiry_seconds=300)) + +trolley = Trolley(searcher=searcher, downloader=a_downloader) +``` + +[CachedSearcher][dicomtrolley.caching.CachedSearcher] is a [Searcher][dicomtrolley.core.Searcher] +and can be used like any other. It will return cached results to any of its function +calls for up to `expiry_seconds` seconds. ## Logging Dicomtrolley uses the standard [logging](https://docs.python.org/3/library/logging.html) module. 
The root logger is @@ -149,9 +165,7 @@ trolley = Trolley(searcher=a_searcher, downloader=Rad69(session=requests.session(), url="https://server/rad69", errors_to_ignore = [XDSMissingDocumentError])) - -study = trolley.find_study(Query(PatientName="AB*")) -trolley.download(study, '/tmp') # will skip series raising XDSMissingDocumentError +# trolley.download() # will now skip series raising XDSMissingDocumentError ``` ## Authentication diff --git a/tests/mock_responses.py b/tests/mock_responses.py index b88059b..e1ee845 100644 --- a/tests/mock_responses.py +++ b/tests/mock_responses.py @@ -6,12 +6,12 @@ import re import urllib from dataclasses import dataclass, field -from typing import Any, Dict, List, Pattern, Union +from typing import Any, Callable, Dict, List, Pattern, Union from requests_mock import ANY from requests_toolbelt.multipart.encoder import MultipartEncoder -from dicomtrolley.core import InstanceReference +from dicomtrolley.core import InstanceReference, QueryLevels from dicomtrolley.xml_templates import ( A_RAD69_RESPONSE_SOAP_HEADER, RAD69_SOAP_RESPONSE_NOT_FOUND, @@ -29,7 +29,7 @@ class MockResponse: url: Union[str, Pattern[str]] status_code: int = 200 method: str = "GET" - text: str = "" + text: Union[str, Callable[[Any, Any], Any]] = "" content: bytes = field(default_factory=bytes) json: Dict[str, Any] = field(default_factory=dict) reason: str = "" @@ -159,6 +159,8 @@ def as_instance_reference(cls): method="GET", ) +MINT_SEARCH_MOCK_STUDY_UID = "1.2.340.114850.2.857.2.793263.2.125336546.1" + MINT_SEARCH_STUDY_LEVEL = MockResponse( url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=STUDY", text="' + 'tag="0020000d" vr="UI" val="' + MINT_SEARCH_MOCK_STUDY_UID + '" />' '' '' + 'tag="0020000d" vr="UI" val="' + MINT_SEARCH_MOCK_STUDY_UID + '" />' "", ) +# exactly one study +MINT_SEARCH_STUDY_LEVEL_SINGLE = MockResponse( + url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=STUDY", + text="' + "", +) MINT_SEARCH_SERIES_LEVEL = 
MockResponse( url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=SERIES", @@ -195,7 +209,7 @@ def as_instance_reference(cls): ' tag="0020000e" vr="UI" val="1.2.840.113619.2.239.1783.1568025913.0.76" />' '' '' + 'vr="UI" val="' + MINT_SEARCH_MOCK_STUDY_UID + '" />' "", ) +MINT_SEARCH_SERIES_LEVEL_SINGLE = MockResponse( + url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=SERIES", + text="' + '' + '', +) + MINT_SEARCH_INSTANCE_LEVEL = MockResponse( url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=INSTANCE", text="<' '/series><' "/studySearchResults>", @@ -272,7 +303,7 @@ def as_instance_reference(cls): # The IDS in the MINT response. To not have to copy-paste these in tests MINT_SEARCH_INSTANCE_LEVEL_IDS = { - "study_uid": "111", + "study_uid": MINT_SEARCH_MOCK_STUDY_UID, "series_uids": ( "1.2.40.0.13.1.202066129828111990737107018349786560571", "1.2.840.113663.1500.1.460388269.2.1.20201105.84519.348", @@ -532,6 +563,9 @@ def create_rad69_response(bytes_parts, soap_header=None): ] ), ) + +# respond with a valid mint search response containing three studies, whatever the +# called url was. Blunt. MINT_SEARCH_ANY = MockResponse( url=ANY, method=ANY, @@ -555,6 +589,31 @@ def create_rad69_response(bytes_parts, soap_header=None): ) +def mint_response(request, context): + """Generate a MINT query response that matches the requested StudyInstanceUID + and also honours query_level. 
+ """ + query_level = request.qs["querylevel"][0].upper() + requested_suid = request.qs["studyinstanceuid"][0] + + if query_level == QueryLevels.STUDY: + base_response = MINT_SEARCH_STUDY_LEVEL_SINGLE.text + elif query_level == QueryLevels.SERIES: + base_response = MINT_SEARCH_SERIES_LEVEL_SINGLE.text + elif query_level == QueryLevels.INSTANCE: + base_response = MINT_SEARCH_INSTANCE_LEVEL.text + else: + raise ValueError(f"Unknown query level {query_level}") + + return base_response.replace(MINT_SEARCH_MOCK_STUDY_UID, requested_suid) + + +# Respond to MockUrls.QIDO_RS_URL queries with a mint response matching the +# requested StudyInstanceUID. +MINT_SEARCH_MATCH_SUID = MockResponse( + url=re.compile(MockUrls.MINT_URL + ".*"), method=ANY, text=mint_response +) + # a valid response when a query has 0 results QIDO_RS_204_NO_RESULTS = MockResponse( url=re.compile(MockUrls.QIDO_RS_URL + ".*"), diff --git a/tests/test_caching.py b/tests/test_caching.py new file mode 100644 index 0000000..d8d14ec --- /dev/null +++ b/tests/test_caching.py @@ -0,0 +1,211 @@ +from datetime import datetime, timedelta + +import pytest + +from dicomtrolley.caching import CachedSearcher, DICOMObjectCache, NodeNotFound +from dicomtrolley.core import Query, QueryLevels +from tests.mock_responses import MINT_SEARCH_MATCH_SUID + + +def test_object_cache(some_studies): + """Test basics of cache. Adding and retrieving""" + cache = DICOMObjectCache() + a_study = some_studies[0] + + # Nothing has been added yet. Retrieving should fail + with pytest.raises(NodeNotFound): + cache.retrieve(a_study.reference()) + + # After adding, retrieve should work and fetch the right thing + cache.add(a_study) + assert a_study == cache.retrieve(a_study.reference()) + + +def test_object_cache_child_objects(some_studies): + """If you add a full study, you should be able to retrieve the series from that + study from cache. 
+ """ + a_study = some_studies[0] + cache = DICOMObjectCache(initial_objects=[a_study]) + + a_series = a_study.series[0] + assert a_series == cache.retrieve(a_series.reference()) + + +def test_object_cache_no_unneeded_leaves(some_studies): + """Checking whether a node exists should not create an empty node there""" + + cache = DICOMObjectCache() + assert not cache.root.items() + + with pytest.raises(NodeNotFound): + cache.retrieve(some_studies[0].reference()) + + assert not cache.root.items() + + +def test_object_cache_expiry(some_studies): + a_study = some_studies[0] + cache = DICOMObjectCache(initial_objects=[a_study], expiry_seconds=300) + + # item is not expired and can be retrieved + assert cache.retrieve(a_study.reference()) + # retrieved item is the actual study that was put in + assert cache.retrieve(a_study.reference()).series[0].instances[2] + + # now its 20 minutes later + cache.expiry._now = lambda: datetime.utcnow() + timedelta(seconds=600) + + # item should no longer be available + with pytest.raises(NodeNotFound): + cache.retrieve(a_study.reference()) + + +def test_cached_searcher_queries(requests_mock, a_cached_searcher): + """Check caching of calls to find_studies(Query)""" + + searcher, set_time = a_cached_searcher + + assert len(requests_mock.request_history) == 0 # no network calls yet + assert searcher.find_studies( + Query(StudyInstanceUID="1000") + ) # something comes back + assert len(requests_mock.request_history) == 1 # due to a network call + + # now the result should have been cached + assert searcher.find_studies( + Query(StudyInstanceUID="1000") + ) # something comes back + assert len(requests_mock.request_history) == 1 # but no more network calls + + set_time(200) # later, but nothing has expired yet + # results to other queries are not cached yet so cause network call + assert searcher.find_studies(Query(StudyInstanceUID="9999")) + assert len(requests_mock.request_history) == 2 + + set_time( + 400 + ) # First query result should have 
expired so again network call + assert searcher.find_studies(Query(StudyInstanceUID="1000")) + assert len(requests_mock.request_history) == 3 + + # but other query has not expired, so should not cause network call + assert searcher.find_studies(Query(StudyInstanceUID="9999")) + assert len(requests_mock.request_history) == 3 + + +def test_cached_searcher_query_by_id(requests_mock, a_cached_searcher): + """Check caching for calls to .find_study_by_id(study_id, level)""" + searcher, set_time = a_cached_searcher + + assert len(requests_mock.request_history) == 0 + assert searcher.find_study_by_id( + study_uid="1000", query_level=QueryLevels.STUDY + ) + assert len(requests_mock.request_history) == 1 + + assert searcher.find_study_by_id( + study_uid="1000", query_level=QueryLevels.STUDY + ) + assert len(requests_mock.request_history) == 1 + + set_time(400) + assert searcher.find_study_by_id( + study_uid="1000", query_level=QueryLevels.STUDY + ) + assert len(requests_mock.request_history) == 2 + + +def test_cached_searcher_shared_cache(requests_mock, a_cached_searcher): + """Methods .find_studies(Query) and .find_study_by_id(study_id, level) share + their cache + """ + searcher, set_time = a_cached_searcher + + assert len(requests_mock.request_history) == 0 + # Query a study at Study level + assert searcher.find_studies( + Query(StudyInstanceUID="1000", query_level=QueryLevels.STUDY) + ) + assert len(requests_mock.request_history) == 1 + + # This study should now also be available from cache + searcher.find_study_by_id(study_uid="1000", query_level=QueryLevels.STUDY) + assert len(requests_mock.request_history) == 1 + + # however, series info is not available, so this should cause another call + searcher.find_study_by_id(study_uid="1000", query_level=QueryLevels.SERIES) + assert len(requests_mock.request_history) == 2 + + # Even though everything is available, a different query will not use cache. 
+ # It's beyond scope to parse the semantics of a query + assert searcher.find_studies( + Query(StudyInstanceUID="1000", query_level=QueryLevels.SERIES) + ) + assert len(requests_mock.request_history) == 3 + + +def test_cached_searcher_complex(requests_mock, a_cached_searcher): + """Cache saves objects as a tree structure. As such it is possible to update + a leaf without its stem, making the stem expire before the leaf. Handle this. + + + """ + searcher, set_time = a_cached_searcher # default expiry is 300 secs + + # Query a study at full depth. + assert len(requests_mock.request_history) == 0 + study = searcher.find_study( + Query(StudyInstanceUID="1000", query_level=QueryLevels.INSTANCE) + ) + assert len(requests_mock.request_history) == 1 + # This is then saved in cache (no new request) + assert searcher.find_study( + Query(StudyInstanceUID="1000", query_level=QueryLevels.INSTANCE) + ) + assert len(requests_mock.request_history) == 1 + + set_time(200) # later, a single instance is updated + updated_instance = study.series[1].instances[0] + searcher.cache.add(updated_instance) + + set_time(400) # Now everything is expired except for this single instance + # you can retrieve the instance + assert searcher.cache.retrieve(updated_instance.reference()) + # but not the expired parent study + with pytest.raises(NodeNotFound): + assert searcher.cache.retrieve(study.reference()) + + # And trying the query again should send request again + assert len(requests_mock.request_history) == 1 + assert searcher.find_study( + Query(StudyInstanceUID="1000", query_level=QueryLevels.INSTANCE) + ) + assert len(requests_mock.request_history) == 2 + + +def test_mop_up(): + """Odds and ends I would like to just make sure about""" + cache = DICOMObjectCache() + with pytest.raises(ValueError): + cache.to_address("not a ref!") + + +@pytest.fixture +def a_cached_searcher(requests_mock, a_mint): + """A cached searcher with working mocked search backend, a 300 second expiry + and a method to 
set its now() so you can test expiry + """ + + # set up working response to a call to a_mint + requests_mock.register_uri(**MINT_SEARCH_MATCH_SUID.as_dict()) + + # set up a cache that you can set the current time on + cache = DICOMObjectCache(expiry_seconds=300) + now = datetime.now() + + def set_time(secs): + cache.expiry._now = lambda: now + timedelta(seconds=secs) + + searcher = CachedSearcher(searcher=a_mint, cache=cache) + return searcher, set_time diff --git a/tests/test_core.py b/tests/test_core.py index 859b889..f41fb45 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4,17 +4,21 @@ from pydicom.dataset import Dataset from dicomtrolley.core import ( + DICOMObjectLevels, ExtendedQuery, Instance, + InstanceReference, NonInstanceParameterError, NonSeriesParameterError, Query, Series, + SeriesReference, Study, StudyReference, to_instance_refs, to_series_level_refs, ) +from dicomtrolley.exceptions import NoReferencesFoundError from dicomtrolley.mint import MintQuery from tests.factories import ( InstanceReferenceFactory, @@ -179,3 +183,91 @@ def test_to_series_level_refs(a_study_level_study): to_series_level_refs( [SeriesReferenceFactory(), InstanceReferenceFactory()] ) + + +def test_contained_references_for_references(): + """DICOM object references can only return themselves, or error""" + + instance_ref: InstanceReference = InstanceReferenceFactory() + # an instance reference is the deepest level, so this can always return itself + for level in DICOMObjectLevels: + assert ( + instance_ref.contained_references(max_level=level)[0] + == instance_ref + ) + + series_ref: SeriesReference = SeriesReferenceFactory() + # a series reference can return itself at series or study level + for level in (DICOMObjectLevels.STUDY, DICOMObjectLevels.SERIES): + assert ( + series_ref.contained_references(max_level=level)[0] == series_ref + ) + + # but can't return instance level. 
There is no info for that
+    with pytest.raises(NoReferencesFoundError):
+        series_ref.contained_references(max_level=DICOMObjectLevels.INSTANCE)
+
+    # study ref can only return at study level
+    study_ref: StudyReference = StudyReferenceFactory()
+    assert (
+        study_ref.contained_references(max_level=DICOMObjectLevels.STUDY)[0]
+        == study_ref
+    )
+
+    for level in (DICOMObjectLevels.INSTANCE, DICOMObjectLevels.SERIES):
+        with pytest.raises(NoReferencesFoundError):
+            study_ref.contained_references(max_level=level)
+
+
+def test_contained_references_for_dicom_objects(some_studies):
+    """DICOM objects can contain additional dicom levels"""
+    image_level_study, study_level_study = some_studies
+
+    # If images (instances) are in there, this should be OK:
+    assert (
+        len(
+            image_level_study.contained_references(
+                max_level=DICOMObjectLevels.INSTANCE
+            )
+        )
+        == 18
+    )
+    assert (
+        len(
+            image_level_study.contained_references(
+                max_level=DICOMObjectLevels.SERIES
+            )
+        )
+        == 2
+    )
+    assert (
+        len(
+            image_level_study.contained_references(
+                max_level=DICOMObjectLevels.STUDY
+            )
+        )
+        == 1
+    )
+
+    # But if there is no image info, trying to go deeper will fail
+    for level in (DICOMObjectLevels.INSTANCE, DICOMObjectLevels.SERIES):
+        with pytest.raises(NoReferencesFoundError):
+            study_level_study.contained_references(max_level=level)
+
+    # staying at the same level is OK though
+    assert study_level_study.contained_references(
+        max_level=DICOMObjectLevels.STUDY
+    )
+
+    # Anything is OK for an instance, as it is the deepest level
+    an_instance = image_level_study.series[0].instances[0]
+    for level in DICOMObjectLevels:
+        assert an_instance.contained_references(max_level=level)
+
+    # Just topping off with series. 
as we're on such a nice testing spree here + a_series = image_level_study.series[0] + a_series.instances = [] # without instances + for level in (DICOMObjectLevels.SERIES, DICOMObjectLevels.STUDY): + assert a_series.contained_references(max_level=level) + with pytest.raises(NoReferencesFoundError): + a_series.contained_references(max_level=DICOMObjectLevels.INSTANCE) diff --git a/tests/test_datastructures.py b/tests/test_datastructures.py new file mode 100644 index 0000000..529a4a3 --- /dev/null +++ b/tests/test_datastructures.py @@ -0,0 +1,211 @@ +from datetime import datetime, timedelta + +import pytest + +from dicomtrolley.datastructures import ( + ExpiringCollection, + PruneStrategy, + TreeNode, + addr, +) + + +def test_parsing_node(): + node = TreeNode() + node["a"]["b"]["c"].data = "some data" + assert node["a"]["b"]["c"].data == "some data" + + node["a"]["b2"].data = "some other data" + assert list(node["a"].keys()) == ["b", "b2"] + + +def test_parsing_node_exceptions(): + node = TreeNode(allow_overwrite=False) + node["a"]["b"].data = "some data" + with pytest.raises(ValueError): + node["a"]["b"].data = "some other data" + + # this should not raise anything + node2 = TreeNode(allow_overwrite=True) + node2["a"]["b"].data = "some data" + node2["a"]["b"].data = "some other data" + + +def test_parsing_node_exists_check(): + node = TreeNode() + node["a"]["b"]["c"].data = "some data" + assert node.exists(address=["a", "b", "c"]) + assert not node.exists(address=["a", "b", "d"]) + assert not node.exists(address=["f", "s"]) + + +def test_parsing_node_addressing(): + """You should be able to use dict notation (tree[key1][key2]) and List of strings + (Tree.get(['key1'.'key2'])) + """ + + node = TreeNode() + _ = node["a"]["b"] # addressing means creating + assert node.exists(addr("a.b")) + + # checking existence does not create + assert not node.exists(addr("d.b")) + assert not node.exists(addr("d.b")) + + # checking with get_node does create + _ = 
node.get_node(addr("d.b")) + assert node.exists(addr("d.b")) + + +def test_parsing_node_prune(): + """You can remove nodes yes""" + + # simple parent with two child nodes + root = TreeNode() + _ = root.get_node(addr("a.b")) + _ = root.get_node(addr("a.c")) + _ = root.get_node(addr("a.d")) + assert root.exists(addr("a.b")) + assert root.exists(addr("a.c")) + assert root.exists(addr("a.d")) + + # Pruning child will leave siblings + root.prune(addr("a.b")) + assert not root.exists(addr("a.b")) + assert root.exists(addr("a.c")) + + # Pruning parent will remove siblings + root.prune(addr("a")) + assert not root.exists(addr("a.c")) + + # this should not work + with pytest.raises(KeyError): + root.prune([]) + + +def test_parsing_node_prune_leaf(): + root = TreeNode() + _ = root.get_node(addr("a.b")) + + # not allowed to prune stem node + with pytest.raises(ValueError): + root.prune_leaf(addr("a")) + + # but leaf is ok + root.prune_leaf(addr("a.b")) + root.prune_leaf(addr("a")) + + # this should not work + with pytest.raises(KeyError): + root.prune_leaf([]) + + +def test_parsing_node_prune_all(): + """When pruning many nodes at once, make sure to handle parent-child relations""" + + # You can pass the stem before the leaf, prune_leaf_all will sort + tree1 = TreeNode.init_from_addresses([addr("a.b.d"), addr("a.c")]) + tree1.prune_all( + [addr("a.b"), addr("a.b.d")], strategy=PruneStrategy.CHECK_FIRST + ) + + # Prune all won't prune 'a' as 'a.c' still exists + tree2 = TreeNode.init_from_addresses([addr("a.b.d"), addr("a.c")]) + with pytest.raises(KeyError): + tree2.prune_all( + [addr("a"), addr("a.b.d")], strategy=PruneStrategy.CHECK_FIRST + ) + + # If you pass 'a.c' for pruning as well there is no problem + tree3 = TreeNode.init_from_addresses([addr("a.b.d"), addr("a.c")]) + tree3.prune_all( + [addr("a.b"), addr("a.b.d"), addr("a.c")], + strategy=PruneStrategy.CHECK_FIRST, + ) + + +@pytest.fixture() +def an_expiring_collection(): + """A 5 minute-expiring collection with a 
.set_time function for debugging""" + collection = ExpiringCollection(expire_after_seconds=300) + now = datetime.now() + + def set_time(secs): + collection._now = lambda: now + timedelta(seconds=secs) + + return collection, set_time + + +def test_expiring_collection(an_expiring_collection): + collection, set_time = an_expiring_collection + + # you can get a thing + collection.add("a_thing") + assert collection.items + assert not collection.collect_expired() + + # unless its expired + set_time(600) + + assert not collection.items + assert collection.collect_expired() == ["a_thing"] + + # adding a thing a second time will update the times + set_time(0) + collection.add(12) # add '12' at time 0. should expire in 300 + set_time(200) + collection.add(12) # add it again + assert collection.items == [12] # no doubles + set_time(400) + assert collection.items == [12] # last add was less than 300 ago + + set_time(700) + assert collection.items == [] # but now that too has timed out + + +def test_expiring_collection_update_clash(an_expiring_collection): + """If you update an object timestamp, order of timestamps should be maintained""" + collection, set_time = an_expiring_collection + collection.add_all(["item1", "item2"]) + set_time(200) + collection.add("item1") # item2 should now expire before item1 + set_time(400) # item 2 should now be expired. Is it handled properly? 
+ assert collection.collect_expired() == ["item2"] + assert collection.items == ["item1"] + + +@pytest.fixture +def a_tree(): + """Produce this tree: + # A + # / \ + # B C + # / \ + # D E + """ + + return TreeNode.init_from_addresses( + [addr("a.b.d"), addr("a.b.e"), addr("a.c")] + ) + + +@pytest.mark.parametrize( + "strategy, result", + [ + (PruneStrategy.FORCE, {"ac"}), + (PruneStrategy.WHERE_POSSIBLE, {"abe", "ac"}), + (PruneStrategy.CHECK_FIRST, {"abd", "abe", "ac"}), + ], +) +def test_prune_strategies(a_tree, strategy, result): + try: + a_tree.prune_all( + addresses=[addr("a.b"), addr("a.b.d")], strategy=strategy + ) + except KeyError: + pass + + addresses = [x for x in a_tree.iter_leaf_addresses()] + address_strings = {"".join(x) for x in addresses} + + assert address_strings == result diff --git a/tests/test_parsing.py b/tests/test_parsing.py index c6c1f0c..077f518 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -5,7 +5,7 @@ StudyReference, ) from dicomtrolley.exceptions import DICOMTrolleyError -from dicomtrolley.parsing import DICOMObjectTree, DICOMParseTree, TreeNode +from dicomtrolley.parsing import DICOMObjectTree, DICOMParseTree from tests.factories import ( create_c_find_image_response, create_c_find_study_response, @@ -14,27 +14,6 @@ ) -def test_parsing_node(): - node = TreeNode() - node["a"]["b"]["c"].data = "some data" - assert node["a"]["b"]["c"].data == "some data" - - node["a"]["b2"].data = "some other data" - assert list(node["a"].keys()) == ["b", "b2"] - - -def test_parsing_node_exceptions(): - node = TreeNode(allow_overwrite=False) - node["a"]["b"].data = "some data" - with pytest.raises(ValueError): - node["a"]["b"].data = "some other data" - - # this should not raise anything - node2 = TreeNode(allow_overwrite=True) - node2["a"]["b"].data = "some data" - node2["a"]["b"].data = "some other data" - - def test_parse_tree(): """Parse flat datasets into a tree structure""" tree = DICOMParseTree() diff --git 
a/tests/test_qido_rs.py b/tests/test_qido_rs.py index bbe1267..7716776 100644 --- a/tests/test_qido_rs.py +++ b/tests/test_qido_rs.py @@ -1,6 +1,7 @@ from datetime import datetime import pytest +from pydantic import ValidationError from dicomtrolley.core import Query, QueryLevels from dicomtrolley.qido_rs import HierarchicalQuery, QidoRS, RelationalQuery @@ -33,12 +34,13 @@ def test_valid_hierarchical_query(query_params): [ {"SeriesInstanceUID": "123"}, {"SOPClassInstanceUID": "789"}, + {"SOPClassInstanceUID": "789", "query_level": QueryLevels.INSTANCE}, {"min_study_date": datetime(year=2023, month=5, day=29)}, ], ) def test_invalid_hierarchical_query(query_params): """These queries are invalid, should not pass init validation""" - with pytest.raises(ValueError): + with pytest.raises((ValueError, ValidationError)): HierarchicalQuery(**query_params) @@ -166,3 +168,28 @@ def test_ensure_query_type(a_qido): Query(AccessionNumber="123", query_level=QueryLevels.SERIES) ) assert ensured.uri_base() == "/series" + + +def test_mop_up(a_qido): + """Checks previously uncovered parts""" + assert RelationalQuery(query_level=QueryLevels.INSTANCE).uri_base() + + with pytest.raises( + ValidationError + ): # relational at study level makes no sense + assert RelationalQuery(query_level=QueryLevels.STUDY) + + assert RelationalQuery( + query_level=QueryLevels.INSTANCE, StudyInstanceUID="123" + ).uri_base() + + with pytest.raises(ValidationError): + assert RelationalQuery(query_level="unknown") + + query = HierarchicalQuery( + StudyInstanceUID="234", + SeriesInstanceUID="123", + query_level=QueryLevels.INSTANCE, + ) + assert query.uri_base() + assert query.uri_search_params() == {} diff --git a/tests/test_trolley.py b/tests/test_trolley.py index 902e8db..568e48f 100644 --- a/tests/test_trolley.py +++ b/tests/test_trolley.py @@ -3,27 +3,18 @@ import pytest -from dicomtrolley.core import SeriesReference, StudyReference from dicomtrolley.exceptions import DICOMTrolleyError -from 
dicomtrolley.mint import Mint, MintQuery -from dicomtrolley.parsing import DICOMObjectTree +from dicomtrolley.mint import MintQuery from dicomtrolley.storage import FlatStorageDir from dicomtrolley.trolley import ( - CachedSearcher, - MissingObjectInformationError, Trolley, ) -from tests.conftest import create_mint_study, set_mock_response + +from tests.conftest import create_mint_study from tests.factories import ( - InstanceReferenceFactory, - SeriesReferenceFactory, StudyReferenceFactory, quick_dataset, ) -from tests.mock_responses import ( - MINT_SEARCH_INSTANCE_LEVEL_IDS, -) -from tests.mock_servers import MINT_SEARCH_INSTANCE_LEVEL_ANY def test_trolley_find(a_trolley, some_mint_studies): @@ -55,7 +46,7 @@ def test_trolley_get_dataset(a_trolley, some_mint_studies): download """ # Search will yield full info for the missing study - a_trolley.searcher.find_study_at_instance_level = Mock( + a_trolley.searcher.find_study_by_id = Mock( return_value=create_mint_study( uid="1.2.340.114850.2.857.2.793263.2.125336546.1" ) @@ -172,90 +163,3 @@ def test_trolley_encapsulation_error(a_trolley): # this should be caught an raised as a TrolleyError with pytest.raises(DICOMTrolleyError): a_trolley.download(StudyReferenceFactory(), output_dir="/tmp") - - -def test_cached_searcher_retrieve_instances(a_mint, requests_mock): - # set a single-study response for any mint call - set_mock_response(requests_mock, MINT_SEARCH_INSTANCE_LEVEL_ANY) - - # a reference to the two series contained in response - series_reference_1 = SeriesReference( - study_uid=MINT_SEARCH_INSTANCE_LEVEL_IDS["study_uid"], - series_uid=MINT_SEARCH_INSTANCE_LEVEL_IDS["series_uids"][0], - ) - - # a reference to a series contained in response - series_reference_2 = SeriesReference( - study_uid=MINT_SEARCH_INSTANCE_LEVEL_IDS["study_uid"], - series_uid=MINT_SEARCH_INSTANCE_LEVEL_IDS["series_uids"][1], - ) - - searcher = CachedSearcher(searcher=a_mint) - - assert requests_mock.request_history == [] # no requests have 
been made - - # now ask for a series that is not in cached searcher - instances = searcher.retrieve_instance_references(series_reference_1) - assert len(instances) == 1 # this series has only one instance - assert len(requests_mock.request_history) == 1 # a request has been made - - # ask for the other series - instances = searcher.retrieve_instance_references(series_reference_2) - assert len(instances) == 13 - # No extra requests as the whole study was retrieved - assert len(requests_mock.request_history) == 1 - - -def test_cached_searcher_no_download(a_mint): - """You can disable auto-querying for missing info to limit requests""" - study1 = create_mint_study(uid="1") - study2 = create_mint_study(uid="2") - for series in study2.series: # study2 will have no instance info - series.instances = [] - - # search starts out with some info - searcher = CachedSearcher( - searcher=a_mint, - cache=DICOMObjectTree([study1, study2]), - query_missing=False, - ) - - # requesting study which has instance info should work - assert len(searcher.retrieve_instance_references(study1)) == 14 - - # however, this will raise errors - with pytest.raises(MissingObjectInformationError): - searcher.retrieve_instance_references(study2) - - -def test_cached_searcher_ensure_series(a_mint_study_with_instances): - # a study reference cannot be found, will issue a call - # an instance that is already cached will not yield a call - # a series will not yield a call if it is known - searcher = Mock(spec=Mint) - - a_study = a_mint_study_with_instances - for series in a_study.series: # study will have no instance - series.instances = [] - - cache = CachedSearcher( - searcher=searcher, - cache=DICOMObjectTree([a_study]), - query_missing=False, - ) - - # any instance references should be find regardless of cached or not - cache.ensure_series_level_references( - downloadable=InstanceReferenceFactory() - ) - cache.ensure_series_level_references(downloadable=SeriesReferenceFactory()) - - # there are series 
for this study in cache so should be fine - references = cache.ensure_series_level_references( - downloadable=a_study.reference() - ) - assert len(references) == 2 - - # a new series is not in cache so will not work - with pytest.raises(MissingObjectInformationError): - cache.ensure_series_level_references(StudyReference("unknown_study"))