From 27d4d29f9470b3144902fbf58c12f5363fdca976 Mon Sep 17 00:00:00 2001 From: sjoerdk Date: Thu, 18 Apr 2024 22:11:18 +0200 Subject: [PATCH] Rewrites caching, improves dicom level reasoning Rewrites caching: * Trolley no longer caches results by default, cleans up Trolley. * Completely rewrites CachedSearcher to implement Searcher interface * Caching is now optional by wrapping any searcher instance * Caching is now time-based and done per DICOM object * Adds query caching (previously only get_study_for_id) Makes reasoning about DICOM levels (study/series/instance) easier: * Adds numeric value and comparison to DICOMObjectLevels * Introduces DICOMObject.contained_references() * Merges to_series_level_refs, to_instance_refs into to_refs(level) --- conftest.py | 2 +- dicomtrolley/caching.py | 303 ++++++++++++++++++++++++++ dicomtrolley/core.py | 364 ++++++++++++++++++------------- dicomtrolley/datastructures.py | 379 +++++++++++++++++++++++++++++++++ dicomtrolley/exceptions.py | 8 + dicomtrolley/parsing.py | 48 +---- dicomtrolley/qido_rs.py | 24 +-- dicomtrolley/trolley.py | 308 +++++---------------------- docs/index.md | 2 +- docs/usage.md | 20 +- tests/mock_responses.py | 79 ++++++- tests/test_caching.py | 211 ++++++++++++++++++ tests/test_core.py | 92 ++++++++ tests/test_datastructures.py | 211 ++++++++++++++++++ tests/test_parsing.py | 23 +- tests/test_qido_rs.py | 29 ++- tests/test_trolley.py | 104 +-------- 17 files changed, 1611 insertions(+), 596 deletions(-) create mode 100644 dicomtrolley/caching.py create mode 100644 dicomtrolley/datastructures.py create mode 100644 tests/test_caching.py create mode 100644 tests/test_datastructures.py diff --git a/conftest.py b/conftest.py index 95e47c3..596885f 100644 --- a/conftest.py +++ b/conftest.py @@ -29,7 +29,7 @@ def a_wado(a_session): @pytest.fixture def a_trolley(a_mint, a_wado) -> Trolley: """Trolley instance that will not hit any server""" - return Trolley(searcher=a_mint, downloader=a_wado, query_missing=True) 
+ return Trolley(searcher=a_mint, downloader=a_wado) class NoSaveStorageDir(StorageDir): diff --git a/dicomtrolley/caching.py b/dicomtrolley/caching.py new file mode 100644 index 0000000..9711acf --- /dev/null +++ b/dicomtrolley/caching.py @@ -0,0 +1,303 @@ +"""Storing DICOM query results locally to avoid unneeded calls to server""" + +from typing import Dict, Iterable, List, Optional, Sequence, Tuple + +from dicomtrolley.core import ( + DICOMObject, + DICOMObjectLevels, + DICOMObjectReference, + InstanceReference, + Query, + QueryLevels, + Searcher, + SeriesReference, + Study, + StudyReference, +) +from dicomtrolley.datastructures import ( + ExpiringCollection, + TreeAddress, + TreeNode, +) +from dicomtrolley.exceptions import DICOMTrolleyError +from dicomtrolley.logs import get_module_logger + +logger = get_module_logger("caching") + + +class DICOMObjectCache: + def __init__( + self, + initial_objects: Optional[List[DICOMObject]] = None, + expiry_seconds: Optional[int] = 600, + ): + """A tree holding expiring DICOM objects. Objects can be retrieved by + study/series/instance UID tuples: + + >>>cache = DICOMObjectCache() + >>>cache.add(a_study) # with uid 'study1' and full series and instance info + >>>cache.retrieve(reference=('study1','series')) + + >>>cache.retrieve(reference=('study1','series','instance')) + + # 20 minutes later... study has expired + >>>cache.retrieve(reference=('study1','series')) + + + Parameters + ---------- + initial_objects, optional + Shorthand for add() on all objects in this list. Defaults to empty + + expiry_seconds, optional + Expire objects after this many seconds. If set to None, will disable + expiry. 
Defaults to 600 (10 minutes) + + Notes + ----- + The functionality here is similar to dicomtrolley.parsing.TreeNode, but the + use case is different enough to warrant a separate class I think + + """ + if expiry_seconds is None: + self.expiry = None + else: + self.expiry = ExpiringCollection( + expire_after_seconds=expiry_seconds + ) + self.root = TreeNode() + self._awaiting_prune: List[TreeAddress] = [] + if initial_objects: # not none or empty: + if not isinstance(initial_objects, list): + raise ValueError( + f"Expected list but got {initial_objects}. Did you" + f"forget [braces] for initial_objects value?" + ) + for x in initial_objects: + self.add(x) + + def add_all(self, objects: Iterable[DICOMObject]): + if isinstance(objects, DICOMObject): + # This mistake is common (for me) and causes unreadable errors. Avoid. + raise ValueError( + "parameter 'objects' should be an iterable of " + "DICOMObjects, not a single object" + ) + self.prune_expired() + for obj in objects: + self.add(obj, prune=False) + + def add(self, obj: DICOMObject, prune=True): + """Add this object to cache + + Returns + ------- + The input DICOMObject. Just so you can add and return a value in a single + line in calling code. 
Like new_dicom = cache.add(get_new_dicom()) + """ + logger.debug(f"Adding to cache: {obj}") + if prune: + self.prune_expired() + address = self.to_address(obj.reference()) + self.root.add(obj, address=address) + if self.expiry: + self.expiry.add(address) + + if obj.children(): + for x in obj.children(): + self.add( + x, prune=False + ) # avoid too many calls to prune_expired + + def retrieve(self, reference: DICOMObjectReference): + """Try to retrieve object from cache + + Parameters + ---------- + reference + The dicom object you would like to retrieve + + Raises + ------ + NodeNotFound + If the object does not exist in cache or has expired + + Returns + ------- + DICOMObject + The cached object + """ + self.prune_expired() + try: + data = self.root.get_node( + self.to_address(reference), create=False + ).data + if data: + return data + else: + raise NodeNotFound( + f"Node found in cache, but no data for reference " + f"{reference}" + ) + except KeyError as e: + raise NodeNotFound( + f"No node found in cache for reference {reference}" + ) from e + + def prune_expired(self): + """Remove all expired nodes""" + if not self.expiry: + logger.debug("prune: not pruning as self.expiry = False") + return # don't do anything + expired = self.expiry.collect_expired() + self._awaiting_prune = self._awaiting_prune + expired + self._awaiting_prune.sort(key=lambda x: len(x)) + prune_later = [] + pruned = [] + while self._awaiting_prune: + address = self._awaiting_prune.pop() # work from last + try: + self.root.prune_leaf(address) + pruned.append(address) + except ValueError: + # was not a leaf. Make empty and save for later + self.root.get_node(address).data = None + prune_later.append(address) + if pruned: + msg = f"prune: Pruned away {len(pruned)} leaves: ({pruned})" + if prune_later: + msg += f"could not prune {len(prune_later)}. 
Leaving those for later" + logger.debug(msg) + + self._awaiting_prune = prune_later + + @staticmethod + def to_address(ref: DICOMObjectReference) -> TreeAddress: + """Convert reference to address that can be used in TreeNode""" + if isinstance(ref, StudyReference): + return (ref.study_uid,) + elif isinstance(ref, SeriesReference): + return ref.study_uid, ref.series_uid + elif isinstance(ref, InstanceReference): + return ref.study_uid, ref.series_uid, ref.instance_uid + else: + raise ValueError(f"Expected DICOM object reference, but got {ref}") + + +class QueryCache: + """Caches the response to DICOM queries""" + + def __init__(self, cache: DICOMObjectCache): + self.cache = cache + self.queries: Dict[Query, Tuple[DICOMObjectReference, ...]] = {} + + def add_response(self, query: Query, response: Sequence[DICOMObject]): + """Cache response for this query""" + self.cache.add_all(response) + references = tuple(x.reference() for x in response) + self.queries[query.json()] = references + + def get_response(self, query: Query) -> List[Study]: + """Obtain cached response for this query + + Raises + ------ + NodeNotFound + If any of the results of query are not in cache or have expired + """ + try: + references = self.queries[query.json()] + except KeyError as e: + raise NodeNotFound( + f"Query {query.to_short_string()} not found in cache" + ) from e + + try: + retrieved = [self.cache.retrieve(x) for x in references] + logger.debug( + f"Found all ({len(retrieved)}) objects in cache for " + f"{query.to_short_string()}. Returning." + ) + return retrieved + except NodeNotFound as e: + # This query response is not (fully) cached anymore. Remove + self.queries.pop(query.json()) + raise NodeNotFound( + f"One or more response to {query.to_short_string()} " + f"was not in cache" + ) from e + + +class CachedSearcher(Searcher): + """A cache wrapped around a Searcher instance. Serves search responses from + cache first. Calls searcher if needed. 
+ + Caches two types of searcher method calls: + find_studies(Query): + Caches each returned DICOM object individually and then associates this + with the incoming query. Only if the query matches exactly is a cached + response returned. + Associates DICOM tree address (study/series/instance). Not DICOM object + identities. This means that an underlying DICOM object can be updated without + invalidating the cache response. The cached response to a query will be + returned as long as there are non-expired cached objects at each associated + tree address + + find_study_by_id(study_id, level): + Retrieves study_id from cache and checks whether it has children up to + the required level (depth). + + """ + + def __init__(self, searcher: Searcher, cache: DICOMObjectCache): + self.searcher = searcher + self.cache = cache + self.query_cache = QueryCache(cache=cache) + + def __str__(self): + return f"CachedSearcher for {self.searcher}" + + def find_studies(self, query: Query) -> Sequence[Study]: + """Try to return from cache, otherwise call searcher.""" + try: + return self.query_cache.get_response(query) + except NodeNotFound: + logger.debug( + f"No cache for {query.to_short_string()}." + f"Performing query with {self.searcher}" + ) + response = self.searcher.find_studies(query) + self.query_cache.add_response(query, response) + return response + + def find_study_by_id( + self, study_uid: str, query_level: QueryLevels = QueryLevels.STUDY + ) -> Study: + """Find a single study at the given depth""" + try: + from_cache: Study = self.cache.retrieve( + StudyReference(study_uid=study_uid) + ) + if ( + from_cache.max_object_depth() + > DICOMObjectLevels.from_query_level(query_level) + ): + raise NodeNotFound( + f"{from_cache} found in cache, but did not contain " + f"objects up to '{query_level}' level" + ) + return from_cache + except NodeNotFound as e: + logger.debug( + f"Could not find study in cache ({e}). 
Launching query to find" + f"additional info" + ) + study = self.searcher.find_study_by_id( + study_uid, query_level=query_level + ) + self.cache.add(study) + return study + + +class NodeNotFound(DICOMTrolleyError): + pass diff --git a/dicomtrolley/core.py b/dicomtrolley/core.py index 5bed543..4d72d46 100644 --- a/dicomtrolley/core.py +++ b/dicomtrolley/core.py @@ -1,9 +1,17 @@ """Provides common base classes that allow modules to talk to each other.""" from dataclasses import dataclass from datetime import date, datetime -from enum import Enum +from enum import Enum, IntEnum from itertools import chain -from typing import List, Optional, Sequence, Type, TypeVar, Union +from typing import ( + Iterable, + List, + Optional, + Sequence, + Tuple, + Type, + TypeVar, +) from pydantic import Field, ValidationError from pydantic.class_validators import validator @@ -13,28 +21,78 @@ from dicomtrolley.exceptions import ( DICOMTrolleyError, + NoReferencesFoundError, UnSupportedParameterError, ) -class DICOMObjectLevels: - """DICOM uses a study->series->instance structure""" +class DICOMObjectLevels(IntEnum): + """Represents DICOM study->series->instance structure + + Level is ordered. Study is highest, instance the lowest level. + + Notes + ----- + Similar to core.QueryLevels, but different enough to warrant separate classes. + DICOMObjectLevel is a property of a DICOM object. QueryLevel is a property of + a Query. 
+ """ - STUDY = "Study" - SERIES = "Series" - INSTANCE = "Instance" + STUDY = 2 + SERIES = 1 + INSTANCE = 0 - all = [STUDY, SERIES, INSTANCE] + @classmethod + def from_query_level(cls, level: "QueryLevels"): + if level == QueryLevels.STUDY: + return cls.STUDY + elif level == QueryLevels.SERIES: + return cls.SERIES + elif level == QueryLevels.INSTANCE: + return cls.INSTANCE + else: + raise ValueError(f"Unknown DICOMObjectLevels {level}") class DICOMDownloadable: """An object that can be downloaded by a Downloader""" def reference(self) -> "DICOMObjectReference": - raise NotImplementedError + raise NotImplementedError() + + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + """DICOM object references contained in this object at the given level. + + For example, a Study might contain information for all its Instances. In + this case sub_references(max_level=Instance) will return InstanceReferences + + Parameters + ---------- + max_level: + The returned references will be this level or lower. For example, an + instance will return a reference to itself when max_level is Series: + >>> Instance.contained_references(max_level=Series) + (InstanceReference,) + A study will be broken up: + >>> Study.contained_references(max_level=Series) + (SeriesReference, SeriesReference, ...) + + Returns + ------- + Tuple of references. Of max_level or lower + + Raises + ------ + NoReferencesFoundError + If sub-references cannot be obtained from this downloadable. + + """ + raise NotImplementedError() -@dataclass +@dataclass(frozen=True) # make this hashable and non-modifyable class DICOMObjectReference(DICOMDownloadable): """Points to a study, series, or instance by uid @@ -47,20 +105,31 @@ class DICOMObjectReference(DICOMDownloadable): study_uid: str @property - def level(self) -> str: + def level(self): """Does this reference point to Study, Series or Instance? 
Returns ------- str One of the values in DICOMObjectLevels.all """ - return "" + return NotImplementedError() def reference(self): return self + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + """DICOM object references either return themselves or an error""" + if max_level < self.level: + raise NoReferencesFoundError( + f"{self} does not contain references of {str(max_level)} or lower" + ) + else: + return (self.reference(),) + -@dataclass +@dataclass(frozen=True) class InstanceReference(DICOMObjectReference): """All information needed to download a single slice (SOPInstance)""" @@ -79,7 +148,7 @@ def level(self): return DICOMObjectLevels.INSTANCE -@dataclass +@dataclass(frozen=True) class SeriesReference(DICOMObjectReference): """Reference to a single Series, part of a study""" @@ -94,7 +163,7 @@ def level(self): return DICOMObjectLevels.SERIES -@dataclass +@dataclass(frozen=True) class StudyReference(DICOMObjectReference): """Reference to a single study""" @@ -167,6 +236,28 @@ def all_series(self) -> List["Series"]: """ raise NotImplementedError() + def max_object_depth(self) -> DICOMObjectLevels: + """What is the deepest object level? Does contain only Study or also Series, + Instances? + """ + raise NotImplementedError() + + @staticmethod + def extract_refs( + objects: Iterable["DICOMObject"], + ) -> Tuple[DICOMObjectReference, ...]: + """Extract reference from each object and put in a tuple, raise error for + empty objects. 
For cleaner code in DICOMObject.contained_references() + + Raises + ------ + NoReferencesFoundError + if objects is empty + """ + if not objects: + raise NoReferencesFoundError() + return tuple(x.reference() for x in objects) + class Instance(DICOMObject): parent: "Series" @@ -192,6 +283,17 @@ def reference(self) -> InstanceReference: instance_uid=self.uid, ) + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + return (self.reference(),) # instance is deepest level so always OK + + def max_object_depth(self) -> DICOMObjectLevels: + """What is the deepest object level? Does contain only Study or also Series, + Instances? + """ + return DICOMObjectLevels.INSTANCE + class Series(DICOMObject): instances: Sequence[Instance] @@ -226,6 +328,28 @@ def reference(self) -> SeriesReference: """Return a Reference to this object using uids""" return SeriesReference(study_uid=self.parent.uid, series_uid=self.uid) + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + + if max_level == DICOMObjectLevels.STUDY: + return self.extract_refs([self]) + elif max_level == DICOMObjectLevels.SERIES: + return self.extract_refs([self]) + elif max_level == DICOMObjectLevels.INSTANCE: + return self.extract_refs(self.all_instances()) + else: + raise ValueError(f'unknown DICOMObjectLevel "{str(max_level)}"') + + def max_object_depth(self) -> DICOMObjectLevels: + """What is the deepest object level? Does contain only Study or also Series, + Instances? 
+ """ + if self.instances: + return DICOMObjectLevels.INSTANCE + else: + return DICOMObjectLevels.SERIES + class Study(DICOMObject): series: Sequence[Series] @@ -259,6 +383,28 @@ def reference(self) -> StudyReference: """Return a Reference to this object using uids""" return StudyReference(study_uid=self.uid) + def contained_references( + self, max_level: DICOMObjectLevels + ) -> Tuple["DICOMObjectReference", ...]: + + if max_level == DICOMObjectLevels.STUDY: + return self.extract_refs([self]) + elif max_level == DICOMObjectLevels.SERIES: + return self.extract_refs(self.all_series()) + elif max_level == DICOMObjectLevels.INSTANCE: + return self.extract_refs(self.all_instances()) + else: + raise ValueError(f'unknown DICOMObjectLevel "{str(max_level)}"') + + def max_object_depth(self) -> DICOMObjectLevels: + """What is the deepest object level? Does contain only Study or also Series, + Instances? + """ + if self.series: + return self.series[0].max_object_depth() + else: + return DICOMObjectLevels.STUDY + class NonInstanceParameterError(DICOMTrolleyError): """A DICOMDownloadable could not be converted into its constituent instances. @@ -282,106 +428,65 @@ class NonSeriesParameterError(DICOMTrolleyError): """ -def to_instance_refs( - objects: Sequence[DICOMDownloadable], -) -> List[InstanceReference]: - """Convert all input to instance references. Raise informative errors. +def to_instance_refs(objects: Sequence[DICOMDownloadable]): + """Convert all to instance references. See to_refs()""" + try: + return to_references(objects, max_level=DICOMObjectLevels.INSTANCE) + except NoReferencesFoundError as e: + raise NonInstanceParameterError( + f"Cannot obtain instance references: {e}" + ) from e - This is a common pre-processing step used by Downloader classes. Many cannot - download higher-level objects like 'study 123' directly, but instead require - you to query all instances contained in 'study 123' first and pass those to - the downloader. 
- Parameters - ---------- - objects: - Convert these objects +def to_series_level_refs(objects: Sequence[DICOMDownloadable]): + """Convert all to at least series references.""" + try: + return to_references(objects, max_level=DICOMObjectLevels.SERIES) + except NoReferencesFoundError as e: + raise NonSeriesParameterError( + f"Cannot obtain series references: {e}" + ) from e - Returns - ------- - List[InstanceReference] - References to all instances contained in objects - Raises - ------ - NonInstanceParameterError - If any input object could not be converted into instances. This is the - case for higher-level references like StudyReference, or for DICOMObjects - that do not contain any deeper-level information, such as a Study that - contains no Series (which is the correct result of Study-level queries). - - """ - output: List[InstanceReference] = [] - for obj in objects: - if isinstance(obj, InstanceReference): - output.append(obj) # Already an instance reference. Just add - elif isinstance(obj, DICOMObjectReference): - raise NonInstanceParameterError( - f"Cannot extract instances from '{obj}' " - ) - elif isinstance(obj, DICOMObject): - instances = obj.all_instances() # Extract instances - if instances: - output = output + [x.reference() for x in instances] - else: - raise NonInstanceParameterError( - f"{obj} contains no instances. " - f"Was this information queried for?" - ) - - return output - - -def to_series_level_refs( - objects: Sequence[DICOMDownloadable], -) -> List[Union[SeriesReference, InstanceReference]]: - """Make sure all input objects are Series or Instance references. - Convert if possible, raise exceptions if not. - - Weaker version of to_instance_refs(). Weaker because series references without - instance info are also allowed here. See that function for more info +def to_references( + objects: Sequence[DICOMDownloadable], max_level: DICOMObjectLevels +) -> List[DICOMObjectReference]: + """Make sure all input objects are of max_level or lower. 
Extract lower level + references if possible. For example, if max_level is series, try to extract + series references from a study. Raise exceptions if not possible. Parameters ---------- objects: Convert these objects + max_level: + Return references of this level or lower, otherwise raise exception. Returns ------- List[Union[SeriesReference, InstanceReference]] References to series and instances contained in input objects + Notes + ----- + This is a common pre-processing step used by Downloader classes. Many cannot + download higher-level objects like 'study 123' directly, but instead require + you to query all instances contained in 'study 123' first and pass those to + the downloader. + Raises ------ - NonSeriesParameterError + NoReferencesFoundError If any input object could not be converted into instances. This is the case for higher-level references like StudyReference, or for DICOMObjects that do not contain any deeper-level information, such as a Study that - contains no Series (which is the correct result of Study-level queries). + contains no Series (which is a correct result for Study-level queries). """ - output: List[Union[InstanceReference, SeriesReference]] = [] + refs: List[DICOMObjectReference] = [] for obj in objects: - if isinstance( - obj, (InstanceReference, SeriesReference, Instance, Series) - ): - # already OK, just add - output.append(obj.reference()) - elif isinstance(obj, StudyReference): - # no series in here, I don't have enough info - raise NonSeriesParameterError( - f"Cannot extract series from '{obj}' " - ) - elif isinstance(obj, Study): - if obj.series: - for series in obj.series: - output.append(series.reference()) - else: - raise NonSeriesParameterError( - f"Cannot extract series from '{obj}'" - f". 
It contains no series " - ) - return output + refs.extend(obj.contained_references(max_level=max_level)) + return refs class Downloader: @@ -416,7 +521,7 @@ def datasets(self, objects: Sequence[DICOMDownloadable]): download can only process Instance targets. See Exception docstring for rationale """ - raise NotImplementedError + raise NotImplementedError() def datasets_async( self, instances: Sequence[InstanceReference], max_workers=None @@ -440,7 +545,7 @@ def datasets_async( ------- Iterator[Dataset, None, None] """ - raise NotImplementedError + raise NotImplementedError() class QueryLevels(str, Enum): @@ -450,6 +555,18 @@ class QueryLevels(str, Enum): SERIES = "SERIES" INSTANCE = "INSTANCE" + @classmethod + def from_object_level(cls, level: DICOMObjectLevels): + """Convert from DICOMObjectLevel enum. See docstring there for reason""" + if level == DICOMObjectLevels.STUDY: + return cls.STUDY + elif level == DICOMObjectLevels.SERIES: + return cls.SERIES + elif level == DICOMObjectLevels.INSTANCE: + return cls.INSTANCE + else: + raise ValueError(f"Unknown DICOMObjectLevels {level}") + # Used in Query.init_from_query() type annotation QuerySubType = TypeVar("QuerySubType", bound="Query") @@ -619,8 +736,10 @@ def find_study(self, query: Query) -> Study: ) return results[0] - def find_full_study_by_id(self, study_uid: str) -> Study: - """Find a single study at image level + def find_study_by_id( + self, study_uid: str, query_level: QueryLevels = QueryLevels.STUDY + ) -> Study: + """Find a single study at the given depth Useful for automatically finding all instances for a study. Meant to be implemented in child classes @@ -629,57 +748,16 @@ def find_full_study_by_id(self, study_uid: str) -> Study: ---------- study_uid: str Study to search for + query_level: QueryLevels + Depth of search. Whether to find only study level info or go deeper into + series or instances. 
Deeper levels will result in slower, more extensive + searches Returns ------- Study + Potentially containing series or instances, dependent on query_level - Raises - ------ - DICOMTrolleyError - If no results or more than one result is returned by query - """ - raise NotImplementedError() - - def find_study_at_instance_level(self, study_uid: str) -> Study: - """Find a single study at image level - - Useful for automatically finding all instances for a study. - - Parameters - ---------- - study_uid: str - Study to search for - - Returns - ------- - Study - Containing full DICOM object information, series and instances - - Raises - ------ - DICOMTrolleyError - If no results or more than one result is returned by query - """ - return self.find_study( - Query(StudyInstanceUID=study_uid, query_level=QueryLevels.INSTANCE) - ) - - def find_study_at_series_level(self, study_uid: str) -> Study: - """Find a single study at series level - - Meant to be - implemented in child classes - - Parameters - ---------- - study_uid: str - Study to search for - - Returns - ------- - Study - Containing series, but not instances Raises ------ @@ -687,7 +765,7 @@ def find_study_at_series_level(self, study_uid: str) -> Study: If no results or more than one result is returned by query """ return self.find_study( - Query(StudyInstanceUID=study_uid, query_level=QueryLevels.SERIES) + Query(StudyInstanceUID=study_uid, query_level=query_level) ) diff --git a/dicomtrolley/datastructures.py b/dicomtrolley/datastructures.py new file mode 100644 index 0000000..ffda44f --- /dev/null +++ b/dicomtrolley/datastructures.py @@ -0,0 +1,379 @@ +"""Additional datastructures that do not belong to any specific dicomtrolley +class or function + +""" +from datetime import datetime +from enum import Enum +from typing import ( + Any, + DefaultDict, + Hashable, + Iterable, + Iterator, + List, + Sequence, + Tuple, +) +from typing import OrderedDict as OrderedDictType + +# Used in TreeNode functions +TreeAddress = 
Tuple[str, ...] + + +def addr(dot_address: str): + """Convenience method for creating an address with dot notation + + Works quick and readable as long as you don't have dots in your address keys. + If you do, just use a ['list','of','strings','with.dots.if.you.have.to'] + """ + return tuple(dot_address.split(".")) + + +class PruneStrategy(str, Enum): + """How to handle pruning multiple nodes at once. + Consider this tree with five nodes: + A + / \ + B C + / \ + D E + + I want to prune node B and E. + There are two choices to be made with regard to this. + Choice 1: + Pruning node B will orphan node D, and make it inaccessible. Is this OK? + Choice 2: + If orphaning node D is not OK, is it still ok to remove E? Or should we just + disallow the whole pruning? + + These two choices inform tree strategies: + FORCE + Just prune. Yes you can prune B. D and E will disappear and that's fine + + WHERE_POSSIBLE + Let's not get overzealous. You can't prune B, but you can still prune E. + + CHECK_FIRST + Let's be very careful. Only prune if ALL nodes can be pruned. This + assures that a successful prune removes all nodes + + """ + + FORCE = "Force" # prune all nodes, don't check for anything + + WHERE_POSSIBLE = ( + "Where_possible" # prune nodes without children where you can + ) + + CHECK_FIRST = "Check_first" # only prune if all targets can be pruned + + +class TreeNode(DefaultDict[str, "TreeNode"]): + """Recursive defaultdict with a 'data' property. Helps parse to tree structures. + + There is no native tree datastructure in python as far as I know. 
+ + Some properties: + * Inherits 'access-means-creation' functionality from defaultdict: + ``` + >>> root = TreeNode() + >>> root['study1']['series1']['instance1'].data = 'some instance info' + >>> 'study1' in root + True + ``` + * Address nodes by nested dict notation or by list of strings: + ``` + >>> root = TreeNode() + >>> root['study1']['series1']['instance1'].data = 'some instance info' + >>> 'study1' in root + True + ``` + + Examples + -------- + >>> root = TreeNode() + >>> root['study1']['series1']['instance1'].data = 'some instance info' + >>> 'study1' in root + True + >>> root['study1']['series2'].data = 'some series data' + >>> list(root['study1'].keys) + ['series1', 'series2'] + """ + + def __init__(self, data=None, allow_overwrite=True): + """ + + Parameters + ---------- + data: + Optional data to associate with this node + allow_overwrite: bool, optional + If False, will raise exception when overwriting data attribute + """ + super().__init__(lambda: TreeNode(allow_overwrite=allow_overwrite)) + self._data = data + self.allow_overwrite = allow_overwrite + + @property + def data(self): + return self._data + + @data.setter + def data(self, value): + if self.allow_overwrite or not self.data: + self._data = value + else: + raise ValueError("Overwriting data is not allowed") + + @classmethod + def init_from_addresses(cls, addresses: Sequence[TreeAddress]): + """Create TreeNode and make sure all given addresses exist""" + root = cls() + for address in addresses: + root.get_node(address) + return root + + def iter_leaf_addresses( + self, + address_so_far: Tuple[str, ...] 
= (), + ) -> Iterator[TreeAddress]: + """Generate addresses for all this nodes' children, depth-first""" + + if not self.keys(): + yield address_so_far + else: + for key in self.keys(): + yield from self[key].iter_leaf_addresses( + address_so_far=address_so_far + tuple(key) + ) + + def exists(self, address: TreeAddress) -> bool: + """Check whether a node exists at address without creating any new nodes + + Parameters + ---------- + address: + subsequent keys. To check TreeNode()['a']['b']['c'], use ['a', 'b', 'c'], + for example + """ + if not address: + return True # required for recursive calls below to work + else: + key, address = address[0], address[1:] + child = self.get(key) + if child is not None: + return child.exists(address) # recurse + else: + return False + + def add(self, object_in: Any, address: TreeAddress): + self.get_node(address).data = object_in + + def copy(self) -> "TreeNode": + """Create a copy of this node and all children""" + copied = TreeNode(data=self.data, allow_overwrite=self.allow_overwrite) + for key, item in self.items(): + copied[key] = item.copy() + + return copied + + def pop_address(self, address: TreeAddress): + """Pop off the leaf node at this address. Leave stem nodes. + + Raises + ------ + KeyError + If node at address does not exist. Follows dict.pop() behaviour. + """ + if not address: + raise KeyError("Address was empty. Cannot pop self!") + + key, address = address[0], address[1:] + try: + if not address: # last key in address has been reached. 
We can pop + return self.pop(key) + else: # + return self.get_node(tuple(key), create=False).pop_address( + address + ) + except KeyError as e: + raise KeyError(f"No node found at {address}.{key}") from e + + def prune(self, address: TreeAddress): + """Remove the node at this address, including any child nodes + + Raises + ------ + KeyError + If node at address does not exist + """ + _ = self.pop_address(address) + + def is_leaf(self): + return not self.keys() + + def prune_leaf(self, address: TreeAddress): + """Prune node at this address only if it has no children + + Raises + ------ + KeyError + When the referenced node does not exist + ValueError + When referenced node is not a leaf (still has child nodes) + """ + + if not address: + raise KeyError("Address was empty. Cannot prune self!") + + key, address_rest = address[0], address[1:] + try: + child = self.get_node((key,), create=False) + except KeyError as e: + raise KeyError( + f"Cannot prune non-existing node at {address}" + ) from e + + if address_rest: # more address to traverse. Recurse + child.prune_leaf(address_rest) + else: # no more address. Child should be a leaf + if child.is_leaf(): + self.pop(key) + else: + raise ValueError(f"Node at {address} is not a child node") + + def prune_all( # noqa: C901 # not too complex I think. Just elifs + self, + addresses: List[TreeAddress], + strategy: PruneStrategy = PruneStrategy.WHERE_POSSIBLE, + ): + """Prune the leaf at each address. You can pass non-leaf addresses as well + provided that all children of this leaf are passed as well. + + Parameters + ---------- + addresses + Prune nodes at these locations + strategy + Indicates how to handle child nodes and invalid targets. + See PruneStrategy docstring + + Raises + ------ + KeyError + If strategy is PruneStrategy.CHECK_FIRST and illegal addresses are passed. 
+ Illegal meaning 'would remove unlisted child nodes' + """ + if strategy == PruneStrategy.FORCE: + for address in addresses: + try: + self.prune(address) + except KeyError: + continue + elif strategy == PruneStrategy.WHERE_POSSIBLE: + for address in addresses: + try: + self.prune_leaf(address) + except ValueError: + continue + elif strategy == PruneStrategy.CHECK_FIRST: + # simulate subtree first to avoid leaving tree in half-pruned state after + # error + simtree = self.copy() + addresses.sort(key=lambda x: len(x), reverse=True) + try: + for address in addresses: + simtree.prune_leaf(address=address) + except ValueError as e: + raise KeyError( + f"Cannot prune all addresses: {e}. Pruning cancelled" + ) from e + # No exceptions. We can safely remove + for address in addresses: + self.pop_address(address) + else: + raise ValueError(f"Unknown strategy '{strategy}'") + + def get_node(self, address: TreeAddress, create=True): + """Get node at given address, creating if it does not exist + + Parameters + ---------- + address + Address to get + create: Bool, optional + If true, create non-existing address. Else, raise exception + Defaults to True + + Raises + ------ + KeyError + If address does not exist and create is False + + Returns + ------- + TreeNode + The treenode at this address + """ + if not address: + return self + else: + key = address[0] + if not create and key not in self.keys(): + raise KeyError(f"Key {key} not found") + return self[key].get_node(address[1:], create=create) + + +class ExpiringCollection: + """A collection of objects that expires after a set time + + Collects expired items in .expired_items list. 
+ """ + + def __init__(self, expire_after_seconds: int): + self.expire_after_seconds = expire_after_seconds + self.stamped_items = LastUpdatedOrderedDict() + self.expired_items: List[Any] = [] + + @staticmethod + def _now(): + return datetime.now() + + def add(self, item): + self.stamped_items[item] = self._now() + + def add_all(self, items: Iterable[Hashable]): + for item in items: + self.add(item) + + @property + def items(self): + self.check_expired() + return list(self.stamped_items.keys()) + + def check_expired(self): + """Move expired to expired_items list""" + expired = [] + for item, timestamp in self.stamped_items.items(): + if (self._now() - timestamp).seconds > self.expire_after_seconds: + expired.append(item) + else: + break # timestamps should be ordered so we can stop checking + + self.expired_items = self.expired_items + expired + [self.stamped_items.pop(x) for x in expired] + + def collect_expired(self) -> List[Hashable]: + """Returns all expired items and removes them from local list""" + self.check_expired() + expired = self.expired_items + self.expired_items = [] + return expired + + +class LastUpdatedOrderedDict(OrderedDictType[Any, Any]): + """Store items in the order the keys were last added""" + + def __setitem__(self, key, value): + super().__setitem__(key, value) + self.move_to_end(key) diff --git a/dicomtrolley/exceptions.py b/dicomtrolley/exceptions.py index 3074a9f..2dcc611 100644 --- a/dicomtrolley/exceptions.py +++ b/dicomtrolley/exceptions.py @@ -10,3 +10,11 @@ class UnSupportedParameterError(DICOMTrolleyError): """ pass + + +class NoReferencesFoundError(DICOMTrolleyError): + """Cannot find any references for this object at the given level. 
Used in + DICOMDownloadable + """ + + pass diff --git a/dicomtrolley/parsing.py b/dicomtrolley/parsing.py index 950855b..6a05740 100644 --- a/dicomtrolley/parsing.py +++ b/dicomtrolley/parsing.py @@ -1,5 +1,5 @@ """Models parsing things into study/series/instance structure""" -from typing import Any, DefaultDict, Dict, List, Sequence +from typing import Dict, List, Sequence from pydicom.dataset import Dataset @@ -13,6 +13,7 @@ Study, StudyReference, ) +from dicomtrolley.datastructures import TreeNode from dicomtrolley.exceptions import DICOMTrolleyError @@ -24,46 +25,6 @@ def flatten(dicom_object) -> List[DICOMObject]: return nodes -class TreeNode(DefaultDict[Any, "TreeNode"]): - """Recursive defaultdict with a 'data' property. Helps parse to tree structures. - - Examples - -------- - >>> root = TreeNode() - >>> root['study1']['series1']['instance1'].data = 'some instance info' - >>> 'study1' in root - True - >>> root['study1']['series2'].data = 'some series data' - >>> list(root['study1'].keys) - ['series1', 'series2'] - """ - - def __init__(self, data=None, allow_overwrite=True): - """ - - Parameters - ---------- - data: - Optional data to associate with this node - allow_overwrite: bool, optional - If False, will raise exception when overwriting data attribute - """ - super().__init__(lambda: TreeNode(allow_overwrite=allow_overwrite)) - self._data = data - self.allow_overwrite = allow_overwrite - - @property - def data(self): - return self._data - - @data.setter - def data(self, value): - if self.allow_overwrite or not self.data: - self._data = value - else: - raise ValueError("Overwriting data is not allowed") - - class DICOMParseTree: """Models study/series/instance as a tree. 
Allows arbitrary branch insertions""" @@ -87,7 +48,7 @@ def init_from_objects(cls, objects: Sequence[DICOMObject]): Notes ----- - Parent objects for instances and series will be created to maintain to + Parent objects for instances and series will be created to ensure all nodes are connected to the tree root. For example, running this method with only a single instance as input will result in a regular root->study->series->instance tree. This is possible because all DICOMObjects @@ -189,7 +150,7 @@ def value_or_dataset(val): study_instance_uid, study_node = study_node_in study = Study( - uid=study_instance_uid, + uid=str(study_instance_uid), data=value_or_dataset(study_node.data), series=tuple(), ) @@ -263,7 +224,6 @@ def retrieve(self, reference: DICOMObjectReference) -> DICOMObject: DICOMObjectNotFound If data for the given parameters does not exist in tree """ - try: if isinstance(reference, StudyReference): return self[reference.study_uid] diff --git a/dicomtrolley/qido_rs.py b/dicomtrolley/qido_rs.py index 38213ac..2d1ee64 100644 --- a/dicomtrolley/qido_rs.py +++ b/dicomtrolley/qido_rs.py @@ -91,7 +91,7 @@ def uri_base(self) -> str: The non-query part of the URI as defined in DICOM PS3.18 section 10.6 table 10.6.1-2 """ - raise NotImplementedError + raise NotImplementedError() def uri_search_params(self) -> Dict[str, Union[str, List[str]]]: """The search parameter part of the URI as defined in @@ -301,7 +301,7 @@ def query_level_should_be_series_or_instance( """A relational query only makes sense for the instance and series levels. If you want to look for studies, us a hierarchical query """ - if values["query_level"] == QueryLevels.STUDY: + if values.get("query_level") == QueryLevels.STUDY: raise ValueError(STUDY_VALUE_ERROR_TEXT) return values @@ -312,30 +312,22 @@ def uri_base(self) -> str: The non-query part of the URI as defined in DICOM PS3.18 section 10.6 table 10.6.1-2 - - Raises - ------ - ValueError - if query_level is STUDY. 
This makes no sense for a relational query """ - if self.query_level == QueryLevels.STUDY: - raise ValueError(STUDY_VALUE_ERROR_TEXT) - elif self.query_level == QueryLevels.SERIES: + # QueryLevels.Study is checked in query_level_should_be_series_or_instance() + if self.query_level == QueryLevels.SERIES: return "/series" elif self.query_level == QueryLevels.INSTANCE: if self.StudyInstanceUID: # all instances for this study return f"/studies/{self.StudyInstanceUID}/instances" else: - # all instances on the intire server (might be slow) + # all instances on the entire server (might be slow) return "/instances" - else: - raise ValueError( - f'Unknown querylevel "{self.query_level}". ' - f'Should be one of "{QueryLevels}"' - ) + # Unreachable due to pydantic validation and root validator. But I + # get uncomfortable from open elifs. + raise ValueError(f"Unknown query level {self.query_level}") class QidoRS(Searcher): diff --git a/dicomtrolley/trolley.py b/dicomtrolley/trolley.py index 605bc4d..fc27ee4 100644 --- a/dicomtrolley/trolley.py +++ b/dicomtrolley/trolley.py @@ -7,27 +7,23 @@ Searcher and Downloader classes should be stand-alone. They are not allowed to communicate directly. Trolley has knowledge of both and is in control. 
""" -import itertools + import tempfile from typing import List, Optional, Sequence, Union from dicomtrolley.core import ( DICOMDownloadable, - DICOMObject, + DICOMObjectLevels, DICOMObjectReference, Downloader, - Instance, - InstanceReference, NonInstanceParameterError, NonSeriesParameterError, + QueryLevels, Searcher, - Series, - SeriesReference, Study, ) -from dicomtrolley.exceptions import DICOMTrolleyError +from dicomtrolley.exceptions import NoReferencesFoundError from dicomtrolley.logs import get_module_logger -from dicomtrolley.parsing import DICOMObjectNotFound, DICOMObjectTree from dicomtrolley.storage import DICOMDiskStorage, StorageDir @@ -51,7 +47,6 @@ def __init__( self, downloader: Downloader, searcher: Searcher, - query_missing=True, storage: Optional[DICOMDiskStorage] = None, ): """ @@ -62,20 +57,13 @@ def __init__( The module to use for downloads searcher: Searcher The module to use for queries - query_missing: bool, optional - if True, Trolley.download() will query for missing DICOM instances. For - example when passing a Study obtained from a study-level query, which does - not contain any information on instances - if False, missing instances will raise MissingObjectInformationError storage: DICOMDiskStorage instance, optional All downloads are saved to disk by calling this objects' save() method. Defaults to basic StorageDir (saves as /studyid/seriesid/instanceid) """ self.downloader = downloader self.searcher = searcher - self._searcher_cache = CachedSearcher( - searcher, query_missing=query_missing - ) + if storage: self.storage = storage else: @@ -135,58 +123,64 @@ def download( def fetch_all_datasets(self, objects: Sequence[DICOMDownloadable]): """Get full DICOM dataset for all instances contained in objects. + Some downloaders require explicit series- or instance level information to be + able to download. Additional queries might be fired to obtain this + information. 
+ Returns ------- - Iterator[Dataset] - The downloaded dataset and the context that was used to download it + Iterator[Dataset, None, None] + All datasets belonging to input objects """ try: yield from self.downloader.datasets(objects) except NonSeriesParameterError: # downloader wants at least series level information. Do extra work. - series_lvl_objects = self.ensure_to_series_level(objects) - yield from self.downloader.datasets(series_lvl_objects) + series_lvl_refs = self.obtain_references( + objects=objects, max_level=DICOMObjectLevels.SERIES + ) + yield from self.downloader.datasets(series_lvl_refs) except NonInstanceParameterError: # downloader wants only instance input. Do extra work. - instances = self.convert_to_instances(objects) - yield from self.downloader.datasets(instances) - - def convert_to_instances( - self, objects_in: Sequence[DICOMDownloadable] - ) -> List[InstanceReference]: - """Find all instances contained in objects, running additional image-level - queries with searcher if needed. - - Holds DICOMObjects in an internal cache, avoiding queries as much as - possible. - - Note - ---- - This method can fire additional search queries and might take a long time to - return depending on the number of objects and missing instances - """ - return list( - itertools.chain.from_iterable( - self._searcher_cache.retrieve_instance_references(x) - for x in objects_in + instance_refs = self.obtain_references( + objects=objects, max_level=DICOMObjectLevels.INSTANCE ) - ) + yield from self.downloader.datasets(instance_refs) - def ensure_to_series_level( - self, objects_in: Sequence[DICOMDownloadable] - ) -> Sequence[Union[InstanceReference, SeriesReference]]: - """Make sure all input is converted to instance or series + def obtain_references( + self, + objects: Sequence[DICOMDownloadable], + max_level: DICOMObjectLevels, + ) -> List[DICOMObjectReference]: + """Get download references for all downloadable objects, at max_level or + lower. query if needed. 
- ---- - This method can fire additional search queries and might take a long time to - return depending on the number of objects and missing instances or series + For example, if level is QueryLevels.Instance and a Study object is given, + try to extract instances from this study. If those instances or not in the + study, ask searcher to obtain them + + Returns + ------- + List[DICOMObjectReference] of the level given or deeper """ - return list( - itertools.chain.from_iterable( - self._searcher_cache.ensure_series_level_references(x) - for x in objects_in - ) - ) + references: List[DICOMObjectReference] = [] + for downloadable in objects: + try: + references += downloadable.contained_references( + max_level=max_level + ) + except NoReferencesFoundError: + # Not enough info in object itself. We need searcher + logger.debug( + f"Not enough info to extract '{str(max_level)}-level' " + f"references from {downloadable}. Asking searcher." + ) + study = self.searcher.find_study_by_id( + study_uid=downloadable.reference().study_uid, + query_level=QueryLevels.from_object_level(max_level), + ) + references += study.contained_references(max_level=max_level) + return references def fetch_all_datasets_async(self, objects, max_workers=None): """Get DICOM dataset for each instance given objects using multiple threads. @@ -216,204 +210,8 @@ def fetch_all_datasets_async(self, objects, max_workers=None): ) except NonInstanceParameterError: yield from self.downloader.datasets_async( - instances=self.convert_to_instances(objects), + instances=self.obtain_references( + objects=objects, max_level=DICOMObjectLevels.INSTANCE + ), max_workers=max_workers, ) - - -class CachedSearcher: - def __init__( - self, - searcher: Searcher, - cache: Optional[DICOMObjectTree] = None, - query_missing: Optional[bool] = True, - ): - """A DICOMObject tree (study/series/instances) that will launch - queries to expand itself if needed. Tries to query as little as - possible. 
- - Created this to efficiently get all instances for download based on a - variable collection of DICOMDownloadable objects. - - Parameters - ---------- - searcher: Searcher - Use this searcher to search for missing elements in cache - cache: DICOMObjectTree, Optional - Use this tree as cache. Defaults to an empty tree - query_missing: Bool, optional - Launch queries to find missing information. If False, will raise - MissingObjectInformationError. Defaults to True - """ - self.searcher = searcher - if not cache: - cache = DICOMObjectTree([]) - self.cache = cache - self.query_missing = query_missing - - def retrieve_instance_references( - self, downloadable: DICOMDownloadable - ) -> List[InstanceReference]: - """Get references for all instances contained in downloadable, performing - additional queries if needed - - Raises - ------ - MissingObjectInformationError - If Trolley.query_missing is False and queries would be required to - retrieve all instances. - - """ - # DICOMObject might already contain all required info - if isinstance(downloadable, DICOMObject): - instances = downloadable.all_instances() - if instances: - return [x.reference() for x in instances] - - # No instances in downloadable. Do we have instances in cache for this? - reference = downloadable.reference() - try: - return [ - x.reference() for x in self.get_instances_from_cache(reference) - ] - except MissingObjectInformationError as e: # not found, we'll have to query - if not self.query_missing: - raise MissingObjectInformationError( - f"No instances cached for {reference} " - f"and query_missing was False" - ) from e - - logger.debug(f"No instances cached for {reference}. Querying") - self.query_study_instances(reference) - return [ - x.reference() for x in self.get_instances_from_cache(reference) - ] - - def ensure_series_level_references( - self, downloadable: DICOMDownloadable - ) -> List[Union[InstanceReference, SeriesReference]]: - """Extract references of at least series level. 
Query if not found - - Raises - ------ - MissingObjectInformationError - If Trolley.query_missing is False and queries would be required to - retrieve Series. - - - TODO: Strong whiffs of code smell here. This method should not be this - long. Some pointers for refactoring: Get rid of explicit type checking. - possibly move logic to DICOMDownloadable classes. Why does this method - take a DICOMDownloadable and not just a reference? It seems conversion logic - is shoehorned into CachedSearcher and Trolley classes. CachedSearcher should - govern a cache of DICOM information, Trolley should handle high level - requests. Neither are quite right for conversion. Possibly create separate - class for this. - """ - # Instance and Series should just be left as is - if isinstance( - downloadable, - (Instance, Series, InstanceReference, SeriesReference), - ): - return [downloadable.reference()] - # If its a study, it might need work - - elif isinstance(downloadable, Study): - series: Sequence[ - Union[Instance, Series] - ] = downloadable.all_series() - if series: # there were series inside Study already - return [x.reference() for x in series] - - # no studies. Maybe they are in cache? - reference = downloadable.reference() - try: - series = self.get_series_level_from_cache(reference) - if series: - return [x.reference() for x in series] - else: - raise MissingObjectInformationError() # not expected. Being careful. - - except MissingObjectInformationError as e: # not found, we'll have to query - if not self.query_missing: - raise MissingObjectInformationError( - f"No series cached for {reference} " - f"and query_missing was False" - ) from e - - logger.debug(f"No series cached for {reference}. Querying") - self.query_study_series(reference) - return [ - x.reference() - for x in self.get_series_level_from_cache(reference) - ] - - def get_instances_from_cache( - self, reference: DICOMObjectReference - ) -> List[Instance]: - """Retrieve all instances for this reference. 
Raise exception if not found - - Raises - ------ - MissingObjectInformationError - If no instances are found in cache - """ - try: - instances = self.cache.retrieve(reference).all_instances() - if instances: - return instances - else: - # reference was in cache, but there are no instances - raise MissingObjectInformationError( - f"No instances found for {reference}" - ) - except DICOMObjectNotFound as e: - # reference was not in cache - raise MissingObjectInformationError( - f"No instances found for {reference}" - ) from e - - def get_series_level_from_cache( - self, reference: DICOMObjectReference - ) -> Sequence[Union[Instance, Series]]: - """Retrieve series this reference, or instance if required. - Raise exception if not found - - Raises - ------ - MissingObjectInformationError - If no series or instance can be found for this reference - """ - try: - series: Sequence[Union[Instance, Series]] = self.cache.retrieve( - reference - ).all_series() - if series: - return series - else: - # reference was in cache, but there are no instances - raise MissingObjectInformationError( - f"No series found for {reference}" - ) - except DICOMObjectNotFound as e: - # reference was not in cache - raise MissingObjectInformationError( - f"No series found for {reference}" - ) from e - - def query_study_instances(self, reference: DICOMObjectReference): - study = self.searcher.find_study_at_instance_level(reference.study_uid) - self.cache.add_study(study) - - def query_study_series(self, reference: DICOMObjectReference): - study = self.searcher.find_study_at_series_level(reference.study_uid) - self.cache.add_study(study) - - -class MissingObjectInformationError(DICOMTrolleyError): - """An operation on a DICOM object cannot complete because the required information - is not available. 
For example, trying to extract all instances from a Study that - was retrieved without instance information - """ - - pass diff --git a/docs/index.md b/docs/index.md index dfbf749..ef6d5e9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -37,7 +37,7 @@ studies = trolley.find_studies(Query(PatientName='B*')) trolley.download(studies[0], output_dir='/tmp/trolley') ``` ## Documentation -see [dicomtrolley github docs](https://github.com/sjoerdk/dicomtrolley/tree/master/docs) +see [dicomtrolley docs on readthedocs.io](https://dicomtrolley.readthedocs.io) ## Examples Example code can be found [on github](https://github.com/sjoerdk/dicomtrolley/tree/master/examples) diff --git a/docs/usage.md b/docs/usage.md index d171aac..9464e46 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -125,6 +125,22 @@ class MyStorage(DICOMDiskStorage): trolley = Trolley(searcher=a_searcher, downloader=a_downloader, storage=MyStorage()) ``` +## Caching +You can add caching to any [`Searcher`](concepts.md#searcher) by wrapping it with +a [CachedSearcher][dicomtrolley.caching.CachedSearcher] instance: + +```python +from dicomtrolley.caching import CachedSearcher, DICOMObjectCache + +searcher = CachedSearcher(searcher=a_searcher, + cache=DICOMObjectCache(expiry_seconds=300)) + +trolley = Trolley(searcher=searcher, downloader=a_downloader) +``` + +[CachedSearcher][dicomtrolley.caching.CachedSearcher] is a [Searcher][dicomtrolley.core.Searcher] +and can be used like any other. It will return cached results to any of its function +calls for up to `expiry_seconds` seconds. ## Logging Dicomtrolley uses the standard [logging](https://docs.python.org/3/library/logging.html) module. 
The root logger is @@ -149,9 +165,7 @@ trolley = Trolley(searcher=a_searcher, downloader=Rad69(session=requests.session(), url="https://server/rad69", errors_to_ignore = [XDSMissingDocumentError])) - -study = trolley.find_study(Query(PatientName="AB*")) -trolley.download(study, '/tmp') # will skip series raising XDSMissingDocumentError +# trolley.download() # will now skip series raising XDSMissingDocumentError ``` ## Authentication diff --git a/tests/mock_responses.py b/tests/mock_responses.py index b88059b..e1ee845 100644 --- a/tests/mock_responses.py +++ b/tests/mock_responses.py @@ -6,12 +6,12 @@ import re import urllib from dataclasses import dataclass, field -from typing import Any, Dict, List, Pattern, Union +from typing import Any, Callable, Dict, List, Pattern, Union from requests_mock import ANY from requests_toolbelt.multipart.encoder import MultipartEncoder -from dicomtrolley.core import InstanceReference +from dicomtrolley.core import InstanceReference, QueryLevels from dicomtrolley.xml_templates import ( A_RAD69_RESPONSE_SOAP_HEADER, RAD69_SOAP_RESPONSE_NOT_FOUND, @@ -29,7 +29,7 @@ class MockResponse: url: Union[str, Pattern[str]] status_code: int = 200 method: str = "GET" - text: str = "" + text: Union[str, Callable[[Any, Any], Any]] = "" content: bytes = field(default_factory=bytes) json: Dict[str, Any] = field(default_factory=dict) reason: str = "" @@ -159,6 +159,8 @@ def as_instance_reference(cls): method="GET", ) +MINT_SEARCH_MOCK_STUDY_UID = "1.2.340.114850.2.857.2.793263.2.125336546.1" + MINT_SEARCH_STUDY_LEVEL = MockResponse( url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=STUDY", text="' + 'tag="0020000d" vr="UI" val="' + MINT_SEARCH_MOCK_STUDY_UID + '" />' '' '' + 'tag="0020000d" vr="UI" val="' + MINT_SEARCH_MOCK_STUDY_UID + '" />' "", ) +# exactly one study +MINT_SEARCH_STUDY_LEVEL_SINGLE = MockResponse( + url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=STUDY", + text="' + "", +) MINT_SEARCH_SERIES_LEVEL = 
MockResponse( url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=SERIES", @@ -195,7 +209,7 @@ def as_instance_reference(cls): ' tag="0020000e" vr="UI" val="1.2.840.113619.2.239.1783.1568025913.0.76" />' '' '' + 'vr="UI" val="' + MINT_SEARCH_MOCK_STUDY_UID + '" />' "", ) +MINT_SEARCH_SERIES_LEVEL_SINGLE = MockResponse( + url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=SERIES", + text="' + '' + '', +) + MINT_SEARCH_INSTANCE_LEVEL = MockResponse( url=MockUrls.MINT_URL + "/studies?PatientName=B*&QueryLevel=INSTANCE", text="<' '/series><' "/studySearchResults>", @@ -272,7 +303,7 @@ def as_instance_reference(cls): # The IDS in the MINT response. To not have to copy-paste these in tests MINT_SEARCH_INSTANCE_LEVEL_IDS = { - "study_uid": "111", + "study_uid": MINT_SEARCH_MOCK_STUDY_UID, "series_uids": ( "1.2.40.0.13.1.202066129828111990737107018349786560571", "1.2.840.113663.1500.1.460388269.2.1.20201105.84519.348", @@ -532,6 +563,9 @@ def create_rad69_response(bytes_parts, soap_header=None): ] ), ) + +# respond with a valid mint search response containing three studies, whatever the +# called url was. Blunt. MINT_SEARCH_ANY = MockResponse( url=ANY, method=ANY, @@ -555,6 +589,31 @@ def create_rad69_response(bytes_parts, soap_header=None): ) +def mint_response(request, context): + """Generate a MINT query response that matches the requested StudyInstanceUID + and also honours query_level. 
+ """ + query_level = request.qs["querylevel"][0].upper() + requested_suid = request.qs["studyinstanceuid"][0] + + if query_level == QueryLevels.STUDY: + base_response = MINT_SEARCH_STUDY_LEVEL_SINGLE.text + elif query_level == QueryLevels.SERIES: + base_response = MINT_SEARCH_SERIES_LEVEL_SINGLE.text + elif query_level == QueryLevels.INSTANCE: + base_response = MINT_SEARCH_INSTANCE_LEVEL.text + else: + raise ValueError(f"Unknown query level {query_level}") + + return base_response.replace(MINT_SEARCH_MOCK_STUDY_UID, requested_suid) + + +# Respond to MockUrls.QIDO_RS_URL queries with a mint response matching the +# requested StudyInstanceUID. +MINT_SEARCH_MATCH_SUID = MockResponse( + url=re.compile(MockUrls.MINT_URL + ".*"), method=ANY, text=mint_response +) + # a valid response when a query has 0 results QIDO_RS_204_NO_RESULTS = MockResponse( url=re.compile(MockUrls.QIDO_RS_URL + ".*"), diff --git a/tests/test_caching.py b/tests/test_caching.py new file mode 100644 index 0000000..d8d14ec --- /dev/null +++ b/tests/test_caching.py @@ -0,0 +1,211 @@ +from datetime import datetime, timedelta + +import pytest + +from dicomtrolley.caching import CachedSearcher, DICOMObjectCache, NodeNotFound +from dicomtrolley.core import Query, QueryLevels +from tests.mock_responses import MINT_SEARCH_MATCH_SUID + + +def test_object_cache(some_studies): + """Test basics of cache. Adding and retrieving""" + cache = DICOMObjectCache() + a_study = some_studies[0] + + # Nothing has been added yet. Retrieving should fail + with pytest.raises(NodeNotFound): + cache.retrieve(a_study.reference()) + + # After adding, retrieve should work and fetch the right thing + cache.add(a_study) + assert a_study == cache.retrieve(a_study.reference()) + + +def test_object_cache_child_objects(some_studies): + """If you add a full study, you should be able to retrieve the series from that + study from cache. 
+ """ + a_study = some_studies[0] + cache = DICOMObjectCache(initial_objects=[a_study]) + + a_series = a_study.series[0] + assert a_series == cache.retrieve(a_series.reference()) + + +def test_object_cache_no_unneeded_leaves(some_studies): + """Checking whether a node exists should not create an empty node there""" + + cache = DICOMObjectCache() + assert not cache.root.items() + + with pytest.raises(NodeNotFound): + cache.retrieve(some_studies[0].reference()) + + assert not cache.root.items() + + +def test_object_cache_expiry(some_studies): + a_study = some_studies[0] + cache = DICOMObjectCache(initial_objects=[a_study], expiry_seconds=300) + + # item is not expired and can be retrieved + assert cache.retrieve(a_study.reference()) + # retrieved item is the actual study that was put in + assert cache.retrieve(a_study.reference()).series[0].instances[2] + + # now its 20 minutes later + cache.expiry._now = lambda: datetime.utcnow() + timedelta(seconds=600) + + # item should no longer be available + with pytest.raises(NodeNotFound): + cache.retrieve(a_study.reference()) + + +def test_cached_searcher_queries(requests_mock, a_cached_searcher): + """Check caching of calls to find_studies(Query)""" + + searcher, set_time = a_cached_searcher + + assert len(requests_mock.request_history) == 0 # no network calls yet + assert searcher.find_studies( + Query(StudyInstanceUID="1000") + ) # something comes back + assert len(requests_mock.request_history) == 1 # due to a network call + + # now the result should have been cached + assert searcher.find_studies( + Query(StudyInstanceUID="1000") + ) # something comes back + assert len(requests_mock.request_history) == 1 # but no more network calls + + set_time(200) # later, but nothing has expired yet + # results to other queries are not cached yet so cause network call + assert searcher.find_studies(Query(StudyInstanceUID="9999")) + assert len(requests_mock.request_history) == 2 + + set_time( + 400 + ) # First query result should have 
expired so again network call + assert searcher.find_studies(Query(StudyInstanceUID="1000")) + assert len(requests_mock.request_history) == 3 + + # but other query has not expired, so should not cause network call + assert searcher.find_studies(Query(StudyInstanceUID="9999")) + assert len(requests_mock.request_history) == 3 + + +def test_cached_searcher_query_by_id(requests_mock, a_cached_searcher): + """Check caching for calls to .find_study_by_id(study_id, level)""" + searcher, set_time = a_cached_searcher + + assert len(requests_mock.request_history) == 0 + assert searcher.find_study_by_id( + study_uid="1000", query_level=QueryLevels.STUDY + ) + assert len(requests_mock.request_history) == 1 + + assert searcher.find_study_by_id( + study_uid="1000", query_level=QueryLevels.STUDY + ) + assert len(requests_mock.request_history) == 1 + + set_time(400) + assert searcher.find_study_by_id( + study_uid="1000", query_level=QueryLevels.STUDY + ) + assert len(requests_mock.request_history) == 2 + + +def test_cached_searcher_shared_cache(requests_mock, a_cached_searcher): + """Methods .find_studies(Query) and .find_study_by_id(study_id, level) share + their cache + """ + searcher, set_time = a_cached_searcher + + assert len(requests_mock.request_history) == 0 + # Query a study at Study level + assert searcher.find_studies( + Query(StudyInstanceUID="1000", query_level=QueryLevels.STUDY) + ) + assert len(requests_mock.request_history) == 1 + + # This study should now also be available from cache + searcher.find_study_by_id(study_uid="1000", query_level=QueryLevels.STUDY) + assert len(requests_mock.request_history) == 1 + + # however, series info is not available, so this should cause another call + searcher.find_study_by_id(study_uid="1000", query_level=QueryLevels.SERIES) + assert len(requests_mock.request_history) == 2 + + # Even though everything is available, a different query will not use cache. 
+ # It's beyond scope to parse the semantics of a query + assert searcher.find_studies( + Query(StudyInstanceUID="1000", query_level=QueryLevels.SERIES) + ) + assert len(requests_mock.request_history) == 3 + + +def test_cached_searcher_complex(requests_mock, a_cached_searcher): + """Cache saves objects as a tree structure. As such it is possible to update + a leaf without its stem, making the stem expire before the leaf. Handle this. + + + """ + searcher, set_time = a_cached_searcher # default expiry is 300 secs + + # Query a study at full depth. + assert len(requests_mock.request_history) == 0 + study = searcher.find_study( + Query(StudyInstanceUID="1000", query_level=QueryLevels.INSTANCE) + ) + assert len(requests_mock.request_history) == 1 + # This is then saved in cache (no new request) + assert searcher.find_study( + Query(StudyInstanceUID="1000", query_level=QueryLevels.INSTANCE) + ) + assert len(requests_mock.request_history) == 1 + + set_time(200) # later, a single instance is updated + updated_instance = study.series[1].instances[0] + searcher.cache.add(updated_instance) + + set_time(400) # Now everything is expired except for this single instance + # you can retrieve the instance + assert searcher.cache.retrieve(updated_instance.reference()) + # but not the expired parent study + with pytest.raises(NodeNotFound): + assert searcher.cache.retrieve(study.reference()) + + # And trying the query again should send request again + assert len(requests_mock.request_history) == 1 + assert searcher.find_study( + Query(StudyInstanceUID="1000", query_level=QueryLevels.INSTANCE) + ) + assert len(requests_mock.request_history) == 2 + + +def test_mop_up(): + """Odds and ends I would like to just make sure about""" + cache = DICOMObjectCache() + with pytest.raises(ValueError): + cache.to_address("not a ref!") + + +@pytest.fixture +def a_cached_searcher(requests_mock, a_mint): + """A cached searcher with working mocked search backend, a 300 second expiry + and a method to 
set its now() so you can test expiry + """ + + # set up working response to a call to a_mint + requests_mock.register_uri(**MINT_SEARCH_MATCH_SUID.as_dict()) + + # set up a cache that you can set the current time on + cache = DICOMObjectCache(expiry_seconds=300) + now = datetime.now() + + def set_time(secs): + cache.expiry._now = lambda: now + timedelta(seconds=secs) + + searcher = CachedSearcher(searcher=a_mint, cache=cache) + return searcher, set_time diff --git a/tests/test_core.py b/tests/test_core.py index 859b889..f41fb45 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4,17 +4,21 @@ from pydicom.dataset import Dataset from dicomtrolley.core import ( + DICOMObjectLevels, ExtendedQuery, Instance, + InstanceReference, NonInstanceParameterError, NonSeriesParameterError, Query, Series, + SeriesReference, Study, StudyReference, to_instance_refs, to_series_level_refs, ) +from dicomtrolley.exceptions import NoReferencesFoundError from dicomtrolley.mint import MintQuery from tests.factories import ( InstanceReferenceFactory, @@ -179,3 +183,91 @@ def test_to_series_level_refs(a_study_level_study): to_series_level_refs( [SeriesReferenceFactory(), InstanceReferenceFactory()] ) + + +def test_contained_references_for_references(): + """DICOM object references can only return themselves, or error""" + + instance_ref: InstanceReference = InstanceReferenceFactory() + # an instance reference is the deepest level, so this can always return itself + for level in DICOMObjectLevels: + assert ( + instance_ref.contained_references(max_level=level)[0] + == instance_ref + ) + + series_ref: SeriesReference = SeriesReferenceFactory() + # a series reference can return itself at series or study level + for level in (DICOMObjectLevels.STUDY, DICOMObjectLevels.SERIES): + assert ( + series_ref.contained_references(max_level=level)[0] == series_ref + ) + + # but can't return instance level. 
There is no info for that
+    with pytest.raises(NoReferencesFoundError):
+        series_ref.contained_references(max_level=DICOMObjectLevels.INSTANCE)
+
+    # study ref can only return at study level
+    study_ref: StudyReference = StudyReferenceFactory()
+    assert (
+        study_ref.contained_references(max_level=DICOMObjectLevels.STUDY)[0]
+        == study_ref
+    )
+
+    for level in (DICOMObjectLevels.INSTANCE, DICOMObjectLevels.SERIES):
+        with pytest.raises(NoReferencesFoundError):
+            study_ref.contained_references(max_level=level)
+
+
+def test_contained_references_for_dicom_objects(some_studies):
+    """DICOM objects can contain additional dicom levels"""
+    image_level_study, study_level_study = some_studies
+
+    # If images (instances) are in there, this should be OK:
+    assert (
+        len(
+            image_level_study.contained_references(
+                max_level=DICOMObjectLevels.INSTANCE
+            )
+        )
+        == 18
+    )
+    assert (
+        len(
+            image_level_study.contained_references(
+                max_level=DICOMObjectLevels.SERIES
+            )
+        )
+        == 2
+    )
+    assert (
+        len(
+            image_level_study.contained_references(
+                max_level=DICOMObjectLevels.STUDY
+            )
+        )
+        == 1
+    )
+
+    # But if there is no image info, trying to go deeper will fail
+    for level in (DICOMObjectLevels.INSTANCE, DICOMObjectLevels.SERIES):
+        with pytest.raises(NoReferencesFoundError):
+            study_level_study.contained_references(max_level=level)
+
+    # staying at the same level is OK though
+    assert study_level_study.contained_references(
+        max_level=DICOMObjectLevels.STUDY
+    )
+
+    # Anything is OK for an instance, as it is the deepest level
+    an_instance = image_level_study.series[0].instances[0]
+    for level in DICOMObjectLevels:
+        assert an_instance.contained_references(max_level=level)
+
+    # Just topping off with series. 
as we're on such a nice testing spree here + a_series = image_level_study.series[0] + a_series.instances = [] # without instances + for level in (DICOMObjectLevels.SERIES, DICOMObjectLevels.STUDY): + assert a_series.contained_references(max_level=level) + with pytest.raises(NoReferencesFoundError): + a_series.contained_references(max_level=DICOMObjectLevels.INSTANCE) diff --git a/tests/test_datastructures.py b/tests/test_datastructures.py new file mode 100644 index 0000000..529a4a3 --- /dev/null +++ b/tests/test_datastructures.py @@ -0,0 +1,211 @@ +from datetime import datetime, timedelta + +import pytest + +from dicomtrolley.datastructures import ( + ExpiringCollection, + PruneStrategy, + TreeNode, + addr, +) + + +def test_parsing_node(): + node = TreeNode() + node["a"]["b"]["c"].data = "some data" + assert node["a"]["b"]["c"].data == "some data" + + node["a"]["b2"].data = "some other data" + assert list(node["a"].keys()) == ["b", "b2"] + + +def test_parsing_node_exceptions(): + node = TreeNode(allow_overwrite=False) + node["a"]["b"].data = "some data" + with pytest.raises(ValueError): + node["a"]["b"].data = "some other data" + + # this should not raise anything + node2 = TreeNode(allow_overwrite=True) + node2["a"]["b"].data = "some data" + node2["a"]["b"].data = "some other data" + + +def test_parsing_node_exists_check(): + node = TreeNode() + node["a"]["b"]["c"].data = "some data" + assert node.exists(address=["a", "b", "c"]) + assert not node.exists(address=["a", "b", "d"]) + assert not node.exists(address=["f", "s"]) + + +def test_parsing_node_addressing(): + """You should be able to use dict notation (tree[key1][key2]) and List of strings + (Tree.get(['key1'.'key2'])) + """ + + node = TreeNode() + _ = node["a"]["b"] # addressing means creating + assert node.exists(addr("a.b")) + + # checking existence does not create + assert not node.exists(addr("d.b")) + assert not node.exists(addr("d.b")) + + # checking with get_node does create + _ = 
node.get_node(addr("d.b")) + assert node.exists(addr("d.b")) + + +def test_parsing_node_prune(): + """You can remove nodes yes""" + + # simple parent with two child nodes + root = TreeNode() + _ = root.get_node(addr("a.b")) + _ = root.get_node(addr("a.c")) + _ = root.get_node(addr("a.d")) + assert root.exists(addr("a.b")) + assert root.exists(addr("a.c")) + assert root.exists(addr("a.d")) + + # Pruning child will leave siblings + root.prune(addr("a.b")) + assert not root.exists(addr("a.b")) + assert root.exists(addr("a.c")) + + # Pruning parent will remove siblings + root.prune(addr("a")) + assert not root.exists(addr("a.c")) + + # this should not work + with pytest.raises(KeyError): + root.prune([]) + + +def test_parsing_node_prune_leaf(): + root = TreeNode() + _ = root.get_node(addr("a.b")) + + # not allowed to prune stem node + with pytest.raises(ValueError): + root.prune_leaf(addr("a")) + + # but leaf is ok + root.prune_leaf(addr("a.b")) + root.prune_leaf(addr("a")) + + # this should not work + with pytest.raises(KeyError): + root.prune_leaf([]) + + +def test_parsing_node_prune_all(): + """When pruning many nodes at once, make sure to handle parent-child relations""" + + # You can pass the stem before the leaf, prune_leaf_all will sort + tree1 = TreeNode.init_from_addresses([addr("a.b.d"), addr("a.c")]) + tree1.prune_all( + [addr("a.b"), addr("a.b.d")], strategy=PruneStrategy.CHECK_FIRST + ) + + # Prune all won't prune 'a' as 'a.c' still exists + tree2 = TreeNode.init_from_addresses([addr("a.b.d"), addr("a.c")]) + with pytest.raises(KeyError): + tree2.prune_all( + [addr("a"), addr("a.b.d")], strategy=PruneStrategy.CHECK_FIRST + ) + + # If you pass 'a.c' for pruning as well there is no problem + tree3 = TreeNode.init_from_addresses([addr("a.b.d"), addr("a.c")]) + tree3.prune_all( + [addr("a.b"), addr("a.b.d"), addr("a.c")], + strategy=PruneStrategy.CHECK_FIRST, + ) + + +@pytest.fixture() +def an_expiring_collection(): + """A 5 minute-expiring collection with a 
.set_time function for debugging""" + collection = ExpiringCollection(expire_after_seconds=300) + now = datetime.now() + + def set_time(secs): + collection._now = lambda: now + timedelta(seconds=secs) + + return collection, set_time + + +def test_expiring_collection(an_expiring_collection): + collection, set_time = an_expiring_collection + + # you can get a thing + collection.add("a_thing") + assert collection.items + assert not collection.collect_expired() + + # unless its expired + set_time(600) + + assert not collection.items + assert collection.collect_expired() == ["a_thing"] + + # adding a thing a second time will update the times + set_time(0) + collection.add(12) # add '12' at time 0. should expire in 300 + set_time(200) + collection.add(12) # add it again + assert collection.items == [12] # no doubles + set_time(400) + assert collection.items == [12] # last add was less than 300 ago + + set_time(700) + assert collection.items == [] # but now that too has timed out + + +def test_expiring_collection_update_clash(an_expiring_collection): + """If you update an object timestamp, order of timestamps should be maintained""" + collection, set_time = an_expiring_collection + collection.add_all(["item1", "item2"]) + set_time(200) + collection.add("item1") # item2 should now expire before item1 + set_time(400) # item 2 should now be expired. Is it handled properly? 
+ assert collection.collect_expired() == ["item2"] + assert collection.items == ["item1"] + + +@pytest.fixture +def a_tree(): + """Produce this tree: + # A + # / \ + # B C + # / \ + # D E + """ + + return TreeNode.init_from_addresses( + [addr("a.b.d"), addr("a.b.e"), addr("a.c")] + ) + + +@pytest.mark.parametrize( + "strategy, result", + [ + (PruneStrategy.FORCE, {"ac"}), + (PruneStrategy.WHERE_POSSIBLE, {"abe", "ac"}), + (PruneStrategy.CHECK_FIRST, {"abd", "abe", "ac"}), + ], +) +def test_prune_strategies(a_tree, strategy, result): + try: + a_tree.prune_all( + addresses=[addr("a.b"), addr("a.b.d")], strategy=strategy + ) + except KeyError: + pass + + addresses = [x for x in a_tree.iter_leaf_addresses()] + address_strings = {"".join(x) for x in addresses} + + assert address_strings == result diff --git a/tests/test_parsing.py b/tests/test_parsing.py index c6c1f0c..077f518 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -5,7 +5,7 @@ StudyReference, ) from dicomtrolley.exceptions import DICOMTrolleyError -from dicomtrolley.parsing import DICOMObjectTree, DICOMParseTree, TreeNode +from dicomtrolley.parsing import DICOMObjectTree, DICOMParseTree from tests.factories import ( create_c_find_image_response, create_c_find_study_response, @@ -14,27 +14,6 @@ ) -def test_parsing_node(): - node = TreeNode() - node["a"]["b"]["c"].data = "some data" - assert node["a"]["b"]["c"].data == "some data" - - node["a"]["b2"].data = "some other data" - assert list(node["a"].keys()) == ["b", "b2"] - - -def test_parsing_node_exceptions(): - node = TreeNode(allow_overwrite=False) - node["a"]["b"].data = "some data" - with pytest.raises(ValueError): - node["a"]["b"].data = "some other data" - - # this should not raise anything - node2 = TreeNode(allow_overwrite=True) - node2["a"]["b"].data = "some data" - node2["a"]["b"].data = "some other data" - - def test_parse_tree(): """Parse flat datasets into a tree structure""" tree = DICOMParseTree() diff --git 
a/tests/test_qido_rs.py b/tests/test_qido_rs.py index bbe1267..7716776 100644 --- a/tests/test_qido_rs.py +++ b/tests/test_qido_rs.py @@ -1,6 +1,7 @@ from datetime import datetime import pytest +from pydantic import ValidationError from dicomtrolley.core import Query, QueryLevels from dicomtrolley.qido_rs import HierarchicalQuery, QidoRS, RelationalQuery @@ -33,12 +34,13 @@ def test_valid_hierarchical_query(query_params): [ {"SeriesInstanceUID": "123"}, {"SOPClassInstanceUID": "789"}, + {"SOPClassInstanceUID": "789", "query_level": QueryLevels.INSTANCE}, {"min_study_date": datetime(year=2023, month=5, day=29)}, ], ) def test_invalid_hierarchical_query(query_params): """These queries are invalid, should not pass init validation""" - with pytest.raises(ValueError): + with pytest.raises((ValueError, ValidationError)): HierarchicalQuery(**query_params) @@ -166,3 +168,28 @@ def test_ensure_query_type(a_qido): Query(AccessionNumber="123", query_level=QueryLevels.SERIES) ) assert ensured.uri_base() == "/series" + + +def test_mop_up(a_qido): + """Checks previously uncovered parts""" + assert RelationalQuery(query_level=QueryLevels.INSTANCE).uri_base() + + with pytest.raises( + ValidationError + ): # relational at study level makes no sense + assert RelationalQuery(query_level=QueryLevels.STUDY) + + assert RelationalQuery( + query_level=QueryLevels.INSTANCE, StudyInstanceUID="123" + ).uri_base() + + with pytest.raises(ValidationError): + assert RelationalQuery(query_level="unknown") + + query = HierarchicalQuery( + StudyInstanceUID="234", + SeriesInstanceUID="123", + query_level=QueryLevels.INSTANCE, + ) + assert query.uri_base() + assert query.uri_search_params() == {} diff --git a/tests/test_trolley.py b/tests/test_trolley.py index 902e8db..568e48f 100644 --- a/tests/test_trolley.py +++ b/tests/test_trolley.py @@ -3,27 +3,18 @@ import pytest -from dicomtrolley.core import SeriesReference, StudyReference from dicomtrolley.exceptions import DICOMTrolleyError -from 
dicomtrolley.mint import Mint, MintQuery -from dicomtrolley.parsing import DICOMObjectTree +from dicomtrolley.mint import MintQuery from dicomtrolley.storage import FlatStorageDir from dicomtrolley.trolley import ( - CachedSearcher, - MissingObjectInformationError, Trolley, ) -from tests.conftest import create_mint_study, set_mock_response + +from tests.conftest import create_mint_study from tests.factories import ( - InstanceReferenceFactory, - SeriesReferenceFactory, StudyReferenceFactory, quick_dataset, ) -from tests.mock_responses import ( - MINT_SEARCH_INSTANCE_LEVEL_IDS, -) -from tests.mock_servers import MINT_SEARCH_INSTANCE_LEVEL_ANY def test_trolley_find(a_trolley, some_mint_studies): @@ -55,7 +46,7 @@ def test_trolley_get_dataset(a_trolley, some_mint_studies): download """ # Search will yield full info for the missing study - a_trolley.searcher.find_study_at_instance_level = Mock( + a_trolley.searcher.find_study_by_id = Mock( return_value=create_mint_study( uid="1.2.340.114850.2.857.2.793263.2.125336546.1" ) @@ -172,90 +163,3 @@ def test_trolley_encapsulation_error(a_trolley): # this should be caught an raised as a TrolleyError with pytest.raises(DICOMTrolleyError): a_trolley.download(StudyReferenceFactory(), output_dir="/tmp") - - -def test_cached_searcher_retrieve_instances(a_mint, requests_mock): - # set a single-study response for any mint call - set_mock_response(requests_mock, MINT_SEARCH_INSTANCE_LEVEL_ANY) - - # a reference to the two series contained in response - series_reference_1 = SeriesReference( - study_uid=MINT_SEARCH_INSTANCE_LEVEL_IDS["study_uid"], - series_uid=MINT_SEARCH_INSTANCE_LEVEL_IDS["series_uids"][0], - ) - - # a reference to a series contained in response - series_reference_2 = SeriesReference( - study_uid=MINT_SEARCH_INSTANCE_LEVEL_IDS["study_uid"], - series_uid=MINT_SEARCH_INSTANCE_LEVEL_IDS["series_uids"][1], - ) - - searcher = CachedSearcher(searcher=a_mint) - - assert requests_mock.request_history == [] # no requests have 
been made - - # now ask for a series that is not in cached searcher - instances = searcher.retrieve_instance_references(series_reference_1) - assert len(instances) == 1 # this series has only one instance - assert len(requests_mock.request_history) == 1 # a request has been made - - # ask for the other series - instances = searcher.retrieve_instance_references(series_reference_2) - assert len(instances) == 13 - # No extra requests as the whole study was retrieved - assert len(requests_mock.request_history) == 1 - - -def test_cached_searcher_no_download(a_mint): - """You can disable auto-querying for missing info to limit requests""" - study1 = create_mint_study(uid="1") - study2 = create_mint_study(uid="2") - for series in study2.series: # study2 will have no instance info - series.instances = [] - - # search starts out with some info - searcher = CachedSearcher( - searcher=a_mint, - cache=DICOMObjectTree([study1, study2]), - query_missing=False, - ) - - # requesting study which has instance info should work - assert len(searcher.retrieve_instance_references(study1)) == 14 - - # however, this will raise errors - with pytest.raises(MissingObjectInformationError): - searcher.retrieve_instance_references(study2) - - -def test_cached_searcher_ensure_series(a_mint_study_with_instances): - # a study reference cannot be found, will issue a call - # an instance that is already cached will not yield a call - # a series will not yield a call if it is known - searcher = Mock(spec=Mint) - - a_study = a_mint_study_with_instances - for series in a_study.series: # study will have no instance - series.instances = [] - - cache = CachedSearcher( - searcher=searcher, - cache=DICOMObjectTree([a_study]), - query_missing=False, - ) - - # any instance references should be find regardless of cached or not - cache.ensure_series_level_references( - downloadable=InstanceReferenceFactory() - ) - cache.ensure_series_level_references(downloadable=SeriesReferenceFactory()) - - # there are series 
for this study in cache so should be fine - references = cache.ensure_series_level_references( - downloadable=a_study.reference() - ) - assert len(references) == 2 - - # a new series is not in cache so will not work - with pytest.raises(MissingObjectInformationError): - cache.ensure_series_level_references(StudyReference("unknown_study"))