Ticket/PSB-207: #2726

Merged
merged 1 commit on Oct 12, 2023
32 changes: 31 additions & 1 deletion allensdk/api/cloud_cache/cloud_cache.py
@@ -134,6 +134,17 @@ def latest_manifest_file(self) -> str:
"""
return self._find_latest_file(self.manifest_file_names)

@property
def cache_dir(self) -> str:
"""Return the cache directory path.

Returns
-------
str
Full cache directory path
"""
return self._cache_dir

# ====================== BasicLocalCache methods ==========================

@abstractmethod
@@ -330,7 +341,7 @@ def data_path(self, file_id) -> dict:
RuntimeError
If the file cannot be downloaded
"""
file_attributes = self._manifest.data_file_attributes(file_id)
file_attributes = self.get_file_attributes(file_id)
exists = self._file_exists(file_attributes)
local_path = file_attributes.local_path
output = {'local_path': local_path,
@@ -339,6 +350,21 @@ def data_path(self, file_id) -> dict:

return output

def get_file_attributes(self, file_id):
Contributor

Pretty sure it returns CacheFileAttributes and not dict? Can you add a return type?

Contributor Author

Ah, I was looking at my natural movie cache and saw what looked like a dict at first glance. Fixed.

"""
Retrieve file attributes for a given file_id from the metadata.

Parameters
----------
file_id: str or int
The unique identifier of the file to be accessed

Returns
-------
CacheFileAttributes
"""
return self._manifest.data_file_attributes(file_id)
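For illustration, a minimal usage sketch of this new helper (placeholder names, not from the PR; it assumes `cache` is a concrete subclass with a loaded manifest and that the file id exists in it):

# Sketch only: `cache` and "some_file_id" are placeholders.
attrs = cache.get_file_attributes("some_file_id")
print(attrs.local_path)  # where the file is (or will be) stored on disk
print(attrs.file_hash)   # hash used to validate the downloaded file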


class CloudCacheBase(BasicLocalCache):
"""
@@ -1042,6 +1068,10 @@ def __init__(self, cache_dir, bucket_name, project_name,

_s3_client = None

@property
def bucket_name(self) -> str:
return self._bucket_name

@property
def s3_client(self):
if self._s3_client is None:
@@ -1,6 +1,7 @@
from typing import Optional, List, Union
from pathlib import Path
import pandas as pd
import numpy as np

from allensdk.api.warehouse_cache.cache import Cache
from allensdk.brain_observatory.behavior.behavior_ophys_experiment import \
@@ -330,6 +331,36 @@ def get_behavior_session(
behavior_session_id=behavior_session_id
)

def get_raw_natural_movie(self) -> np.ndarray:
"""Download the raw movie data from the cloud and return it as a numpy
array.

Returns
-------
raw_movie : np.ndarray
"""
return self.fetch_api.get_raw_natural_movie()

def get_natural_movie_template(self, n_workers=None) -> pd.DataFrame:
"""Download the movie if needed and process it into warped and unwarped
frames as presented to the mouse. The DataFrame is indexed with the
same frame index as shown in the stimulus presentation table.

Processing the movie requires significant compute and the returned
object is very large, so take care when requesting this data.

Parameters
----------
n_workers : int
Number of processes to use to transform the movie to what was shown
on the monitor. Default=None (use all cores).

Returns
-------
processed_movie : pd.DataFrame
"""
return self.fetch_api.get_natural_movie_template(n_workers=n_workers)
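For illustration, a hedged usage sketch of these two new methods (not from the PR; `project_cache` is a placeholder for an already constructed project cache instance):

# Sketch only: `project_cache` is assumed to be constructed elsewhere.
raw_movie = project_cache.get_raw_natural_movie()  # np.ndarray of frames
template = project_cache.get_natural_movie_template(n_workers=4)
# Per the unit test added in this PR, the DataFrame is indexed by
# movie_frame_index and has "unwarped" and "warped" columns.
first_warped_frame = template.loc[0, "warped"]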


def _write_json(path, df):
"""Wrapper to change the arguments for saving a pandas json
@@ -2,3 +2,4 @@
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.behavior_project_cloud_api import BehaviorProjectCloudApi # noqa: F401, E501
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.behavior_neuropixels_project_cloud_api import VisualBehaviorNeuropixelsProjectCloudApi # noqa: F401, E501
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.behavior_neuropixels_project_cloud_api import ProjectCloudApiBase # noqa: F401, E501
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.natural_movie_one_cache import NaturalMovieOneCache # noqa: F401, E501
@@ -1,4 +1,6 @@
from typing import Iterable
from typing import Iterable, Union
import numpy as np
import pathlib

import pandas as pd
from allensdk.brain_observatory.behavior.behavior_ophys_experiment import (
@@ -10,6 +12,9 @@
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.project_cloud_api_base import ( # noqa: E501
ProjectCloudApiBase,
)
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.natural_movie_one_cache import ( # noqa: E501
NaturalMovieOneCache,
)
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession,
)
@@ -18,6 +23,9 @@
return_one_dataframe_row_only,
)
from allensdk.core.utilities import literal_col_eval
from allensdk.api.cloud_cache.cloud_cache import (
S3CloudCache, LocalCache, StaticLocalCache)


COL_EVAL_LIST = ["ophys_experiment_id", "ophys_container_id", "driver_line"]
INTEGER_COLUMNS = [
@@ -59,6 +67,26 @@ def sanitize_data_columns(
class BehaviorProjectCloudApi(BehaviorProjectBase, ProjectCloudApiBase):
MANIFEST_COMPATIBILITY = ["0.0.0", "2.0.0"]

def __init__(
self,
cache: Union[S3CloudCache, LocalCache, StaticLocalCache],
skip_version_check: bool = False,
local: bool = False
):
super().__init__(cache=cache,
skip_version_check=skip_version_check,
local=local)
self._load_manifest_tables()

if isinstance(cache, S3CloudCache):
bucket_name = cache.bucket_name
else:
bucket_name = None
self._natural_movie_cache = NaturalMovieOneCache(
bucket_name=bucket_name,
cache_dir=str(pathlib.Path(cache.cache_dir) / "resources"),
)

def _load_manifest_tables(self):
expected_metadata = set(
[
@@ -249,14 +277,36 @@ def get_ophys_experiment_table(self):
"""
return self._ophys_experiment_table

def get_natural_movie_template(self, number: int) -> Iterable[bytes]:
"""Download a template for the natural movie stimulus. This is the
actual movie that was shown during the recording session.
:param number: identifier for this scene
:type number: int
:returns: An iterable yielding an npy file as bytes
def get_raw_natural_movie(self) -> np.ndarray:
"""Download the raw natural movie presented to the mouse.

Returns
-------
natural_movie_one : numpy.ndarray
"""
raise NotImplementedError()
return self._natural_movie_cache.get_raw_movie()

def get_natural_movie_template(self, n_workers=None) -> pd.DataFrame:
"""Download the movie if needed and process it into warped and unwarped
frames as presented to the mouse. The DataFrame is indexed with the
same frame index as shown in the stimulus presentation table.

Processing the movie requires significant compute and the returned
object is very large, so take care when requesting this data.

Parameters
----------
n_workers : int
Number of processes to use to transform the movie to what was shown
on the monitor. Default=None (use all cores).

Returns
-------
processed_movie : pd.DataFrame
"""
return self._natural_movie_cache.get_processed_template_movie(
n_workers=n_workers
)
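For reference, a sketch of the delegation path these methods follow (names taken from this diff; `api` is a placeholder for a constructed BehaviorProjectCloudApi):

# Sketch only: each call forwards to the NaturalMovieOneCache built in __init__.
raw = api.get_raw_natural_movie()  # -> NaturalMovieOneCache.get_raw_movie()
movie_df = api.get_natural_movie_template(n_workers=2)
# -> NaturalMovieOneCache.get_processed_template_movie(n_workers=2)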

def get_natural_scene_template(self, number: int) -> Iterable[bytes]:
"""Download a template for the natural scene stimulus. This is the
@@ -0,0 +1,91 @@
import pathlib
from typing import Union

import numpy as np
from allensdk.api.cloud_cache.cloud_cache import S3CloudCache
from allensdk.api.cloud_cache.file_attributes import CacheFileAttributes
from allensdk.brain_observatory.behavior.data_objects.stimuli.stimulus_templates import ( # noqa: E501
StimulusMovieTemplateFactory,
)


class NaturalMovieOneCache(S3CloudCache):
def __init__(self, cache_dir: Union[str, pathlib.Path], bucket_name: str):
super().__init__(
cache_dir=cache_dir,
bucket_name=bucket_name,
project_name=None,
ui_class_name=None,
)

# Set the file attributes by hand. This is used to get around needing
# to run the data release tool and create/download a manifest file.
# The hash has been pre-calculated from the file_hash_from_path
# method in allensdk/api/cloud_cache/utils.py
self._file_attributes = CacheFileAttributes(
url="https://staging.visual-behavior-ophys-data.s3.us-west-2.amazonaws.com/visual-behavior-ophys/resources/Movie_TOE1.npy", # noqa E501
version_id="0y.DEg5ASDGaWA4Syls5MeC.S5Y6oIIS",
file_hash="7e44cba154b29e1511ab8e5453b7aa5070f1ae456724b5b2541c97c052fbd80aebf159e5f933ab319bda8fdab7b863a096cdb44f129abd20a8c4cc791af4bc41", # noqa E501
local_path=pathlib.Path(cache_dir) / "Movie_TOE1.npy",
)
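For illustration, a sketch of constructing this cache directly, mirroring the unit test added in this PR (directory and bucket names are placeholders); the trailing comments note how the pre-computed hash could in principle be re-derived, assuming file_hash_from_path takes a local path and returns the hex digest:

# Sketch only: placeholder names; constructing the cache does not contact S3.
cache = NaturalMovieOneCache(cache_dir="/tmp/natural_movie_cache", bucket_name="some-bucket")
attrs = cache.get_file_attributes(file_id=None)  # file_id is ignored here
# Assumed usage of the helper mentioned in the comment above:
# from allensdk.api.cloud_cache.utils import file_hash_from_path
# file_hash_from_path(attrs.local_path) should reproduce the stored hash.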

def _list_all_manifests(self) -> list:
"""
This cache bypasses manifest files, so there are no manifests to list.
The base class calls this method and expects a value, so return None
rather than raising.
"""
return None
Contributor

Should this raise NotImplementedError instead?

Contributor Author

Nope. This is a function that gets called in the base class, so some value needs to be there. I'll change up the comment.


def get_file_attributes(self, file_id):
Contributor

I believe this returns CacheFileAttributes and not dict? Can you add a return type?
"""
Retrieve file attributes for a given file_id from the metadata.

Parameters
----------
file_id: str or int
The unique identifier of the file to be accessed (not used in this
override of the method)

Returns
-------
CacheFileAttributes
"""
return self._file_attributes

def get_raw_movie(self):
"""Download the raw movie data from the cloud and return it as a numpy
array.

Returns
-------
raw_movie : np.ndarray
"""
return np.load(self.download_data(None))

def get_processed_template_movie(self, n_workers=None):
"""Download the movie if needed and process it into warped and unwarped
frames as presented to the mouse. The DataFrame is indexed with the
same frame index as shown in the stimulus presentation table.

Processing the movie requires significant compute and the returned
object is very large, so take care when requesting this data.

Parameters
----------
n_workers : int
Number of processes to use to transform the movie to what was shown
on the monitor. Default=None (use all cores).

Returns
-------
processed_movie : pd.DataFrame
"""
movie_data = self.get_raw_movie()
movie_template = StimulusMovieTemplateFactory.from_unprocessed(
movie_name="natural_movie_one",
movie_frames=movie_data,
n_workers=n_workers,
)
return movie_template.to_dataframe(
index_name="movie_frame_index", index_type="int"
)
@@ -25,6 +25,8 @@ def __init__(
ophys_cells_table,
cachedir,
):
self.bucket_name = "test_bucket"
self.cache_dir = cachedir
self.file_id_column = "file_id"
self.session_table_path = cachedir / "session.csv"
self.behavior_session_table_path = cachedir / "behavior_session.csv"
@@ -0,0 +1,35 @@
from unittest.mock import patch

import numpy as np
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.natural_movie_one_cache import ( # noqa: E501
NaturalMovieOneCache,
)


def test_natural_movie_cache():
"""
Test that the natural movie is loaded and processed correctly
"""
rng = np.random.default_rng(1234)
with patch(
target="allensdk.brain_observatory.behavior."
"behavior_project_cache.project_apis.data_io."
"natural_movie_one_cache.NaturalMovieOneCache."
"get_raw_movie",
return_value=rng.integers(
low=0, high=256, size=(1, 304, 608), dtype=np.uint8
),
):
cache = NaturalMovieOneCache(
cache_dir="fake_dir", bucket_name="fake_bucket"
)
movie = cache.get_processed_template_movie(n_workers=1)
assert movie.index.name == "movie_frame_index"
assert movie.columns.to_list() == ["unwarped", "warped"]

unwarped = movie.loc[0, "unwarped"]
warped = movie.loc[0, "warped"]
assert unwarped.shape == (1200, 1920)
assert warped.shape == (1200, 1920)
assert unwarped.dtype == "float64"
assert warped.dtype == "uint8"