Ticket/PSB-207: #2726

Merged
merged 1 commit on Oct 12, 2023
32 changes: 31 additions & 1 deletion allensdk/api/cloud_cache/cloud_cache.py
@@ -134,6 +134,17 @@ def latest_manifest_file(self) -> str:
"""
return self._find_latest_file(self.manifest_file_names)

@property
def cache_dir(self) -> str:
"""Return the cache directory path.

Returns
-------
str
Full cache directory path
"""
return self._cache_dir

# ====================== BasicLocalCache methods ==========================

@abstractmethod
@@ -330,7 +341,7 @@ def data_path(self, file_id) -> dict:
RuntimeError
If the file cannot be downloaded
"""
file_attributes = self._manifest.data_file_attributes(file_id)
file_attributes = self.get_file_attributes(file_id)
exists = self._file_exists(file_attributes)
local_path = file_attributes.local_path
output = {'local_path': local_path,
@@ -339,6 +350,21 @@ def data_path(self, file_id) -> dict:

return output

def get_file_attributes(self, file_id):
Contributor

Pretty sure it returns CacheFileAttributes and not dict? Can you add a return type?

Contributor Author

Ah, I was looking at my natural movie cache and saw what looked like a dict at first glance. Fixed.

"""
Retrieve file attributes for a given file_id from the metadata.

Parameters
----------
file_id: str or int
The unique identifier of the file to be accessed

Returns
-------
CacheFileAttributes
"""
return self._manifest.data_file_attributes(file_id)
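For illustration, a minimal usage sketch of this new helper (placeholder names, not from the PR; it assumes `cache` is a concrete subclass with a loaded manifest and that the file id exists in it):

# Sketch only: `cache` and "some_file_id" are placeholders.
attrs = cache.get_file_attributes("some_file_id")
print(attrs.local_path)  # where the file is (or will be) stored on disk
print(attrs.file_hash)   # hash used to validate the downloaded file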


class CloudCacheBase(BasicLocalCache):
"""
@@ -1042,6 +1068,10 @@ def __init__(self, cache_dir, bucket_name, project_name,

_s3_client = None

@property
def bucket_name(self) -> str:
return self._bucket_name

@property
def s3_client(self):
if self._s3_client is None:
@@ -1,6 +1,7 @@
from typing import Optional, List, Union
from pathlib import Path
import pandas as pd
import numpy as np

from allensdk.api.warehouse_cache.cache import Cache
from allensdk.brain_observatory.behavior.behavior_ophys_experiment import \
@@ -330,6 +331,36 @@ def get_behavior_session(
behavior_session_id=behavior_session_id
)

def get_raw_natural_movie(self) -> np.ndarray:
"""Download the raw movie data from the cloud and return it as a numpy
array.

Returns
-------
raw_movie : np.ndarray
"""
return self.fetch_api.get_raw_natural_movie()

def get_natural_movie_template(self, n_workers=None) -> pd.DataFrame:
"""Download the movie if needed and process it into warped and unwarped
frames as presented to the mouse. The DataFrame is indexed with the
same frame index as shown in the stimulus presentation table.

Processing the movie requires significant compute and the returned
object is very large, so take care when requesting this data.

Parameters
----------
n_workers : int
Number of processes to use to transform the movie to what was shown
on the monitor. Default=None (use all cores).

Returns
-------
processed_movie : pd.DataFrame
"""
return self.fetch_api.get_natural_movie_template(n_workers=n_workers)
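For illustration, a hedged usage sketch of these two new methods (not from the PR; `project_cache` is a placeholder for an already constructed project cache instance):

# Sketch only: `project_cache` is assumed to be constructed elsewhere.
raw_movie = project_cache.get_raw_natural_movie()  # np.ndarray of frames
template = project_cache.get_natural_movie_template(n_workers=4)
# Per the unit test added in this PR, the DataFrame is indexed by
# movie_frame_index and has "unwarped" and "warped" columns.
first_warped_frame = template.loc[0, "warped"]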


def _write_json(path, df):
"""Wrapper to change the arguments for saving a pandas json
@@ -2,3 +2,4 @@
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.behavior_project_cloud_api import BehaviorProjectCloudApi # noqa: F401, E501
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.behavior_neuropixels_project_cloud_api import VisualBehaviorNeuropixelsProjectCloudApi # noqa: F401, E501
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.behavior_neuropixels_project_cloud_api import ProjectCloudApiBase # noqa: F401, E501
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.natural_movie_one_cache import NaturalMovieOneCache # noqa: F401, E501
@@ -1,4 +1,6 @@
from typing import Iterable
from typing import Iterable, Union
import numpy as np
import pathlib

import pandas as pd
from allensdk.brain_observatory.behavior.behavior_ophys_experiment import (
@@ -10,6 +12,9 @@
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.project_cloud_api_base import ( # noqa: E501
ProjectCloudApiBase,
)
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.natural_movie_one_cache import ( # noqa: E501
NaturalMovieOneCache,
)
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession,
)
@@ -18,6 +23,9 @@
return_one_dataframe_row_only,
)
from allensdk.core.utilities import literal_col_eval
from allensdk.api.cloud_cache.cloud_cache import (
S3CloudCache, LocalCache, StaticLocalCache)


COL_EVAL_LIST = ["ophys_experiment_id", "ophys_container_id", "driver_line"]
INTEGER_COLUMNS = [
@@ -59,6 +67,26 @@ def sanitize_data_columns(
class BehaviorProjectCloudApi(BehaviorProjectBase, ProjectCloudApiBase):
MANIFEST_COMPATIBILITY = ["0.0.0", "2.0.0"]

def __init__(
self,
cache: Union[S3CloudCache, LocalCache, StaticLocalCache],
skip_version_check: bool = False,
local: bool = False
):
super().__init__(cache=cache,
skip_version_check=skip_version_check,
local=local)
self._load_manifest_tables()

if isinstance(cache, S3CloudCache):
bucket_name = cache.bucket_name
else:
bucket_name = None
self._natural_movie_cache = NaturalMovieOneCache(
bucket_name=bucket_name,
cache_dir=str(pathlib.Path(cache.cache_dir) / "resources"),
)

def _load_manifest_tables(self):
expected_metadata = set(
[
@@ -249,14 +277,36 @@ def get_ophys_experiment_table(self):
"""
return self._ophys_experiment_table

def get_natural_movie_template(self, number: int) -> Iterable[bytes]:
"""Download a template for the natural movie stimulus. This is the
actual movie that was shown during the recording session.
:param number: identifier for this scene
:type number: int
:returns: An iterable yielding an npy file as bytes
def get_raw_natural_movie(self) -> np.ndarray:
"""Download the raw natural movie presented to the mouse.

Returns
-------
natural_movie_one : numpy.ndarray
"""
raise NotImplementedError()
return self._natural_movie_cache.get_raw_movie()

def get_natural_movie_template(self, n_workers=None) -> pd.DataFrame:
"""Download the movie if needed and process it into warped and unwarped
frames as presented to the mouse. The DataFrame is indexed with the
same frame index as shown in the stimulus presentation table.

Processing the movie requires significant compute and the returned
object is very large, so take care when requesting this data.

Parameters
----------
n_workers : int
Number of processes to use to transform the movie to what was shown
on the monitor. Default=None (use all cores).

Returns
-------
processed_movie : pd.DataFrame
"""
return self._natural_movie_cache.get_processed_template_movie(
n_workers=n_workers
)
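For reference, a sketch of the delegation path these methods follow (names taken from this diff; `api` is a placeholder for a constructed BehaviorProjectCloudApi):

# Sketch only: each call forwards to the NaturalMovieOneCache built in __init__.
raw = api.get_raw_natural_movie()  # -> NaturalMovieOneCache.get_raw_movie()
movie_df = api.get_natural_movie_template(n_workers=2)
# -> NaturalMovieOneCache.get_processed_template_movie(n_workers=2)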

def get_natural_scene_template(self, number: int) -> Iterable[bytes]:
"""Download a template for the natural scene stimulus. This is the
@@ -0,0 +1,91 @@
import pathlib
from typing import Union

import numpy as np
from allensdk.api.cloud_cache.cloud_cache import S3CloudCache
from allensdk.api.cloud_cache.file_attributes import CacheFileAttributes
from allensdk.brain_observatory.behavior.data_objects.stimuli.stimulus_templates import ( # noqa: E501
StimulusMovieTemplateFactory,
)


class NaturalMovieOneCache(S3CloudCache):
def __init__(self, cache_dir: Union[str, pathlib.Path], bucket_name: str):
super().__init__(
cache_dir=cache_dir,
bucket_name=bucket_name,
project_name=None,
ui_class_name=None,
)

# Set the file attributes by hand. This is used to get around needing
# to run the data release tool and create/download a manifest file.
# The hash has been pre-calculated from the file_hash_from_path
# method in allensdk/api/cloud_cache/utils.py
self._file_attributes = CacheFileAttributes(
url="https://staging.visual-behavior-ophys-data.s3.us-west-2.amazonaws.com/visual-behavior-ophys/resources/Movie_TOE1.npy", # noqa E501
version_id="0y.DEg5ASDGaWA4Syls5MeC.S5Y6oIIS",
file_hash="7e44cba154b29e1511ab8e5453b7aa5070f1ae456724b5b2541c97c052fbd80aebf159e5f933ab319bda8fdab7b863a096cdb44f129abd20a8c4cc791af4bc41", # noqa E501
local_path=pathlib.Path(cache_dir) / "Movie_TOE1.npy",
)
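For illustration, a sketch of constructing this cache directly, mirroring the unit test added in this PR (directory and bucket names are placeholders); the trailing comments note how the pre-computed hash could in principle be re-derived, assuming file_hash_from_path takes a local path and returns the hex digest:

# Sketch only: placeholder names; constructing the cache does not contact S3.
cache = NaturalMovieOneCache(cache_dir="/tmp/natural_movie_cache", bucket_name="some-bucket")
attrs = cache.get_file_attributes(file_id=None)  # file_id is ignored here
# Assumed usage of the helper mentioned in the comment above:
# from allensdk.api.cloud_cache.utils import file_hash_from_path
# file_hash_from_path(attrs.local_path) should reproduce the stored hash.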

def _list_all_manifests(self) -> list:
"""
This cache bypasses manifest files, so there are no manifests to list.
The base class calls this method and expects a value, so return None
rather than raising.
"""
return None
Contributor

Should this raise NotImplementedError instead?

Contributor Author

Nope. This is a function that gets called in the base class, so some value needs to be there. I'll change up the comment.


def get_file_attributes(self, file_id):
Contributor

I believe this returns CacheFileAttributes and not dict? Can you add a return type?
"""
Retrieve file attributes for a given file_id from the metadata.

Parameters
----------
file_id: str or int
The unique identifier of the file to be accessed (not used in this
override of the method)

Returns
-------
CacheFileAttributes
"""
return self._file_attributes

def get_raw_movie(self):
"""Download the raw movie data from the cloud and return it as a numpy
array.

Returns
-------
raw_movie : np.ndarray
"""
return np.load(self.download_data(None))

def get_processed_template_movie(self, n_workers=None):
"""Download the movie if needed and process it into warped and unwarped
frames as presented to the mouse. The DataFrame is indexed with the
same frame index as shown in the stimulus presentation table.

Processing the movie requires significant compute and the returned
object is very large, so take care when requesting this data.

Parameters
----------
n_workers : int
Number of processes to use to transform the movie to what was shown
on the monitor. Default=None (use all cores).

Returns
-------
processed_movie : pd.DataFrame
"""
movie_data = self.get_raw_movie()
movie_template = StimulusMovieTemplateFactory.from_unprocessed(
movie_name="natural_movie_one",
movie_frames=movie_data,
n_workers=n_workers,
)
return movie_template.to_dataframe(
index_name="movie_frame_index", index_type="int"
)
@@ -25,6 +25,8 @@ def __init__(
ophys_cells_table,
cachedir,
):
self.bucket_name = "test_bucket"
self.cache_dir = cachedir
self.file_id_column = "file_id"
self.session_table_path = cachedir / "session.csv"
self.behavior_session_table_path = cachedir / "behavior_session.csv"
@@ -0,0 +1,35 @@
from unittest.mock import patch

import numpy as np
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.natural_movie_one_cache import ( # noqa: E501
NaturalMovieOneCache,
)


def test_natural_movie_cache():
"""
Test that the natural movie is loaded and processed correctly
"""
rng = np.random.default_rng(1234)
with patch(
target="allensdk.brain_observatory.behavior."
"behavior_project_cache.project_apis.data_io."
"natural_movie_one_cache.NaturalMovieOneCache."
"get_raw_movie",
return_value=rng.integers(
low=0, high=256, size=(1, 304, 608), dtype=np.uint8
),
):
cache = NaturalMovieOneCache(
cache_dir="fake_dir", bucket_name="fake_bucket"
)
movie = cache.get_processed_template_movie(n_workers=1)
assert movie.index.name == "movie_frame_index"
assert movie.columns.to_list() == ["unwarped", "warped"]

unwarped = movie.loc[0, "unwarped"]
warped = movie.loc[0, "warped"]
assert unwarped.shape == (1200, 1920)
assert warped.shape == (1200, 1920)
assert unwarped.dtype == "float64"
assert warped.dtype == "uint8"