From bc0dfefd0a498b2a746bd840cba8d8de7ce0ac1f Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Thu, 31 Oct 2019 15:52:13 -0700 Subject: [PATCH 01/60] v1.2.0 --- allensdk/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/allensdk/__init__.py b/allensdk/__init__.py index 059f8d046..709a77084 100644 --- a/allensdk/__init__.py +++ b/allensdk/__init__.py @@ -35,7 +35,7 @@ # import logging -__version__ = '1.1.0' +__version__ = '1.2.0' try: from logging import NullHandler From e41e5e9190744c9e4f6fb85f7e490a30d337ef72 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 1 Nov 2019 10:28:56 -0700 Subject: [PATCH 02/60] GH-1151 expose methods for memoized cache clearing and getting size --- allensdk/api/cache.py | 40 +++++++++++----- allensdk/test/api/test_cache.py | 84 ++++++++++++++++++++++++--------- 2 files changed, 88 insertions(+), 36 deletions(-) diff --git a/allensdk/api/cache.py b/allensdk/api/cache.py index 666c4844a..e006c1558 100755 --- a/allensdk/api/cache.py +++ b/allensdk/api/cache.py @@ -42,25 +42,39 @@ import pandas.io.json as pj import functools -from functools import wraps +from functools import wraps, _make_key import os import logging import csv def memoize(f): - memodict = dict() - - @wraps(f) - def wrapper(*args, **kwargs): - key = (args, tuple(kwargs.items())) - - if key not in memodict: - memodict[key] = f(*args, **kwargs) - - return memodict[key] - - return wrapper + cache = {} + sentinel = object() # unique object for cache misses + make_key = _make_key # efficient key building from function args + cache_get = cache.get + cache_len = cache.__len__ + + @wraps(f) + def wrapper(*args, **kwargs): + key = make_key(args, kwargs, typed=False) # Don't consider 3.0 and 3 different + result = cache_get(key, sentinel) + if result is not sentinel: + return result + result = f(*args, **kwargs) + cache[key] = result + return result + + def clear_cache(): + cache.clear() + + def cache_size(): + return cache_len() + + 
wrapper.clear_cache = clear_cache + wrapper.cache_size = cache_size + + return wrapper class Cache(object): _log = logging.getLogger('allensdk.api.cache') diff --git a/allensdk/test/api/test_cache.py b/allensdk/test/api/test_cache.py index a93371561..5fd9ff79f 100755 --- a/allensdk/test/api/test_cache.py +++ b/allensdk/test/api/test_cache.py @@ -38,6 +38,7 @@ import pandas as pd import pandas.io.json as pj import numpy as np +import time import pytest from mock import MagicMock, mock_open, patch @@ -160,33 +161,70 @@ def test_wrap_dataframe(ju_read_url_get, ju_write, mock_read_json, rma, cache): ju_write.assert_called_once_with('example.txt', _msg) mock_read_json.assert_called_once_with('example.txt', orient='records') -def test_memoize(): - - import time +def test_memoize_with_function(): + @memoize + def f(x): + time.sleep(0.1) + return x + + # Build cache + for i in range(3): + uncached_result = f(i) + assert uncached_result == i + assert f.cache_size() == 3 + + # Test cache was accessed + for i in range(3): + t0 = time.time() + result = f(i) + t1 = time.time() + assert result == i + assert t1 - t0 < 0.1 + + # Test cache clear + f.clear_cache() + assert f.cache_size() == 0 + + +def test_memoize_with_kwarg_function(): + @memoize + def f(x, *, y, z=1): + time.sleep(0.1) + return (x * y * z) + + # Build cache + f(2, y=1, z=2) + assert f.cache_size() == 1 + + # Test cache was accessed + t0 = time.time() + result = f(2, y=1, z=2) + t1 = time.time() + assert result == 4 + assert t1 - t0 < 1.0 + + +def test_memoize_with_instance_method(): + class FooBar(object): @memoize - def f(x): - time.sleep(1) + def f(self, x): + time.sleep(0.1) return x - for ii in range(2): - t0 = time.time() - print(f(0), time.time() - t0) - - class FooBar(object): - - def __init__(self): pass - - @memoize - def f(self, x): - time.sleep(.1) - return 1 - - fb = FooBar() - - for ii in range(2): - t0 = time.time() - fb.f(0), time.time() - t0 + fb = FooBar() + # Build cache + for i in range(3): + 
uncached_result = fb.f(i) + assert uncached_result == i + assert fb.f.cache_size() == 3 + + for i in range(3): + t0 = time.time() + result = fb.f(i) + t1 = time.time() + assert result == i + assert t1 - t0 < 0.1 def test_get_default_manifest_file(): From 7a60d037bc6571d86216085414439786d4f8e643 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 1 Nov 2019 11:30:35 -0700 Subject: [PATCH 03/60] Revert "GH-1151 expose methods for memoized cache clearing and getting size" This reverts commit e41e5e9190744c9e4f6fb85f7e490a30d337ef72. --- allensdk/api/cache.py | 40 +++++----------- allensdk/test/api/test_cache.py | 84 +++++++++------------------------ 2 files changed, 36 insertions(+), 88 deletions(-) diff --git a/allensdk/api/cache.py b/allensdk/api/cache.py index e006c1558..666c4844a 100755 --- a/allensdk/api/cache.py +++ b/allensdk/api/cache.py @@ -42,39 +42,25 @@ import pandas.io.json as pj import functools -from functools import wraps, _make_key +from functools import wraps import os import logging import csv def memoize(f): - cache = {} - sentinel = object() # unique object for cache misses - make_key = _make_key # efficient key building from function args - cache_get = cache.get - cache_len = cache.__len__ - - @wraps(f) - def wrapper(*args, **kwargs): - key = make_key(args, kwargs, typed=False) # Don't consider 3.0 and 3 different - result = cache_get(key, sentinel) - if result is not sentinel: - return result - result = f(*args, **kwargs) - cache[key] = result - return result - - def clear_cache(): - cache.clear() - - def cache_size(): - return cache_len() - - wrapper.clear_cache = clear_cache - wrapper.cache_size = cache_size - - return wrapper + memodict = dict() + + @wraps(f) + def wrapper(*args, **kwargs): + key = (args, tuple(kwargs.items())) + + if key not in memodict: + memodict[key] = f(*args, **kwargs) + + return memodict[key] + + return wrapper class Cache(object): _log = logging.getLogger('allensdk.api.cache') diff --git 
a/allensdk/test/api/test_cache.py b/allensdk/test/api/test_cache.py index 5fd9ff79f..a93371561 100755 --- a/allensdk/test/api/test_cache.py +++ b/allensdk/test/api/test_cache.py @@ -38,7 +38,6 @@ import pandas as pd import pandas.io.json as pj import numpy as np -import time import pytest from mock import MagicMock, mock_open, patch @@ -161,70 +160,33 @@ def test_wrap_dataframe(ju_read_url_get, ju_write, mock_read_json, rma, cache): ju_write.assert_called_once_with('example.txt', _msg) mock_read_json.assert_called_once_with('example.txt', orient='records') +def test_memoize(): + + import time -def test_memoize_with_function(): - @memoize - def f(x): - time.sleep(0.1) - return x - - # Build cache - for i in range(3): - uncached_result = f(i) - assert uncached_result == i - assert f.cache_size() == 3 - - # Test cache was accessed - for i in range(3): - t0 = time.time() - result = f(i) - t1 = time.time() - assert result == i - assert t1 - t0 < 0.1 - - # Test cache clear - f.clear_cache() - assert f.cache_size() == 0 - - -def test_memoize_with_kwarg_function(): - @memoize - def f(x, *, y, z=1): - time.sleep(0.1) - return (x * y * z) - - # Build cache - f(2, y=1, z=2) - assert f.cache_size() == 1 - - # Test cache was accessed - t0 = time.time() - result = f(2, y=1, z=2) - t1 = time.time() - assert result == 4 - assert t1 - t0 < 1.0 - - -def test_memoize_with_instance_method(): - class FooBar(object): @memoize - def f(self, x): - time.sleep(0.1) + def f(x): + time.sleep(1) return x - fb = FooBar() - # Build cache - for i in range(3): - uncached_result = fb.f(i) - assert uncached_result == i - assert fb.f.cache_size() == 3 - - for i in range(3): - t0 = time.time() - result = fb.f(i) - t1 = time.time() - assert result == i - assert t1 - t0 < 0.1 + for ii in range(2): + t0 = time.time() + print(f(0), time.time() - t0) + + class FooBar(object): + + def __init__(self): pass + + @memoize + def f(self, x): + time.sleep(.1) + return 1 + + fb = FooBar() + + for ii in range(2): + 
t0 = time.time() + fb.f(0), time.time() - t0 def test_get_default_manifest_file(): From 7bb482a578e03e24ab66d9ec2867f30cc74bce8f Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 1 Nov 2019 11:23:04 -0700 Subject: [PATCH 04/60] GH-1151 expose methods for memoized cache clearing and getting size --- allensdk/api/cache.py | 51 +++++++++++++++----- allensdk/test/api/test_cache.py | 84 ++++++++++++++++++++++++--------- 2 files changed, 99 insertions(+), 36 deletions(-) diff --git a/allensdk/api/cache.py b/allensdk/api/cache.py index 666c4844a..80926650a 100755 --- a/allensdk/api/cache.py +++ b/allensdk/api/cache.py @@ -42,25 +42,50 @@ import pandas.io.json as pj import functools -from functools import wraps +from functools import wraps, _make_key import os import logging import csv def memoize(f): - memodict = dict() - - @wraps(f) - def wrapper(*args, **kwargs): - key = (args, tuple(kwargs.items())) - - if key not in memodict: - memodict[key] = f(*args, **kwargs) - - return memodict[key] - - return wrapper + """ + Creates an unbound cache of function calls and results. Note that arguments + of different types are not cached separately (so f(3.0) and f(3) are not + treated as distinct calls) + + Arguments to the cached function must be hashable. + + View the cache size with f.cache_size(). + Clear the cache with f.cache_clear(). + Access the underlying function with f.__wrapped__. 
+ """ + cache = {} + sentinel = object() # unique object for cache misses + make_key = _make_key # efficient key building from function args + cache_get = cache.get + cache_len = cache.__len__ + + @wraps(f) + def wrapper(*args, **kwargs): + key = make_key(args, kwargs, typed=False) # Don't consider 3.0 and 3 different + result = cache_get(key, sentinel) + if result is not sentinel: + return result + result = f(*args, **kwargs) + cache[key] = result + return result + + def clear_cache(): + cache.clear() + + def cache_size(): + return cache_len() + + wrapper.clear_cache = clear_cache + wrapper.cache_size = cache_size + + return wrapper class Cache(object): _log = logging.getLogger('allensdk.api.cache') diff --git a/allensdk/test/api/test_cache.py b/allensdk/test/api/test_cache.py index a93371561..64a14cdf2 100755 --- a/allensdk/test/api/test_cache.py +++ b/allensdk/test/api/test_cache.py @@ -38,6 +38,7 @@ import pandas as pd import pandas.io.json as pj import numpy as np +import time import pytest from mock import MagicMock, mock_open, patch @@ -160,33 +161,70 @@ def test_wrap_dataframe(ju_read_url_get, ju_write, mock_read_json, rma, cache): ju_write.assert_called_once_with('example.txt', _msg) mock_read_json.assert_called_once_with('example.txt', orient='records') -def test_memoize(): - - import time +def test_memoize_with_function(): + @memoize + def f(x): + time.sleep(0.1) + return x + + # Build cache + for i in range(3): + uncached_result = f(i) + assert uncached_result == i + assert f.cache_size() == 3 + + # Test cache was accessed + for i in range(3): + t0 = time.time() + result = f(i) + t1 = time.time() + assert result == i + assert t1 - t0 < 0.1 + + # Test cache clear + f.clear_cache() + assert f.cache_size() == 0 + + +def test_memoize_with_kwarg_function(): + @memoize + def f(x, *, y, z=1): + time.sleep(0.1) + return (x * y * z) + + # Build cache + f(2, y=1, z=2) + assert f.cache_size() == 1 + + # Test cache was accessed + t0 = time.time() + result = f(2, 
y=1, z=2) + t1 = time.time() + assert result == 4 + assert t1 - t0 < 0.1 + + +def test_memoize_with_instance_method(): + class FooBar(object): @memoize - def f(x): - time.sleep(1) + def f(self, x): + time.sleep(0.1) return x - for ii in range(2): - t0 = time.time() - print(f(0), time.time() - t0) - - class FooBar(object): - - def __init__(self): pass - - @memoize - def f(self, x): - time.sleep(.1) - return 1 - - fb = FooBar() - - for ii in range(2): - t0 = time.time() - fb.f(0), time.time() - t0 + fb = FooBar() + # Build cache + for i in range(3): + uncached_result = fb.f(i) + assert uncached_result == i + assert fb.f.cache_size() == 3 + + for i in range(3): + t0 = time.time() + result = fb.f(i) + t1 = time.time() + assert result == i + assert t1 - t0 < 0.1 def test_get_default_manifest_file(): From 423a9b7189a44646716cbddf4d876ed572e8d5b5 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Fri, 1 Nov 2019 11:48:21 -0700 Subject: [PATCH 05/60] Add BehaviorBase ABC --- .../behavior/internal/__init__.py | 1 + .../behavior/internal/behavior_base.py | 130 ++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 allensdk/brain_observatory/behavior/internal/__init__.py create mode 100644 allensdk/brain_observatory/behavior/internal/behavior_base.py diff --git a/allensdk/brain_observatory/behavior/internal/__init__.py b/allensdk/brain_observatory/behavior/internal/__init__.py new file mode 100644 index 000000000..857f5e359 --- /dev/null +++ b/allensdk/brain_observatory/behavior/internal/__init__.py @@ -0,0 +1 @@ +from allensdk.brain_observatory.behavior.internal.behavior_base import BehaviorBase # noqa: F401 diff --git a/allensdk/brain_observatory/behavior/internal/behavior_base.py b/allensdk/brain_observatory/behavior/internal/behavior_base.py new file mode 100644 index 000000000..288dc42af --- /dev/null +++ b/allensdk/brain_observatory/behavior/internal/behavior_base.py @@ -0,0 +1,130 @@ +import abc + +from typing import Dict, NamedTuple + +import numpy as 
np +import pandas as pd + + +RunningSpeed = NamedTuple("RunningSpeed", [("timestamps", np.ndarray), + ("values", np.ndarray)]) + + +class BehaviorBase(abc.ABC): + """Abstract base class implementing required methods for interacting with + behavior session data. + + Child classes should be instantiated with a fetch API that implements these + methods. Both fetch API and session object should inherit from this base. + """ + @abc.abstractmethod + def get_licks(self) -> pd.DataFrame: + """Get lick data from pkl file. + + Returns + ------- + np.ndarray + A dataframe containing lick timestamps. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_rewards(self) -> pd.DataFrame: + """Get reward data from pkl file. + + Returns + ------- + pd.DataFrame + A dataframe containing timestamps of delivered rewards. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_running_data_df(self) -> pd.DataFrame: + """Get running speed data. + + Returns + ------- + pd.DataFrame + Dataframe containing various signals used to compute running speed. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_running_speed(self) -> RunningSpeed: + """Get running speed using timestamps from + self.get_stimulus_timestamps. + + Note: Do not correct for monitor delay. + + Returns + ------- + RunningSpeed (NamedTuple with two fields) + timestamps : np.ndarray + Timestamps of running speed data samples + values : np.ndarray + Running speed of the experimental subject (in cm / s). + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_stimulus_presentations(self) -> pd.DataFrame: + """Get stimulus presentation data. + + Returns + ------- + pd.DataFrame + Table whose rows are stimulus presentations + (i.e. a given image, for a given duration, typically 250 ms) + and whose columns are presentation characteristics. 
+ """ + raise NotImplementedError() + + @abc.abstractmethod + def get_stimulus_templates(self) -> Dict[str, np.ndarray]: + """Get stimulus templates (movies, scenes) for behavior session. + + Returns + ------- + Dict[str, np.ndarray] + A dictionary containing the stimulus images presented during the + session. Keys are data set names, and values are 3D numpy arrays. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_stimulus_timestamps(self) -> np.ndarray: + """Get stimulus timestamps from pkl file. + + Note: Located with behavior_session_id + + Returns + ------- + np.ndarray + Timestamps associated with stimulus presentations on the monitor. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_task_parameters(self) -> dict: + """Get task parameters from pkl file. + + Returns + ------- + dict + A dictionary containing parameters used to define the task runtime + behavior. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_trials(self) -> pd.DataFrame: + """Get trials from pkl file + + Returns + ------- + pd.DataFrame + A dataframe containing behavioral trial start/stop times, + and trial data + """ + raise NotImplementedError() From 52a372e904bd6eb61165546ef418f1ee9e412455 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Fri, 1 Nov 2019 15:03:31 -0700 Subject: [PATCH 06/60] Add BehaviorOphysBase ABC --- .../behavior/internal/__init__.py | 1 + .../behavior/internal/behavior_ophys_base.py | 149 ++++++++++++++++++ 2 files changed, 150 insertions(+) create mode 100644 allensdk/brain_observatory/behavior/internal/behavior_ophys_base.py diff --git a/allensdk/brain_observatory/behavior/internal/__init__.py b/allensdk/brain_observatory/behavior/internal/__init__.py index 857f5e359..6130a90e3 100644 --- a/allensdk/brain_observatory/behavior/internal/__init__.py +++ b/allensdk/brain_observatory/behavior/internal/__init__.py @@ -1 +1,2 @@ from allensdk.brain_observatory.behavior.internal.behavior_base import BehaviorBase 
# noqa: F401 +from allensdk.brain_observatory.behavior.internal.behhavior_ophys_base import BehaviorOphysBase # noqa: F401 diff --git a/allensdk/brain_observatory/behavior/internal/behavior_ophys_base.py b/allensdk/brain_observatory/behavior/internal/behavior_ophys_base.py new file mode 100644 index 000000000..8ae582882 --- /dev/null +++ b/allensdk/brain_observatory/behavior/internal/behavior_ophys_base.py @@ -0,0 +1,149 @@ +import abc + +import numpy as np +import pandas as pd + +from allensdk.brain_observatory.behavior.internal.behavior_base import BehaviorBase +from allensdk.brain_observatory.behavior.image_api import Image + + +class BehaviorOphysBase(BehaviorBase): + """Abstract base class implementing required methods for interacting with + behavior+ophys session data. + + Child classes should be instantiated with a fetch API that implements these + methods. Both fetch API and session object should inherit from this base. + """ + @abc.abstractmethod + def get_average_projection(self) -> Image: + """Get an image whose values are the average obtained values at + each pixel of the ophys movie over time. + + Returns + ---------- + allensdk.brain_observatory.behavior.image_api.Image: + Array-like interface to avg projection image data and metadata. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_max_projection(self) -> Image: + """Get an image whose values are the maximum obtained values at + each pixel of the ophys movie over time. + + Returns + ---------- + allensdk.brain_observatory.behavior.image_api.Image: + Array-like interface to max projection image data and metadata. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_cell_specimen_table(self) -> pd.DataFrame: + """Get a cell specimen dataframe containing ROI information about + cells identified in an ophys experiment. + + Returns + ------- + pd.DataFrame + Cell ROI information organized into a dataframe. + Index is the cell ROI IDs. 
+ """ + raise NotImplementedError() + + @abc.abstractmethod + def get_corrected_fluorescence_traces(self) -> pd.DataFrame: + """Get motion-corrected fluorescence traces. + + Returns + ------- + pd.DataFrame + Motion-corrected fluorescence traces organized into a dataframe. + Index is the cell ROI IDs. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_dff_traces(self) -> pd.DataFrame: + """Get a table of delta fluorescence over fluorescence traces. + + Returns + ------- + pd.DataFrame + The traces of dff (normalized fluorescence) organized into a + dataframe. Index is the cell ROI IDs. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_metadata(self) -> dict: + """Get behavior+ophys session metadata. + + Returns + ------- + dict + A dictionary of session-specific metadata. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_motion_correction(self) -> pd.DataFrame: + """Get motion correction trace data. + + Returns + ------- + pd.DataFrame + A dataframe containing trace data used during motion + correction computation. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_ophys_timestamps(self) -> np.ndarray: + """Get optical physiology frame timestamps. + + Returns + ------- + np.ndarray + Timestamps associated with frames captured by the microscope. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_raw_stimulus_timestamps(self) -> np.ndarray: + """Get raw stimulus timestamps. + + Returns + ------- + np.ndarray + Timestamps associated with stimulus presentations on the monitor + without accounting for monitor delay. + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_stimulus_timestamps(self) -> np.ndarray: + """Get stimulus timestamps. + + Returns + ------- + np.ndarray + Timestamps associated with stimulus presentations on the monitor + after accounting for monitor delay. 
+ """ + raise NotImplementedError() + + @abc.abstractmethod + def get_stimulus_presentations(self) -> pd.DataFrame: + """Get stimulus presentation data. + + NOTE: Uses monitor delay corrected stimulus timestamps. + + Returns + ------- + pd.DataFrame + Table whose rows are stimulus presentations + (i.e. a given image, for a given duration, typically 250 ms) + and whose columns are presentation characteristics. + """ + raise NotImplementedError() From ffc7c319c78ed03aa2d777401481ac130849818e Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Fri, 1 Nov 2019 15:04:16 -0700 Subject: [PATCH 07/60] Add additional documentation info to BehaviorBase abstract methods --- .../brain_observatory/behavior/internal/behavior_base.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/allensdk/brain_observatory/behavior/internal/behavior_base.py b/allensdk/brain_observatory/behavior/internal/behavior_base.py index 288dc42af..3c0873e9b 100644 --- a/allensdk/brain_observatory/behavior/internal/behavior_base.py +++ b/allensdk/brain_observatory/behavior/internal/behavior_base.py @@ -55,7 +55,7 @@ def get_running_speed(self) -> RunningSpeed: """Get running speed using timestamps from self.get_stimulus_timestamps. - Note: Do not correct for monitor delay. + NOTE: Do not correct for monitor delay. Returns ------- @@ -71,6 +71,8 @@ def get_running_speed(self) -> RunningSpeed: def get_stimulus_presentations(self) -> pd.DataFrame: """Get stimulus presentation data. + NOTE: Uses timestamps that do not account for monitor delay. + Returns ------- pd.DataFrame @@ -96,12 +98,13 @@ def get_stimulus_templates(self) -> Dict[str, np.ndarray]: def get_stimulus_timestamps(self) -> np.ndarray: """Get stimulus timestamps from pkl file. - Note: Located with behavior_session_id + NOTE: Located with behavior_session_id Returns ------- np.ndarray - Timestamps associated with stimulus presentations on the monitor. 
+ Timestamps associated with stimulus presentations on the monitor + that do no account for monitor delay. """ raise NotImplementedError() From 6b3c5ad200f307df258e8fdca6b837389132e686 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Mon, 4 Nov 2019 11:16:55 -0800 Subject: [PATCH 08/60] hotfix import typo --- allensdk/brain_observatory/behavior/internal/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/allensdk/brain_observatory/behavior/internal/__init__.py b/allensdk/brain_observatory/behavior/internal/__init__.py index 6130a90e3..3364c5707 100644 --- a/allensdk/brain_observatory/behavior/internal/__init__.py +++ b/allensdk/brain_observatory/behavior/internal/__init__.py @@ -1,2 +1,2 @@ from allensdk.brain_observatory.behavior.internal.behavior_base import BehaviorBase # noqa: F401 -from allensdk.brain_observatory.behavior.internal.behhavior_ophys_base import BehaviorOphysBase # noqa: F401 +from allensdk.brain_observatory.behavior.internal.behavior_ophys_base import BehaviorOphysBase # noqa: F401 From 8b8321fe7b9d03f487cf923c9be0507d21be4f38 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 1 Nov 2019 12:38:59 -0700 Subject: [PATCH 09/60] GH-1143: OphysLimsAPI method to clear all its caches remove session-level caching and update new code to use mypy notations --- .../behavior/behavior_ophys_session.py | 259 ++++++++++++------ allensdk/internal/api/ophys_lims_api.py | 16 ++ 2 files changed, 198 insertions(+), 77 deletions(-) diff --git a/allensdk/brain_observatory/behavior/behavior_ophys_session.py b/allensdk/brain_observatory/behavior/behavior_ophys_session.py index 3229fac76..2eddfcb26 100644 --- a/allensdk/brain_observatory/behavior/behavior_ophys_session.py +++ b/allensdk/brain_observatory/behavior/behavior_ophys_session.py @@ -1,98 +1,203 @@ import numpy as np import pandas as pd import xarray as xr -import math -from typing import NamedTuple -import os +from typing import Any -from allensdk.core.lazy_property import 
LazyProperty, LazyPropertyMixin from allensdk.internal.api.behavior_ophys_api import BehaviorOphysLimsApi -from allensdk.brain_observatory.behavior.behavior_ophys_api.behavior_ophys_nwb_api import equals, BehaviorOphysNwbApi +from allensdk.brain_observatory.behavior.behavior_ophys_api\ + .behavior_ophys_nwb_api import BehaviorOphysNwbApi from allensdk.deprecated import legacy -from allensdk.brain_observatory.behavior.trials_processing import calculate_reward_rate -from allensdk.brain_observatory.behavior.dprime import get_rolling_dprime, get_trial_count_corrected_false_alarm_rate, get_trial_count_corrected_hit_rate -from allensdk.brain_observatory.behavior.dprime import get_hit_rate, get_false_alarm_rate +from allensdk.brain_observatory.behavior.trials_processing\ + import calculate_reward_rate +from allensdk.brain_observatory.behavior.dprime import\ + get_rolling_dprime, get_trial_count_corrected_false_alarm_rate,\ + get_trial_count_corrected_hit_rate +from allensdk.brain_observatory.behavior.dprime import get_hit_rate,\ + get_false_alarm_rate from allensdk.brain_observatory.behavior.image_api import Image, ImageApi +from allensdk.brain_observatory.running_speed import RunningSpeed +import logging -class BehaviorOphysSession(LazyPropertyMixin): - """Represents data from a single Visual Behavior Ophys imaging session. LazyProperty attributes access the data only on the first demand, and then memoize the result for reuse. 
- - Attributes: - ophys_experiment_id : int (LazyProperty) - Unique identifier for this experimental session - max_projection : allensdk.brain_observatory.behavior.image_api.Image (LazyProperty) - 2D max projection image - stimulus_timestamps : numpy.ndarray (LazyProperty) - Timestamps associated the stimulus presentations on the monitor - ophys_timestamps : numpy.ndarray (LazyProperty) - Timestamps associated with frames captured by the microscope - metadata : dict (LazyProperty) - A dictionary of session-specific metadata - dff_traces : pandas.DataFrame (LazyProperty) - The traces of dff organized into a dataframe; index is the cell roi ids - cell_specimen_table : pandas.DataFrame (LazyProperty) - Cell roi information organized into a dataframe; index is the cell roi ids - running_speed : allensdk.brain_observatory.running_speed.RunningSpeed (LazyProperty) - NamedTuple with two fields - timestamps : numpy.ndarray - Timestamps of running speed data samples - values : np.ndarray - Running speed of the experimental subject (in cm / s). - running_data_df : pandas.DataFrame (LazyProperty) - Dataframe containing various signals used to compute running speed - stimulus_presentations : pandas.DataFrame (LazyProperty) - Table whose rows are stimulus presentations (i.e. a given image, for a given duration, typically 250 ms) and whose columns are presentation characteristics. - stimulus_templates : dict (LazyProperty) - A dictionary containing the stimulus images presented during the session keys are data set names, and values are 3D numpy arrays. 
- licks : pandas.DataFrame (LazyProperty) - A dataframe containing lick timestamps - rewards : pandas.DataFrame (LazyProperty) - A dataframe containing timestamps of delivered rewards - task_parameters : dict (LazyProperty) - A dictionary containing parameters used to define the task runtime behavior - trials : pandas.DataFrame (LazyProperty) - A dataframe containing behavioral trial start/stop times, and trial data - corrected_fluorescence_traces : pandas.DataFrame (LazyProperty) - The motion-corrected fluorescence traces organized into a dataframe; index is the cell roi ids - average_projection : allensdk.brain_observatory.behavior.image_api.Image (LazyProperty) - 2D image of the microscope field of view, averaged across the experiment - motion_correction : pandas.DataFrame LazyProperty - A dataframe containing trace data used during motion correction computation +class BehaviorOphysSession(object): + """Represents data from a single Visual Behavior Ophys imaging session. + Can be initialized with an api that fetches data, or by using class methods + `from_lims` and `from_nwb_path`. 
""" @classmethod - def from_lims(cls, ophys_experiment_id): + def from_lims(cls, ophys_experiment_id: int) -> "BehaviorOphysSession": return cls(api=BehaviorOphysLimsApi(ophys_experiment_id)) @classmethod - def from_nwb_path(cls, nwb_path, **api_kwargs): - api_kwargs["filter_invalid_rois"] = api_kwargs.get("filter_invalid_rois", True) - return cls(api=BehaviorOphysNwbApi.from_path(path=nwb_path, **api_kwargs)) + def from_nwb_path( + cls, nwb_path: str, **api_kwargs: Any) -> "BehaviorOphysSession": + api_kwargs["filter_invalid_rois"] = api_kwargs.get( + "filter_invalid_rois", True) + return cls(api=BehaviorOphysNwbApi.from_path( + path=nwb_path, **api_kwargs)) def __init__(self, api=None): - self.api = api - self.ophys_experiment_id = LazyProperty(self.api.get_ophys_experiment_id) - self.max_projection = LazyProperty(self.get_max_projection) - self.stimulus_timestamps = LazyProperty(self.api.get_stimulus_timestamps) - self.ophys_timestamps = LazyProperty(self.api.get_ophys_timestamps) - self.metadata = LazyProperty(self.api.get_metadata) - self.dff_traces = LazyProperty(self.api.get_dff_traces) - self.cell_specimen_table = LazyProperty(self.api.get_cell_specimen_table) - self.running_speed = LazyProperty(self.api.get_running_speed) - self.running_data_df = LazyProperty(self.api.get_running_data_df) - self.stimulus_presentations = LazyProperty(self.api.get_stimulus_presentations) - self.stimulus_templates = LazyProperty(self.api.get_stimulus_templates) - self.licks = LazyProperty(self.api.get_licks) - self.rewards = LazyProperty(self.api.get_rewards) - self.task_parameters = LazyProperty(self.api.get_task_parameters) - self.trials = LazyProperty(self.api.get_trials) - self.corrected_fluorescence_traces = LazyProperty(self.api.get_corrected_fluorescence_traces) - self.average_projection = LazyProperty(self.get_average_projection) - self.motion_correction = LazyProperty(self.api.get_motion_correction) - self.segmentation_mask_image = 
LazyProperty(self.get_segmentation_mask_image) + # Using properties rather than initializing attributes to take advantage + # of API-level cache and not introduce a lot of overhead when the + # class is initialized (sometimes these calls can take a while) + @property + def ophys_experiment_id(self) -> int: + """Unique identifier for this experimental session. + :rtype: int + """ + return self.api.get_ophys_experiment_id() + + @property + def max_projection(self) -> Image: + """2D max projection image. + :rtype: allensdk.brain_observatory.behavior.image_api.Image + """ + return self.get_max_projection() + + @property + def stimulus_timestamps(self) -> np.ndarray: + """Timestamps associated withs timulus presentations on the + monitor (corrected for monitor delay). + :rtype: numpy.ndarray + """ + return self.api.get_stimulus_timestamps() + + @property + def ophys_timestamps(self) -> np.ndarray: + """Timestamps associated with frames captured by the microscope + :rtype: numpy.ndarray + """ + return self.api.ophys_timestamps() + + @property + def metadata(self) -> dict: + """Dictioanry of session-specific metadata. + :rtype: dict + """ + return self.api.get_metadata() + + @property + def dff_traces(self) -> pd.DataFrame: + """Traces of dff organized into a dataframe; index is the cell roi ids. + :rtype: pandas.DataFrame + """ + return self.api.get_dff_traces() + + @property + def cell_specimen_table(self) -> pd.DataFrame: + """Cell roi information organized into a dataframe; index is the cell + roi ids. + :rtype: pandas.DataFrame + """ + return self.api.get_cell_specimen_table() + + @property + def running_speed(self) -> RunningSpeed: + """Running speed of mouse. NamedTuple with two fields + timestamps : numpy.ndarray + Timestamps of running speed data samples + values : np.ndarray + Running speed of the experimental subject (in cm / s). 
+ :rtype: allensdk.brain_observatory.running_speed.RunningSpeed + """ + return self.api.get_running_speed() + + @property + def running_data_df(self) -> pd.DataFrame: + """Dataframe containing various signals used to compute running speed + :rtype: pandas.DataFrame + """ + return self.api.get_running_data_df() + + @property + def stimulus_presentations(self) -> pd.DataFrame: + """Table whose rows are stimulus presentations (i.e. a given image, + for a given duration, typically 250 ms) and whose columns are + presentation characteristics. + :rtype: pandas.DataFrame + """ + return self.api.get_stimulus_presentations() + + @property + def stimulus_templates(self) -> dict: + """A dictionary containing the stimulus images presented during the + session keys are data set names, and values are 3D numpy arrays. + :rtype: dict + """ + return self.api.get_stimulus_templates() + + @property + def licks(self) -> pd.DataFrame: + """A dataframe containing lick timestamps. + :rtype: pandas.DataFrame + """ + return self.api.get_licks() + + @property + def rewards(self) -> pd.DataFrame: + """A dataframe containing timestamps of delivered rewards. + :rtype: pandas.DataFrame + """ + return self.api.get_rewards() + + @property + def task_parameters(self) -> dict: + """A dictionary containing parameters used to define the task runtime + behavior. + :rtype: dict + """ + return self.api.get_task_parameters() + + @property + def trials(self) -> pd.DataFrame: + """A dataframe containing behavioral trial start/stop times, and trial + data + :rtype: pandas.DataFrame""" + return self.api.get_trials() + + @property + def corrected_fluorescence_traces(self) -> pd.DataFrame: + """The motion-corrected fluorescence traces organized into a dataframe; + index is the cell roi ids. 
+ :rtype: pandas.DataFrame + """ + return self.api.get_corrected_fluorescence_traces() + + @property + def average_projection(self) -> pd.DataFrame: + """2D image of the microscope field of view, averaged across the + experiment + :rtype: pandas.DataFrame + """ + return self.get_average_projection() + + @property + def motion_correction(self) -> pd.DataFrame: + """A dataframe containing trace data used during motion correction + computation + :rtype: pandas.DataFrame + """ + return self.api.get_motion_correction() + + @property + def segmentation_mask_image(self) -> Image: + """An image with pixel value 1 if that pixel was included in an ROI, + and 0 otherwise + :rtype: allensdk.brain_observatory.behavior.image_api.Image + """ + return self.get_segmentation_mask_image() + + def clear_cache(self) -> None: + """Convenience method to clear the api cache, if applicable.""" + try: + self.api.clear_cache() + except AttributeError: + logging.getLogger("BehaviorOphysSession").warning( + f"Attempted to clear API cache, but method `clear_cache`" + " does not exist on {self.api.__name__}") def get_roi_masks(self, cell_specimen_ids=None): """ Obtains boolean masks indicating the location of one or more cell's ROIs in this session. @@ -102,7 +207,7 @@ def get_roi_masks(self, cell_specimen_ids=None): cell_specimen_ids : array-like of int, optional ROI masks for these cell specimens will be returned. The default behavior is to return masks for all cell specimens. 
- + Returns ------- result : xr.DataArray diff --git a/allensdk/internal/api/ophys_lims_api.py b/allensdk/internal/api/ophys_lims_api.py index bdbf9be3a..c5723eeaf 100644 --- a/allensdk/internal/api/ophys_lims_api.py +++ b/allensdk/internal/api/ophys_lims_api.py @@ -11,6 +11,7 @@ from allensdk.brain_observatory.behavior.image_api import ImageApi import allensdk.brain_observatory.roi_masks as roi from allensdk.internal.core.lims_utilities import safe_system_path +import inspect class OphysLimsApi(PostgresQueryMixin): @@ -22,6 +23,21 @@ def __init__(self, ophys_experiment_id): def get_ophys_experiment_id(self): return self.ophys_experiment_id + def clear_cache(self): + """ + Calls `clear_cache` method on all bound methods in this instance + (where valid). + Intended to clear calls cached with the `memoize` decorator. + Note that this will also clear functions decorated with `lru_cache` and + `lfu_cache` in this class (or any other function with `clear_cache` + attribute). + """ + for _, method in inspect.getmembers(self, inspect.ismethod): + try: + method.clear_cache() + except (AttributeError, TypeError): + pass + @memoize def get_ophys_experiment_dir(self): query = ''' From 0ac8368d4a36efaf1c80ef80ab252bd257785b9e Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 1 Nov 2019 16:50:07 -0700 Subject: [PATCH 10/60] Minor updates: update cache clear method to be the same as functools.lru_cache update import continuation method --- allensdk/api/cache.py | 9 ++++---- .../behavior/behavior_ophys_session.py | 22 +++++++++---------- allensdk/internal/api/ophys_lims_api.py | 4 ++-- allensdk/test/api/test_cache.py | 10 ++++----- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/allensdk/api/cache.py b/allensdk/api/cache.py index 80926650a..55fd9581c 100755 --- a/allensdk/api/cache.py +++ b/allensdk/api/cache.py @@ -75,18 +75,19 @@ def wrapper(*args, **kwargs): result = f(*args, **kwargs) cache[key] = result return result - - def clear_cache(): + + def 
cache_clear(): cache.clear() - + def cache_size(): return cache_len() - wrapper.clear_cache = clear_cache + wrapper.cache_clear = cache_clear wrapper.cache_size = cache_size return wrapper + class Cache(object): _log = logging.getLogger('allensdk.api.cache') diff --git a/allensdk/brain_observatory/behavior/behavior_ophys_session.py b/allensdk/brain_observatory/behavior/behavior_ophys_session.py index 2eddfcb26..01707c33d 100644 --- a/allensdk/brain_observatory/behavior/behavior_ophys_session.py +++ b/allensdk/brain_observatory/behavior/behavior_ophys_session.py @@ -2,21 +2,21 @@ import pandas as pd import xarray as xr from typing import Any +import logging from allensdk.internal.api.behavior_ophys_api import BehaviorOphysLimsApi from allensdk.brain_observatory.behavior.behavior_ophys_api\ .behavior_ophys_nwb_api import BehaviorOphysNwbApi from allensdk.deprecated import legacy -from allensdk.brain_observatory.behavior.trials_processing\ - import calculate_reward_rate -from allensdk.brain_observatory.behavior.dprime import\ - get_rolling_dprime, get_trial_count_corrected_false_alarm_rate,\ - get_trial_count_corrected_hit_rate -from allensdk.brain_observatory.behavior.dprime import get_hit_rate,\ - get_false_alarm_rate +from allensdk.brain_observatory.behavior.trials_processing import ( + calculate_reward_rate) +from allensdk.brain_observatory.behavior.dprime import ( + get_rolling_dprime, get_trial_count_corrected_false_alarm_rate, + get_trial_count_corrected_hit_rate) +from allensdk.brain_observatory.behavior.dprime import ( + get_hit_rate, get_false_alarm_rate) from allensdk.brain_observatory.behavior.image_api import Image, ImageApi from allensdk.brain_observatory.running_speed import RunningSpeed -import logging class BehaviorOphysSession(object): @@ -59,7 +59,7 @@ def max_projection(self) -> Image: @property def stimulus_timestamps(self) -> np.ndarray: - """Timestamps associated withs timulus presentations on the + """Timestamps associated with stimulus 
presentations on the monitor (corrected for monitor delay). :rtype: numpy.ndarray """ @@ -190,10 +190,10 @@ def segmentation_mask_image(self) -> Image: """ return self.get_segmentation_mask_image() - def clear_cache(self) -> None: + def cache_clear(self) -> None: """Convenience method to clear the api cache, if applicable.""" try: - self.api.clear_cache() + self.api.cache_clear() except AttributeError: logging.getLogger("BehaviorOphysSession").warning( f"Attempted to clear API cache, but method `clear_cache`" diff --git a/allensdk/internal/api/ophys_lims_api.py b/allensdk/internal/api/ophys_lims_api.py index c5723eeaf..db37ae2e6 100644 --- a/allensdk/internal/api/ophys_lims_api.py +++ b/allensdk/internal/api/ophys_lims_api.py @@ -23,7 +23,7 @@ def __init__(self, ophys_experiment_id): def get_ophys_experiment_id(self): return self.ophys_experiment_id - def clear_cache(self): + def cache_clear(self): """ Calls `clear_cache` method on all bound methods in this instance (where valid). @@ -34,7 +34,7 @@ def clear_cache(self): """ for _, method in inspect.getmembers(self, inspect.ismethod): try: - method.clear_cache() + method.cache_clear() except (AttributeError, TypeError): pass diff --git a/allensdk/test/api/test_cache.py b/allensdk/test/api/test_cache.py index 64a14cdf2..78e1e5e35 100755 --- a/allensdk/test/api/test_cache.py +++ b/allensdk/test/api/test_cache.py @@ -167,13 +167,13 @@ def test_memoize_with_function(): def f(x): time.sleep(0.1) return x - + # Build cache for i in range(3): uncached_result = f(i) assert uncached_result == i assert f.cache_size() == 3 - + # Test cache was accessed for i in range(3): t0 = time.time() @@ -181,9 +181,9 @@ def f(x): t1 = time.time() assert result == i assert t1 - t0 < 0.1 - + # Test cache clear - f.clear_cache() + f.cache_clear() assert f.cache_size() == 0 @@ -192,7 +192,7 @@ def test_memoize_with_kwarg_function(): def f(x, *, y, z=1): time.sleep(0.1) return (x * y * z) - + # Build cache f(2, y=1, z=2) assert f.cache_size() 
== 1 From b1cfcb7dca4bceb187a4eea65283dba5e0a75b4e Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 11:08:09 -0700 Subject: [PATCH 11/60] #1102 use pytest to generate tmp files in test_session_analysis_regression --- .../test_session_analysis_regression.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/allensdk/test/brain_observatory/test_session_analysis_regression.py b/allensdk/test/brain_observatory/test_session_analysis_regression.py index 369139485..d914ff6cd 100644 --- a/allensdk/test/brain_observatory/test_session_analysis_regression.py +++ b/allensdk/test/brain_observatory/test_session_analysis_regression.py @@ -4,7 +4,6 @@ import pytest import os -import tempfile import json from pkg_resources import resource_filename # @UnresolvedImport import numpy as np @@ -102,9 +101,8 @@ def nm2(nwb_c, analysis_c): return NaturalMovie.from_analysis_file(BODS(nwb_c), analysis_c, si.NATURAL_MOVIE_TWO) @pytest.fixture(scope="module") -def analysis_a_new(nwb_a): - with tempfile.NamedTemporaryFile(delete=True) as tf: - save_path = tf.name +def analysis_a_new(nwb_a, tmpdir_factory): + save_path = str(tmpdir_factory.mktemp("session_a") / "session_a_new.h5") logging.debug("running analysis a") session_analysis = SessionAnalysis(nwb_a, save_path) @@ -118,9 +116,8 @@ def analysis_a_new(nwb_a): os.remove(save_path) @pytest.fixture(scope="module") -def analysis_b_new(nwb_b): - with tempfile.NamedTemporaryFile(delete=True) as tf: - save_path = tf.name +def analysis_b_new(nwb_b, tmpdir_factory): + save_path = str(tmpdir_factory.mktemp("session_b") / "session_b_new.h5") logging.debug("running analysis b") session_analysis = SessionAnalysis(nwb_b, save_path) @@ -134,9 +131,8 @@ def analysis_b_new(nwb_b): os.remove(save_path) @pytest.fixture(scope="module") -def analysis_c_new(nwb_c): - with tempfile.NamedTemporaryFile(delete=True) as tf: - save_path = tf.name +def analysis_c_new(nwb_c, tmpdir_factory): + save_path = 
str(tmpdir_factory.mktemp("session_c") / "session_c_new.h5") logging.debug("running analysis c") session_analysis = SessionAnalysis(nwb_c, save_path) From 0ad2cf44feb33b47f7012fc44a318b807e65cd89 Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 12:05:50 -0700 Subject: [PATCH 12/60] #1102 add a note to brain observatory docs describing ks_2samp issue --- doc_template/brain_observatory.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc_template/brain_observatory.rst b/doc_template/brain_observatory.rst index e57d6e122..97f909bd2 100644 --- a/doc_template/brain_observatory.rst +++ b/doc_template/brain_observatory.rst @@ -12,6 +12,12 @@ an experiment container have different stimulus protocols, but cover the same im .. image:: /_static/container_session_layout.png :align: center +**Note:** Version 1.3 of scipy fixed an error in its 2 sample Kolmogorov-Smirnoff test implementation. The new version produces more accurate p values for small and medium-sized samples. +This change impacts speed tuning analysis p values (as returned by `StimulusAnalysis.get_speed_tuning`). +If you access precalculated analysis results via `BrainObservatoryCache.get_ophys_experiment_analysis`, you will see values calculated +using an older version of scipy's `ks_2samp`. To access values calculated from the new version, install scipy>=1.3.0 in your environment and construct a `StimulusAnalysis` object +from a `BrainObservatoryNwbDataSet` (as returned by `BrainObservatoryCache.get_ophys_experiment_data`). + **Note:** Data collected after September 2016 uses a new session C stimulus designed to better-characterize spatial receptive fields in higher visual areas. The original locally sparse noise stimulus used 4.65 visual degree pixels. Session C2 broke that stimulus into two separate stimulus blocks: one with 4.65 degree pixels and one with 9.3 degree pixels. 
Note that the :py:mod:`~allensdk.brain_observatory.stimulus_info` From d1235da72a424d924f281f24e7239f0891559962 Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 12:14:00 -0700 Subject: [PATCH 13/60] #1102 warn about scipy ks_2samp changes on get_speed_tuning --- allensdk/brain_observatory/stimulus_analysis.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/allensdk/brain_observatory/stimulus_analysis.py b/allensdk/brain_observatory/stimulus_analysis.py index 69c83d2bf..3b629f04f 100644 --- a/allensdk/brain_observatory/stimulus_analysis.py +++ b/allensdk/brain_observatory/stimulus_analysis.py @@ -33,6 +33,7 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # +import warnings import scipy.stats as st import numpy as np import pandas as pd @@ -86,6 +87,10 @@ def __init__(self, data_set): self._pval = StimulusAnalysis._PRELOAD self._peak = StimulusAnalysis._PRELOAD + # get_speed_tuning emits a warning describing a scipy ks_2samp update. + # we only want to see this warning once + self.__warned_speed_tuning = False + @property def stim_table(self): if self._stim_table is StimulusAnalysis._PRELOAD: @@ -285,6 +290,13 @@ def get_speed_tuning(self, binsize): tuple: binned_dx_sp, binned_cells_sp, binned_dx_vis, binned_cells_vis, peak_run """ + if not self.__warned_speed_tuning: + self.__warned_speed_tuning = True + warnings.warn( + f"scipy 1.3 (your version: {scipy.__version__}) improved two-sample Kolmogorov-Smirnoff test p values for small and medium-sized samples. " + "Precalculated speed tuning p values may not agree with outputs obtained under recent scipy versions!" 
+ ) + StimulusAnalysis._log.info( 'Calculating speed tuning, spontaneous vs visually driven') From b03867579caf2466ee685387b854e7629c1de79b Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 12:16:09 -0700 Subject: [PATCH 14/60] #1102 don't manually clean up test artifacts in test_session_analysis_regression --- .../brain_observatory/test_session_analysis_regression.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/allensdk/test/brain_observatory/test_session_analysis_regression.py b/allensdk/test/brain_observatory/test_session_analysis_regression.py index d914ff6cd..1dcc466be 100644 --- a/allensdk/test/brain_observatory/test_session_analysis_regression.py +++ b/allensdk/test/brain_observatory/test_session_analysis_regression.py @@ -112,8 +112,6 @@ def analysis_a_new(nwb_a, tmpdir_factory): yield save_path - if os.path.exists(save_path): - os.remove(save_path) @pytest.fixture(scope="module") def analysis_b_new(nwb_b, tmpdir_factory): @@ -127,8 +125,6 @@ def analysis_b_new(nwb_b, tmpdir_factory): yield save_path - if os.path.exists(save_path): - os.remove(save_path) @pytest.fixture(scope="module") def analysis_c_new(nwb_c, tmpdir_factory): @@ -148,9 +144,6 @@ def analysis_c_new(nwb_c, tmpdir_factory): yield save_path - if os.path.exists(save_path): - os.remove(save_path) - def compare_peak(p1, p2): assert len(set(p1.columns) ^ set(p2.columns)) == 0 From 62b061ab3f97e2b3f783f5192748fadbd9dde562 Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Mon, 4 Nov 2019 14:20:21 -0800 Subject: [PATCH 15/60] #1102 point session analysis regression tests at new data --- .../test_session_analysis_regression_data.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/allensdk/test/brain_observatory/test_session_analysis_regression_data.json b/allensdk/test/brain_observatory/test_session_analysis_regression_data.json index b862e2859..5ef6a8c77 100644 --- a/allensdk/test/brain_observatory/test_session_analysis_regression_data.json +++ 
b/allensdk/test/brain_observatory/test_session_analysis_regression_data.json @@ -8,9 +8,9 @@ "nwb_c": "/allen/aibs/informatics/module_test_data/observatory/py2_analysis/569494121.nwb" }, "3": { - "analysis_a": "/allen/aibs/informatics/module_test_data/observatory/py3_analysis/510859641_three_session_A_analysis.h5", - "analysis_b": "/allen/aibs/informatics/module_test_data/observatory/py3_analysis/510698988_three_session_B_analysis.h5", - "analysis_c": "/allen/aibs/informatics/module_test_data/observatory/py3_analysis/510532780_three_session_C_analysis.h5", + "analysis_a": "/allen/aibs/informatics/module_test_data/observatory/py3_analysis/new_ks_2samp/510859641_three_session_A_analysis.h5", + "analysis_b": "/allen/aibs/informatics/module_test_data/observatory/py3_analysis/new_ks_2samp/510698988_three_session_B_analysis.h5", + "analysis_c": "/allen/aibs/informatics/module_test_data/observatory/py3_analysis/new_ks_2samp/510532780_three_session_C_analysis.h5", "nwb_a": "/allen/aibs/informatics/module_test_data/observatory//plots/510859641.nwb", "nwb_b": "/allen/aibs/informatics/module_test_data/observatory/plots/510698988.nwb", "nwb_c": "/allen/aibs/informatics/module_test_data/observatory/plots/510532780.nwb" From 5ec3d6e8d3394d8b1ce6f951d9260cb88f6a4d10 Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Mon, 4 Nov 2019 14:20:49 -0800 Subject: [PATCH 16/60] #1102 import scipy into stimulus analysis (so that warning can print version) --- allensdk/brain_observatory/stimulus_analysis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/allensdk/brain_observatory/stimulus_analysis.py b/allensdk/brain_observatory/stimulus_analysis.py index 3b629f04f..9a4838fb8 100644 --- a/allensdk/brain_observatory/stimulus_analysis.py +++ b/allensdk/brain_observatory/stimulus_analysis.py @@ -35,6 +35,7 @@ # import warnings import scipy.stats as st +import scipy import numpy as np import pandas as pd import logging From 3c036fae682df1aaf1429e643a751dc31cf2a865 Mon Sep 17 00:00:00 2001 
From: nile graddis Date: Tue, 29 Oct 2019 12:50:28 -0700 Subject: [PATCH 17/60] #1082 update ecephys project lims api units and add tests --- .../ecephys_project_api.py | 19 +- .../ecephys_project_lims_api.py | 91 ++++++--- .../ecephys/test_ecephys_project_lims_api.py | 189 +++++++++--------- 3 files changed, 178 insertions(+), 121 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index edf43a490..f6560013e 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -1,3 +1,13 @@ +from typing import Optional, TypeVar + +import numpy as np +import pandas as pd + + +ArrayLike = TypeVar("ArrayLike", list, np.ndarray, pd.Series, tuple) + + + class EcephysProjectApi: def get_sessions(self, *args, **kwargs): raise NotImplementedError() @@ -11,7 +21,14 @@ def get_targeted_regions(self, *args, **kwargs): def get_isi_experiments(self, *args, **kwargs): raise NotImplementedError() - def get_units(self, *args, **kwargs): + def get_units( + self, + unit_ids: Optional[ArrayLike] = None, + channel_ids: Optional[ArrayLike] = None, + probe_ids: Optional[ArrayLike] = None, + session_ids: Optional[ArrayLike] = None, + published_at: Optional[str] = None + ): raise NotImplementedError() def get_channels(self, *args, **kwargs): diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index 3b6cc6a91..521132895 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py @@ -1,10 +1,11 @@ from pathlib import Path import shutil import warnings +from typing import Optional, TypeVar import pandas as 
pd -from .ecephys_project_api import EcephysProjectApi +from .ecephys_project_api import EcephysProjectApi, ArrayLike from .http_engine import HttpEngine from .utilities import postgres_macros, build_and_execute @@ -75,33 +76,77 @@ def get_probe_lfp_data(self, probe_id): ) def get_units( - self, unit_ids=None, - channel_ids=None, - probe_ids=None, - session_ids=None, - quality="good", - **kwargs + self, + unit_ids: Optional[ArrayLike] = None, + channel_ids: Optional[ArrayLike] = None, + probe_ids: Optional[ArrayLike] = None, + session_ids: Optional[ArrayLike] = None, + published_at: Optional[str] = None ): + """ Query LIMS for records describing sorted ecephys units. + + Parameters + ---------- + units_ids : + channel_ids : + probe_ids : + session_ids : + published_at : + If provided, only units from sessions published prior to this date + will be returned. Format should be YYYY-MM-DD + + """ + + published_at_not_null = None if published_at is None else True + published_at = f"'{published_at}'" if published_at is not None else None + response = build_and_execute( """ {%- import 'postgres_macros' as pm -%} {%- import 'macros' as m -%} - select eu.* + select + eu.id, + eu.ecephys_channel_id, + eu.quality, + eu.snr, + eu.firing_rate, + eu.isi_violations, + eu.presence_ratio, + eu.amplitude_cutoff, + eu.isolation_distance, + eu.l_ratio, + eu.d_prime, + eu.nn_hit_rate, + eu.nn_miss_rate, + eu.silhouette_score, + eu.max_drift, + eu.cumulative_drift, + eu.epoch_name_quality_metrics, + eu.epoch_name_waveform_metrics, + eu.duration, + eu.halfwidth, + eu.\"PT_ratio\", + eu.repolarization_slope, + eu.recovery_slope, + eu.amplitude, + eu.spread, + eu.velocity_above, + eu.velocity_below from ecephys_units eu join ecephys_channels ec on ec.id = eu.ecephys_channel_id join ecephys_probes ep on ep.id = ec.ecephys_probe_id - join ecephys_sessions es on es.id = ep.ecephys_session_id - where ec.valid_data - and ep.workflow_state != 'failed' - and es.workflow_state != 'failed' - 
{{pm.optional_equals('eu.quality', quality) -}} - {{pm.optional_contains('eu.id', unit_ids) -}} - {{pm.optional_contains('ec.id', channel_ids) -}} - {{pm.optional_contains('ep.id', probe_ids) -}} - {{pm.optional_contains('es.id', session_ids) -}} - {{pm.optional_le('eu.amplitude_cutoff', amplitude_cutoff_maximum) -}} - {{pm.optional_ge('eu.presence_ratio', presence_ratio_minimum) -}} - {{pm.optional_le('eu.isi_violations', isi_violations_maximum) -}} + join ecephys_sessions es on es.id = ep.ecephys_session_id + where + not es.habituation + and ec.valid_data + and ep.workflow_state != 'failed' + and es.workflow_state != 'failed' + {{pm.optional_not_null('es.published_at', published_at_not_null)}} + {{pm.optional_le('es.published_at', published_at)}} + {{pm.optional_contains('eu.id', unit_ids) -}} + {{pm.optional_contains('ec.id', channel_ids) -}} + {{pm.optional_contains('ep.id', probe_ids) -}} + {{pm.optional_contains('es.id', session_ids) -}} """, base=postgres_macros(), engine=self.postgres_engine.select, @@ -109,10 +154,8 @@ def get_units( channel_ids=channel_ids, probe_ids=probe_ids, session_ids=session_ids, - quality=f"'{quality}'" if quality is not None else quality, - amplitude_cutoff_maximum=get_unit_filter_value("amplitude_cutoff_maximum", replace_none=False, **kwargs), - presence_ratio_minimum=get_unit_filter_value("presence_ratio_minimum", replace_none=False, **kwargs), - isi_violations_maximum=get_unit_filter_value("isi_violations_maximum", replace_none=False, **kwargs) + published_at_not_null=published_at_not_null, + published_at=published_at ) response.set_index("id", inplace=True) diff --git a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py index 4c011bfc4..985cda5f3 100644 --- a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py +++ b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py @@ -1,5 +1,6 @@ import 
os import re +from unittest import mock import pytest import pandas as pd @@ -10,103 +11,99 @@ ) -@pytest.mark.parametrize( - "method,conditions,expected", - [ - [ - "get_sessions", - {"published": True}, - re.compile(r".*where true and es\.workflow_state in \('uploaded'\) and es\.habituation = false and es\.published_at is not null and pr\.name in \('BrainTV Neuropixels Visual Behavior','BrainTV Neuropixels Visual Coding'\)$"), - ], - [ - "get_sessions", - {"session_ids": [1, 2, 3]}, - re.compile(r".*and es\.id in \(1,2,3\).*"), - ], - [ - "get_units", - {"session_ids": [1, 2, 3]}, - re.compile(r"select eu\.\*.*and es\.id in \(1,2,3\) and eu.amplitude_cutoff <= 0.1 and eu.presence_ratio >= 0.95 and eu.isi_violations <= 0.5$"), - ], - [ - "get_units", - {"session_ids": [1, 2, 3], "unit_ids": (4, 5, 6)}, - re.compile(r"select eu\.\*.*and eu\.id in \(4,5,6\) and es\.id in \(1,2,3\) and eu.amplitude_cutoff <= 0.1 and eu.presence_ratio >= 0.95 and eu.isi_violations <= 0.5$") - ], - [ - "get_channels", - {}, - re.compile(r"select ec\.id as id.*where valid_data and ep.workflow_state != 'failed' and es.workflow_state != 'failed'$"), - ], - [ - "get_probes", - {}, - re.compile(r"select ep\.id as id, ep.ecephys_session_id.*where true and ep.workflow_state != 'failed' and es.workflow_state != 'failed'$"), - ], - ], -) -def test_query(method, conditions, expected): - class MockPgEngine: - def select(self, rendered): - self.query = " ".join([item.strip() for item in str(rendered).split()]) - return pd.DataFrame({"id": [1, 2, 3], "ecephys_channel_id": [1, 2, 3], "genotype": [np.nan, "a", "b"]}) - - pg_engine = MockPgEngine() - api = epla.EcephysProjectLimsApi(postgres_engine=pg_engine, app_engine=None) - - results = getattr(api, method)(**conditions) - - obtained = pg_engine.query.strip() - print(obtained) - match = expected.match(obtained) - assert match is not None - - -def test_get_session_data(): - - session_id = 12345 - wkf_id = 987 - - class MockPgEngine: - def 
select(self, rendered): - pattern = re.compile( - r".*and ear.ecephys_session_id = (?P\d+).*", re.DOTALL - ) - match = pattern.match(rendered) - sid_obt = int(match["session_id"]) - assert session_id == sid_obt - return pd.DataFrame({"id": [wkf_id]}) - - class MockHttpEngine: - def stream(self, path): - assert path == f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" - - api = epla.EcephysProjectLimsApi( - postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() - ) - api.get_session_data(session_id) +class MockSelector: + def __init__(self, checks, response): + self.checks = checks + self.response = response -def test_get_probe_data(): + def __call__(self, query, *args, **kwargs): + self.passed = {} + self.query = query + for name, check in self.checks.items(): + self.passed[name] = check(query) + return self.response - probe_id = 12345 - wkf_id = 987 - class MockPgEngine: - def select(self, rendered): - pattern = re.compile( - r".*and earp.ecephys_probe_id = (?P\d+).*", re.DOTALL - ) - match = pattern.match(rendered) - pid_obt = int(match["probe_id"]) - assert probe_id == pid_obt - return pd.DataFrame({"id": [wkf_id]}) - - class MockHttpEngine: - def stream(self, path): - assert path == f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" - - api = epla.EcephysProjectLimsApi( - postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() - ) - api.get_probe_lfp_data(probe_id) +@pytest.mark.parametrize("method_name,kwargs,response,checks,expected", [ + [ + "get_units", + {}, + pd.DataFrame({"id": [5, 6], "something": [12, 14]}), + { + "no_pa_check": lambda st: "published_at" not in st + }, + pd.DataFrame( + {"something": [12, 14]}, + index=pd.Index(name="id", data=[5, 6]) + ) + ], + [ + "get_units", + {"session_ids": [1, 2, 3]}, + pd.DataFrame({"id": [5, 6], "something": [12, 14]}), + { + "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None + }, + pd.DataFrame( + {"something": [12, 14]}, + 
index=pd.Index(name="id", data=[5, 6]) + ) + ], + [ + "get_units", + {"unit_ids": [1, 2, 3]}, + pd.DataFrame({"id": [5, 6], "something": [12, 14]}), + { + "filters_units": lambda st: re.compile(r".+and eu.id in \(1,2,3\).*", re.DOTALL).match(st) is not None + }, + pd.DataFrame( + {"something": [12, 14]}, + index=pd.Index(name="id", data=[5, 6]) + ) + ], + [ + "get_units", + {"channel_ids": [1, 2, 3], "probe_ids": [4, 5, 6]}, + pd.DataFrame({"id": [5, 6], "something": [12, 14]}), + { + "filters_channels": lambda st: re.compile(r".+and ec.id in \(1,2,3\).*", re.DOTALL).match(st) is not None, + "filters_probes": lambda st: re.compile(r".+and ep.id in \(4,5,6\).*", re.DOTALL).match(st) is not None + }, + pd.DataFrame( + {"something": [12, 14]}, + index=pd.Index(name="id", data=[5, 6]) + ) + ], + [ + "get_units", + {"published_at": "2019-10-22"}, + pd.DataFrame({"id": [5, 6], "something": [12, 14]}), + { + "checks_pa_not_null": lambda st: re.compile(r".+and es.published_at is not null.*", re.DOTALL).match(st) is not None, + "checks_pa": lambda st: re.compile(r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(st) is not None + }, + pd.DataFrame( + {"something": [12, 14]}, + index=pd.Index(name="id", data=[5, 6]) + ) + ] +]) +def test_pg_query(method_name,kwargs, response, checks, expected): + + selector = MockSelector(checks, response) + + with mock.patch("allensdk.internal.api.psycopg2_select", new=selector) as ptc: + api = epla.EcephysProjectLimsApi.default() + obtained = getattr(api, method_name)(**kwargs) + pd.testing.assert_frame_equal(expected, obtained, check_like=True) + + any_checks_failed = False + for name, result in ptc.passed.items(): + if not result: + print(f"check {name} failed") + any_checks_failed = True + + if any_checks_failed: + print(ptc.query) + assert not any_checks_failed From c7f2bed6ddca214a4d45874a2ccf0ee995dd418c Mon Sep 17 00:00:00 2001 From: nile graddis Date: Tue, 29 Oct 2019 13:21:18 -0700 Subject: [PATCH 18/60] #1082 
EcephysProjectLimsApi.get_channels returns the same columns as EcephysProjectWarehouseApi --- .../ecephys_project_api.py | 8 ++- .../ecephys_project_lims_api.py | 61 +++++++++---------- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index f6560013e..c1b1d9bb2 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -31,7 +31,13 @@ def get_units( ): raise NotImplementedError() - def get_channels(self, *args, **kwargs): + def get_channels( + self, + channel_ids: Optional[ArrayLike] = None, + probe_ids: Optional[ArrayLike] = None, + session_ids: Optional[ArrayLike] = None, + published_at: Optional[str] = None + ): raise NotImplementedError() def get_probes(self, *args, **kwargs): diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index 521132895..5a43135d7 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py @@ -157,56 +157,55 @@ def get_units( published_at_not_null=published_at_not_null, published_at=published_at ) + return response.set_index("id", inplace=True) - response.set_index("id", inplace=True) + def get_channels( + self, + channel_ids: Optional[ArrayLike] = None, + probe_ids: Optional[ArrayLike] = None, + session_ids: Optional[ArrayLike] = None, + published_at: Optional[str] = None + ): - return response + published_at_not_null = None if published_at is None else True + published_at = f"'{published_at}'" if published_at is not None else None - def get_channels(self, channel_ids=None, probe_ids=None, 
session_ids=None, **kwargs): response = build_and_execute( """ {%- import 'postgres_macros' as pm -%} select - ec.id as id, - ec.ecephys_probe_id, + ec.id, + ec.ecephys_probe_id, ec.local_index, ec.probe_vertical_position, ec.probe_horizontal_position, ec.manual_structure_id as ecephys_structure_id, st.acronym as ecephys_structure_acronym, - pc.unit_count - from ecephys_channels ec + ec.anterior_posterior_ccf_coordinate, + ec.dorsal_ventral_ccf_coordinate, + ec.left_right_ccf_coordinate + from ecephys_channels ec join ecephys_probes ep on ep.id = ec.ecephys_probe_id - join ecephys_sessions es on es.id = ep.ecephys_session_id - left join structures st on st.id = ec.manual_structure_id - join ( - select ech.id as ecephys_channel_id, - count (distinct eun.id) as unit_count - from ecephys_channels ech - join ecephys_units eun on ( - eun.ecephys_channel_id = ech.id - and eun.quality = 'good' - {{pm.optional_le('eun.amplitude_cutoff', amplitude_cutoff_maximum) -}} - {{pm.optional_ge('eun.presence_ratio', presence_ratio_minimum) -}} - {{pm.optional_le('eun.isi_violations', isi_violations_maximum) -}} - ) - group by ech.id - ) pc on ec.id = pc.ecephys_channel_id - where valid_data - and ep.workflow_state != 'failed' - and es.workflow_state != 'failed' - {{pm.optional_contains('ec.id', channel_ids) -}} - {{pm.optional_contains('ep.id', probe_ids) -}} - {{pm.optional_contains('es.id', session_ids) -}} + join ecephys_sessions es on es.id = ep.ecephys_session_id + left join structures st on ec.manual_structure_id = st.id + where + not es.habituation + and valid_data + and ep.workflow_state != 'failed' + and es.workflow_state != 'failed' + {{pm.optional_not_null('es.published_at', published_at_not_null)}} + {{pm.optional_le('es.published_at', published_at)}} + {{pm.optional_contains('ec.id', channel_ids) -}} + {{pm.optional_contains('ep.id', probe_ids) -}} + {{pm.optional_contains('es.id', session_ids) -}} """, base=postgres_macros(), engine=self.postgres_engine.select, 
channel_ids=channel_ids, probe_ids=probe_ids, session_ids=session_ids, - amplitude_cutoff_maximum=get_unit_filter_value("amplitude_cutoff_maximum", replace_none=False, **kwargs), - presence_ratio_minimum=get_unit_filter_value("presence_ratio_minimum", replace_none=False, **kwargs), - isi_violations_maximum=get_unit_filter_value("isi_violations_maximum", replace_none=False, **kwargs) + published_at_not_null=published_at_not_null, + published_at=published_at ) return response.set_index("id") From ad33c8243130a7968e9bd56d36deaae663e226f7 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Tue, 29 Oct 2019 13:29:22 -0700 Subject: [PATCH 19/60] #1082 EcephysProjectLimsApi.get_probes returns the same columns as EcephysProjectWarehouseApi --- .../ecephys_project_api.py | 7 +- .../ecephys_project_lims_api.py | 110 ++++++------------ 2 files changed, 39 insertions(+), 78 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index c1b1d9bb2..26357a86b 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -40,7 +40,12 @@ def get_channels( ): raise NotImplementedError() - def get_probes(self, *args, **kwargs): + def get_probes( + self, + probe_ids: Optional[ArrayLike] = None, + session_ids: Optional[ArrayLike] = None, + published_at: Optional[str] = None + ): raise NotImplementedError() def get_probe_lfp_data(self, probe_id, *args, **kwargs): diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index 5a43135d7..21237ea4c 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py 
@@ -209,94 +209,50 @@ def get_channels( ) return response.set_index("id") - def get_probes(self, probe_ids=None, session_ids=None, **kwargs): + def get_probes( + self, + probe_ids: Optional[ArrayLike] = None, + session_ids: Optional[ArrayLike] = None, + published_at: Optional[str] = None + ): + + published_at_not_null = None if published_at is None else True + published_at = f"'{published_at}'" if published_at is not None else None + response = build_and_execute( """ {%- import 'postgres_macros' as pm -%} select - ep.id as id, - ep.ecephys_session_id, - ep.global_probe_sampling_rate, - ep.global_probe_lfp_sampling_rate, - total_time_shift, - channel_count, - unit_count, - case - when nwb_id is not null then true - else false - end as has_lfp_nwb, - str.structure_acronyms as structure_acronyms - from ecephys_probes ep - join ecephys_sessions es on es.id = ep.ecephys_session_id - join ( - select epr.id as ecephys_probe_id, - count (distinct ech.id) as channel_count, - count (distinct eun.id) as unit_count - from ecephys_probes epr - join ecephys_channels ech on ( - ech.ecephys_probe_id = epr.id - and ech.valid_data - ) - join ecephys_units eun on ( - eun.ecephys_channel_id = ech.id - and eun.quality = 'good' - {{pm.optional_le('eun.amplitude_cutoff', amplitude_cutoff_maximum) -}} - {{pm.optional_ge('eun.presence_ratio', presence_ratio_minimum) -}} - {{pm.optional_le('eun.isi_violations', isi_violations_maximum) -}} - ) - group by epr.id - ) chc on ep.id = chc.ecephys_probe_id - left join ( - select - epr.id as ecephys_probe_id, - wkf.id as nwb_id - from ecephys_probes epr - join ecephys_analysis_runs ear on ( - ear.ecephys_session_id = epr.ecephys_session_id - and ear.current - ) - right join ecephys_analysis_run_probes earp on ( - earp.ecephys_probe_id = epr.id - and earp.ecephys_analysis_run_id = ear.id - ) - right join well_known_files wkf on ( - wkf.attachable_id = earp.id - and wkf.attachable_type = 'EcephysAnalysisRunProbe' - ) - join well_known_file_types wkft 
on wkft.id = wkf.well_known_file_type_id - where wkft.name = 'EcephysLfpNwb' - ) nwb on ep.id = nwb.ecephys_probe_id - left join ( - select epr.id as ecephys_probe_id, - array_agg (st.id) as structure_ids, - array_agg (distinct st.acronym) as structure_acronyms - from ecephys_probes epr - join ecephys_channels ech on ( - ech.ecephys_probe_id = epr.id - and ech.valid_data - ) - left join structures st on st.id = ech.manual_structure_id - group by epr.id - ) str on ep.id = str.ecephys_probe_id - where true - and ep.workflow_state != 'failed' - and es.workflow_state != 'failed' - {{pm.optional_contains('ep.id', probe_ids) -}} - {{pm.optional_contains('es.id', session_ids) -}} + ep.id, + ep.ecephys_session_id, + ep.name, + ep.global_probe_sampling_rate as sampling_rate, + ep.global_probe_lfp_sampling_rate as lfp_sampling_rate, + ep.phase, + ep.air_channel_index, + ep.surface_channel_index, + ep.use_lfp_data as has_lfp_data, + ep.temporal_subsampling_factor as lfp_temporal_subsampling_factor + from ecephys_probes ep + join ecephys_sessions es on es.id = ep.ecephys_session_id + where + not es.habituation + and ep.workflow_state != 'failed' + and es.workflow_state != 'failed' + {{pm.optional_not_null('es.published_at', published_at_not_null)}} + {{pm.optional_le('es.published_at', published_at)}} + {{pm.optional_contains('ep.id', probe_ids) -}} + {{pm.optional_contains('es.id', session_ids) -}} """, base=postgres_macros(), engine=self.postgres_engine.select, probe_ids=probe_ids, session_ids=session_ids, - amplitude_cutoff_maximum=get_unit_filter_value("amplitude_cutoff_maximum", replace_none=False, **kwargs), - presence_ratio_minimum=get_unit_filter_value("presence_ratio_minimum", replace_none=False, **kwargs), - isi_violations_maximum=get_unit_filter_value("isi_violations_maximum", replace_none=False, **kwargs) + published_at_not_null=published_at_not_null, + published_at=published_at ) - response = response.set_index("id") - # Clarify name for external users - 
response.rename(columns={"use_lfp_data": "has_lfp_data"}, inplace=True) + return response.set_index("id") - return response def get_sessions( self, From f19cd1d5ae0aa440e19ebb33189b8f67fe764830 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Tue, 29 Oct 2019 13:40:17 -0700 Subject: [PATCH 20/60] #1082 EcephysProjectLimsApi.get_sessions returns the same columns as EcephysProjectWarehouseApi --- .../ecephys_project_api.py | 6 +- .../ecephys_project_lims_api.py | 87 +++++-------------- 2 files changed, 26 insertions(+), 67 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index 26357a86b..f7e283192 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -9,7 +9,11 @@ class EcephysProjectApi: - def get_sessions(self, *args, **kwargs): + def get_sessions( + self, + session_ids: Optional[ArrayLike] = None, + published_at: Optional[str] = None + ): raise NotImplementedError() def get_session_data(self, session_id, *args, **kwargs): diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index 21237ea4c..9f554da02 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py @@ -256,63 +256,36 @@ def get_probes( def get_sessions( self, - session_ids=None, - workflow_states=("uploaded",), - published=None, - habituation=False, - project_names=( - "BrainTV Neuropixels Visual Behavior", - "BrainTV Neuropixels Visual Coding", - ), - **kwargs + session_ids: Optional[ArrayLike] = None, + published_at: Optional[str] = None ): + published_at_not_null = None if published_at is None 
else True + published_at = f"'{published_at}'" if published_at is not None else None + response = build_and_execute( """ {%- import 'postgres_macros' as pm -%} {%- import 'macros' as m -%} select - stimulus_name as session_type, - sp.id as specimen_id, - es.id as id, + es.id, + es.specimen_id, + es.stimulus_name as session_type, + es.isi_experiment_id, + es.date_of_acquisition, + es.published_at, dn.full_genotype as genotype, - gd.name as gender, + gd.name as sex, ages.days as age_in_days, - pr.code as project_code, - probe_count, - channel_count, - unit_count, case when nwb_id is not null then true else false - end as has_nwb, - str.structure_acronyms as structure_acronyms + end as has_nwb from ecephys_sessions es join specimens sp on sp.id = es.specimen_id join donors dn on dn.id = sp.donor_id join genders gd on gd.id = dn.gender_id join ages on ages.id = dn.age_id - join projects pr on pr.id = es.project_id - join ( - select es.id as ecephys_session_id, - count (distinct epr.id) as probe_count, - count (distinct ech.id) as channel_count, - count (distinct eun.id) as unit_count - from ecephys_sessions es - join ecephys_probes epr on epr.ecephys_session_id = es.id - join ecephys_channels ech on ( - ech.ecephys_probe_id = epr.id - and ech.valid_data - ) - join ecephys_units eun on ( - eun.ecephys_channel_id = ech.id - and eun.quality = 'good' - {{pm.optional_le('eun.amplitude_cutoff', amplitude_cutoff_maximum) -}} - {{pm.optional_ge('eun.presence_ratio', presence_ratio_minimum) -}} - {{pm.optional_le('eun.isi_violations', isi_violations_maximum) -}} - ) - group by es.id - ) pc on es.id = pc.ecephys_session_id left join ( select ecephys_sessions.id as ecephys_session_id, wkf.id as nwb_id @@ -328,36 +301,18 @@ def get_sessions( join well_known_file_types wkft on wkft.id = wkf.well_known_file_type_id where wkft.name = 'EcephysNwb' ) nwb on es.id = nwb.ecephys_session_id - left join ( - select es.id as ecephys_session_id, - array_agg (st.id) as structure_ids, - 
array_agg (distinct st.acronym) as structure_acronyms - from ecephys_sessions es - join ecephys_probes epr on epr.ecephys_session_id = es.id - join ecephys_channels ech on ( - ech.ecephys_probe_id = epr.id - and ech.valid_data - ) - left join structures st on st.id = ech.manual_structure_id - group by es.id - ) str on es.id = str.ecephys_session_id - where true - {{pm.optional_contains('es.id', session_ids) -}} - {{pm.optional_contains('es.workflow_state', workflow_states, True) -}} - {{pm.optional_equals('es.habituation', habituation) -}} - {{pm.optional_not_null('es.published_at', published) -}} - {{pm.optional_contains('pr.name', project_names, True) -}} + where + not es.habituation + and es.workflow_state != 'failed' + {{pm.optional_contains('es.id', session_ids) -}} + {{pm.optional_not_null('es.published_at', published_at_not_null)}} + {{pm.optional_le('es.published_at', published_at)}} """, base=postgres_macros(), engine=self.postgres_engine.select, session_ids=session_ids, - workflow_states=workflow_states, - published=published, - habituation=f"{habituation}".lower() if habituation is not None else habituation, - project_names=project_names, - amplitude_cutoff_maximum=get_unit_filter_value("amplitude_cutoff_maximum", replace_none=False, **kwargs), - presence_ratio_minimum=get_unit_filter_value("presence_ratio_minimum", replace_none=False, **kwargs), - isi_violations_maximum=get_unit_filter_value("isi_violations_maximum", replace_none=False, **kwargs) + published_at_not_null=published_at_not_null, + published_at=published_at ) response.set_index("id", inplace=True) From b33a28521e4150a4bbded49d451c6af643d29338 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Tue, 29 Oct 2019 14:27:38 -0700 Subject: [PATCH 21/60] #1082 add test for EcephysProjectLimsApi.get_unit_analysis_metrics --- .../ecephys_project_lims_api.py | 2 +- .../ecephys/test_ecephys_project_lims_api.py | 106 +++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) diff --git 
a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index 9f554da02..472d9c72a 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py @@ -157,7 +157,7 @@ def get_units( published_at_not_null=published_at_not_null, published_at=published_at ) - return response.set_index("id", inplace=True) + return response.set_index("id", inplace=False) def get_channels( self, diff --git a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py index 985cda5f3..0ebab0a8e 100644 --- a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py +++ b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py @@ -87,6 +87,60 @@ def __call__(self, query, *args, **kwargs): {"something": [12, 14]}, index=pd.Index(name="id", data=[5, 6]) ) + ], + [ + "get_channels", + {"published_at": "2019-10-22", "session_ids": [1, 2, 3]}, + pd.DataFrame({"id": [5, 6], "something": [12, 14]}), + { + "checks_pa_not_null": lambda st: re.compile(r".+and es.published_at is not null.*", re.DOTALL).match(st) is not None, + "checks_pa": lambda st: re.compile(r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(st) is not None, + "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None + }, + pd.DataFrame( + {"something": [12, 14]}, + index=pd.Index(name="id", data=[5, 6]) + ) + ], + [ + "get_probes", + {"published_at": "2019-10-22", "session_ids": [1, 2, 3]}, + pd.DataFrame({"id": [5, 6], "something": [12, 14]}), + { + "checks_pa_not_null": lambda st: re.compile(r".+and es.published_at is not null.*", re.DOTALL).match(st) is not None, + "checks_pa": lambda st: re.compile(r".+and es.published_at 
<= '2019-10-22'.*", re.DOTALL).match(st) is not None, + "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None + }, + pd.DataFrame( + {"something": [12, 14]}, + index=pd.Index(name="id", data=[5, 6]) + ) + ], + [ + "get_sessions", + {"published_at": "2019-10-22", "session_ids": [1, 2, 3]}, + pd.DataFrame({"id": [5, 6], "something": [12, 14], "genotype": ["foo", np.nan]}), + { + "checks_pa_not_null": lambda st: re.compile(r".+and es.published_at is not null.*", re.DOTALL).match(st) is not None, + "checks_pa": lambda st: re.compile(r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(st) is not None, + "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None + }, + pd.DataFrame( + {"something": [12, 14], "genotype": ["foo", "wt"]}, + index=pd.Index(name="id", data=[5, 6]) + ) + ], + [ + "get_unit_analysis_metrics", + {"ecephys_session_ids": [1, 2, 3]}, + pd.DataFrame({"id": [5, 6], "data": [{"a": 1, "b": 2}, {"a": 3, "b": 4}], "ecephys_unit_id": [10, 11]}), + { + "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None + }, + pd.DataFrame( + {"id": [5, 6], "a": [1, 3], "b": [2, 4]}, + index=pd.Index(name="iecephys_unit_id", data=[10, 11]) + ) ] ]) def test_pg_query(method_name,kwargs, response, checks, expected): @@ -96,7 +150,7 @@ def test_pg_query(method_name,kwargs, response, checks, expected): with mock.patch("allensdk.internal.api.psycopg2_select", new=selector) as ptc: api = epla.EcephysProjectLimsApi.default() obtained = getattr(api, method_name)(**kwargs) - pd.testing.assert_frame_equal(expected, obtained, check_like=True) + pd.testing.assert_frame_equal(expected, obtained, check_like=True, check_dtype=False) any_checks_failed = False for name, result in ptc.passed.items(): @@ -107,3 +161,53 @@ def test_pg_query(method_name,kwargs, response, checks, expected): if any_checks_failed: print(ptc.query) assert 
not any_checks_failed + + +def test_get_session_data(): + + session_id = 12345 + wkf_id = 987 + + class MockPgEngine: + def select(self, rendered): + pattern = re.compile( + r".*and ear.ecephys_session_id = (?P\d+).*", re.DOTALL + ) + match = pattern.match(rendered) + sid_obt = int(match["session_id"]) + assert session_id == sid_obt + return pd.DataFrame({"id": [wkf_id]}) + + class MockHttpEngine: + def stream(self, path): + assert path == f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" + + api = epla.EcephysProjectLimsApi( + postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() + ) + api.get_session_data(session_id) + + +def test_get_probe_data(): + + probe_id = 12345 + wkf_id = 987 + + class MockPgEngine: + def select(self, rendered): + pattern = re.compile( + r".*and earp.ecephys_probe_id = (?P\d+).*", re.DOTALL + ) + match = pattern.match(rendered) + pid_obt = int(match["probe_id"]) + assert probe_id == pid_obt + return pd.DataFrame({"id": [wkf_id]}) + + class MockHttpEngine: + def stream(self, path): + assert path == f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" + + api = epla.EcephysProjectLimsApi( + postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() + ) + api.get_probe_lfp_data(probe_id) From 843ae538cfc45dc0c52f7b33d4e6d76889e81f71 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Tue, 29 Oct 2019 14:51:34 -0700 Subject: [PATCH 22/60] /#1082 remove unnecessary kwargs in EcephysProjectCache._get_units api call --- .../brain_observatory/ecephys/ecephys_project_cache.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_cache.py b/allensdk/brain_observatory/ecephys/ecephys_project_cache.py index 44f239764..254a98458 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_cache.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_cache.py @@ -132,14 +132,8 @@ def _get_channels(self): def _get_units(self, filter_by_validity: bool = True, 
**unit_filter_kwargs) -> pd.DataFrame: path = self.get_cache_path(None, self.UNITS_KEY) - get_units = partial( - self.fetch_api.get_units, - amplitude_cutoff_maximum=None, # pull down all the units to csv and filter on the way out - presence_ratio_minimum=None, - isi_violations_maximum=None, - filter_by_validity=filter_by_validity - ) - units: pd.DataFrame = one_file_call_caching(path, get_units, write_csv, read_csv, num_tries=self.fetch_tries) + + units = call_caching(self.fetch_api.get_units, path, strategy='lazy', **csv_io) units = units.rename(columns={ 'PT_ratio': 'waveform_PT_ratio', 'amplitude': 'waveform_amplitude', From a827daad3283d048069052ebf421461b6bd04fea Mon Sep 17 00:00:00 2001 From: nile graddis Date: Tue, 29 Oct 2019 15:08:10 -0700 Subject: [PATCH 23/60] #1082 update ecephys project cache test to account for cache-side unit filtering --- .../ecephys/test_ecephys_project_cache.py | 26 +++++-------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_cache.py b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_cache.py index 724472d13..5a89e2df0 100644 --- a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_cache.py +++ b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_cache.py @@ -41,21 +41,11 @@ def units(): 'snr': [1.5, 4.9], "amplitude_cutoff": [0.05, 0.2], "presence_ratio": [10, 20], - "isi_violations": [0.3, 0.4] + "isi_violations": [0.3, 0.4], + "quality": ["good", "noise"] }, index=pd.Series(name='id', data=[1, 2])) -@pytest.fixture -def filtered_units(): - return pd.DataFrame({ - 'ecephys_channel_id': [3], - 'snr': [4.2], - 'amplitude_cutoff': [0.08], - 'presence_ratio': [15], - 'isi_violations': [0.35] - }, index=pd.Series(name='id', data=[3])) - - @pytest.fixture def analysis_metrics(): return pd.DataFrame({ @@ -117,7 +107,7 @@ def shared_tmpdir(tmpdir_factory): @pytest.fixture -def mock_api(shared_tmpdir, raw_sessions, units, 
filtered_units, channels, raw_probes, analysis_metrics): +def mock_api(shared_tmpdir, raw_sessions, units, channels, raw_probes, analysis_metrics): class MockApi: def __init__(self, **kwargs): @@ -130,10 +120,7 @@ def get_sessions(self, **kwargs): return raw_sessions def get_units(self, **kwargs): - if kwargs['filter_by_validity']: - return filtered_units - else: - return units + return units def get_channels(self, **kwargs): return channels @@ -214,9 +201,10 @@ def test_get_sessions(tmpdir_cache, sessions): @pytest.mark.parametrize("filter_by_validity", [False, True]) -def test_get_units(tmpdir_cache, units, filtered_units, filter_by_validity): +def test_get_units(tmpdir_cache, units, filter_by_validity): if filter_by_validity: - lazy_cache_test(tmpdir_cache, '_get_units', "get_units", filtered_units, filter_by_validity=filter_by_validity) + units = units[units["quality"] == "good"].drop(columns="quality") + lazy_cache_test(tmpdir_cache, '_get_units', "get_units", units, filter_by_validity=filter_by_validity) else: units = units[units["amplitude_cutoff"] <= 0.1] lazy_cache_test(tmpdir_cache, '_get_units', "get_units", units, filter_by_validity=filter_by_validity) From c01fa4fe708f15be85e0252bafae1a3529e60e3d Mon Sep 17 00:00:00 2001 From: nile graddis Date: Tue, 29 Oct 2019 15:27:28 -0700 Subject: [PATCH 24/60] #1082 fix call_caching call in EcephysProjectCache.get_units --- allensdk/brain_observatory/ecephys/ecephys_project_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_cache.py b/allensdk/brain_observatory/ecephys/ecephys_project_cache.py index 254a98458..116b2f63e 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_cache.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_cache.py @@ -133,7 +133,7 @@ def _get_channels(self): def _get_units(self, filter_by_validity: bool = True, **unit_filter_kwargs) -> pd.DataFrame: path = self.get_cache_path(None, self.UNITS_KEY) 
- units = call_caching(self.fetch_api.get_units, path, strategy='lazy', **csv_io) + units = one_file_call_caching(path, self.fetch_api.get_units, write_csv, read_csv, num_tries=self.fetch_tries) units = units.rename(columns={ 'PT_ratio': 'waveform_PT_ratio', 'amplitude': 'waveform_amplitude', From ba2684604a0d8f8c3038067e489f03f7b62dbdae Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 13:27:02 -0700 Subject: [PATCH 25/60] @1082 implement template getters for ecephys project lims api --- .../ecephys_project_api.py | 6 +- .../ecephys_project_lims_api.py | 68 +++++++++++++++++-- .../ecephys_project_warehouse_api.py | 3 +- .../ecephys/test_ecephys_project_lims_api.py | 31 +++++++++ 4 files changed, 99 insertions(+), 9 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index f7e283192..9e2b5fe34 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -1,4 +1,4 @@ -from typing import Optional, TypeVar +from typing import Optional, TypeVar, Generator import numpy as np import pandas as pd @@ -55,10 +55,10 @@ def get_probes( def get_probe_lfp_data(self, probe_id, *args, **kwargs): raise NotImplementedError() - def get_natural_movie_template(self, number, *args, **kwargs): + def get_natural_movie_template(self, number) -> Generator: raise NotImplementedError() - def get_natural_scene_template(self, number, *args, **kwargs): + def get_natural_scene_template(self, number) -> Generator: raise NotImplementedError() def get_unit_analysis_metrics(self, unit_ids=None, ecephys_session_ids=None, session_types=None, *args, **kwargs): diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index 
472d9c72a..d5bc6f06b 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py @@ -1,7 +1,4 @@ -from pathlib import Path -import shutil -import warnings -from typing import Optional, TypeVar +from typing import Optional, Generator import pandas as pd @@ -10,10 +7,12 @@ from .utilities import postgres_macros, build_and_execute from allensdk.internal.api import PostgresQueryMixin -from allensdk.brain_observatory.ecephys import get_unit_filter_value class EcephysProjectLimsApi(EcephysProjectApi): + + STIMULUS_NAMESPACE = "brain_observatory_1.1" + def __init__(self, postgres_engine, app_engine): self.postgres_engine = postgres_engine self.app_engine = app_engine @@ -350,6 +349,65 @@ def get_unit_analysis_metrics(self, unit_ids=None, ecephys_session_ids=None, ses return response + def _get_template(self, name, namespace): + try: + well_known_file = build_and_execute( + f""" + select + st.well_known_file_id + from stimuli st + join stimulus_namespaces sn on sn.id = st.stimulus_namespace_id + where + st.name = '{name}' + and sn.name = '{namespace}' + """, + base=postgres_macros(), + engine=self.postgres_engine.select_one + ) + wkf_id = well_known_file["well_known_file_id"] + except (KeyError, IndexError): + raise ValueError(f"expected exactly 1 template for {name}") + + download_link = f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" + return self.app_engine.stream(download_link) + + + def get_natural_movie_template(self, number: int) -> Generator[bytes, None, None]: + """ Download a template for the natural movie stimulus. This is the + actual movie that was shown during the recording session. + + Parameters + ---------- + number : + idenfifier for this movie (note that this is an integer, so to get + the template for natural_movie_three you should pass in 3) + + Returns + ------- + A generator yielding a bytestream of this template's data. 
+ + """ + + return self._get_template(f"natural_movie_{number}", self.STIMULUS_NAMESPACE) + + + def get_natural_scene_template(self, number: int) -> Generator[bytes, None, None]: + """ Download a template for the natural scene stimulus. This is the + actual image that was shown during the recording session. + + Parameters + ---------- + number : + idenfifier for this scene + + Returns + ------- + A generator yielding a bytestream of this template's data. + + """ + return self._get_template(f"natural_scene_{int(number)}", self.STIMULUS_NAMESPACE) + + @classmethod def default(cls, pg_kwargs=None, app_kwargs=None): diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_warehouse_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_warehouse_api.py index 5b15e961b..7a9faa70a 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_warehouse_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_warehouse_api.py @@ -70,7 +70,8 @@ def _list_stimulus_templates(self, ecephys_product_id=714914585): "[attachable_type$eq'Product']" r"[attachable_id$eq{{ecephys_product_id}}]" ), - engine=self.rma_engine.get_rma_tabular, ecephys_product_id=ecephys_product_id + engine=self.rma_engine.get_rma_tabular, + ecephys_product_id=ecephys_product_id ) scene_number = [] diff --git a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py index 0ebab0a8e..a0c3f402d 100644 --- a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py +++ b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py @@ -211,3 +211,34 @@ def stream(self, path): postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() ) api.get_probe_lfp_data(probe_id) + + +@pytest.mark.parametrize("method,kwargs,query_pattern", [ + [ + "get_natural_movie_template", + {"number": 12}, + 
re.compile(".+st.name = 'natural_movie_12'.+", re.DOTALL) + ], + [ + "get_natural_scene_template", + {"number": 12}, + re.compile(".+st.name = 'natural_scene_12'.+", re.DOTALL) + ] +]) +def test_template_getter(method, kwargs, query_pattern): + + wkf_id = 12345 + + class MockPgEngine: + def select_one(self, rendered): + assert query_pattern.match(rendered) is not None + return {"well_known_file_id": wkf_id} + + class MockHttpEngine: + def stream(self, url): + assert url == f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" + + api = epla.EcephysProjectLimsApi( + postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() + ) + getattr(api, method)(**kwargs) \ No newline at end of file From 8ffa89078411660c7f3b082ce6795f7408585c78 Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 13:36:10 -0700 Subject: [PATCH 26/60] #1082 unify ecephys project lims api file getter tests --- .../ecephys/test_ecephys_project_lims_api.py | 90 +++++++------------ 1 file changed, 34 insertions(+), 56 deletions(-) diff --git a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py index a0c3f402d..7c8f013d2 100644 --- a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py +++ b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_lims_api.py @@ -163,82 +163,60 @@ def test_pg_query(method_name,kwargs, response, checks, expected): assert not any_checks_failed -def test_get_session_data(): +WKF_ID = 12345 +class MockPgEngine: - session_id = 12345 - wkf_id = 987 + def __init__(self, query_pattern): + self.query_pattern = query_pattern - class MockPgEngine: - def select(self, rendered): - pattern = re.compile( - r".*and ear.ecephys_session_id = (?P\d+).*", re.DOTALL - ) - match = pattern.match(rendered) - sid_obt = int(match["session_id"]) - assert session_id == sid_obt - return pd.DataFrame({"id": [wkf_id]}) - class MockHttpEngine: - def stream(self, 
path): - assert path == f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" +class MockTemplatePgEngine(MockPgEngine): - api = epla.EcephysProjectLimsApi( - postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() - ) - api.get_session_data(session_id) + def select_one(self, rendered): + assert self.query_pattern.match(rendered) is not None + return {"well_known_file_id": WKF_ID} -def test_get_probe_data(): +class MockDataPgEngine(MockPgEngine): + def select(self, rendered): + assert self.query_pattern.match(rendered) is not None + return pd.DataFrame({"id": [WKF_ID]}) - probe_id = 12345 - wkf_id = 987 - class MockPgEngine: - def select(self, rendered): - pattern = re.compile( - r".*and earp.ecephys_probe_id = (?P\d+).*", re.DOTALL - ) - match = pattern.match(rendered) - pid_obt = int(match["probe_id"]) - assert probe_id == pid_obt - return pd.DataFrame({"id": [wkf_id]}) +class MockHttpEngine: + def stream(self, url): + assert url == f"well_known_files/download/{WKF_ID}?wkf_id={WKF_ID}" - class MockHttpEngine: - def stream(self, path): - assert path == f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" - api = epla.EcephysProjectLimsApi( - postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() - ) - api.get_probe_lfp_data(probe_id) - - -@pytest.mark.parametrize("method,kwargs,query_pattern", [ +@pytest.mark.parametrize("method,kwargs,query_pattern,pg_engine_cls", [ [ "get_natural_movie_template", {"number": 12}, - re.compile(".+st.name = 'natural_movie_12'.+", re.DOTALL) + re.compile(".+st.name = 'natural_movie_12'.+", re.DOTALL), + MockTemplatePgEngine ], [ "get_natural_scene_template", {"number": 12}, - re.compile(".+st.name = 'natural_scene_12'.+", re.DOTALL) + re.compile(".+st.name = 'natural_scene_12'.+", re.DOTALL), + MockTemplatePgEngine + ], + [ + "get_probe_lfp_data", + {"probe_id": 53}, + re.compile(r".+and earp.ecephys_probe_id = 53.+", re.DOTALL), + MockDataPgEngine + ], + [ + "get_session_data", + {"session_id": 53}, + re.compile(r".+and 
ear.ecephys_session_id = 53.+", re.DOTALL), + MockDataPgEngine ] ]) -def test_template_getter(method, kwargs, query_pattern): - - wkf_id = 12345 - - class MockPgEngine: - def select_one(self, rendered): - assert query_pattern.match(rendered) is not None - return {"well_known_file_id": wkf_id} - - class MockHttpEngine: - def stream(self, url): - assert url == f"well_known_files/download/{wkf_id}?wkf_id={wkf_id}" +def test_file_getter(method, kwargs, query_pattern, pg_engine_cls): api = epla.EcephysProjectLimsApi( - postgres_engine=MockPgEngine(), app_engine=MockHttpEngine() + postgres_engine=pg_engine_cls(query_pattern), app_engine=MockHttpEngine() ) getattr(api, method)(**kwargs) \ No newline at end of file From 52f7df5f5883336c2d01b3538de4616152686b4e Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 14:43:57 -0700 Subject: [PATCH 27/60] #1082 improve docs and typing for ecephys project lims api. refactor common handling of published_at --- .../ecephys_project_api.py | 14 +- .../ecephys_project_lims_api.py | 244 +++++++++++++++--- 2 files changed, 216 insertions(+), 42 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index 9e2b5fe34..3b9815d2a 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -4,10 +4,11 @@ import pandas as pd +# TODO: This should be a generic over the type of the values, but there is not +# good support currently for numpy and pandas type annotations ArrayLike = TypeVar("ArrayLike", list, np.ndarray, pd.Series, tuple) - class EcephysProjectApi: def get_sessions( self, @@ -16,7 +17,7 @@ def get_sessions( ): raise NotImplementedError() - def get_session_data(self, session_id, *args, **kwargs): + def get_session_data(self, session_id: int) -> Generator: raise 
NotImplementedError() def get_targeted_regions(self, *args, **kwargs): @@ -52,7 +53,7 @@ def get_probes( ): raise NotImplementedError() - def get_probe_lfp_data(self, probe_id, *args, **kwargs): + def get_probe_lfp_data(self, probe_id: int) -> Generator: raise NotImplementedError() def get_natural_movie_template(self, number) -> Generator: @@ -61,5 +62,10 @@ def get_natural_movie_template(self, number) -> Generator: def get_natural_scene_template(self, number) -> Generator: raise NotImplementedError() - def get_unit_analysis_metrics(self, unit_ids=None, ecephys_session_ids=None, session_types=None, *args, **kwargs): + def get_unit_analysis_metrics( + self, + unit_ids: Optional[ArrayLike] = None, + ecephys_session_ids: Optional[ArrayLike] = None, + session_types: Optional[ArrayLike] = None + ) -> pd.DataFrame: raise NotImplementedError() \ No newline at end of file diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index d5bc6f06b..5f06a7ecc 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py @@ -1,4 +1,4 @@ -from typing import Optional, Generator +from typing import Optional, Generator, Dict, Union import pandas as pd @@ -11,13 +11,56 @@ class EcephysProjectLimsApi(EcephysProjectApi): - STIMULUS_NAMESPACE = "brain_observatory_1.1" + STIMULUS_TEMPLATE_NAMESPACE = "brain_observatory_1.1" def __init__(self, postgres_engine, app_engine): + """ Downloads extracellular ephys data from the Allen Institute's + internal Laboratory Information Management System (LIMS). If you are + on our network you can use this class to get bleeding-edge data into + an EcephysProjectCache. If not, it won't work at all + + Parameters + ---------- + postgres_engine : + used for making queries against the LIMS postgres database. 
Must + implement: + select : takes a postgres query as a string. Returns a pandas + dataframe of results + select_one : takes a postgres query as a string. If there is + exactly one record in the response, returns that record as + a dict. Otherwise returns an empty dict. + app_engine : + used for making queries agains the lims web application. Must + implement: + stream : takes a url as a string. Returns a generator yielding + the response body as bytes. + + Notes + ----- + You almost certainly want to construct this class by calling + EcephysProjectLimsApi.default() rather than this constructor directly. + + """ + + self.postgres_engine = postgres_engine self.app_engine = app_engine - def get_session_data(self, session_id, **kwargs): + def get_session_data(self, session_id: int) -> Generator[bytes, None, None]: + """ Download an NWB file containing detailed data for an ecephys + session. + + Parameters + ---------- + session_id : + Download an NWB file for this session + + Returns + ------- + A generator yielding an NWB file as bytes. + + """ + nwb_response = build_and_execute( """ select wkf.id, wkf.filename, wkf.storage_directory, wkf.attachable_id from well_known_files wkf @@ -45,7 +88,21 @@ def get_session_data(self, session_id, **kwargs): f"well_known_files/download/{nwb_id}?wkf_id={nwb_id}" ) - def get_probe_lfp_data(self, probe_id): + def get_probe_lfp_data(self, probe_id: int) -> Generator[bytes, None, None]: + """ Download an NWB file containing detailed data for the local field + potential recorded from an ecephys probe. + + Parameters + ---------- + probe_id : + Download an NWB file for this probe's LFP + + Returns + ------- + A generator yielding an NWB file as bytes. 
+ + """ + nwb_response = build_and_execute( """ select wkf.id from well_known_files wkf @@ -81,23 +138,36 @@ def get_units( probe_ids: Optional[ArrayLike] = None, session_ids: Optional[ArrayLike] = None, published_at: Optional[str] = None - ): - """ Query LIMS for records describing sorted ecephys units. + ) -> pd.DataFrame: + """ Download a table of records describing sorted ecephys units. Parameters ---------- - units_ids : - channel_ids : - probe_ids : - session_ids : - published_at : - If provided, only units from sessions published prior to this date - will be returned. Format should be YYYY-MM-DD + unit_ids : + A collection of integer identifiers for sorted ecephys units. If + provided, only return records describing these units. + channel_ids : + A collection of integer identifiers for ecephys channels. If + provided, results will be filtered to units recorded from these + channels. + probe_ids : + A collection of integer identifiers for ecephys probes. If + provided, results will be filtered to units recorded from these + probes. + session_ids : + A collection of integer identifiers for ecephys sessions. If + provided, results will be filtered to units recorded during + these sessions. + published_at : + A date (rendered as "YYYY-MM-DD"). If provided, only units + recorded during sessions published before this data will be + returned. - """ + Returns + ------- + a pd.DataFrame whose rows are ecephys channels. 
- published_at_not_null = None if published_at is None else True - published_at = f"'{published_at}'" if published_at is not None else None + """ response = build_and_execute( """ @@ -153,8 +223,7 @@ def get_units( channel_ids=channel_ids, probe_ids=probe_ids, session_ids=session_ids, - published_at_not_null=published_at_not_null, - published_at=published_at + **_split_published_at(published_at) ) return response.set_index("id", inplace=False) @@ -164,10 +233,31 @@ def get_channels( probe_ids: Optional[ArrayLike] = None, session_ids: Optional[ArrayLike] = None, published_at: Optional[str] = None - ): + ) -> pd.DataFrame: + """ Download a table of ecephys channel records. + + Parameters + ---------- + channel_ids : + A collection of integer identifiers for ecephys channels. If + provided, results will be filtered to these channels. + probe_ids : + A collection of integer identifiers for ecephys probes. If + provided, results will be filtered to channels on these probes. + session_ids : + A collection of integer identifiers for ecephys sessions. If + provided, results will be filtered to channels recorded from during + these sessions. + published_at : + A date (rendered as "YYYY-MM-DD"). If provided, only channels + recorded from during sessions published before this date will be + returned. - published_at_not_null = None if published_at is None else True - published_at = f"'{published_at}'" if published_at is not None else None + Returns + ------- + a pd.DataFrame whose rows are ecephys channels. 
+ + """ response = build_and_execute( """ @@ -203,8 +293,7 @@ def get_channels( channel_ids=channel_ids, probe_ids=probe_ids, session_ids=session_ids, - published_at_not_null=published_at_not_null, - published_at=published_at + **_split_published_at(published_at) ) return response.set_index("id") @@ -213,10 +302,28 @@ def get_probes( probe_ids: Optional[ArrayLike] = None, session_ids: Optional[ArrayLike] = None, published_at: Optional[str] = None - ): + ) -> pd.DataFrame: + """ Download a table of ecephys probe records. + + Parameters + ---------- + probe_ids : + A collection of integer identifiers for ecephys probes. If + provided, results will be filtered to these probes. + session_ids : + A collection of integer identifiers for ecephys sessions. If + provided, results will be filtered to probes recorded from during + these sessions. + published_at : + A date (rendered as "YYYY-MM-DD"). If provided, only probes + recorded from during sessions published before this date will be + returned. - published_at_not_null = None if published_at is None else True - published_at = f"'{published_at}'" if published_at is not None else None + Returns + ------- + a pd.DataFrame whose rows are ecephys probes. + + """ response = build_and_execute( """ @@ -247,8 +354,7 @@ def get_probes( engine=self.postgres_engine.select, probe_ids=probe_ids, session_ids=session_ids, - published_at_not_null=published_at_not_null, - published_at=published_at + **_split_published_at(published_at) ) return response.set_index("id") @@ -257,10 +363,23 @@ def get_sessions( self, session_ids: Optional[ArrayLike] = None, published_at: Optional[str] = None - ): + ) -> pd.DataFrame: + """ Download a table of ecephys session records. - published_at_not_null = None if published_at is None else True - published_at = f"'{published_at}'" if published_at is not None else None + Parameters + ---------- + session_ids : + A collection of integer identifiers for ecephys sessions. 
If + provided, results will be filtered to these sessions. + published_at : + A date (rendered as "YYYY-MM-DD"). If provided, only sessions + published before this date will be returned. + + Returns + ------- + a pd.DataFrame whose rows are ecephys sessions. + + """ response = build_and_execute( """ @@ -310,8 +429,7 @@ def get_sessions( base=postgres_macros(), engine=self.postgres_engine.select, session_ids=session_ids, - published_at_not_null=published_at_not_null, - published_at=published_at + **_split_published_at(published_at) ) response.set_index("id", inplace=True) @@ -319,7 +437,37 @@ def get_sessions( return response - def get_unit_analysis_metrics(self, unit_ids=None, ecephys_session_ids=None, session_types=None): + def get_unit_analysis_metrics( + self, + unit_ids: Optional[ArrayLike] = None, + ecephys_session_ids: Optional[ArrayLike] = None, + session_types: Optional[ArrayLike] = None + ) -> pd.DataFrame: + """ Fetch analysis metrics (stimulus set-specific characterizations of + unit response patterns) for ecephys units. Note that the metrics + returned depend on the stimuli that were presented during recording ( + and thus on the session_type) + + Parameters + --------- + unit_ids : + integer identifiers for a set of ecephys units. If provided, the + response will only include metrics calculated for these units + ecephys_session_ids : + integer identifiers for a set of ecephys sessions. If provided, the + response will only include metrics calculated for units identified + during these sessions + session_types : + string names identifying ecephys session types (e.g. + "brain_observatory_1.1" or "functional_connectivity") + + Returns + ------- + a pandas dataframe indexed by ecephys unit id whose columns are + metrics. 
+ + """ + response = build_and_execute( """ {%- import 'postgres_macros' as pm -%} @@ -350,6 +498,10 @@ def get_unit_analysis_metrics(self, unit_ids=None, ecephys_session_ids=None, ses def _get_template(self, name, namespace): + """ Identify the WellKnownFile record associated with a stimulus + template and stream its data if present. + """ + try: well_known_file = build_and_execute( f""" @@ -384,11 +536,13 @@ def get_natural_movie_template(self, number: int) -> Generator[bytes, None, None Returns ------- - A generator yielding a bytestream of this template's data. + A generator yielding an npy file as bytes """ - return self._get_template(f"natural_movie_{number}", self.STIMULUS_NAMESPACE) + return self._get_template( + f"natural_movie_{number}", self.STIMULUS_TEMPLATE_NAMESPACE + ) def get_natural_scene_template(self, number: int) -> Generator[bytes, None, None]: @@ -402,10 +556,12 @@ def get_natural_scene_template(self, number: int) -> Generator[bytes, None, None Returns ------- - A generator yielding a bytestream of this template's data. + A generator yielding a tiff file as bytes. """ - return self._get_template(f"natural_scene_{int(number)}", self.STIMULUS_NAMESPACE) + return self._get_template( + f"natural_scene_{int(number)}", self.STIMULUS_TEMPLATE_NAMESPACE + ) @classmethod @@ -422,3 +578,15 @@ def default(cls, pg_kwargs=None, app_kwargs=None): pg_engine = PostgresQueryMixin(**_pg_kwargs) app_engine = HttpEngine(**_app_kwargs) return cls(pg_engine, app_engine) + + +# TODO: in 3.8, use a typed dict here +def _split_published_at(published_at: Optional[str]) -> Dict[str, Optional[Union[bool, str]]]: + """ LIMS queries that filter on published_at need a couple of + reformattings of the argued date string. 
+ """ + + return { + "published_at": f"'{published_at}'" if published_at is not None else None, + "published_at_not_null": None if published_at is None else True + } From 72425ced104b242b35d580f970f98faea54a1d5e Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 14:45:45 -0700 Subject: [PATCH 28/60] #1082 remove get_targeted_regions from EcephysProjectApi base (cache gets this info from other queries) --- .../ecephys/ecephys_project_api/ecephys_project_api.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index 3b9815d2a..9515102d9 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -20,9 +20,6 @@ def get_sessions( def get_session_data(self, session_id: int) -> Generator: raise NotImplementedError() - def get_targeted_regions(self, *args, **kwargs): - raise NotImplementedError() - def get_isi_experiments(self, *args, **kwargs): raise NotImplementedError() From 171919383bd58d09f087b81af070499db2dfe903 Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Fri, 1 Nov 2019 15:02:57 -0700 Subject: [PATCH 29/60] #1082 better typing for published_at splitting utility --- .../ecephys_project_api.py | 3 +++ .../ecephys_project_lims_api.py | 26 +++++++++++-------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index 9515102d9..3eb36134f 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -6,6 +6,9 @@ # TODO: This should be a generic over the type of the values, but there is not # good 
support currently for numpy and pandas type annotations +# we should investigate numpy and pandas typing support and migrate +# https://github.com/numpy/numpy-stubs +# https://github.com/pandas-dev/pandas/blob/master/pandas/_typing.py ArrayLike = TypeVar("ArrayLike", list, np.ndarray, pd.Series, tuple) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index 5f06a7ecc..76defdb31 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py @@ -1,4 +1,4 @@ -from typing import Optional, Generator, Dict, Union +from typing import Optional, Generator, NamedTuple import pandas as pd @@ -223,7 +223,7 @@ def get_units( channel_ids=channel_ids, probe_ids=probe_ids, session_ids=session_ids, - **_split_published_at(published_at) + **_split_published_at(published_at)._asdict() ) return response.set_index("id", inplace=False) @@ -293,7 +293,7 @@ def get_channels( channel_ids=channel_ids, probe_ids=probe_ids, session_ids=session_ids, - **_split_published_at(published_at) + **_split_published_at(published_at)._asdict() ) return response.set_index("id") @@ -354,7 +354,7 @@ def get_probes( engine=self.postgres_engine.select, probe_ids=probe_ids, session_ids=session_ids, - **_split_published_at(published_at) + **_split_published_at(published_at)._asdict() ) return response.set_index("id") @@ -429,7 +429,7 @@ def get_sessions( base=postgres_macros(), engine=self.postgres_engine.select, session_ids=session_ids, - **_split_published_at(published_at) + **_split_published_at(published_at)._asdict() ) response.set_index("id", inplace=True) @@ -580,13 +580,17 @@ def default(cls, pg_kwargs=None, app_kwargs=None): return cls(pg_engine, app_engine) -# TODO: in 3.8, use a typed dict here -def _split_published_at(published_at: Optional[str]) -> 
Dict[str, Optional[Union[bool, str]]]: +class SplitPublishedAt(NamedTuple): + published_at: Optional[str] + published_at_not_null: Optional[bool] + + +def _split_published_at(published_at: Optional[str]) -> SplitPublishedAt: """ LIMS queries that filter on published_at need a couple of reformattings of the argued date string. """ - return { - "published_at": f"'{published_at}'" if published_at is not None else None, - "published_at_not_null": None if published_at is None else True - } + return SplitPublishedAt( + published_at=f"'{published_at}'" if published_at is not None else None, + published_at_not_null=None if published_at is None else True + ) From c1828bc61b53cc873d5b7a7cb28b23631c1c125d Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Mon, 4 Nov 2019 16:08:12 -0800 Subject: [PATCH 30/60] #1082 ecephys project api specifies iterable responses for data streams --- .../ecephys_project_api.py | 10 ++++----- .../ecephys_project_lims_api.py | 22 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py index 3eb36134f..6db0e473d 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_api.py @@ -1,4 +1,4 @@ -from typing import Optional, TypeVar, Generator +from typing import Optional, TypeVar, Iterable import numpy as np import pandas as pd @@ -20,7 +20,7 @@ def get_sessions( ): raise NotImplementedError() - def get_session_data(self, session_id: int) -> Generator: + def get_session_data(self, session_id: int) -> Iterable: raise NotImplementedError() def get_isi_experiments(self, *args, **kwargs): @@ -53,13 +53,13 @@ def get_probes( ): raise NotImplementedError() - def get_probe_lfp_data(self, probe_id: int) -> Generator: + def get_probe_lfp_data(self, probe_id: int) -> 
Iterable: raise NotImplementedError() - def get_natural_movie_template(self, number) -> Generator: + def get_natural_movie_template(self, number) -> Iterable: raise NotImplementedError() - def get_natural_scene_template(self, number) -> Generator: + def get_natural_scene_template(self, number) -> Iterable: raise NotImplementedError() def get_unit_analysis_metrics( diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py index 76defdb31..d90e301c9 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_api/ecephys_project_lims_api.py @@ -1,4 +1,4 @@ -from typing import Optional, Generator, NamedTuple +from typing import Optional, Iterable, NamedTuple import pandas as pd @@ -32,7 +32,7 @@ def __init__(self, postgres_engine, app_engine): app_engine : used for making queries agains the lims web application. Must implement: - stream : takes a url as a string. Returns a generator yielding + stream : takes a url as a string. Returns an iterable yielding the response body as bytes. Notes @@ -46,7 +46,7 @@ def __init__(self, postgres_engine, app_engine): self.postgres_engine = postgres_engine self.app_engine = app_engine - def get_session_data(self, session_id: int) -> Generator[bytes, None, None]: + def get_session_data(self, session_id: int) -> Iterable[bytes]: """ Download an NWB file containing detailed data for an ecephys session. @@ -57,7 +57,7 @@ def get_session_data(self, session_id: int) -> Generator[bytes, None, None]: Returns ------- - A generator yielding an NWB file as bytes. + An iterable yielding an NWB file as bytes. 
""" @@ -88,7 +88,7 @@ def get_session_data(self, session_id: int) -> Generator[bytes, None, None]: f"well_known_files/download/{nwb_id}?wkf_id={nwb_id}" ) - def get_probe_lfp_data(self, probe_id: int) -> Generator[bytes, None, None]: + def get_probe_lfp_data(self, probe_id: int) -> Iterable[bytes]: """ Download an NWB file containing detailed data for the local field potential recorded from an ecephys probe. @@ -99,7 +99,7 @@ def get_probe_lfp_data(self, probe_id: int) -> Generator[bytes, None, None]: Returns ------- - A generator yielding an NWB file as bytes. + An iterable yielding an NWB file as bytes. """ @@ -160,7 +160,7 @@ def get_units( these sessions. published_at : A date (rendered as "YYYY-MM-DD"). If provided, only units - recorded during sessions published before this data will be + recorded during sessions published before this date will be returned. Returns @@ -524,7 +524,7 @@ def _get_template(self, name, namespace): return self.app_engine.stream(download_link) - def get_natural_movie_template(self, number: int) -> Generator[bytes, None, None]: + def get_natural_movie_template(self, number: int) -> Iterable[bytes]: """ Download a template for the natural movie stimulus. This is the actual movie that was shown during the recording session. @@ -536,7 +536,7 @@ def get_natural_movie_template(self, number: int) -> Generator[bytes, None, None Returns ------- - A generator yielding an npy file as bytes + An iterable yielding an npy file as bytes """ @@ -545,7 +545,7 @@ def get_natural_movie_template(self, number: int) -> Generator[bytes, None, None ) - def get_natural_scene_template(self, number: int) -> Generator[bytes, None, None]: + def get_natural_scene_template(self, number: int) -> Iterable[bytes]: """ Download a template for the natural scene stimulus. This is the actual image that was shown during the recording session. 
@@ -556,7 +556,7 @@ def get_natural_scene_template(self, number: int) -> Generator[bytes, None, None Returns ------- - A generator yielding a tiff file as bytes. + An iterable yielding a tiff file as bytes. """ return self._get_template( From 97baa7f411b32a78076f6414df1992616290b945 Mon Sep 17 00:00:00 2001 From: MattAitken Date: Fri, 1 Nov 2019 09:51:16 -0700 Subject: [PATCH 31/60] updating internal morphology to use absolute imports --- allensdk/internal/morphology/compartment.py | 2 +- allensdk/internal/morphology/morphology.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/allensdk/internal/morphology/compartment.py b/allensdk/internal/morphology/compartment.py index 66c75ed95..2a88781a8 100644 --- a/allensdk/internal/morphology/compartment.py +++ b/allensdk/internal/morphology/compartment.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with Allen SDK. If not, see . -import node +import allensdk.internal.morphology.node as node class Compartment(object): def __init__(self, node1, node2): diff --git a/allensdk/internal/morphology/morphology.py b/allensdk/internal/morphology/morphology.py index d5301ac3a..a515ee9be 100644 --- a/allensdk/internal/morphology/morphology.py +++ b/allensdk/internal/morphology/morphology.py @@ -16,8 +16,8 @@ import copy import math import numpy as np -from node import Node -from compartment import Compartment +from allensdk.internal.morphology.node import Node +from allensdk.internal.morphology.compartment import Compartment class Morphology( object ): From 20162a84138abe65322a6bc3c562eef2f7d1d00d Mon Sep 17 00:00:00 2001 From: MattAitken Date: Tue, 5 Nov 2019 13:14:06 -0800 Subject: [PATCH 32/60] removing reference to neuron_morphology swc, it was being over-imported later in the script from allensdk.internal.core before being used --- .../pipeline_modules/cell_types/morphology/upright_transform.py | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/allensdk/internal/pipeline_modules/cell_types/morphology/upright_transform.py b/allensdk/internal/pipeline_modules/cell_types/morphology/upright_transform.py index 776bbbb3b..979716378 100644 --- a/allensdk/internal/pipeline_modules/cell_types/morphology/upright_transform.py +++ b/allensdk/internal/pipeline_modules/cell_types/morphology/upright_transform.py @@ -5,10 +5,8 @@ import sys import numpy as np from scipy.spatial.distance import euclidean -import neuron_morphology.swc as swc import skimage.draw -#import allensdk.core.json_utilities as json def calculate_centroid(x, y): From a2a7723fb953206ec5043a87a65232515001053c Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Wed, 6 Nov 2019 10:26:37 -0800 Subject: [PATCH 33/60] hotfix for test -- wrong method name; tests pass on local machine --- allensdk/brain_observatory/behavior/behavior_ophys_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/allensdk/brain_observatory/behavior/behavior_ophys_session.py b/allensdk/brain_observatory/behavior/behavior_ophys_session.py index 01707c33d..f2b5702eb 100644 --- a/allensdk/brain_observatory/behavior/behavior_ophys_session.py +++ b/allensdk/brain_observatory/behavior/behavior_ophys_session.py @@ -70,7 +70,7 @@ def ophys_timestamps(self) -> np.ndarray: """Timestamps associated with frames captured by the microscope :rtype: numpy.ndarray """ - return self.api.ophys_timestamps() + return self.api.get_ophys_timestamps() @property def metadata(self) -> dict: From 74d0ae02cd3698b108135aca342de0e29d9e6a44 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Wed, 6 Nov 2019 10:35:24 -0800 Subject: [PATCH 34/60] removing erroneous test that won't pass --- .../behavior/test_behavior_ophys_session.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_ophys_session.py b/allensdk/test/brain_observatory/behavior/test_behavior_ophys_session.py index 73bfe5862..c67ba1f92 100644 --- 
a/allensdk/test/brain_observatory/behavior/test_behavior_ophys_session.py +++ b/allensdk/test/brain_observatory/behavior/test_behavior_ophys_session.py @@ -20,17 +20,6 @@ from allensdk.brain_observatory.behavior.image_api import ImageApi -@pytest.mark.nightly -@pytest.mark.parametrize('oeid1, oeid2, expected', [ - pytest.param(789359614, 789359614, True), - pytest.param(789359614, 739216204, False) -]) -def test_equal(oeid1, oeid2, expected): - d1 = BehaviorOphysSession.from_lims(oeid1) - d2 = BehaviorOphysSession.from_lims(oeid2) - - assert equals(d1, d2) == expected - @pytest.mark.requires_bamboo @pytest.mark.parametrize("get_expected,get_from_session", [ [ From fc578f61db325c438066091f7d7b08a989448dea Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 8 Nov 2019 13:12:10 -0800 Subject: [PATCH 35/60] add unit tests for previously untested behavior processing methods --- .../behavior/test_metadata_processing.py | 56 +++++++++++++ .../behavior/test_rewards_processing.py | 29 +++++++ .../behavior/test_running_processing.py | 82 +++++++++++++++++++ .../behavior/test_stimulus_processing.py | 76 +++++++++++++++++ 4 files changed, 243 insertions(+) create mode 100644 allensdk/test/brain_observatory/behavior/test_metadata_processing.py create mode 100644 allensdk/test/brain_observatory/behavior/test_rewards_processing.py create mode 100644 allensdk/test/brain_observatory/behavior/test_running_processing.py create mode 100644 allensdk/test/brain_observatory/behavior/test_stimulus_processing.py diff --git a/allensdk/test/brain_observatory/behavior/test_metadata_processing.py b/allensdk/test/brain_observatory/behavior/test_metadata_processing.py new file mode 100644 index 000000000..96e01384a --- /dev/null +++ b/allensdk/test/brain_observatory/behavior/test_metadata_processing.py @@ -0,0 +1,56 @@ +import numpy as np + +from allensdk.brain_observatory.behavior.metadata_processing import ( + get_task_parameters) + + +def test_get_task_parameters(): + data = { + "items": 
{ + "behavior": { + "config": { + "DoC": { + "blank_duration_range": (0.5, 0.6), + "stimulus_window": 6.0, + "response_window": [0.15, 0.75], + "change_time_dist": "geometric", + }, + "reward": { + "reward_volume": 0.007, + }, + "behavior": { + "task_id": "DoC_untranslated", + }, + }, + "params": { + "stage": "TRAINING_3_images_A", + }, + "stimuli": { + "images": {"draw_log": [1]*10} + }, + } + } + } + actual = get_task_parameters(data) + expected = { + "blank_duration_sec": [0.5, 0.6], + "stimulus_duration_sec": 6.0, + "omitted_flash_fraction": np.nan, + "response_window_sec": [0.15, 0.75], + "reward_volume": 0.007, + "stage": "TRAINING_3_images_A", + "stimulus": "images", + "stimulus_distribution": "geometric", + "task": "DoC_untranslated", + "n_stimulus_frames": 10 + } + for k, v in actual.items(): + # Special nan checking since pytest doesn't do it well + try: + if np.isnan(v): + assert np.isnan(expected[k]) + else: + assert expected[k] == v + except (TypeError, ValueError): + assert expected[k] == v + assert list(actual.keys()) == list(expected.keys()) diff --git a/allensdk/test/brain_observatory/behavior/test_rewards_processing.py b/allensdk/test/brain_observatory/behavior/test_rewards_processing.py new file mode 100644 index 000000000..1df7f310a --- /dev/null +++ b/allensdk/test/brain_observatory/behavior/test_rewards_processing.py @@ -0,0 +1,29 @@ +import pandas as pd + +from allensdk.brain_observatory.behavior.rewards_processing import get_rewards + + +def test_get_rewards(): + data = { + "items": { + "behavior": { + "trial_log": [ + { + 'rewards': [(0.007, 1085.965144219165, 64775)], + 'trial_params': { + 'catch': False, 'auto_reward': False, + 'change_time': 5}}, + { + 'rewards': [], + 'trial_params': { + 'catch': False, 'auto_reward': False, + 'change_time': 4} + } + ] + }}} + expected = pd.DataFrame( + {"volume": [0.007], + "timestamps": [1086.965144219165], + "auto_rewarded": False}).set_index("timestamps", drop=True) + + 
pd.testing.assert_frame_equal(expected, get_rewards(data, lambda x: x+1.0)) diff --git a/allensdk/test/brain_observatory/behavior/test_running_processing.py b/allensdk/test/brain_observatory/behavior/test_running_processing.py new file mode 100644 index 000000000..492ea2419 --- /dev/null +++ b/allensdk/test/brain_observatory/behavior/test_running_processing.py @@ -0,0 +1,82 @@ +import numpy as np +import pandas as pd +import pytest + +from allensdk.brain_observatory.behavior.running_processing import ( + get_running_df, calc_deriv, deg_to_dist) + + +@pytest.fixture +def running_data(): + return { + "items": { + "behavior": { + "encoders": [ + { + "dx": np.array([0., 0.8444478, 0.7076058, 1.4225141, + 1.5040479]), + "vsig": [3.460190074169077, 3.4692217108095065, + 3.4808338150614873, 3.5014775559538975, + 3.5259919982636347], + "vin": [4.996858536847867, 4.99298783543054, + 4.995568303042091, 4.996858536847867, + 5.00201947207097], + }]}}} + + +@pytest.fixture +def timestamps(): + return np.array([0., 0.01670847, 0.03336808, 0.05002418, 0.06672007]) + + +@pytest.mark.parametrize( + "x,time,expected", [ + ([1.0, 1.0], [1.0, 2.0], [0.0, 0.0]), + ([1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 1.0, 1.0]), + ([1.0, 2.0, 3.0], [1.0, 4.0, 6.0], [1/3, ((1/3)+0.5)/2, 0.5]) + ] +) +def test_calc_deriv(x, time, expected): + assert np.all(calc_deriv(x, time) == expected) + + +@pytest.mark.parametrize( + "speed,expected", [ + (np.array([1.0]), [0.09605128650142128]), + (np.array([0., 2.0]), [0., 2.0 * 0.09605128650142128]) + ] +) +def test_deg_to_dist(speed, expected): + assert np.all(np.allclose(deg_to_dist(speed), expected)) + + +def test_get_running_df(running_data, timestamps): + expected = pd.DataFrame( + {'speed': { + 0.0: 4.0677840296488785, + 0.01670847: 4.468231641421186, + 0.03336808: 4.869192250359061, + 0.05002418: 4.47027713320348, + 0.06672007: 4.070849018882336}, + 'dx': { + 0.0: 0.0, + 0.01670847: 0.8444478, + 0.03336808: 0.7076058, + 0.05002418: 1.4225141, + 
0.06672007: 1.5040479}, + 'v_sig': { + 0.0: 3.460190074169077, + 0.01670847: 3.4692217108095065, + 0.03336808: 3.4808338150614873, + 0.05002418: 3.5014775559538975, + 0.06672007: 3.5259919982636347}, + 'v_in': { + 0.0: 4.996858536847867, + 0.01670847: 4.99298783543054, + 0.03336808: 4.995568303042091, + 0.05002418: 4.996858536847867, + 0.06672007: 5.00201947207097}}) + expected.index.name = "timestamps" + + pd.testing.assert_frame_equal(expected, + get_running_df(running_data, timestamps)) diff --git a/allensdk/test/brain_observatory/behavior/test_stimulus_processing.py b/allensdk/test/brain_observatory/behavior/test_stimulus_processing.py new file mode 100644 index 000000000..4d25a6907 --- /dev/null +++ b/allensdk/test/brain_observatory/behavior/test_stimulus_processing.py @@ -0,0 +1,76 @@ + +import numpy as np +import pytest + + +from allensdk.brain_observatory.behavior.stimulus_processing import ( + get_stimulus_presentations, _get_stimulus_epoch, _get_draw_epochs) + + +data = { + "items": { + "behavior": { + "stimuli": { + "images": { + "set_log": [ + ('Image', 'im065', 5.809955710916157, 0), + ('Image', 'im061', 314.06612555068784, 6), + ('Image', 'im062', 348.5941232265203, 12), + ], + "draw_log": ([0]+[1]*3 + [0]*3)*3 + [0] + } + }, + # "intervalsms": np.array([16.0]*10) + } + } +} +timestamps = np.array([0.016 * i for i in range(19)]) + + +@pytest.mark.parametrize( + "current_set_ix,start_frame,n_frames,expected", [ + (0, 0, 18, (0, 6)), + (2, 11, 18, (11, 18)) + ] +) +def test_get_stimulus_epoch(current_set_ix, start_frame, n_frames, expected): + log = data["items"]["behavior"]["stimuli"]["images"]["set_log"] + actual = _get_stimulus_epoch(log, current_set_ix, start_frame, n_frames) + assert actual == expected + + +@pytest.mark.parametrize( + "start_frame,stop_frame,expected", [ + (0, 6, [(1, 4)]), + (0, 11, [(1, 4), (8, 11)]) + ] +) +def test_get_draw_epochs(start_frame, stop_frame, expected): + draw_log = 
data["items"]["behavior"]["stimuli"]["images"]["draw_log"] + actual = _get_draw_epochs(draw_log, start_frame, stop_frame) + assert actual == expected + + +# def test_get_stimulus_templates(): +# pass +# # TODO +# # See below (get_images_dict is a dependency) + + +# def test_get_images_dict(): +# pass +# # TODO +# # This is too hard-coded to be testable right now. +# # convert_filepath_caseinsensitive prevents using any tempdirs/tempfiles + + +# def test_get_stimulus_presentations(): +# pass +# # TODO +# # Monster function with undocumented dependencies + + +# def test_get_visual_stimuli_df(): +# pass +# # TODO +# # See above (this is a dependency of get_stimulus_presentations) From d44f3b1256bb3204fa1de8ff5148d178b4be5ce3 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 8 Nov 2019 13:13:03 -0800 Subject: [PATCH 36/60] update behavior base to return correct class type for mypy linting --- .../brain_observatory/behavior/internal/behavior_base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/allensdk/brain_observatory/behavior/internal/behavior_base.py b/allensdk/brain_observatory/behavior/internal/behavior_base.py index 3c0873e9b..41863d2a9 100644 --- a/allensdk/brain_observatory/behavior/internal/behavior_base.py +++ b/allensdk/brain_observatory/behavior/internal/behavior_base.py @@ -4,10 +4,7 @@ import numpy as np import pandas as pd - - -RunningSpeed = NamedTuple("RunningSpeed", [("timestamps", np.ndarray), - ("values", np.ndarray)]) +from allensdk.brain_observatory.running_speed import RunningSpeed class BehaviorBase(abc.ABC): From 3bf6b96a603e626588d2dd95313f0bfb8c25cbe0 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 8 Nov 2019 13:14:43 -0800 Subject: [PATCH 37/60] make cache clearing a class mixin for easier reusability --- allensdk/core/cache_method_utilities.py | 18 ++++++++++++++++++ allensdk/internal/api/ophys_lims_api.py | 22 ++++------------------ 2 files changed, 22 insertions(+), 18 deletions(-) create mode 100644 
allensdk/core/cache_method_utilities.py diff --git a/allensdk/core/cache_method_utilities.py b/allensdk/core/cache_method_utilities.py new file mode 100644 index 000000000..faeff6aa9 --- /dev/null +++ b/allensdk/core/cache_method_utilities.py @@ -0,0 +1,18 @@ +import inspect + + +class CachedInstanceMethodMixin(object): + def cache_clear(self): + """ + Calls `cache_clear` method on all bound methods in this instance + (where valid). + Intended to clear calls cached with the `memoize` decorator. + Note that this will also clear functions decorated with `lru_cache` and + `lfu_cache` in this class (or any other function with `cache_clear` + attribute). + """ + for _, method in inspect.getmembers(self, inspect.ismethod): + try: + method.cache_clear() + except (AttributeError, TypeError): + pass diff --git a/allensdk/internal/api/ophys_lims_api.py b/allensdk/internal/api/ophys_lims_api.py index db37ae2e6..99c24c17f 100644 --- a/allensdk/internal/api/ophys_lims_api.py +++ b/allensdk/internal/api/ophys_lims_api.py @@ -6,15 +6,16 @@ import pytz import pandas as pd -from allensdk.internal.api import PostgresQueryMixin, OneOrMoreResultExpectedError +from allensdk.internal.api import ( + PostgresQueryMixin, OneOrMoreResultExpectedError) from allensdk.api.cache import memoize from allensdk.brain_observatory.behavior.image_api import ImageApi import allensdk.brain_observatory.roi_masks as roi from allensdk.internal.core.lims_utilities import safe_system_path -import inspect +from allensdk.core.cache_method_utilities import CachedInstanceMethodMixin -class OphysLimsApi(PostgresQueryMixin): +class OphysLimsApi(PostgresQueryMixin, CachedInstanceMethodMixin): def __init__(self, ophys_experiment_id): self.ophys_experiment_id = ophys_experiment_id @@ -23,21 +24,6 @@ def __init__(self, ophys_experiment_id): def get_ophys_experiment_id(self): return self.ophys_experiment_id - def cache_clear(self): - """ - Calls `clear_cache` method on all bound methods in this instance - (where
valid). - Intended to clear calls cached with the `memoize` decorator. - Note that this will also clear functions decorated with `lru_cache` and - `lfu_cache` in this class (or any other function with `clear_cache` - attribute). - """ - for _, method in inspect.getmembers(self, inspect.ismethod): - try: - method.cache_clear() - except (AttributeError, TypeError): - pass - @memoize def get_ophys_experiment_dir(self): query = ''' From 96fb725e1e2f0b1a5156e21dc5bd080dc2eff2e9 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 8 Nov 2019 13:14:59 -0800 Subject: [PATCH 38/60] behavior data only lims api with tests --- .../internal/api/behavior_data_lims_api.py | 465 ++++++++++++++++++ .../behavior/test_behavior_data_lims_api.py | 267 ++++++++++ 2 files changed, 732 insertions(+) create mode 100644 allensdk/internal/api/behavior_data_lims_api.py create mode 100644 allensdk/test/brain_observatory/behavior/test_behavior_data_lims_api.py diff --git a/allensdk/internal/api/behavior_data_lims_api.py b/allensdk/internal/api/behavior_data_lims_api.py new file mode 100644 index 000000000..45c649233 --- /dev/null +++ b/allensdk/internal/api/behavior_data_lims_api.py @@ -0,0 +1,465 @@ +import numpy as np +import pandas as pd +import uuid +from datetime import datetime +import pytz + +from typing import Dict, Optional, Union, List, Any + +from allensdk.core.exceptions import DataFrameIndexError +from allensdk.brain_observatory.behavior.internal.behavior_base import ( + BehaviorBase) +from allensdk.brain_observatory.behavior.rewards_processing import get_rewards +from allensdk.brain_observatory.behavior.running_processing import ( + get_running_df) +from allensdk.brain_observatory.behavior.stimulus_processing import ( + get_stimulus_presentations, get_stimulus_templates, get_stimulus_metadata) +from allensdk.brain_observatory.running_speed import RunningSpeed +from allensdk.brain_observatory.behavior.metadata_processing import ( + get_task_parameters) +from 
allensdk.brain_observatory.behavior.trials_processing import get_trials +from allensdk.internal.core.lims_utilities import safe_system_path +from allensdk.internal.api import PostgresQueryMixin +from allensdk.api.cache import memoize +from allensdk.internal.api import ( + OneResultExpectedError, OneOrMoreResultExpectedError) +from allensdk.core.cache_method_utilities import CachedInstanceMethodMixin + + +class BehaviorDataLimsApi(PostgresQueryMixin, CachedInstanceMethodMixin, + BehaviorBase): + def __init__(self, behavior_session_id): + super().__init__() + # TODO: this password has been exposed in code but we really need + # to move towards using a secrets database + self.mtrain_db = PostgresQueryMixin( + dbname="mtrain", user="mtrainreader", + host="prodmtrain1", port=5432, password="mtrainro") + self.behavior_session_id = behavior_session_id + ids = self._get_ids() + self.ophys_experiment_ids = ids.get("ophys_experiment_ids") + self.ophys_session_id = ids.get("ophys_session_id") + self.behavior_training_id = ids.get("behavior_training_id") + self.foraging_id = ids.get("foraging_id") + self.ophys_container_id = ids.get("ophys_container_id") + + def _get_ids(self) -> Dict[str, Optional[Union[int, List[int]]]]: + """Fetch ids associated with this behavior_session_id. If there is no + id, return None. 
+ :returns: Dictionary of ids with the following keys: + behavior_training_id: int -- Only if was a training session + ophys_session_id: int -- None if have behavior_training_id + ophys_experiment_ids: List[int] -- only if have ophys_session_id + foraging_id: int + :rtype: dict + """ + # Get all ids from the behavior_sessions table + query = f""" + SELECT + ophys_session_id, behavior_training_id, foraging_id + FROM + behavior_sessions + WHERE + behavior_sessions.id = {self.behavior_session_id}; + """ + ids_response = self.select(query) + if len(ids_response) > 1: + raise OneResultExpectedError + ids_dict = ids_response.iloc[0].to_dict() + + # Get additional ids if also an ophys session + # (experiment_id, container_id) + if ids_dict.get("ophys_session_id"): + oed_query = f""" + SELECT id + FROM ophys_experiments + WHERE ophys_session_id = {ids_dict["ophys_session_id"]}; + """ + oed = self.fetchall(oed_query) + + container_query = f""" + SELECT DISTINCT + visual_behavior_experiment_container_id id + FROM + ophys_experiments_visual_behavior_experiment_containers + WHERE + ophys_experiment_id IN ({",".join(set(map(str, oed)))}); + """ + container_id = self.fetchone(container_query, strict=True) + + ids_dict.update({"ophys_experiment_ids": oed, + "ophys_container_id": container_id}) + else: + ids_dict.update({"ophys_experiment_ids": None, + "ophys_container_id": None}) + return ids_dict + + def get_behavior_session_id(self) -> int: + """Getter to be consistent with BehaviorOphysLimsApi.""" + return self.behavior_session_id + + def get_behavior_session_uuid(self) -> Optional[int]: + data = self._behavior_stimulus_file() + return data.get("session_uuid") + + def get_behavior_stimulus_file(self) -> str: + """Return the path to the StimulusPickle file for a session. 
+ :rtype: str + """ + query = f""" + SELECT + stim.storage_directory || stim.filename AS stim_file + FROM + well_known_files stim + WHERE + stim.attachable_id = {self.behavior_session_id} + AND stim.attachable_type = 'BehaviorSession' + AND stim.well_known_file_type_id IN ( + SELECT id + FROM well_known_file_types + WHERE name = 'StimulusPickle'); + """ + return safe_system_path(self.fetchone(query, strict=True)) + + @memoize + def _behavior_stimulus_file(self) -> pd.DataFrame: + """Helper method to cache stimulus file in memory since it takes about + a second to load (and is used in many methods). + """ + return pd.read_pickle(self.get_behavior_stimulus_file()) + + def get_licks(self) -> pd.DataFrame: + """Get lick data from pkl file. + This function assumes that the first sensor in the list of + lick_sensors is the desired lick sensor. If this changes we need + to update to get the proper line. + + :returns: pd.DataFrame -- A dataframe containing lick timestamps + """ + # Get licks from pickle file instead of sync + data = self._behavior_stimulus_file() + stimulus_timestamps = self.get_stimulus_timestamps() + lick_frames = (data["items"]["behavior"]["lick_sensors"][0] + ["lick_events"]) + lick_times = [stimulus_timestamps[frame] for frame in lick_frames] + return pd.DataFrame({"time": lick_times}) + + def get_rewards(self) -> pd.DataFrame: + """Get reward data from pkl file, based on pkl file timestamps + (not sync file). + + :returns: pd.DataFrame -- A dataframe containing timestamps of + delivered rewards. + """ + data = self._behavior_stimulus_file() + # No sync timestamps to rebase on, so pass dummy rebase function + return get_rewards(data, lambda x: x) + + def get_running_data_df(self) -> pd.DataFrame: + """Get running speed data. + + :returns: pd.DataFrame -- dataframe containing various signals used + to compute running speed. 
+ """ + stimulus_timestamps = self.get_stimulus_timestamps() + data = self._behavior_stimulus_file() + return get_running_df(data, stimulus_timestamps) + + def get_running_speed(self) -> RunningSpeed: + """Get running speed using timestamps from + self.get_stimulus_timestamps. + + NOTE: Do not correct for monitor delay. + + :returns: RunningSpeed -- a NamedTuple containing the subject's + timestamps and running speeds (in cm/s) + """ + running_data_df = self.get_running_data_df() + if running_data_df.index.name != "timestamps": + raise DataFrameIndexError( + f"Expected index to be named 'timestamps' but got " + "'{running_data_df.index.name}'.") + return RunningSpeed(timestamps=running_data_df.index.values, + values=running_data_df.speed.values) + + def get_stimulus_frame_rate(self): + stimulus_timestamps = self.get_stimulus_timestamps() + return np.round(1 / np.mean(np.diff(stimulus_timestamps)), 0) + + def get_stimulus_presentations(self) -> pd.DataFrame: + """Get stimulus presentation data. + + NOTE: Uses timestamps that do not account for monitor delay. + + :returns: pd.DataFrame -- + Table whose rows are stimulus presentations + (i.e. a given image, for a given duration, typically 250 ms) + and whose columns are presentation characteristics. + """ + stimulus_timestamps = self.get_stimulus_timestamps() + data = self._behavior_stimulus_file() + raw_stim_pres_df = get_stimulus_presentations( + data, stimulus_timestamps) + + # Fill in nulls for image_name + # This makes two assumptions: + # 1. Nulls in `image_name` should be "gratings_" + # 2. Gratings are only present (or need to be fixed) when all + # values for `image_name` are null. 
+ if pd.isnull(raw_stim_pres_df["image_name"]).all(): + if ~pd.isnull(raw_stim_pres_df["orientation"]).all(): + raw_stim_pres_df["image_name"] = ( + raw_stim_pres_df["orientation"] + .apply(lambda x: f"gratings_{x}")) + else: + raise ValueError("All values for 'orentation' and 'image_name'" + " are null.") + + stimulus_metadata_df = get_stimulus_metadata(data) + idx_name = raw_stim_pres_df.index.name + stimulus_index_df = ( + raw_stim_pres_df + .reset_index() + .merge(stimulus_metadata_df.reset_index(), on=["image_name"]) + .set_index(idx_name)) + stimulus_index_df = ( + stimulus_index_df[["image_set", "image_index", "start_time"]] + .rename(columns={"start_time": "timestamps"}) + .sort_index() + .set_index("timestamps", drop=True)) + stim_pres_df = raw_stim_pres_df.merge( + stimulus_index_df, left_on="start_time", right_index=True, + how="left") + if len(raw_stim_pres_df) != len(stim_pres_df): + raise ValueError("Length of `stim_pres_df` should not change after" + f" merge; was {len(raw_stim_pres_df)}, now " + f" {len(stim_pres_df)}.") + return stim_pres_df[sorted(stim_pres_df)] + + @memoize + def get_stimulus_templates(self) -> Dict[str, np.ndarray]: + """Get stimulus templates (movies, scenes) for behavior session. + + Returns + ------- + Dict[str, np.ndarray] + A dictionary containing the stimulus images presented during the + session. Keys are data set names, and values are 3D numpy arrays. + """ + data = self._behavior_stimulus_file() + return get_stimulus_templates(data) + + @memoize + def get_stimulus_timestamps(self) -> np.ndarray: + """Get stimulus timestamps (vsyncs) from pkl file. + + NOTE: Located with behavior_session_id. Does not use the sync_file + which requires ophys_session_id. + + Returns + ------- + np.ndarray + Timestamps associated with stimulus presentations on the monitor + that do not account for monitor delay.
+ """ + data = self._behavior_stimulus_file() + vsyncs = data["items"]["behavior"]["intervalsms"] + return np.hstack((0, vsyncs)).cumsum() / 1000.0 # cumulative time + + def get_task_parameters(self) -> dict: + """Get task parameters from pkl file. + + Returns + ------- + dict + A dictionary containing parameters used to define the task runtime + behavior. + """ + data = self._behavior_stimulus_file() + return get_task_parameters(data) + + def get_trials(self) -> pd.DataFrame: + """Get trials from pkl file + + Returns + ------- + pd.DataFrame + A dataframe containing behavioral trial start/stop times, + and trial data + """ + licks = self.get_licks() + data = self._behavior_stimulus_file() + rewards = self.get_rewards() + stimulus_presentations = self.get_stimulus_presentations() + # Pass a dummy rebase function since we don't have two time streams + trial_df = get_trials(data, licks, rewards, stimulus_presentations, + lambda x: x) + + return trial_df + + @memoize + def get_sex(self) -> str: + """Returns sex of the animal (M/F) + :rtype: str + """ + query = f""" + SELECT g.name AS sex + FROM behavior_sessions bs + JOIN donors d ON bs.donor_id = d.id + JOIN genders g ON g.id = d.gender_id + WHERE bs.id = {self.behavior_session_id}; + """ + return self.fetchone(query, strict=True) + + @memoize + def get_age(self) -> str: + """Returns age code of the subject. + :rtype: str + """ + query = f""" + SELECT a.name AS age + FROM behavior_sessions bs + JOIN donors d ON d.id = bs.donor_id + JOIN ages a ON a.id = d.age_id + WHERE bs.id = {self.behavior_session_id}; + """ + return self.fetchone(query, strict=True) + + @memoize + def get_rig_name(self) -> str: + """Returns the name of the experimental rig. 
+ :rtype: str + """ + query = f""" + SELECT e.name AS device_name + FROM behavior_sessions bs + JOIN equipment e ON e.id = bs.equipment_id + WHERE bs.id = {self.behavior_session_id}; + """ + return self.fetchone(query, strict=True) + + @memoize + def get_stimulus_name(self) -> str: + """Returns the name of the stimulus set used for the session. + :rtype: str + """ + query = f""" + SELECT stages.name + FROM behavior_sessions bs + JOIN stages ON stages.id = bs.state_id + WHERE bs.id = '{self.foraging_id}' + """ + return self.mtrain_db.fetchone(query, strict=True) + + @memoize + def get_reporter_line(self) -> List[str]: + """Returns the genotype name(s) of the reporter line(s). + :rtype: list + """ + query = f""" + SELECT g.name AS reporter_line + FROM behavior_sessions bs + JOIN donors d ON bs.donor_id=d.id + JOIN donors_genotypes dg ON dg.donor_id=d.id + JOIN genotypes g ON g.id=dg.genotype_id + JOIN genotype_types gt + ON gt.id=g.genotype_type_id AND gt.name = 'reporter' + WHERE bs.id={self.behavior_session_id}; + """ + result = self.fetchall(query) + if result is None or len(result) < 1: + raise OneOrMoreResultExpectedError( + f"Expected one or more, but received: '{result}' " + f"from query:\n'{query}'") + return result + + @memoize + def get_driver_line(self) -> List[str]: + """Returns the genotype name(s) of the driver line(s). 
+ :rtype: list + """ + query = f""" + SELECT g.name AS driver_line + FROM behavior_sessions bs + JOIN donors d ON bs.donor_id=d.id + JOIN donors_genotypes dg ON dg.donor_id=d.id + JOIN genotypes g ON g.id=dg.genotype_id + JOIN genotype_types gt + ON gt.id=g.genotype_type_id AND gt.name = 'driver' + WHERE bs.id={self.behavior_session_id}; + """ + result = self.fetchall(query) + if result is None or len(result) < 1: + raise OneOrMoreResultExpectedError( + f"Expected one or more, but received: '{result}' " + f"from query:\n'{query}'") + return result + + @memoize + def get_external_specimen_name(self) -> int: + """Returns the LabTracks ID + :rtype: int + """ + # TODO: Should this even be included? + # Found sometimes there were entries with NONE which is + # why they are filtered out; also many entries in the table + # match the donor_id, which is why used DISTINCT + query = f""" + SELECT DISTINCT(sp.external_specimen_name) + FROM behavior_sessions bs + JOIN donors d ON bs.donor_id=d.id + JOIN specimens sp ON sp.donor_id=d.id + WHERE bs.id={self.behavior_session_id} + AND sp.external_specimen_name IS NOT NULL; + """ + return int(self.fetchone(query, strict=True)) + + @memoize + def get_full_genotype(self) -> str: + """Return the name of the subject's genotype + :rtype: str + """ + query = f""" + SELECT d.full_genotype + FROM behavior_sessions bs + JOIN donors d ON d.id=bs.donor_id + WHERE bs.id= {self.behavior_session_id}; + """ + return self.fetchone(query, strict=True) + + def get_experiment_date(self) -> datetime: + """Return timestamp the behavior stimulus file began recording in UTC + :rtype: datetime + """ + data = self._behavior_stimulus_file() + # Assuming file has local time of computer (Seattle) + tz = pytz.timezone("America/Los_Angeles") + return tz.localize(data["start_time"]).astimezone(pytz.utc) + + def get_metadata(self) -> Dict[str, Any]: + """Return metadata about the session. 
+ :rtype: dict + """ + if self.get_behavior_session_uuid() is None: + bs_uuid = None + else: + bs_uuid = uuid.UUID(self.get_behavior_session_uuid()) + metadata = { + "rig_name": self.get_rig_name(), + "sex": self.get_sex(), + "age": self.get_age(), + "ophys_experiment_id": self.ophys_experiment_ids, + "experiment_container_id": self.experiment_container_id, + "stimulus_frame_rate": self.get_stimulus_frame_rate(), + "session_type": self.get_stimulus_name(), + "experiment_datetime": self.get_experiment_date(), + "reporter_line": self.get_reporter_line(), + "driver_line": self.get_driver_line(), + "LabTracks_ID": self.get_external_specimen_name(), + "full_genotype": self.get_full_genotype(), + "behavior_session_uuid": bs_uuid, + "foraging_id": self.foraging_id, + "behavior_session_id": self.behavior_session_id, + "behavior_training_id": self.behavior_training_id, + } + return metadata diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_data_lims_api.py b/allensdk/test/brain_observatory/behavior/test_behavior_data_lims_api.py new file mode 100644 index 000000000..804c830bf --- /dev/null +++ b/allensdk/test/brain_observatory/behavior/test_behavior_data_lims_api.py @@ -0,0 +1,267 @@ +import pytest +import numpy as np +import pandas as pd +from datetime import datetime +import pytz +import math + +from allensdk.internal.api.behavior_data_lims_api import BehaviorDataLimsApi +from allensdk.internal.api.behavior_ophys_api import BehaviorOphysLimsApi +from allensdk.brain_observatory.running_speed import RunningSpeed +from allensdk.core.exceptions import DataFrameIndexError + + +@pytest.fixture +def MockBehaviorDataLimsApi(): + class MockBehaviorDataLimsApi(BehaviorDataLimsApi): + """ + Mock class that overrides some functions to provide test data and + initialize without calls to db. 
+ """ + def __init__(self): + super().__init__(behavior_session_id=8675309) + + def _get_ids(self): + return {} + + def _behavior_stimulus_file(self): + data = { + "items": { + "behavior": { + "lick_sensors": [{ + "lick_events": [2, 6, 9], + }], + "intervalsms": np.array([16.0]*10), + }, + }, + "session_uuid": 123456, + "start_time": datetime(2019, 9, 26, 9), + } + return data + + def get_running_data_df(self): + return pd.DataFrame( + {"timestamps": [0.0, 0.1, 0.2], + "speed": [8.0, 15.0, 16.0]}).set_index("timestamps") + + api = MockBehaviorDataLimsApi() + yield api + api.cache_clear() + + +@pytest.fixture +def MockApiRunSpeedExpectedError(): + class MockApiRunSpeedExpectedError(BehaviorDataLimsApi): + """ + Mock class that overrides some functions to provide test data and + initialize without calls to db. + """ + def __init__(self): + super().__init__(behavior_session_id=8675309) + + def _get_ids(self): + return {} + + def get_running_data_df(self): + return pd.DataFrame( + {"timestamps": [0.0, 0.1, 0.2], + "speed": [8.0, 15.0, 16.0]}) + return MockApiRunSpeedExpectedError() + + +# Test the non-sql-query functions +# Does not include tests for the following functions, as they are just calls to +# static methods provided for convenience (and should be covered with their own +# unit tests): +# get_rewards +# get_running_data_df +# get_stimulus_templates +# get_task_parameters +# get_trials +# Does not include test for get_metadata since it just collects data from +# methods covered in other unit tests, or data derived from sql queries. 
+def test_get_stimulus_timestamps(MockBehaviorDataLimsApi): + api = MockBehaviorDataLimsApi + expected = np.array([0.016 * i for i in range(11)]) + assert np.allclose(expected, api.get_stimulus_timestamps()) + + +def test_get_licks(MockBehaviorDataLimsApi): + api = MockBehaviorDataLimsApi + expected = pd.DataFrame({"time": [0.016 * i for i in [2., 6., 9.]]}) + pd.testing.assert_frame_equal(expected, api.get_licks()) + + +def test_get_behavior_session_uuid(MockBehaviorDataLimsApi): + api = MockBehaviorDataLimsApi + assert 123456 == api.get_behavior_session_uuid() + + +def test_get_stimulus_frame_rate(MockBehaviorDataLimsApi): + api = MockBehaviorDataLimsApi + assert 62.0 == api.get_stimulus_frame_rate() + + +def test_get_experiment_date(MockBehaviorDataLimsApi): + api = MockBehaviorDataLimsApi + expected = datetime(2019, 9, 26, 16, tzinfo=pytz.UTC) + actual = api.get_experiment_date() + assert expected == actual + + +def test_get_running_speed(MockBehaviorDataLimsApi): + expected = RunningSpeed(timestamps=[0.0, 0.1, 0.2], + values=[8.0, 15.0, 16.0]) + api = MockBehaviorDataLimsApi + actual = api.get_running_speed() + assert expected == actual + + +def test_get_running_speed_raises_index_error(MockApiRunSpeedExpectedError): + with pytest.raises(DataFrameIndexError): + MockApiRunSpeedExpectedError.get_running_speed() + + +# def test_get_stimulus_presentations(MockBehaviorDataLimsApi): +# api = MockBehaviorDataLimsApi +# # TODO. This function is a monster with multiple dependencies, +# # no tests, and no documentation (for any of its dependencies). +# # Needs to be broken out into testable parts. + +@pytest.mark.requires_bamboo +@pytest.mark.nightly +class TestBehaviorRegression: + """ + Test whether behavior sessions (that are also ophys) loaded with + BehaviorDataLimsApi return the same results as sessions loaded + with BehaviorOphysLimsApi, for relevant functions. Do not check for + timestamps, which are from different files so will not be the same. 
+ Also not checking for experiment_date, since they're from two different + sources (and I'm not sure how it's uploaded in the database). + + Do not test `get_licks` regression because the licks come from two + different sources and are recorded differently (behavior pickle file in + BehaviorDataLimsApi; sync file in BehaviorOphysLimsApi) + """ + @classmethod + def setup_class(cls): + cls.bd = BehaviorDataLimsApi(976012750) + cls.od = BehaviorOphysLimsApi(976255949) + + @classmethod + def teardown_class(cls): + cls.bd.cache_clear() + cls.od.cache_clear() + + def test_stim_file_regression(self): + assert (self.bd.get_behavior_stimulus_file() + == self.od.get_behavior_stimulus_file()) + + def test_get_rewards_regression(self): + """Index is timestamps here, so remove it before comparing.""" + bd_rewards = self.bd.get_rewards().reset_index(drop=True) + od_rewards = self.od.get_rewards().reset_index(drop=True) + pd.testing.assert_frame_equal(bd_rewards, od_rewards) + + def test_ophys_experiment_id_regression(self): + assert self.bd.ophys_experiment_ids[0] == self.od.ophys_experiment_id + + def test_behavior_uuid_regression(self): + assert (self.bd.get_behavior_session_uuid() + == self.od.get_behavior_session_uuid()) + + def test_container_id_regression(self): + assert (self.bd.ophys_container_id + == self.od.get_experiment_container_id()) + + def test_stimulus_frame_rate_regression(self): + assert (self.bd.get_stimulus_frame_rate() + == self.od.get_stimulus_frame_rate()) + + def test_get_running_speed_regression(self): + """Can't test values because they're intrinsically linked to timestamps + """ + bd_speed = self.bd.get_running_speed() + od_speed = self.od.get_running_speed() + assert len(bd_speed.values) == len(od_speed.values) + assert len(bd_speed.timestamps) == len(od_speed.timestamps) + + def test_get_running_df_regression(self): + """Can't test values because they're intrinsically linked to timestamps + """ + bd_running = self.bd.get_running_data_df() + 
od_running = self.od.get_running_data_df() + assert len(bd_running) == len(od_running) + assert list(bd_running) == list(od_running) + + def test_get_stimulus_presentations_regression(self): + drop_cols = ["start_time", "stop_time"] + bd_pres = self.bd.get_stimulus_presentations().drop(drop_cols, axis=1) + od_pres = self.od.get_stimulus_presentations().drop(drop_cols, axis=1) + # Duration needs less precision (timestamp-dependent) + pd.testing.assert_frame_equal(bd_pres, od_pres, check_less_precise=2) + + def test_get_stimulus_template_regression(self): + bd_template = self.bd.get_stimulus_templates() + od_template = self.od.get_stimulus_templates() + assert bd_template.keys() == od_template.keys() + for k in bd_template.keys(): + assert np.array_equal(bd_template[k], od_template[k]) + + def test_get_task_parameters_regression(self): + bd_params = self.bd.get_task_parameters() + od_params = self.od.get_task_parameters() + # Have to do special checking because of nan equality + assert bd_params.keys() == od_params.keys() + for k in bd_params.keys(): + bd_v = bd_params[k] + od_v = od_params[k] + try: + if math.isnan(bd_v): + assert math.isnan(od_v) + else: + assert bd_v == od_v + except (AttributeError, TypeError): + assert bd_v == od_v + + def test_get_trials_regression(self): + """ A lot of timestamp dependent values. 
Test what we can.""" + cols_to_test = ["reward_volume", "hit", "false_alarm", "miss", + "sham_change", "stimulus_change", "aborted", "go", + "catch", "auto_rewarded", "correct_reject", + "trial_length", "change_frame", "initial_image_name", + "change_image_name"] + bd_trials = self.bd.get_trials()[cols_to_test] + od_trials = self.od.get_trials()[cols_to_test] + pd.testing.assert_frame_equal(bd_trials, od_trials, + check_less_precise=2) + + def test_get_sex_regression(self): + assert self.bd.get_sex() == self.od.get_sex() + + def test_get_rig_name_regression(self): + assert self.bd.get_rig_name() == self.od.get_rig_name() + + def test_get_stimulus_name_regression(self): + assert self.bd.get_stimulus_name() == self.od.get_stimulus_name() + + def test_get_reporter_line_regression(self): + assert self.bd.get_reporter_line() == self.od.get_reporter_line() + + def test_get_driver_line_regression(self): + assert self.bd.get_driver_line() == self.od.get_driver_line() + + def test_get_external_specimen_name_regression(self): + assert (self.bd.get_external_specimen_name() + == self.od.get_external_specimen_name()) + + def test_get_full_genotype_regression(self): + assert self.bd.get_full_genotype() == self.od.get_full_genotype() + + def test_get_experiment_date_regression(self): + """Just testing the date since it comes from two different sources; + We expect that BehaviorOphysLimsApi will be earlier (more like when + rig was started up), while BehaviorDataLimsApi returns the start of + the actual behavior (from pkl file)""" + assert (self.bd.get_experiment_date().date() + == self.od.get_experiment_date().date()) From 56ed25fd57b7cde8826f7b5577c3a15b9388b216 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Thu, 7 Nov 2019 12:45:11 -0800 Subject: [PATCH 39/60] Use ellipse area calculation for eye areas --- .../gaze_mapping/__main__.py | 3 +- .../gaze_mapping/_gaze_mapper.py | 48 ++++++++++++++----- .../gaze_mapping/test_gaze_mapping.py | 22 ++++++++- 3 files changed, 57 
insertions(+), 16 deletions(-) diff --git a/allensdk/brain_observatory/gaze_mapping/__main__.py b/allensdk/brain_observatory/gaze_mapping/__main__.py index 4d04c6f0e..3feb2b7db 100644 --- a/allensdk/brain_observatory/gaze_mapping/__main__.py +++ b/allensdk/brain_observatory/gaze_mapping/__main__.py @@ -17,6 +17,7 @@ ) from allensdk.brain_observatory.gaze_mapping._gaze_mapper import ( compute_circular_areas, + compute_elliptical_areas, GazeMapper ) from allensdk.brain_observatory.gaze_mapping._filter_utils import ( @@ -138,7 +139,7 @@ def run_gaze_mapping(pupil_parameters: pd.DataFrame, cm_per_pixel=cm_per_pixel) raw_pupil_areas = compute_circular_areas(pupil_parameters) - raw_eye_areas = compute_circular_areas(eye_parameters) + raw_eye_areas = compute_elliptical_areas(eye_parameters) raw_pupil_on_monitor_cm = gaze_mapper.pupil_position_on_monitor_in_cm( cam_pupil_params=pupil_parameters[["center_x", "center_y"]].values, diff --git a/allensdk/brain_observatory/gaze_mapping/_gaze_mapper.py b/allensdk/brain_observatory/gaze_mapping/_gaze_mapper.py index 0b2a008bf..39c0b8f1a 100644 --- a/allensdk/brain_observatory/gaze_mapping/_gaze_mapper.py +++ b/allensdk/brain_observatory/gaze_mapping/_gaze_mapper.py @@ -297,31 +297,53 @@ def pupil_position_on_monitor_in_degrees(self, def compute_circular_areas(ellipse_params: pd.DataFrame) -> pd.Series: - """Compute circular area of a pupil or eye ellipse using half-major axis. + """Compute circular area of a pupil using half-major axis. - Assume the pupil/eye is a circle, and that as it moves off-axis + Assume the pupil is a circle, and that as it moves off-axis with the camera, the observed ellipse semi-major axis remains the radius of the circle. - Args: - ellipse_params (pandas.DataFrame): A table of pupil/eye parameters - consisting of 5 columns: - ("center_x", "center_y", "height", "phi", "width") - and n-row timepoints. 
+ Parameters + ---------- + ellipse_params (pandas.DataFrame): A table of pupil parameters consisting + of 5 columns: ("center_x", "center_y", "height", "phi", "width") + and n-row timepoints. - NOTE: For ellipse_params produced by the Deep Lab Cut pipeline, - "width" and "height" columns, in fact, refer to the - "half-width" and "half-height". + NOTE: For ellipse_params produced by the Deep Lab Cut pipeline, + "width" and "height" columns, in fact, refer to the + "half-width" and "half-height". - Returns: - pandas.Series: A series of pupil/eye areas for n-timepoints. + Returns + ------- + pandas.Series: A series of pupil areas for n-timepoints. """ # Take the biggest value between height and width columns and - # assume that it is the pupil/eye circle radius. + # assume that it is the pupil circle radius. radii = ellipse_params[["height", "width"]].max(axis=1) return np.pi * radii * radii +def compute_elliptical_areas(ellipse_params: pd.DataFrame) -> pd.Series: + """Compute the elliptical area using elliptical fit parameters. + + Parameters + ---------- + ellipse_params (pandas.DataFrame): A table of pupil parameters consisting + of 5 columns: ("center_x", "center_y", "height", "phi", "width") + and n-row timepoints. + + NOTE: For ellipse_params produced by the Deep Lab Cut pipeline, + "width" and "height" columns, in fact, refer to the + "half-width" and "half-height". + + Returns + ------- + pd.Series + pandas.Series: A series of areas for n-timepoints. 
+ """ + return np.pi * ellipse_params["height"] * ellipse_params["width"] + + def project_to_plane(plane_normal: np.ndarray, plane_point: np.ndarray, line_vectors: np.ndarray, diff --git a/allensdk/test/brain_observatory/gaze_mapping/test_gaze_mapping.py b/allensdk/test/brain_observatory/gaze_mapping/test_gaze_mapping.py index d92f7e7fd..5fbf9ec77 100644 --- a/allensdk/test/brain_observatory/gaze_mapping/test_gaze_mapping.py +++ b/allensdk/test/brain_observatory/gaze_mapping/test_gaze_mapping.py @@ -247,12 +247,30 @@ def test_mapping_gives_sane_outputs(gaze_mapper_fixture, ellipse_fits, expected, pd.Series([4 * np.pi, 16 * np.pi, 81 * np.pi, 729 * np.pi])), (pd.DataFrame({"height": [1, 3, 9, 27], "width": [2, 4, 8, 16]}), - pd.Series([4 * np.pi, 16 * np.pi, 81 * np.pi, 729 * np.pi])) + pd.Series([4 * np.pi, 16 * np.pi, 81 * np.pi, 729 * np.pi])), + + (pd.DataFrame({"height": [np.nan, 3, np.nan, 27], + "width": [2, 4, np.nan, np.nan]}), + pd.Series([4 * np.pi, 16 * np.pi, np.nan, 729 * np.pi])), ]) def test_compute_circular_areas(ellipse_params, expected): obtained = gm.compute_circular_areas(ellipse_params) - assert np.allclose(obtained, expected) + assert np.allclose(obtained, expected, equal_nan=True) + + +@pytest.mark.parametrize('ellipse_params, expected', [ + (pd.DataFrame({"height": [1, 2, 3, 4], "width": [4, 3, 2, 1]}), + pd.Series([4 * np.pi, 6 * np.pi, 6 * np.pi, 4 * np.pi])), + + (pd.DataFrame({"height": [np.nan, 7, 11, 12, np.nan], + "width": [5, 3, 11, np.nan, np.nan]}), + pd.Series([np.nan, np.pi * 21, np.pi * 121, np.nan, np.nan])) +]) +def test_compute_elliptical_areas(ellipse_params, expected): + obtained = gm.compute_elliptical_areas(ellipse_params) + + assert np.allclose(obtained, expected, equal_nan=True) @pytest.mark.parametrize("function_inputs,expected", [ From 1253ab8bcaf05a11d049a9194e40b45999285c26 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Fri, 8 Nov 2019 11:23:40 -0800 Subject: [PATCH 40/60] Return pupil and eye areas as cm^2 and 
add tests --- .../gaze_mapping/__main__.py | 176 ++++++++++++------ .../gaze_mapping/test_main.py | 157 ++++++++++++++++ 2 files changed, 276 insertions(+), 57 deletions(-) create mode 100644 allensdk/test/brain_observatory/gaze_mapping/test_main.py diff --git a/allensdk/brain_observatory/gaze_mapping/__main__.py b/allensdk/brain_observatory/gaze_mapping/__main__.py index 3feb2b7db..f243a96f7 100644 --- a/allensdk/brain_observatory/gaze_mapping/__main__.py +++ b/allensdk/brain_observatory/gaze_mapping/__main__.py @@ -1,6 +1,7 @@ import logging import sys from pathlib import Path +from typing import Dict import numpy as np import pandas as pd @@ -29,23 +30,68 @@ ) -def repackage_input_args(parser_args: dict) -> dict: - """Repackage arguments obtained by argschema. +def load_ellipse_fit_params(input_file: Path) -> Dict[str, pd.DataFrame]: + """Load Deep Lab Cut (DLC) ellipse fit h5 data as a dictionary of pandas + DataFrames. + + Parameters + ---------- + input_file : Path + Path to DLC .h5 file containing ellipse fits for pupil, + cr (corneal reflection), and eye. + + Returns + ------- + Dict[str, pd.DataFrame] + Dictionary where keys specify name of ellipse fit param type and values + are pandas DataFrames containing ellipse fit params. + + Raises + ------ + RuntimeError + If pupil, cr, and eye ellipse fits don't have the same number of rows. + """ + # TODO: Some ellipses.h5 files have the 'cr' key as complex type instead of + # float. For now, when loading ellipses.h5 files, always coerce to float + # but this should eventually be resolved upstream... 
+ pupil_params = pd.read_hdf(input_file, key="pupil").astype(float) + cr_params = pd.read_hdf(input_file, key="cr").astype(float) + eye_params = pd.read_hdf(input_file, key="eye").astype(float) + + num_frames_match = ((pupil_params.shape[0] == cr_params.shape[0]) + and (cr_params.shape[0] == eye_params.shape[0])) + if not num_frames_match: + raise RuntimeError("The number of frames for ellipse fits don't " + "match when they should: " + f"pupil_params ({pupil_params.shape[0]}), " + f"cr_params ({cr_params.shape[0]}), " + f"eye_params ({eye_params.shape[0]}).") + + return {"pupil_params": pupil_params, + "cr_params": cr_params, + "eye_params": eye_params} + + +def preprocess_input_args(parser_args: dict) -> dict: + """Preprocess arguments obtained by argschema. 1) Converts individual coordinate/rotation fields to numpy position/rotation arrays. 2) Convert all arguments in millimeters to centimeters - Args: - parser_args (dict): Parsed args obtained from argschema. + Parameters + ---------- + parser_args (dict): Parsed args obtained from argschema. - Returns: - dict: Repackaged args. + Returns + ------- + dict: Repackaged args. """ new_args: dict = {} - new_args["input_file"] = parser_args["input_file"] + new_args.update(load_ellipse_fit_params(parser_args["input_file"])) + new_args["session_sync_file"] = parser_args["session_sync_file"] new_args["output_file"] = parser_args["output_file"] @@ -101,32 +147,34 @@ def run_gaze_mapping(pupil_parameters: pd.DataFrame, Example: Z-axis for monitor and camera are aligned with X-axis for eye coordinate system - Args: - pupil_parameters (pd.DataFrame): A table of pupil parameters with + Parameters + ---------- + pupil_parameters (pd.DataFrame): A table of pupil parameters with 5 columns ("center_x", "center_y", "height", "phi", "width") - and n-row timepoints. Coordinate - cr_parameters (pd.DataFrame): A table of corneal reflection params with + and n-row timepoints. 
+ cr_parameters (pd.DataFrame): A table of corneal reflection params with 5 columns ("center_x", "center_y", "height", "phi", "width") and n-row timepoints. - eye_parameters (pd.DataFrame): A table of eye parameters with + eye_parameters (pd.DataFrame): A table of eye parameters with 5 columns ("center_x", "center_y", "height", "phi", "width") and n-row timepoints. - monitor_position (np.ndarray): An array describing monitor position - [x, y, z] - monitor_rotations (np.ndarray): An array describing monitor orientation - about [x, y, z] axes. - camera_position (np.ndarray): An array describing camera position - [x, y, z] - camera_rotations (np.ndarray): An array describing camera orientation - about [x, y, z] axes. - led_position (np.ndarray): An array describing LED position [x, y, z] - eye_radius_cm (float): Radius of eye being tracked in cm. - cm_per_pixel (float): Ratio of centimeters per pixel - - Returns: + monitor_position (np.ndarray): An array describing monitor position + [x, y, z] + monitor_rotations (np.ndarray): An array describing monitor orientation + about [x, y, z] axes. + camera_position (np.ndarray): An array describing camera position + [x, y, z] + camera_rotations (np.ndarray): An array describing camera orientation + about [x, y, z] axes. + led_position (np.ndarray): An array describing LED position [x, y, z] + eye_radius_cm (float): Radius of eye being tracked in cm. + cm_per_pixel (float): Ratio of centimeters per pixel + + Returns + ------- dict: A dictionary of gaze mapping outputs with - fields for: `pupil_areas`, `eye_areas`, `pupil_on_monitor_cm`, and - `pupil_on_monitor_deg`. + fields for: `pupil_areas` (in cm^2), `eye_areas` (in cm^2), + `pupil_on_monitor_cm`, and `pupil_on_monitor_deg`. 
""" output = {} @@ -138,8 +186,11 @@ def run_gaze_mapping(pupil_parameters: pd.DataFrame, eye_radius=eye_radius_cm, cm_per_pixel=cm_per_pixel) - raw_pupil_areas = compute_circular_areas(pupil_parameters) - raw_eye_areas = compute_elliptical_areas(eye_parameters) + pupil_params_in_cm = pupil_parameters * cm_per_pixel + raw_pupil_areas = compute_circular_areas(pupil_params_in_cm) + + eye_params_in_cm = eye_parameters * cm_per_pixel + raw_eye_areas = compute_elliptical_areas(eye_params_in_cm) raw_pupil_on_monitor_cm = gaze_mapper.pupil_position_on_monitor_in_cm( cam_pupil_params=pupil_parameters[["center_x", "center_y"]].values, @@ -222,6 +273,39 @@ def write_gaze_mapping_output_to_h5(output_savepath: Path, version.to_hdf(output_savepath, key="version", mode="a") +def load_sync_file_timings(sync_file: Path, + pupil_params_rows: int) -> pd.Series: + """Load sync file timings from .h5 file. + + Parameters + ---------- + sync_file : Path + Path to .h5 sync file. + pupil_params_rows : int + Number of rows in pupil params. + + Returns + ------- + pd.Series + A series of frame times. (New frame times according to synchronized + timings from DAQ) + + Raises + ------ + RuntimeError + If the number of eye tracking frames (pupil_params_rows) does not match + up with number of new frame times from the sync file. + """ + # Add synchronized frame times + frame_times = get_synchronized_camera_frame_times(sync_file) + if (pupil_params_rows != len(frame_times)): + raise RuntimeError("The number of camera sync pulses in the " + f"sync file ({len(frame_times)}) do not match " + "with the number of eye tracking frames " + f"({pupil_params_rows})!!!") + return frame_times + + def main(): logging.basicConfig(format=('%(asctime)s:%(funcName)s' @@ -231,27 +315,11 @@ def main(): schema_type=InputSchema, output_schema_type=OutputSchema) - args = repackage_input_args(parser.args) - - # TODO: Some ellipses.h5 files have the 'cr' key as complex type instead of - # float. 
For now, when loading ellipses.h5 files, always coerce to float - # but this should eventually be resolved upstream... - pupil_params = pd.read_hdf(args['input_file'], key="pupil").astype(float) - cr_params = pd.read_hdf(args['input_file'], key="cr").astype(float) - eye_params = pd.read_hdf(args['input_file'], key="eye").astype(float) + args = preprocess_input_args(parser.args) - num_frames_match = ((pupil_params.shape[0] == cr_params.shape[0]) - and (cr_params.shape[0] == eye_params.shape[0])) - if not num_frames_match: - raise RuntimeError("The number of frames for ellipse fits don't " - "match when they should: " - f"pupil_params ({pupil_params.shape[0]}), " - f"cr_params ({cr_params.shape[0]}), " - f"eye_params ({eye_params.shape[0]}).") - - output = run_gaze_mapping(pupil_parameters=pupil_params, - cr_parameters=cr_params, - eye_parameters=eye_params, + output = run_gaze_mapping(pupil_parameters=args["pupil_params"], + cr_parameters=args["cr_params"], + eye_parameters=args["eye_params"], monitor_position=args["monitor_position"], monitor_rotations=args["monitor_rotations"], camera_position=args["camera_position"], @@ -260,14 +328,8 @@ def main(): eye_radius_cm=args["eye_radius_cm"], cm_per_pixel=args["cm_per_pixel"]) - # Add synchronized frame times - frame_times = get_synchronized_camera_frame_times(args["session_sync_file"]) - if (pupil_params.shape[0] != len(frame_times)): - raise RuntimeError("The number of camera sync pulses in the " - f"sync file ({len(frame_times)}) do not match " - "with the number of eye tracking frames " - f"({pupil_params.shape[0]})!!!") - output["synced_frame_timestamps_sec"] = frame_times + output["synced_frame_timestamps_sec"] = load_sync_file_timings(args["session_sync_file"], + args["pupil_params"].shape[0]) write_gaze_mapping_output_to_h5(args["output_file"], output) module_output = {"screen_mapping_file": str(args["output_file"])} diff --git a/allensdk/test/brain_observatory/gaze_mapping/test_main.py 
b/allensdk/test/brain_observatory/gaze_mapping/test_main.py new file mode 100644 index 000000000..50ff54beb --- /dev/null +++ b/allensdk/test/brain_observatory/gaze_mapping/test_main.py @@ -0,0 +1,157 @@ +from pathlib import Path +import pytest + +import numpy as np +import pandas as pd + +import allensdk.brain_observatory.gaze_mapping.__main__ as main + + +def create_sample_ellipse_hdf(output_file: Path, + cr_data: pd.DataFrame, + eye_data: pd.DataFrame, + pupil_data: pd.DataFrame): + cr_data.to_hdf(output_file, key='cr', mode='w') + eye_data.to_hdf(output_file, key='eye', mode='a') + pupil_data.to_hdf(output_file, key='pupil', mode='a') + + +@pytest.fixture +def ellipse_fits_fixture(tmp_path, request) -> dict: + cr = {"center_x": [300, 305, 295, 310, 280], + "center_y": [300, 305, 295, 310, 280], + "width": [7, 8, 6, 7, 10], + "height": [6, 9, 5, 6, 8], + "phi": [0, 0.1, 0.15, 0.1, 0]} + + eye = {"center_x": [300, 305, 295, 310, 280], + "center_y": [300, 305, 295, 310, 280], + "width": [150, 155, 160, 150, 155], + "height": [120, 115, 120, 110, 100], + "phi": [0, 0.1, 0.15, 0.1, 0]} + + pupil = {"center_x": [300, 305, 295, 310, 280], + "center_y": [300, 305, 295, 310, 280], + "width": [30, 35, 40, 25, 50], + "height": [25, 27, 30, 20, 45], + "phi": [0, 0.1, 0.15, 0.1, 0]} + + test_dir = tmp_path / "test_load_ellipse_fit_params" + test_dir.mkdir() + + if request.param["create_good_fits_file"]: + test_path = test_dir / "good_ellipse_fits.h5" + else: + test_path = test_dir / "bad_ellipse_fits.h5" + pupil = {"center_x": [300], "center_y": [300], "width": [30], + "height": [25], "phi": [0]} + + cr = pd.DataFrame(cr) + eye = pd.DataFrame(eye) + pupil = pd.DataFrame(pupil) + + create_sample_ellipse_hdf(test_path, cr, eye, pupil) + + return {"cr": pd.DataFrame(cr), + "eye": pd.DataFrame(eye), + "pupil": pd.DataFrame(pupil), + "file_path": test_path} + + +@pytest.mark.parametrize("ellipse_fits_fixture, expect_good_file", [ + ({"create_good_fits_file": True}, True), + 
({"create_good_fits_file": False}, False) +], indirect=["ellipse_fits_fixture"]) +def test_load_ellipse_fit_params(ellipse_fits_fixture: dict, expect_good_file: bool): + expected = {"cr_params": pd.DataFrame(ellipse_fits_fixture["cr"]).astype(float), + "pupil_params": pd.DataFrame(ellipse_fits_fixture["pupil"]).astype(float), + "eye_params": pd.DataFrame(ellipse_fits_fixture["eye"]).astype(float)} + + if expect_good_file: + obtained = main.load_ellipse_fit_params(ellipse_fits_fixture["file_path"]) + for key in expected.keys(): + pd.testing.assert_frame_equal(obtained[key], expected[key]) + else: + with pytest.raises(RuntimeError, match="ellipse fits don't match"): + obtained = main.load_ellipse_fit_params(ellipse_fits_fixture["file_path"]) + + +@pytest.mark.parametrize("input_args, expected", [ + ({"input_file": Path("input_file.h5"), + "session_sync_file": Path("sync_file.h5"), + "output_file": Path("output_file.h5"), + "monitor_position_x_mm": 100.0, + "monitor_position_y_mm": 500.0, + "monitor_position_z_mm": 300.0, + "monitor_rotation_x_deg": 30, + "monitor_rotation_y_deg": 60, + "monitor_rotation_z_deg": 90, + "camera_position_x_mm": 200.0, + "camera_position_y_mm": 600.0, + "camera_position_z_mm": 700.0, + "camera_rotation_x_deg": 20, + "camera_rotation_y_deg": 180, + "camera_rotation_z_deg": 5, + "led_position_x_mm": 800.0, + "led_position_y_mm": 900.0, + "led_position_z_mm": 1000.0, + "eye_radius_cm": 0.1682, + "cm_per_pixel": 0.0001, + "equipment": "Rig A", + "date_of_acquisition": "Some Date", + "eye_video_file": Path("eye_video.avi")}, + + {"pupil_params": "pupil_params_placeholder", + "cr_params": "cr_params_placeholder", + "eye_params": "eye_params_placeholder", + "session_sync_file": Path("sync_file.h5"), + "output_file": Path("output_file.h5"), + "monitor_position": np.array([10.0, 50.0, 30.0]), + "monitor_rotations": np.array([np.pi / 6, np.pi / 3, np.pi / 2]), + "camera_position": np.array([20.0, 60.0, 70.0]), + "camera_rotations": np.array([np.pi 
/ 9, np.pi, np.pi / 36]), + "led_position": np.array([80.0, 90.0, 100.0]), + "eye_radius_cm": 0.1682, + "cm_per_pixel": 0.0001, + "equipment": "Rig A", + "date_of_acquisition": "Some Date", + "eye_video_file": Path("eye_video.avi")} + ), + +]) +def test_preprocess_input_args(monkeypatch, input_args: dict, expected: dict): + def mock_load_ellipse_fit_params(*args, **kwargs): + return {"pupil_params": "pupil_params_placeholder", + "cr_params": "cr_params_placeholder", + "eye_params": "eye_params_placeholder"} + + monkeypatch.setattr(main, "load_ellipse_fit_params", + mock_load_ellipse_fit_params) + + obtained = main.preprocess_input_args(input_args) + + for key in expected.keys(): + if isinstance(obtained[key], np.ndarray): + assert np.allclose(obtained[key], expected[key]) + else: + assert obtained[key] == expected[key] + + +@pytest.mark.parametrize("pupil_params_rows, expected, expect_fail", [ + (5, pd.Series([1, 2, 3, 4, 5]), False), + (4, None, True) +]) +def test_load_sync_file_timings(monkeypatch, pupil_params_rows, expected, expect_fail): + def mock_get_synchronized_camera_frame_times(*args, **kwargs): + return pd.Series([1, 2, 3, 4, 5]) + + monkeypatch.setattr(main, "get_synchronized_camera_frame_times", + mock_get_synchronized_camera_frame_times) + + if expect_fail: + with pytest.raises(RuntimeError, match="number of camera sync pulses"): + main.load_sync_file_timings(Path("."), pupil_params_rows) + + else: + obtained = main.load_sync_file_timings(Path("."), pupil_params_rows) + assert expected.equals(obtained) From 9145ae43a028129e8d2a680444c2e9e75d49623b Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Mon, 11 Nov 2019 14:12:34 -0800 Subject: [PATCH 41/60] add birth date get method minor fixes --- allensdk/core/cache_method_utilities.py | 4 ++-- allensdk/internal/api/behavior_data_lims_api.py | 15 ++++++++++++++- .../behavior/test_rewards_processing.py | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git 
a/allensdk/core/cache_method_utilities.py b/allensdk/core/cache_method_utilities.py index faeff6aa9..a487852fd 100644 --- a/allensdk/core/cache_method_utilities.py +++ b/allensdk/core/cache_method_utilities.py @@ -4,11 +4,11 @@ class CachedInstanceMethodMixin(object): def cache_clear(self): """ - Calls `clear_cache` method on all bound methods in this instance + Calls `cache_clear` method on all bound methods in this instance (where valid). Intended to clear calls cached with the `memoize` decorator. Note that this will also clear functions decorated with `lru_cache` and - `lfu_cache` in this class (or any other function with `clear_cache` + `lfu_cache` in this class (or any other function with `cache_clear` attribute). """ for _, method in inspect.getmembers(self, inspect.ismethod): diff --git a/allensdk/internal/api/behavior_data_lims_api.py b/allensdk/internal/api/behavior_data_lims_api.py index 45c649233..c5f59f6b6 100644 --- a/allensdk/internal/api/behavior_data_lims_api.py +++ b/allensdk/internal/api/behavior_data_lims_api.py @@ -182,7 +182,7 @@ def get_running_speed(self) -> RunningSpeed: return RunningSpeed(timestamps=running_data_df.index.values, values=running_data_df.speed.values) - def get_stimulus_frame_rate(self): + def get_stimulus_frame_rate(self) -> float: stimulus_timestamps = self.get_stimulus_timestamps() return np.round(1 / np.mean(np.diff(stimulus_timestamps)), 0) @@ -297,6 +297,19 @@ def get_trials(self) -> pd.DataFrame: return trial_df + @memoize + def get_birth_date(self) -> datetime.date: + """Returns the birth date of the animal. 
+ :rtype: datetime.date + """ + query = f""" + SELECT d.date_of_birth + FROM behavior_sessions bs + JOIN donors d on d.id = bs.donor_id + WHERE bs.id = {self.behavior_session_id} + """ + return self.fetchone(query, strict=True).date() + @memoize def get_sex(self) -> str: """Returns sex of the animal (M/F) diff --git a/allensdk/test/brain_observatory/behavior/test_rewards_processing.py b/allensdk/test/brain_observatory/behavior/test_rewards_processing.py index 1df7f310a..fcb7cbd68 100644 --- a/allensdk/test/brain_observatory/behavior/test_rewards_processing.py +++ b/allensdk/test/brain_observatory/behavior/test_rewards_processing.py @@ -24,6 +24,6 @@ def test_get_rewards(): expected = pd.DataFrame( {"volume": [0.007], "timestamps": [1086.965144219165], - "auto_rewarded": False}).set_index("timestamps", drop=True) + "autorewarded": False}).set_index("timestamps", drop=True) pd.testing.assert_frame_equal(expected, get_rewards(data, lambda x: x+1.0)) From 2a04a9317e19f347a7be46f1d761064deadd3cca Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Tue, 12 Nov 2019 12:39:59 -0800 Subject: [PATCH 42/60] don't reinstall conda on appveyor --- appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index b78fce112..549ae99c1 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -20,7 +20,6 @@ environment: install: - set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH% - conda config --set always_yes yes --set changeps1 no - - conda install conda==4.6.14 - conda create -q -n test-environment python=%PYTHON% pip - activate test-environment - conda install statsmodels From 850f8ed9d3afaf217312ebc79726134f38cc1b46 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Tue, 12 Nov 2019 14:06:30 -0800 Subject: [PATCH 43/60] update cache clear warning for consistency --- allensdk/brain_observatory/behavior/behavior_ophys_session.py | 4 ++-- allensdk/brain_observatory/behavior/internal/behavior_base.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff 
--git a/allensdk/brain_observatory/behavior/behavior_ophys_session.py b/allensdk/brain_observatory/behavior/behavior_ophys_session.py index f2b5702eb..6b561b226 100644 --- a/allensdk/brain_observatory/behavior/behavior_ophys_session.py +++ b/allensdk/brain_observatory/behavior/behavior_ophys_session.py @@ -196,8 +196,8 @@ def cache_clear(self) -> None: self.api.cache_clear() except AttributeError: logging.getLogger("BehaviorOphysSession").warning( - f"Attempted to clear API cache, but method `clear_cache`" - " does not exist on {self.api.__name__}") + "Attempted to clear API cache, but method `cache_clear`" + f" does not exist on {self.api.__class__.__name__}") def get_roi_masks(self, cell_specimen_ids=None): """ Obtains boolean masks indicating the location of one or more cell's ROIs in this session. diff --git a/allensdk/brain_observatory/behavior/internal/behavior_base.py b/allensdk/brain_observatory/behavior/internal/behavior_base.py index 41863d2a9..cd198780a 100644 --- a/allensdk/brain_observatory/behavior/internal/behavior_base.py +++ b/allensdk/brain_observatory/behavior/internal/behavior_base.py @@ -12,7 +12,7 @@ class BehaviorBase(abc.ABC): behavior session data. Child classes should be instantiated with a fetch API that implements these - methods. Both fetch API and session object should inherit from this base. + methods. 
""" @abc.abstractmethod def get_licks(self) -> pd.DataFrame: From 8174d465f3b732278c2fa8b19872c6a4a11d38c9 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Tue, 12 Nov 2019 14:06:57 -0800 Subject: [PATCH 44/60] BehaviorDataSession with tests --- .../behavior/behavior_data_session.py | 191 ++++++++++++++++++ .../behavior/test_behavior_data_session.py | 58 ++++++ 2 files changed, 249 insertions(+) create mode 100644 allensdk/brain_observatory/behavior/behavior_data_session.py create mode 100644 allensdk/test/brain_observatory/behavior/test_behavior_data_session.py diff --git a/allensdk/brain_observatory/behavior/behavior_data_session.py b/allensdk/brain_observatory/behavior/behavior_data_session.py new file mode 100644 index 000000000..c86bbaa00 --- /dev/null +++ b/allensdk/brain_observatory/behavior/behavior_data_session.py @@ -0,0 +1,191 @@ +from typing import Any, Optional, List, Dict, Type, Tuple +import logging +import pandas as pd +import numpy as np +import inspect + +from allensdk.internal.api.behavior_data_lims_api import BehaviorDataLimsApi +from allensdk.brain_observatory.behavior.internal import BehaviorBase +from allensdk.brain_observatory.running_speed import RunningSpeed + +BehaviorDataApi = Type[BehaviorBase] + + +class BehaviorDataSession(object): + def __init__(self, api: Optional[BehaviorDataApi] = None): + self.api = api + + @classmethod + def from_lims(cls, behavior_session_id: int) -> "BehaviorDataSession": + return cls(api=BehaviorDataLimsApi(behavior_session_id)) + + @classmethod + def from_nwb_path( + cls, nwb_path: str, **api_kwargs: Any) -> "BehaviorDataSession": + return NotImplementedError + + @property + def behavior_session_id(self) -> int: + """Unique identifier for this experimental session. 
+ :rtype: int + """ + return self.api.behavior_session_id + + @property + def ophys_session_id(self) -> Optional[int]: + """The unique identifier for the ophys session associated + with this behavior session (if one exists) + :rtype: int + """ + return self.api.ophys_session_id + + @property + def ophys_experiment_ids(self) -> Optional[List[int]]: + """The unique identifiers for the ophys experiment(s) associated + with this behavior session (if one exists) + :rtype: int + """ + return self.api.ophys_experiment_ids + + @property + def licks(self) -> pd.DataFrame: + """Get lick data from pkl file. + + Returns + ------- + np.ndarray + A dataframe containing lick timestamps. + """ + return self.api.get_licks() + + @property + def rewards(self) -> pd.DataFrame: + """Get reward data from pkl file. + + Returns + ------- + pd.DataFrame + A dataframe containing timestamps of delivered rewards. + """ + return self.api.get_rewards() + + @property + def running_data_df(self) -> pd.DataFrame: + """Get running speed data. + + Returns + ------- + pd.DataFrame + Dataframe containing various signals used to compute running speed. + """ + return self.api.get_running_data_df() + + def running_speed(self) -> RunningSpeed: + """Get running speed using timestamps from + self.get_stimulus_timestamps. + + NOTE: Do not correct for monitor delay. + + Returns + ------- + RunningSpeed (NamedTuple with two fields) + timestamps : np.ndarray + Timestamps of running speed data samples + values : np.ndarray + Running speed of the experimental subject (in cm / s). + """ + return self.api.get_running_speed() + + @property + def stimulus_presentations(self) -> pd.DataFrame: + """Get stimulus presentation data. + + NOTE: Uses timestamps that do not account for monitor delay. + + Returns + ------- + pd.DataFrame + Table whose rows are stimulus presentations + (i.e. a given image, for a given duration, typically 250 ms) + and whose columns are presentation characteristics. 
+ """ + return self.api.get_stimulus_presentations() + + @property + def stimulus_templates(self) -> Dict[str, np.ndarray]: + """Get stimulus templates (movies, scenes) for behavior session. + + Returns + ------- + Dict[str, np.ndarray] + A dictionary containing the stimulus images presented during the + session. Keys are data set names, and values are 3D numpy arrays. + """ + return self.api.get_stimulus_templates() + + @property + def stimulus_timestamps(self) -> np.ndarray: + """Get stimulus timestamps from pkl file. + + NOTE: Located with behavior_session_id + + Returns + ------- + np.ndarray + Timestamps associated with stimulus presentations on the monitor + that do no account for monitor delay. + """ + return self.api.get_stimulus_timestamps() + + @property + def task_parameters(self) -> dict: + """Get task parameters from pkl file. + + Returns + ------- + dict + A dictionary containing parameters used to define the task runtime + behavior. + """ + return self.api.get_task_parameters() + + @property + def trials(self) -> pd.DataFrame: + """Get trials from pkl file + + Returns + ------- + pd.DataFrame + A dataframe containing behavioral trial start/stop times, + and trial data + """ + return self.api.get_trials() + + @property + def metadata(self) -> Dict[str, Any]: + """Return metadata about the session. + :rtype: dict + """ + return self.api.get_metadata() + + def cache_clear(self) -> None: + """Convenience method to clear the api cache, if applicable.""" + try: + self.api.cache_clear() + except AttributeError: + logging.getLogger("BehaviorOphysSession").warning( + "Attempted to clear API cache, but method `cache_clear`" + f" does not exist on {self.api.__class__.__name__}") + + def list_api_methods(self) -> List[Tuple[str, str]]: + """Convenience method to expose list of API `get` methods. These methods + can be accessed by referencing the API used to initialize this + BehaviorDataSession via its `api` instance attribute. 
+ :rtype: list of tuples, where the first value in the tuple is the + method name, and the second value is the method docstring. + """ + methods = [m for m in inspect.getmembers(self.api, inspect.ismethod) + if m[0].startswith("get_")] + docs = [inspect.getdoc(m[1]) or "" for m in methods] + method_names = [m[0] for m in methods] + return list(zip(method_names, docs)) diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_data_session.py b/allensdk/test/brain_observatory/behavior/test_behavior_data_session.py new file mode 100644 index 000000000..cd3aa4c2a --- /dev/null +++ b/allensdk/test/brain_observatory/behavior/test_behavior_data_session.py @@ -0,0 +1,58 @@ +import logging + +from allensdk.brain_observatory.behavior.behavior_data_session import ( + BehaviorDataSession) + + +class DummyApi(object): + def __init__(self): + pass + + def get_method(self): + """Method docstring""" + pass + + def get_no_docstring_method(self): + pass + + def _other_method(self): + """Other Method docstring""" + pass + + +class DummyApiCache(object): + def cache_clear(self): + pass + + +class TestBehaviorDataSession: + """Tests for BehaviorDataSession. + The vast majority of methods in BehaviorDataSession are simply calling + functions from the underlying API. The API required for instantiating a + BehaviorDataSession is annotated to show that it requires an class that + inherits from BehaviorBase, it is ensured that those methods exist in + the API class. These methods should be covered by unit tests on the + API class and will not be re-tested here. 
+ """ + @classmethod + def setup_class(cls): + cls.behavior_session = BehaviorDataSession(api=DummyApi()) + + def test_list_api_methods(self): + expected = [("get_method", "Method docstring"), + ("get_no_docstring_method", "")] + actual = self.behavior_session.list_api_methods() + assert expected == actual + + def test_cache_clear_raises_warning(self, caplog): + expected_msg = ("Attempted to clear API cache, but method" + " `cache_clear` does not exist on DummyApi") + self.behavior_session.cache_clear() + assert caplog.record_tuples == [ + ("BehaviorOphysSession", logging.WARNING, expected_msg)] + + def test_cache_clear_no_warning(self, caplog): + caplog.clear() + bs = BehaviorDataSession(api=DummyApiCache()) + bs.cache_clear() + assert len(caplog.record_tuples) == 0 From 533cfefda4649ac7bdf2b69a165abab014a3b821 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Tue, 12 Nov 2019 10:39:19 -0800 Subject: [PATCH 45/60] add behavior project lims api with sql builder tests comment out methods we are not implementing for now --- .../behavior/behavior_project_lims_api.py | 283 ++++++++++++++++++ .../internal/behavior_project_base.py | 66 ++++ .../test_behavior_project_lims_api.py | 112 +++++++ 3 files changed, 461 insertions(+) create mode 100644 allensdk/brain_observatory/behavior/behavior_project_lims_api.py create mode 100644 allensdk/brain_observatory/behavior/internal/behavior_project_base.py create mode 100644 allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py diff --git a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py new file mode 100644 index 000000000..5bb76d147 --- /dev/null +++ b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py @@ -0,0 +1,283 @@ +import pandas as pd +from typing import Optional, List, Union, Dict, Any +import logging + +from allensdk.brain_observatory.behavior.internal.behavior_project_base\ + import BehaviorProjectBase 
+from allensdk.brain_observatory.behavior.behavior_data_session import ( + BehaviorDataSession) +from allensdk.brain_observatory.behavior.behavior_ophys_session import ( + BehaviorOphysSession) +from allensdk.internal.api.behavior_data_lims_api import BehaviorDataLimsApi +from allensdk.internal.api.behavior_ophys_api import BehaviorOphysLimsApi +from allensdk.internal.api import PostgresQueryMixin +from allensdk.brain_observatory.ecephys.ecephys_project_api.http_engine import ( + HttpEngine) + + +class BehaviorProjectLimsApi(BehaviorProjectBase): + def __init__(self, postgres_engine, app_engine): + """ Downloads visual behavior data from the Allen Institute's + internal Laboratory Information Management System (LIMS). Only + functional if connected to the Allen Institute Network. Used to load + data into BehaviorProjectCache. + + Typically want to construct an instance of this class by calling + `BehaviorProjectLimsApi.default()`. + + Note -- Currently the app engine is unused because we aren't yet + supporting the download of stimulus templates for visual behavior + data. This feature will be added at a later date. + + Parameters + ---------- + postgres_engine : + used for making queries against the LIMS postgres database. Must + implement: + select : takes a postgres query as a string. Returns a pandas + dataframe of results + fetchall : takes a postgres query as a string. If there is + exactly one column in the response, return the values as a + list. + app_engine : + used for making queries against the lims web application. Must + implement: + stream : takes a url as a string. Returns an iterable yielding + the response body as bytes. 
+ """ + self.postgres_engine = postgres_engine + self.app_engine = app_engine + self.logger = logging.getLogger("BehaviorProjectLimsApi") + + @classmethod + def default( + cls, + pg_kwargs: Optional[Dict[str, Any]] = None, + app_kwargs: Optional[Dict[str, Any]] = None) -> \ + "BehaviorProjectLimsApi": + """Construct a BehaviorProjectLimsApi instance with default + postgres and app engines. + + :param pg_kwargs: dict of keyword arguments to pass to the + PostgresQueryMixin class instance. Valid arguments include: + "dbname", "user", "host", "password", "port". Will use + defaults in PostgresQueryMixin.__init__ if unspecified. + :type pg_kwargs: dict + :param app_kwargs: dict of keyword arguments to pass to the + HttpEngine class instance. Valid arguments include: + "scheme", "host". Will default to scheme=http, host=lims2 + if left unspecified. + :type app_kwargs: dict + :rtype: BehaviorProjectLimsApi + """ + _pg_kwargs = pg_kwargs or dict() + + _app_kwargs = {"scheme": "http", "host": "lims2"} + if app_kwargs: + _app_kwargs.update(app_kwargs) + + pg_engine = PostgresQueryMixin(**_pg_kwargs) + app_engine = HttpEngine(**_app_kwargs) + return cls(pg_engine, app_engine) + + @staticmethod + def _build_in_list_selector_query( + col, + valid_list: Optional[List[Union[str, int]]] = None, + operator: str = "WHERE") -> str: + """ + Filter for rows where the value of a column is contained in a list. + If no list is specified in `valid_list`, return an empty string. + + NOTE: if string ids are used, then the strings in `valid_list` must + be enclosed in single quotes, or else the query will throw a column + does not exist error. E.g. ["'mystringid1'", "'mystringid2'"...] + + :param col: name of column to compare if in a list + :type col: str + :param valid_list: iterable of values that can be mapped to str + (e.g. string, int, float). + :type valid_list: list + :param operator: SQL operator to start the clause. Default="WHERE". 
+ Valid inputs: "AND", "OR", "WHERE" (not case-sensitive). + :type operator: str + """ + if not valid_list: + return "" + session_query = ( + f"""{operator} {col} IN ({",".join( + sorted(set(map(str, valid_list))))})""") + return session_query + + @staticmethod + def _build_experiment_from_session_query() -> str: + """Aggregate sql sub-query to get all ophys_experiment_ids associated + with a single ophys_session_id.""" + query = f""" + -- -- begin getting all ophys_experiment_ids -- -- + SELECT + (ARRAY_AGG(DISTINCT(oe.id))) as experiment_ids, os.id + FROM ophys_sessions os + RIGHT JOIN ophys_experiments oe ON oe.ophys_session_id = os.id + GROUP BY os.id + -- -- end getting all ophys_experiment_ids -- -- + """ + return query + + def _get_behavior_summary_table(self, + session_sub_query: str) -> pd.DataFrame: + """Build and execute query to retrieve summary data for all data, + or a subset of session_ids (via the session_sub_query). + Should pass an empty string to `session_sub_query` if want to get + all data in the database. + :param session_sub_query: additional filtering logic to get a + subset of sessions. 
+ :type session_sub_query: str + :rtype: pd.DataFrame + """ + query = f""" + SELECT + bs.id as behavior_session_id, + bs.ophys_session_id, + bs.behavior_training_id, + sp.id as specimen_id, + d.full_genotype as genotype, + g.name as sex, + bs.foraging_id + FROM behavior_sessions bs + JOIN donors d on bs.donor_id = d.id + JOIN genders g on g.id = d.gender_id + JOIN specimens sp ON sp.donor_id = d.id + {session_sub_query} + """ + return self.postgres_engine.select(query) + + def _get_foraging_ids_from_behavior_session( + self, behavior_session_ids: List[int]) -> List[str]: + behav_ids = self._build_in_list_selector_query("id", + behavior_session_ids, + operator="AND") + forag_ids_query = f""" + SELECT foraging_id + FROM behavior_sessions + WHERE foraging_id IS NOT NULL + {behav_ids}; + """ + self.logger.debug("get_foraging_ids_from_behavior_session query: \n" + f"{forag_ids_query}") + foraging_ids = self.postgres_engine.fetchall(forag_ids_query) + + self.logger.debug(f"Retrieved {len(foraging_ids)} foraging ids for" + f" behavior stage query. 
Ids = {foraging_ids}") + return foraging_ids + + def _get_behavior_stage_table( + self, + behavior_session_ids: Optional[List[int]] = None, + mtrain_db: Optional[PostgresQueryMixin] = None): + # Select fewer rows if possible via behavior_session_id + if behavior_session_ids: + foraging_ids = self._get_foraging_ids_from_behavior_session( + behavior_session_ids) + # Otherwise just get the full table from mtrain + else: + foraging_ids = None + + foraging_ids_query = self._build_in_list_selector_query( + "bs.id", foraging_ids) + + # TODO: this password has already been exposed in code but we really + # need to move towards using a secrets database + if not mtrain_db: + mtrain_db = PostgresQueryMixin( + dbname="mtrain", user="mtrainreader", + host="prodmtrain1", port=5432, password="mtrainro") + query = f""" + SELECT + stages.name as session_type, + bs.id AS foraging_id + FROM behavior_sessions bs + JOIN stages ON stages.id = bs.state_id + {foraging_ids_query}; + """ + return mtrain_db.select(query) + + def get_session_data(self, ophys_session_id: int) -> BehaviorOphysSession: + """Returns a BehaviorOphysSession object that contains methods + to analyze a single behavior+ophys session. + :param ophys_session_id: id that corresponds to a behavior session + :type ophys_session_id: int + :rtype: BehaviorOphysSession + """ + return BehaviorOphysSession(BehaviorOphysLimsApi(ophys_session_id)) + + def get_session_table( + self, + ophys_session_ids: Optional[List[int]] = None) -> pd.DataFrame: + """Return a pd.Dataframe table with all ophys_session_ids and relevant + metadata. + Return columns: ophys_session_id, behavior_session_id, specimen_id, + ophys_experiment_ids, isi_experiment_id, session_type, + date_of_acquisition, genotype, sex, age_in_days + + :param ophys_session_ids: optional list of ophys_session_ids to include + :rtype: pd.DataFrame + """ + if not ophys_session_ids: + self.logger.warning("Getting all ophys sessions." 
+ " This might take a while.") + session_query = self._build_in_list_selector_query("os.id", + ophys_session_ids) + experiment_query = self._build_experiment_from_session_query() + query = f""" + SELECT + os.id as ophys_session_id, + bs.id as behavior_session_id, + os.specimen_id, + os.isi_experiment_id, + os.stimulus_name as session_type, + os.date_of_acquisition, + d.full_genotype as genotype, + g.name as sex, + DATE_PART('day', os.date_of_acquisition - d.date_of_birth) + AS age_in_days + FROM ophys_sessions os + JOIN behavior_sessions bs ON os.id = bs.ophys_session_id + JOIN donors d ON d.id = bs.donor_id + JOIN genders g ON g.id = d.gender_id + JOIN ( + {experiment_query} + ) exp_ids ON os.id = exp_ids.id + {session_query}; + """ + self.logger.debug(f"get_session_table query: \n{query}") + return self.postgres_engine.select(query) + + def get_behavior_only_session_data( + self, behavior_session_id: int) -> BehaviorDataSession: + """Returns a BehaviorDataSession object that contains methods to + analyze a single behavior session. + :param behavior_session_id: id that corresponds to a behavior session + :type behavior_session_id: int + :rtype: BehaviorDataSession + """ + return BehaviorDataSession(BehaviorDataLimsApi(behavior_session_id)) + + def get_behavior_only_session_table( + self, + behavior_session_ids: Optional[List[int]] = None) -> pd.DataFrame: + """Returns a pd.DataFrame table with all behavior session_ids to the + user with additional metadata. + + Can't return age at time of session because there is no field for + acquisition date for behavior sessions (only in the stimulus pkl file) + :rtype: pd.DataFrame + """ + self.logger.warning("Getting behavior-only session data. 
" + "This might take a while...") + session_query = self._build_in_list_selector_query( + "bs.id", behavior_session_ids) + summary_tbl = self._get_behavior_summary_table(session_query) + stimulus_names = self._get_behavior_stage_table(behavior_session_ids) + return summary_tbl.merge(stimulus_names, + on=["foraging_id"], how="left") diff --git a/allensdk/brain_observatory/behavior/internal/behavior_project_base.py b/allensdk/brain_observatory/behavior/internal/behavior_project_base.py new file mode 100644 index 000000000..02451f450 --- /dev/null +++ b/allensdk/brain_observatory/behavior/internal/behavior_project_base.py @@ -0,0 +1,66 @@ +from abc import ABC, abstractmethod +from typing import Iterable + +from allensdk.brain_observatory.behavior.behavior_ophys_session import ( + BehaviorOphysSession) +from allensdk.brain_observatory.behavior.behavior_data_session import ( + BehaviorDataSession) +import pandas as pd + + +class BehaviorProjectBase(ABC): + @abstractmethod + def get_session_data(self, ophys_session_id: int) -> BehaviorOphysSession: + """Returns a BehaviorOphysSession object that contains methods + to analyze a single behavior+ophys session. + :param ophys_session_id: id that corresponds to a behavior session + :type ophys_session_id: int + :rtype: BehaviorOphysSession + """ + pass + + @abstractmethod + def get_session_table(self) -> pd.DataFrame: + """Return a pd.Dataframe table with all ophys_session_ids and relevant + metadata.""" + pass + + @abstractmethod + def get_behavior_only_session_data( + self, behavior_session_id: int) -> BehaviorDataSession: + """Returns a BehaviorDataSession object that contains methods to + analyze a single behavior session. 
+ :param behavior_session_id: id that corresponds to a behavior session + :type behavior_session_id: int + :rtype: BehaviorDataSession + """ + pass + + @abstractmethod + def get_behavior_only_session_table(self) -> pd.DataFrame: + """Returns a pd.DataFrame table with all behavior session_ids to the + user with additional metadata. + :rtype: pd.DataFrame + """ + pass + + # @abstractmethod + # def get_natural_movie_template(self, number: int) -> Iterable[bytes]: + # """Download a template for the natural scene stimulus. This is the + # actual image that was shown during the recording session. + # :param number: idenfifier for this movie (note that this is an int, + # so to get the template for natural_movie_three should pass 3) + # :type number: int + # :returns: iterable yielding a tiff file as bytes + # """ + # pass + + # @abstractmethod + # def get_natural_scene_template(self, number: int) -> Iterable[bytes]: + # """ Download a template for the natural movie stimulus. This is the + # actual movie that was shown during the recording session. 
+ # :param number: identifier for this scene + # :type number: int + # :returns: An iterable yielding an npy file as bytes + # """ + # pass diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py b/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py new file mode 100644 index 000000000..5a2e6ee6d --- /dev/null +++ b/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py @@ -0,0 +1,112 @@ +import pytest + +from allensdk.brain_observatory.behavior.behavior_project_lims_api import ( + BehaviorProjectLimsApi) +from allensdk.test_utilities.custom_comparators import ( + WhitespaceStrippedString) + + +class MockQueryEngine: + def __init__(self, **kwargs): + pass + + def select(self, query): + return query + + def fetchall(self, query): + return query + + def stream(self, endpoint): + return endpoint + + +@pytest.fixture +def MockBehaviorProjectLimsApi(): + return BehaviorProjectLimsApi(MockQueryEngine(), MockQueryEngine()) + + +@pytest.mark.parametrize( + "col,valid_list,operator,expected", [ + ("os.id", [1, 2, 3], "WHERE", "WHERE os.id IN (1,2,3)"), + ("id2", ["'a'", "'b'"], "AND", "AND id2 IN ('a','b')"), + ("id3", [1.0], "OR", "OR id3 IN (1.0)"), + ("id4", None, "WHERE", "")] +) +def test_build_in_list_selector_query( + col, valid_list, operator, expected, MockBehaviorProjectLimsApi): + assert (expected + == MockBehaviorProjectLimsApi._build_in_list_selector_query( + col, valid_list, operator)) + + +@pytest.mark.parametrize( + "behavior_session_ids,expected", [ + (None, + WhitespaceStrippedString(""" + SELECT foraging_id + FROM behavior_sessions + WHERE foraging_id IS NOT NULL + ; + """)), + (["'id1'", "'id2'"], + WhitespaceStrippedString(""" + SELECT foraging_id + FROM behavior_sessions + WHERE foraging_id IS NOT NULL + AND id IN ('id1','id2'); + """)) + ] +) +def test_get_foraging_ids_from_behavior_session( + behavior_session_ids, expected, MockBehaviorProjectLimsApi): + mock_api = 
MockBehaviorProjectLimsApi + assert expected == mock_api._get_foraging_ids_from_behavior_session( + behavior_session_ids) + + +def test_get_behavior_stage_table(MockBehaviorProjectLimsApi): + expected = WhitespaceStrippedString(""" + SELECT + stages.name as session_type, + bs.id AS foraging_id + FROM behavior_sessions bs + JOIN stages ON stages.id = bs.state_id + ; + """) + mock_api = MockBehaviorProjectLimsApi + actual = mock_api._get_behavior_stage_table(mtrain_db=MockQueryEngine()) + assert expected == actual + + +@pytest.mark.parametrize( + "ophys_session_ids,expected", [ + (None, WhitespaceStrippedString(""" + SELECT + os.id as ophys_session_id, + bs.id as behavior_session_id, + os.specimen_id, + os.isi_experiment_id, + os.stimulus_name as session_type, + os.date_of_acquisition, + d.full_genotype as genotype, + g.name as sex, + DATE_PART('day', os.date_of_acquisition - d.date_of_birth) + AS age_in_days + FROM ophys_sessions os + JOIN behavior_sessions bs ON os.id = bs.ophys_session_id + JOIN donors d ON d.id = bs.donor_id + JOIN genders g ON g.id = d.gender_id + JOIN (-- -- begin getting all ophys_experiment_ids -- -- + SELECT + (ARRAY_AGG(DISTINCT(oe.id))) as experiment_ids, os.id + FROM ophys_sessions os + RIGHT JOIN ophys_experiments oe ON oe.ophys_session_id = os.id + GROUP BY os.id + -- -- end getting all ophys_experiment_ids -- -- + ) exp_ids ON os.id = exp_ids.id; + """))] +) +def test_get_session_table(ophys_session_ids, expected, + MockBehaviorProjectLimsApi): + actual = MockBehaviorProjectLimsApi.get_session_table() + assert expected == actual From 57006a688fd90b760badea7a7c2f3700c4e89371 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Wed, 13 Nov 2019 13:05:35 -0800 Subject: [PATCH 46/60] helper methods for comparing strings (class to strip whitespace and pytest hook) --- .../brain_observatory/behavior/conftest.py | 12 +++++++++ allensdk/test_utilities/custom_comparators.py | 25 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 
100644 allensdk/test_utilities/custom_comparators.py diff --git a/allensdk/test/brain_observatory/behavior/conftest.py b/allensdk/test/brain_observatory/behavior/conftest.py index 19d53295d..7999284bd 100644 --- a/allensdk/test/brain_observatory/behavior/conftest.py +++ b/allensdk/test/brain_observatory/behavior/conftest.py @@ -8,6 +8,18 @@ import os import json +from allensdk.test_utilities.custom_comparators import WhitespaceStrippedString + + +def pytest_assertrepr_compare(config, op, left, right): + if isinstance(left, WhitespaceStrippedString) and op == "==": + if isinstance(right, WhitespaceStrippedString): + right_compare = right.orig + else: + right_compare = right + return ["Comparing strings with whitespace stripped. ", + f"{left.orig} != {right_compare}.", "Diff:"] + left.diff + def pytest_ignore_collect(path, config): ''' The brain_observatory.ecephys submodule uses python 3.6 features that may not be backwards compatible! diff --git a/allensdk/test_utilities/custom_comparators.py b/allensdk/test_utilities/custom_comparators.py new file mode 100644 index 000000000..5183b45bf --- /dev/null +++ b/allensdk/test_utilities/custom_comparators.py @@ -0,0 +1,25 @@ +import re +from typing import Union +import difflib + + +class WhitespaceStrippedString(object): + """Comparator class to compare strings that have been stripped of + whitespace. By default removes any unicode whitespace character that + matches the regex \s, (which includes [ \t\n\r\f\v], and other unicode + whitespace characters). 
+ """ + def __init__(self, string: str, whitespace_chars: str = r"\s", + ASCII: bool = False): + self.orig = string + self.whitespace_chars = whitespace_chars + self.flags = re.ASCII if ASCII else 0 + self.differ = difflib.Differ() + self.value = re.sub(self.whitespace_chars, "", string, self.flags) + + def __eq__(self, other: Union[str, "WhitespaceStrippedString"]): + if isinstance(other, str): + other = WhitespaceStrippedString( + other, self.whitespace_chars, self.flags) + self.diff = list(self.differ.compare(self.value, other.value)) + return self.value == other.value From 0531f7148eee0b145dce528c534d8d07b543c901 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Wed, 13 Nov 2019 14:24:43 -0800 Subject: [PATCH 47/60] set index of summary tables to the relevant id --- .../behavior/behavior_project_lims_api.py | 25 ++++++++++++++++--- .../test_behavior_project_lims_api.py | 2 +- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py index 5bb76d147..7a592a5c7 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py +++ b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py @@ -211,10 +211,11 @@ def get_session_data(self, ophys_session_id: int) -> BehaviorOphysSession: """ return BehaviorOphysSession(BehaviorOphysLimsApi(ophys_session_id)) - def get_session_table( + def _get_session_table( self, ophys_session_ids: Optional[List[int]] = None) -> pd.DataFrame: - """Return a pd.Dataframe table with all ophys_session_ids and relevant + """Helper function for easier testing. + Return a pd.Dataframe table with all ophys_session_ids and relevant metadata. 
Return columns: ophys_session_id, behavior_session_id, specimen_id, ophys_experiment_ids, isi_experiment_id, session_type, @@ -253,6 +254,21 @@ def get_session_table( self.logger.debug(f"get_session_table query: \n{query}") return self.postgres_engine.select(query) + def get_session_table( + self, + ophys_session_ids: Optional[List[int]] = None) -> pd.DataFrame: + """Return a pd.Dataframe table with all ophys_session_ids and relevant + metadata. + Return columns: ophys_session_id, behavior_session_id, specimen_id, + ophys_experiment_ids, isi_experiment_id, session_type, + date_of_acquisition, genotype, sex, age_in_days + + :param ophys_session_ids: optional list of ophys_session_ids to include + :rtype: pd.DataFrame + """ + return (self._get_session_table(ophys_session_ids) + .set_index("ophys_session_id")) + def get_behavior_only_session_data( self, behavior_session_id: int) -> BehaviorDataSession: """Returns a BehaviorDataSession object that contains methods to @@ -279,5 +295,6 @@ def get_behavior_only_session_table( "bs.id", behavior_session_ids) summary_tbl = self._get_behavior_summary_table(session_query) stimulus_names = self._get_behavior_stage_table(behavior_session_ids) - return summary_tbl.merge(stimulus_names, - on=["foraging_id"], how="left") + return (summary_tbl.merge(stimulus_names, + on=["foraging_id"], how="left") + .set_index("behavior_session_id")) diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py b/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py index 5a2e6ee6d..a1eb0b883 100644 --- a/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py +++ b/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py @@ -108,5 +108,5 @@ def test_get_behavior_stage_table(MockBehaviorProjectLimsApi): ) def test_get_session_table(ophys_session_ids, expected, MockBehaviorProjectLimsApi): - actual = MockBehaviorProjectLimsApi.get_session_table() + actual = 
MockBehaviorProjectLimsApi._get_session_table() assert expected == actual From e936bfa630cb23276cc1285ff8a07ab1c6ed52ea Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Wed, 13 Nov 2019 14:19:04 -0800 Subject: [PATCH 48/60] Remove extraneous assignments and add output assert to test_run_glifneuron --- allensdk/test/model/test_glif.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/allensdk/test/model/test_glif.py b/allensdk/test/model/test_glif.py index ee172f900..b62e64985 100644 --- a/allensdk/test/model/test_glif.py +++ b/allensdk/test/model/test_glif.py @@ -85,7 +85,6 @@ def configured_glif_api(glif_api, neuronal_model_id, neuron_config_file, return glif_api - @pytest.fixture def output(neuron_config_file, ephys_sweeps_file): neuron_config = json_utilities.read(neuron_config_file) @@ -145,9 +144,13 @@ def test_run_glifneuron(configured_glif_api, neuron_config_file): # simulate the neuron output = neuron.run(stimulus) - voltage = output['voltage'] - threshold = output['threshold'] - spike_times = output['interpolated_spike_times'] + expected_fields = {"AScurrents", "grid_spike_times", + "interpolated_spike_threshold", + "interpolated_spike_times", + "interpolated_spike_voltage", + "spike_time_steps", "threshold", "voltage"} + + assert expected_fields.difference(output.keys()) == set() @pytest.mark.skipif(True, reason="needs nwb file") From c45ed5da568362fd86850b5f4a5bb31fa34c9e49 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Wed, 13 Nov 2019 14:29:45 -0800 Subject: [PATCH 49/60] Remove incomplete/perpetually skipped test_glif tests --- allensdk/test/model/test_glif.py | 98 -------------------------------- 1 file changed, 98 deletions(-) diff --git a/allensdk/test/model/test_glif.py b/allensdk/test/model/test_glif.py index b62e64985..5132905fa 100644 --- a/allensdk/test/model/test_glif.py +++ b/allensdk/test/model/test_glif.py @@ -37,10 +37,8 @@ from allensdk.api.queries.glif_api import GlifApi import 
allensdk.core.json_utilities as json_utilities from allensdk.model.glif.glif_neuron import GlifNeuron -from allensdk.model.glif.simulate_neuron import simulate_neuron from allensdk.core.nwb_data_set import NwbDataSet import os -# import matplotlib.pyplot as plt @pytest.fixture @@ -111,7 +109,6 @@ def output(neuron_config_file, ephys_sweeps_file): @pytest.fixture def stimulus(neuron_config_file, ephys_sweeps_file): - neuron_config = json_utilities.read(neuron_config_file) ephys_sweeps = json_utilities.read(ephys_sweeps_file) ephys_file_name = 'stimulus.nwb' @@ -151,98 +148,3 @@ def test_run_glifneuron(configured_glif_api, neuron_config_file): "spike_time_steps", "threshold", "voltage"} assert expected_fields.difference(output.keys()) == set() - - -@pytest.mark.skipif(True, reason="needs nwb file") -def test_3(configured_glif_api, neuron_config_file, ephys_sweeps_file): - neuron_config = json_utilities.read(neuron_config_file) - ephys_sweeps = json_utilities.read(ephys_sweeps_file) - ephys_file_name = 'stimulus.nwb' - - neuron = GlifNeuron.from_dict(neuron_config) - - # sweep_numbers = [ s['sweep_number'] for s in ephys_sweeps - # if s['stimulus_units'] == 'Amps' ] - sweep_numbers = [7] - simulate_neuron(neuron, sweep_numbers, - ephys_file_name, ephys_file_name, 0.05) - - -@pytest.mark.skipif(True, reason="needs nwb file") -def test_4(output): - voltage = output['voltage'] - threshold = output['threshold'] - spike_times = output['interpolated_spike_times'] - - -@pytest.mark.skipif(True, reason="needs nwb file") -def test_5(output): - voltage = output['voltage'] - threshold = output['threshold'] - interpolated_spike_times = output['interpolated_spike_times'] - spike_times = output['interpolated_spike_times'] - interpolated_spike_voltages = output['interpolated_spike_voltage'] - interpolated_spike_thresholds = output['interpolated_spike_threshold'] - grid_spike_indices = output['spike_time_steps'] - grid_spike_times = output['grid_spike_times'] - after_spike_currents = 
output['AScurrents'] - -# # create a time array for plotting -# time = np.arange(len(stimulus))*neuron.dt -# -# plt.figure(figsize=(10, 10)) -# -# # plot stimulus -# plt.subplot(3,1,1) -# plt.plot(time, stimulus) -# plt.xlabel('time (s)') -# plt.ylabel('current (A)') -# plt.title('Stimulus') -# -# # plot model output -# plt.subplot(3,1,2) -# plt.plot(time, voltage, label='voltage') -# plt.plot(time, threshold, label='threshold') -# -# if grid_spike_indices: -# plt.plot(interpolated_spike_times, interpolated_spike_voltages, 'x', -# label='interpolated spike') -# -# plt.plot((grid_spike_indices-1)*neuron.dt, voltage[grid_spike_indices-1], '.', -# label='last step before spike') -# -# plt.xlabel('time (s)') -# plt.ylabel('voltage (V)') -# plt.legend(loc=3) -# plt.title('Model Response') -# -# # plot after spike currents -# plt.subplot(3,1,3) -# for ii in range(np.shape(after_spike_currents)[1]): -# plt.plot(time, after_spike_currents[:,ii]) -# plt.xlabel('time (s)') -# plt.ylabel('current (A)') -# plt.title('After Spike Currents') -# -# plt.tight_layout() -# plt.show() - - -@pytest.mark.skipif(True, reason="needs nwb file") -def test_6(configured_glif_api, neuron_config_file, stimulus): - # define your own custom voltage reset rule - # this one linearly scales the input voltage - def custom_voltage_reset_rule(neuron, voltage_t0, custom_param_a, custom_param_b): - return custom_param_a * voltage_t0 + custom_param_b - - # initialize a neuron from a neuron config file - neuron_config = json_utilities.read(neuron_config_file) - neuron = GlifNeuron.from_dict(neuron_config) - - # configure a new method and overwrite the neuron's old method - method = neuron.configure_method('custom', custom_voltage_reset_rule, - {'custom_param_a': 0.1, 'custom_param_b': 0.0}) - neuron.voltage_reset_method = method - - truncate = 56041 - output = neuron.run(stimulus[0:truncate]) From f4d9c219a32982c5f42439ff17459d4434b5890c Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Wed, 13 Nov 2019 
15:02:42 -0800 Subject: [PATCH 50/60] Remove test_cache_stimulus test All this was really testing was whether it was possible to download a specific stimulus nwb from "api.brain-map.org" --- allensdk/test/model/test_glif.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/allensdk/test/model/test_glif.py b/allensdk/test/model/test_glif.py index 5132905fa..e96dd0d25 100644 --- a/allensdk/test/model/test_glif.py +++ b/allensdk/test/model/test_glif.py @@ -121,12 +121,6 @@ def stimulus(neuron_config_file, ephys_sweeps_file): return stimulus -def test_cache_stimulus(neuron_config_file, ephys_sweeps_file, fn_temp_dir, - configured_glif_api): - nwb_path = os.path.join(fn_temp_dir, 'stimulus.nwb') - configured_glif_api.cache_stimulus_file(nwb_path) - - def test_run_glifneuron(configured_glif_api, neuron_config_file): # initialize the neuron neuron_config = json_utilities.read(neuron_config_file) From 9533663198a7068cedc8cf5f4aa1a46b273c89d3 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Wed, 13 Nov 2019 16:30:12 -0800 Subject: [PATCH 51/60] update typing to string-able objects get experiment_id in ophys summary table --- .../behavior/behavior_project_lims_api.py | 14 ++++++++++---- allensdk/core/typing.py | 9 +++++++++ .../behavior/test_behavior_project_lims_api.py | 1 + 3 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 allensdk/core/typing.py diff --git a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py index 7a592a5c7..292417484 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py +++ b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py @@ -1,5 +1,5 @@ import pandas as pd -from typing import Optional, List, Union, Dict, Any +from typing import Optional, List, Dict, Any import logging from allensdk.brain_observatory.behavior.internal.behavior_project_base\ @@ -13,6 +13,7 @@ from allensdk.internal.api import 
PostgresQueryMixin from allensdk.brain_observatory.ecephys.ecephys_project_api.http_engine import ( HttpEngine) +from allensdk.core.typing import SupportsStr class BehaviorProjectLimsApi(BehaviorProjectBase): @@ -83,7 +84,7 @@ def default( @staticmethod def _build_in_list_selector_query( col, - valid_list: Optional[List[Union[str, int]]] = None, + valid_list: Optional[SupportsStr] = None, operator: str = "WHERE") -> str: """ Filter for rows where the value of a column is contained in a list. @@ -234,6 +235,7 @@ def _get_session_table( SELECT os.id as ophys_session_id, bs.id as behavior_session_id, + experiment_ids as ophys_experiment_id, os.specimen_id, os.isi_experiment_id, os.stimulus_name as session_type, @@ -266,8 +268,12 @@ def get_session_table( :param ophys_session_ids: optional list of ophys_session_ids to include :rtype: pd.DataFrame """ - return (self._get_session_table(ophys_session_ids) - .set_index("ophys_session_id")) + # There is one ophys_session_id from 2018 that has multiple behavior + # ids, causing duplicates -- drop all dupes for now; # TODO + table = (self._get_session_table(ophys_session_ids) + .drop_duplicates(subset=["ophys_session_id"], keep=False) + .set_index("ophys_session_id")) + return table def get_behavior_only_session_data( self, behavior_session_id: int) -> BehaviorDataSession: diff --git a/allensdk/core/typing.py b/allensdk/core/typing.py new file mode 100644 index 000000000..df00f7be0 --- /dev/null +++ b/allensdk/core/typing.py @@ -0,0 +1,9 @@ +from typing import _Protocol +from abc import abstractmethod + + +class SupportsStr(_Protocol): + """Classes that support the __str__ method""" + @abstractmethod + def __str__(self) -> str: + pass diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py b/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py index a1eb0b883..36657619a 100644 --- a/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py +++ 
b/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py @@ -84,6 +84,7 @@ def test_get_behavior_stage_table(MockBehaviorProjectLimsApi): SELECT os.id as ophys_session_id, bs.id as behavior_session_id, + experiment_ids as ophys_experiment_id, os.specimen_id, os.isi_experiment_id, os.stimulus_name as session_type, From bd9d88247b095eb7640a6d9d5390ab07188c2395 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Thu, 14 Nov 2019 14:39:40 -0800 Subject: [PATCH 52/60] Calculate `vsynccount` if key not available for time_sync module Relates to: #1177 --- .../internal/brain_observatory/time_sync.py | 25 +++++++++++++++++-- .../brain_observatory/test_time_sync.py | 15 +++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/allensdk/internal/brain_observatory/time_sync.py b/allensdk/internal/brain_observatory/time_sync.py index b79d6512c..125048daf 100644 --- a/allensdk/internal/brain_observatory/time_sync.py +++ b/allensdk/internal/brain_observatory/time_sync.py @@ -139,9 +139,30 @@ def get_ophys_data_length(filename): return f["data"].shape[1] -def get_stim_data_length(filename): +def get_stim_data_length(filename: str) -> int: + """Get stimulus data length from .pkl file. + + Parameters + ---------- + filename : str + Path of stimulus data .pkl file. + + Returns + ------- + int + Stimulus data length. + """ stim_data = pd.read_pickle(filename) - return stim_data["vsynccount"] + + # A subset of stimulus .pkl files do not have the "vsynccount" field. + # MPE *won't* be backfilling the "vsynccount" field for these .pkl files. + # So the least worst option is to recalculate the vsync_count. 
+ try: + vsync_count = stim_data["vsynccount"] + except KeyError: + vsync_count = len(stim_data["items"]["behavior"]["intervalsms"]) + 1 + + return vsync_count def corrected_video_timestamps(video_name, timestamps, data_length): diff --git a/allensdk/test/internal/brain_observatory/test_time_sync.py b/allensdk/test/internal/brain_observatory/test_time_sync.py index da15cf458..86f655f3f 100644 --- a/allensdk/test/internal/brain_observatory/test_time_sync.py +++ b/allensdk/test/internal/brain_observatory/test_time_sync.py @@ -434,3 +434,18 @@ def test_monitor_delay(scientifica_input): delay = ts.monitor_delay(dset, stim_times, "stim_photodiode", assumed_delay=30) assert delay == 30 + + +@pytest.mark.parametrize("deserialized_pkl,expected", [ + ({"vsynccount": 100}, 100), + ({"items": {"behavior": {"intervalsms": [2, 2, 2, 2, 2]}}}, 6), + ({"vsynccount": 20, "items": {"behavior": {"intervalsms": [3, 3]}}}, 20) +]) +def test_get_stim_data_length(monkeypatch, deserialized_pkl, expected): + def mock_read_pickle(*args, **kwargs): + return deserialized_pkl + + monkeypatch.setattr(ts.pd, "read_pickle", mock_read_pickle) + obtained = ts.get_stim_data_length("dummy_filepath") + + assert obtained == expected From 09eaae2b28bcd7ff160447709d8f11314839b49f Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 15 Nov 2019 12:11:16 -0800 Subject: [PATCH 53/60] use typing extension protocol; keep unimplemented methods in base class for project api --- .../behavior/behavior_project_lims_api.py | 21 +++++++++- .../internal/behavior_project_base.py | 38 +++++++++---------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py index 292417484..cb0c4ac3d 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py +++ b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py @@ -1,5 +1,5 @@ import pandas as pd -from 
typing import Optional, List, Dict, Any +from typing import Optional, List, Dict, Any, Iterable import logging from allensdk.brain_observatory.behavior.internal.behavior_project_base\ @@ -304,3 +304,22 @@ def get_behavior_only_session_table( return (summary_tbl.merge(stimulus_names, on=["foraging_id"], how="left") .set_index("behavior_session_id")) + + def get_natural_movie_template(self, number: int) -> Iterable[bytes]: + """Download a template for the natural scene stimulus. This is the + actual image that was shown during the recording session. + :param number: idenfifier for this movie (note that this is an int, + so to get the template for natural_movie_three should pass 3) + :type number: int + :returns: iterable yielding a tiff file as bytes + """ + raise NotImplementedError() + + def get_natural_scene_template(self, number: int) -> Iterable[bytes]: + """ Download a template for the natural movie stimulus. This is the + actual movie that was shown during the recording session. + :param number: identifier for this scene + :type number: int + :returns: An iterable yielding an npy file as bytes + """ + raise NotImplementedError() diff --git a/allensdk/brain_observatory/behavior/internal/behavior_project_base.py b/allensdk/brain_observatory/behavior/internal/behavior_project_base.py index 02451f450..ead386708 100644 --- a/allensdk/brain_observatory/behavior/internal/behavior_project_base.py +++ b/allensdk/brain_observatory/behavior/internal/behavior_project_base.py @@ -44,23 +44,23 @@ def get_behavior_only_session_table(self) -> pd.DataFrame: """ pass - # @abstractmethod - # def get_natural_movie_template(self, number: int) -> Iterable[bytes]: - # """Download a template for the natural scene stimulus. This is the - # actual image that was shown during the recording session. 
- # :param number: idenfifier for this movie (note that this is an int, - # so to get the template for natural_movie_three should pass 3) - # :type number: int - # :returns: iterable yielding a tiff file as bytes - # """ - # pass + @abstractmethod + def get_natural_movie_template(self, number: int) -> Iterable[bytes]: + """Download a template for the natural scene stimulus. This is the + actual image that was shown during the recording session. + :param number: idenfifier for this movie (note that this is an int, + so to get the template for natural_movie_three should pass 3) + :type number: int + :returns: iterable yielding a tiff file as bytes + """ + pass - # @abstractmethod - # def get_natural_scene_template(self, number: int) -> Iterable[bytes]: - # """ Download a template for the natural movie stimulus. This is the - # actual movie that was shown during the recording session. - # :param number: identifier for this scene - # :type number: int - # :returns: An iterable yielding an npy file as bytes - # """ - # pass + @abstractmethod + def get_natural_scene_template(self, number: int) -> Iterable[bytes]: + """ Download a template for the natural movie stimulus. This is the + actual movie that was shown during the recording session. + :param number: identifier for this scene + :type number: int + :returns: An iterable yielding an npy file as bytes + """ + pass From 27d6f6d0c53f7d83b642737aa0b71d760ca60960 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Tue, 19 Nov 2019 11:14:03 -0800 Subject: [PATCH 54/60] Add get sync timestamps for behavior This is a combination of 5 commits: 1) Add behavior camera line label to sync `Dataset` 2) Refactor get_synchronized_camera_frame_times and add tests get_synchronized_camera_frame_times will now be located in allensdk.brain_observatory.sync_utilities. 
The function has also been renamed to: get_synchronized_frame_times 3) Update gaze_mapping with refactored get sync times function 4) Update ecephys write_nwb with refactored get sync frames function 5) Fix comma omission --- .../ecephys/write_nwb/__main__.py | 14 +++--- .../gaze_mapping/__main__.py | 9 ++-- .../gaze_mapping/_sync_frames.py | 31 ------------- allensdk/brain_observatory/sync_dataset.py | 3 +- .../sync_utilities/__init__.py | 42 ++++++++++++++++- .../gaze_mapping/test_main.py | 6 +-- .../sync_utilities/test_sync_utilities.py | 46 ++++++++++++++++++- 7 files changed, 104 insertions(+), 47 deletions(-) delete mode 100644 allensdk/brain_observatory/gaze_mapping/_sync_frames.py diff --git a/allensdk/brain_observatory/ecephys/write_nwb/__main__.py b/allensdk/brain_observatory/ecephys/write_nwb/__main__.py index bb5be585d..d67fab6ab 100644 --- a/allensdk/brain_observatory/ecephys/write_nwb/__main__.py +++ b/allensdk/brain_observatory/ecephys/write_nwb/__main__.py @@ -32,7 +32,8 @@ from allensdk.brain_observatory import dict_to_indexed_array from allensdk.brain_observatory.ecephys.file_io.continuous_file import ContinuousFile from allensdk.brain_observatory.ecephys.nwb import EcephysProbe, EcephysLabMetaData -from allensdk.brain_observatory.gaze_mapping._sync_frames import get_synchronized_camera_frame_times +from allensdk.brain_observatory.sync_dataset import Dataset +import allensdk.brain_observatory.sync_utilities as su STIM_TABLE_RENAMES_MAP = {"Start": "start_time", "End": "stop_time"} @@ -753,24 +754,25 @@ def write_ecephys_nwb( add_raw_running_data_to_nwbfile(nwbfile, raw_running_data) # --- Add eye tracking ellipse fits to nwb file --- - eye_tracking_frame_times = get_synchronized_camera_frame_times(session_sync_path) + eye_tracking_frame_times = su.get_synchronized_frame_times(session_sync_file=session_sync_path, + sync_line_label_keys=Dataset.EYE_TRACKING_KEYS) eye_dlc_tracking_data = read_eye_dlc_tracking_ellipses(Path(eye_dlc_ellipses_path)) if 
eye_tracking_data_is_valid(eye_dlc_tracking_data=eye_dlc_tracking_data, synced_timestamps=eye_tracking_frame_times): add_eye_tracking_ellipse_fit_data_to_nwbfile(nwbfile, - eye_dlc_tracking_data=eye_dlc_tracking_data, - synced_timestamps=eye_tracking_frame_times) + eye_dlc_tracking_data=eye_dlc_tracking_data, + synced_timestamps=eye_tracking_frame_times) # --- Append eye tracking rig geometry info to nwb file (with eye tracking) --- append_eye_tracking_rig_geometry_data_to_nwbfile(nwbfile, - eye_tracking_rig_geometry=eye_tracking_rig_geometry) + eye_tracking_rig_geometry=eye_tracking_rig_geometry) # --- Add gaze mapped positions to nwb file --- if eye_gaze_mapping_path: eye_gaze_data = read_eye_gaze_mappings(Path(eye_gaze_mapping_path)) add_eye_gaze_mapping_data_to_nwbfile(nwbfile, - eye_gaze_data=eye_gaze_data) + eye_gaze_data=eye_gaze_data) Manifest.safe_make_parent_dirs(output_path) io = pynwb.NWBHDF5IO(output_path, mode='w') diff --git a/allensdk/brain_observatory/gaze_mapping/__main__.py b/allensdk/brain_observatory/gaze_mapping/__main__.py index f243a96f7..eb0eca1e1 100644 --- a/allensdk/brain_observatory/gaze_mapping/__main__.py +++ b/allensdk/brain_observatory/gaze_mapping/__main__.py @@ -25,9 +25,9 @@ post_process_areas, post_process_cr, ) -from allensdk.brain_observatory.gaze_mapping._sync_frames import ( - get_synchronized_camera_frame_times -) + +from allensdk.brain_observatory.sync_dataset import Dataset +import allensdk.brain_observatory.sync_utilities as su def load_ellipse_fit_params(input_file: Path) -> Dict[str, pd.DataFrame]: @@ -297,7 +297,8 @@ def load_sync_file_timings(sync_file: Path, up with number of new frame times from the sync file. 
""" # Add synchronized frame times - frame_times = get_synchronized_camera_frame_times(sync_file) + frame_times = su.get_synchronized_frame_times(session_sync_file=sync_file, + sync_line_label_keys=Dataset.EYE_TRACKING_KEYS) if (pupil_params_rows != len(frame_times)): raise RuntimeError("The number of camera sync pulses in the " f"sync file ({len(frame_times)}) do not match " diff --git a/allensdk/brain_observatory/gaze_mapping/_sync_frames.py b/allensdk/brain_observatory/gaze_mapping/_sync_frames.py deleted file mode 100644 index 8a249c245..000000000 --- a/allensdk/brain_observatory/gaze_mapping/_sync_frames.py +++ /dev/null @@ -1,31 +0,0 @@ -import pandas as pd -from pathlib import Path - -from allensdk.brain_observatory.sync_dataset import Dataset -from allensdk.brain_observatory import sync_utilities - - -def get_synchronized_camera_frame_times(session_sync_file: Path) -> pd.Series: - """Get eye tracking camera frame times from an experiment session sync file. - - Args: - session_sync_file (Path): Path to an ephys session sync file. - The sync file contains rising/falling edges from a daq system which - indicates when certain events occur (so they can be related to - each other). - - Returns: - pandas.Series: An array of times when frames for the eye tracking - camera were acquired. - """ - sync_dataset = Dataset(str(session_sync_file)) - - frame_times = sync_dataset.get_edges( - "rising", Dataset.EYE_TRACKING_KEYS, units="seconds" - ) - - # Occasionally an extra set of frame times are acquired after the rest of - # the signals. We detect and remove these. 
- frame_times = sync_utilities.trim_discontiguous_times(frame_times) - - return pd.Series(frame_times) diff --git a/allensdk/brain_observatory/sync_dataset.py b/allensdk/brain_observatory/sync_dataset.py index d50f101bf..8dc0a8af6 100644 --- a/allensdk/brain_observatory/sync_dataset.py +++ b/allensdk/brain_observatory/sync_dataset.py @@ -90,6 +90,7 @@ class Dataset(object): OPTOGENETIC_STIMULATION_KEYS = ("LED_sync", "opto_trial") EYE_TRACKING_KEYS = ("cam2_exposure", # clocks eye tracking frame pulses (port 0, line 9) "eyetracking") # previous line label for eye tracking (prior to ~ Oct. 2018) + BEHAVIOR_TRACKING_KEYS = ("cam1_exposure",) # clocks behavior tracking frame pulses (port 0, line 8) def __init__(self, path): self.dfile = self.load(path) @@ -302,7 +303,7 @@ def get_edges(self, kind, keys, units='seconds'): fn = self.get_rising_edges elif kind == 'all': return np.sort(np.concatenate([ - self.get_edges('rising', keys, units), + self.get_edges('rising', keys, units), self.get_edges('falling', keys, units) ])) diff --git a/allensdk/brain_observatory/sync_utilities/__init__.py b/allensdk/brain_observatory/sync_utilities/__init__.py index 659b59d40..08829daef 100644 --- a/allensdk/brain_observatory/sync_utilities/__init__.py +++ b/allensdk/brain_observatory/sync_utilities/__init__.py @@ -1,4 +1,10 @@ +from pathlib import Path +from typing import Tuple + import numpy as np +import pandas as pd + +from allensdk.brain_observatory.sync_dataset import Dataset def trim_discontiguous_times(times, threshold=100): @@ -13,4 +19,38 @@ def trim_discontiguous_times(times, threshold=100): if len(gap_indices) == 0: return times - return times[:gap_indices[0]+1] \ No newline at end of file + return times[:gap_indices[0] + 1] + + +def get_synchronized_frame_times(session_sync_file: Path, + sync_line_label_keys: Tuple[str, ...]) -> pd.Series: + """Get experimental frame times from an experiment session sync file. 
+ + Parameters + ---------- + session_sync_file : Path + Path to an ephys session sync file. + The sync file contains rising/falling edges from a daq system which + indicates when certain events occur (so they can be related to + each other). + sync_line_label_keys : Tuple[str, ...] + Line label keys to get times for. See class attributes of + allensdk.brain_observatory.sync_dataset.Dataset for a listing of + possible keys. + + Returns + ------- + pd.Series + An array of times when frames for the eye tracking camera were acquired. + """ + sync_dataset = Dataset(str(session_sync_file)) + + frame_times = sync_dataset.get_edges( + "rising", sync_line_label_keys, units="seconds" + ) + + # Occasionally an extra set of frame times are acquired after the rest of + # the signals. We detect and remove these. + frame_times = trim_discontiguous_times(frame_times) + + return pd.Series(frame_times) diff --git a/allensdk/test/brain_observatory/gaze_mapping/test_main.py b/allensdk/test/brain_observatory/gaze_mapping/test_main.py index 50ff54beb..3c2ba14ec 100644 --- a/allensdk/test/brain_observatory/gaze_mapping/test_main.py +++ b/allensdk/test/brain_observatory/gaze_mapping/test_main.py @@ -142,11 +142,11 @@ def mock_load_ellipse_fit_params(*args, **kwargs): (4, None, True) ]) def test_load_sync_file_timings(monkeypatch, pupil_params_rows, expected, expect_fail): - def mock_get_synchronized_camera_frame_times(*args, **kwargs): + def mock_get_synchronized_frame_times(*args, **kwargs): return pd.Series([1, 2, 3, 4, 5]) - monkeypatch.setattr(main, "get_synchronized_camera_frame_times", - mock_get_synchronized_camera_frame_times) + monkeypatch.setattr(main.su, "get_synchronized_frame_times", + mock_get_synchronized_frame_times) if expect_fail: with pytest.raises(RuntimeError, match="number of camera sync pulses"): diff --git a/allensdk/test/brain_observatory/sync_utilities/test_sync_utilities.py b/allensdk/test/brain_observatory/sync_utilities/test_sync_utilities.py index 
d92342ef4..829aa60cb 100644 --- a/allensdk/test/brain_observatory/sync_utilities/test_sync_utilities.py +++ b/allensdk/test/brain_observatory/sync_utilities/test_sync_utilities.py @@ -1,7 +1,36 @@ import pytest import numpy as np +from functools import partial + from allensdk.brain_observatory import sync_utilities as su +from allensdk.brain_observatory.sync_dataset import Dataset + + +class MockDataset(Dataset): + def __init__(self, path: str, + eye_tracking_timings, behavior_tracking_timings): + # Note: eye_tracking_timings and behavior_tracking_timings are test + # inputs that can be parametrized and do not exist in the real + # `Dataset` class. + self.eye_tracking_timings = eye_tracking_timings + self.behavior_tracking_timings = behavior_tracking_timings + + def get_edges(self, kind, keys, units='seconds'): + if keys == self.EYE_TRACKING_KEYS: + return self.eye_tracking_timings + elif keys == self.BEHAVIOR_TRACKING_KEYS: + return self.behavior_tracking_timings + + +@pytest.fixture +def mock_dataset_fixture(request): + test_params = { + "eye_tracking_timings": [], + "behavior_tracking_timings": [] + } + test_params.update(request.param) + return partial(MockDataset, **test_params) @pytest.mark.parametrize('vs_times, expected', [ @@ -9,4 +38,19 @@ ]) def test_trim_discontiguous_vsyncs(vs_times, expected): obtained = su.trim_discontiguous_times(vs_times) - assert np.allclose(obtained, expected) \ No newline at end of file + assert np.allclose(obtained, expected) + + +@pytest.mark.parametrize("mock_dataset_fixture,sync_line_label_keys,expected", [ + ({"eye_tracking_timings": [0.020, 0.030, 0.040, 0.050, 3.0]}, + Dataset.EYE_TRACKING_KEYS, [0.020, 0.030, 0.040, 0.050]), + + ({"behavior_tracking_timings": [0.080, 0.090, 0.100, 0.110, 8.0]}, + Dataset.BEHAVIOR_TRACKING_KEYS, [0.08, 0.090, 0.100, 0.110]) +], indirect=["mock_dataset_fixture"]) +def test_get_synchronized_frame_times(monkeypatch, mock_dataset_fixture, + sync_line_label_keys, expected): + 
monkeypatch.setattr(su, "Dataset", mock_dataset_fixture) + + obtained = su.get_synchronized_frame_times("dummy_path", sync_line_label_keys) + assert np.allclose(obtained, expected) From e02fb6d9c7c660d07f56e38272c5f90ed281cda3 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Fri, 15 Nov 2019 12:11:16 -0800 Subject: [PATCH 55/60] use typing extension protocol; keep unimplemented methods in base class for project api --- allensdk/core/typing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/allensdk/core/typing.py b/allensdk/core/typing.py index df00f7be0..5a055810f 100644 --- a/allensdk/core/typing.py +++ b/allensdk/core/typing.py @@ -1,8 +1,8 @@ -from typing import _Protocol +from typing import Protocol from abc import abstractmethod -class SupportsStr(_Protocol): +class SupportsStr(Protocol): """Classes that support the __str__ method""" @abstractmethod def __str__(self) -> str: From fa93ebd43277519dea91a78a84f0cee8811900dd Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Thu, 14 Nov 2019 14:00:45 -0800 Subject: [PATCH 56/60] GH #1146: visual behavior project cache add logging to cache for debugging and testing project cache with tests, manifest; rename swdb tests and deprecate swdb cache --- allensdk/api/caching_utilities.py | 13 +- .../behavior/behavior_project_cache.py | 324 ++++++++++++++++++ .../behavior/swdb/behavior_project_cache.py | 4 + allensdk/core/exceptions.py | 4 + allensdk/core/typing.py | 4 +- .../behavior/test_behavior_project_cache.py | 271 +++++++++------ .../test_swdb_behavior_project_cache.py | 116 +++++++ 7 files changed, 623 insertions(+), 113 deletions(-) create mode 100644 allensdk/brain_observatory/behavior/behavior_project_cache.py create mode 100644 allensdk/test/brain_observatory/behavior/test_swdb_behavior_project_cache.py diff --git a/allensdk/api/caching_utilities.py b/allensdk/api/caching_utilities.py index bc1525cbb..ab5de267e 100644 --- a/allensdk/api/caching_utilities.py +++ 
b/allensdk/api/caching_utilities.py @@ -2,6 +2,7 @@ from pathlib import Path import warnings import os +import logging from typing import overload, Callable, Any, Union, Optional, TypeVar @@ -87,18 +88,23 @@ def call_caching( The result of calling read """ + logger = logging.getLogger("call_caching") try: if not lazy or read is None: + logger.info("Fetching data from remote") data = fetch() if pre_write is not None: data = pre_write(data) + logger.info("Writing data to cache") write(data) - if read is not None: + if read is not None: + logger.info("Reading data from cache") return read() - - except Exception: + except Exception as e: + if isinstance(e, FileNotFoundError): + logger.info("No cache file found.") if cleanup is not None and not lazy: cleanup() @@ -150,7 +156,6 @@ def one_file_call_caching( Path at which the data will be stored """ - def safe_unlink(): try: os.unlink(path) diff --git a/allensdk/brain_observatory/behavior/behavior_project_cache.py b/allensdk/brain_observatory/behavior/behavior_project_cache.py new file mode 100644 index 000000000..f9f42291c --- /dev/null +++ b/allensdk/brain_observatory/behavior/behavior_project_cache.py @@ -0,0 +1,324 @@ +import numpy as np +import os.path +import csv +from functools import partial +from typing import Type, Callable, Optional, List, Any +import pandas as pd +import time +import logging + +from allensdk.api.cache import Cache + +from allensdk.brain_observatory.behavior.behavior_project_lims_api import ( + BehaviorProjectLimsApi) +from allensdk.brain_observatory.behavior.internal.behavior_project_base\ + import BehaviorProjectBase +from allensdk.api.caching_utilities import one_file_call_caching, call_caching +from allensdk.core.exceptions import MissingDataError + +BehaviorProjectApi = Type[BehaviorProjectBase] + + +class BehaviorProjectCache(Cache): + + MANIFEST_VERSION = "0.0.1-alpha" + OPHYS_SESSIONS_KEY = "ophys_sessions" + BEHAVIOR_SESSIONS_KEY = "behavior_sessions" + + # Temporary way for scientists 
to keep track of analyses + OPHYS_ANALYSIS_LOG_KEY = "ophys_analysis_log" + BEHAVIOR_ANALYSIS_LOG_KEY = "behavior_analysis_log" + + MANIFEST_CONFIG = { + OPHYS_SESSIONS_KEY: { + "spec": f"{OPHYS_SESSIONS_KEY}.csv", + "parent_key": "BASEDIR", + "typename": "file" + }, + BEHAVIOR_SESSIONS_KEY: { + "spec": f"{BEHAVIOR_SESSIONS_KEY}.csv", + "parent_key": "BASEDIR", + "typename": "file" + }, + OPHYS_ANALYSIS_LOG_KEY: { + "spec": f"{OPHYS_ANALYSIS_LOG_KEY}.csv", + "parent_key": "BASEDIR", + "typename": "file" + }, + BEHAVIOR_ANALYSIS_LOG_KEY: { + "spec": f"{BEHAVIOR_ANALYSIS_LOG_KEY}.csv", + "parent_key": "BASEDIR", + "typename": "file" + }, + } + + def __init__( + self, + fetch_api: BehaviorProjectApi = BehaviorProjectLimsApi.default(), + fetch_tries: int = 2, + **kwargs): + """ Entrypoint for accessing visual behavior data. Supports + access to summaries of session data and provides tools for + downloading detailed session data (such as dff traces). + + --- NOTE --- + Because NWB files are not currently supported for this project (as of + 11/2019), this cache will not actually save any files of session data + to the local machine. Only summary tables will be saved to the local + cache. File retrievals for specific sessions will be handled by + the fetch api used for the Session object, and cached in-memory + only to enable fast retrieval for subsequent calls. + + If you are looping over session objects, be sure to clean up + your memory when it is not needed by calling `cache_clear` from + your session object. + + Parameters + ========== + fetch_api : + Used to pull data from remote sources, after which it is locally + cached. Any object inheriting from BehaviorProjectBase is + suitable. Current options are: + EcephysProjectLimsApi :: Fetches bleeding-edge data from the + Allen Institute"s internal database. Only works if you are + on our internal network. + fetch_tries : + Maximum number of times to attempt a download before giving up and + raising an exception. 
Note that this is total tries, not retries + **kwargs : + manifest : str or Path + full path at which manifest json will be stored + version : str + version of manifest file. If this mismatches the version + recorded in the file at manifest, an error will be raised. + other kwargs are passed to allensdk.api.cache.Cache + """ + kwargs["manifest"] = kwargs.get("manifest", + "behavior_project_manifest.json") + kwargs["version"] = kwargs.get("version", self.MANIFEST_VERSION) + + super().__init__(**kwargs) + self.fetch_api = fetch_api + self.fetch_tries = fetch_tries + self.logger = logging.getLogger(self.__class__.__name__) + + @classmethod + def from_lims(cls, lims_kwargs=None, **kwargs): + lims_kwargs_ = lims_kwargs or {} + return cls(fetch_api=BehaviorProjectLimsApi.default(**lims_kwargs_), + **kwargs) + + def get_session_table( + self, + suppress: Optional[List[str]] = None, + by: str = "ophys_session_id") -> pd.DataFrame: + """ + Return summary table of all ophys_session_ids in the database. + :param suppress: optional list of columns to drop from the resulting + dataframe. + :type suppress: list of str + :param by: (default="ophys_session_id"). Column to index on, either + "ophys_session_id" or "ophys_experiment_id". + If by="ophys_experiment_id", then each row will only have one + experiment id, of type int (vs. an array of 1>more). 
+ :type by: str + :rtype: pd.DataFrame + """ + def write_csv_(path, df): + """Format the array of experiment ids for saving""" + df_ = df.copy() + df_["ophys_experiment_id"] = df_["ophys_experiment_id"].apply( + lambda x: "|".join(map(str, x))) + df_.to_csv(path) + + def read_csv_(path): + df = pd.read_csv(path, index_col="ophys_session_id") + df["ophys_experiment_id"] = df["ophys_experiment_id"].apply( + lambda x: np.fromstring(x, sep="|", dtype=int)) + return df + + sessions = self._get_session_summary( + self.fetch_api.get_session_table, self.OPHYS_SESSIONS_KEY, + write_csv_, read_csv_) + sessions = sessions.rename(columns={"genotype": "full_genotype"}) + if suppress: + sessions.drop(columns=suppress, inplace=True, errors="ignore") + + # Possibly explode and reindex + if by == "ophys_session_id": + pass + elif by == "ophys_experiment_id": + sessions = (sessions.reset_index() + .explode("ophys_experiment_id") + .set_index("ophys_experiment_id")) + else: + self.logger.warning( + f"Invalid value for `by`, '{by}', passed to get_session_table." + " Valid choices for `by` are 'ophys_experiment_id' and " + "'ophys_session_id'.") + return sessions + + def add_manifest_paths(self, manifest_builder): + manifest_builder = super().add_manifest_paths(manifest_builder) + for key, config in self.MANIFEST_CONFIG.items(): + manifest_builder.add_path(key, **config) + return manifest_builder + + def get_behavior_session_table( + self, + suppress: Optional[List[str]] = None) -> pd.DataFrame: + """ + Return summary table of all behavior_session_ids in the database. + :param suppress: optional list of columns to drop from the resulting + dataframe. 
+ :type suppress: list of str + :rtype: pd.DataFrame + """ + read_csv_ = partial(read_csv, index_col="behavior_session_id") + sessions = self._get_session_summary( + self.fetch_api.get_behavior_only_session_table, + self.BEHAVIOR_SESSIONS_KEY, write_csv, read_csv_) + sessions = sessions.rename(columns={"genotype": "full_genotype"}) + if suppress: + sessions.drop(columns=suppress, inplace=True, errors="ignore") + return sessions + + def _get_session_summary( + self, + fetch_call: Callable[[], pd.DataFrame], + cache_key: str, + write_csv: Callable[[str], None], + read_csv: Callable[[str], pd.DataFrame]) -> pd.DataFrame: + """ + Generic helper method for caching calls to get session summary data, + such as `get_behavior_session_table` and `get_session_table`. + """ + path = self.get_cache_path(None, cache_key) + response = one_file_call_caching(path, fetch_call, write_csv, read_csv) + return response + + def get_session_data(self, ophys_experiment_id: int, fixed: bool = False): + """ + Note -- This method mocks the behavior of a cache. No files are + actually downloaded for local access. Instead, it adds the + session id to a csv log. If the "fixed" parameter is true, + then the API will first check to ensure that the log is present + in the record before pulling the data. + """ + # TODO: Future development will include an NWB reader to read from + # a true local cache (once nwb files are created) + # For now just check the log if pass `fixed` + path = self.get_cache_path(None, self.OPHYS_ANALYSIS_LOG_KEY) + if fixed: + self.logger.warning( + "Warning! Passing `fixed=True` does not ensure that the " + "underlying data has not changed, as no data are actually " + "cached locally. The log will be updated each time the data " + "are pulled from the database for tracking purposes.") + try: + record = pd.read_csv(path) + except FileNotFoundError: + raise MissingDataError( + "No analysis log found! 
Add to the log by getting " + "session data with fixed=False.") + if ophys_experiment_id not in record["ophys_experiment_id"].values: + raise MissingDataError( + f"Data for ophys experiment {ophys_experiment_id} not " + "found!") + + fetch_session = partial(self.fetch_api.get_session_data, + ophys_experiment_id) + write_log = partial(_write_log, path=path, + key_name="ophys_experiment_id", + key_value=ophys_experiment_id) + return call_caching( + fetch_session, + write_log, + lazy=False, + read=fetch_session + ) + + def get_behavior_session_data(self, behavior_session_id: int, + fixed: bool = False): + """ + Note -- This method mocks the behavior of a cache. No files are + actually downloaded for local access. Instead, it adds the + session id to a csv log. If the "fixed" parameter is true, + then the API will first check to ensure that the log is present + in the record before pulling the data. + """ + # TODO: Future development will include an NWB reader to read from + # a true local cache (once nwb files are created) + # For now just check the log if pass `fixed` + path = self.get_cache_path(None, self.BEHAVIOR_ANALYSIS_LOG_KEY) + if fixed: + self.logger.warning( + "Warning! Passing `fixed=True` does not ensure that the " + "underlying data has not changed, as no data are actually " + "cached locally. The log will be updated each time the data " + "are pulled from the database for tracking purposes.") + try: + record = pd.read_csv(path) + except FileNotFoundError: + raise MissingDataError( + "No analysis log found! 
Add to the log by getting " + "session data with fixed=False.") + if behavior_session_id not in record["behavior_session_id"].values: + raise MissingDataError( + f"Data for ophys experiment {behavior_session_id} not " + "found!") + + fetch_session = partial(self.fetch_api.get_behavior_only_session_data, + behavior_session_id) + write_log = partial(_write_log, path=path, + key_name="behavior_session_id", + key_value=behavior_session_id) + return call_caching( + fetch_session, + write_log, + lazy=False, # can't actually read from file cache + read=fetch_session + ) + + +def read_csv(path: str, index_col: str) -> pd.DataFrame: + return pd.read_csv(path, index_col=index_col) + + +def write_csv(path: str, df: str): + df.to_csv(path) + + +def _write_log(data: Any, path: str, key_name: str, key_value: Any): + """ + Helper method to create and add to a log. Invoked any time a session + object is created via BehaviorProjectCache. + :param data: Unused, required because call_caching method assumes + all writer functions have data as the first positional argument + :param path: Path to save the log file + :type path: str path + :param key_name: Name of the id used to track the session object. + Typically "behavior_session_id" or "ophys_session_id". + :type key_name: str + :param key_value: Value of the id used to track the session object. + Usually an int. 
+ """ + now = round(time.time()) + keys = [key_name, "created_at", "updated_at"] + values = [key_value, now, now] + if os.path.exists(path): + record = (pd.read_csv(path, index_col=key_name) + .to_dict(orient="index")) + experiment = record.get(key_value) + if experiment: + experiment.update({"updated_at": now}) + else: + record.update({key_value: dict(zip(keys[1:], values[1:]))}) + (pd.DataFrame.from_dict(record, orient="index") + .rename_axis(index=key_name) + .to_csv(path)) + else: + with open(path, "w") as f: + w = csv.DictWriter(f, fieldnames=keys) + w.writeheader() + w.writerow(dict(zip(keys, values))) diff --git a/allensdk/brain_observatory/behavior/swdb/behavior_project_cache.py b/allensdk/brain_observatory/behavior/swdb/behavior_project_cache.py index c30f336b5..37fdda4ab 100644 --- a/allensdk/brain_observatory/behavior/swdb/behavior_project_cache.py +++ b/allensdk/brain_observatory/behavior/swdb/behavior_project_cache.py @@ -10,6 +10,7 @@ from allensdk.core.lazy_property import LazyProperty from allensdk.brain_observatory.behavior.trials_processing import calculate_reward_rate from allensdk.brain_observatory.behavior.image_api import ImageApi +from allensdk.deprecated import deprecated csv_io = { 'reader': lambda path: pd.read_csv(path, index_col='Unnamed: 0'), @@ -19,6 +20,9 @@ cache_path_example = '/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/SWDB_2019/cache_20190813' +@deprecated("swdb.behavior_project_cache.BehaviorProjectCache is deprecated " + "and will be removed in version 1.3. Please use brain_observatory." 
+ "behavior.behavior_project_cache.BehaviorProjectCache.") class BehaviorProjectCache(object): def __init__(self, cache_base): ''' diff --git a/allensdk/core/exceptions.py b/allensdk/core/exceptions.py index 61bcf4d5a..94dc8f028 100644 --- a/allensdk/core/exceptions.py +++ b/allensdk/core/exceptions.py @@ -9,6 +9,7 @@ def __init__(self, msg, caught_exception=None): error_string = msg super().__init__(error_string) + class DataFrameIndexError(LookupError): """More verbose method for accessing invalid rows or columns in a dataframe. Should be used when an index error is thrown on a dataframe. @@ -20,3 +21,6 @@ def __init__(self, msg, caught_exception=None): error_string = msg super().__init__(error_string) + +class MissingDataError(ValueError): + pass diff --git a/allensdk/core/typing.py b/allensdk/core/typing.py index 5a055810f..df00f7be0 100644 --- a/allensdk/core/typing.py +++ b/allensdk/core/typing.py @@ -1,8 +1,8 @@ -from typing import Protocol +from typing import _Protocol from abc import abstractmethod -class SupportsStr(Protocol): +class SupportsStr(_Protocol): """Classes that support the __str__ method""" @abstractmethod def __str__(self) -> str: diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_project_cache.py b/allensdk/test/brain_observatory/behavior/test_behavior_project_cache.py index a45c1479b..e00af1987 100644 --- a/allensdk/test/brain_observatory/behavior/test_behavior_project_cache.py +++ b/allensdk/test/brain_observatory/behavior/test_behavior_project_cache.py @@ -1,116 +1,173 @@ import os -import numpy as np -import pandas as pd import pytest -from allensdk.brain_observatory.behavior.swdb import behavior_project_cache as bpc +import pandas as pd +import tempfile +import logging +import time +from allensdk.brain_observatory.behavior.behavior_project_cache import ( + BehaviorProjectCache) +from allensdk.core.exceptions import MissingDataError @pytest.fixture -def cache_test_base(): - return 
'/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/SWDB_2019/test_data' +def session_table(): + return (pd.DataFrame({"ophys_session_id": [1, 2, 3], + "ophys_experiment_id": [[4], [5, 6], [7]]}) + .set_index("ophys_session_id")) + @pytest.fixture -def cache(cache_test_base): - return bpc.BehaviorProjectCache(cache_test_base) +def behavior_table(): + return (pd.DataFrame({"behavior_session_id": [1, 2, 3], "b": [4, 5, 6]}) + .set_index("behavior_session_id")) + @pytest.fixture -def session(cache): - return cache.get_session(792815735) - -# Test trials extra columns -@pytest.mark.requires_bamboo -def test_extra_trials_columns(session): - for new_key in ['reward_rate', 'response_binary']: - assert new_key in session.trials.keys() - -@pytest.mark.requires_bamboo -def test_extra_stimulus_presentation_columns(session): - for new_key in [ - 'absolute_flash_number', - 'time_from_last_lick', - 'time_from_last_reward', - 'time_from_last_change', - 'block_index', - 'image_block_repetition', - 'repeat_within_block']: - assert new_key in session.stimulus_presentations.keys() - -@pytest.mark.requires_bamboo -def test_stimulus_presentations_image_set(session): - # We made the image set just 'A' or 'B' - assert session.stimulus_presentations['image_set'].unique() == np.array(['A']) - -@pytest.mark.requires_bamboo -def test_stimulus_templates(session): - # Was a dict with only one key, where the value was a 3d array. 
- # We made it a dict with image names as keys and 2d arrs (the images) as values - for image_name, image_arr in session.stimulus_templates.items(): - assert image_arr.ndim == 2 - -# Test trial response df -@pytest.mark.requires_bamboo -@pytest.mark.parametrize('key, output', [ - ('mean_response', 0.0053334), - ('baseline_response', -0.0020357), - ('p_value', 0.6478659), -]) -def test_session_trial_response(key, output, session): - trial_response = session.trial_response_df - np.testing.assert_almost_equal(trial_response.query("cell_specimen_id == 817103993").iloc[0][key], output, decimal=6) - -@pytest.mark.requires_bamboo -@pytest.mark.parametrize('key, output', [ - ('time_from_last_lick', 7.3577), - ('mean_running_speed', 22.143871), - ('duration', 0.25024), -]) -def test_session_flash_response(key, output, session): - flash_response = session.flash_response_df - np.testing.assert_almost_equal(flash_response.query("cell_specimen_id == 817103993").iloc[0][key], output, decimal=6) - -@pytest.mark.requires_bamboo -def test_analysis_files_metadata(cache): - assert cache.analysis_files_metadata[ - 'trial_response_df_params' - ]['response_window_duration_seconds'] == 0.5 - -@pytest.mark.requires_bamboo -def test_session_image_loading(session): - assert isinstance(session.max_projection.data, np.ndarray) - -@pytest.mark.requires_bamboo -def test_no_invalid_rois(session): - # We made the cache return sessions without the invalid rois - assert session.cell_specimen_table['valid_roi'].all() - -@pytest.mark.requires_bamboo -def test_get_container_sessions(cache): - container_id = cache.experiment_table['container_id'].unique()[0] - container_sessions = cache.get_container_sessions(container_id) - session = container_sessions['OPHYS_1_images_A'] - assert isinstance(session, bpc.ExtendedBehaviorSession) - np.testing.assert_almost_equal(session.dff_traces.loc[817103993]['dff'][0], 0.3538657529565) - -@pytest.mark.requires_bamboo -def 
test_binarized_segmentation_mask_image(session): - np.testing.assert_array_equal( - np.unique(np.array(session.segmentation_mask_image.data).ravel()), - np.array([0, 1]) - ) - -@pytest.mark.requires_bamboo -def test_no_nan_flash_running_speed(session): - assert not pd.isnull(session.stimulus_presentations['mean_running_speed']).any() - -@pytest.mark.requires_bamboo -def test_licks_correct_colname(session): - assert session.licks.columns == ['timestamps'] - -@pytest.mark.requires_bamboo -def test_rewards_correct_colname(session): - assert (session.rewards.columns == ['timestamps', 'volume', 'autorewarded']).all() - -@pytest.mark.requires_bamboo -def test_dff_traces_correct_colname(session): - # This is a Friday-harbor specific change - assert 'cell_roi_id' not in session.dff_traces.columns +def mock_api(session_table, behavior_table): + class MockApi: + def get_session_table(self): + return session_table + + def get_behavior_only_session_table(self): + return behavior_table + + def get_session_data(self, ophys_session_id): + return ophys_session_id + + def get_behavior_only_session_data(self, behavior_session_id): + return behavior_session_id + return MockApi + + +@pytest.fixture +def TempdirBehaviorCache(mock_api): + temp_dir = tempfile.TemporaryDirectory() + manifest = os.path.join(temp_dir.name, "manifest.json") + yield BehaviorProjectCache(fetch_api=mock_api(), + manifest=manifest) + temp_dir.cleanup() + + +def test_get_session_table(TempdirBehaviorCache, session_table): + cache = TempdirBehaviorCache + actual = cache.get_session_table() + path = cache.manifest.path_info.get("ophys_sessions").get("spec") + assert os.path.exists(path) + pd.testing.assert_frame_equal(session_table, actual) + + +def test_get_behavior_table(TempdirBehaviorCache, behavior_table): + cache = TempdirBehaviorCache + actual = cache.get_behavior_session_table() + path = cache.manifest.path_info.get("behavior_sessions").get("spec") + assert os.path.exists(path) + 
pd.testing.assert_frame_equal(behavior_table, actual) + + +def test_session_table_reads_from_cache(TempdirBehaviorCache, session_table, + caplog): + caplog.set_level(logging.INFO, logger="call_caching") + cache = TempdirBehaviorCache + cache.get_session_table() + expected_first = [ + ("call_caching", logging.INFO, "Reading data from cache"), + ("call_caching", logging.INFO, "No cache file found."), + ("call_caching", logging.INFO, "Fetching data from remote"), + ("call_caching", logging.INFO, "Writing data to cache"), + ("call_caching", logging.INFO, "Reading data from cache")] + assert expected_first == caplog.record_tuples + caplog.clear() + cache.get_session_table() + assert [expected_first[0]] == caplog.record_tuples + + +def test_behavior_table_reads_from_cache(TempdirBehaviorCache, behavior_table, + caplog): + caplog.set_level(logging.INFO, logger="call_caching") + cache = TempdirBehaviorCache + cache.get_behavior_session_table() + expected_first = [ + ("call_caching", logging.INFO, "Reading data from cache"), + ("call_caching", logging.INFO, "No cache file found."), + ("call_caching", logging.INFO, "Fetching data from remote"), + ("call_caching", logging.INFO, "Writing data to cache"), + ("call_caching", logging.INFO, "Reading data from cache")] + assert expected_first == caplog.record_tuples + caplog.clear() + cache.get_behavior_session_table() + assert [expected_first[0]] == caplog.record_tuples + + +def test_behavior_session_fails_fixed_if_no_cache(TempdirBehaviorCache): + cache = TempdirBehaviorCache + with pytest.raises(MissingDataError): + cache.get_behavior_session_data(1, fixed=True) + cache.get_behavior_session_data(1) + # Also fails if there is a cache, but the id is not contained therein + with pytest.raises(MissingDataError): + cache.get_behavior_session_data(2, fixed=True) + + +def test_session_fails_fixed_if_no_cache(TempdirBehaviorCache): + cache = TempdirBehaviorCache + with pytest.raises(MissingDataError): + cache.get_session_data(1, 
fixed=True) + cache.get_session_data(1) + # Also fails if there is a cache, but the id is not contained therein + with pytest.raises(MissingDataError): + cache.get_session_data(2, fixed=True) + + +def test_get_session_table_by_experiment(TempdirBehaviorCache): + expected = (pd.DataFrame({"ophys_session_id": [1, 2, 2, 3], + "ophys_experiment_id": [4, 5, 6, 7]}) + .set_index("ophys_experiment_id")) + actual = TempdirBehaviorCache.get_session_table(by="ophys_experiment_id") + pd.testing.assert_frame_equal(expected, actual) + + +def test_write_behavior_log(TempdirBehaviorCache): + expected_cols = ["behavior_session_id", "created_at", "updated_at"] + expected_ids = [1, 2] + expected_times = [False, True] + cache = TempdirBehaviorCache + cache.get_behavior_session_data(1) + cache.get_behavior_session_data(2) + time.sleep(1) + cache.get_behavior_session_data(1) + path = cache.manifest.path_info.get("behavior_analysis_log").get("spec") + # Log exists + assert os.path.exists(path) + actual = pd.read_csv(path) + # columns exist + assert list(actual) == expected_cols + # ids exist + assert actual["behavior_session_id"].values.tolist() == expected_ids + # first one should have updated different than created since accessed 2x + assert ((actual["created_at"] == actual["updated_at"]).values.tolist() + == expected_times) + + +def test_write_session_log(TempdirBehaviorCache): + expected_cols = ["ophys_experiment_id", "created_at", "updated_at"] + expected_ids = [1, 2] + expected_times = [False, True] + cache = TempdirBehaviorCache + cache.get_session_data(1) + cache.get_session_data(2) + time.sleep(1) + cache.get_session_data(1) + path = cache.manifest.path_info.get("ophys_analysis_log").get("spec") + # Log exists + assert os.path.exists(path) + actual = pd.read_csv(path) + # columns exist + assert list(actual) == expected_cols + # ids exist + assert actual["ophys_experiment_id"].values.tolist() == expected_ids + # first one should have updated different than created since accessed 
2x + assert ((actual["created_at"] == actual["updated_at"]).values.tolist() + == expected_times) + + + diff --git a/allensdk/test/brain_observatory/behavior/test_swdb_behavior_project_cache.py b/allensdk/test/brain_observatory/behavior/test_swdb_behavior_project_cache.py new file mode 100644 index 000000000..a45c1479b --- /dev/null +++ b/allensdk/test/brain_observatory/behavior/test_swdb_behavior_project_cache.py @@ -0,0 +1,116 @@ +import os +import numpy as np +import pandas as pd +import pytest +from allensdk.brain_observatory.behavior.swdb import behavior_project_cache as bpc + + +@pytest.fixture +def cache_test_base(): + return '/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/SWDB_2019/test_data' + +@pytest.fixture +def cache(cache_test_base): + return bpc.BehaviorProjectCache(cache_test_base) + +@pytest.fixture +def session(cache): + return cache.get_session(792815735) + +# Test trials extra columns +@pytest.mark.requires_bamboo +def test_extra_trials_columns(session): + for new_key in ['reward_rate', 'response_binary']: + assert new_key in session.trials.keys() + +@pytest.mark.requires_bamboo +def test_extra_stimulus_presentation_columns(session): + for new_key in [ + 'absolute_flash_number', + 'time_from_last_lick', + 'time_from_last_reward', + 'time_from_last_change', + 'block_index', + 'image_block_repetition', + 'repeat_within_block']: + assert new_key in session.stimulus_presentations.keys() + +@pytest.mark.requires_bamboo +def test_stimulus_presentations_image_set(session): + # We made the image set just 'A' or 'B' + assert session.stimulus_presentations['image_set'].unique() == np.array(['A']) + +@pytest.mark.requires_bamboo +def test_stimulus_templates(session): + # Was a dict with only one key, where the value was a 3d array. 
+ # We made it a dict with image names as keys and 2d arrs (the images) as values + for image_name, image_arr in session.stimulus_templates.items(): + assert image_arr.ndim == 2 + +# Test trial response df +@pytest.mark.requires_bamboo +@pytest.mark.parametrize('key, output', [ + ('mean_response', 0.0053334), + ('baseline_response', -0.0020357), + ('p_value', 0.6478659), +]) +def test_session_trial_response(key, output, session): + trial_response = session.trial_response_df + np.testing.assert_almost_equal(trial_response.query("cell_specimen_id == 817103993").iloc[0][key], output, decimal=6) + +@pytest.mark.requires_bamboo +@pytest.mark.parametrize('key, output', [ + ('time_from_last_lick', 7.3577), + ('mean_running_speed', 22.143871), + ('duration', 0.25024), +]) +def test_session_flash_response(key, output, session): + flash_response = session.flash_response_df + np.testing.assert_almost_equal(flash_response.query("cell_specimen_id == 817103993").iloc[0][key], output, decimal=6) + +@pytest.mark.requires_bamboo +def test_analysis_files_metadata(cache): + assert cache.analysis_files_metadata[ + 'trial_response_df_params' + ]['response_window_duration_seconds'] == 0.5 + +@pytest.mark.requires_bamboo +def test_session_image_loading(session): + assert isinstance(session.max_projection.data, np.ndarray) + +@pytest.mark.requires_bamboo +def test_no_invalid_rois(session): + # We made the cache return sessions without the invalid rois + assert session.cell_specimen_table['valid_roi'].all() + +@pytest.mark.requires_bamboo +def test_get_container_sessions(cache): + container_id = cache.experiment_table['container_id'].unique()[0] + container_sessions = cache.get_container_sessions(container_id) + session = container_sessions['OPHYS_1_images_A'] + assert isinstance(session, bpc.ExtendedBehaviorSession) + np.testing.assert_almost_equal(session.dff_traces.loc[817103993]['dff'][0], 0.3538657529565) + +@pytest.mark.requires_bamboo +def 
test_binarized_segmentation_mask_image(session): + np.testing.assert_array_equal( + np.unique(np.array(session.segmentation_mask_image.data).ravel()), + np.array([0, 1]) + ) + +@pytest.mark.requires_bamboo +def test_no_nan_flash_running_speed(session): + assert not pd.isnull(session.stimulus_presentations['mean_running_speed']).any() + +@pytest.mark.requires_bamboo +def test_licks_correct_colname(session): + assert session.licks.columns == ['timestamps'] + +@pytest.mark.requires_bamboo +def test_rewards_correct_colname(session): + assert (session.rewards.columns == ['timestamps', 'volume', 'autorewarded']).all() + +@pytest.mark.requires_bamboo +def test_dff_traces_correct_colname(session): + # This is a Friday-harbor specific change + assert 'cell_roi_id' not in session.dff_traces.columns From 6e1994e58c241ab2092ea769f334c0e10439fc8b Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Tue, 19 Nov 2019 14:23:49 -0800 Subject: [PATCH 57/60] update queries to add fields to session table; get more data with experiment table --- .../behavior/behavior_data_session.py | 1 + .../behavior/behavior_project_cache.py | 95 +++++++--- .../behavior/behavior_project_lims_api.py | 166 ++++++++++++++++-- .../internal/api/behavior_data_lims_api.py | 2 +- .../behavior/test_behavior_project_cache.py | 14 +- .../test_behavior_project_lims_api.py | 57 +++--- 6 files changed, 255 insertions(+), 80 deletions(-) diff --git a/allensdk/brain_observatory/behavior/behavior_data_session.py b/allensdk/brain_observatory/behavior/behavior_data_session.py index c86bbaa00..9aca0997c 100644 --- a/allensdk/brain_observatory/behavior/behavior_data_session.py +++ b/allensdk/brain_observatory/behavior/behavior_data_session.py @@ -80,6 +80,7 @@ def running_data_df(self) -> pd.DataFrame: """ return self.api.get_running_data_df() + @property def running_speed(self) -> RunningSpeed: """Get running speed using timestamps from self.get_stimulus_timestamps. 
diff --git a/allensdk/brain_observatory/behavior/behavior_project_cache.py b/allensdk/brain_observatory/behavior/behavior_project_cache.py index f9f42291c..3f00e2f91 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_cache.py +++ b/allensdk/brain_observatory/behavior/behavior_project_cache.py @@ -24,6 +24,7 @@ class BehaviorProjectCache(Cache): MANIFEST_VERSION = "0.0.1-alpha" OPHYS_SESSIONS_KEY = "ophys_sessions" BEHAVIOR_SESSIONS_KEY = "behavior_sessions" + OPHYS_EXPERIMENTS_KEY = "ophys_experiments" # Temporary way for scientists to keep track of analyses OPHYS_ANALYSIS_LOG_KEY = "ophys_analysis_log" @@ -40,6 +41,11 @@ class BehaviorProjectCache(Cache): "parent_key": "BASEDIR", "typename": "file" }, + OPHYS_EXPERIMENTS_KEY: { + "spec": f"{OPHYS_EXPERIMENTS_KEY}.csv", + "parent_key": "BASEDIR", + "typename": "file" + }, OPHYS_ANALYSIS_LOG_KEY: { "spec": f"{OPHYS_ANALYSIS_LOG_KEY}.csv", "parent_key": "BASEDIR", @@ -124,22 +130,18 @@ def get_session_table( :type by: str :rtype: pd.DataFrame """ - def write_csv_(path, df): - """Format the array of experiment ids for saving""" - df_ = df.copy() - df_["ophys_experiment_id"] = df_["ophys_experiment_id"].apply( - lambda x: "|".join(map(str, x))) - df_.to_csv(path) - - def read_csv_(path): - df = pd.read_csv(path, index_col="ophys_session_id") - df["ophys_experiment_id"] = df["ophys_experiment_id"].apply( - lambda x: np.fromstring(x, sep="|", dtype=int)) - return df - + write_csv = partial( + _write_csv, + array_fields=["reporter_line", "driver_line", + "ophys_experiment_id"]) + read_csv = partial( + _read_csv, index_col="ophys_session_id", + array_fields=["reporter_line", "driver_line", + "ophys_experiment_id"], + array_types=[str, str, int]) sessions = self._get_session_summary( self.fetch_api.get_session_table, self.OPHYS_SESSIONS_KEY, - write_csv_, read_csv_) + write_csv, read_csv) sessions = sessions.rename(columns={"genotype": "full_genotype"}) if suppress: sessions.drop(columns=suppress, 
inplace=True, errors="ignore") @@ -164,6 +166,31 @@ def add_manifest_paths(self, manifest_builder): manifest_builder.add_path(key, **config) return manifest_builder + def get_experiment_table( + self, + suppress: Optional[List[str]] = None) -> pd.DataFrame: + """ + Return summary table of all ophys_experiment_ids in the database. + :param suppress: optional list of columns to drop from the resulting + dataframe. + :type suppress: list of str + :rtype: pd.DataFrame + """ + write_csv = partial( + _write_csv, + array_fields=["reporter_line", "driver_line"]) + read_csv = partial( + _read_csv, index_col="ophys_experiment_id", + array_fields=["reporter_line", "driver_line"], + array_types=[str, str]) + experiments = self._get_session_summary( + self.fetch_api.get_experiment_table, self.OPHYS_EXPERIMENTS_KEY, + write_csv, read_csv) + experiments = experiments.rename(columns={"genotype": "full_genotype"}) + if suppress: + experiments.drop(columns=suppress, inplace=True, errors="ignore") + return experiments + def get_behavior_session_table( self, suppress: Optional[List[str]] = None) -> pd.DataFrame: @@ -174,10 +201,15 @@ def get_behavior_session_table( :type suppress: list of str :rtype: pd.DataFrame """ - read_csv_ = partial(read_csv, index_col="behavior_session_id") + read_csv = partial( + _read_csv, index_col="behavior_session_id", + array_fields=["reporter_line", "driver_line"], + array_types=[str, str]) + write_csv = partial( + _write_csv, array_fields=["reporter_line", "driver_line"]) sessions = self._get_session_summary( self.fetch_api.get_behavior_only_session_table, - self.BEHAVIOR_SESSIONS_KEY, write_csv, read_csv_) + self.BEHAVIOR_SESSIONS_KEY, write_csv, read_csv) sessions = sessions.rename(columns={"genotype": "full_genotype"}) if suppress: sessions.drop(columns=suppress, inplace=True, errors="ignore") @@ -281,14 +313,6 @@ def get_behavior_session_data(self, behavior_session_id: int, ) -def read_csv(path: str, index_col: str) -> pd.DataFrame: - return 
pd.read_csv(path, index_col=index_col) - - -def write_csv(path: str, df: str): - df.to_csv(path) - - def _write_log(data: Any, path: str, key_name: str, key_value: Any): """ Helper method to create and add to a log. Invoked any time a session @@ -322,3 +346,26 @@ def _write_log(data: Any, path: str, key_name: str, key_value: Any): w = csv.DictWriter(f, fieldnames=keys) w.writeheader() w.writerow(dict(zip(keys, values))) + + +def _write_csv(path, df, array_fields=None): + """Private writer that encodes array fields into pipe-delimited strings + for saving a csv. + """ + df_ = df.copy() + for field in array_fields: + df_[field] = df_[field].apply(lambda x: "|".join(map(str, x))) + df_.to_csv(path) + + +def _read_csv(path, index_col, array_fields=None, array_types=None): + """Private reader that can open a csv with pipe-delimited array + fields and convert them to array.""" + df = pd.read_csv(path, index_col=index_col) + for field, type_ in zip(array_fields, array_types): + if type_ == str: + df[field] = df[field].apply(lambda x: x.split("|")) + else: + df[field] = df[field].apply( + lambda x: np.fromstring(x, sep="|", dtype=type_)) + return df diff --git a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py index cb0c4ac3d..238efd7e1 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py +++ b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py @@ -117,13 +117,31 @@ def _build_experiment_from_session_query() -> str: query = f""" -- -- begin getting all ophys_experiment_ids -- -- SELECT - (ARRAY_AGG(DISTINCT(oe.id))) as experiment_ids, os.id + (ARRAY_AGG(DISTINCT(oe.id))) AS experiment_ids, os.id FROM ophys_sessions os RIGHT JOIN ophys_experiments oe ON oe.ophys_session_id = os.id GROUP BY os.id -- -- end getting all ophys_experiment_ids -- -- """ return query + + @staticmethod + def _build_line_from_donor_query(line="driver") -> str: + """Sub-query to 
get a line from a donor. + :param line: 'driver' or 'reporter' + """ + query = f""" + -- -- begin getting {line} line from donors -- -- + SELECT ARRAY_AGG (g.name) AS {line}_line, d.id AS donor_id + FROM donors d + LEFT JOIN donors_genotypes dg ON dg.donor_id=d.id + LEFT JOIN genotypes g ON g.id=dg.genotype_id + LEFT JOIN genotype_types gt ON gt.id=g.genotype_type_id + WHERE gt.name='{line}' + GROUP BY d.id + -- -- end getting {line} line from donors -- -- + """ + return query def _get_behavior_summary_table(self, session_sub_query: str) -> pd.DataFrame: @@ -138,17 +156,26 @@ def _get_behavior_summary_table(self, """ query = f""" SELECT - bs.id as behavior_session_id, + bs.id AS behavior_session_id, bs.ophys_session_id, bs.behavior_training_id, - sp.id as specimen_id, - d.full_genotype as genotype, - g.name as sex, + equipment.name as equipment_name, + d.id as donor_id, + d.full_genotype AS genotype, + reporter.reporter_line, + driver.driver_line, + g.name AS sex, bs.foraging_id FROM behavior_sessions bs JOIN donors d on bs.donor_id = d.id JOIN genders g on g.id = d.gender_id - JOIN specimens sp ON sp.donor_id = d.id + JOIN ( + {self._build_line_from_donor_query("reporter")} + ) reporter on reporter.donor_id = d.id + JOIN ( + {self._build_line_from_donor_query("driver")} + ) driver on driver.donor_id = d.id + JOIN equipment ON equipment.id = bs.equipment_id {session_sub_query} """ return self.postgres_engine.select(query) @@ -212,15 +239,86 @@ def get_session_data(self, ophys_session_id: int) -> BehaviorOphysSession: """ return BehaviorOphysSession(BehaviorOphysLimsApi(ophys_session_id)) + def _get_experiment_table( + self, + ophys_experiment_ids: Optional[List[int]] = None) -> pd.DataFrame: + """ + Helper function for easier testing. + Return a pd.Dataframe table with all ophys_experiment_ids and relevant + metadata. 
+ Return columns: ophys_session_id, behavior_session_id, + ophys_experiment_id, project_code, session_name, + session_type, equipment_name, date_of_acquisition, + specimen_id, genotype, sex, age_in_days, + reporter_line, driver_line + + :param ophys_experiment_ids: optional list of ophys_experiment_ids + to include + :rtype: pd.DataFrame + """ + if not ophys_experiment_ids: + self.logger.warning("Getting all ophys sessions." + " This might take a while.") + experiment_query = self._build_in_list_selector_query( + "oe.id", ophys_experiment_ids) + query = f""" + SELECT + oe.id as ophys_experiment_id, + os.id as ophys_session_id, + bs.id as behavior_session_id, + oec.visual_behavior_experiment_container_id as container_id, + pr.code as project_code, + vbc.workflow_state as container_workflow_state, + oe.workflow_state as experiment_workflow_state, + os.name as session_name, + os.stimulus_name as session_type, + equipment.name as equipment_name, + os.date_of_acquisition, + os.isi_experiment_id, + os.specimen_id, + g.name as sex, + DATE_PART('day', os.date_of_acquisition - d.date_of_birth) + AS age_in_days, + d.full_genotype as genotype, + reporter.reporter_line, + driver.driver_line, + id.depth as imaging_depth, + st.acronym as targeted_structure, + vbc.published_at + FROM ophys_experiments_visual_behavior_experiment_containers oec + JOIN visual_behavior_experiment_containers vbc + ON oec.visual_behavior_experiment_container_id = vbc.id + JOIN ophys_experiments oe ON oe.id = oec.ophys_experiment_id + JOIN ophys_sessions os ON os.id = oe.ophys_session_id + JOIN behavior_sessions bs ON os.id = bs.ophys_session_id + JOIN projects pr ON pr.id = os.project_id + JOIN donors d ON d.id = bs.donor_id + JOIN genders g ON g.id = d.gender_id + JOIN ( + {self._build_line_from_donor_query(line="reporter")} + ) reporter on reporter.donor_id = d.id + JOIN ( + {self._build_line_from_donor_query(line="driver")} + ) driver on driver.donor_id = d.id + LEFT JOIN imaging_depths id ON id.id 
= os.imaging_depth_id + JOIN structures st ON st.id = oe.targeted_structure_id + JOIN equipment ON equipment.id = os.equipment_id + {experiment_query}; + """ + self.logger.debug(f"get_experiment_table query: \n{query}") + return self.postgres_engine.select(query) + def _get_session_table( self, ophys_session_ids: Optional[List[int]] = None) -> pd.DataFrame: """Helper function for easier testing. Return a pd.Dataframe table with all ophys_session_ids and relevant metadata. - Return columns: ophys_session_id, behavior_session_id, specimen_id, - ophys_experiment_ids, isi_experiment_id, session_type, - date_of_acquisition, genotype, sex, age_in_days + Return columns: ophys_session_id, behavior_session_id, + ophys_experiment_id, project_code, session_name, + session_type, equipment_name, date_of_acquisition, + specimen_id, genotype, sex, age_in_days, + reporter_line, driver_line :param ophys_session_ids: optional list of ophys_session_ids to include :rtype: pd.DataFrame @@ -230,27 +328,38 @@ def _get_session_table( " This might take a while.") session_query = self._build_in_list_selector_query("os.id", ophys_session_ids) - experiment_query = self._build_experiment_from_session_query() query = f""" SELECT os.id as ophys_session_id, bs.id as behavior_session_id, experiment_ids as ophys_experiment_id, - os.specimen_id, - os.isi_experiment_id, + pr.code as project_code, + os.name as session_name, os.stimulus_name as session_type, + equipment.name as equipment_name, os.date_of_acquisition, - d.full_genotype as genotype, + os.specimen_id, g.name as sex, DATE_PART('day', os.date_of_acquisition - d.date_of_birth) - AS age_in_days + AS age_in_days, + d.full_genotype as genotype, + reporter.reporter_line, + driver.driver_line FROM ophys_sessions os JOIN behavior_sessions bs ON os.id = bs.ophys_session_id + JOIN projects pr ON pr.id = os.project_id JOIN donors d ON d.id = bs.donor_id JOIN genders g ON g.id = d.gender_id JOIN ( - {experiment_query} + 
{self._build_experiment_from_session_query()} ) exp_ids ON os.id = exp_ids.id + JOIN ( + {self._build_line_from_donor_query(line="reporter")} + ) reporter on reporter.donor_id = d.id + JOIN ( + {self._build_line_from_donor_query(line="driver")} + ) driver on driver.donor_id = d.id + JOIN equipment ON equipment.id = os.equipment_id {session_query}; """ self.logger.debug(f"get_session_table query: \n{query}") @@ -261,9 +370,11 @@ def get_session_table( ophys_session_ids: Optional[List[int]] = None) -> pd.DataFrame: """Return a pd.Dataframe table with all ophys_session_ids and relevant metadata. - Return columns: ophys_session_id, behavior_session_id, specimen_id, - ophys_experiment_ids, isi_experiment_id, session_type, - date_of_acquisition, genotype, sex, age_in_days + Return columns: ophys_session_id, behavior_session_id, + ophys_experiment_id, project_code, session_name, + session_type, equipment_name, date_of_acquisition, + specimen_id, genotype, sex, age_in_days, + reporter_line, driver_line :param ophys_session_ids: optional list of ophys_session_ids to include :rtype: pd.DataFrame @@ -285,6 +396,25 @@ def get_behavior_only_session_data( """ return BehaviorDataSession(BehaviorDataLimsApi(behavior_session_id)) + def get_experiment_table( + self, + ophys_experiment_ids: Optional[List[int]] = None) -> pd.DataFrame: + """Return a pd.Dataframe table with all ophys_experiment_ids and + relevant metadata. This is the most specific and most informative + level to examine the data. 
+ Return columns: + ophys_experiment_id, ophys_session_id, behavior_session_id, + container_id, project_code, container_workflow_state, + experiment_workflow_state, session_name, session_type, + equipment_name, date_of_acquisition, isi_experiment_id, + specimen_id, sex, age_in_days, genotype, reporter_line, + driver_line, imaging_depth, targeted_structure, published_at + :param ophys_experiment_ids: optional list of ophys_experiment_ids + to include + :rtype: pd.DataFrame + """ + return self._get_experiment_table().set_index("ophys_experiment_id") + def get_behavior_only_session_table( self, behavior_session_ids: Optional[List[int]] = None) -> pd.DataFrame: diff --git a/allensdk/internal/api/behavior_data_lims_api.py b/allensdk/internal/api/behavior_data_lims_api.py index c5f59f6b6..04f17266f 100644 --- a/allensdk/internal/api/behavior_data_lims_api.py +++ b/allensdk/internal/api/behavior_data_lims_api.py @@ -462,7 +462,7 @@ def get_metadata(self) -> Dict[str, Any]: "sex": self.get_sex(), "age": self.get_age(), "ophys_experiment_id": self.ophys_experiment_ids, - "experiment_container_id": self.experiment_container_id, + "experiment_container_id": self.ophys_container_id, "stimulus_frame_rate": self.get_stimulus_frame_rate(), "session_type": self.get_stimulus_name(), "experiment_datetime": self.get_experiment_date(), diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_project_cache.py b/allensdk/test/brain_observatory/behavior/test_behavior_project_cache.py index e00af1987..92ca14cfd 100644 --- a/allensdk/test/brain_observatory/behavior/test_behavior_project_cache.py +++ b/allensdk/test/brain_observatory/behavior/test_behavior_project_cache.py @@ -12,13 +12,17 @@ @pytest.fixture def session_table(): return (pd.DataFrame({"ophys_session_id": [1, 2, 3], - "ophys_experiment_id": [[4], [5, 6], [7]]}) + "ophys_experiment_id": [[4], [5, 6], [7]], + "reporter_line": [["aa"], ["aa", "bb"], ["cc"]], + "driver_line": [["aa"], ["aa", "bb"], ["cc"]]}) 
.set_index("ophys_session_id")) @pytest.fixture def behavior_table(): - return (pd.DataFrame({"behavior_session_id": [1, 2, 3], "b": [4, 5, 6]}) + return (pd.DataFrame({"behavior_session_id": [1, 2, 3], + "reporter_line": [["aa"], ["aa", "bb"], ["cc"]], + "driver_line": [["aa"], ["aa", "bb"], ["cc"]]}) .set_index("behavior_session_id")) @@ -122,7 +126,8 @@ def test_get_session_table_by_experiment(TempdirBehaviorCache): expected = (pd.DataFrame({"ophys_session_id": [1, 2, 2, 3], "ophys_experiment_id": [4, 5, 6, 7]}) .set_index("ophys_experiment_id")) - actual = TempdirBehaviorCache.get_session_table(by="ophys_experiment_id") + actual = TempdirBehaviorCache.get_session_table(by="ophys_experiment_id")[ + ["ophys_session_id"]] pd.testing.assert_frame_equal(expected, actual) @@ -168,6 +173,3 @@ def test_write_session_log(TempdirBehaviorCache): # first one should have updated different than created since accessed 2x assert ((actual["created_at"] == actual["updated_at"]).values.tolist() == expected_times) - - - diff --git a/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py b/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py index 36657619a..f84eef414 100644 --- a/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py +++ b/allensdk/test/brain_observatory/behavior/test_behavior_project_lims_api.py @@ -79,35 +79,30 @@ def test_get_behavior_stage_table(MockBehaviorProjectLimsApi): @pytest.mark.parametrize( - "ophys_session_ids,expected", [ - (None, WhitespaceStrippedString(""" - SELECT - os.id as ophys_session_id, - bs.id as behavior_session_id, - experiment_ids as ophys_experiment_id, - os.specimen_id, - os.isi_experiment_id, - os.stimulus_name as session_type, - os.date_of_acquisition, - d.full_genotype as genotype, - g.name as sex, - DATE_PART('day', os.date_of_acquisition - d.date_of_birth) - AS age_in_days - FROM ophys_sessions os - JOIN behavior_sessions bs ON os.id = bs.ophys_session_id - JOIN 
donors d ON d.id = bs.donor_id - JOIN genders g ON g.id = d.gender_id - JOIN (-- -- begin getting all ophys_experiment_ids -- -- - SELECT - (ARRAY_AGG(DISTINCT(oe.id))) as experiment_ids, os.id - FROM ophys_sessions os - RIGHT JOIN ophys_experiments oe ON oe.ophys_session_id = os.id - GROUP BY os.id - -- -- end getting all ophys_experiment_ids -- -- - ) exp_ids ON os.id = exp_ids.id; - """))] + "line,expected", [ + ("reporter", WhitespaceStrippedString( + """-- -- begin getting reporter line from donors -- -- + SELECT ARRAY_AGG (g.name) AS reporter_line, d.id AS donor_id + FROM donors d + LEFT JOIN donors_genotypes dg ON dg.donor_id=d.id + LEFT JOIN genotypes g ON g.id=dg.genotype_id + LEFT JOIN genotype_types gt ON gt.id=g.genotype_type_id + WHERE gt.name='reporter' + GROUP BY d.id + -- -- end getting reporter line from donors -- --""")), + ("driver", WhitespaceStrippedString( + """-- -- begin getting driver line from donors -- -- + SELECT ARRAY_AGG (g.name) AS driver_line, d.id AS donor_id + FROM donors d + LEFT JOIN donors_genotypes dg ON dg.donor_id=d.id + LEFT JOIN genotypes g ON g.id=dg.genotype_id + LEFT JOIN genotype_types gt ON gt.id=g.genotype_type_id + WHERE gt.name='driver' + GROUP BY d.id + -- -- end getting driver line from donors -- --""")) + ] ) -def test_get_session_table(ophys_session_ids, expected, - MockBehaviorProjectLimsApi): - actual = MockBehaviorProjectLimsApi._get_session_table() - assert expected == actual +def test_build_line_from_donor_query(line, expected, + MockBehaviorProjectLimsApi): + mbp_api = MockBehaviorProjectLimsApi + assert expected == mbp_api._build_line_from_donor_query(line=line) From 04f5d7441d2b050b73039d03a8099c876384ac97 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Tue, 19 Nov 2019 15:28:49 -0800 Subject: [PATCH 58/60] add an example notebook --- .../Lims Behavior Project Cache.ipynb | 927 ++++++++++++++++++ 1 file changed, 927 insertions(+) create mode 100644 doc_template/examples_root/examples/internal/Lims 
Behavior Project Cache.ipynb diff --git a/doc_template/examples_root/examples/internal/Lims Behavior Project Cache.ipynb b/doc_template/examples_root/examples/internal/Lims Behavior Project Cache.ipynb new file mode 100644 index 000000000..b6228fd06 --- /dev/null +++ b/doc_template/examples_root/examples/internal/Lims Behavior Project Cache.ipynb @@ -0,0 +1,927 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visual Behavior Data Project Cache\n", + "A short introduction to analyzing the Visual Behavior data.\n", + "This notebook uses the LIMS API to access data, so it will only work on the Allen Institute network.\n", + "\n", + "Please note that local caching functionality has not been implemented, as there are currently no NWB files for these data. Because the data may change over time, whenever a Session object is created through the cache, we note when the data were accessed in a session log." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os.path\n", + "import pandas as pd\n", + "import SimpleITK as sitk\n", + "import matplotlib.pyplot as plt\n", + "from allensdk.brain_observatory.behavior.behavior_project_cache import BehaviorProjectCache\n", + "from allensdk.brain_observatory.behavior.image_api import ImageApi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `BehaviorProjectCache` is the main entry point to the Visual Behavior project dataset. It allows you to view cross-session summary information and create classes to analyze individual sessions. It supports both behavior-only sessions as well as sessions with behavior and optical physiology recordings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# this path determines where the downloaded data will be stored\n", + "\n", + "manifest_path = os.path.join(\"example_behavior_project_cache\", \"manifest.json\")\n", + "\n", + "cache = BehaviorProjectCache.from_lims(manifest=manifest_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can view all session records present in the LIMS database." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['VisualBehaviorMultiscope', 'VisualBehavior',\n", + " 'VisBIntTestDatacube', 'VisBehNeuroModAx', 'VisualBehaviorTask1B',\n", + " 'MesoscopeDevelopment', 'VisBehViralDev',\n", + " 'VisualBehaviorDevelopment', 'VisualBehaviorIntegrationTest',\n", + " 'DevelopmentMultiscope4areasx2d',\n", + " 'VisualBehaviorMultiscope4areasx2d'], dtype=object)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ophys_sessions = cache.get_session_table()\n", + "ophys_sessions.head()\n", + "ophys_sessions.project_code.unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we want to get more specific, we can look at all experiment records in the LIMS database. We can then filter down to what we're interested in." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ophys_session_idbehavior_session_idcontainer_idproject_codecontainer_workflow_stateexperiment_workflow_statesession_namesession_typeequipment_namedate_of_acquisitionisi_experiment_idspecimen_idsexage_in_daysfull_genotypereporter_linedriver_lineimaging_depthtargeted_structurepublished_at
ophys_experiment_id
953659741952430817952554548949264660VisualBehaviorMultiscopeholdingfailed20190923_457841_2imagesAOPHYS_2_images_A_passiveMESO.12019-09-23 08:13:07.627573858992726850862430F209.0Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt[Ai148(TIT2L-GC6f-ICL-tTA2)][Sst-IRES-Cre]NaNVISpNaN
953659756952430817952554548949264660VisualBehaviorMultiscopeholdingfailed20190923_457841_2imagesAOPHYS_2_images_A_passiveMESO.12019-09-23 08:13:07.627573858992726850862430F209.0Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt[Ai148(TIT2L-GC6f-ICL-tTA2)][Sst-IRES-Cre]NaNVISlNaN
953659749952430817952554548949264660VisualBehaviorMultiscopeholdingpassed20190923_457841_2imagesAOPHYS_2_images_A_passiveMESO.12019-09-23 08:13:07.627573858992726850862430F209.0Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt[Ai148(TIT2L-GC6f-ICL-tTA2)][Sst-IRES-Cre]NaNVISlNaN
953659747952430817952554548949264660VisualBehaviorMultiscopeholdingfailed20190923_457841_2imagesAOPHYS_2_images_A_passiveMESO.12019-09-23 08:13:07.627573858992726850862430F209.0Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt[Ai148(TIT2L-GC6f-ICL-tTA2)][Sst-IRES-Cre]NaNVISpNaN
953659743952430817952554548949264660VisualBehaviorMultiscopeholdingpassed20190923_457841_2imagesAOPHYS_2_images_A_passiveMESO.12019-09-23 08:13:07.627573858992726850862430F209.0Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt[Ai148(TIT2L-GC6f-ICL-tTA2)][Sst-IRES-Cre]NaNVISpNaN
\n", + "
" + ], + "text/plain": [ + " ophys_session_id behavior_session_id container_id \\\n", + "ophys_experiment_id \n", + "953659741 952430817 952554548 949264660 \n", + "953659756 952430817 952554548 949264660 \n", + "953659749 952430817 952554548 949264660 \n", + "953659747 952430817 952554548 949264660 \n", + "953659743 952430817 952554548 949264660 \n", + "\n", + " project_code container_workflow_state \\\n", + "ophys_experiment_id \n", + "953659741 VisualBehaviorMultiscope holding \n", + "953659756 VisualBehaviorMultiscope holding \n", + "953659749 VisualBehaviorMultiscope holding \n", + "953659747 VisualBehaviorMultiscope holding \n", + "953659743 VisualBehaviorMultiscope holding \n", + "\n", + " experiment_workflow_state session_name \\\n", + "ophys_experiment_id \n", + "953659741 failed 20190923_457841_2imagesA \n", + "953659756 failed 20190923_457841_2imagesA \n", + "953659749 passed 20190923_457841_2imagesA \n", + "953659747 failed 20190923_457841_2imagesA \n", + "953659743 passed 20190923_457841_2imagesA \n", + "\n", + " session_type equipment_name \\\n", + "ophys_experiment_id \n", + "953659741 OPHYS_2_images_A_passive MESO.1 \n", + "953659756 OPHYS_2_images_A_passive MESO.1 \n", + "953659749 OPHYS_2_images_A_passive MESO.1 \n", + "953659747 OPHYS_2_images_A_passive MESO.1 \n", + "953659743 OPHYS_2_images_A_passive MESO.1 \n", + "\n", + " date_of_acquisition isi_experiment_id \\\n", + "ophys_experiment_id \n", + "953659741 2019-09-23 08:13:07.627573 858992726 \n", + "953659756 2019-09-23 08:13:07.627573 858992726 \n", + "953659749 2019-09-23 08:13:07.627573 858992726 \n", + "953659747 2019-09-23 08:13:07.627573 858992726 \n", + "953659743 2019-09-23 08:13:07.627573 858992726 \n", + "\n", + " specimen_id sex age_in_days \\\n", + "ophys_experiment_id \n", + "953659741 850862430 F 209.0 \n", + "953659756 850862430 F 209.0 \n", + "953659749 850862430 F 209.0 \n", + "953659747 850862430 F 209.0 \n", + "953659743 850862430 F 209.0 \n", + "\n", + " full_genotype 
\\\n", + "ophys_experiment_id \n", + "953659741 Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt \n", + "953659756 Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt \n", + "953659749 Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt \n", + "953659747 Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt \n", + "953659743 Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt \n", + "\n", + " reporter_line driver_line \\\n", + "ophys_experiment_id \n", + "953659741 [Ai148(TIT2L-GC6f-ICL-tTA2)] [Sst-IRES-Cre] \n", + "953659756 [Ai148(TIT2L-GC6f-ICL-tTA2)] [Sst-IRES-Cre] \n", + "953659749 [Ai148(TIT2L-GC6f-ICL-tTA2)] [Sst-IRES-Cre] \n", + "953659747 [Ai148(TIT2L-GC6f-ICL-tTA2)] [Sst-IRES-Cre] \n", + "953659743 [Ai148(TIT2L-GC6f-ICL-tTA2)] [Sst-IRES-Cre] \n", + "\n", + " imaging_depth targeted_structure published_at \n", + "ophys_experiment_id \n", + "953659741 NaN VISp NaN \n", + "953659756 NaN VISl NaN \n", + "953659749 NaN VISl NaN \n", + "953659747 NaN VISp NaN \n", + "953659743 NaN VISp NaN " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ophys_experiments = cache.get_experiment_table()\n", + "ophys_experiments.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's analyze the most recent passed experiment for the Visual Behavior project.\n", + "\n", + "Note that the session data will not be downloaded to your local machine. However, the time this ID was last accessed will be recorded in the analysis log (in seconds since epoch)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Latest experiment id: 978244684. Acquired on 2019-11-07 20:02:18.000000. 
Session: OPHYS_4_images_B\n" + ] + } + ], + "source": [ + "latest = ophys_experiments.query(\"project_code == 'VisualBehavior'\"\n", + " \"& experiment_workflow_state == 'passed'\")\\\n", + " .sort_values(\"date_of_acquisition\", ascending=False).iloc[0]\n", + "\n", + "print(f\"Latest experiment id: {latest.name}. Acquired on {latest['date_of_acquisition']}. \"\n", + " f\"Session: {latest['session_type']}\")\n", + " \n", + "session = cache.get_session_data(latest.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can look at metadata about the session:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'rig_name': 'CAM2P.5',\n", + " 'sex': 'M',\n", + " 'age': 'unknown',\n", + " 'excitation_lambda': 910.0,\n", + " 'emission_lambda': 520.0,\n", + " 'indicator': 'GCAMP6f',\n", + " 'field_of_view_width': 447,\n", + " 'field_of_view_height': 512,\n", + " 'ophys_experiment_id': 978244684,\n", + " 'experiment_container_id': 968890932,\n", + " 'ophys_frame_rate': 31.0,\n", + " 'stimulus_frame_rate': 60.0,\n", + " 'targeted_structure': 'VISp',\n", + " 'imaging_depth': 175,\n", + " 'session_type': 'OPHYS_4_images_B',\n", + " 'experiment_datetime': Timestamp('2019-11-07 20:02:18+0000', tz='UTC'),\n", + " 'reporter_line': ['Ai93(TITL-GCaMP6f)'],\n", + " 'driver_line': ['Camk2a-tTA', 'Slc17a7-IRES2-Cre'],\n", + " 'LabTracks_ID': 483803,\n", + " 'full_genotype': 'Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt',\n", + " 'behavior_session_uuid': UUID('d7593ceb-4420-4d42-8c70-41f8ae71b9a6')}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "session.metadata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can examine 2d images of cells:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + 
"text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(1, 3)\n", + "ax[0].imshow(session.max_projection)\n", + "ax[1].imshow(session.average_projection)\n", + "ax[2].imshow(session.segmentation_mask_image)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also look at data for behavior-only sessions. These data do not have optical physiology recordings associated with them." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ophys_session_idbehavior_training_idequipment_namedonor_idfull_genotypereporter_linedriver_linesexforaging_idsession_type
behavior_session_id
846710859846605051.0NaNCAM2P.4814111925Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-G...[Ai94(TITL-GCaMP6s)][Camk2a-tTA, Slc17a7-IRES2-Cre]Fb183a85b-6a29-4054-9d3c-915a0408a10aOPHYS_5_images_B_passive
820627398NaNNaNMESO.1703198154Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...[Ai93(TITL-GCaMP6f)][Camk2a-tTA, Slc17a7-IRES2-Cre]Me3bc6a4b-3477-41bc-966b-ef8e3d35b6cdOPHYS_1_images_A
767197943NaNNaNCAM2P.3642238406Slc17a7-IRES2-Cre/wt;Ai162(TIT2L-GC6s-ICL-tTA2...[Ai162(TIT2L-GC6s-ICL-tTA2)][Slc17a7-IRES2-Cre]M7a1d6674-df2c-53e7-ad85-9065a520634d0_gratings_autorewards_15min
768638583768434190.0NaNCAM2P.4703210569Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...[Ai93(TITL-GCaMP6f)][Camk2a-tTA, Slc17a7-IRES2-Cre]Fcf1769ef-623e-4bb9-a698-a82986e917275_images_a_ophys
768879508NaNNaNCAM2P.3642238406Slc17a7-IRES2-Cre/wt;Ai162(TIT2L-GC6s-ICL-tTA2...[Ai162(TIT2L-GC6s-ICL-tTA2)][Slc17a7-IRES2-Cre]M3e2e0696-70f9-5839-888a-a90668e66a4d1_gratings
\n", + "
" + ], + "text/plain": [ + " ophys_session_id behavior_training_id equipment_name \\\n", + "behavior_session_id \n", + "846710859 846605051.0 NaN CAM2P.4 \n", + "820627398 NaN NaN MESO.1 \n", + "767197943 NaN NaN CAM2P.3 \n", + "768638583 768434190.0 NaN CAM2P.4 \n", + "768879508 NaN NaN CAM2P.3 \n", + "\n", + " donor_id \\\n", + "behavior_session_id \n", + "846710859 814111925 \n", + "820627398 703198154 \n", + "767197943 642238406 \n", + "768638583 703210569 \n", + "768879508 642238406 \n", + "\n", + " full_genotype \\\n", + "behavior_session_id \n", + "846710859 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-G... \n", + "820627398 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... \n", + "767197943 Slc17a7-IRES2-Cre/wt;Ai162(TIT2L-GC6s-ICL-tTA2... \n", + "768638583 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... \n", + "768879508 Slc17a7-IRES2-Cre/wt;Ai162(TIT2L-GC6s-ICL-tTA2... \n", + "\n", + " reporter_line \\\n", + "behavior_session_id \n", + "846710859 [Ai94(TITL-GCaMP6s)] \n", + "820627398 [Ai93(TITL-GCaMP6f)] \n", + "767197943 [Ai162(TIT2L-GC6s-ICL-tTA2)] \n", + "768638583 [Ai93(TITL-GCaMP6f)] \n", + "768879508 [Ai162(TIT2L-GC6s-ICL-tTA2)] \n", + "\n", + " driver_line sex \\\n", + "behavior_session_id \n", + "846710859 [Camk2a-tTA, Slc17a7-IRES2-Cre] F \n", + "820627398 [Camk2a-tTA, Slc17a7-IRES2-Cre] M \n", + "767197943 [Slc17a7-IRES2-Cre] M \n", + "768638583 [Camk2a-tTA, Slc17a7-IRES2-Cre] F \n", + "768879508 [Slc17a7-IRES2-Cre] M \n", + "\n", + " foraging_id \\\n", + "behavior_session_id \n", + "846710859 b183a85b-6a29-4054-9d3c-915a0408a10a \n", + "820627398 e3bc6a4b-3477-41bc-966b-ef8e3d35b6cd \n", + "767197943 7a1d6674-df2c-53e7-ad85-9065a520634d \n", + "768638583 cf1769ef-623e-4bb9-a698-a82986e91727 \n", + "768879508 3e2e0696-70f9-5839-888a-a90668e66a4d \n", + "\n", + " session_type \n", + "behavior_session_id \n", + "846710859 OPHYS_5_images_B_passive \n", + "820627398 OPHYS_1_images_A \n", + "767197943 0_gratings_autorewards_15min \n", + "768638583 
5_images_a_ophys \n", + "768879508 1_gratings " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# get all behavior sessions\n", + "behavior_sessions = cache.get_behavior_session_table()\n", + "behavior_sessions.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at one behavior session.\n", + "\n", + "Note that when accessing behavior data using `get_behavior_session_data`, different data sources may be used than when using `get_session_data` for shared attributes. In addition, there may also be some changes to how the data are processed. For example, when examining the stimulus presentations for a behavior session, there is no correction for monitor delay (unlike for ophys sessions). Behavior sessions also use lower temporal resolution lick monitors than ophys sessions.\n", + "\n", + "Please see the documentation for additional details." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---- Metadata ----\n", + "rig_name: CAM2P.4\n", + "sex: F\n", + "age: P123\n", + "ophys_experiment_id: [847241639]\n", + "experiment_container_id: 876693136\n", + "stimulus_frame_rate: 60.0\n", + "session_type: OPHYS_5_images_B_passive\n", + "experiment_datetime: 2019-04-05 15:57:39.013000+00:00\n", + "reporter_line: ['Ai94(TITL-GCaMP6s)']\n", + "driver_line: ['Camk2a-tTA', 'Slc17a7-IRES2-Cre']\n", + "LabTracks_ID: 442709\n", + "full_genotype: Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai94(TITL-GCaMP6s)/wt\n", + "behavior_session_uuid: b183a85b-6a29-4054-9d3c-915a0408a10a\n", + "foraging_id: b183a85b-6a29-4054-9d3c-915a0408a10a\n", + "behavior_session_id: 846710859\n", + "behavior_training_id: None\n", + "------------------\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "behav_sess = cache.get_behavior_session_data(846710859)\n", + "\n", + "print(\"---- Metadata ----\")\n", + "print(\"\\n\".join([f\"{k}: {v}\" for k, v in behav_sess.metadata.items()]))\n", + "print(\"------------------\")\n", + "\n", + "# Plot the running speed\n", + "plt.plot(behav_sess.running_speed.timestamps, behav_sess.running_speed.values)\n", + "_ = plt.xlabel('Time (Second)')\n", + "_ = plt.ylabel('Speed (cm/Second)')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Memory management\n", + "Session objects cache certain values to enable faster access, such as time-consuming queries to the LIMS database.\n", + "In order to prevent issues with running out of memory when working with session objects in a loop, you should use the `cache_clear` method to clear the caches." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current cache size:\n", + "1\n", + "Cache size after clearing: \n", + "0\n" + ] + } + ], + "source": [ + "# Example of a cached query\n", + "# You can look at the cache of any call by invoking `cache_size` on a cached function\n", + "print(\"Current cache size:\")\n", + "print(behav_sess.api.get_driver_line.cache_size())\n", + "# Clear the cache from the session object\n", + "behav_sess.cache_clear()\n", + "print(\"Cache size after clearing: \")\n", + "print(behav_sess.api.get_driver_line.cache_size())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our analysis log has recorded which sessions we've accessed. If we want to mock out the behavior of a \"fixed\" cache, like `EcephysProjectCache.fixed()`, we pass `fixed=True` to `get_session_data`. If the ophys_experiment_id is not in the log, then it will not load." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ophys_experiment_idcreated_atupdated_at
097824468415742035031574206089
\n", + "
" + ], + "text/plain": [ + " ophys_experiment_id created_at updated_at\n", + "0 978244684 1574203503 1574206089" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# See our log\n", + "pd.read_csv(\"example_behavior_project_cache/ophys_analysis_log.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning! Passing `fixed=True` does not ensure that the underlying data has not changed, as no data are actually cached locally. The log will be updated each time the data are pulled from the database for tracking purposes.\n" + ] + }, + { + "ename": "MissingDataError", + "evalue": "Data for ophys experiment 12345 not found!", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mMissingDataError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Trying to load an ID that isn't in the log will raise an error\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_session_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m12345\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfixed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/allensdk-bpc/lib/python3.6/site-packages/allensdk/brain_observatory/behavior/behavior_project_cache.py\u001b[0m in \u001b[0;36mget_session_data\u001b[0;34m(self, ophys_experiment_id, fixed)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mophys_experiment_id\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mrecord\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"ophys_experiment_id\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 257\u001b[0m raise MissingDataError(\n\u001b[0;32m--> 258\u001b[0;31m \u001b[0;34mf\"Data for ophys experiment {ophys_experiment_id} not \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 259\u001b[0m \"found!\")\n\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mMissingDataError\u001b[0m: Data for ophys experiment 12345 not found!" + ] + } + ], + "source": [ + "# Trying to load an ID that isn't in the log will raise an error\n", + "cache.get_session_data(12345, fixed=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning! Passing `fixed=True` does not ensure that the underlying data has not changed, as no data are actually cached locally. 
The log will be updated each time the data are pulled from the database for tracking purposes.\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# But it will work if we use one that already exists\n", + "cache.get_session_data(978244684, fixed=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:allensdk-bpc]", + "language": "python", + "name": "conda-env-allensdk-bpc-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 74d236215d7245b2392b50a88a1a2cd111cb3e40 Mon Sep 17 00:00:00 2001 From: Kat Schelonka Date: Wed, 20 Nov 2019 10:01:39 -0800 Subject: [PATCH 59/60] updates for clarity; skip tests for deprecated swdb cache --- .../behavior/behavior_project_cache.py | 34 +++++++------------ .../behavior/behavior_project_lims_api.py | 14 ++++---- .../test_swdb_behavior_project_cache.py | 29 ++++++++++++++++ 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/allensdk/brain_observatory/behavior/behavior_project_cache.py b/allensdk/brain_observatory/behavior/behavior_project_cache.py index 3f00e2f91..676edc267 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_cache.py +++ b/allensdk/brain_observatory/behavior/behavior_project_cache.py @@ -139,10 +139,11 @@ def get_session_table( array_fields=["reporter_line", "driver_line", "ophys_experiment_id"], array_types=[str, str, int]) - sessions = self._get_session_summary( - self.fetch_api.get_session_table, self.OPHYS_SESSIONS_KEY, + path = self.get_cache_path(None, 
self.OPHYS_SESSIONS_KEY) + sessions = one_file_call_caching( + path, + self.fetch_api.get_session_table, write_csv, read_csv) - sessions = sessions.rename(columns={"genotype": "full_genotype"}) if suppress: sessions.drop(columns=suppress, inplace=True, errors="ignore") @@ -183,10 +184,11 @@ def get_experiment_table( _read_csv, index_col="ophys_experiment_id", array_fields=["reporter_line", "driver_line"], array_types=[str, str]) - experiments = self._get_session_summary( - self.fetch_api.get_experiment_table, self.OPHYS_EXPERIMENTS_KEY, + path = self.get_cache_path(None, self.OPHYS_EXPERIMENTS_KEY) + experiments = one_file_call_caching( + path, + self.fetch_api.get_experiment_table, write_csv, read_csv) - experiments = experiments.rename(columns={"genotype": "full_genotype"}) if suppress: experiments.drop(columns=suppress, inplace=True, errors="ignore") return experiments @@ -207,28 +209,16 @@ def get_behavior_session_table( array_types=[str, str]) write_csv = partial( _write_csv, array_fields=["reporter_line", "driver_line"]) - sessions = self._get_session_summary( + path = self.get_cache_path(None, self.BEHAVIOR_SESSIONS_KEY) + sessions = one_file_call_caching( + path, self.fetch_api.get_behavior_only_session_table, - self.BEHAVIOR_SESSIONS_KEY, write_csv, read_csv) + write_csv, read_csv) sessions = sessions.rename(columns={"genotype": "full_genotype"}) if suppress: sessions.drop(columns=suppress, inplace=True, errors="ignore") return sessions - def _get_session_summary( - self, - fetch_call: Callable[[], pd.DataFrame], - cache_key: str, - write_csv: Callable[[str], None], - read_csv: Callable[[str], pd.DataFrame]) -> pd.DataFrame: - """ - Generic helper method for caching calls to get session summary data, - such as `get_behavior_session_table` and `get_session_table`. 
- """ - path = self.get_cache_path(None, cache_key) - response = one_file_call_caching(path, fetch_call, write_csv, read_csv) - return response - def get_session_data(self, ophys_experiment_id: int, fixed: bool = False): """ Note -- This method mocks the behavior of a cache. No files are diff --git a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py index 238efd7e1..0e50c0d19 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_lims_api.py +++ b/allensdk/brain_observatory/behavior/behavior_project_lims_api.py @@ -161,7 +161,7 @@ def _get_behavior_summary_table(self, bs.behavior_training_id, equipment.name as equipment_name, d.id as donor_id, - d.full_genotype AS genotype, + d.full_genotype, reporter.reporter_line, driver.driver_line, g.name AS sex, @@ -249,7 +249,7 @@ def _get_experiment_table( Return columns: ophys_session_id, behavior_session_id, ophys_experiment_id, project_code, session_name, session_type, equipment_name, date_of_acquisition, - specimen_id, genotype, sex, age_in_days, + specimen_id, full_genotype, sex, age_in_days, reporter_line, driver_line :param ophys_experiment_ids: optional list of ophys_experiment_ids @@ -279,7 +279,7 @@ def _get_experiment_table( g.name as sex, DATE_PART('day', os.date_of_acquisition - d.date_of_birth) AS age_in_days, - d.full_genotype as genotype, + d.full_genotype, reporter.reporter_line, driver.driver_line, id.depth as imaging_depth, @@ -317,7 +317,7 @@ def _get_session_table( Return columns: ophys_session_id, behavior_session_id, ophys_experiment_id, project_code, session_name, session_type, equipment_name, date_of_acquisition, - specimen_id, genotype, sex, age_in_days, + specimen_id, full_genotype, sex, age_in_days, reporter_line, driver_line :param ophys_session_ids: optional list of ophys_session_ids to include @@ -342,7 +342,7 @@ def _get_session_table( g.name as sex, DATE_PART('day', os.date_of_acquisition - 
d.date_of_birth) AS age_in_days, - d.full_genotype as genotype, + d.full_genotype, reporter.reporter_line, driver.driver_line FROM ophys_sessions os @@ -373,7 +373,7 @@ def get_session_table( Return columns: ophys_session_id, behavior_session_id, ophys_experiment_id, project_code, session_name, session_type, equipment_name, date_of_acquisition, - specimen_id, genotype, sex, age_in_days, + specimen_id, full_genotype, sex, age_in_days, reporter_line, driver_line :param ophys_session_ids: optional list of ophys_session_ids to include @@ -407,7 +407,7 @@ def get_experiment_table( container_id, project_code, container_workflow_state, experiment_workflow_state, session_name, session_type, equipment_name, date_of_acquisition, isi_experiment_id, - specimen_id, sex, age_in_days, genotype, reporter_line, + specimen_id, sex, age_in_days, full_genotype, reporter_line, driver_line, imaging_depth, targeted_structure, published_at :param ophys_experiment_ids: optional list of ophys_experiment_ids to include diff --git a/allensdk/test/brain_observatory/behavior/test_swdb_behavior_project_cache.py b/allensdk/test/brain_observatory/behavior/test_swdb_behavior_project_cache.py index a45c1479b..bf2aaca0f 100644 --- a/allensdk/test/brain_observatory/behavior/test_swdb_behavior_project_cache.py +++ b/allensdk/test/brain_observatory/behavior/test_swdb_behavior_project_cache.py @@ -18,11 +18,14 @@ def session(cache): return cache.get_session(792815735) # Test trials extra columns +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_extra_trials_columns(session): for new_key in ['reward_rate', 'response_binary']: assert new_key in session.trials.keys() + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_extra_stimulus_presentation_columns(session): for new_key in [ @@ -35,11 +38,15 @@ def test_extra_stimulus_presentation_columns(session): 'repeat_within_block']: assert new_key in session.stimulus_presentations.keys() + 
+@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_stimulus_presentations_image_set(session): # We made the image set just 'A' or 'B' assert session.stimulus_presentations['image_set'].unique() == np.array(['A']) + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_stimulus_templates(session): # Was a dict with only one key, where the value was a 3d array. @@ -48,6 +55,7 @@ def test_stimulus_templates(session): assert image_arr.ndim == 2 # Test trial response df +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo @pytest.mark.parametrize('key, output', [ ('mean_response', 0.0053334), @@ -58,6 +66,8 @@ def test_session_trial_response(key, output, session): trial_response = session.trial_response_df np.testing.assert_almost_equal(trial_response.query("cell_specimen_id == 817103993").iloc[0][key], output, decimal=6) + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo @pytest.mark.parametrize('key, output', [ ('time_from_last_lick', 7.3577), @@ -68,21 +78,29 @@ def test_session_flash_response(key, output, session): flash_response = session.flash_response_df np.testing.assert_almost_equal(flash_response.query("cell_specimen_id == 817103993").iloc[0][key], output, decimal=6) + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_analysis_files_metadata(cache): assert cache.analysis_files_metadata[ 'trial_response_df_params' ]['response_window_duration_seconds'] == 0.5 + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_session_image_loading(session): assert isinstance(session.max_projection.data, np.ndarray) + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_no_invalid_rois(session): # We made the cache return sessions without the invalid rois assert session.cell_specimen_table['valid_roi'].all() + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def 
test_get_container_sessions(cache): container_id = cache.experiment_table['container_id'].unique()[0] @@ -91,25 +109,36 @@ def test_get_container_sessions(cache): assert isinstance(session, bpc.ExtendedBehaviorSession) np.testing.assert_almost_equal(session.dff_traces.loc[817103993]['dff'][0], 0.3538657529565) + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_binarized_segmentation_mask_image(session): np.testing.assert_array_equal( np.unique(np.array(session.segmentation_mask_image.data).ravel()), np.array([0, 1]) + ) + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_no_nan_flash_running_speed(session): assert not pd.isnull(session.stimulus_presentations['mean_running_speed']).any() + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_licks_correct_colname(session): assert session.licks.columns == ['timestamps'] + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_rewards_correct_colname(session): assert (session.rewards.columns == ['timestamps', 'volume', 'autorewarded']).all() + +@pytest.mark.skip(reason="deprecated") @pytest.mark.requires_bamboo def test_dff_traces_correct_colname(session): # This is a Friday-harbor specific change From 2e1c6801f804e961e0b19902909348262b146c42 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Thu, 21 Nov 2019 15:02:31 -0800 Subject: [PATCH 60/60] v.1.2.0 docs update --- doc_template/index.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doc_template/index.rst b/doc_template/index.rst index ac360a8be..8bbdc722d 100644 --- a/doc_template/index.rst +++ b/doc_template/index.rst @@ -90,6 +90,20 @@ The Allen SDK provides Python code for accessing experimental metadata along wit See the `mouse connectivity section `_ for more details. 
+What's New - 1.2.0 (November 21, 2019) +----------------------------------------------------------------------- + +The 1.2.0 release adds +- (internal feature) A project cache for the Behavior Ophys project, with example notebook +- (internal feature) A major overhaul of the `BehaviorOphysLimsApi` +- (internal feature) Updates to the `EcephysProjectLimsApi` such that it returns data in the same format as the `EcephysProjectWarehouseApi` +- improved eye-tracking area calculation + +and fixes +- several flaky tests +- regress tests which depend on scipy's `ks_2samp` +- (internal feature) duplicate caching on the Behavior Ophys Lims Api + What's New - 1.1.1 (November 12, 2019) -----------------------------------------------------------------------