diff --git a/allensdk/brain_observatory/behavior/behavior_project_cache.py b/allensdk/brain_observatory/behavior/behavior_project_cache.py index eeccd5650..4730ef2c8 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_cache.py +++ b/allensdk/brain_observatory/behavior/behavior_project_cache.py @@ -2,7 +2,8 @@ import os.path import csv from functools import partial -from typing import Type, Callable, Optional, List, Any, Dict +from typing import Type, Optional, List, Any, Dict, Union +from pathlib import Path import pandas as pd import time import logging @@ -15,8 +16,7 @@ import BehaviorProjectBase from allensdk.api.caching_utilities import one_file_call_caching, call_caching from allensdk.core.exceptions import MissingDataError -from allensdk.core.auth_config import LIMS_DB_CREDENTIAL_MAP -from allensdk.core.authentication import credential_injector, DbCredentials +from allensdk.core.authentication import DbCredentials BehaviorProjectApi = Type[BehaviorProjectBase] @@ -64,11 +64,17 @@ def __init__( self, fetch_api: BehaviorProjectApi = BehaviorProjectLimsApi.default(), fetch_tries: int = 2, - **kwargs): + manifest: Optional[Union[str, Path]] = None, + version: Optional[str] = None, + cache: bool = True): """ Entrypoint for accessing visual behavior data. Supports access to summaries of session data and provides tools for downloading detailed session data (such as dff traces). + Likely you will want to use a class constructor, such as `from_lims`, + to initialize a BehaviorProjectCache, rather than calling this + directly. + --- NOTE --- Because NWB files are not currently supported for this project (as of 11/2019), this cache will not actually save any files of session data @@ -87,38 +93,88 @@ def __init__( Used to pull data from remote sources, after which it is locally cached. Any object inheriting from BehaviorProjectBase is suitable. Current options are: - EcephysProjectLimsApi :: Fetches bleeding-edge data from the + BehaviorProjectLimsApi :: Fetches bleeding-edge data from the Allen Institute"s internal database. Only works if you are on our internal network. fetch_tries : Maximum number of times to attempt a download before giving up and - raising an exception. Note that this is total tries, not retries - **kwargs : - manifest : str or Path - full path at which manifest json will be stored - version : str - version of manifest file. If this mismatches the version - recorded in the file at manifest, an error will be raised. - other kwargs are passed to allensdk.api.cache.Cache + raising an exception. Note that this is total tries, not retries. + Default=2. + manifest : str or Path + full path at which manifest json will be stored. Defaults + to "behavior_project_manifest.json" in the local directory. + version : str + version of manifest file. If this mismatches the version + recorded in the file at manifest, an error will be raised. + Defaults to the manifest version in the class. + cache : bool + Whether to write to the cache. Default=True. """ - kwargs["manifest"] = kwargs.get("manifest", - "behavior_project_manifest.json") - kwargs["version"] = kwargs.get("version", self.MANIFEST_VERSION) + manifest_ = manifest or "behavior_project_manifest.json" + version_ = version or self.MANIFEST_VERSION - super().__init__(**kwargs) + super().__init__(manifest=manifest_, version=version_, cache=cache) self.fetch_api = fetch_api self.fetch_tries = fetch_tries self.logger = logging.getLogger(self.__class__.__name__) @classmethod - def from_lims(cls, lims_credentials: Optional[DbCredentials] = None, + def from_lims(cls, manifest: Optional[Union[str, Path]] = None, + version: Optional[str] = None, + cache: bool = True, + fetch_tries: int = 2, + lims_credentials: Optional[DbCredentials] = None, mtrain_credentials: Optional[DbCredentials] = None, - app_kwargs: Dict[str, Any] = None, **kwargs): - return cls(fetch_api=BehaviorProjectLimsApi.default( + host: Optional[str] = None, + scheme: Optional[str] = None, + asynchronous: Optional[str] = None) -> "BehaviorProjectCache": + """ + Construct a BehaviorProjectCache with a lims api. Use this method + to create a BehaviorProjectCache instance rather than calling + BehaviorProjectCache directly. + + Parameters + ========== + manifest : str or Path + full path at which manifest json will be stored + version : str + version of manifest file. If this mismatches the version + recorded in the file at manifest, an error will be raised. + cache : bool + Whether to write to the cache + fetch_tries : int + Maximum number of times to attempt a download before giving up and + raising an exception. Note that this is total tries, not retries + lims_credentials : DbCredentials + Optional credentials to access LIMS database. + If not set, will look for credentials in environment variables. + mtrain_credentials: DbCredentials + Optional credentials to access mtrain database. + If not set, will look for credentials in environment variables. + host : str + Web host for the app_engine. Currently unused. This argument is + included for consistency with EcephysProjectCache.from_lims. + scheme : str + URI scheme, such as "http". Currently unused. This argument is + included for consistency with EcephysProjectCache.from_lims. + asynchronous : bool + Whether to fetch from web asynchronously. Currently unused. + Returns + ======= + BehaviorProjectCache + BehaviorProjectCache instance with a LIMS fetch API + """ + if host and scheme: + app_kwargs = {"host": host, "scheme": scheme, + "asynchronous": asynchronous} + else: + app_kwargs = None + fetch_api = BehaviorProjectLimsApi.default( lims_credentials=lims_credentials, mtrain_credentials=mtrain_credentials, - app_kwargs=app_kwargs), - **kwargs) + app_kwargs=app_kwargs) + return cls(fetch_api=fetch_api, manifest=manifest, version=version, + cache=cache, fetch_tries=fetch_tries) def get_session_table( self, diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_cache.py b/allensdk/brain_observatory/ecephys/ecephys_project_cache.py index e615ced9f..61cf084d0 100644 --- a/allensdk/brain_observatory/ecephys/ecephys_project_cache.py +++ b/allensdk/brain_observatory/ecephys/ecephys_project_cache.py @@ -1,6 +1,6 @@ from functools import partial from pathlib import Path -from typing import Any, List, Optional +from typing import Any, List, Optional, Union, Callable import ast import pandas as pd @@ -9,13 +9,13 @@ import pynwb from allensdk.api.cache import Cache - +from allensdk.core.authentication import DbCredentials from allensdk.brain_observatory.ecephys.ecephys_project_api import ( EcephysProjectApi, EcephysProjectLimsApi, EcephysProjectWarehouseApi, EcephysProjectFixedApi ) from allensdk.brain_observatory.ecephys.ecephys_project_api.rma_engine import ( - AsyncRmaEngine, + AsyncRmaEngine, RmaEngine ) from allensdk.brain_observatory.ecephys.ecephys_project_api.http_engine import ( write_bytes_from_coroutine, write_from_stream @@ -27,7 +27,6 @@ from allensdk.brain_observatory.ecephys import get_unit_filter_value from allensdk.api.caching_utilities import one_file_call_caching - class EcephysProjectCache(Cache): SESSIONS_KEY = 'sessions' @@ -74,11 +73,13 @@ class EcephysProjectCache(Cache): ) def __init__( - self, - fetch_api: EcephysProjectApi = EcephysProjectWarehouseApi.default(), - fetch_tries: int = 2, - stream_writer = write_from_stream, - **kwargs): + self, + fetch_api: EcephysProjectApi = EcephysProjectWarehouseApi.default(), + fetch_tries: int = 2, + stream_writer: Callable = write_from_stream, + manifest: Optional[Union[str, Path]] = None, + version: Optional[str] = None, + cache: bool = True): """ Entrypoint for accessing ecephys (neuropixels) data. Supports access to cross-session data (like stimulus templates) and high-level summaries of sessionwise data and provides tools for downloading detailed @@ -88,33 +89,34 @@ def __init__( ========== fetch_api : Used to pull data from remote sources, after which it is locally - cached. Any object exposing the EcephysProjectApi interface is + cached. Any object exposing the EcephysProjectApi interface is suitable. Standard options are: - EcephysProjectWarehouseApi :: The default. Fetches publically + EcephysProjectWarehouseApi :: The default. Fetches publically available Allen Institute data - EcephysProjectFixedApi :: Refuses to fetch any data - only the - existing local cache is accessible. Useful if you want to - settle on a fixed dataset for analysis. - EcephysProjectLimsApi :: Fetches bleeding-edge data from the - Allen Institute's internal database. Only works if you are + EcephysProjectFixedApi :: Refuses to fetch any data - only the + existing local cache is accessible. Useful if you want to + settle on a fixed dataset for analysis + EcephysProjectLimsApi :: Fetches bleeding-edge data from the + Allen Institute's internal database. Only works if you are on our internal network. - fetch_tries : - Maximum number of times to attempt a download before giving up and + fetch_tries : int + Maximum number of times to attempt a download before giving up and raising an exception. Note that this is total tries, not retries - **kwargs : - manifest : str or Path - full path at which manifest json will be stored - version : str - version of manifest file. If this mismatches the version - recorded in the file at manifest, an error will be raised. - other kwargs are passed to allensdk.api.cache.Cache - + manifest : str or Path + full path at which manifest json will be stored (default = + "ecephys_project_manifest.json" in the local directory.) + version : str + version of manifest file. If this mismatches the version + recorded in the file at manifest, an error will be raised. + cache: bool + Whether to write to the cache (default=True) """ + manifest_ = manifest or "ecephys_project_manifest.json" + version_ = version or self.MANIFEST_VERSION - kwargs['manifest'] = kwargs.get('manifest', 'ecephys_project_manifest.json') - kwargs['version'] = kwargs.get('version', self.MANIFEST_VERSION) - - super(EcephysProjectCache, self).__init__(**kwargs) + super(EcephysProjectCache, self).__init__(manifest=manifest_, + version=version_, + cache=cache) self.fetch_api = fetch_api self.fetch_tries = fetch_tries self.stream_writer = stream_writer @@ -516,7 +518,7 @@ def _from_http_source_default(cls, fetch_api_cls, fetch_api_kwargs, **kwargs): "asynchronous": True } if fetch_api_kwargs is None else fetch_api_kwargs - if "stream_writer" not in kwargs: + if kwargs.get("stream_writer") is None: if fetch_api_kwargs.get("asynchronous", True): kwargs["stream_writer"] = write_bytes_from_coroutine else: @@ -528,21 +530,124 @@ def _from_http_source_default(cls, fetch_api_cls, fetch_api_kwargs, **kwargs): ) @classmethod - def from_lims(cls, lims_kwargs=None, **kwargs): + def from_lims(cls, lims_credentials: Optional[DbCredentials] = None, + scheme: Optional[str] = None, + host: Optional[str] = None, + asynchronous: bool = True, + manifest: Optional[str] = None, + version: Optional[str] = None, + cache: bool = True, + fetch_tries: int = 2): + """ + Create an instance of EcephysProjectCache with an + EcephysProjectLimsApi. Retrieves bleeding-edge data stored + locally on Allen Institute servers. Only available for use + on-site at the Allen Institute or through a vpn. Requires Allen + Institute database credentials. + + Parameters + ========== + lims_credentials : DbCredentials + Credentials to access LIMS database. If not provided will + attempt to find credentials in environment variables. + scheme : str + URI scheme, such as "http". Defaults to + EcephysProjectLimsApi.default value if unspecified. + Will not be used unless `host` is also specified. + host : str + Web host. Defaults to EcephysProjectLimsApi.default + value if unspecified. Will not be used unless `scheme` is + also specified. + asynchronous : bool + Whether to fetch file asynchronously. Defaults to True. + manifest : str or Path + full path at which manifest json will be stored + version : str + version of manifest file. If this mismatches the version + recorded in the file at manifest, an error will be raised. + cache: bool + Whether to write to the cache (default=True) + fetch_tries : int + Maximum number of times to attempt a download before giving up and + raising an exception. Note that this is total tries, not retries + """ + if scheme and host: + app_kwargs = {"scheme": scheme, "host": host} + else: + app_kwargs = None return cls._from_http_source_default( - EcephysProjectLimsApi, lims_kwargs, **kwargs - ) + EcephysProjectLimsApi, + {"lims_credentials": lims_credentials, + "app_kwargs": app_kwargs, + "asynchronous": asynchronous, + }, # expects dictionary of kwargs + manifest=manifest, version=version, cache=cache, + fetch_tries=fetch_tries) @classmethod - def from_warehouse(cls, warehouse_kwargs=None, **kwargs): + def from_warehouse(cls, + scheme: Optional[str] = None, + host: Optional[str] = None, + asynchronous: bool = True, + manifest: Optional[Union[str, Path]] = None, + version: Optional[str] = None, + cache: bool = True, + fetch_tries: int = 2): + """ + Create an instance of EcephysProjectCache with an + EcephysProjectWarehouseApi. Retrieves released data stored in + the warehouse. + + Parameters + ========== + scheme : str + URI scheme, such as "http". Defaults to + EcephysProjectWarehouseAPI.default value if unspecified. + Will not be used unless `host` is also specified. + host : str + Web host. Defaults to EcephysProjectWarehouseApi.default + value if unspecified. Will not be used unless `scheme` is also + specified. + asynchronous : bool + Whether to fetch file asynchronously. Defaults to True. + manifest : str or Path + full path at which manifest json will be stored + version : str + version of manifest file. If this mismatches the version + recorded in the file at manifest, an error will be raised. + cache: bool + Whether to write to the cache (default=True) + fetch_tries : int + Maximum number of times to attempt a download before giving up and + raising an exception. Note that this is total tries, not retries + """ + if scheme and host: + app_kwargs = {"scheme": scheme, "host": host, + "asynchronous": asynchronous} + else: + app_kwargs = None return cls._from_http_source_default( - EcephysProjectWarehouseApi, warehouse_kwargs, **kwargs + EcephysProjectWarehouseApi, app_kwargs, manifest=manifest, + version=version, cache=cache, fetch_tries=fetch_tries ) - @classmethod - def fixed(cls, **kwargs): - return cls(fetch_api=EcephysProjectFixedApi(), **kwargs) + def fixed(cls, manifest=None, version=None): + """ + Creates a EcephysProjectCache that refuses to fetch any data + - only the existing local cache is accessible. Useful if you + want to settle on a fixed dataset for analysis. + + Parameters + ========== + manifest : str or Path + full path to existing manifest json + version : str + version of manifest file. If this mismatches the version + recorded in the file at manifest, an error will be raised. + """ + return cls(fetch_api=EcephysProjectFixedApi(), manifest=manifest, + version=version) def count_owned(this, other, foreign_key, count_key, inplace=False): diff --git a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_cache.py b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_cache.py index 85c56992d..6ff10a082 100644 --- a/allensdk/test/brain_observatory/ecephys/test_ecephys_project_cache.py +++ b/allensdk/test/brain_observatory/ecephys/test_ecephys_project_cache.py @@ -373,7 +373,7 @@ def test_from_lims_default(tmpdir_factory): tmpdir = str(tmpdir_factory.mktemp("test_from_lims_default")) cache = epc.EcephysProjectCache.from_lims( - manifest_path=os.path.join(tmpdir, "manifest.json") + manifest=os.path.join(tmpdir, "manifest.json") ) assert isinstance(cache.fetch_api.app_engine, AsyncHttpEngine) assert cache.stream_writer is epc.write_bytes_from_coroutine \ No newline at end of file diff --git a/doc_template/examples_root/examples/internal/Lims Behavior Project Cache.ipynb b/doc_template/examples_root/examples/internal/Lims Behavior Project Cache.ipynb index b6228fd06..ef9348104 100644 --- a/doc_template/examples_root/examples/internal/Lims Behavior Project Cache.ipynb +++ b/doc_template/examples_root/examples/internal/Lims Behavior Project Cache.ipynb @@ -892,7 +892,7 @@ ], "source": [ "# But it will work if we use one that already exists\n", - "cache.get_session_data(978244684, fixed=True)" + "cache.get_session_data(latest.name, fixed=True)" ] }, {