From 3083513f5d3af1c56e23fd334736c60b6d2f7d6e Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Fri, 26 Jul 2024 12:32:43 +0200 Subject: [PATCH 01/24] Proof of concept of "buffer_description_api" and xarray reference API bridge --- neo/rawio/baserawio.py | 17 +++++++ neo/rawio/spikeglxrawio.py | 30 +++++++++++- neo/rawio/utils.py | 56 ++++++++++++++++++++++ neo/rawio/xarray_utils.py | 95 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 neo/rawio/utils.py create mode 100644 neo/rawio/xarray_utils.py diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index 1ed8fe6c7..465bfcc5f 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -139,6 +139,9 @@ class BaseRawIO: rawmode = None # one key from possible_raw_modes + # If true then + has_buffer_description_api = False + # TODO Why multi-file would have a single filename is confusing here - shouldn't # the name of this argument be filenames_list or filenames_base or similar? # @@ -1340,6 +1343,20 @@ def _rescale_event_timestamp(self, event_timestamps: np.ndarray, dtype: np.dtype def _rescale_epoch_duration(self, raw_duration: np.ndarray, dtype: np.dtype): raise (NotImplementedError) + ### + # json buffer api zone + # must be implemented if has_buffer_description_api=True + def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: int = 0, stream_index: int = 0): + if not self.has_buffer_description_api: + raise ValueError("This reader do not support buffer_description API") + descr = self._get_analogsignal_buffer_description(block_index, seg_index, stream_index) + return descr + + def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): + raise (NotImplementedError) + + + def pprint_vector(vector, lim: int = 8): vector = np.asarray(vector) diff --git a/neo/rawio/spikeglxrawio.py b/neo/rawio/spikeglxrawio.py index 0b008d9a2..5ef6e9926 100644 --- a/neo/rawio/spikeglxrawio.py +++ b/neo/rawio/spikeglxrawio.py @@ -63,6 +63,7 @@ _spike_channel_dtype, _event_channel_dtype, ) +from .utils import get_memmap_shape, get_memmap_chunk_from_open_file class SpikeGLXRawIO(BaseRawIO): @@ -106,6 +107,8 @@ class SpikeGLXRawIO(BaseRawIO): extensions = ["meta", "bin"] rawmode = "one-dir" + has_buffer_description_api = True + def __init__(self, dirname="", load_sync_channel=False, load_channel_location=False): BaseRawIO.__init__(self) self.dirname = dirname @@ -123,11 +126,14 @@ def _parse_header(self): stream_names = sorted(list(srates.keys()), key=lambda e: srates[e])[::-1] nb_segment = np.unique([info["seg_index"] for info in self.signals_info_list]).size + self._memmaps = {} self.signals_info_dict = {} + # on block + self._buffer_descriptions = {0 :{}} for info in self.signals_info_list: - # key is (seg_index, stream_name) - key = (info["seg_index"], info["stream_name"]) + seg_index, stream_name = info["seg_index"], info["stream_name"] + key = (seg_index, stream_name) if key in self.signals_info_dict: raise KeyError(f"key {key} is already in the signals_info_dict") self.signals_info_dict[key] = info @@ -139,7 +145,22 @@ def _parse_header(self): data = data.reshape(-1, info["num_chan"]) self._memmaps[key] = data + stream_index = stream_names.index(info["stream_name"]) + if seg_index not in self._buffer_descriptions[0]: + self._buffer_descriptions[0][seg_index] = {} + + self._buffer_descriptions[0][seg_index][stream_index] = { + "type" : "binary", + "file_path" : info["bin_file"], + "dtype" : "int16", + "order": "C", + "file_offset" : 0, + "shape" : get_memmap_shape(info["bin_file"], "int16", num_channels=info["num_chan"], offset=0), + } + + # create channel header + signal_streams = [] signal_channels = [] for stream_name in stream_names: @@ -327,6 +348,11 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index) def _rescale_epoch_duration(self, raw_duration, dtype, event_channel_index): return None + def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): + return self._buffer_descriptions[block_index][seg_index][stream_index] + + + def scan_files(dirname): """ diff --git a/neo/rawio/utils.py b/neo/rawio/utils.py new file mode 100644 index 000000000..0f0f7fdf5 --- /dev/null +++ b/neo/rawio/utils.py @@ -0,0 +1,56 @@ +import mmap +import numpy as np + +def get_memmap_shape(filename, dtype, num_channels=None, offset=0): + dtype = np.dtype(dtype) + with open(filename, mode='rb') as f: + f.seek(0, 2) + flen = f.tell() + bytes = flen - offset + if bytes % dtype.itemsize != 0: + raise ValueError("Size of available data is not a multiple of the data-type size.") + size = bytes // dtype.itemsize + if num_channels is None: + shape = (size,) + else: + shape = (size // num_channels, num_channels) + return shape + + +def get_memmap_chunk_from_open_file(fid, num_channels, start, stop, dtype, file_offset=0): + """ + Utility fonction to get a chunk as a memmap array directly from an opened file. + Using this instead memmap can avoid memmory consumption when multiprocessing. + + Similar mechanism is used in spikeinterface. + + """ + bytes_per_sample = num_channels * dtype.itemsize + + # Calculate byte offsets + start_byte = file_offset + start * bytes_per_sample + end_byte = file_offset + stop * bytes_per_sample + + # Calculate the length of the data chunk to load into memory + length = end_byte - start_byte + + # The mmap offset must be a multiple of mmap.ALLOCATIONGRANULARITY + memmap_offset, start_offset = divmod(start_byte, mmap.ALLOCATIONGRANULARITY) + memmap_offset *= mmap.ALLOCATIONGRANULARITY + + # Adjust the length so it includes the extra data from rounding down + # the memmap offset to a multiple of ALLOCATIONGRANULARITY + length += start_offset + + memmap_obj = mmap.mmap(fid.fileno(), length=length, access=mmap.ACCESS_READ, offset=memmap_offset) + + arr = np.ndarray( + shape=((stop - start), num_channels), + dtype=dtype, + buffer=memmap_obj, + offset=start_offset, + ) + + return arr + + diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py new file mode 100644 index 000000000..e37c9de7d --- /dev/null +++ b/neo/rawio/xarray_utils.py @@ -0,0 +1,95 @@ +import json + +import numpy as np + + + +def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0): + """ + Transform the buffer_description_api into a dict ready for the xarray API 'reference://' + """ + rfs = dict() + rfs["version"] = 1 + rfs["refs"] = dict() + rfs["refs"][".zgroup"] = json.dumps(dict(zarr_format=2)) + + # rawio_reader. + signal_streams = rawio_reader.header["signal_streams"] + for stream_index in range(len(signal_streams)): + stream_name = signal_streams["name"][stream_index] + stream_id = signal_streams["id"][stream_index] + print(stream_index, stream_name, stream_id) + + + descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, + stream_index=stream_index) + + if descr["type"] == "binary": + dtype = np.dtype(descr["dtype"]) + zarray = dict( + chunks=descr["shape"], + compressor=None, + dtype=dtype.str, + fill_value=None, + filters=None, + order=descr["order"], + shape=descr["shape"], + zarr_format=2, + ) + zattrs = dict( + _ARRAY_DIMENSIONS=[f'time_{stream_id}', f'channel_{stream_id}'], + ) + # unique big chunk + # TODO : optional split in several small chunks + array_size = np.prod(descr["shape"], dtype='int64') + chunk_size = array_size * dtype.itemsize + rfs["refs"][f"{stream_id}/0.0"] = [str(descr["file_path"]), descr["file_offset"], chunk_size] + rfs["refs"][f"{stream_id}/.zarray"] =json.dumps(zarray) + rfs["refs"][f"{stream_id}/.zattrs"] =json.dumps(zattrs) + elif descr["type"] == "hdf5": + raise NotImplementedError + else: + raise ValueError(f"buffer_description type not handled {descr['type']}") + + + + # TODO magnitude gain and offset + # TODO channel names + # TODO sampling rate + # TODO annotations + # TODO array_annotations + + + return rfs + + + +def to_xarray_dataset(rawio_reader): + """ + Utils fonction that transorm an instance a rawio into a xarray.Dataset + with lazy access. + This works only for rawio class that implement the has_buffer_description_api. + + + Note : the original idea of the function is from Ben Dichter in this page + https://gist.github.com/bendichter/30a9afb34b2178098c99f3b01fe72e75 + """ + import xarray as xr + + rfs = to_xarray_reference_api(rawio_reader) + + ds = xr.open_dataset( + "reference://", + mask_and_scale=False, + engine="zarr", + backend_kwargs={ + "storage_options": dict( + fo=rfs, + remote_protocol="file", + ), + "consolidated": False, + }, + ) + return ds + + From e70c014d8dfc0f3447909ab220eb4bc253f533e5 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Tue, 27 Aug 2024 20:43:04 +0200 Subject: [PATCH 02/24] Implement get_analogsignal_chunk() generically when a rawio class has_buffer_description_api=True This should also solve the memmap and memmory leak problem. --- neo/rawio/baserawio.py | 71 ++++++++++++++++++++++++++++++++++-- neo/rawio/spikeglxrawio.py | 74 +++++++++++++++++++------------------- neo/rawio/utils.py | 2 +- 3 files changed, 106 insertions(+), 41 deletions(-) diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index 465bfcc5f..b50bc6941 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -77,6 +77,8 @@ from neo import logging_handler +from .utils import get_memmap_chunk_from_opened_file + possible_raw_modes = [ "one-file", @@ -656,7 +658,12 @@ def get_signal_size(self, block_index: int, seg_index: int, stream_index: int | """ stream_index = self._get_stream_index_from_arg(stream_index) - return self._get_signal_size(block_index, seg_index, stream_index) + + if not self.has_buffer_description_api: + return self._get_signal_size(block_index, seg_index, stream_index) + else: + # use the buffer description + return self._get_signal_size_generic(block_index, seg_index, stream_index) def get_signal_t_start(self, block_index: int, seg_index: int, stream_index: int | None = None): """ @@ -805,7 +812,11 @@ def get_analogsignal_chunk( if np.all(np.diff(channel_indexes) == 1): channel_indexes = slice(channel_indexes[0], channel_indexes[-1] + 1) - raw_chunk = self._get_analogsignal_chunk(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) + if not self.has_buffer_description_api: + raw_chunk = self._get_analogsignal_chunk(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) + else: + # use the buffer description + raw_chunk = self._get_analogsignal_chunk_generic(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) return raw_chunk @@ -1277,6 +1288,7 @@ def _get_signal_size(self, block_index: int, seg_index: int, stream_index: int): All channels indexed must have the same size and t_start. """ + # must NOT be implemented if has_buffer_description_api=True raise (NotImplementedError) def _get_signal_t_start(self, block_index: int, seg_index: int, stream_index: int): @@ -1304,7 +1316,7 @@ def _get_analogsignal_chunk( ------- array of samples, with each requested channel in a column """ - + # must NOT be implemented if has_buffer_description_api=True raise (NotImplementedError) ### @@ -1355,6 +1367,59 @@ def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: i def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): raise (NotImplementedError) + def _get_signal_size_generic(self, block_index, seg_index, stream_index): + # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) + + buffer_desc = self._get_signal_size(block_index, seg_index, stream_index) + return buffer_desc['shape'][0] + + def _get_analogsignal_chunk_generic( + self, + block_index: int, + seg_index: int, + i_start: int | None, + i_stop: int | None, + stream_index: int, + channel_indexes: list[int] | None, + ): + # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) + buffer_desc = self._get_analogsignal_buffer_description( block_index, seg_index, stream_index) + + if buffer_desc['type'] == 'binary': + + # open files on demand and keep reference to opened file + if not hasattr(self, '_memmap_analogsignal_streams'): + self._memmap_analogsignal_streams = {} + if block_index not in self._memmap_analogsignal_streams: + self._memmap_analogsignal_streams[block_index] = {} + if seg_index not in self._memmap_analogsignal_streams[block_index]: + self._memmap_analogsignal_streams[block_index][seg_index] = {} + if stream_index not in self._memmap_analogsignal_streams[block_index][seg_index]: + fid = open(buffer_desc['file_path'], mode='rb') + self._memmap_analogsignal_streams[block_index][seg_index] = fid + else: + fid = self._memmap_analogsignal_streams[block_index][seg_index] + + + i_start = i_start or 0 + i_stop = i_stop or buffer_desc['shape'][0] + + num_channels = buffer_desc['shape'][1] + + raw_sigs = get_memmap_chunk_from_opened_file(fid, num_channels, i_start, i_stop, np.dtype(buffer_desc['dtype']), file_offset=buffer_desc['file_offset']) + + # this is a pre slicing when the stream do not contain all channels (for instance spikeglx when load_sync_channel=False) + channel_slice = buffer_desc.get('channel_slice', None) + if channel_slice is not None: + raw_sigs = raw_sigs[:, channel_slice] + + # channel slice requested + if channel_indexes is not None: + raw_sigs = raw_sigs[:, channel_indexes] + else: + raise NotImplementedError() + + return raw_sigs diff --git a/neo/rawio/spikeglxrawio.py b/neo/rawio/spikeglxrawio.py index 5ef6e9926..278704b38 100644 --- a/neo/rawio/spikeglxrawio.py +++ b/neo/rawio/spikeglxrawio.py @@ -63,7 +63,7 @@ _spike_channel_dtype, _event_channel_dtype, ) -from .utils import get_memmap_shape, get_memmap_chunk_from_open_file +from .utils import get_memmap_shape class SpikeGLXRawIO(BaseRawIO): @@ -127,7 +127,7 @@ def _parse_header(self): nb_segment = np.unique([info["seg_index"] for info in self.signals_info_list]).size - self._memmaps = {} + # self._memmaps = {} self.signals_info_dict = {} # on block self._buffer_descriptions = {0 :{}} @@ -139,11 +139,11 @@ def _parse_header(self): self.signals_info_dict[key] = info # create memmap - data = np.memmap(info["bin_file"], dtype="int16", mode="r", offset=0, order="C") + # data = np.memmap(info["bin_file"], dtype="int16", mode="r", offset=0, order="C") # this should be (info['sample_length'], info['num_chan']) # be some file are shorten - data = data.reshape(-1, info["num_chan"]) - self._memmaps[key] = data + # data = data.reshape(-1, info["num_chan"]) + # self._memmaps[key] = data stream_index = stream_names.index(info["stream_name"]) if seg_index not in self._buffer_descriptions[0]: @@ -273,42 +273,42 @@ def _segment_t_start(self, block_index, seg_index): def _segment_t_stop(self, block_index, seg_index): return self._t_stops[seg_index] - def _get_signal_size(self, block_index, seg_index, stream_index): - stream_id = self.header["signal_streams"][stream_index]["id"] - memmap = self._memmaps[seg_index, stream_id] - return int(memmap.shape[0]) + # def _get_signal_size(self, block_index, seg_index, stream_index): + # stream_id = self.header["signal_streams"][stream_index]["id"] + # memmap = self._memmaps[seg_index, stream_id] + # return int(memmap.shape[0]) def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - stream_id = self.header["signal_streams"][stream_index]["id"] - memmap = self._memmaps[seg_index, stream_id] - stream_name = self.header["signal_streams"]["name"][stream_index] - - # take care of sync channel - info = self.signals_info_dict[0, stream_name] - if not self.load_sync_channel and info["has_sync_trace"]: - memmap = memmap[:, :-1] - - # since we cut the memmap, we can simplify the channel selection - if channel_indexes is None: - channel_selection = slice(None) - elif isinstance(channel_indexes, slice): - channel_selection = channel_indexes - elif not isinstance(channel_indexes, slice): - if np.all(np.diff(channel_indexes) == 1): - # consecutive channel then slice this avoid a copy (because of ndarray.take(...) - # and so keep the underlying memmap - channel_selection = slice(channel_indexes[0], channel_indexes[0] + len(channel_indexes)) - else: - channel_selection = channel_indexes - else: - raise ValueError("get_analogsignal_chunk : channel_indexes" "must be slice or list or array of int") - - raw_signals = memmap[slice(i_start, i_stop), channel_selection] - - return raw_signals + # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + # stream_id = self.header["signal_streams"][stream_index]["id"] + # memmap = self._memmaps[seg_index, stream_id] + # stream_name = self.header["signal_streams"]["name"][stream_index] + + # # take care of sync channel + # info = self.signals_info_dict[0, stream_name] + # if not self.load_sync_channel and info["has_sync_trace"]: + # memmap = memmap[:, :-1] + + # # since we cut the memmap, we can simplify the channel selection + # if channel_indexes is None: + # channel_selection = slice(None) + # elif isinstance(channel_indexes, slice): + # channel_selection = channel_indexes + # elif not isinstance(channel_indexes, slice): + # if np.all(np.diff(channel_indexes) == 1): + # # consecutive channel then slice this avoid a copy (because of ndarray.take(...) + # # and so keep the underlying memmap + # channel_selection = slice(channel_indexes[0], channel_indexes[0] + len(channel_indexes)) + # else: + # channel_selection = channel_indexes + # else: + # raise ValueError("get_analogsignal_chunk : channel_indexes" "must be slice or list or array of int") + + # raw_signals = memmap[slice(i_start, i_stop), channel_selection] + + # return raw_signals def _event_count(self, event_channel_idx, block_index=None, seg_index=None): timestamps, _, _ = self._get_event_timestamps(block_index, seg_index, event_channel_idx, None, None) diff --git a/neo/rawio/utils.py b/neo/rawio/utils.py index 0f0f7fdf5..cb56ccbb4 100644 --- a/neo/rawio/utils.py +++ b/neo/rawio/utils.py @@ -17,7 +17,7 @@ def get_memmap_shape(filename, dtype, num_channels=None, offset=0): return shape -def get_memmap_chunk_from_open_file(fid, num_channels, start, stop, dtype, file_offset=0): +def get_memmap_chunk_from_opened_file(fid, num_channels, start, stop, dtype, file_offset=0): """ Utility fonction to get a chunk as a memmap array directly from an opened file. Using this instead memmap can avoid memmory consumption when multiprocessing. From 24fe98e9c5d106b56f7d6b64ca697ded36419a2b Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Thu, 29 Aug 2024 18:00:28 +0200 Subject: [PATCH 03/24] wip --- neo/rawio/xarray_utils.py | 125 ++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py index e37c9de7d..80d537e0b 100644 --- a/neo/rawio/xarray_utils.py +++ b/neo/rawio/xarray_utils.py @@ -2,11 +2,19 @@ import numpy as np +import base64 -def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0): + +def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ Transform the buffer_description_api into a dict ready for the xarray API 'reference://' + + + See https://fsspec.github.io/kerchunk/spec.html + See https://docs.xarray.dev/en/latest/user-guide/io.html#kerchunk + + """ rfs = dict() rfs["version"] = 1 @@ -15,56 +23,80 @@ def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0): # rawio_reader. signal_streams = rawio_reader.header["signal_streams"] - for stream_index in range(len(signal_streams)): - stream_name = signal_streams["name"][stream_index] - stream_id = signal_streams["id"][stream_index] - print(stream_index, stream_name, stream_id) - - - descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, - stream_index=stream_index) - - if descr["type"] == "binary": - dtype = np.dtype(descr["dtype"]) - zarray = dict( - chunks=descr["shape"], - compressor=None, - dtype=dtype.str, - fill_value=None, - filters=None, - order=descr["order"], - shape=descr["shape"], - zarr_format=2, - ) - zattrs = dict( - _ARRAY_DIMENSIONS=[f'time_{stream_id}', f'channel_{stream_id}'], - ) - # unique big chunk - # TODO : optional split in several small chunks - array_size = np.prod(descr["shape"], dtype='int64') - chunk_size = array_size * dtype.itemsize - rfs["refs"][f"{stream_id}/0.0"] = [str(descr["file_path"]), descr["file_offset"], chunk_size] - rfs["refs"][f"{stream_id}/.zarray"] =json.dumps(zarray) - rfs["refs"][f"{stream_id}/.zattrs"] =json.dumps(zattrs) - elif descr["type"] == "hdf5": - raise NotImplementedError - else: - raise ValueError(f"buffer_description type not handled {descr['type']}") - - - - # TODO magnitude gain and offset - # TODO channel names - # TODO sampling rate - # TODO annotations - # TODO array_annotations + + stream_name = signal_streams["name"][stream_index] + stream_id = signal_streams["id"][stream_index] + # print(stream_index, stream_name, stream_id) + + + descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, + stream_index=stream_index) + + if descr["type"] == "binary": + dtype = np.dtype(descr["dtype"]) + zarray = dict( + chunks=descr["shape"], + compressor=None, + dtype=dtype.str, + fill_value=None, + filters=None, + order=descr["order"], + shape=descr["shape"], + zarr_format=2, + ) + zattrs = dict( + _ARRAY_DIMENSIONS=['time', 'channel'], + name=stream_name, + stream_id=stream_id, + ) + # unique big chunk + # TODO : optional split in several small chunks + array_size = np.prod(descr["shape"], dtype='int64') + chunk_size = array_size * dtype.itemsize + rfs["refs"]["traces/0.0"] = [str(descr["file_path"]), descr["file_offset"], chunk_size] + rfs["refs"]["traces/.zarray"] =json.dumps(zarray) + rfs["refs"]["traces/.zattrs"] =json.dumps(zattrs) + + # small enough can be internal + mask = rawio_reader.header["signal_channels"]["stream_id"] == stream_id + channel_ids = rawio_reader.header["signal_channels"][mask]["id"] + base64_encoded = base64.b64encode(channel_ids.tobytes()) + rfs["refs"]["yep/0"] = "base64:" + base64_encoded.decode() + zarray = dict( + chunks=channel_ids.shape, + compressor=None, + dtype=channel_ids.dtype.str, + fill_value=None, + filters=None, + order="C", + shape=channel_ids.shape, + zarr_format=2, + ) + zattrs = dict( + _ARRAY_DIMENSIONS=['channel'], + ) + rfs["refs"]["yep/.zattrs"] =json.dumps(zattrs) + rfs["refs"]["yep/.zarray"] =json.dumps(zarray) + + elif descr["type"] == "hdf5": + raise NotImplementedError + else: + raise ValueError(f"buffer_description type not handled {descr['type']}") + + + + # TODO magnitude gain and offset + # TODO channel names + # TODO sampling rate + # TODO annotations + # TODO array_annotations return rfs -def to_xarray_dataset(rawio_reader): +def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ Utils fonction that transorm an instance a rawio into a xarray.Dataset with lazy access. @@ -76,10 +108,11 @@ def to_xarray_dataset(rawio_reader): """ import xarray as xr - rfs = to_xarray_reference_api(rawio_reader) + rfs = to_xarray_reference_api(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) ds = xr.open_dataset( "reference://", + # mask_and_scale=True, mask_and_scale=False, engine="zarr", backend_kwargs={ From 22c669896311c26a6876b8f1a3aba548b1160555 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Fri, 30 Aug 2024 15:19:59 +0200 Subject: [PATCH 04/24] test on micromed --- neo/rawio/micromedrawio.py | 46 +++++++--- neo/rawio/xarray_utils.py | 170 ++++++++++++++++++++++++++++--------- 2 files changed, 164 insertions(+), 52 deletions(-) diff --git a/neo/rawio/micromedrawio.py b/neo/rawio/micromedrawio.py index 34a7a3a43..ed3b47c4d 100644 --- a/neo/rawio/micromedrawio.py +++ b/neo/rawio/micromedrawio.py @@ -20,6 +20,7 @@ _spike_channel_dtype, _event_channel_dtype, ) +from .utils import get_memmap_shape from neo.core import NeoReadWriteError @@ -44,11 +45,16 @@ class MicromedRawIO(BaseRawIO): extensions = ["trc", "TRC"] rawmode = "one-file" + has_buffer_description_api = True + def __init__(self, filename=""): BaseRawIO.__init__(self) self.filename = filename def _parse_header(self): + + self._buffer_descriptions = {0 :{ 0: {}}} + with open(self.filename, "rb") as fid: f = StructFile(fid) @@ -96,9 +102,20 @@ def _parse_header(self): # raw signals memmap sig_dtype = "u" + str(Bytes) - self._raw_signals = np.memmap(self.filename, dtype=sig_dtype, mode="r", offset=Data_Start_Offset).reshape( - -1, Num_Chan - ) + # self._raw_signals = np.memmap(self.filename, dtype=sig_dtype, mode="r", offset=Data_Start_Offset).reshape( + # -1, Num_Chan + # ) + signal_shape = get_memmap_shape(self.filename, sig_dtype, num_channels=Num_Chan, offset=Data_Start_Offset) + self._buffer_descriptions[0][0][0] = { + "type" : "binary", + "file_path" : str(self.filename), + "dtype" : sig_dtype, + "order": "C", + "file_offset" : 0, + "shape" : signal_shape, + } + + # Reading Code Info zname2, pos, length = zones["ORDER"] @@ -161,7 +178,7 @@ def _parse_header(self): keep = ( (rawevent["start"] >= rawevent["start"][0]) - & (rawevent["start"] < self._raw_signals.shape[0]) + & (rawevent["start"] < signal_shape[0]) & (rawevent["start"] != 0) ) rawevent = rawevent[keep] @@ -204,21 +221,21 @@ def _segment_t_stop(self, block_index, seg_index): t_stop = self._raw_signals.shape[0] / self._sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - if stream_index != 0: - raise ValueError("`stream_index` must be 0") - return self._raw_signals.shape[0] + # def _get_signal_size(self, block_index, seg_index, stream_index): + # if stream_index != 0: + # raise ValueError("`stream_index` must be 0") + # return self._raw_signals.shape[0] def _get_signal_t_start(self, block_index, seg_index, stream_index): if stream_index != 0: raise ValueError("`stream_index` must be 0") return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - if channel_indexes is None: - channel_indexes = slice(channel_indexes) - raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] - return raw_signals + # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + # if channel_indexes is None: + # channel_indexes = slice(channel_indexes) + # raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] + # return raw_signals def _spike_count(self, block_index, seg_index, unit_index): return 0 @@ -260,3 +277,6 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index) def _rescale_epoch_duration(self, raw_duration, dtype, event_channel_index): durations = raw_duration.astype(dtype) / self._sampling_rate return durations + + def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): + return self._buffer_descriptions[block_index][seg_index][stream_index] diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py index 80d537e0b..5b20b15fc 100644 --- a/neo/rawio/xarray_utils.py +++ b/neo/rawio/xarray_utils.py @@ -1,3 +1,15 @@ +""" +Module to export a rawio reader that support the buffer_description API +to xarray dataset using zarr specification format v2. + +A block/segment/stream correspond to one xarray.DataSet + +A xarray.DataTree can also be expose to get all at block/segment/stream + +Note : + * only some IOs support this at the moment + +""" import json import numpy as np @@ -6,26 +18,71 @@ -def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_index=0): +# TODO ios with memmap: +# * spikeglx DONE +# * openephysbinary +# * winwcprawio +# * winedr +# * tdc +# * rawmcs +# * rawbinarysignal +# * neuroscope +# * neuroexplorer +# * micromed +# * brainvision +# * blackrock +# * bci200 +# * + +# TODO ios with HDF5: +# * nix +# * mearec +# * maxwell +# * biocam + +# TODO with chunk by channel (more complicated): +# * axona +# * openephys legacy +# axograph + + +# add some test in stanard compliance + + + + +# TODO implement zarr v3 + + +def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ Transform the buffer_description_api into a dict ready for the xarray API 'reference://' See https://fsspec.github.io/kerchunk/spec.html See https://docs.xarray.dev/en/latest/user-guide/io.html#kerchunk + + See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html + Usefull read also https://github.com/saalfeldlab/n5 + """ - rfs = dict() - rfs["version"] = 1 - rfs["refs"] = dict() - rfs["refs"][".zgroup"] = json.dumps(dict(zarr_format=2)) # rawio_reader. signal_streams = rawio_reader.header["signal_streams"] stream_name = signal_streams["name"][stream_index] stream_id = signal_streams["id"][stream_index] + + + rfs = dict() + rfs["version"] = 1 + rfs["refs"] = dict() + rfs["refs"][".zgroup"] = json.dumps(dict(zarr_format=2)) + zattrs = dict(name=stream_name) + rfs["refs"][".zattrs"] = json.dumps(zattrs) + # print(stream_index, stream_name, stream_id) @@ -33,6 +90,31 @@ def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_ind stream_index=stream_index) if descr["type"] == "binary": + + + # channel : small enough can be internal with base64 + mask = rawio_reader.header["signal_channels"]["stream_id"] == stream_id + channels = rawio_reader.header["signal_channels"][mask] + channel_ids = channels["id"] + base64_encoded = base64.b64encode(channel_ids.tobytes()) + rfs["refs"]["channel/0"] = "base64:" + base64_encoded.decode() + zarray = dict( + chunks=channel_ids.shape, + compressor=None, + dtype=channel_ids.dtype.str, + fill_value=None, + filters=None, + order="C", + shape=channel_ids.shape, + zarr_format=2, + ) + zattrs = dict( + _ARRAY_DIMENSIONS=['channel'], + ) + rfs["refs"]["channel/.zattrs"] =json.dumps(zattrs) + rfs["refs"]["channel/.zarray"] =json.dumps(zarray) + + # traces buffer dtype = np.dtype(descr["dtype"]) zarray = dict( chunks=descr["shape"], @@ -47,8 +129,17 @@ def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_ind zattrs = dict( _ARRAY_DIMENSIONS=['time', 'channel'], name=stream_name, - stream_id=stream_id, ) + units = np.unique(channels['units']) + if units.size == 1: + zattrs['units'] = units[0] + gain = np.unique(channels['gain']) + offset = np.unique(channels['offset']) + if gain.size == 1 and offset.size: + zattrs['scale_factor'] = gain[0] + zattrs['add_offset'] = offset[0] + zattrs['sampling_rate'] = float(channels['sampling_rate'][0]) + # unique big chunk # TODO : optional split in several small chunks array_size = np.prod(descr["shape"], dtype='int64') @@ -57,40 +148,12 @@ def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_ind rfs["refs"]["traces/.zarray"] =json.dumps(zarray) rfs["refs"]["traces/.zattrs"] =json.dumps(zattrs) - # small enough can be internal - mask = rawio_reader.header["signal_channels"]["stream_id"] == stream_id - channel_ids = rawio_reader.header["signal_channels"][mask]["id"] - base64_encoded = base64.b64encode(channel_ids.tobytes()) - rfs["refs"]["yep/0"] = "base64:" + base64_encoded.decode() - zarray = dict( - chunks=channel_ids.shape, - compressor=None, - dtype=channel_ids.dtype.str, - fill_value=None, - filters=None, - order="C", - shape=channel_ids.shape, - zarr_format=2, - ) - zattrs = dict( - _ARRAY_DIMENSIONS=['channel'], - ) - rfs["refs"]["yep/.zattrs"] =json.dumps(zattrs) - rfs["refs"]["yep/.zarray"] =json.dumps(zarray) - elif descr["type"] == "hdf5": raise NotImplementedError else: raise ValueError(f"buffer_description type not handled {descr['type']}") - - - # TODO magnitude gain and offset - # TODO channel names - # TODO sampling rate - # TODO annotations - # TODO array_annotations - + # TODO channel array_annotations return rfs @@ -108,12 +171,11 @@ def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ import xarray as xr - rfs = to_xarray_reference_api(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) + rfs = to_zarr_v2_reference(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) ds = xr.open_dataset( "reference://", - # mask_and_scale=True, - mask_and_scale=False, + mask_and_scale=True, engine="zarr", backend_kwargs={ "storage_options": dict( @@ -125,4 +187,34 @@ def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, stream_index=0): ) return ds - +def to_xarray_datatree(rawio_reader): + """ + Expose a neo.rawio reader class to a xarray DataTree to lazily read signals. + """ + try: + # not released in xarray 2024.7.0, this will be released soon + from xarray import DataTree + except: + # this need the experimental DataTree in pypi xarray-datatree + try: + from datatree import DataTree + except: + raise ImportError("use xarray dev branch or pip install xarray-datatree") + + num_streams = rawio_reader.header['signal_streams'].size + print(num_streams) + + tree = DataTree(name="root") + + num_block = rawio_reader.header['nb_block'] + for block_index in range(num_block): + block = DataTree(name=f'block{block_index}', parent=tree) + num_seg = rawio_reader.header['nb_segment'][block_index] + for seg_index in range(num_seg): + segment = DataTree(name=f'segment{block_index}', parent=block) + + for stream_index in range(num_streams): + ds = to_xarray_dataset(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) + DataTree(data=ds, name=ds.attrs['name'], parent=segment) + + return tree From 38a84f6415135866227d343cb435b3463f97aafc Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 9 Sep 2024 11:11:44 +0200 Subject: [PATCH 05/24] rebase on buffer_id --- neo/rawio/baserawio.py | 17 +++++++ neo/rawio/spikeglxrawio.py | 30 +++++++++++- neo/rawio/utils.py | 56 ++++++++++++++++++++++ neo/rawio/xarray_utils.py | 95 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 neo/rawio/utils.py create mode 100644 neo/rawio/xarray_utils.py diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index 7cc61de32..b65fb8b7a 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -146,6 +146,9 @@ class BaseRawIO: rawmode = None # one key from possible_raw_modes + # If true then + has_buffer_description_api = False + # TODO Why multi-file would have a single filename is confusing here - shouldn't # the name of this argument be filenames_list or filenames_base or similar? # @@ -1347,6 +1350,20 @@ def _rescale_event_timestamp(self, event_timestamps: np.ndarray, dtype: np.dtype def _rescale_epoch_duration(self, raw_duration: np.ndarray, dtype: np.dtype): raise (NotImplementedError) + ### + # json buffer api zone + # must be implemented if has_buffer_description_api=True + def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: int = 0, stream_index: int = 0): + if not self.has_buffer_description_api: + raise ValueError("This reader do not support buffer_description API") + descr = self._get_analogsignal_buffer_description(block_index, seg_index, stream_index) + return descr + + def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): + raise (NotImplementedError) + + + def pprint_vector(vector, lim: int = 8): vector = np.asarray(vector) diff --git a/neo/rawio/spikeglxrawio.py b/neo/rawio/spikeglxrawio.py index 9595dc04a..cb39d3140 100644 --- a/neo/rawio/spikeglxrawio.py +++ b/neo/rawio/spikeglxrawio.py @@ -64,6 +64,7 @@ _spike_channel_dtype, _event_channel_dtype, ) +from .utils import get_memmap_shape, get_memmap_chunk_from_open_file class SpikeGLXRawIO(BaseRawIO): @@ -107,6 +108,8 @@ class SpikeGLXRawIO(BaseRawIO): extensions = ["meta", "bin"] rawmode = "one-dir" + has_buffer_description_api = True + def __init__(self, dirname="", load_sync_channel=False, load_channel_location=False): BaseRawIO.__init__(self) self.dirname = dirname @@ -124,11 +127,14 @@ def _parse_header(self): stream_names = sorted(list(srates.keys()), key=lambda e: srates[e])[::-1] nb_segment = np.unique([info["seg_index"] for info in self.signals_info_list]).size + self._memmaps = {} self.signals_info_dict = {} + # on block + self._buffer_descriptions = {0 :{}} for info in self.signals_info_list: - # key is (seg_index, stream_name) - key = (info["seg_index"], info["stream_name"]) + seg_index, stream_name = info["seg_index"], info["stream_name"] + key = (seg_index, stream_name) if key in self.signals_info_dict: raise KeyError(f"key {key} is already in the signals_info_dict") self.signals_info_dict[key] = info @@ -140,7 +146,22 @@ def _parse_header(self): data = data.reshape(-1, info["num_chan"]) self._memmaps[key] = data + stream_index = stream_names.index(info["stream_name"]) + if seg_index not in self._buffer_descriptions[0]: + self._buffer_descriptions[0][seg_index] = {} + + self._buffer_descriptions[0][seg_index][stream_index] = { + "type" : "binary", + "file_path" : info["bin_file"], + "dtype" : "int16", + "order": "C", + "file_offset" : 0, + "shape" : get_memmap_shape(info["bin_file"], "int16", num_channels=info["num_chan"], offset=0), + } + + # create channel header + signal_buffers = [] signal_streams = [] signal_channels = [] @@ -336,6 +357,11 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index) def _rescale_epoch_duration(self, raw_duration, dtype, event_channel_index): return None + def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): + return self._buffer_descriptions[block_index][seg_index][stream_index] + + + def scan_files(dirname): """ diff --git a/neo/rawio/utils.py b/neo/rawio/utils.py new file mode 100644 index 000000000..0f0f7fdf5 --- /dev/null +++ b/neo/rawio/utils.py @@ -0,0 +1,56 @@ +import mmap +import numpy as np + +def get_memmap_shape(filename, dtype, num_channels=None, offset=0): + dtype = np.dtype(dtype) + with open(filename, mode='rb') as f: + f.seek(0, 2) + flen = f.tell() + bytes = flen - offset + if bytes % dtype.itemsize != 0: + raise ValueError("Size of available data is not a multiple of the data-type size.") + size = bytes // dtype.itemsize + if num_channels is None: + shape = (size,) + else: + shape = (size // num_channels, num_channels) + return shape + + +def get_memmap_chunk_from_open_file(fid, num_channels, start, stop, dtype, file_offset=0): + """ + Utility fonction to get a chunk as a memmap array directly from an opened file. + Using this instead memmap can avoid memmory consumption when multiprocessing. + + Similar mechanism is used in spikeinterface. + + """ + bytes_per_sample = num_channels * dtype.itemsize + + # Calculate byte offsets + start_byte = file_offset + start * bytes_per_sample + end_byte = file_offset + stop * bytes_per_sample + + # Calculate the length of the data chunk to load into memory + length = end_byte - start_byte + + # The mmap offset must be a multiple of mmap.ALLOCATIONGRANULARITY + memmap_offset, start_offset = divmod(start_byte, mmap.ALLOCATIONGRANULARITY) + memmap_offset *= mmap.ALLOCATIONGRANULARITY + + # Adjust the length so it includes the extra data from rounding down + # the memmap offset to a multiple of ALLOCATIONGRANULARITY + length += start_offset + + memmap_obj = mmap.mmap(fid.fileno(), length=length, access=mmap.ACCESS_READ, offset=memmap_offset) + + arr = np.ndarray( + shape=((stop - start), num_channels), + dtype=dtype, + buffer=memmap_obj, + offset=start_offset, + ) + + return arr + + diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py new file mode 100644 index 000000000..e37c9de7d --- /dev/null +++ b/neo/rawio/xarray_utils.py @@ -0,0 +1,95 @@ +import json + +import numpy as np + + + +def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0): + """ + Transform the buffer_description_api into a dict ready for the xarray API 'reference://' + """ + rfs = dict() + rfs["version"] = 1 + rfs["refs"] = dict() + rfs["refs"][".zgroup"] = json.dumps(dict(zarr_format=2)) + + # rawio_reader. + signal_streams = rawio_reader.header["signal_streams"] + for stream_index in range(len(signal_streams)): + stream_name = signal_streams["name"][stream_index] + stream_id = signal_streams["id"][stream_index] + print(stream_index, stream_name, stream_id) + + + descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, + stream_index=stream_index) + + if descr["type"] == "binary": + dtype = np.dtype(descr["dtype"]) + zarray = dict( + chunks=descr["shape"], + compressor=None, + dtype=dtype.str, + fill_value=None, + filters=None, + order=descr["order"], + shape=descr["shape"], + zarr_format=2, + ) + zattrs = dict( + _ARRAY_DIMENSIONS=[f'time_{stream_id}', f'channel_{stream_id}'], + ) + # unique big chunk + # TODO : optional split in several small chunks + array_size = np.prod(descr["shape"], dtype='int64') + chunk_size = array_size * dtype.itemsize + rfs["refs"][f"{stream_id}/0.0"] = [str(descr["file_path"]), descr["file_offset"], chunk_size] + rfs["refs"][f"{stream_id}/.zarray"] =json.dumps(zarray) + rfs["refs"][f"{stream_id}/.zattrs"] =json.dumps(zattrs) + elif descr["type"] == "hdf5": + raise NotImplementedError + else: + raise ValueError(f"buffer_description type not handled {descr['type']}") + + + + # TODO magnitude gain and offset + # TODO channel names + # TODO sampling rate + # TODO annotations + # TODO array_annotations + + + return rfs + + + +def to_xarray_dataset(rawio_reader): + """ + Utils fonction that transorm an instance a rawio into a xarray.Dataset + with lazy access. + This works only for rawio class that implement the has_buffer_description_api. + + + Note : the original idea of the function is from Ben Dichter in this page + https://gist.github.com/bendichter/30a9afb34b2178098c99f3b01fe72e75 + """ + import xarray as xr + + rfs = to_xarray_reference_api(rawio_reader) + + ds = xr.open_dataset( + "reference://", + mask_and_scale=False, + engine="zarr", + backend_kwargs={ + "storage_options": dict( + fo=rfs, + remote_protocol="file", + ), + "consolidated": False, + }, + ) + return ds + + From 4d31b75aca3bee53a755256f4c05c0712dedccba Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Tue, 27 Aug 2024 20:43:04 +0200 Subject: [PATCH 06/24] Implement get_analogsignal_chunk() generically when a rawio class has_buffer_description_api=True This should also solve the memmap and memmory leak problem. --- neo/rawio/baserawio.py | 71 ++++++++++++++++++++++++++++++++++-- neo/rawio/spikeglxrawio.py | 74 +++++++++++++++++++------------------- neo/rawio/utils.py | 2 +- 3 files changed, 106 insertions(+), 41 deletions(-) diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index b65fb8b7a..926223d14 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -77,6 +77,8 @@ from neo import logging_handler +from .utils import get_memmap_chunk_from_opened_file + possible_raw_modes = [ "one-file", @@ -663,7 +665,12 @@ def get_signal_size(self, block_index: int, seg_index: int, stream_index: int | """ stream_index = self._get_stream_index_from_arg(stream_index) - return self._get_signal_size(block_index, seg_index, stream_index) + + if not self.has_buffer_description_api: + return self._get_signal_size(block_index, seg_index, stream_index) + else: + # use the buffer description + return self._get_signal_size_generic(block_index, seg_index, stream_index) def get_signal_t_start(self, block_index: int, seg_index: int, stream_index: int | None = None): """ @@ -812,7 +819,11 @@ def get_analogsignal_chunk( if np.all(np.diff(channel_indexes) == 1): channel_indexes = slice(channel_indexes[0], channel_indexes[-1] + 1) - raw_chunk = self._get_analogsignal_chunk(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) + if not self.has_buffer_description_api: + raw_chunk = self._get_analogsignal_chunk(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) + else: + # use the buffer description + raw_chunk = self._get_analogsignal_chunk_generic(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) return raw_chunk @@ -1284,6 +1295,7 @@ def _get_signal_size(self, block_index: int, seg_index: int, stream_index: int): All channels indexed must have the same size and t_start. """ + # must NOT be implemented if has_buffer_description_api=True raise (NotImplementedError) def _get_signal_t_start(self, block_index: int, seg_index: int, stream_index: int): @@ -1311,7 +1323,7 @@ def _get_analogsignal_chunk( ------- array of samples, with each requested channel in a column """ - + # must NOT be implemented if has_buffer_description_api=True raise (NotImplementedError) ### @@ -1362,6 +1374,59 @@ def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: i def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): raise (NotImplementedError) + def _get_signal_size_generic(self, block_index, seg_index, stream_index): + # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) + + buffer_desc = self._get_signal_size(block_index, seg_index, stream_index) + return buffer_desc['shape'][0] + + def _get_analogsignal_chunk_generic( + self, + block_index: int, + seg_index: int, + i_start: int | None, + i_stop: int | None, + stream_index: int, + channel_indexes: list[int] | None, + ): + # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) + buffer_desc = self._get_analogsignal_buffer_description( block_index, seg_index, stream_index) + + if buffer_desc['type'] == 'binary': + + # open files on demand and keep reference to opened file + if not hasattr(self, '_memmap_analogsignal_streams'): + self._memmap_analogsignal_streams = {} + if block_index not in self._memmap_analogsignal_streams: + self._memmap_analogsignal_streams[block_index] = {} + if seg_index not in self._memmap_analogsignal_streams[block_index]: + self._memmap_analogsignal_streams[block_index][seg_index] = {} + if stream_index not in self._memmap_analogsignal_streams[block_index][seg_index]: + fid = open(buffer_desc['file_path'], mode='rb') + self._memmap_analogsignal_streams[block_index][seg_index] = fid + else: + fid = self._memmap_analogsignal_streams[block_index][seg_index] + + + i_start = i_start or 0 + i_stop = i_stop or buffer_desc['shape'][0] + + num_channels = buffer_desc['shape'][1] + + raw_sigs = get_memmap_chunk_from_opened_file(fid, num_channels, i_start, i_stop, np.dtype(buffer_desc['dtype']), file_offset=buffer_desc['file_offset']) + + # this is a pre slicing when the stream do not contain all channels (for instance spikeglx when load_sync_channel=False) + channel_slice = buffer_desc.get('channel_slice', None) + if channel_slice is not None: + raw_sigs = raw_sigs[:, channel_slice] + + # channel slice requested + if channel_indexes is not None: + raw_sigs = raw_sigs[:, channel_indexes] + else: + raise NotImplementedError() + + return raw_sigs diff --git a/neo/rawio/spikeglxrawio.py b/neo/rawio/spikeglxrawio.py index cb39d3140..97d4fdfec 100644 --- a/neo/rawio/spikeglxrawio.py +++ b/neo/rawio/spikeglxrawio.py @@ -64,7 +64,7 @@ _spike_channel_dtype, _event_channel_dtype, ) -from .utils import get_memmap_shape, get_memmap_chunk_from_open_file +from .utils import get_memmap_shape class SpikeGLXRawIO(BaseRawIO): @@ -128,7 +128,7 @@ def _parse_header(self): nb_segment = np.unique([info["seg_index"] for info in self.signals_info_list]).size - self._memmaps = {} + # self._memmaps = {} self.signals_info_dict = {} # on block self._buffer_descriptions = {0 :{}} @@ -140,11 +140,11 @@ def _parse_header(self): self.signals_info_dict[key] = info # create memmap - data = np.memmap(info["bin_file"], dtype="int16", mode="r", offset=0, order="C") + # data = np.memmap(info["bin_file"], dtype="int16", mode="r", offset=0, order="C") # this should be (info['sample_length'], info['num_chan']) # be some file are shorten - data = data.reshape(-1, info["num_chan"]) - self._memmaps[key] = data + # data = data.reshape(-1, info["num_chan"]) + # self._memmaps[key] = data stream_index = stream_names.index(info["stream_name"]) if seg_index not in self._buffer_descriptions[0]: @@ -282,42 +282,42 @@ def _segment_t_start(self, block_index, seg_index): def _segment_t_stop(self, block_index, seg_index): return self._t_stops[seg_index] - def _get_signal_size(self, block_index, seg_index, stream_index): - stream_id = self.header["signal_streams"][stream_index]["id"] - memmap = self._memmaps[seg_index, stream_id] - return int(memmap.shape[0]) + # def _get_signal_size(self, block_index, seg_index, stream_index): + # stream_id = self.header["signal_streams"][stream_index]["id"] + # memmap = self._memmaps[seg_index, stream_id] + # return int(memmap.shape[0]) def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - stream_id = self.header["signal_streams"][stream_index]["id"] - memmap = self._memmaps[seg_index, stream_id] - stream_name = self.header["signal_streams"]["name"][stream_index] - - # take care of sync channel - info = self.signals_info_dict[0, stream_name] - if not self.load_sync_channel and info["has_sync_trace"]: - memmap = memmap[:, :-1] - - # since we cut the memmap, we can simplify the channel selection - if channel_indexes is None: - channel_selection = slice(None) - elif isinstance(channel_indexes, slice): - channel_selection = channel_indexes - elif not isinstance(channel_indexes, slice): - if np.all(np.diff(channel_indexes) == 1): - # consecutive channel then slice this avoid a copy (because of ndarray.take(...) - # and so keep the underlying memmap - channel_selection = slice(channel_indexes[0], channel_indexes[0] + len(channel_indexes)) - else: - channel_selection = channel_indexes - else: - raise ValueError("get_analogsignal_chunk : channel_indexes" "must be slice or list or array of int") - - raw_signals = memmap[slice(i_start, i_stop), channel_selection] - - return raw_signals + # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + # stream_id = self.header["signal_streams"][stream_index]["id"] + # memmap = self._memmaps[seg_index, stream_id] + # stream_name = self.header["signal_streams"]["name"][stream_index] + + # # take care of sync channel + # info = self.signals_info_dict[0, stream_name] + # if not self.load_sync_channel and info["has_sync_trace"]: + # memmap = memmap[:, :-1] + + # # since we cut the memmap, we can simplify the channel selection + # if channel_indexes is None: + # channel_selection = slice(None) + # elif isinstance(channel_indexes, slice): + # channel_selection = channel_indexes + # elif not isinstance(channel_indexes, slice): + # if np.all(np.diff(channel_indexes) == 1): + # # consecutive channel then slice this avoid a copy (because of ndarray.take(...) + # # and so keep the underlying memmap + # channel_selection = slice(channel_indexes[0], channel_indexes[0] + len(channel_indexes)) + # else: + # channel_selection = channel_indexes + # else: + # raise ValueError("get_analogsignal_chunk : channel_indexes" "must be slice or list or array of int") + + # raw_signals = memmap[slice(i_start, i_stop), channel_selection] + + # return raw_signals def _event_count(self, event_channel_idx, block_index=None, seg_index=None): timestamps, _, _ = self._get_event_timestamps(block_index, seg_index, event_channel_idx, None, None) diff --git a/neo/rawio/utils.py b/neo/rawio/utils.py index 0f0f7fdf5..cb56ccbb4 100644 --- a/neo/rawio/utils.py +++ b/neo/rawio/utils.py @@ -17,7 +17,7 @@ def get_memmap_shape(filename, dtype, num_channels=None, offset=0): return shape -def get_memmap_chunk_from_open_file(fid, num_channels, start, stop, dtype, file_offset=0): +def get_memmap_chunk_from_opened_file(fid, num_channels, start, stop, dtype, file_offset=0): """ Utility fonction to get a chunk as a memmap array directly from an opened file. Using this instead memmap can avoid memmory consumption when multiprocessing. From 5506b43ca28afce990679e81b25774913f5b1fd5 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Thu, 29 Aug 2024 18:00:28 +0200 Subject: [PATCH 07/24] wip --- neo/rawio/xarray_utils.py | 125 ++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py index e37c9de7d..80d537e0b 100644 --- a/neo/rawio/xarray_utils.py +++ b/neo/rawio/xarray_utils.py @@ -2,11 +2,19 @@ import numpy as np +import base64 -def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0): + +def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ Transform the buffer_description_api into a dict ready for the xarray API 'reference://' + + + See https://fsspec.github.io/kerchunk/spec.html + See https://docs.xarray.dev/en/latest/user-guide/io.html#kerchunk + + """ rfs = dict() rfs["version"] = 1 @@ -15,56 +23,80 @@ def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0): # rawio_reader. signal_streams = rawio_reader.header["signal_streams"] - for stream_index in range(len(signal_streams)): - stream_name = signal_streams["name"][stream_index] - stream_id = signal_streams["id"][stream_index] - print(stream_index, stream_name, stream_id) - - - descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, - stream_index=stream_index) - - if descr["type"] == "binary": - dtype = np.dtype(descr["dtype"]) - zarray = dict( - chunks=descr["shape"], - compressor=None, - dtype=dtype.str, - fill_value=None, - filters=None, - order=descr["order"], - shape=descr["shape"], - zarr_format=2, - ) - zattrs = dict( - _ARRAY_DIMENSIONS=[f'time_{stream_id}', f'channel_{stream_id}'], - ) - # unique big chunk - # TODO : optional split in several small chunks - array_size = np.prod(descr["shape"], dtype='int64') - chunk_size = array_size * dtype.itemsize - rfs["refs"][f"{stream_id}/0.0"] = [str(descr["file_path"]), descr["file_offset"], chunk_size] - rfs["refs"][f"{stream_id}/.zarray"] =json.dumps(zarray) - rfs["refs"][f"{stream_id}/.zattrs"] =json.dumps(zattrs) - elif descr["type"] == "hdf5": - raise NotImplementedError - else: - raise ValueError(f"buffer_description type not handled {descr['type']}") - - - - # TODO magnitude gain and offset - # TODO channel names - # TODO sampling rate - # TODO annotations - # TODO array_annotations + + stream_name = signal_streams["name"][stream_index] + stream_id = signal_streams["id"][stream_index] + # print(stream_index, stream_name, stream_id) + + + descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, + stream_index=stream_index) + + if descr["type"] == "binary": + dtype = np.dtype(descr["dtype"]) + zarray = dict( + chunks=descr["shape"], + compressor=None, + dtype=dtype.str, + fill_value=None, + filters=None, + order=descr["order"], + shape=descr["shape"], + zarr_format=2, + ) + zattrs = dict( + _ARRAY_DIMENSIONS=['time', 'channel'], + name=stream_name, + stream_id=stream_id, + ) + # unique big chunk + # TODO : optional split in several small chunks + array_size = np.prod(descr["shape"], dtype='int64') + chunk_size = array_size * dtype.itemsize + rfs["refs"]["traces/0.0"] = [str(descr["file_path"]), descr["file_offset"], chunk_size] + rfs["refs"]["traces/.zarray"] =json.dumps(zarray) + rfs["refs"]["traces/.zattrs"] =json.dumps(zattrs) + + # small enough can be internal + mask = rawio_reader.header["signal_channels"]["stream_id"] == stream_id + channel_ids = rawio_reader.header["signal_channels"][mask]["id"] + base64_encoded = base64.b64encode(channel_ids.tobytes()) + rfs["refs"]["yep/0"] = "base64:" + base64_encoded.decode() + zarray = dict( + chunks=channel_ids.shape, + compressor=None, + dtype=channel_ids.dtype.str, + fill_value=None, + filters=None, + order="C", + shape=channel_ids.shape, + zarr_format=2, + ) + zattrs = dict( + _ARRAY_DIMENSIONS=['channel'], + ) + rfs["refs"]["yep/.zattrs"] =json.dumps(zattrs) + rfs["refs"]["yep/.zarray"] =json.dumps(zarray) + + elif descr["type"] == "hdf5": + raise NotImplementedError + else: + raise ValueError(f"buffer_description type not handled {descr['type']}") + + + + # TODO magnitude gain and offset + # TODO channel names + # TODO sampling rate + # TODO annotations + # TODO array_annotations return rfs -def to_xarray_dataset(rawio_reader): +def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ Utils fonction that transorm an instance a rawio into a xarray.Dataset with lazy access. @@ -76,10 +108,11 @@ def to_xarray_dataset(rawio_reader): """ import xarray as xr - rfs = to_xarray_reference_api(rawio_reader) + rfs = to_xarray_reference_api(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) ds = xr.open_dataset( "reference://", + # mask_and_scale=True, mask_and_scale=False, engine="zarr", backend_kwargs={ From 7a0de15fa333c14709ed3b472ba455f634c650e3 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Fri, 30 Aug 2024 15:19:59 +0200 Subject: [PATCH 08/24] test on micromed --- neo/rawio/micromedrawio.py | 46 +++++++--- neo/rawio/xarray_utils.py | 170 ++++++++++++++++++++++++++++--------- 2 files changed, 164 insertions(+), 52 deletions(-) diff --git a/neo/rawio/micromedrawio.py b/neo/rawio/micromedrawio.py index 39d932f09..9dde65def 100644 --- a/neo/rawio/micromedrawio.py +++ b/neo/rawio/micromedrawio.py @@ -21,6 +21,7 @@ _spike_channel_dtype, _event_channel_dtype, ) +from .utils import get_memmap_shape from neo.core import NeoReadWriteError @@ -45,11 +46,16 @@ class MicromedRawIO(BaseRawIO): extensions = ["trc", "TRC"] rawmode = "one-file" + has_buffer_description_api = True + def __init__(self, filename=""): BaseRawIO.__init__(self) self.filename = filename def _parse_header(self): + + self._buffer_descriptions = {0 :{ 0: {}}} + with open(self.filename, "rb") as fid: f = StructFile(fid) @@ -97,9 +103,20 @@ def _parse_header(self): # raw signals memmap sig_dtype = "u" + str(Bytes) - self._raw_signals = np.memmap(self.filename, dtype=sig_dtype, mode="r", offset=Data_Start_Offset).reshape( - -1, Num_Chan - ) + # self._raw_signals = np.memmap(self.filename, dtype=sig_dtype, mode="r", offset=Data_Start_Offset).reshape( + # -1, Num_Chan + # ) + signal_shape = get_memmap_shape(self.filename, sig_dtype, num_channels=Num_Chan, offset=Data_Start_Offset) + self._buffer_descriptions[0][0][0] = { + "type" : "binary", + "file_path" : str(self.filename), + "dtype" : sig_dtype, + "order": "C", + "file_offset" : 0, + "shape" : signal_shape, + } + + # Reading Code Info zname2, pos, length = zones["ORDER"] @@ -164,7 +181,7 @@ def _parse_header(self): keep = ( (rawevent["start"] >= rawevent["start"][0]) - & (rawevent["start"] < self._raw_signals.shape[0]) + & (rawevent["start"] < signal_shape[0]) & (rawevent["start"] != 0) ) rawevent = rawevent[keep] @@ -208,21 +225,21 @@ def _segment_t_stop(self, block_index, seg_index): t_stop = self._raw_signals.shape[0] / self._sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - if stream_index != 0: - raise ValueError("`stream_index` must be 0") - return self._raw_signals.shape[0] + # def _get_signal_size(self, block_index, seg_index, stream_index): + # if stream_index != 0: + # raise ValueError("`stream_index` must be 0") + # return self._raw_signals.shape[0] def _get_signal_t_start(self, block_index, seg_index, stream_index): if stream_index != 0: raise ValueError("`stream_index` must be 0") return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - if channel_indexes is None: - channel_indexes = slice(channel_indexes) - raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] - return raw_signals + # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + # if channel_indexes is None: + # channel_indexes = slice(channel_indexes) + # raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] + # return raw_signals def _spike_count(self, block_index, seg_index, unit_index): return 0 @@ -264,3 +281,6 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index) def _rescale_epoch_duration(self, raw_duration, dtype, event_channel_index): durations = raw_duration.astype(dtype) / self._sampling_rate return durations + + def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): + return self._buffer_descriptions[block_index][seg_index][stream_index] diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py index 80d537e0b..5b20b15fc 100644 --- a/neo/rawio/xarray_utils.py +++ b/neo/rawio/xarray_utils.py @@ -1,3 +1,15 @@ +""" +Module to export a rawio reader that support the buffer_description API +to xarray dataset using zarr specification format v2. + +A block/segment/stream correspond to one xarray.DataSet + +A xarray.DataTree can also be expose to get all at block/segment/stream + +Note : + * only some IOs support this at the moment + +""" import json import numpy as np @@ -6,26 +18,71 @@ -def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_index=0): +# TODO ios with memmap: +# * spikeglx DONE +# * openephysbinary +# * winwcprawio +# * winedr +# * tdc +# * rawmcs +# * rawbinarysignal +# * neuroscope +# * neuroexplorer +# * micromed +# * brainvision +# * blackrock +# * bci200 +# * + +# TODO ios with HDF5: +# * nix +# * mearec +# * maxwell +# * biocam + +# TODO with chunk by channel (more complicated): +# * axona +# * openephys legacy +# axograph + + +# add some test in stanard compliance + + + + +# TODO implement zarr v3 + + +def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ Transform the buffer_description_api into a dict ready for the xarray API 'reference://' See https://fsspec.github.io/kerchunk/spec.html See https://docs.xarray.dev/en/latest/user-guide/io.html#kerchunk + + See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html + Usefull read also https://github.com/saalfeldlab/n5 + """ - rfs = dict() - rfs["version"] = 1 - rfs["refs"] = dict() - rfs["refs"][".zgroup"] = json.dumps(dict(zarr_format=2)) # rawio_reader. signal_streams = rawio_reader.header["signal_streams"] stream_name = signal_streams["name"][stream_index] stream_id = signal_streams["id"][stream_index] + + + rfs = dict() + rfs["version"] = 1 + rfs["refs"] = dict() + rfs["refs"][".zgroup"] = json.dumps(dict(zarr_format=2)) + zattrs = dict(name=stream_name) + rfs["refs"][".zattrs"] = json.dumps(zattrs) + # print(stream_index, stream_name, stream_id) @@ -33,6 +90,31 @@ def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_ind stream_index=stream_index) if descr["type"] == "binary": + + + # channel : small enough can be internal with base64 + mask = rawio_reader.header["signal_channels"]["stream_id"] == stream_id + channels = rawio_reader.header["signal_channels"][mask] + channel_ids = channels["id"] + base64_encoded = base64.b64encode(channel_ids.tobytes()) + rfs["refs"]["channel/0"] = "base64:" + base64_encoded.decode() + zarray = dict( + chunks=channel_ids.shape, + compressor=None, + dtype=channel_ids.dtype.str, + fill_value=None, + filters=None, + order="C", + shape=channel_ids.shape, + zarr_format=2, + ) + zattrs = dict( + _ARRAY_DIMENSIONS=['channel'], + ) + rfs["refs"]["channel/.zattrs"] =json.dumps(zattrs) + rfs["refs"]["channel/.zarray"] =json.dumps(zarray) + + # traces buffer dtype = np.dtype(descr["dtype"]) zarray = dict( chunks=descr["shape"], @@ -47,8 +129,17 @@ def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_ind zattrs = dict( _ARRAY_DIMENSIONS=['time', 'channel'], name=stream_name, - stream_id=stream_id, ) + units = np.unique(channels['units']) + if units.size == 1: + zattrs['units'] = units[0] + gain = np.unique(channels['gain']) + offset = np.unique(channels['offset']) + if gain.size == 1 and offset.size: + zattrs['scale_factor'] = gain[0] + zattrs['add_offset'] = offset[0] + zattrs['sampling_rate'] = float(channels['sampling_rate'][0]) + # unique big chunk # TODO : optional split in several small chunks array_size = np.prod(descr["shape"], dtype='int64') @@ -57,40 +148,12 @@ def to_xarray_reference_api(rawio_reader, block_index=0, seg_index=0, stream_ind rfs["refs"]["traces/.zarray"] =json.dumps(zarray) rfs["refs"]["traces/.zattrs"] =json.dumps(zattrs) - # small enough can be internal - mask = rawio_reader.header["signal_channels"]["stream_id"] == stream_id - channel_ids = rawio_reader.header["signal_channels"][mask]["id"] - base64_encoded = base64.b64encode(channel_ids.tobytes()) - rfs["refs"]["yep/0"] = "base64:" + base64_encoded.decode() - zarray = dict( - chunks=channel_ids.shape, - compressor=None, - dtype=channel_ids.dtype.str, - fill_value=None, - filters=None, - order="C", - shape=channel_ids.shape, - zarr_format=2, - ) - zattrs = dict( - _ARRAY_DIMENSIONS=['channel'], - ) - rfs["refs"]["yep/.zattrs"] =json.dumps(zattrs) - rfs["refs"]["yep/.zarray"] =json.dumps(zarray) - elif descr["type"] == "hdf5": raise NotImplementedError else: raise ValueError(f"buffer_description type not handled {descr['type']}") - - - # TODO magnitude gain and offset - # TODO channel names - # TODO sampling rate - # TODO annotations - # TODO array_annotations - + # TODO channel array_annotations return rfs @@ -108,12 +171,11 @@ def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ import xarray as xr - rfs = to_xarray_reference_api(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) + rfs = to_zarr_v2_reference(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) ds = xr.open_dataset( "reference://", - # mask_and_scale=True, - mask_and_scale=False, + mask_and_scale=True, engine="zarr", backend_kwargs={ "storage_options": dict( @@ -125,4 +187,34 @@ def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, stream_index=0): ) return ds - +def to_xarray_datatree(rawio_reader): + """ + Expose a neo.rawio reader class to a xarray DataTree to lazily read signals. + """ + try: + # not released in xarray 2024.7.0, this will be released soon + from xarray import DataTree + except: + # this need the experimental DataTree in pypi xarray-datatree + try: + from datatree import DataTree + except: + raise ImportError("use xarray dev branch or pip install xarray-datatree") + + num_streams = rawio_reader.header['signal_streams'].size + print(num_streams) + + tree = DataTree(name="root") + + num_block = rawio_reader.header['nb_block'] + for block_index in range(num_block): + block = DataTree(name=f'block{block_index}', parent=tree) + num_seg = rawio_reader.header['nb_segment'][block_index] + for seg_index in range(num_seg): + segment = DataTree(name=f'segment{block_index}', parent=block) + + for stream_index in range(num_streams): + ds = to_xarray_dataset(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) + DataTree(data=ds, name=ds.attrs['name'], parent=segment) + + return tree From a53d14769b15952542e6526b601518fa19ceaefc Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 9 Sep 2024 11:30:33 +0200 Subject: [PATCH 09/24] some fix --- neo/rawio/baserawio.py | 2 +- neo/rawio/micromedrawio.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index 926223d14..3e6bdaba1 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -1377,7 +1377,7 @@ def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_in def _get_signal_size_generic(self, block_index, seg_index, stream_index): # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) - buffer_desc = self._get_signal_size(block_index, seg_index, stream_index) + buffer_desc = self._get_analogsignal_buffer_description(block_index, seg_index, stream_index) return buffer_desc['shape'][0] def _get_analogsignal_chunk_generic( diff --git a/neo/rawio/micromedrawio.py b/neo/rawio/micromedrawio.py index 9dde65def..a09581634 100644 --- a/neo/rawio/micromedrawio.py +++ b/neo/rawio/micromedrawio.py @@ -222,7 +222,8 @@ def _segment_t_start(self, block_index, seg_index): return 0.0 def _segment_t_stop(self, block_index, seg_index): - t_stop = self._raw_signals.shape[0] / self._sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = sig_size / self._sampling_rate return t_stop # def _get_signal_size(self, block_index, seg_index, stream_index): From f6ea345e365eeace6997c38cbf811c0161285bcf Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Fri, 13 Sep 2024 14:58:25 +0200 Subject: [PATCH 10/24] make strema a slice of buffer and xarray api use buffer_id --- neo/rawio/baserawio.py | 20 +++++++++++++------- neo/rawio/micromedrawio.py | 18 ++++++++++-------- neo/rawio/spikeglxrawio.py | 16 ++++++++++++---- neo/rawio/xarray_utils.py | 31 +++++++++++++++---------------- 4 files changed, 50 insertions(+), 35 deletions(-) diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index c6f4f4bfe..39d76436e 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -1365,10 +1365,10 @@ def _rescale_epoch_duration(self, raw_duration: np.ndarray, dtype: np.dtype): ### # json buffer api zone # must be implemented if has_buffer_description_api=True - def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: int = 0, stream_index: int = 0): + def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: int = 0, buffer_id: str = None): if not self.has_buffer_description_api: raise ValueError("This reader do not support buffer_description API") - descr = self._get_analogsignal_buffer_description(block_index, seg_index, stream_index) + descr = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id) return descr def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): @@ -1390,8 +1390,15 @@ def _get_analogsignal_chunk_generic( stream_index: int, channel_indexes: list[int] | None, ): + + stream_id = self.header["signal_streams"][stream_index]["id"] + buffer_id = self.header["signal_streams"][stream_index]["buffer_id"] + + buffer_slice = self._stream_buffer_slice[stream_id] + + # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) - buffer_desc = self._get_analogsignal_buffer_description( block_index, seg_index, stream_index) + buffer_desc = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id) if buffer_desc['type'] == 'binary': @@ -1417,10 +1424,9 @@ def _get_analogsignal_chunk_generic( raw_sigs = get_memmap_chunk_from_opened_file(fid, num_channels, i_start, i_stop, np.dtype(buffer_desc['dtype']), file_offset=buffer_desc['file_offset']) # this is a pre slicing when the stream do not contain all channels (for instance spikeglx when load_sync_channel=False) - channel_slice = buffer_desc.get('channel_slice', None) - if channel_slice is not None: - raw_sigs = raw_sigs[:, channel_slice] - + if buffer_slice is not None: + raw_sigs = raw_sigs[:, buffer_slice] + # channel slice requested if channel_indexes is not None: raw_sigs = raw_sigs[:, channel_indexes] diff --git a/neo/rawio/micromedrawio.py b/neo/rawio/micromedrawio.py index a09581634..1983724f2 100644 --- a/neo/rawio/micromedrawio.py +++ b/neo/rawio/micromedrawio.py @@ -107,7 +107,9 @@ def _parse_header(self): # -1, Num_Chan # ) signal_shape = get_memmap_shape(self.filename, sig_dtype, num_channels=Num_Chan, offset=Data_Start_Offset) - self._buffer_descriptions[0][0][0] = { + buffer_id = "0" + stream_id = "0" + self._buffer_descriptions[0][0][buffer_id] = { "type" : "binary", "file_path" : str(self.filename), "dtype" : sig_dtype, @@ -145,14 +147,14 @@ def _parse_header(self): (sampling_rate,) = f.read_f("H") sampling_rate *= Rate_Min chan_id = str(c) - stream_id = "0" - buffer_id = "0" + signal_channels.append((chan_name, chan_id, sampling_rate, sig_dtype, units, gain, offset, stream_id, buffer_id)) signal_channels = np.array(signal_channels, dtype=_signal_channel_dtype) - - signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype) - signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype) + + self._stream_buffer_slice = {"0": slice(None)} + signal_buffers = np.array([("Signals", buffer_id)], dtype=_signal_buffer_dtype) + signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype) if np.unique(signal_channels["sampling_rate"]).size != 1: raise NeoReadWriteError("The sampling rates must be the same across signal channels") @@ -283,5 +285,5 @@ def _rescale_epoch_duration(self, raw_duration, dtype, event_channel_index): durations = raw_duration.astype(dtype) / self._sampling_rate return durations - def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): - return self._buffer_descriptions[block_index][seg_index][stream_index] + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] diff --git a/neo/rawio/spikeglxrawio.py b/neo/rawio/spikeglxrawio.py index 3e49e23ea..24f39d41e 100644 --- a/neo/rawio/spikeglxrawio.py +++ b/neo/rawio/spikeglxrawio.py @@ -130,8 +130,9 @@ def _parse_header(self): # self._memmaps = {} self.signals_info_dict = {} - # on block + # one unique block self._buffer_descriptions = {0 :{}} + self._stream_buffer_slice = {} for info in self.signals_info_list: seg_index, stream_name = info["seg_index"], info["stream_name"] key = (seg_index, stream_name) @@ -146,11 +147,13 @@ def _parse_header(self): # data = data.reshape(-1, info["num_chan"]) # self._memmaps[key] = data - stream_index = stream_names.index(info["stream_name"]) + buffer_id = stream_name + block_index = 0 + if seg_index not in self._buffer_descriptions[0]: - self._buffer_descriptions[0][seg_index] = {} + self._buffer_descriptions[block_index][seg_index] = {} - self._buffer_descriptions[0][seg_index][stream_index] = { + self._buffer_descriptions[block_index][seg_index][buffer_id] = { "type" : "binary", "file_path" : info["bin_file"], "dtype" : "int16", @@ -158,6 +161,7 @@ def _parse_header(self): "file_offset" : 0, "shape" : get_memmap_shape(info["bin_file"], "int16", num_channels=info["num_chan"], offset=0), } + # create channel header @@ -193,10 +197,14 @@ def _parse_header(self): buffer_id, ) ) + + # all channel by dafult unless load_sync_channel=False + self._stream_buffer_slice[stream_id] = None # check sync channel validity if "nidq" not in stream_name: if not self.load_sync_channel and info["has_sync_trace"]: signal_channels = signal_channels[:-1] + self._stream_buffer_slice[stream_id] = slice(0, -1) if self.load_sync_channel and not info["has_sync_trace"]: raise ValueError("SYNC channel is not present in the recording. " "Set load_sync_channel to False") diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py index 5b20b15fc..3af2c6fb1 100644 --- a/neo/rawio/xarray_utils.py +++ b/neo/rawio/xarray_utils.py @@ -54,7 +54,7 @@ # TODO implement zarr v3 -def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, stream_index=0): +def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=None): """ Transform the buffer_description_api into a dict ready for the xarray API 'reference://' @@ -70,30 +70,29 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, stream_index= """ # rawio_reader. - signal_streams = rawio_reader.header["signal_streams"] + signal_buffers = rawio_reader.header["signal_buffers"] - stream_name = signal_streams["name"][stream_index] - stream_id = signal_streams["id"][stream_index] + buffer_index = list(signal_buffers["id"]).index(buffer_id) + + buffer_name = signal_buffers["name"][buffer_index] rfs = dict() rfs["version"] = 1 rfs["refs"] = dict() rfs["refs"][".zgroup"] = json.dumps(dict(zarr_format=2)) - zattrs = dict(name=stream_name) + zattrs = dict(name=buffer_name) rfs["refs"][".zattrs"] = json.dumps(zattrs) - # print(stream_index, stream_name, stream_id) - descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, - stream_index=stream_index) + buffer_id=buffer_id) if descr["type"] == "binary": # channel : small enough can be internal with base64 - mask = rawio_reader.header["signal_channels"]["stream_id"] == stream_id + mask = rawio_reader.header["signal_channels"]["buffer_id"] == buffer_id channels = rawio_reader.header["signal_channels"][mask] channel_ids = channels["id"] base64_encoded = base64.b64encode(channel_ids.tobytes()) @@ -128,7 +127,7 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, stream_index= ) zattrs = dict( _ARRAY_DIMENSIONS=['time', 'channel'], - name=stream_name, + name=buffer_name, ) units = np.unique(channels['units']) if units.size == 1: @@ -159,7 +158,7 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, stream_index= -def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, stream_index=0): +def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, buffer_id=None): """ Utils fonction that transorm an instance a rawio into a xarray.Dataset with lazy access. @@ -171,7 +170,7 @@ def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, stream_index=0): """ import xarray as xr - rfs = to_zarr_v2_reference(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) + rfs = to_zarr_v2_reference(rawio_reader, block_index=block_index, seg_index=seg_index, buffer_id=buffer_id) ds = xr.open_dataset( "reference://", @@ -201,8 +200,8 @@ def to_xarray_datatree(rawio_reader): except: raise ImportError("use xarray dev branch or pip install xarray-datatree") - num_streams = rawio_reader.header['signal_streams'].size - print(num_streams) + signal_buffers = rawio_reader.header['signal_buffers'] + buffer_ids = signal_buffers["id"] tree = DataTree(name="root") @@ -213,8 +212,8 @@ def to_xarray_datatree(rawio_reader): for seg_index in range(num_seg): segment = DataTree(name=f'segment{block_index}', parent=block) - for stream_index in range(num_streams): - ds = to_xarray_dataset(rawio_reader, block_index=block_index, seg_index=seg_index, stream_index=stream_index) + for buffer_id in buffer_ids: + ds = to_xarray_dataset(rawio_reader, block_index=block_index, seg_index=seg_index, buffer_id=buffer_id) DataTree(data=ds, name=ds.attrs['name'], parent=segment) return tree From 81c7121543d17ea5f182ed27d0fba0f5a5065ff0 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 16 Sep 2024 12:39:45 +0200 Subject: [PATCH 11/24] json api : winedr + winwcp --- neo/rawio/baserawio.py | 6 +++--- neo/rawio/winedrrawio.py | 42 ++++++++++++++++++-------------------- neo/rawio/winwcprawio.py | 43 ++++++++++++++++++++------------------- neo/rawio/xarray_utils.py | 3 +-- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index 39d76436e..4d8fe9d70 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -1371,14 +1371,14 @@ def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: i descr = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id) return descr - def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): raise (NotImplementedError) def _get_signal_size_generic(self, block_index, seg_index, stream_index): # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) - buffer_desc = self._get_analogsignal_buffer_description(block_index, seg_index, stream_index) - buffer_desc = self._get_signal_size(block_index, seg_index, stream_index) + buffer_id = self.header["signal_streams"][stream_index]["buffer_id"] + buffer_desc = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id) return buffer_desc['shape'][0] def _get_analogsignal_chunk_generic( diff --git a/neo/rawio/winedrrawio.py b/neo/rawio/winedrrawio.py index 00b0d31c2..2efed30c5 100644 --- a/neo/rawio/winedrrawio.py +++ b/neo/rawio/winedrrawio.py @@ -25,6 +25,7 @@ class WinEdrRawIO(BaseRawIO): extensions = ["EDR", "edr"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename=""): """ @@ -62,16 +63,19 @@ def _parse_header(self): val = float(val) header[key] = val - self._raw_signals = np.memmap( - self.filename, - dtype="int16", - mode="r", - shape=( - header["NP"] // header["NC"], - header["NC"], - ), - offset=header["NBH"], - ) + # one unique block with one unique segment + # one unique buffer splited in several streams + buffer_id = "0" + self._buffer_descriptions = {0 :{0 :{}}} + self._buffer_descriptions[0][0][buffer_id] = { + "type" : "binary", + "file_path" : str(self.filename), + "dtype" : "int16", + "order": "C", + "file_offset" : int(header["NBH"]), + "shape" : (header["NP"] // header["NC"], header["NC"]), + } + DT = header["DT"] if "TU" in header: @@ -101,12 +105,14 @@ def _parse_header(self): characteristics = signal_channels[_common_sig_characteristics] unique_characteristics = np.unique(characteristics) signal_streams = [] + self._stream_buffer_slice = {} for i in range(unique_characteristics.size): mask = unique_characteristics[i] == characteristics signal_channels["stream_id"][mask] = str(i) # unique buffer for all streams buffer_id = "0" signal_streams.append((f"stream {i}", str(i), buffer_id)) + self._stream_buffer_slice[stream_id] = np.flatnonzero(mask) signal_streams = np.array(signal_streams, dtype=_signal_stream_dtype) # all stream are in the same unique buffer : memmap @@ -137,20 +143,12 @@ def _segment_t_start(self, block_index, seg_index): return 0.0 def _segment_t_stop(self, block_index, seg_index): - t_stop = self._raw_signals.shape[0] / self._sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = sig_size / self._sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - return self._raw_signals.shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - stream_id = self.header["signal_streams"][stream_index]["id"] - (global_channel_indexes,) = np.nonzero(self.header["signal_channels"]["stream_id"] == stream_id) - if channel_indexes is None: - channel_indexes = slice(None) - global_channel_indexes = global_channel_indexes[channel_indexes] - raw_signals = self._raw_signals[slice(i_start, i_stop), global_channel_indexes] - return raw_signals + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] diff --git a/neo/rawio/winwcprawio.py b/neo/rawio/winwcprawio.py index 83e5a25d3..90abb4257 100644 --- a/neo/rawio/winwcprawio.py +++ b/neo/rawio/winwcprawio.py @@ -36,6 +36,7 @@ class WinWcpRawIO(BaseRawIO): extensions = ["wcp"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename=""): BaseRawIO.__init__(self) @@ -47,9 +48,9 @@ def _source_name(self): def _parse_header(self): SECTORSIZE = 512 - # only one memmap for all segment to avoid - # "error: [Errno 24] Too many open files" - self._memmap = np.memmap(self.filename, dtype="uint8", mode="r") + # one unique block with several segments + # one unique buffer splited in several streams + self._buffer_descriptions = {0 :{}} with open(self.filename, "rb") as fid: @@ -59,7 +60,6 @@ def _parse_header(self): for line in headertext.split("\r\n"): if "=" not in line: continue - # print '#' , line , '#' key, val = line.split("=") if key in [ "NC", @@ -81,7 +81,6 @@ def _parse_header(self): header[key] = val nb_segment = header["NR"] - self._raw_signals = {} all_sampling_interval = [] # loop for record number for seg_index in range(header["NR"]): @@ -96,9 +95,16 @@ def _parse_header(self): NP = NP // header["NC"] NC = header["NC"] ind0 = offset + header["NBA"] * SECTORSIZE - ind1 = ind0 + NP * NC * 2 - sigs = self._memmap[ind0:ind1].view("int16").reshape(NP, NC) - self._raw_signals[seg_index] = sigs + buffer_id = "0" + self._buffer_descriptions[0][seg_index] = {} + self._buffer_descriptions[0][seg_index][buffer_id] = { + "type" : "binary", + "file_path" : str(self.filename), + "dtype" : "int16", + "order": "C", + "file_offset" : ind0, + "shape" : (NP, NC), + } all_sampling_interval.append(analysisHeader["SamplingInterval"]) @@ -126,12 +132,15 @@ def _parse_header(self): characteristics = signal_channels[_common_sig_characteristics] unique_characteristics = np.unique(characteristics) signal_streams = [] + self._stream_buffer_slice = {} for i in range(unique_characteristics.size): mask = unique_characteristics[i] == characteristics signal_channels["stream_id"][mask] = str(i) # unique buffer for all streams buffer_id = "0" - signal_streams.append((f"stream {i}", str(i), buffer_id)) + stream_id = str(i) + signal_streams.append((f"stream {i}", stream_id, buffer_id)) + self._stream_buffer_slice[stream_id] = np.flatnonzero(mask) signal_streams = np.array(signal_streams, dtype=_signal_stream_dtype) # all stream are in the same unique buffer : memmap @@ -162,23 +171,15 @@ def _segment_t_start(self, block_index, seg_index): return 0.0 def _segment_t_stop(self, block_index, seg_index): - t_stop = self._raw_signals[seg_index].shape[0] / self._sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = sig_size / self._sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - return self._raw_signals[seg_index].shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - stream_id = self.header["signal_streams"][stream_index]["id"] - (global_channel_indexes,) = np.nonzero(self.header["signal_channels"]["stream_id"] == stream_id) - if channel_indexes is None: - channel_indexes = slice(None) - inds = global_channel_indexes[channel_indexes] - raw_signals = self._raw_signals[seg_index][slice(i_start, i_stop), inds] - return raw_signals + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] AnalysisDescription = [ diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py index 3af2c6fb1..98fe10879 100644 --- a/neo/rawio/xarray_utils.py +++ b/neo/rawio/xarray_utils.py @@ -210,8 +210,7 @@ def to_xarray_datatree(rawio_reader): block = DataTree(name=f'block{block_index}', parent=tree) num_seg = rawio_reader.header['nb_segment'][block_index] for seg_index in range(num_seg): - segment = DataTree(name=f'segment{block_index}', parent=block) - + segment = DataTree(name=f'segment{seg_index}', parent=block) for buffer_id in buffer_ids: ds = to_xarray_dataset(rawio_reader, block_index=block_index, seg_index=seg_index, buffer_id=buffer_id) DataTree(data=ds, name=ds.attrs['name'], parent=segment) From 2d21e481f509bc0bbcee57365c77cfbc17d3de44 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 16 Sep 2024 16:56:43 +0200 Subject: [PATCH 12/24] buffer api : RawBinarySignalRawIO + RawMCSRawIO --- neo/rawio/baserawio.py | 1 - neo/rawio/rawbinarysignalrawio.py | 42 +++++++++++++----------- neo/rawio/rawmcsrawio.py | 54 ++++++++++++++++++++----------- 3 files changed, 60 insertions(+), 37 deletions(-) diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index 4d8fe9d70..7e6f0a7d6 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -1414,7 +1414,6 @@ def _get_analogsignal_chunk_generic( self._memmap_analogsignal_streams[block_index][seg_index] = fid else: fid = self._memmap_analogsignal_streams[block_index][seg_index] - i_start = i_start or 0 i_stop = i_stop or buffer_desc['shape'][0] diff --git a/neo/rawio/rawbinarysignalrawio.py b/neo/rawio/rawbinarysignalrawio.py index af401be37..a1fb28d41 100644 --- a/neo/rawio/rawbinarysignalrawio.py +++ b/neo/rawio/rawbinarysignalrawio.py @@ -29,6 +29,7 @@ class RawBinarySignalIO _spike_channel_dtype, _event_channel_dtype, ) +from .utils import get_memmap_shape class RawBinarySignalRawIO(BaseRawIO): @@ -54,6 +55,7 @@ class RawBinarySignalRawIO(BaseRawIO): extensions = ["raw", "bin", "dat"] rawmode = "one-file" + has_buffer_description_api = True def __init__( self, @@ -80,21 +82,32 @@ def _source_name(self): def _parse_header(self): if os.path.exists(self.filename): - self._raw_signals = np.memmap(self.filename, dtype=self.dtype, mode="r", offset=self.bytesoffset).reshape( - -1, self.nb_channel - ) + # on unique buffer and stream + buffer_id = "0" + stream_id = "0" + shape = get_memmap_shape(self.filename, self.dtype, num_channels=self.nb_channel, offset=self.bytesoffset) + self._buffer_descriptions = {0:{0:{}}} + self._buffer_descriptions[0][0][buffer_id] = { + "type" : "binary", + "file_path" : str(self.filename), + "dtype" : "uint16", + "order": "C", + "file_offset" : self.bytesoffset, + "shape" : shape, + } + self._stream_buffer_slice = {stream_id : None} + + else: # The the neo.io.RawBinarySignalIO is used for write_segment - self._raw_signals = None + self._buffer_descriptions = None signal_channels = [] - if self._raw_signals is not None: + if self._buffer_descriptions is not None: for c in range(self.nb_channel): name = f"ch{c}" chan_id = f"{c}" units = "" - stream_id = "0" - buffer_id = "0" signal_channels.append( ( name, @@ -143,21 +156,14 @@ def _segment_t_start(self, block_index, seg_index): return 0.0 def _segment_t_stop(self, block_index, seg_index): - t_stop = self._raw_signals.shape[0] / self.sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = sig_size / self.sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - if stream_index != 0: - raise ValueError("stream_index must be 0") - return self._raw_signals.shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): if stream_index != 0: raise ValueError("stream_index must be 0") return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - if channel_indexes is None: - channel_indexes = slice(None) - raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] - return raw_signals + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] diff --git a/neo/rawio/rawmcsrawio.py b/neo/rawio/rawmcsrawio.py index 585e5fdd0..484c3ef24 100644 --- a/neo/rawio/rawmcsrawio.py +++ b/neo/rawio/rawmcsrawio.py @@ -24,7 +24,7 @@ _spike_channel_dtype, _event_channel_dtype, ) - +from .utils import get_memmap_shape class RawMCSRawIO(BaseRawIO): """ @@ -39,6 +39,7 @@ class RawMCSRawIO(BaseRawIO): extensions = ["raw"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename=""): BaseRawIO.__init__(self) @@ -54,19 +55,31 @@ def _parse_header(self): self.sampling_rate = info["sampling_rate"] self.nb_channel = len(info["channel_names"]) - # one unique stream and buffer - signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype) - signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype) - - self._raw_signals = np.memmap(self.filename, dtype=self.dtype, mode="r", offset=info["header_size"]).reshape( - -1, self.nb_channel - ) + # one unique stream and buffer with all channels + stream_id = "0" + buffer_id = "0" + signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype) + signal_buffers = np.array([("Signals", buffer_id)], dtype=_signal_buffer_dtype) + + # self._raw_signals = np.memmap(self.filename, dtype=self.dtype, mode="r", offset=info["header_size"]).reshape( + # -1, self.nb_channel + # ) + file_offset = info["header_size"] + shape = get_memmap_shape(self.filename, self.dtype, num_channels=self.nb_channel, offset=file_offset) + self._buffer_descriptions = {0:{0:{}}} + self._buffer_descriptions[0][0][buffer_id] = { + "type" : "binary", + "file_path" : str(self.filename), + "dtype" : "uint16", + "order": "C", + "file_offset" : file_offset, + "shape" : shape, + } + self._stream_buffer_slice = {stream_id : None} sig_channels = [] for c in range(self.nb_channel): chan_id = str(c) - stream_id = "0" - buffer_id = "0" sig_channels.append( ( info["channel_names"][c], @@ -107,20 +120,25 @@ def _segment_t_start(self, block_index, seg_index): return 0.0 def _segment_t_stop(self, block_index, seg_index): - t_stop = self._raw_signals.shape[0] / self.sampling_rate + # t_stop = self._raw_signals.shape[0] / self.sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = sig_size / self.sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - return self._raw_signals.shape[0] + # def _get_signal_size(self, block_index, seg_index, stream_index): + # return self._raw_signals.shape[0] def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - if channel_indexes is None: - channel_indexes = slice(None) - raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] - return raw_signals + # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + # if channel_indexes is None: + # channel_indexes = slice(None) + # raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] + # return raw_signals + + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] def parse_mcs_raw_header(filename): From a64b0542fd530a8d7ab76c3abb19ea93ec81126f Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Wed, 18 Sep 2024 08:53:29 +0200 Subject: [PATCH 13/24] json api : neuroscope + openephysraw --- neo/rawio/micromedrawio.py | 11 ----- neo/rawio/neuroscoperawio.py | 39 +++++++++++------- neo/rawio/openephysbinaryrawio.py | 68 +++++++++++++++++++++++-------- 3 files changed, 75 insertions(+), 43 deletions(-) diff --git a/neo/rawio/micromedrawio.py b/neo/rawio/micromedrawio.py index 1983724f2..d67450d3f 100644 --- a/neo/rawio/micromedrawio.py +++ b/neo/rawio/micromedrawio.py @@ -228,22 +228,11 @@ def _segment_t_stop(self, block_index, seg_index): t_stop = sig_size / self._sampling_rate return t_stop - # def _get_signal_size(self, block_index, seg_index, stream_index): - # if stream_index != 0: - # raise ValueError("`stream_index` must be 0") - # return self._raw_signals.shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): if stream_index != 0: raise ValueError("`stream_index` must be 0") return 0.0 - # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - # if channel_indexes is None: - # channel_indexes = slice(channel_indexes) - # raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] - # return raw_signals - def _spike_count(self, block_index, seg_index, unit_index): return 0 diff --git a/neo/rawio/neuroscoperawio.py b/neo/rawio/neuroscoperawio.py index 9582bb5db..05a23e749 100644 --- a/neo/rawio/neuroscoperawio.py +++ b/neo/rawio/neuroscoperawio.py @@ -30,10 +30,13 @@ _event_channel_dtype, ) +from .utils import get_memmap_shape + class NeuroScopeRawIO(BaseRawIO): extensions = ["xml", "dat", "lfp", "eeg"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename, binary_file=None): """raw reader for Neuroscope @@ -106,11 +109,24 @@ def _parse_header(self): raise (NotImplementedError) # Extract signal from the data file - self._raw_signals = np.memmap(self.data_file_path, dtype=sig_dtype, mode="r", offset=0).reshape(-1, nb_channel) + # one unique stream and buffer + shape = get_memmap_shape(self.data_file_path, sig_dtype, num_channels=nb_channel, offset=0) + buffer_id = "0" + stream_id = "0" + self._buffer_descriptions = {0: {0:{}}} + self._buffer_descriptions[0][0][buffer_id] = { + "type" : "binary", + "file_path" : str(self.data_file_path), + "dtype" : sig_dtype, + "order": "C", + "file_offset" : 0, + "shape" : shape, + } + self._stream_buffer_slice = {stream_id : None} # one unique stream and buffer - signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype) - signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype) + signal_buffers = np.array([("Signals", buffer_id)], dtype=_signal_buffer_dtype) + signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype) # signals sig_channels = [] @@ -148,25 +164,15 @@ def _segment_t_start(self, block_index, seg_index): return 0.0 def _segment_t_stop(self, block_index, seg_index): - t_stop = self._raw_signals.shape[0] / self._sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = sig_size / self._sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - if stream_index != 0: - raise ValueError("`stream_index` must be 0") - return self._raw_signals.shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): if stream_index != 0: raise ValueError("`stream_index` must be 0") return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - if channel_indexes is None: - channel_indexes = slice(None) - raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] - return raw_signals - def _resolve_xml_and_data_paths(self): """ Resolves XML and data paths from the provided filename and binary_file attributes. @@ -210,3 +216,6 @@ def _resolve_xml_and_data_paths(self): self.xml_file_path = xml_file_path self.data_file_path = data_file_path + + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] diff --git a/neo/rawio/openephysbinaryrawio.py b/neo/rawio/openephysbinaryrawio.py index b03d129ee..320773b26 100644 --- a/neo/rawio/openephysbinaryrawio.py +++ b/neo/rawio/openephysbinaryrawio.py @@ -24,6 +24,8 @@ _event_channel_dtype, ) +from .utils import get_memmap_shape + class OpenEphysBinaryRawIO(BaseRawIO): """ @@ -60,6 +62,7 @@ class OpenEphysBinaryRawIO(BaseRawIO): extensions = ["xml", "oebin", "txt", "dat", "npy"] rawmode = "one-dir" + has_buffer_description_api = True def __init__(self, dirname="", load_sync_channel=False, experiment_names=None): BaseRawIO.__init__(self) @@ -160,13 +163,30 @@ def _parse_header(self): signal_buffers = np.array(signal_buffers, dtype=_signal_buffer_dtype) # create memmap for signals + self._buffer_descriptions = {} + self._stream_buffer_slice = {} for block_index in range(nb_block): + self._buffer_descriptions[block_index] = {} for seg_index in range(nb_segment_per_block[block_index]): + self._buffer_descriptions[block_index][seg_index] = {} for stream_index, info in self._sig_streams[block_index][seg_index].items(): num_channels = len(info["channels"]) - memmap_sigs = np.memmap(info["raw_filename"], info["dtype"], order="C", mode="r").reshape( - -1, num_channels - ) + # memmap_sigs = np.memmap(info["raw_filename"], info["dtype"], order="C", mode="r").reshape( + # -1, num_channels + # ) + stream_id = str(stream_index) + buffer_id = str(stream_index) + shape = get_memmap_shape(info["raw_filename"], info["dtype"], num_channels=num_channels, + offset=0) + self._buffer_descriptions[block_index][seg_index][buffer_id] = { + "type" : "binary", + "file_path" : str(info["raw_filename"]), + "dtype" : info["dtype"], + "order": "C", + "file_offset" : 0, + "shape" : shape, + } + has_sync_trace = self._sig_streams[block_index][seg_index][stream_index]["has_sync_trace"] # check sync channel validity (only for AP and LF) @@ -174,7 +194,13 @@ def _parse_header(self): raise ValueError( "SYNC channel is not present in the recording. " "Set load_sync_channel to False" ) - info["memmap"] = memmap_sigs + + if has_sync_trace: + self._stream_buffer_slice[stream_id] = slice(None, -1) + else: + self._stream_buffer_slice[stream_id] = None + + # info["memmap"] = memmap_sigs # events zone # channel map: one channel one stream @@ -275,7 +301,11 @@ def _parse_header(self): # loop over signals for stream_index, info in self._sig_streams[block_index][seg_index].items(): t_start = info["t_start"] - dur = info["memmap"].shape[0] / float(info["sample_rate"]) + stream_id = str(stream_index) + buffer_id = str(stream_index) + sig_size = self._buffer_descriptions[block_index][seg_index][buffer_id]["shape"][0] + # dur = info["memmap"].shape[0] / float(info["sample_rate"]) + dur = sig_size / float(info["sample_rate"]) t_stop = t_start + dur if global_t_start is None or global_t_start > t_start: global_t_start = t_start @@ -373,25 +403,25 @@ def _channels_to_group_id(self, channel_indexes): group_id = group_ids[0] return group_id - def _get_signal_size(self, block_index, seg_index, stream_index): - sigs = self._sig_streams[block_index][seg_index][stream_index]["memmap"] - return sigs.shape[0] + # def _get_signal_size(self, block_index, seg_index, stream_index): + # sigs = self._sig_streams[block_index][seg_index][stream_index]["memmap"] + # return sigs.shape[0] def _get_signal_t_start(self, block_index, seg_index, stream_index): t_start = self._sig_streams[block_index][seg_index][stream_index]["t_start"] return t_start - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - sigs = self._sig_streams[block_index][seg_index][stream_index]["memmap"] - has_sync_trace = self._sig_streams[block_index][seg_index][stream_index]["has_sync_trace"] + # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + # sigs = self._sig_streams[block_index][seg_index][stream_index]["memmap"] + # has_sync_trace = self._sig_streams[block_index][seg_index][stream_index]["has_sync_trace"] - if not self.load_sync_channel and has_sync_trace: - sigs = sigs[:, :-1] + # if not self.load_sync_channel and has_sync_trace: + # sigs = sigs[:, :-1] - sigs = sigs[i_start:i_stop, :] - if channel_indexes is not None: - sigs = sigs[:, channel_indexes] - return sigs + # sigs = sigs[i_start:i_stop, :] + # if channel_indexes is not None: + # sigs = sigs[:, channel_indexes] + # return sigs def _spike_count(self, block_index, seg_index, unit_index): pass @@ -450,6 +480,10 @@ def _rescale_epoch_duration(self, raw_duration, dtype, event_channel_index): durations = raw_duration.astype(dtype) return durations + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] + + _possible_event_stream_names = ( "timestamps", From bc5a1224e81d2611dc8e8a96ee69130a272bfbbf Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Wed, 25 Sep 2024 14:29:18 +0200 Subject: [PATCH 14/24] More reader with buffer description --- neo/rawio/axonrawio.py | 53 ++++++++++++++++++++++++----------- neo/rawio/bci2000rawio.py | 13 +++++---- neo/rawio/brainvisionrawio.py | 42 +++++++++++++++------------ neo/rawio/elanrawio.py | 43 +++++++++++++++------------- neo/rawio/spikeglxrawio.py | 4 +-- 5 files changed, 94 insertions(+), 61 deletions(-) diff --git a/neo/rawio/axonrawio.py b/neo/rawio/axonrawio.py index 67af44e3f..781a6fabb 100644 --- a/neo/rawio/axonrawio.py +++ b/neo/rawio/axonrawio.py @@ -90,6 +90,7 @@ class AxonRawIO(BaseRawIO): extensions = ["abf"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename=""): BaseRawIO.__init__(self) @@ -115,7 +116,7 @@ def _parse_header(self): head_offset = info["sections"]["DataSection"]["uBlockIndex"] * BLOCKSIZE totalsize = info["sections"]["DataSection"]["llNumEntries"] - self._raw_data = np.memmap(self.filename, dtype=sig_dtype, mode="r", shape=(totalsize,), offset=head_offset) + # self._raw_data = np.memmap(self.filename, dtype=sig_dtype, mode="r", shape=(totalsize,), offset=head_offset) # 3 possible modes if version < 2.0: @@ -142,7 +143,7 @@ def _parse_header(self): ) else: episode_array = np.empty(1, [("offset", "i4"), ("len", "i4")]) - episode_array[0]["len"] = self._raw_data.size + episode_array[0]["len"] = totalsize episode_array[0]["offset"] = 0 # sampling_rate @@ -154,9 +155,14 @@ def _parse_header(self): # one sweep = one segment nb_segment = episode_array.size + stream_id = "0" + buffer_id = "0" + # Get raw data by segment - self._raw_signals = {} + # self._raw_signals = {} self._t_starts = {} + self._buffer_descriptions = {0 :{}} + self._stream_buffer_slice = {stream_id : None} pos = 0 for seg_index in range(nb_segment): length = episode_array[seg_index]["len"] @@ -169,7 +175,17 @@ def _parse_header(self): if (fSynchTimeUnit != 0) and (mode == 1): length /= fSynchTimeUnit - self._raw_signals[seg_index] = self._raw_data[pos : pos + length].reshape(-1, nbchannel) + # self._raw_signals[seg_index] = self._raw_data[pos : pos + length].reshape(-1, nbchannel) + + self._buffer_descriptions[0][seg_index] = {} + self._buffer_descriptions[0][seg_index][buffer_id] = { + "type" : "binary", + "file_path" : str(self.filename), + "dtype" : str(sig_dtype), + "order": "C", + "file_offset" : head_offset + pos * sig_dtype.itemsize, + "shape" : (int(length // nbchannel), int(nbchannel)), + } pos += length t_start = float(episode_array[seg_index]["offset"]) @@ -227,15 +243,13 @@ def _parse_header(self): offset -= info["listADCInfo"][chan_id]["fSignalOffset"] else: gain, offset = 1.0, 0.0 - stream_id = "0" - buffer_id = "0" signal_channels.append((name, str(chan_id), self._sampling_rate, sig_dtype, units, gain, offset, stream_id, buffer_id)) signal_channels = np.array(signal_channels, dtype=_signal_channel_dtype) # one unique signal stream and buffer - signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype) - signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype) + signal_buffers = np.array([("Signals", buffer_id)], dtype=_signal_buffer_dtype) + signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype) # only one events channel : tag # In ABF timstamps are not attached too any particular segment @@ -293,21 +307,26 @@ def _segment_t_start(self, block_index, seg_index): return self._t_starts[seg_index] def _segment_t_stop(self, block_index, seg_index): - t_stop = self._t_starts[seg_index] + self._raw_signals[seg_index].shape[0] / self._sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = self._t_starts[seg_index] + sig_size / self._sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - shape = self._raw_signals[seg_index].shape - return shape[0] + # def _get_signal_size(self, block_index, seg_index, stream_index): + # shape = self._raw_signals[seg_index].shape + # return shape[0] def _get_signal_t_start(self, block_index, seg_index, stream_index): return self._t_starts[seg_index] - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - if channel_indexes is None: - channel_indexes = slice(None) - raw_signals = self._raw_signals[seg_index][slice(i_start, i_stop), channel_indexes] - return raw_signals + # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + # if channel_indexes is None: + # channel_indexes = slice(None) + # raw_signals = self._raw_signals[seg_index][slice(i_start, i_stop), channel_indexes] + # return raw_signals + + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] + def _event_count(self, block_index, seg_index, event_channel_index): return self._raw_ev_timestamps.size diff --git a/neo/rawio/bci2000rawio.py b/neo/rawio/bci2000rawio.py index 96fac9183..d7e7cf003 100644 --- a/neo/rawio/bci2000rawio.py +++ b/neo/rawio/bci2000rawio.py @@ -1,6 +1,9 @@ """ BCI2000RawIO is a class to read BCI2000 .dat files. https://www.bci2000.org/mediawiki/index.php/Technical_Reference:BCI2000_File_Format + +Note : BCI2000RawIO cannot implemented using has_buffer_description_api because the buffer +is not compact. The buffer of signals is not compact (has some interleaved state uint in between channels) """ import numpy as np @@ -50,9 +53,11 @@ def _parse_header(self): self.header["nb_block"] = 1 self.header["nb_segment"] = [1] - # one unique stream and buffer - signal_buffers = np.array(("Signals", "0"), dtype=_signal_buffer_dtype) - signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype) + # one unique stream but no buffer because channels are not compact + stream_id = "0" + buffer_id = "" + signal_buffers = np.array([], dtype=_signal_buffer_dtype) + signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype) self.header["signal_buffers"] = signal_buffers self.header["signal_streams"] = signal_streams @@ -80,8 +85,6 @@ def _parse_header(self): if isinstance(offset, str): offset = float(offset) - stream_id = "0" - buffer_id = "0" sig_channels.append((ch_name, chan_id, sr, dtype, units, gain, offset, stream_id, buffer_id)) self.header["signal_channels"] = np.array(sig_channels, dtype=_signal_channel_dtype) diff --git a/neo/rawio/brainvisionrawio.py b/neo/rawio/brainvisionrawio.py index fff568ba6..f956c0047 100644 --- a/neo/rawio/brainvisionrawio.py +++ b/neo/rawio/brainvisionrawio.py @@ -21,6 +21,8 @@ _event_channel_dtype, ) +from .utils import get_memmap_shape + from neo.core import NeoReadWriteError @@ -40,10 +42,11 @@ class BrainVisionRawIO(BaseRawIO): extensions = ["vhdr"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename=""): BaseRawIO.__init__(self) - self.filename = filename + self.filename = str(filename) def _parse_header(self): # Read header file (vhdr) @@ -78,11 +81,21 @@ def _parse_header(self): sig_dtype = fmts[fmt] - # raw signals memmap - sigs = np.memmap(binary_filename, dtype=sig_dtype, mode="r", offset=0) - if sigs.size % nb_channel != 0: - sigs = sigs[: -sigs.size % nb_channel] - self._raw_signals = sigs.reshape(-1, nb_channel) + + stream_id = "0" + buffer_id = "0" + self._buffer_descriptions = {0 :{0 : {}}} + self._stream_buffer_slice = {} + shape = get_memmap_shape(binary_filename, sig_dtype, num_channels=nb_channel, offset=0) + self._buffer_descriptions[0][0][buffer_id] = { + "type" : "binary", + "file_path" : binary_filename, + "dtype" : sig_dtype, + "order": "C", + "file_offset" : 0, + "shape" : shape, + } + self._stream_buffer_slice[stream_id] = None signal_buffers = np.array(("Signals", "0"), dtype=_signal_buffer_dtype) signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype) @@ -165,24 +178,14 @@ def _segment_t_start(self, block_index, seg_index): return 0.0 def _segment_t_stop(self, block_index, seg_index): - t_stop = self._raw_signals.shape[0] / self._sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = sig_size / self._sampling_rate return t_stop ### - def _get_signal_size(self, block_index, seg_index, stream_index): - if stream_index != 0: - raise ValueError("`stream_index` must be 0") - return self._raw_signals.shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - if channel_indexes is None: - channel_indexes = slice(None) - raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] - return raw_signals - ### def _spike_count(self, block_index, seg_index, unit_index): return 0 @@ -216,6 +219,9 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index) event_times = event_timestamps.astype(dtype) / self._sampling_rate return event_times + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] + def read_brainvsion_soup(filename): with open(filename, "r", encoding="utf8") as f: diff --git a/neo/rawio/elanrawio.py b/neo/rawio/elanrawio.py index b44d77075..0c6f18088 100644 --- a/neo/rawio/elanrawio.py +++ b/neo/rawio/elanrawio.py @@ -31,6 +31,8 @@ _event_channel_dtype, ) +from .utils import get_memmap_shape + from neo.core import NeoReadWriteError @@ -57,6 +59,7 @@ class ElanRawIO(BaseRawIO): extensions = ["eeg"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename=None, entfile=None, posfile=None): BaseRawIO.__init__(self) @@ -156,10 +159,11 @@ def _parse_header(self): sig_dtype = np.dtype(">i" + str(n)) # unique buffer and stream - signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype) - signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype) + stream_id = "0" + buffer_id = "0" + signal_buffers = np.array([("Signals", buffer_id)], dtype=_signal_buffer_dtype) + signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype) - sig_channels = [] for c, chan_info in enumerate(channel_infos[:-2]): chan_name = chan_info["label"] @@ -169,8 +173,6 @@ def _parse_header(self): chan_info["max_logic"] - chan_info["min_logic"] ) offset = -chan_info["min_logic"] * gain + chan_info["min_physic"] - stream_id = "0" - buffer_id = "0" sig_channels.append( (chan_name, chan_id, self._sampling_rate, sig_dtype, chan_info["units"], gain, offset, stream_id, buffer_id) ) @@ -178,8 +180,18 @@ def _parse_header(self): sig_channels = np.array(sig_channels, dtype=_signal_channel_dtype) # raw data - self._raw_signals = np.memmap(self.filename, dtype=sig_dtype, mode="r", offset=0).reshape(-1, nb_channel + 2) - self._raw_signals = self._raw_signals[:, :-2] + self._buffer_descriptions = {0 :{0 : {}}} + self._stream_buffer_slice = {} + shape = get_memmap_shape(self.filename, sig_dtype, num_channels=nb_channel + 2, offset=0) + self._buffer_descriptions[0][0][buffer_id] = { + "type" : "binary", + "file_path" : self.filename, + "dtype" : sig_dtype, + "order": "C", + "file_offset" : 0, + "shape" : shape, + } + self._stream_buffer_slice[stream_id] = slice(0, -2) # triggers with open(self.posfile, mode="rt", encoding="ascii", newline=None) as f: @@ -237,25 +249,15 @@ def _segment_t_start(self, block_index, seg_index): return 0.0 def _segment_t_stop(self, block_index, seg_index): - t_stop = self._raw_signals.shape[0] / self._sampling_rate + sig_size = self.get_signal_size(block_index, seg_index, 0) + t_stop = sig_size / self._sampling_rate return t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - if stream_index != 0: - raise ValueError("`stream_index` must be 0") - return self._raw_signals.shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): if stream_index != 0: raise ValueError("`stream_index` must be 0") return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - if channel_indexes is None: - channel_indexes = slice(None) - raw_signals = self._raw_signals[slice(i_start, i_stop), channel_indexes] - return raw_signals - def _spike_count(self, block_index, seg_index, unit_index): return 0 @@ -282,3 +284,6 @@ def _get_event_timestamps(self, block_index, seg_index, event_channel_index, t_s def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index): event_times = event_timestamps.astype(dtype) / self._sampling_rate return event_times + + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] diff --git a/neo/rawio/spikeglxrawio.py b/neo/rawio/spikeglxrawio.py index 24f39d41e..ce8ec0984 100644 --- a/neo/rawio/spikeglxrawio.py +++ b/neo/rawio/spikeglxrawio.py @@ -364,8 +364,8 @@ def _rescale_event_timestamp(self, event_timestamps, dtype, event_channel_index) def _rescale_epoch_duration(self, raw_duration, dtype, event_channel_index): return None - def _get_analogsignal_buffer_description(self, block_index, seg_index, stream_index): - return self._buffer_descriptions[block_index][seg_index][stream_index] + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] From 471453539c296edcca57037d6b3e568ef54df26d Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Fri, 11 Oct 2024 12:03:49 +0200 Subject: [PATCH 15/24] wip --- neo/rawio/neuronexusrawio.py | 48 ++++++++++---------------- neo/rawio/utils.py | 2 +- neo/test/rawiotest/rawio_compliance.py | 5 ++- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/neo/rawio/neuronexusrawio.py b/neo/rawio/neuronexusrawio.py index 9763d5e74..b9c882da7 100644 --- a/neo/rawio/neuronexusrawio.py +++ b/neo/rawio/neuronexusrawio.py @@ -57,6 +57,7 @@ class NeuroNexusRawIO(BaseRawIO): extensions = ["xdat", "json"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename: str | Path = ""): """ @@ -134,14 +135,18 @@ def _parse_header(self): binary_file = self.binary_file timestamp_file = self.timestamp_file - # Make the two memory maps - self._raw_data = np.memmap( - binary_file, - dtype=BINARY_DTYPE, - mode="r", - shape=(self._n_samples, self._n_channels), - offset=0, # headerless binary file - ) + # the will cretae a memory map with teh generic mechanism + buffer_id = "0" + self._buffer_descriptions = {0 :{0 :{}}} + self._buffer_descriptions[0][0][buffer_id] = { + "type" : "binary", + "file_path" : str(binary_file), + "dtype" : BINARY_DTYPE, + "order": "C", + "file_offset" : 0, + "shape" : (self._n_samples, self._n_channels), + } + # Make the memory map for timestamp self._timestamps = np.memmap( timestamp_file, dtype=np.int64, # this is from the allego sample reader timestamps are np.int64 @@ -205,10 +210,12 @@ def _parse_header(self): signal_streams["id"] = [str(stream_id) for stream_id in stream_ids] # One unique buffer signal_streams["buffer_id"] = buffer_id - + self._stream_buffer_slice = {} for stream_index, stream_id in enumerate(stream_ids): name = stream_id_to_stream_name.get(int(stream_id), "") signal_streams["name"][stream_index] = name + chan_inds = np.flatnonzero(signal_channels["stream_id"] == stream_id) + self._stream_buffer_slice[stream_id] = chan_inds # No events event_channels = [] @@ -245,26 +252,6 @@ def _parse_header(self): for d in (bl_annotations, seg_annotations): d["rec_datetime"] = rec_datetime - def _get_signal_size(self, block_index, seg_index, stream_index): - - # All streams have the same size so just return the raw_data (num_samples, num_chans) - return self._raw_data.shape[0] - - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - - if i_start is None: - i_start = 0 - if i_stop is None: - i_stop = self._get_signal_size(block_index, seg_index, stream_index) - - raw_data = self._raw_data[i_start:i_stop, :] - - if channel_indexes is None: - channel_indexes = slice(None) - - raw_data = raw_data[:, channel_indexes] - return raw_data - def _segment_t_stop(self, block_index, seg_index): t_stop = self.metadata["status"]["t_range"][1] @@ -303,6 +290,9 @@ def read_metadata(self, fname_metadata): return metadata + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] + # this is pretty useless right now, but I think after a # refactor with sub streams we could adapt this for the sub-streams diff --git a/neo/rawio/utils.py b/neo/rawio/utils.py index cb56ccbb4..c577c8c07 100644 --- a/neo/rawio/utils.py +++ b/neo/rawio/utils.py @@ -41,7 +41,7 @@ def get_memmap_chunk_from_opened_file(fid, num_channels, start, stop, dtype, fi # Adjust the length so it includes the extra data from rounding down # the memmap offset to a multiple of ALLOCATIONGRANULARITY length += start_offset - + memmap_obj = mmap.mmap(fid.fileno(), length=length, access=mmap.ACCESS_READ, offset=memmap_offset) arr = np.ndarray( diff --git a/neo/test/rawiotest/rawio_compliance.py b/neo/test/rawiotest/rawio_compliance.py index 57a917bad..bc1f5ada9 100644 --- a/neo/test/rawiotest/rawio_compliance.py +++ b/neo/test/rawiotest/rawio_compliance.py @@ -151,7 +151,10 @@ def iter_over_sig_chunks(reader, stream_index, channel_indexes, chunksize=1024): for seg_index in range(nb_seg): sig_size = reader.get_signal_size(block_index, seg_index, stream_index) - nb = sig_size // chunksize + 1 + nb = int(np.floor(sig_size / chunksize)) + if sig_size % chunksize > 0: + nb += 1 + for i in range(nb): i_start = i * chunksize i_stop = min((i + 1) * chunksize, sig_size) From 334a882fb883fd1ab4dab2ead9d743fa31f238f8 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Fri, 11 Oct 2024 14:35:58 +0200 Subject: [PATCH 16/24] json api start hdf5 on maxwell --- neo/rawio/baserawio.py | 74 ++++++++++++++++------ neo/rawio/maxwellrawio.py | 130 ++++++++++++++++++++++---------------- 2 files changed, 132 insertions(+), 72 deletions(-) diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index 7e6f0a7d6..96621c1ae 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -1379,7 +1379,10 @@ def _get_signal_size_generic(self, block_index, seg_index, stream_index): buffer_id = self.header["signal_streams"][stream_index]["buffer_id"] buffer_desc = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id) - return buffer_desc['shape'][0] + + # some hdf5 revert teh buffer + time_axis = buffer_desc.get("time_axis", 0) + return buffer_desc['shape'][time_axis] def _get_analogsignal_chunk_generic( self, @@ -1400,38 +1403,73 @@ def _get_analogsignal_chunk_generic( # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) buffer_desc = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id) + i_start = i_start or 0 + i_stop = i_stop or buffer_desc['shape'][0] + if buffer_desc['type'] == 'binary': # open files on demand and keep reference to opened file - if not hasattr(self, '_memmap_analogsignal_streams'): - self._memmap_analogsignal_streams = {} - if block_index not in self._memmap_analogsignal_streams: - self._memmap_analogsignal_streams[block_index] = {} - if seg_index not in self._memmap_analogsignal_streams[block_index]: - self._memmap_analogsignal_streams[block_index][seg_index] = {} - if stream_index not in self._memmap_analogsignal_streams[block_index][seg_index]: + if not hasattr(self, '_memmap_analogsignal_buffers'): + self._memmap_analogsignal_buffers = {} + if block_index not in self._memmap_analogsignal_buffers: + self._memmap_analogsignal_buffers[block_index] = {} + if seg_index not in self._memmap_analogsignal_buffers[block_index]: + self._memmap_analogsignal_buffers[block_index][seg_index] = {} + if buffer_id not in self._memmap_analogsignal_buffers[block_index][seg_index]: fid = open(buffer_desc['file_path'], mode='rb') - self._memmap_analogsignal_streams[block_index][seg_index] = fid + self._memmap_analogsignal_buffers[block_index][seg_index][buffer_id] = fid else: - fid = self._memmap_analogsignal_streams[block_index][seg_index] + fid = self._memmap_analogsignal_buffers[block_index][seg_index][buffer_id] - i_start = i_start or 0 - i_stop = i_stop or buffer_desc['shape'][0] - num_channels = buffer_desc['shape'][1] raw_sigs = get_memmap_chunk_from_opened_file(fid, num_channels, i_start, i_stop, np.dtype(buffer_desc['dtype']), file_offset=buffer_desc['file_offset']) - # this is a pre slicing when the stream do not contain all channels (for instance spikeglx when load_sync_channel=False) + + elif buffer_desc['type'] == 'hdf5': + + # open files on demand and keep reference to opened file + if not hasattr(self, '_hdf5_analogsignal_buffers'): + self._hdf5_analogsignal_buffers = {} + if block_index not in self._hdf5_analogsignal_buffers: + self._hdf5_analogsignal_buffers[block_index] = {} + if seg_index not in self._hdf5_analogsignal_buffers[block_index]: + self._hdf5_analogsignal_buffers[block_index][seg_index] = {} + if buffer_id not in self._hdf5_analogsignal_buffers[block_index][seg_index]: + import h5py + h5file = h5py.File(buffer_desc['file_path'], mode="r") + self._hdf5_analogsignal_buffers[block_index][seg_index][buffer_id] = h5file + else: + h5file = self._hdf5_analogsignal_buffers[block_index][seg_index][buffer_id] + + hdf5_path = buffer_desc["hdf5_path"] + full_raw_sigs = h5file[hdf5_path] + + time_axis = buffer_desc.get("time_axis", 0) + if time_axis == 0: + raw_sigs = full_raw_sigs[i_start:i_stop, :] + elif time_axis == 1: + raw_sigs = full_raw_sigs[:, i_start:i_stop].T + else: + raise RuntimeError("Should never happen") + if buffer_slice is not None: raw_sigs = raw_sigs[:, buffer_slice] - - # channel slice requested - if channel_indexes is not None: - raw_sigs = raw_sigs[:, channel_indexes] + + + else: raise NotImplementedError() + # this is a pre slicing when the stream do not contain all channels (for instance spikeglx when load_sync_channel=False) + if buffer_slice is not None: + raw_sigs = raw_sigs[:, buffer_slice] + + # channel slice requested + if channel_indexes is not None: + raw_sigs = raw_sigs[:, channel_indexes] + + return raw_sigs diff --git a/neo/rawio/maxwellrawio.py b/neo/rawio/maxwellrawio.py index c093c795f..b8a6259e4 100644 --- a/neo/rawio/maxwellrawio.py +++ b/neo/rawio/maxwellrawio.py @@ -57,6 +57,7 @@ class MaxwellRawIO(BaseRawIO): extensions = ["h5"] rawmode = "one-file" + has_buffer_description_api = True def __init__(self, filename="", rec_name=None): BaseRawIO.__init__(self) @@ -120,10 +121,13 @@ def _parse_header(self): # create signal channels max_sig_length = 0 - self._signals = {} + # self._signals = {} + self._buffer_descriptions = {0 :{0 :{}}} + self._stream_buffer_slice = {} sig_channels = [] well_indices_to_remove = [] for stream_index, stream_id in enumerate(signal_streams["id"]): + if int(version) == 20160704: sr = 20000.0 settings = h5file["settings"] @@ -136,11 +140,13 @@ def _parse_header(self): else: gain = settings["gain"][0] gain_uV = 3.3 / (1024 * gain) * 1e6 - sigs = h5file["sig"] + # sigs = h5file["sig"] + hdf5_path = "sig" mapping = h5file["mapping"] ids = np.array(mapping["channel"]) ids = ids[ids >= 0] - self._channel_slice = ids + # self._channel_slice = ids + self._stream_buffer_slice[stream_id] = ids elif int(version) > 20160704: settings = h5file["wells"][stream_id][self.rec_name]["settings"] sr = settings["sampling"][0] @@ -155,12 +161,25 @@ def _parse_header(self): gain_uV = 3.3 / (1024 * gain) * 1e6 mapping = settings["mapping"] if "routed" in h5file["wells"][stream_id][self.rec_name]["groups"]: - sigs = h5file["wells"][stream_id][self.rec_name]["groups"]["routed"]["raw"] + # sigs = h5file["wells"][stream_id][self.rec_name]["groups"]["routed"]["raw"] + hdf5_path = f"/wells/{stream_id}/{self.rec_name}/groups/routed/raw" else: warnings.warn(f"No 'routed' group found for well {stream_id}") well_indices_to_remove.append(stream_index) continue - + self._stream_buffer_slice[stream_id] = None + + buffer_id = stream_id + shape = h5file[hdf5_path].shape + self._buffer_descriptions[0][0][buffer_id] = { + "type" : "hdf5", + "file_path" : str(self.filename), + "hdf5_path" : hdf5_path, + "shape" : shape, + "time_axis": 1, + } + self._stream_buffer_slice[stream_id] = slice(None) + channel_ids = np.array(mapping["channel"]) electrode_ids = np.array(mapping["electrode"]) @@ -175,8 +194,8 @@ def _parse_header(self): buffer_id = stream_id sig_channels.append((ch_name, str(chan_id), sr, "uint16", "uV", gain_uV, offset_uV, stream_id, buffer_id)) - self._signals[stream_id] = sigs - max_sig_length = max(max_sig_length, sigs.shape[1]) + # self._signals[stream_id] = sigs + max_sig_length = max(max_sig_length, shape[1]) self._t_stop = max_sig_length / sr @@ -210,57 +229,60 @@ def _segment_t_start(self, block_index, seg_index): def _segment_t_stop(self, block_index, seg_index): return self._t_stop - def _get_signal_size(self, block_index, seg_index, stream_index): - stream_id = self.header["signal_streams"][stream_index]["id"] - sigs = self._signals[stream_id] - return sigs.shape[1] + def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): + return self._buffer_descriptions[block_index][seg_index][buffer_id] + + # def _get_signal_size(self, block_index, seg_index, stream_index): + # stream_id = self.header["signal_streams"][stream_index]["id"] + # sigs = self._signals[stream_id] + # return sigs.shape[1] def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - stream_id = self.header["signal_streams"][stream_index]["id"] - sigs = self._signals[stream_id] - - if i_start is None: - i_start = 0 - if i_stop is None: - i_stop = sigs.shape[1] - - resorted_indexes = None - if channel_indexes is None: - channel_indexes = slice(None) - else: - if np.array(channel_indexes).size > 1 and np.any(np.diff(channel_indexes) < 0): - # get around h5py constraint that it does not allow datasets - # to be indexed out of order - order_f = np.argsort(channel_indexes) - sorted_channel_indexes = channel_indexes[order_f] - # use argsort again on order_f to obtain resorted_indexes - resorted_indexes = np.argsort(order_f) - - try: - if resorted_indexes is None: - if self._old_format: - sigs = sigs[self._channel_slice, i_start:i_stop] - sigs = sigs[channel_indexes] - else: - sigs = sigs[channel_indexes, i_start:i_stop] - else: - if self._old_format: - sigs = sigs[self._channel_slice, i_start:i_stop] - sigs = sigs[sorted_channel_indexes] - else: - sigs = sigs[sorted_channel_indexes, i_start:i_stop] - sigs = sigs[resorted_indexes] - except OSError as e: - print("*" * 10) - print(_hdf_maxwell_error) - print("*" * 10) - raise (e) - sigs = sigs.T - - return sigs + # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + # stream_id = self.header["signal_streams"][stream_index]["id"] + # sigs = self._signals[stream_id] + + # if i_start is None: + # i_start = 0 + # if i_stop is None: + # i_stop = sigs.shape[1] + + # resorted_indexes = None + # if channel_indexes is None: + # channel_indexes = slice(None) + # else: + # if np.array(channel_indexes).size > 1 and np.any(np.diff(channel_indexes) < 0): + # # get around h5py constraint that it does not allow datasets + # # to be indexed out of order + # order_f = np.argsort(channel_indexes) + # sorted_channel_indexes = channel_indexes[order_f] + # # use argsort again on order_f to obtain resorted_indexes + # resorted_indexes = np.argsort(order_f) + + # try: + # if resorted_indexes is None: + # if self._old_format: + # sigs = sigs[self._channel_slice, i_start:i_stop] + # sigs = sigs[channel_indexes] + # else: + # sigs = sigs[channel_indexes, i_start:i_stop] + # else: + # if self._old_format: + # sigs = sigs[self._channel_slice, i_start:i_stop] + # sigs = sigs[sorted_channel_indexes] + # else: + # sigs = sigs[sorted_channel_indexes, i_start:i_stop] + # sigs = sigs[resorted_indexes] + # except OSError as e: + # print("*" * 10) + # print(_hdf_maxwell_error) + # print("*" * 10) + # raise (e) + # sigs = sigs.T + + # return sigs _hdf_maxwell_error = """Maxwell file format is based on HDF5. From 383ada74f71a5f90c643602eba6d85555344788d Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Fri, 11 Oct 2024 16:34:50 +0200 Subject: [PATCH 17/24] doc for signal_stream signal_buffer --- doc/source/rawio.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/source/rawio.rst b/doc/source/rawio.rst index 5f0d41b45..6de4cdbfb 100644 --- a/doc/source/rawio.rst +++ b/doc/source/rawio.rst @@ -281,6 +281,32 @@ Read event timestamps and times In [42]: print(ev_times) [ 0.0317] +Signal streams and signal buffers +--------------------------------- + +For reading analog signal **rawio** has 2 important concepts: + + 1. The **signal_stream** : it is a group of channels that can be read together using :func:`get_analog_signal_chunk()`. + This group of channels are guaranteed to have the sample sampling rate, and the same duration per segment. + Most of the time, this group channel is a "logical" group of channel. In hsort there are from the same headstage + or from the auxilary board. + Optionally, depending the format, a **signal_stream** can be a slice or an entire **signal_buffer**. + + 2. The **signal_buffer** : it is group channel that share the same data layout in a file. The most simple example + is channel that can be read by a simple :func:`signals = np.memmap(file, shape=..., dtype=... , offset=...)`. + A **signal_buffer** can contain one or several **signal_stream** (very often it is only one). + There are two kind of format that handle this concept: + + * the one that can use :func:`np.memmap()` internally + * format based on hdf5 + + There are many format that do not handle at all this concept: + + * the one that use an external python package for reading data (edf, ced, plexon2, ...) + * the one with a complicated data layout (data block are splited without structure) + + To check if a format have the concept you can check the class attribute flag `has_buffer_description_api` of the + rawio class. From 672e8fe66bb691b96294229c42df22ab498e1965 Mon Sep 17 00:00:00 2001 From: Garcia Samuel Date: Wed, 16 Oct 2024 16:47:18 +0200 Subject: [PATCH 18/24] Merci Zach Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- doc/source/rawio.rst | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/source/rawio.rst b/doc/source/rawio.rst index 6de4cdbfb..c974300d0 100644 --- a/doc/source/rawio.rst +++ b/doc/source/rawio.rst @@ -284,28 +284,28 @@ Read event timestamps and times Signal streams and signal buffers --------------------------------- -For reading analog signal **rawio** has 2 important concepts: +For reading analog signals **neo.rawio** has 2 important concepts: 1. The **signal_stream** : it is a group of channels that can be read together using :func:`get_analog_signal_chunk()`. - This group of channels are guaranteed to have the sample sampling rate, and the same duration per segment. - Most of the time, this group channel is a "logical" group of channel. In hsort there are from the same headstage - or from the auxilary board. - Optionally, depending the format, a **signal_stream** can be a slice or an entire **signal_buffer**. + This group of channels is guaranteed to have the same sampling rate, and the same duration per segment. + Most of the time, this group of channel is a "logical" group of channels. In short they are from the same headstage + or from the same auxiliary board. + Optionally, depending on the format, a **signal_stream** can be a slice of or an entire **signal_buffer**. - 2. The **signal_buffer** : it is group channel that share the same data layout in a file. The most simple example + 2. The **signal_buffer** : it is group of channels that share the same data layout in a file. The most simple example is channel that can be read by a simple :func:`signals = np.memmap(file, shape=..., dtype=... , offset=...)`. - A **signal_buffer** can contain one or several **signal_stream** (very often it is only one). - There are two kind of format that handle this concept: + A **signal_buffer** can contain one or several **signal_stream**'s (very often it is only one). + There are two kind of formats that handle this concept: - * the one that can use :func:`np.memmap()` internally - * format based on hdf5 + * those which use :func:`np.memmap()` internally + * formats based on hdf5 - There are many format that do not handle at all this concept: + There are many formats that do not handle this concept: - * the one that use an external python package for reading data (edf, ced, plexon2, ...) - * the one with a complicated data layout (data block are splited without structure) + * the ones that use an external python package for reading data (edf, ced, plexon2, ...) + * the ones with a complicated data layout (e.g. those where the data blocks are split without structure) - To check if a format have the concept you can check the class attribute flag `has_buffer_description_api` of the + To check if a format makes use of the buffer api you can check the class attribute flag `has_buffer_description_api` of the rawio class. From a540840f0bc74508123d5ab0dbdc0ba7872f1e34 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Wed, 16 Oct 2024 20:43:28 +0200 Subject: [PATCH 19/24] Use class approach for buffer api : BaseRawWithBufferApiIO --- neo/rawio/axonrawio.py | 9 ++- neo/rawio/baserawio.py | 77 +++++++++++++++++-------- neo/rawio/brainvisionrawio.py | 11 ++-- neo/rawio/elanrawio.py | 21 ++++--- neo/rawio/maxwellrawio.py | 7 +-- neo/rawio/micromedrawio.py | 9 ++- neo/rawio/neuronexusrawio.py | 9 ++- neo/rawio/neuroscoperawio.py | 9 ++- neo/rawio/openephysbinaryrawio.py | 14 ++--- neo/rawio/rawbinarysignalrawio.py | 9 ++- neo/rawio/rawmcsrawio.py | 9 ++- neo/rawio/spikeglxrawio.py | 16 ++--- neo/rawio/winedrrawio.py | 9 ++- neo/rawio/winwcprawio.py | 9 ++- neo/rawio/xarray_utils.py | 51 +++------------- neo/test/rawiotest/common_rawio_test.py | 4 ++ neo/test/rawiotest/rawio_compliance.py | 33 ++++++++++- 17 files changed, 167 insertions(+), 139 deletions(-) diff --git a/neo/rawio/axonrawio.py b/neo/rawio/axonrawio.py index a73f582f3..803ecc880 100644 --- a/neo/rawio/axonrawio.py +++ b/neo/rawio/axonrawio.py @@ -53,7 +53,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -63,7 +63,7 @@ from neo.core import NeoReadWriteError -class AxonRawIO(BaseRawIO): +class AxonRawIO(BaseRawWithBufferApiIO): """ Class for Class for reading data from pCLAMP and AxoScope files (.abf version 1 and 2) @@ -90,10 +90,9 @@ class AxonRawIO(BaseRawIO): extensions = ["abf"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename=""): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = filename def _parse_header(self): @@ -179,7 +178,7 @@ def _parse_header(self): self._buffer_descriptions[0][seg_index] = {} self._buffer_descriptions[0][seg_index][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(self.filename), "dtype" : str(sig_dtype), "order": "C", diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index a89d76832..ac5a7bd81 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -151,9 +151,6 @@ class BaseRawIO: rawmode = None # one key from possible_raw_modes - # If true then - has_buffer_description_api = False - # TODO Why multi-file would have a single filename is confusing here - shouldn't # the name of this argument be filenames_list or filenames_base or similar? # @@ -187,6 +184,15 @@ def __init__(self, use_cache: bool = False, cache_path: str = "same_as_resource" self.header = None self.is_header_parsed = False + self._has_buffer_description_api = False + + def has_buffer_description_api(self): + """ + Return if the reader handle the buffer API. + If True then the reader support internally `get_analogsignal_buffer_description()` + """ + return self._has_buffer_description_api + def parse_header(self): """ Parses the header of the file(s) to allow for faster computations @@ -196,6 +202,7 @@ def parse_header(self): # this must create # self.header['nb_block'] # self.header['nb_segment'] + # self.header['signal_buffers'] # self.header['signal_streams'] # self.header['signal_channels'] # self.header['spike_channels'] @@ -669,11 +676,7 @@ def get_signal_size(self, block_index: int, seg_index: int, stream_index: int | """ stream_index = self._get_stream_index_from_arg(stream_index) - if not self.has_buffer_description_api: - return self._get_signal_size(block_index, seg_index, stream_index) - else: - # use the buffer description - return self._get_signal_size_generic(block_index, seg_index, stream_index) + return self._get_signal_size(block_index, seg_index, stream_index) def get_signal_t_start(self, block_index: int, seg_index: int, stream_index: int | None = None): """ @@ -822,11 +825,7 @@ def get_analogsignal_chunk( if np.all(np.diff(channel_indexes) == 1): channel_indexes = slice(channel_indexes[0], channel_indexes[-1] + 1) - if not self.has_buffer_description_api: - raw_chunk = self._get_analogsignal_chunk(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) - else: - # use the buffer description - raw_chunk = self._get_analogsignal_chunk_generic(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) + raw_chunk = self._get_analogsignal_chunk(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) return raw_chunk @@ -1298,7 +1297,6 @@ def _get_signal_size(self, block_index: int, seg_index: int, stream_index: int): All channels indexed must have the same size and t_start. """ - # must NOT be implemented if has_buffer_description_api=True raise (NotImplementedError) def _get_signal_t_start(self, block_index: int, seg_index: int, stream_index: int): @@ -1326,7 +1324,6 @@ def _get_analogsignal_chunk( ------- array of samples, with each requested channel in a column """ - # must NOT be implemented if has_buffer_description_api=True raise (NotImplementedError) ### @@ -1366,7 +1363,7 @@ def _rescale_epoch_duration(self, raw_duration: np.ndarray, dtype: np.dtype): raise (NotImplementedError) ### - # json buffer api zone + # buffer api zone # must be implemented if has_buffer_description_api=True def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: int = 0, buffer_id: str = None): if not self.has_buffer_description_api: @@ -1377,17 +1374,37 @@ def get_analogsignal_buffer_description(self, block_index: int = 0, seg_index: i def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): raise (NotImplementedError) - def _get_signal_size_generic(self, block_index, seg_index, stream_index): - # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) - buffer_id = self.header["signal_streams"][stream_index]["buffer_id"] - buffer_desc = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id) +class BaseRawWithBufferApiIO(BaseRawIO): + """ + Generic class for reader that support "buffer api". + + In short reader that are internally based on: + + * np.memmap + * hdf5 + + In theses cases _get_signal_size and _get_analogsignal_chunk are totaly generic and do not need to be implemented in the class. + + For this class sub classes must implements theses two dict: + * self._buffer_descriptions[block_index][seg_index] = buffer_description + * self._stream_buffer_slice[buffer_id] = None or slicer o indices + + """ + + def __init__(self, *arg, **kwargs): + super().__init__(*arg, **kwargs) + self._has_buffer_description_api = True + + def _get_signal_size(self, block_index, seg_index, stream_index): + buffer_id = self.header["signal_streams"][stream_index]["buffer_id"] + buffer_desc = self.get_analogsignal_buffer_description(block_index, seg_index, buffer_id) # some hdf5 revert teh buffer time_axis = buffer_desc.get("time_axis", 0) return buffer_desc['shape'][time_axis] - def _get_analogsignal_chunk_generic( + def _get_analogsignal_chunk( self, block_index: int, seg_index: int, @@ -1403,13 +1420,12 @@ def _get_analogsignal_chunk_generic( buffer_slice = self._stream_buffer_slice[stream_id] - # When has_buffer_description_api=True this used to avoid to write _get_analogsignal_chunk()) - buffer_desc = self._get_analogsignal_buffer_description(block_index, seg_index, buffer_id) + buffer_desc = self.get_analogsignal_buffer_description(block_index, seg_index, buffer_id) i_start = i_start or 0 i_stop = i_stop or buffer_desc['shape'][0] - if buffer_desc['type'] == 'binary': + if buffer_desc['type'] == "raw": # open files on demand and keep reference to opened file if not hasattr(self, '_memmap_analogsignal_buffers'): @@ -1475,6 +1491,19 @@ def _get_analogsignal_chunk_generic( return raw_sigs + def __del__(self): + if hasattr(self, '_memmap_analogsignal_buffers'): + for block_index in self._memmap_analogsignal_buffers.keys(): + for seg_index in self._memmap_analogsignal_buffers[block_index].keys(): + for buffer_id, fid in self._memmap_analogsignal_buffers[block_index][seg_index].items(): + fid.close() + del self._memmap_analogsignal_buffers + + if hasattr(self, '_hdf5_analogsignal_buffers'): + for block_index in self._hdf5_analogsignal_buffers.keys(): + for seg_index in self._hdf5_analogsignal_buffers[block_index].keys(): + for buffer_id, h5_file in self._hdf5_analogsignal_buffers[block_index][seg_index].items(): + h5_file.close() def pprint_vector(vector, lim: int = 8): diff --git a/neo/rawio/brainvisionrawio.py b/neo/rawio/brainvisionrawio.py index 5569e7acd..1c0398d04 100644 --- a/neo/rawio/brainvisionrawio.py +++ b/neo/rawio/brainvisionrawio.py @@ -13,7 +13,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -26,7 +26,7 @@ from neo.core import NeoReadWriteError -class BrainVisionRawIO(BaseRawIO): +class BrainVisionRawIO(BaseRawWithBufferApiIO): """Class for reading BrainVision files Parameters @@ -42,10 +42,9 @@ class BrainVisionRawIO(BaseRawIO): extensions = ["vhdr"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename=""): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = str(filename) def _parse_header(self): @@ -88,7 +87,7 @@ def _parse_header(self): self._stream_buffer_slice = {} shape = get_memmap_shape(binary_filename, sig_dtype, num_channels=nb_channel, offset=0) self._buffer_descriptions[0][0][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : binary_filename, "dtype" : sig_dtype, "order": "C", @@ -97,7 +96,7 @@ def _parse_header(self): } self._stream_buffer_slice[stream_id] = None - signal_buffers = np.array(("Signals", "0"), dtype=_signal_buffer_dtype) + signal_buffers = np.array([("Signals", "0")], dtype=_signal_buffer_dtype) signal_streams = np.array([("Signals", "0", "0")], dtype=_signal_stream_dtype) sig_channels = [] diff --git a/neo/rawio/elanrawio.py b/neo/rawio/elanrawio.py index 11214ce62..06c928145 100644 --- a/neo/rawio/elanrawio.py +++ b/neo/rawio/elanrawio.py @@ -23,7 +23,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -36,7 +36,7 @@ from neo.core import NeoReadWriteError -class ElanRawIO(BaseRawIO): +class ElanRawIO(BaseRawWithBufferApiIO): """ Class for reading time-frequency EEG data maps from the Elan software @@ -59,10 +59,9 @@ class ElanRawIO(BaseRawIO): extensions = ["eeg"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename=None, entfile=None, posfile=None): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = pathlib.Path(filename) # check whether ent and pos files are defined @@ -165,12 +164,18 @@ def _parse_header(self): signal_buffers = np.array([("Signals", buffer_id)], dtype=_signal_buffer_dtype) signal_streams = np.array([("Signals", stream_id, buffer_id)], dtype=_signal_stream_dtype) - sig_channels = [] - for c, chan_info in enumerate(channel_infos[:-2]): + for c, chan_info in enumerate(channel_infos): chan_name = chan_info["label"] chan_id = str(c) + if c < len(channel_infos) - 2: + # standard channels + stream_id = "0" + else: + # last 2 channels are not included in the stream + stream_id = "" + gain = (chan_info["max_physic"] - chan_info["min_physic"]) / ( chan_info["max_logic"] - chan_info["min_logic"] ) @@ -196,14 +201,14 @@ def _parse_header(self): self._stream_buffer_slice = {} shape = get_memmap_shape(self.filename, sig_dtype, num_channels=nb_channel + 2, offset=0) self._buffer_descriptions[0][0][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : self.filename, "dtype" : sig_dtype, "order": "C", "file_offset" : 0, "shape" : shape, } - self._stream_buffer_slice[stream_id] = slice(0, -2) + self._stream_buffer_slice["0"] = slice(0, -2) # triggers with open(self.posfile, mode="rt", encoding="ascii", newline=None) as f: diff --git a/neo/rawio/maxwellrawio.py b/neo/rawio/maxwellrawio.py index 9a15acf72..8bb320a3c 100644 --- a/neo/rawio/maxwellrawio.py +++ b/neo/rawio/maxwellrawio.py @@ -27,7 +27,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -38,7 +38,7 @@ from neo.core import NeoReadWriteError -class MaxwellRawIO(BaseRawIO): +class MaxwellRawIO(BaseRawWithBufferApiIO): """ Class for reading MaxOne or MaxTwo files. @@ -57,10 +57,9 @@ class MaxwellRawIO(BaseRawIO): extensions = ["h5"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename="", rec_name=None): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = filename self.rec_name = rec_name diff --git a/neo/rawio/micromedrawio.py b/neo/rawio/micromedrawio.py index 96c239971..7e30a3829 100644 --- a/neo/rawio/micromedrawio.py +++ b/neo/rawio/micromedrawio.py @@ -14,7 +14,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -33,7 +33,7 @@ def read_f(self, fmt, offset=None): return struct.unpack(fmt, self.read(struct.calcsize(fmt))) -class MicromedRawIO(BaseRawIO): +class MicromedRawIO(BaseRawWithBufferApiIO): """ Class for reading data from micromed (.trc). @@ -46,10 +46,9 @@ class MicromedRawIO(BaseRawIO): extensions = ["trc", "TRC"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename=""): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = filename def _parse_header(self): @@ -110,7 +109,7 @@ def _parse_header(self): buffer_id = "0" stream_id = "0" self._buffer_descriptions[0][0][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(self.filename), "dtype" : sig_dtype, "order": "C", diff --git a/neo/rawio/neuronexusrawio.py b/neo/rawio/neuronexusrawio.py index f2ade8e25..1858b2409 100644 --- a/neo/rawio/neuronexusrawio.py +++ b/neo/rawio/neuronexusrawio.py @@ -43,7 +43,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -53,11 +53,10 @@ from neo.core import NeoReadWriteError -class NeuroNexusRawIO(BaseRawIO): +class NeuroNexusRawIO(BaseRawWithBufferApiIO): extensions = ["xdat", "json"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename: str | Path = ""): """ @@ -95,7 +94,7 @@ def __init__(self, filename: str | Path = ""): """ - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) if not Path(filename).is_file(): raise FileNotFoundError(f"The metadata file {filename} was not found") @@ -139,7 +138,7 @@ def _parse_header(self): buffer_id = "0" self._buffer_descriptions = {0 :{0 :{}}} self._buffer_descriptions[0][0][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(binary_file), "dtype" : BINARY_DTYPE, "order": "C", diff --git a/neo/rawio/neuroscoperawio.py b/neo/rawio/neuroscoperawio.py index 9a93c1cdf..0090687df 100644 --- a/neo/rawio/neuroscoperawio.py +++ b/neo/rawio/neuroscoperawio.py @@ -22,7 +22,7 @@ from xml.etree import ElementTree from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -33,10 +33,9 @@ from .utils import get_memmap_shape -class NeuroScopeRawIO(BaseRawIO): +class NeuroScopeRawIO(BaseRawWithBufferApiIO): extensions = ["xml", "dat", "lfp", "eeg"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename, binary_file=None): """raw reader for Neuroscope @@ -68,7 +67,7 @@ def __init__(self, filename, binary_file=None): filename provided with a supported data extension (.dat, .lfp, .eeg): - It assumes that the XML file has the same name and a .xml extension. """ - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = filename self.binary_file = binary_file @@ -115,7 +114,7 @@ def _parse_header(self): stream_id = "0" self._buffer_descriptions = {0: {0:{}}} self._buffer_descriptions[0][0][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(self.data_file_path), "dtype" : sig_dtype, "order": "C", diff --git a/neo/rawio/openephysbinaryrawio.py b/neo/rawio/openephysbinaryrawio.py index 320773b26..565771ae0 100644 --- a/neo/rawio/openephysbinaryrawio.py +++ b/neo/rawio/openephysbinaryrawio.py @@ -16,7 +16,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -27,7 +27,7 @@ from .utils import get_memmap_shape -class OpenEphysBinaryRawIO(BaseRawIO): +class OpenEphysBinaryRawIO(BaseRawWithBufferApiIO): """ Handle several Blocks and several Segments. @@ -62,10 +62,9 @@ class OpenEphysBinaryRawIO(BaseRawIO): extensions = ["xml", "oebin", "txt", "dat", "npy"] rawmode = "one-dir" - has_buffer_description_api = True def __init__(self, dirname="", load_sync_channel=False, experiment_names=None): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.dirname = dirname if experiment_names is not None: if isinstance(experiment_names, str): @@ -130,7 +129,8 @@ def _parse_header(self): for chan_info in info["channels"]: chan_id = chan_info["channel_name"] if "SYNC" in chan_id and not self.load_sync_channel: - continue + # the channel is removed from stream but not the buffer + stream_id = "" if chan_info["units"] == "": # in some cases for some OE version the unit is "", but the gain is to "uV" units = "uV" @@ -179,7 +179,7 @@ def _parse_header(self): shape = get_memmap_shape(info["raw_filename"], info["dtype"], num_channels=num_channels, offset=0) self._buffer_descriptions[block_index][seg_index][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(info["raw_filename"]), "dtype" : info["dtype"], "order": "C", @@ -195,7 +195,7 @@ def _parse_header(self): "SYNC channel is not present in the recording. " "Set load_sync_channel to False" ) - if has_sync_trace: + if has_sync_trace and not self.load_sync_channel: self._stream_buffer_slice[stream_id] = slice(None, -1) else: self._stream_buffer_slice[stream_id] = None diff --git a/neo/rawio/rawbinarysignalrawio.py b/neo/rawio/rawbinarysignalrawio.py index 82750d02f..feef2f98b 100644 --- a/neo/rawio/rawbinarysignalrawio.py +++ b/neo/rawio/rawbinarysignalrawio.py @@ -22,7 +22,7 @@ class RawBinarySignalIO import os from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -32,7 +32,7 @@ class RawBinarySignalIO from .utils import get_memmap_shape -class RawBinarySignalRawIO(BaseRawIO): +class RawBinarySignalRawIO(BaseRawWithBufferApiIO): """ Class for reading raw binary files with user specified values Parameters @@ -55,7 +55,6 @@ class RawBinarySignalRawIO(BaseRawIO): extensions = ["raw", "bin", "dat"] rawmode = "one-file" - has_buffer_description_api = True def __init__( self, @@ -67,7 +66,7 @@ def __init__( signal_offset=0.0, bytesoffset=0, ): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = filename self.dtype = dtype self.sampling_rate = sampling_rate @@ -88,7 +87,7 @@ def _parse_header(self): shape = get_memmap_shape(self.filename, self.dtype, num_channels=self.nb_channel, offset=self.bytesoffset) self._buffer_descriptions = {0:{0:{}}} self._buffer_descriptions[0][0][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(self.filename), "dtype" : "uint16", "order": "C", diff --git a/neo/rawio/rawmcsrawio.py b/neo/rawio/rawmcsrawio.py index 484c3ef24..cba8baae6 100644 --- a/neo/rawio/rawmcsrawio.py +++ b/neo/rawio/rawmcsrawio.py @@ -17,7 +17,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -26,7 +26,7 @@ ) from .utils import get_memmap_shape -class RawMCSRawIO(BaseRawIO): +class RawMCSRawIO(BaseRawWithBufferApiIO): """ Class for reading an mcs file converted by the MC_DataToo binary converter @@ -39,10 +39,9 @@ class RawMCSRawIO(BaseRawIO): extensions = ["raw"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename=""): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = filename def _source_name(self): @@ -68,7 +67,7 @@ def _parse_header(self): shape = get_memmap_shape(self.filename, self.dtype, num_channels=self.nb_channel, offset=file_offset) self._buffer_descriptions = {0:{0:{}}} self._buffer_descriptions[0][0][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(self.filename), "dtype" : "uint16", "order": "C", diff --git a/neo/rawio/spikeglxrawio.py b/neo/rawio/spikeglxrawio.py index ce8ec0984..419528e93 100644 --- a/neo/rawio/spikeglxrawio.py +++ b/neo/rawio/spikeglxrawio.py @@ -57,7 +57,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -67,7 +67,7 @@ from .utils import get_memmap_shape -class SpikeGLXRawIO(BaseRawIO): +class SpikeGLXRawIO(BaseRawWithBufferApiIO): """ Class for reading data from a SpikeGLX system @@ -108,10 +108,8 @@ class SpikeGLXRawIO(BaseRawIO): extensions = ["meta", "bin"] rawmode = "one-dir" - has_buffer_description_api = True - def __init__(self, dirname="", load_sync_channel=False, load_channel_location=False): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.dirname = dirname self.load_sync_channel = load_sync_channel self.load_channel_location = load_channel_location @@ -154,7 +152,7 @@ def _parse_header(self): self._buffer_descriptions[block_index][seg_index] = {} self._buffer_descriptions[block_index][seg_index][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : info["bin_file"], "dtype" : "int16", "order": "C", @@ -177,6 +175,7 @@ def _parse_header(self): signal_buffers.append((buffer_name, buffer_id)) stream_id = stream_name + stream_index = stream_names.index(info["stream_name"]) signal_streams.append((stream_name, stream_id, buffer_id)) @@ -203,7 +202,10 @@ def _parse_header(self): # check sync channel validity if "nidq" not in stream_name: if not self.load_sync_channel and info["has_sync_trace"]: - signal_channels = signal_channels[:-1] + # the last channel is remove from the stream but not from the buffer + last_chan = signal_channels[-1] + last_chan = last_chan[:-2] + ("", buffer_id) + signal_channels = signal_channels[:-1] + [last_chan] self._stream_buffer_slice[stream_id] = slice(0, -1) if self.load_sync_channel and not info["has_sync_trace"]: raise ValueError("SYNC channel is not present in the recording. " "Set load_sync_channel to False") diff --git a/neo/rawio/winedrrawio.py b/neo/rawio/winedrrawio.py index f02418e15..78d0db67e 100644 --- a/neo/rawio/winedrrawio.py +++ b/neo/rawio/winedrrawio.py @@ -12,7 +12,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -22,10 +22,9 @@ ) -class WinEdrRawIO(BaseRawIO): +class WinEdrRawIO(BaseRawWithBufferApiIO): extensions = ["EDR", "edr"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename=""): """ @@ -37,7 +36,7 @@ def __init__(self, filename=""): The *.edr file to be loaded """ - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = filename def _source_name(self): @@ -68,7 +67,7 @@ def _parse_header(self): buffer_id = "0" self._buffer_descriptions = {0 :{0 :{}}} self._buffer_descriptions[0][0][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(self.filename), "dtype" : "int16", "order": "C", diff --git a/neo/rawio/winwcprawio.py b/neo/rawio/winwcprawio.py index 5968de8c0..36049ae8b 100644 --- a/neo/rawio/winwcprawio.py +++ b/neo/rawio/winwcprawio.py @@ -13,7 +13,7 @@ import numpy as np from .baserawio import ( - BaseRawIO, + BaseRawWithBufferApiIO, _signal_channel_dtype, _signal_stream_dtype, _signal_buffer_dtype, @@ -23,7 +23,7 @@ ) -class WinWcpRawIO(BaseRawIO): +class WinWcpRawIO(BaseRawWithBufferApiIO): """ Class for reading WinWCP data @@ -36,10 +36,9 @@ class WinWcpRawIO(BaseRawIO): extensions = ["wcp"] rawmode = "one-file" - has_buffer_description_api = True def __init__(self, filename=""): - BaseRawIO.__init__(self) + BaseRawWithBufferApiIO.__init__(self) self.filename = filename def _source_name(self): @@ -98,7 +97,7 @@ def _parse_header(self): buffer_id = "0" self._buffer_descriptions[0][seg_index] = {} self._buffer_descriptions[0][seg_index][buffer_id] = { - "type" : "binary", + "type" : "raw", "file_path" : str(self.filename), "dtype" : "int16", "order": "C", diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py index 98fe10879..8652e8614 100644 --- a/neo/rawio/xarray_utils.py +++ b/neo/rawio/xarray_utils.py @@ -1,5 +1,5 @@ """ -Module to export a rawio reader that support the buffer_description API +Experimental module to export a rawio reader that support the buffer_description API to xarray dataset using zarr specification format v2. A block/segment/stream correspond to one xarray.DataSet @@ -7,8 +7,7 @@ A xarray.DataTree can also be expose to get all at block/segment/stream Note : - * only some IOs support this at the moment - + * only some IOs support this at the moment has_buffer_description_api()=True """ import json @@ -18,41 +17,6 @@ -# TODO ios with memmap: -# * spikeglx DONE -# * openephysbinary -# * winwcprawio -# * winedr -# * tdc -# * rawmcs -# * rawbinarysignal -# * neuroscope -# * neuroexplorer -# * micromed -# * brainvision -# * blackrock -# * bci200 -# * - -# TODO ios with HDF5: -# * nix -# * mearec -# * maxwell -# * biocam - -# TODO with chunk by channel (more complicated): -# * axona -# * openephys legacy -# axograph - - -# add some test in stanard compliance - - - - -# TODO implement zarr v3 - def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=None): """ @@ -69,6 +33,8 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=Non """ + # TODO later implement zarr v3 + # rawio_reader. signal_buffers = rawio_reader.header["signal_buffers"] @@ -88,7 +54,7 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=Non descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, buffer_id=buffer_id) - if descr["type"] == "binary": + if descr["type"] == "raw": # channel : small enough can be internal with base64 @@ -140,7 +106,7 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=Non zattrs['sampling_rate'] = float(channels['sampling_rate'][0]) # unique big chunk - # TODO : optional split in several small chunks + # TODO later : optional split in several small chunks array_size = np.prod(descr["shape"], dtype='int64') chunk_size = array_size * dtype.itemsize rfs["refs"]["traces/0.0"] = [str(descr["file_path"]), descr["file_offset"], chunk_size] @@ -152,7 +118,7 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=Non else: raise ValueError(f"buffer_description type not handled {descr['type']}") - # TODO channel array_annotations + # TODO later channel array_annotations return rfs @@ -162,7 +128,8 @@ def to_xarray_dataset(rawio_reader, block_index=0, seg_index=0, buffer_id=None): """ Utils fonction that transorm an instance a rawio into a xarray.Dataset with lazy access. - This works only for rawio class that implement the has_buffer_description_api. + This works only for rawio class that return True with has_buffer_description_api() and hinerits from + BaseRawWithBufferApiIO. Note : the original idea of the function is from Ben Dichter in this page diff --git a/neo/test/rawiotest/common_rawio_test.py b/neo/test/rawiotest/common_rawio_test.py index 9ad9853fb..488cb9fbf 100644 --- a/neo/test/rawiotest/common_rawio_test.py +++ b/neo/test/rawiotest/common_rawio_test.py @@ -125,3 +125,7 @@ def test_read_all(self): logging.getLogger().setLevel(logging.INFO) compliance.benchmark_speed_read_signals(reader) logging.getLogger().setLevel(level) + + # buffer api + if reader.has_buffer_description_api(): + compliance.check_buffer_api(reader) diff --git a/neo/test/rawiotest/rawio_compliance.py b/neo/test/rawiotest/rawio_compliance.py index 385016aa9..263c22997 100644 --- a/neo/test/rawiotest/rawio_compliance.py +++ b/neo/test/rawiotest/rawio_compliance.py @@ -93,7 +93,8 @@ def check_signal_stream_buffer_hierachy(reader): assert stream["buffer_id"] in h["signal_buffers"]["id"] for channel in h["signal_channels"]: - assert channel["stream_id"] in h["signal_streams"]["id"] + if channel["stream_id"] != "": + assert channel["stream_id"] in h["signal_streams"]["id"] if channel["buffer_id"] != "": assert channel["buffer_id"] in h["signal_buffers"]["id"] @@ -480,3 +481,33 @@ def read_events(reader): def has_annotations(reader): assert hasattr(reader, "raw_annotations"), "raw_annotation are not set" + + +def check_buffer_api(reader): + buffer_ids = reader.header["signal_buffers"]["id"] + + nb_block = reader.block_count() + nb_event_channel = reader.event_channels_count() + + + for block_index in range(nb_block): + nb_seg = reader.segment_count(block_index) + for seg_index in range(nb_seg): + for buffer_id in buffer_ids: + descr = reader.get_analogsignal_buffer_description( + block_index=block_index, seg_index=seg_index, buffer_id=buffer_id + ) + assert descr["type"] in ("raw", "hdf5"), "buffer_description type uncorrect" + + try: + import xarray + HAVE_XARRAY = True + except ImportError: + HAVE_XARRAY = False + + if HAVE_XARRAY: + # this test quickly the experimental xaray_utils.py if xarray is present on the system + # this is not the case for the CI + from neo.rawio.xarray_utils import to_xarray_dataset + ds = to_xarray_dataset(reader, block_index=block_index, seg_index=seg_index, buffer_id=buffer_id) + assert isinstance(ds, xarray.Dataset) From 282951abd6ee053e179a5725852df4e46d3cd0cd Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 21 Oct 2024 14:09:25 +0200 Subject: [PATCH 20/24] feedback --- neo/rawio/axonrawio.py | 4 ---- neo/rawio/baserawio.py | 1 + neo/rawio/brainvisionrawio.py | 2 +- neo/rawio/utils.py | 2 +- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/neo/rawio/axonrawio.py b/neo/rawio/axonrawio.py index 803ecc880..6980bf17a 100644 --- a/neo/rawio/axonrawio.py +++ b/neo/rawio/axonrawio.py @@ -115,8 +115,6 @@ def _parse_header(self): head_offset = info["sections"]["DataSection"]["uBlockIndex"] * BLOCKSIZE totalsize = info["sections"]["DataSection"]["llNumEntries"] - # self._raw_data = np.memmap(self.filename, dtype=sig_dtype, mode="r", shape=(totalsize,), offset=head_offset) - # 3 possible modes if version < 2.0: mode = info["nOperationMode"] @@ -174,8 +172,6 @@ def _parse_header(self): if (fSynchTimeUnit != 0) and (mode == 1): length /= fSynchTimeUnit - # self._raw_signals[seg_index] = self._raw_data[pos : pos + length].reshape(-1, nbchannel) - self._buffer_descriptions[0][seg_index] = {} self._buffer_descriptions[0][seg_index][buffer_id] = { "type" : "raw", diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index ac5a7bd81..86b3b22eb 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -1504,6 +1504,7 @@ def __del__(self): for seg_index in self._hdf5_analogsignal_buffers[block_index].keys(): for buffer_id, h5_file in self._hdf5_analogsignal_buffers[block_index][seg_index].items(): h5_file.close() + del self._hdf5_analogsignal_buffers def pprint_vector(vector, lim: int = 8): diff --git a/neo/rawio/brainvisionrawio.py b/neo/rawio/brainvisionrawio.py index 1c0398d04..6ac597695 100644 --- a/neo/rawio/brainvisionrawio.py +++ b/neo/rawio/brainvisionrawio.py @@ -89,7 +89,7 @@ def _parse_header(self): self._buffer_descriptions[0][0][buffer_id] = { "type" : "raw", "file_path" : binary_filename, - "dtype" : sig_dtype, + "dtype" : str(sig_dtype), "order": "C", "file_offset" : 0, "shape" : shape, diff --git a/neo/rawio/utils.py b/neo/rawio/utils.py index c577c8c07..0365da625 100644 --- a/neo/rawio/utils.py +++ b/neo/rawio/utils.py @@ -19,7 +19,7 @@ def get_memmap_shape(filename, dtype, num_channels=None, offset=0): def get_memmap_chunk_from_opened_file(fid, num_channels, start, stop, dtype, file_offset=0): """ - Utility fonction to get a chunk as a memmap array directly from an opened file. + Utility function to get a chunk as a memmap array directly from an opened file. Using this instead memmap can avoid memmory consumption when multiprocessing. Similar mechanism is used in spikeinterface. From 401956a69adb5976ccc6a4aea77dc9af30ad77b3 Mon Sep 17 00:00:00 2001 From: Garcia Samuel Date: Mon, 21 Oct 2024 14:14:37 +0200 Subject: [PATCH 21/24] Apply suggestions from code review Co-authored-by: Heberto Mayorquin Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- doc/source/rawio.rst | 4 ++-- neo/rawio/baserawio.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/rawio.rst b/doc/source/rawio.rst index c974300d0..52127700a 100644 --- a/doc/source/rawio.rst +++ b/doc/source/rawio.rst @@ -297,8 +297,8 @@ For reading analog signals **neo.rawio** has 2 important concepts: A **signal_buffer** can contain one or several **signal_stream**'s (very often it is only one). There are two kind of formats that handle this concept: - * those which use :func:`np.memmap()` internally - * formats based on hdf5 + * Formats which use :func:`np.memmap()` internally + * Formats based on hdf5 There are many formats that do not handle this concept: diff --git a/neo/rawio/baserawio.py b/neo/rawio/baserawio.py index 86b3b22eb..34c3e85a3 100644 --- a/neo/rawio/baserawio.py +++ b/neo/rawio/baserawio.py @@ -186,7 +186,7 @@ def __init__(self, use_cache: bool = False, cache_path: str = "same_as_resource" self._has_buffer_description_api = False - def has_buffer_description_api(self): + def has_buffer_description_api(self) -> bool: """ Return if the reader handle the buffer API. If True then the reader support internally `get_analogsignal_buffer_description()` From 2fa19fb5e3d70eaed25b535481a7b3f3bc2511ec Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 21 Oct 2024 15:10:18 +0200 Subject: [PATCH 22/24] clean --- neo/rawio/maxwellrawio.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/neo/rawio/maxwellrawio.py b/neo/rawio/maxwellrawio.py index 8bb320a3c..6cb8ced6f 100644 --- a/neo/rawio/maxwellrawio.py +++ b/neo/rawio/maxwellrawio.py @@ -119,7 +119,6 @@ def _parse_header(self): # create signal channels max_sig_length = 0 - # self._signals = {} self._buffer_descriptions = {0 :{0 :{}}} self._stream_buffer_slice = {} sig_channels = [] @@ -138,12 +137,10 @@ def _parse_header(self): else: gain = settings["gain"][0] gain_uV = 3.3 / (1024 * gain) * 1e6 - # sigs = h5file["sig"] hdf5_path = "sig" mapping = h5file["mapping"] ids = np.array(mapping["channel"]) ids = ids[ids >= 0] - # self._channel_slice = ids self._stream_buffer_slice[stream_id] = ids elif int(version) > 20160704: settings = h5file["wells"][stream_id][self.rec_name]["settings"] @@ -159,7 +156,6 @@ def _parse_header(self): gain_uV = 3.3 / (1024 * gain) * 1e6 mapping = settings["mapping"] if "routed" in h5file["wells"][stream_id][self.rec_name]["groups"]: - # sigs = h5file["wells"][stream_id][self.rec_name]["groups"]["routed"]["raw"] hdf5_path = f"/wells/{stream_id}/{self.rec_name}/groups/routed/raw" else: warnings.warn(f"No 'routed' group found for well {stream_id}") @@ -194,7 +190,6 @@ def _parse_header(self): (ch_name, str(chan_id), sr, "uint16", "uV", gain_uV, offset_uV, stream_id, buffer_id) ) - # self._signals[stream_id] = sigs max_sig_length = max(max_sig_length, shape[1]) self._t_stop = max_sig_length / sr From c7299896d4a6d3dfd9cf4b63aae5cf061be466fb Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Wed, 23 Oct 2024 17:54:05 +0200 Subject: [PATCH 23/24] oups --- neo/rawio/brainvisionrawio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neo/rawio/brainvisionrawio.py b/neo/rawio/brainvisionrawio.py index 6ac597695..cd03e67d0 100644 --- a/neo/rawio/brainvisionrawio.py +++ b/neo/rawio/brainvisionrawio.py @@ -78,7 +78,7 @@ def _parse_header(self): if fmt not in fmts: raise NeoReadWriteError(f"the fmt {fmt} is not implmented. Must be one of {fmts}") - sig_dtype = fmts[fmt] + sig_dtype = np.dtype(fmts[fmt]) stream_id = "0" From b001c79244b2526ccbbf9c467661b16b2fcda99b Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Wed, 23 Oct 2024 18:51:34 +0200 Subject: [PATCH 24/24] more clean --- neo/rawio/axonrawio.py | 10 ------ neo/rawio/maxwellrawio.py | 56 +++++-------------------------- neo/rawio/micromedrawio.py | 3 -- neo/rawio/openephysbinaryrawio.py | 22 ------------ neo/rawio/spikeglxrawio.py | 41 ---------------------- neo/rawio/xarray_utils.py | 13 ++++--- 6 files changed, 14 insertions(+), 131 deletions(-) diff --git a/neo/rawio/axonrawio.py b/neo/rawio/axonrawio.py index 6980bf17a..1bca18485 100644 --- a/neo/rawio/axonrawio.py +++ b/neo/rawio/axonrawio.py @@ -307,19 +307,9 @@ def _segment_t_stop(self, block_index, seg_index): t_stop = self._t_starts[seg_index] + sig_size / self._sampling_rate return t_stop - # def _get_signal_size(self, block_index, seg_index, stream_index): - # shape = self._raw_signals[seg_index].shape - # return shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): return self._t_starts[seg_index] - # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - # if channel_indexes is None: - # channel_indexes = slice(None) - # raw_signals = self._raw_signals[seg_index][slice(i_start, i_stop), channel_indexes] - # return raw_signals - def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): return self._buffer_descriptions[block_index][seg_index][buffer_id] diff --git a/neo/rawio/maxwellrawio.py b/neo/rawio/maxwellrawio.py index 6cb8ced6f..790f2d19a 100644 --- a/neo/rawio/maxwellrawio.py +++ b/neo/rawio/maxwellrawio.py @@ -227,57 +227,17 @@ def _segment_t_stop(self, block_index, seg_index): def _get_analogsignal_buffer_description(self, block_index, seg_index, buffer_id): return self._buffer_descriptions[block_index][seg_index][buffer_id] - # def _get_signal_size(self, block_index, seg_index, stream_index): - # stream_id = self.header["signal_streams"][stream_index]["id"] - # sigs = self._signals[stream_id] - # return sigs.shape[1] - def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - # stream_id = self.header["signal_streams"][stream_index]["id"] - # sigs = self._signals[stream_id] - - # if i_start is None: - # i_start = 0 - # if i_stop is None: - # i_stop = sigs.shape[1] - - # resorted_indexes = None - # if channel_indexes is None: - # channel_indexes = slice(None) - # else: - # if np.array(channel_indexes).size > 1 and np.any(np.diff(channel_indexes) < 0): - # # get around h5py constraint that it does not allow datasets - # # to be indexed out of order - # order_f = np.argsort(channel_indexes) - # sorted_channel_indexes = channel_indexes[order_f] - # # use argsort again on order_f to obtain resorted_indexes - # resorted_indexes = np.argsort(order_f) - - # try: - # if resorted_indexes is None: - # if self._old_format: - # sigs = sigs[self._channel_slice, i_start:i_stop] - # sigs = sigs[channel_indexes] - # else: - # sigs = sigs[channel_indexes, i_start:i_stop] - # else: - # if self._old_format: - # sigs = sigs[self._channel_slice, i_start:i_stop] - # sigs = sigs[sorted_channel_indexes] - # else: - # sigs = sigs[sorted_channel_indexes, i_start:i_stop] - # sigs = sigs[resorted_indexes] - # except OSError as e: - # print("*" * 10) - # print(_hdf_maxwell_error) - # print("*" * 10) - # raise (e) - # sigs = sigs.T - - # return sigs + def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): + try: + return super()._get_analogsignal_chunk(block_index, seg_index, i_start, i_stop, stream_index, channel_indexes) + except OSError as e: + print("*" * 10) + print(_hdf_maxwell_error) + print("*" * 10) + raise (e) _hdf_maxwell_error = """Maxwell file format is based on HDF5. diff --git a/neo/rawio/micromedrawio.py b/neo/rawio/micromedrawio.py index 7e30a3829..1f807f0d3 100644 --- a/neo/rawio/micromedrawio.py +++ b/neo/rawio/micromedrawio.py @@ -102,9 +102,6 @@ def _parse_header(self): # raw signals memmap sig_dtype = "u" + str(Bytes) - # self._raw_signals = np.memmap(self.filename, dtype=sig_dtype, mode="r", offset=Data_Start_Offset).reshape( - # -1, Num_Chan - # ) signal_shape = get_memmap_shape(self.filename, sig_dtype, num_channels=Num_Chan, offset=Data_Start_Offset) buffer_id = "0" stream_id = "0" diff --git a/neo/rawio/openephysbinaryrawio.py b/neo/rawio/openephysbinaryrawio.py index 565771ae0..ade2c2d1b 100644 --- a/neo/rawio/openephysbinaryrawio.py +++ b/neo/rawio/openephysbinaryrawio.py @@ -171,9 +171,6 @@ def _parse_header(self): self._buffer_descriptions[block_index][seg_index] = {} for stream_index, info in self._sig_streams[block_index][seg_index].items(): num_channels = len(info["channels"]) - # memmap_sigs = np.memmap(info["raw_filename"], info["dtype"], order="C", mode="r").reshape( - # -1, num_channels - # ) stream_id = str(stream_index) buffer_id = str(stream_index) shape = get_memmap_shape(info["raw_filename"], info["dtype"], num_channels=num_channels, @@ -200,8 +197,6 @@ def _parse_header(self): else: self._stream_buffer_slice[stream_id] = None - # info["memmap"] = memmap_sigs - # events zone # channel map: one channel one stream event_channels = [] @@ -304,7 +299,6 @@ def _parse_header(self): stream_id = str(stream_index) buffer_id = str(stream_index) sig_size = self._buffer_descriptions[block_index][seg_index][buffer_id]["shape"][0] - # dur = info["memmap"].shape[0] / float(info["sample_rate"]) dur = sig_size / float(info["sample_rate"]) t_stop = t_start + dur if global_t_start is None or global_t_start > t_start: @@ -403,26 +397,10 @@ def _channels_to_group_id(self, channel_indexes): group_id = group_ids[0] return group_id - # def _get_signal_size(self, block_index, seg_index, stream_index): - # sigs = self._sig_streams[block_index][seg_index][stream_index]["memmap"] - # return sigs.shape[0] - def _get_signal_t_start(self, block_index, seg_index, stream_index): t_start = self._sig_streams[block_index][seg_index][stream_index]["t_start"] return t_start - # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - # sigs = self._sig_streams[block_index][seg_index][stream_index]["memmap"] - # has_sync_trace = self._sig_streams[block_index][seg_index][stream_index]["has_sync_trace"] - - # if not self.load_sync_channel and has_sync_trace: - # sigs = sigs[:, :-1] - - # sigs = sigs[i_start:i_stop, :] - # if channel_indexes is not None: - # sigs = sigs[:, channel_indexes] - # return sigs - def _spike_count(self, block_index, seg_index, unit_index): pass diff --git a/neo/rawio/spikeglxrawio.py b/neo/rawio/spikeglxrawio.py index 419528e93..3a66a9878 100644 --- a/neo/rawio/spikeglxrawio.py +++ b/neo/rawio/spikeglxrawio.py @@ -138,13 +138,6 @@ def _parse_header(self): raise KeyError(f"key {key} is already in the signals_info_dict") self.signals_info_dict[key] = info - # create memmap - # data = np.memmap(info["bin_file"], dtype="int16", mode="r", offset=0, order="C") - # this should be (info['sample_length'], info['num_chan']) - # be some file are shorten - # data = data.reshape(-1, info["num_chan"]) - # self._memmaps[key] = data - buffer_id = stream_name block_index = 0 @@ -291,43 +284,9 @@ def _segment_t_start(self, block_index, seg_index): def _segment_t_stop(self, block_index, seg_index): return self._t_stops[seg_index] - # def _get_signal_size(self, block_index, seg_index, stream_index): - # stream_id = self.header["signal_streams"][stream_index]["id"] - # memmap = self._memmaps[seg_index, stream_id] - # return int(memmap.shape[0]) - def _get_signal_t_start(self, block_index, seg_index, stream_index): return 0.0 - # def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, stream_index, channel_indexes): - # stream_id = self.header["signal_streams"][stream_index]["id"] - # memmap = self._memmaps[seg_index, stream_id] - # stream_name = self.header["signal_streams"]["name"][stream_index] - - # # take care of sync channel - # info = self.signals_info_dict[0, stream_name] - # if not self.load_sync_channel and info["has_sync_trace"]: - # memmap = memmap[:, :-1] - - # # since we cut the memmap, we can simplify the channel selection - # if channel_indexes is None: - # channel_selection = slice(None) - # elif isinstance(channel_indexes, slice): - # channel_selection = channel_indexes - # elif not isinstance(channel_indexes, slice): - # if np.all(np.diff(channel_indexes) == 1): - # # consecutive channel then slice this avoid a copy (because of ndarray.take(...) - # # and so keep the underlying memmap - # channel_selection = slice(channel_indexes[0], channel_indexes[0] + len(channel_indexes)) - # else: - # channel_selection = channel_indexes - # else: - # raise ValueError("get_analogsignal_chunk : channel_indexes" "must be slice or list or array of int") - - # raw_signals = memmap[slice(i_start, i_stop), channel_selection] - - # return raw_signals - def _event_count(self, event_channel_idx, block_index=None, seg_index=None): timestamps, _, _ = self._get_event_timestamps(block_index, seg_index, event_channel_idx, None, None) return timestamps.size diff --git a/neo/rawio/xarray_utils.py b/neo/rawio/xarray_utils.py index 8652e8614..99c868b8d 100644 --- a/neo/rawio/xarray_utils.py +++ b/neo/rawio/xarray_utils.py @@ -9,7 +9,6 @@ Note : * only some IOs support this at the moment has_buffer_description_api()=True """ -import json import numpy as np @@ -46,9 +45,9 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=Non rfs = dict() rfs["version"] = 1 rfs["refs"] = dict() - rfs["refs"][".zgroup"] = json.dumps(dict(zarr_format=2)) + rfs["refs"][".zgroup"] = dict(zarr_format=2) zattrs = dict(name=buffer_name) - rfs["refs"][".zattrs"] = json.dumps(zattrs) + rfs["refs"][".zattrs"] = zattrs descr = rawio_reader.get_analogsignal_buffer_description(block_index=block_index, seg_index=seg_index, @@ -76,8 +75,8 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=Non zattrs = dict( _ARRAY_DIMENSIONS=['channel'], ) - rfs["refs"]["channel/.zattrs"] =json.dumps(zattrs) - rfs["refs"]["channel/.zarray"] =json.dumps(zarray) + rfs["refs"]["channel/.zattrs"] = zattrs + rfs["refs"]["channel/.zarray"] = zarray # traces buffer dtype = np.dtype(descr["dtype"]) @@ -110,8 +109,8 @@ def to_zarr_v2_reference(rawio_reader, block_index=0, seg_index=0, buffer_id=Non array_size = np.prod(descr["shape"], dtype='int64') chunk_size = array_size * dtype.itemsize rfs["refs"]["traces/0.0"] = [str(descr["file_path"]), descr["file_offset"], chunk_size] - rfs["refs"]["traces/.zarray"] =json.dumps(zarray) - rfs["refs"]["traces/.zattrs"] =json.dumps(zattrs) + rfs["refs"]["traces/.zarray"] = zarray + rfs["refs"]["traces/.zattrs"] = zattrs elif descr["type"] == "hdf5": raise NotImplementedError