From 66bf38a71426e6e5baaf0434c9c9791e70523b1c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 11:10:48 +0200 Subject: [PATCH 01/25] Try to find datetime offset for all fields with time unit --- setup.cfg | 1 + src/scippnexus/_common.py | 24 ++------------------ src/scippnexus/nxevent_data.py | 8 ++----- src/scippnexus/nxlog.py | 18 +++++++-------- src/scippnexus/nxobject.py | 40 ++++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 37 deletions(-) diff --git a/setup.cfg b/setup.cfg index 4fd0e862..ba6da2ed 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,6 +17,7 @@ package_dir = = src packages = find: install_requires = + dateutil scipp>=0.12 h5py python_requires = >=3.8 diff --git a/src/scippnexus/_common.py b/src/scippnexus/_common.py index b0209b61..aaf4e57b 100644 --- a/src/scippnexus/_common.py +++ b/src/scippnexus/_common.py @@ -9,7 +9,6 @@ def convert_time_to_datetime64( raw_times: sc.Variable, - group_path: str, start: str = None, scaling_factor: Union[float, np.float_] = None) -> sc.Variable: """ @@ -25,8 +24,6 @@ def convert_time_to_datetime64( Args: raw_times: The raw time data from a nexus file. - group_path: The path within the nexus file to the log being read. - Used to generate warnings if loading the log fails. start: Optional, the start time of the log in an ISO8601 string. If not provided, defaults to the beginning of the unix epoch (1970-01-01T00:00:00). @@ -34,31 +31,14 @@ def convert_time_to_datetime64( time series data and the unit of the raw_times Variable. If not provided, defaults to 1 (a no-op scaling factor). """ - try: - raw_times_ns = sc.to_unit(raw_times, sc.units.ns, copy=False) - except sc.UnitError: - raise sc.UnitError( - f"The units of time in the entry at " - f"'{group_path}/time{{units}}' must be convertible to seconds, " - f"but this cannot be done for '{raw_times.unit}'. 
Skipping " - f"loading group at '{group_path}'.") - - try: - _start_ts = sc.scalar(value=np.datetime64(start or "1970-01-01T00:00:00"), - unit=sc.units.ns, - dtype=sc.DType.datetime64) - except ValueError: - raise ValueError( - f"The date string '{start}' in the entry at " - f"'{group_path}/time@start' failed to parse as an ISO8601 date. " - f"Skipping loading group at '{group_path}'") + raw_times_ns = sc.to_unit(raw_times, sc.units.ns, copy=False) if scaling_factor is None: times = raw_times_ns.astype(sc.DType.int64, copy=False) else: _scale = sc.scalar(value=scaling_factor) times = (raw_times_ns * _scale).astype(sc.DType.int64, copy=False) - return _start_ts + times + return start + times def _to_canonical_select(dims: List[str], select: ScippIndex) -> ScippIndex: diff --git a/src/scippnexus/nxevent_data.py b/src/scippnexus/nxevent_data.py index c08dd8e7..b30b7d75 100644 --- a/src/scippnexus/nxevent_data.py +++ b/src/scippnexus/nxevent_data.py @@ -5,7 +5,7 @@ import numpy as np import scipp as sc -from ._common import to_plain_index, convert_time_to_datetime64 +from ._common import to_plain_index from .nxobject import NXobject, ScippIndex, NexusStructureError _event_dimension = "event" @@ -57,11 +57,7 @@ def _getitem(self, select: ScippIndex) -> sc.DataArray: index = slice(start, stop, stride) event_index = self['event_index'][index].values - event_time_zero = self['event_time_zero'] - event_time_zero = convert_time_to_datetime64( - event_time_zero[index], - start=event_time_zero.attrs.get('offset'), - group_path=self.name) + event_time_zero = self['event_time_zero'][index] num_event = self["event_time_offset"].shape[0] # Some files contain uint64 "max" indices, which turn into negatives during diff --git a/src/scippnexus/nxlog.py b/src/scippnexus/nxlog.py index 35f48060..289a842f 100644 --- a/src/scippnexus/nxlog.py +++ b/src/scippnexus/nxlog.py @@ -39,15 +39,15 @@ def _getitem(self, select: ScippIndex) -> sc.DataArray: # The 'time' field in NXlog contains extra 
properties 'start' and # 'scaling_factor' that are not handled by NXdata. These are used # to transform to a datetime-coord. - if 'time' in self: - if 'time' not in data.coords: - raise sc.DimensionError( - "NXlog is time-dependent, but failed to load `time` dataset") - data.coords['time'] = convert_time_to_datetime64( - raw_times=data.coords.pop('time'), - start=self['time'].attrs.get('start'), - scaling_factor=self['time'].attrs.get('scaling_factor'), - group_path=self['time'].name) + #if 'time' in self: + # if 'time' not in data.coords: + # raise sc.DimensionError( + # "NXlog is time-dependent, but failed to load `time` dataset") + # data.coords['time'] = convert_time_to_datetime64( + # raw_times=data.coords.pop('time'), + # start=self['time'].attrs.get('start'), + # scaling_factor=self['time'].attrs.get('scaling_factor'), + # group_path=self['time'].name) return data def _get_field_dims(self, name: str) -> Union[None, List[str]]: diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index 9cbe7fec..f3c60002 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -6,6 +6,7 @@ from enum import Enum, auto import functools from typing import List, Union, NoReturn, Any, Dict, Tuple, Protocol +import dateutil import numpy as np import scipp as sc import h5py @@ -14,6 +15,7 @@ from ._hdf5_nexus import _ensure_supported_int_type, _warn_latin1_decode from .typing import H5Group, H5Dataset, ScippIndex from ._common import to_plain_index +from ._common import convert_time_to_datetime64 NXobjectIndex = Union[str, ScippIndex] @@ -84,6 +86,9 @@ def __getitem__(self, name: str) -> Any: def __setitem__(self, name: str, val: Any): self._attrs[name] = val + def __iter__(self): + yield from self._attrs + def get(self, name: str, default=None) -> Any: return self[name] if name in self else default @@ -91,6 +96,31 @@ def keys(self): return self._attrs.keys() +def _is_time(obj): + dummy = sc.empty(dims=[], shape=[], unit=obj.unit) + try: + 
dummy.to(unit='s') + return True + except sc.UnitError: + return False + + +def _as_datetime(obj: Any): + if isinstance(obj, str): + try: + # datetime.fromisoformat cannot parse time zones and recommends dateutil + dt = dateutil.parser.isoparse(obj) + # NumPy and scipp cannot handle timezone information. We therefore strip it, + # i.e., interpret time as local time. If time is given in UTC this will lead + # to misleading results since we have no information about the actual time + # zone. + dt = dt.replace(tzinfo=None) + return sc.datetime(np.datetime64(dt), unit='ns') + except ValueError: + pass + return None + + class Field: """NeXus field. @@ -137,6 +167,16 @@ def __getitem__(self, select) -> sc.Variable: self._dataset.read_direct(variable.values, source_sel=index) else: variable.values = self._dataset[index] + if _is_time(variable): + starts = [] + for name in self.attrs: + if (dt := _as_datetime(self.attrs[name])) is not None: + starts.append(dt) + if len(starts) == 1: + variable = convert_time_to_datetime64( + variable, + start=starts[0], + scaling_factor=self.attrs.get('scaling_factor')) return variable def __repr__(self) -> str: From 4ffef5614db3cce0af710f02090ce5f9ba6a429f Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 11:22:56 +0200 Subject: [PATCH 02/25] Add mechanism for forcing Field to datetime --- src/scippnexus/nxlog.py | 19 +++++-------------- src/scippnexus/nxobject.py | 10 +++++++--- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/scippnexus/nxlog.py b/src/scippnexus/nxlog.py index 289a842f..7037401a 100644 --- a/src/scippnexus/nxlog.py +++ b/src/scippnexus/nxlog.py @@ -35,20 +35,11 @@ def _nxbase(self) -> NXdata: return NXdata(self._group, signal_name_default='value', axes=axes) def _getitem(self, select: ScippIndex) -> sc.DataArray: - data = self._nxbase[select] - # The 'time' field in NXlog contains extra properties 'start' and - # 'scaling_factor' that are not handled by NXdata. 
These are used - # to transform to a datetime-coord. - #if 'time' in self: - # if 'time' not in data.coords: - # raise sc.DimensionError( - # "NXlog is time-dependent, but failed to load `time` dataset") - # data.coords['time'] = convert_time_to_datetime64( - # raw_times=data.coords.pop('time'), - # start=self['time'].attrs.get('start'), - # scaling_factor=self['time'].attrs.get('scaling_factor'), - # group_path=self['time'].name) - return data + base = self._nxbase + # Field loads datetime offset attributes automatically, but for NXlog this + # may apparently be omitted and must then be interpreted as relative to epoch. + base.child_params['time'] = {'is_time': True} + return base[select] def _get_field_dims(self, name: str) -> Union[None, List[str]]: return self._nxbase._get_field_dims(name) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index f3c60002..93217a6b 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -126,9 +126,10 @@ class Field: In HDF5 fields are represented as dataset. """ - def __init__(self, dataset: H5Dataset, dims=None): + def __init__(self, dataset: H5Dataset, dims=None, is_time=None): self._dataset = dataset self._shape = list(self._dataset.shape) + self._is_time = is_time # NeXus treats [] and [1] interchangeably. In general this is ill-defined, but # the best we can do appears to be squeezing unless the file provides names for # dimensions. 
The shape property of this class does thus not necessarily return @@ -167,11 +168,13 @@ def __getitem__(self, select) -> sc.Variable: self._dataset.read_direct(variable.values, source_sel=index) else: variable.values = self._dataset[index] - if _is_time(variable): + if self._is_time or _is_time(variable): starts = [] for name in self.attrs: if (dt := _as_datetime(self.attrs[name])) is not None: starts.append(dt) + if self._is_time and len(starts) == 0: + starts.append(sc.epoch(unit='ns')) if len(starts) == 1: variable = convert_time_to_datetime64( variable, @@ -248,6 +251,7 @@ class NXobject: """ def __init__(self, group: H5Group): self._group = group + self.child_params = {} def _get_child( self, @@ -260,7 +264,7 @@ def _get_child( item = self._group[name] if hasattr(item, 'shape'): dims = self._get_field_dims(name) if use_field_dims else None - return Field(item, dims=dims) + return Field(item, dims=dims, **self.child_params.get(name, {})) else: return _make(item) da = self._getitem(name) From 516ea4c238823e9a40ecf4d415836f67583ea4ac Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 11:34:02 +0200 Subject: [PATCH 03/25] Support writing datetimes --- src/scippnexus/nxobject.py | 5 +++++ tests/nxdata_test.py | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index 93217a6b..874519d2 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -365,9 +365,14 @@ def create_field(self, name: str, data: DimensionedArray, **kwargs) -> Field: values = data.values if data.dtype == sc.DType.string: values = np.array(data.values, dtype=object) + elif data.dtype == sc.DType.datetime64: + start = sc.epoch(unit=data.unit) + values = (data - start).values dataset = self._group.create_dataset(name, data=values, **kwargs) if data.unit is not None: dataset.attrs['units'] = str(data.unit) + if data.dtype == sc.DType.datetime64: + dataset.attrs['start'] = str(start.value) return 
Field(dataset, data.dims) def create_class(self, name: str, nx_class: NX_class) -> NXobject: diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 9da98752..a1dd6e09 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -1,4 +1,5 @@ import h5py +import numpy as np import scipp as sc from scippnexus import NXroot, NX_class import pytest @@ -213,6 +214,15 @@ def test_create_field_from_variable(nxroot, unit): assert sc.identical(loaded, var.rename(xx=loaded.dim)) +def test_create_datetime_field_from_variable(nxroot): + var = sc.datetime(np.datetime64('now'), unit='ns') + sc.arange( + 'time', 1, 4, dtype='int64', unit='ns') + nxroot.create_field('field', var) + loaded = nxroot['field'][...] + # Nexus does not support storing dim labels + assert sc.identical(loaded, var.rename(time=loaded.dim)) + + @pytest.mark.parametrize("nx_class", [NX_class.NXdata, NX_class.NXlog]) def test_create_class(nxroot, nx_class): group = nxroot.create_class('group', nx_class) From 29987a37430f886b5eccda9b6b4445ea012864b7 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 12:04:53 +0200 Subject: [PATCH 04/25] Test datetime loading and preserve precision --- src/scippnexus/_common.py | 15 ++++++++++++--- src/scippnexus/nxobject.py | 6 +----- tests/nxdata_test.py | 17 +++++++++++++++++ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/scippnexus/_common.py b/src/scippnexus/_common.py index aaf4e57b..ea691d56 100644 --- a/src/scippnexus/_common.py +++ b/src/scippnexus/_common.py @@ -31,13 +31,22 @@ def convert_time_to_datetime64( time series data and the unit of the raw_times Variable. If not provided, defaults to 1 (a no-op scaling factor). 
""" - raw_times_ns = sc.to_unit(raw_times, sc.units.ns, copy=False) + if (raw_times.dtype + in (sc.DType.float64, sc.DType.float32)) or scaling_factor is not None: + unit = sc.units.ns + else: + # determine more precise unit + ratio = sc.scalar(1.0, unit=start.unit) / sc.scalar( + 1.0, unit=raw_times.unit).to(unit=start.unit) + unit = start.unit if ratio.value < 1.0 else raw_times.unit + + raw_times = raw_times.to(unit=unit, copy=False) if scaling_factor is None: - times = raw_times_ns.astype(sc.DType.int64, copy=False) + times = raw_times.astype(sc.DType.int64, copy=False) else: _scale = sc.scalar(value=scaling_factor) - times = (raw_times_ns * _scale).astype(sc.DType.int64, copy=False) + times = (raw_times * _scale).astype(sc.DType.int64, copy=False) return start + times diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index 874519d2..ef891329 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -6,7 +6,6 @@ from enum import Enum, auto import functools from typing import List, Union, NoReturn, Any, Dict, Tuple, Protocol -import dateutil import numpy as np import scipp as sc import h5py @@ -108,14 +107,11 @@ def _is_time(obj): def _as_datetime(obj: Any): if isinstance(obj, str): try: - # datetime.fromisoformat cannot parse time zones and recommends dateutil - dt = dateutil.parser.isoparse(obj) # NumPy and scipp cannot handle timezone information. We therefore strip it, # i.e., interpret time as local time. If time is given in UTC this will lead # to misleading results since we have no information about the actual time # zone. 
- dt = dt.replace(tzinfo=None) - return sc.datetime(np.datetime64(dt), unit='ns') + return sc.datetime(np.datetime64(obj)) except ValueError: pass return None diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index a1dd6e09..d3060c77 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -301,3 +301,20 @@ def test_unnamed_extra_dims_of_multidim_coords_are_squeezed(nxroot): assert data['xx'].ndim == 1 assert data['xx'].shape == [2] assert sc.identical(data['xx'][...], xx['ignored', 0]) + +def test_fields_with_datetime_attribute_are_loaded_as_datetime(nxroot): + da = sc.DataArray( + sc.epoch(unit='s') + + sc.array(dims=['xx', 'yy'], unit='s', values=[[1, 2, 3], [4, 5, 6]])) + da.coords['xx'] = da.data['yy', 0] + da.coords['xx2'] = da.data['yy', 1] + da.coords['yy'] = da.data['xx', 0] + data = nxroot.create_class('data1', NX_class.NXdata) + data.attrs['axes'] = da.dims + data.attrs['signal'] = 'signal' + data.create_field('signal', da.data) + data.create_field('xx', da.coords['xx']) + data.create_field('xx2', da.coords['xx2']) + data.create_field('yy', da.coords['yy']) + print(data[...], da) + assert sc.identical(data[...], da) From 3033339798ce5a92ed32f1d525a60a68734b3a2c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 12:43:38 +0200 Subject: [PATCH 05/25] Test datetime handling and strip timezone --- setup.cfg | 1 - src/scippnexus/_common.py | 2 +- src/scippnexus/nxobject.py | 7 ++++++ tests/nexus_test.py | 51 ++++++++++++++++++++++++++++++++++++++ tests/nxdata_test.py | 1 - 5 files changed, 59 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index ba6da2ed..4fd0e862 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,6 +17,7 @@ package_dir = = src packages = find: install_requires = - dateutil scipp>=0.12 h5py python_requires = >=3.8 diff --git a/src/scippnexus/_common.py b/src/scippnexus/_common.py index ea691d56..5efa811e 100644 --- a/src/scippnexus/_common.py +++ b/src/scippnexus/_common.py @@ -47,7 +47,7 @@ def 
convert_time_to_datetime64( else: _scale = sc.scalar(value=scaling_factor) times = (raw_times * _scale).astype(sc.DType.int64, copy=False) - return start + times + return start.to(unit=unit, copy=False) + times def _to_canonical_select(dims: List[str], select: ScippIndex) -> ScippIndex: diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index ef891329..f60a7b94 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Scipp contributors (https://github.com/scipp) # @author Simon Heybrock from __future__ import annotations +import re import warnings from enum import Enum, auto import functools @@ -111,6 +112,12 @@ def _as_datetime(obj: Any): # i.e., interpret time as local time. If time is given in UTC this will lead # to misleading results since we have no information about the actual time # zone. + # Would like to use dateutil, but with Python's datetime we do not get + # nanosecond precision. + if 'T' in obj: + date, time = obj.split('T') + time = re.split('Z|\+|-', time)[0] + obj = f'{date}T{time}' return sc.datetime(np.datetime64(obj)) except ValueError: pass diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 97082219..4e03e88c 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -212,6 +212,57 @@ def test_field_of_extended_ascii_in_ascii_encoded_dataset_is_loaded_correctly(): sc.array(dims=['dim_0'], values=["run at rot=90°", "run at rot=90°x"])) +def test_ms_field_with_second_datetime_attribute_loaded_as_ms_datetime(nxroot): + nxroot['mytime'] = sc.arange('ignored', 2, unit='ms') + nxroot['mytime'].attrs['start_time'] = '2022-12-12T12:13:14' + assert sc.identical( + nxroot['mytime'][...], + sc.datetimes(dims=['dim_0'], + unit='ms', + values=['2022-12-12T12:13:14.000', '2022-12-12T12:13:14.001'])) + + + +def test_ns_field_with_second_datetime_attribute_loaded_as_ns_datetime(nxroot): + nxroot['mytime'] = sc.arange('ignored', 2, unit='ns') + 
nxroot['mytime'].attrs['start_time'] = '1970-01-01T00:00:00' + assert sc.identical( + nxroot['mytime'][...], + sc.datetimes( + dims=['dim_0'], + unit='ns', + values=['1970-01-01T00:00:00.000000000', '1970-01-01T00:00:00.000000001'])) + + + +def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): + nxroot['mytime'] = sc.arange('ignored', 2, unit='s') + nxroot['mytime'].attrs['start_time'] = '1970-01-01T00:00:00.000000000' + assert sc.identical( + nxroot['mytime'][...], + sc.datetimes(dims=['dim_0'], + unit='ns', + values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) + + + +@pytest.mark.parametrize('timezone', ['Z', '+04', '+00', '-02', '+1130', '-0930', '+11:30', '-09:30']) +def test_timezone_information_in_datetime_attribute_is_dropped(nxroot, timezone): + nxroot['mytime'] = sc.arange('ignored', 2, unit='s') + nxroot['mytime'].attrs['start_time'] = f'1970-01-01T00:00:00{timezone}' + print( + nxroot['mytime'][...], + sc.datetimes(dims=['dim_0'], + unit='s', + values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) + assert sc.identical( + nxroot['mytime'][...], + sc.datetimes(dims=['dim_0'], + unit='s', + values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) + + + def create_event_data_ids_1234(group): group['event_id'] = sc.array(dims=[''], unit=None, values=[1, 2, 4, 1, 2, 2]) group['event_time_offset'] = sc.array(dims=[''], diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index d3060c77..66e94a57 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -316,5 +316,4 @@ def test_fields_with_datetime_attribute_are_loaded_as_datetime(nxroot): data.create_field('xx', da.coords['xx']) data.create_field('xx2', da.coords['xx2']) data.create_field('yy', da.coords['yy']) - print(data[...], da) assert sc.identical(data[...], da) From 78e2913455752f78e973638ca8ce9d1f74573186 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 12:58:53 +0200 Subject: [PATCH 06/25] Update Nexus format interpretation docs --- 
.../docs/our-interpretation-of-the-nexus-format.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/scippnexus/docs/our-interpretation-of-the-nexus-format.md b/src/scippnexus/docs/our-interpretation-of-the-nexus-format.md index ab3931be..af8576e9 100644 --- a/src/scippnexus/docs/our-interpretation-of-the-nexus-format.md +++ b/src/scippnexus/docs/our-interpretation-of-the-nexus-format.md @@ -47,6 +47,16 @@ More concretely this means that, e.g., for loading an `NXdetector` from a NH, th If the above yields no more than one item, the group can be loaded. +## Datetime fields + +HDF5 does not support storing date and time information such as `np.datetime64`. +`NXlog` and `NXevent_data` define specific attributes for fields that have to be interpreted as date and time, in particular [NXlog/time@start](https://manual.nexusformat.org/classes/base_classes/NXlog.html#nxlog-time-start-attribute) and [NXevent_data/event_time_zero@offset](https://manual.nexusformat.org/classes/base_classes/NXevent_data.html#nxevent-data-event-time-zero-offset-attribute). +No *general* definition or intention is documented in the NF, but according to TR this is nevertheless standard. +Due to the attribute naming mismatch in the two cases where it *is* specified we need to assume that naming is arbitrary. +Therefore, we search *all* attributes of a field for a date and time offset, provided that the field's unit is a time unit. +It is unclear what should be done in the case of multiple matches. +As of April 2022 we ignore the offset in this case, since guessing which one to use based on the attribute name does not seem desirable. + ## Bin edges For [NXdetector](https://manual.nexusformat.org/classes/base_classes/NXdetector.html) the NF defines a [time_of_flight](https://manual.nexusformat.org/classes/base_classes/NXdetector.html#nxdetector-time-of-flight-field) field, exceeding the data shape by one, i.e., it is meant as bin-edges. 
From c7efa15377a37b896ec87fc937c4923f8bf5237a Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 13:01:47 +0200 Subject: [PATCH 07/25] Use re.escape --- src/scippnexus/nxobject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index f60a7b94..efd04ee6 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -116,7 +116,7 @@ def _as_datetime(obj: Any): # nanosecond precision. if 'T' in obj: date, time = obj.split('T') - time = re.split('Z|\+|-', time)[0] + time = re.split(f'Z|{re.escape("+")}|-', time)[0] obj = f'{date}T{time}' return sc.datetime(np.datetime64(obj)) except ValueError: From 5c70f3fdd3faed53c72c4f528a57c02f3aee33f9 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 13:06:23 +0200 Subject: [PATCH 08/25] Remove unused --- src/scippnexus/nxlog.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scippnexus/nxlog.py b/src/scippnexus/nxlog.py index 7037401a..e227723b 100644 --- a/src/scippnexus/nxlog.py +++ b/src/scippnexus/nxlog.py @@ -4,7 +4,6 @@ from typing import List, Union import scipp as sc -from ._common import convert_time_to_datetime64 from .nxobject import NXobject, ScippIndex from .nxdata import NXdata From 87a372e03be388aed5ba0fabd78bb65660e1eb5d Mon Sep 17 00:00:00 2001 From: SimonHeybrock Date: Tue, 5 Apr 2022 11:06:51 +0000 Subject: [PATCH 09/25] Apply automatic formatting --- tests/nexus_test.py | 7 ++----- tests/nxdata_test.py | 1 + 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 4e03e88c..3938c344 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -222,7 +222,6 @@ def test_ms_field_with_second_datetime_attribute_loaded_as_ms_datetime(nxroot): values=['2022-12-12T12:13:14.000', '2022-12-12T12:13:14.001'])) - def test_ns_field_with_second_datetime_attribute_loaded_as_ns_datetime(nxroot): nxroot['mytime'] = sc.arange('ignored', 2, 
unit='ns') nxroot['mytime'].attrs['start_time'] = '1970-01-01T00:00:00' @@ -234,7 +233,6 @@ def test_ns_field_with_second_datetime_attribute_loaded_as_ns_datetime(nxroot): values=['1970-01-01T00:00:00.000000000', '1970-01-01T00:00:00.000000001'])) - def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): nxroot['mytime'] = sc.arange('ignored', 2, unit='s') nxroot['mytime'].attrs['start_time'] = '1970-01-01T00:00:00.000000000' @@ -245,8 +243,8 @@ def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) - -@pytest.mark.parametrize('timezone', ['Z', '+04', '+00', '-02', '+1130', '-0930', '+11:30', '-09:30']) +@pytest.mark.parametrize( + 'timezone', ['Z', '+04', '+00', '-02', '+1130', '-0930', '+11:30', '-09:30']) def test_timezone_information_in_datetime_attribute_is_dropped(nxroot, timezone): nxroot['mytime'] = sc.arange('ignored', 2, unit='s') nxroot['mytime'].attrs['start_time'] = f'1970-01-01T00:00:00{timezone}' @@ -262,7 +260,6 @@ def test_timezone_information_in_datetime_attribute_is_dropped(nxroot, timezone) values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) - def create_event_data_ids_1234(group): group['event_id'] = sc.array(dims=[''], unit=None, values=[1, 2, 4, 1, 2, 2]) group['event_time_offset'] = sc.array(dims=[''], diff --git a/tests/nxdata_test.py b/tests/nxdata_test.py index 66e94a57..077bcf10 100644 --- a/tests/nxdata_test.py +++ b/tests/nxdata_test.py @@ -302,6 +302,7 @@ def test_unnamed_extra_dims_of_multidim_coords_are_squeezed(nxroot): assert data['xx'].shape == [2] assert sc.identical(data['xx'][...], xx['ignored', 0]) + def test_fields_with_datetime_attribute_are_loaded_as_datetime(nxroot): da = sc.DataArray( sc.epoch(unit='s') + From ade5af88dcccc745e51e54308d87d6718c483241 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 15:24:06 +0200 Subject: [PATCH 10/25] Remove print --- tests/nexus_test.py | 5 ----- 1 file 
changed, 5 deletions(-) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 3938c344..ea4d1853 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -248,11 +248,6 @@ def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): def test_timezone_information_in_datetime_attribute_is_dropped(nxroot, timezone): nxroot['mytime'] = sc.arange('ignored', 2, unit='s') nxroot['mytime'].attrs['start_time'] = f'1970-01-01T00:00:00{timezone}' - print( - nxroot['mytime'][...], - sc.datetimes(dims=['dim_0'], - unit='s', - values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) assert sc.identical( nxroot['mytime'][...], sc.datetimes(dims=['dim_0'], From 15c74b590274cbb64a00e0407619d8a67bc41e92 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 5 Apr 2022 15:27:25 +0200 Subject: [PATCH 11/25] Test that multiple candidate offsets are simply ignored --- tests/nexus_test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index ea4d1853..630b6cf0 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -255,6 +255,14 @@ def test_timezone_information_in_datetime_attribute_is_dropped(nxroot, timezone) values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) +def test_loads_bare_timestamps_if_multiple_candidate_datetime_offsets_found(nxroot): + offsets = sc.arange('ignored', 2, unit='ms') + nxroot['mytime'] = offsets + nxroot['mytime'].attrs['offset'] = '2022-12-12T12:13:14' + nxroot['mytime'].attrs['start_time'] = '2022-12-12T12:13:15' + assert sc.identical(nxroot['mytime'][...], offsets.rename(ignored='dim_0')) + + def create_event_data_ids_1234(group): group['event_id'] = sc.array(dims=[''], unit=None, values=[1, 2, 4, 1, 2, 2]) group['event_time_offset'] = sc.array(dims=[''], From b807db854fab69ddfc32fab1766e8cde51ad79ca Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 6 Apr 2022 12:48:43 +0200 Subject: [PATCH 12/25] Improve readability --- src/scippnexus/_common.py | 10 
++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/scippnexus/_common.py b/src/scippnexus/_common.py index 5efa811e..fad7edc7 100644 --- a/src/scippnexus/_common.py +++ b/src/scippnexus/_common.py @@ -40,14 +40,12 @@ def convert_time_to_datetime64( 1.0, unit=raw_times.unit).to(unit=start.unit) unit = start.unit if ratio.value < 1.0 else raw_times.unit - raw_times = raw_times.to(unit=unit, copy=False) - if scaling_factor is None: - times = raw_times.astype(sc.DType.int64, copy=False) + times = raw_times else: - _scale = sc.scalar(value=scaling_factor) - times = (raw_times * _scale).astype(sc.DType.int64, copy=False) - return start.to(unit=unit, copy=False) + times + times = raw_times * sc.scalar(value=scaling_factor) + return start.to(unit=unit, copy=False) + times.to( + dtype=sc.DType.int64, unit=unit, copy=False) def _to_canonical_select(dims: List[str], select: ScippIndex) -> ScippIndex: From 4c7a89806bde3323759dc9df1ba11e98f2dc9da7 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 6 Apr 2022 12:50:27 +0200 Subject: [PATCH 13/25] Clarify docs --- src/scippnexus/docs/our-interpretation-of-the-nexus-format.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scippnexus/docs/our-interpretation-of-the-nexus-format.md b/src/scippnexus/docs/our-interpretation-of-the-nexus-format.md index af8576e9..022d8a0f 100644 --- a/src/scippnexus/docs/our-interpretation-of-the-nexus-format.md +++ b/src/scippnexus/docs/our-interpretation-of-the-nexus-format.md @@ -55,7 +55,7 @@ No *general* definition or intention is documented in the NF, but according to T Due to the attribute naming mismatch in the two cases where it *is* specified we need to assume that naming is arbitrary. Therefore, we search *all* attributes of a field for a date and time offset, provided that the field's unit is a time unit. It is unclear what should be done in the case of multiple matches. 
-As of April 2022 we ignore the offset in this case, since guessing which one to use based on the attribute name does not seem desirable. +As of April 2022 we ignore the date and time offsets in this case, since guessing which one to use based on the attribute name does not seem desirable. ## Bin edges From bc2d8de24cd13cd546ae57ebf74544ea91ca3951 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 6 Apr 2022 12:57:35 +0200 Subject: [PATCH 14/25] Simpler regex --- src/scippnexus/nxobject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index efd04ee6..1cb04d08 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -116,7 +116,7 @@ def _as_datetime(obj: Any): # nanosecond precision. if 'T' in obj: date, time = obj.split('T') - time = re.split(f'Z|{re.escape("+")}|-', time)[0] + time = re.split(r'Z|\+|-', time)[0] obj = f'{date}T{time}' return sc.datetime(np.datetime64(obj)) except ValueError: From 0422010f765380f9cd453402653cd9ef5b984849 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 7 Apr 2022 15:52:29 +0200 Subject: [PATCH 15/25] Apply instead of drop timezone offset --- src/scippnexus/nxobject.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index 1cb04d08..bc812649 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -4,6 +4,7 @@ from __future__ import annotations import re import warnings +import dateutil from enum import Enum, auto import functools from typing import List, Union, NoReturn, Any, Dict, Tuple, Protocol @@ -116,9 +117,18 @@ def _as_datetime(obj: Any): # nanosecond precision. 
if 'T' in obj: date, time = obj.split('T') - time = re.split(r'Z|\+|-', time)[0] - obj = f'{date}T{time}' - return sc.datetime(np.datetime64(obj)) + time_and_timezone_offset = re.split(r'Z|\+|-', time) + time = time_and_timezone_offset[0] + if len(time_and_timezone_offset) == 1: + dt = np.datetime64(f'{date}T{time}') + else: + timezone_aware = dateutil.parser.isoparse(obj) + offset = timezone_aware.replace(tzinfo=dateutil.tz.tzutc()) + delta = timezone_aware - offset + dt = np.datetime64(f'{date}T{time}') + delta + else: + dt = np.datetime64(obj) + return sc.datetime(dt) except ValueError: pass return None From b0bbe6ceb9e2a3762f3a1889756621bdcddd3c8b Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 7 Apr 2022 16:24:22 +0200 Subject: [PATCH 16/25] Fixes and update tests --- src/scippnexus/nxobject.py | 17 +++++++++-------- tests/nexus_test.py | 19 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index bc812649..10acaec1 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -119,15 +119,16 @@ def _as_datetime(obj: Any): date, time = obj.split('T') time_and_timezone_offset = re.split(r'Z|\+|-', time) time = time_and_timezone_offset[0] - if len(time_and_timezone_offset) == 1: - dt = np.datetime64(f'{date}T{time}') - else: - timezone_aware = dateutil.parser.isoparse(obj) - offset = timezone_aware.replace(tzinfo=dateutil.tz.tzutc()) - delta = timezone_aware - offset - dt = np.datetime64(f'{date}T{time}') + delta + dt = sc.datetime(np.datetime64(f'{date}T{time}')) + if len(time_and_timezone_offset) > 1: + utcoffset = dateutil.parser.isoparse(obj).utcoffset() + seconds = sc.scalar(value=utcoffset.total_seconds(), + unit='s', + dtype='int64') + dt -= seconds.to(unit=dt.unit) + return dt else: - dt = np.datetime64(obj) + return sc.datetime(np.datetime64(obj)) return sc.datetime(dt) except ValueError: pass diff --git a/tests/nexus_test.py 
b/tests/nexus_test.py index 630b6cf0..4f66c8d4 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -243,16 +243,15 @@ def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) -@pytest.mark.parametrize( - 'timezone', ['Z', '+04', '+00', '-02', '+1130', '-0930', '+11:30', '-09:30']) -def test_timezone_information_in_datetime_attribute_is_dropped(nxroot, timezone): - nxroot['mytime'] = sc.arange('ignored', 2, unit='s') - nxroot['mytime'].attrs['start_time'] = f'1970-01-01T00:00:00{timezone}' - assert sc.identical( - nxroot['mytime'][...], - sc.datetimes(dims=['dim_0'], - unit='s', - values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) +@pytest.mark.parametrize('timezone,hhmm', [('Z', '12:00'), ('+04', '08:00'), + ('+00', '12:00'), ('-02', '14:00'), + ('+1130', '00:30'), ('-0930', '21:30'), + ('+11:30', '00:30'), ('-09:30', '21:30')]) +def test_timezone_information_in_datetime_attribute_is_applied(nxroot, timezone, hhmm): + nxroot['mytime'] = sc.scalar(value=3, unit='s') + nxroot['mytime'].attrs['start_time'] = f'1970-01-01T12:00:00{timezone}' + assert sc.identical(nxroot['mytime'][...], + sc.datetime(unit='s', value=f'1970-01-01T{hhmm}:03')) def test_loads_bare_timestamps_if_multiple_candidate_datetime_offsets_found(nxroot): From b59d5ee68c8a8cd0366795a6cb75904545a804e6 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 7 Apr 2022 16:26:58 +0200 Subject: [PATCH 17/25] Test timezone and ns precision --- tests/nexus_test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 4f66c8d4..137ffbc0 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -254,6 +254,13 @@ def test_timezone_information_in_datetime_attribute_is_applied(nxroot, timezone, sc.datetime(unit='s', value=f'1970-01-01T{hhmm}:03')) +def test_timezone_information_in_datetime_attribute_preserves_ns_precision(nxroot): + nxroot['mytime'] = 
sc.scalar(value=3, unit='s') + nxroot['mytime'].attrs['start_time'] = f'1970-01-01T12:00:00.123456789+0200' + assert sc.identical(nxroot['mytime'][...], + sc.datetime(unit='ns', value=f'1970-01-01T10:00:03.123456789')) + + def test_loads_bare_timestamps_if_multiple_candidate_datetime_offsets_found(nxroot): offsets = sc.arange('ignored', 2, unit='ms') nxroot['mytime'] = offsets From 39c9a4efeba46f9566481747e7850e021dbe80ed Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 7 Apr 2022 16:31:14 +0200 Subject: [PATCH 18/25] Cleanup --- setup.cfg | 1 + src/scippnexus/nxobject.py | 26 ++++++++++++-------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/setup.cfg b/setup.cfg index 4fd0e862..18b45e56 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,6 +17,7 @@ package_dir = = src packages = find: install_requires = + python-dateutil scipp>=0.12 h5py python_requires = >=3.8 diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index 10acaec1..8fb62abe 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -109,27 +109,25 @@ def _is_time(obj): def _as_datetime(obj: Any): if isinstance(obj, str): try: - # NumPy and scipp cannot handle timezone information. We therefore strip it, - # i.e., interpret time as local time. If time is given in UTC this will lead - # to misleading results since we have no information about the actual time - # zone. - # Would like to use dateutil, but with Python's datetime we do not get - # nanosecond precision. - if 'T' in obj: + # NumPy and scipp cannot handle timezone information. We therefore apply it, + # i.e., convert to UTC. + # Would like to use dateutil directly, but with Python's datetime we do not + # get nanosecond precision. Therefore we combine numpy and dateutil parsing. 
+ date_only = 'T' in obj + if date_only: + return sc.datetime(np.datetime64(obj)) + else: date, time = obj.split('T') time_and_timezone_offset = re.split(r'Z|\+|-', time) time = time_and_timezone_offset[0] + # Strip timezone and parse with numpy dt = sc.datetime(np.datetime64(f'{date}T{time}')) if len(time_and_timezone_offset) > 1: + # There is timezone info. Parse with dateutil. utcoffset = dateutil.parser.isoparse(obj).utcoffset() - seconds = sc.scalar(value=utcoffset.total_seconds(), - unit='s', - dtype='int64') - dt -= seconds.to(unit=dt.unit) + seconds = sc.scalar(value=utcoffset.total_seconds(), unit='s') + dt -= seconds.to(unit=dt.unit, dtype='int64') return dt - else: - return sc.datetime(np.datetime64(obj)) - return sc.datetime(dt) except ValueError: pass return None From a5aa5d347803dfb018a44a3462dadf97e764c307 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Thu, 7 Apr 2022 16:35:20 +0200 Subject: [PATCH 19/25] syntax --- tests/nexus_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 137ffbc0..706efc7c 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -256,9 +256,9 @@ def test_timezone_information_in_datetime_attribute_is_applied(nxroot, timezone, def test_timezone_information_in_datetime_attribute_preserves_ns_precision(nxroot): nxroot['mytime'] = sc.scalar(value=3, unit='s') - nxroot['mytime'].attrs['start_time'] = f'1970-01-01T12:00:00.123456789+0200' + nxroot['mytime'].attrs['start_time'] = '1970-01-01T12:00:00.123456789+0200' assert sc.identical(nxroot['mytime'][...], - sc.datetime(unit='ns', value=f'1970-01-01T10:00:03.123456789')) + sc.datetime(unit='ns', value='1970-01-01T10:00:03.123456789')) def test_loads_bare_timestamps_if_multiple_candidate_datetime_offsets_found(nxroot): From 43bd97d4ac7c1144ef429bc9c3536173f8cde466 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 8 Apr 2022 07:12:36 +0200 Subject: [PATCH 20/25] Avoid using epoch where possible 
--- tests/nexus_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/nexus_test.py b/tests/nexus_test.py index 706efc7c..837c1b82 100644 --- a/tests/nexus_test.py +++ b/tests/nexus_test.py @@ -235,12 +235,12 @@ def test_ns_field_with_second_datetime_attribute_loaded_as_ns_datetime(nxroot): def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): nxroot['mytime'] = sc.arange('ignored', 2, unit='s') - nxroot['mytime'].attrs['start_time'] = '1970-01-01T00:00:00.000000000' + nxroot['mytime'].attrs['start_time'] = '1984-01-01T00:00:00.000000000' assert sc.identical( nxroot['mytime'][...], sc.datetimes(dims=['dim_0'], unit='ns', - values=['1970-01-01T00:00:00', '1970-01-01T00:00:01'])) + values=['1984-01-01T00:00:00', '1984-01-01T00:00:01'])) @pytest.mark.parametrize('timezone,hhmm', [('Z', '12:00'), ('+04', '08:00'), @@ -249,16 +249,16 @@ def test_second_field_with_ns_datetime_attribute_loaded_as_ns_datetime(nxroot): ('+11:30', '00:30'), ('-09:30', '21:30')]) def test_timezone_information_in_datetime_attribute_is_applied(nxroot, timezone, hhmm): nxroot['mytime'] = sc.scalar(value=3, unit='s') - nxroot['mytime'].attrs['start_time'] = f'1970-01-01T12:00:00{timezone}' + nxroot['mytime'].attrs['start_time'] = f'1984-01-01T12:00:00{timezone}' assert sc.identical(nxroot['mytime'][...], - sc.datetime(unit='s', value=f'1970-01-01T{hhmm}:03')) + sc.datetime(unit='s', value=f'1984-01-01T{hhmm}:03')) def test_timezone_information_in_datetime_attribute_preserves_ns_precision(nxroot): nxroot['mytime'] = sc.scalar(value=3, unit='s') - nxroot['mytime'].attrs['start_time'] = '1970-01-01T12:00:00.123456789+0200' + nxroot['mytime'].attrs['start_time'] = '1984-01-01T12:00:00.123456789+0200' assert sc.identical(nxroot['mytime'][...], - sc.datetime(unit='ns', value='1970-01-01T10:00:03.123456789')) + sc.datetime(unit='ns', value='1984-01-01T10:00:03.123456789')) def 
test_loads_bare_timestamps_if_multiple_candidate_datetime_offsets_found(nxroot): From dfde14eff026f05bcf0fd5ff6ee655d4580b4fb0 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 8 Apr 2022 07:38:37 +0200 Subject: [PATCH 21/25] Fix inverted condition --- src/scippnexus/nxobject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index 8fb62abe..3917be20 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -113,7 +113,7 @@ def _as_datetime(obj: Any): # i.e., convert to UTC. # Would like to use dateutil directly, but with Python's datetime we do not # get nanosecond precision. Therefore we combine numpy and dateutil parsing. - date_only = 'T' in obj + date_only = 'T' not in obj if date_only: return sc.datetime(np.datetime64(obj)) else: From 48a9389f5370ed14eeb70b91f94632192ce061d5 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 8 Apr 2022 08:09:05 +0200 Subject: [PATCH 22/25] Rewrite timezone-aware parsing to avoid rounding errors --- src/scippnexus/nxobject.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index 3917be20..b6a0332d 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -4,6 +4,7 @@ from __future__ import annotations import re import warnings +import datetime import dateutil from enum import Enum, auto import functools @@ -115,19 +116,25 @@ def _as_datetime(obj: Any): # get nanosecond precision. Therefore we combine numpy and dateutil parsing. 
date_only = 'T' not in obj if date_only: - return sc.datetime(np.datetime64(obj)) + return sc.datetime(obj) + date, time = obj.split('T') + time_and_timezone_offset = re.split(r'Z|\+|-', time) + time = time_and_timezone_offset[0] + if len(time_and_timezone_offset) == 1: + # No timezone, parse directly (scipp based on numpy) + return sc.datetime(f'{date}T{time}') else: - date, time = obj.split('T') - time_and_timezone_offset = re.split(r'Z|\+|-', time) - time = time_and_timezone_offset[0] - # Strip timezone and parse with numpy - dt = sc.datetime(np.datetime64(f'{date}T{time}')) - if len(time_and_timezone_offset) > 1: - # There is timezone info. Parse with dateutil. - utcoffset = dateutil.parser.isoparse(obj).utcoffset() - seconds = sc.scalar(value=utcoffset.total_seconds(), unit='s') - dt -= seconds.to(unit=dt.unit, dtype='int64') - return dt + # There is timezone info. Parse with dateutil. + dt = dateutil.parser.isoparse(obj) + dt = dt.replace(microsecond=0) # handled by numpy + dt = dt.astimezone(datetime.timezone.utc) + dt = dt.replace(tzinfo=None).isoformat() + # We operate with string operations here and thus end up parsing date + # and time twice. The reason is the the timezone-offset arithmetic must + # cannot be done, e.g., in nanoseconds without causing rounding errors. + if '.' 
in time: + dt += f".{time.split('.')[1]}" + return sc.datetime(dt) except ValueError: pass return None From 5ce78ff03d306b761ec27711027bcc65e8c10a75 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 8 Apr 2022 08:12:07 +0200 Subject: [PATCH 23/25] Treat warnings as errors when running pytest --- pyproject.toml | 4 ++++ src/scippnexus/nxobject.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 38ed7627..e46a344d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,3 +10,7 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] addopts = "-ra -v" testpaths = "tests" +filterwarnings = [ + "error", + "ignore::UserWarning", +] diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index b6a0332d..120c5e81 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -5,7 +5,7 @@ import re import warnings import datetime -import dateutil +import dateutil.parser from enum import Enum, auto import functools from typing import List, Union, NoReturn, Any, Dict, Tuple, Protocol From 00c576fd5fe212e9b41ac5a434a2fcd161c44799 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Mon, 11 Apr 2022 08:10:55 +0200 Subject: [PATCH 24/25] Update src/scippnexus/nxobject.py Co-authored-by: Jan-Lukas Wynen --- src/scippnexus/nxobject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scippnexus/nxobject.py b/src/scippnexus/nxobject.py index 120c5e81..7210e94c 100644 --- a/src/scippnexus/nxobject.py +++ b/src/scippnexus/nxobject.py @@ -130,7 +130,7 @@ def _as_datetime(obj: Any): dt = dt.astimezone(datetime.timezone.utc) dt = dt.replace(tzinfo=None).isoformat() # We operate with string operations here and thus end up parsing date - # and time twice. The reason is the the timezone-offset arithmetic must + # and time twice. The reason is that the timezone-offset arithmetic # cannot be done, e.g., in nanoseconds without causing rounding errors. if '.' 
in time: dt += f".{time.split('.')[1]}" From c1185a6ad84323bd09bea3d663ab6a3d1d47d9c4 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 12 Apr 2022 12:39:04 +0200 Subject: [PATCH 25/25] Missing python-dateutil for conda build --- conda/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/meta.yaml b/conda/meta.yaml index b0aae100..36a7cef6 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -9,6 +9,7 @@ requirements: - setuptools run: - python>=3.8 + - python-dateutil - scipp - h5py