From 47a8a122237a1e99e3b18f85a5bd7aa8a604a89a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 16 Feb 2023 16:39:03 +0000 Subject: [PATCH] dont hard-code seconds per year --- pyleoclim/tests/test_core_Series.py | 14 +++++----- pyleoclim/tests/test_utils_tsbase.py | 25 +++++++++++++++-- pyleoclim/utils/tsbase.py | 42 +++++++++++++++++----------- 3 files changed, 54 insertions(+), 27 deletions(-) diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index 198d9c76..4986fc92 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -1023,9 +1023,10 @@ def test_resample_simple(self, rule, dataframe_dt, metadata): result =ts.resample(rule).mean() result_ser = result.to_pandas() expected_values = np.array([0., 1., 2., 3., 4.]) - expected_idx = pd.DatetimeIndex(['2018-12-30 23:59:59', '2019-12-30 23:59:59', - '2020-12-30 23:59:59', '2021-12-30 23:59:59', - '2022-12-30 23:59:59'], name='datetime').as_unit('s') + expected_idx = pd.DatetimeIndex( + ['2018-12-31', '2019-12-31', '2020-12-31', '2021-12-31', '2022-12-31'], + name='datetime' + ).as_unit('s') expected_ser = pd.Series(expected_values, expected_idx, name='SOI') expected_metadata = { 'time_unit': 'years CE', @@ -1049,9 +1050,9 @@ def test_resample_simple(self, rule, dataframe_dt, metadata): @pytest.mark.parametrize( ('rule', 'expected_idx'), [ - ('1ga', [np.datetime64('2018-12-30 23:59:59'), np.datetime64('1000002018-12-31 00:00:01')]), - ('1ma', [np.datetime64('2018-12-30 23:59:59'), np.datetime64('1002018-12-30 23:59:59')]), - ('2ka', [np.datetime64('2018-12-30 23:59:59'), np.datetime64('4018-12-30 23:59:59')]), + ('1ga', [np.datetime64('2018-12-31'), np.datetime64('1000002018-12-31')]), + ('1ma', [np.datetime64('2018-12-31'), np.datetime64('1002018-12-31')]), + ('2ka', [np.datetime64('2018-12-31'), np.datetime64('4018-12-31')]), ] ) def test_resample_long_periods(self, rule, expected_idx, dataframe_dt, metadata): @@ -1128,5 +1129,4 @@ def test_equals_t3(self): soi_pd.index = soi_pd.index + pd.DateOffset(1) soi2 = pyleo.Series.from_pandas(soi_pd, soi.metadata) same_data, _ = soi.equals(soi2, index_tol= 1.1*86400) - assert same_data diff --git a/pyleoclim/tests/test_utils_tsbase.py b/pyleoclim/tests/test_utils_tsbase.py index 2e136c12..17c792a9 100644 --- a/pyleoclim/tests/test_utils_tsbase.py +++ b/pyleoclim/tests/test_utils_tsbase.py @@ -82,9 +82,13 @@ def test_convert_datetime_index_ka(dataframe_dt): time_unit, time_name=time_name, ) - expected = np.array([-0.06899658, -0.06999658, -0.07099932, -0.07199658, -0.07299658]) + expected = np.array([-0.06899805, -0.06999739, -0.07099946, -0.0719988 , -0.07299814]) assert np.allclose(time.values, expected, rtol=1e-05, atol=1e-08) + # check we can round-trip + result = tsbase.time_to_datetime(time.values, 1950, 3, 'retrograde') + assert np.abs(dataframe_dt.index - result).total_seconds().max() <= 1 + def test_convert_datetime_index_ma(dataframe_dt): time_unit = 'ma' @@ -97,6 +101,10 @@ def test_convert_datetime_index_ma(dataframe_dt): expected = np.array([-6.89965777e-05, -6.99965777e-05, -7.09993155e-05, -7.19965777e-05, -7.29965777e-05]) assert np.allclose(time.values, expected, rtol=1e-05, atol=1e-08) + # check we can round-trip + result = tsbase.time_to_datetime(time.values, 1950, 6, 'retrograde') + assert np.abs(dataframe_dt.index - result).total_seconds().max() <= 1 + def test_convert_datetime_index_ga(dataframe_dt): time_unit = 'ga' @@ -109,6 +117,10 @@ def test_convert_datetime_index_ga(dataframe_dt): expected 
= np.array([-6.89965777e-08, -6.99965777e-08, -7.09993155e-08, -7.19965777e-08, -7.29965777e-08]) assert np.allclose(time.values, expected, rtol=1e-05, atol=1e-08) + # check we can round-trip + result = tsbase.time_to_datetime(time.values, 1950, 9, 'retrograde') + assert np.abs(dataframe_dt.index - result).total_seconds().max() <= 1 + def test_convert_datetime_index_bp(dataframe_dt): time_unit = 'years B.P.' @@ -118,9 +130,13 @@ def test_convert_datetime_index_bp(dataframe_dt): time_unit, time_name=time_name, ) - expected = np.array([-68.99657769, -69.99657769, -70.99931554, -71.99657769, -72.99657769]) + expected = np.array([-68.99805139, -69.99738827, -70.99946306, -71.99879994, -72.99813682]) assert np.allclose(time.values, expected, rtol=1e-05, atol=1e-08) + # check we can round-trip + result = tsbase.time_to_datetime(time.values, 1950, 0, 'retrograde') + assert np.abs(dataframe_dt.index - result).total_seconds().max() <= 1 + def test_convert_datetime_index_ad(dataframe_dt): time_unit = 'AD' @@ -133,6 +149,10 @@ def test_convert_datetime_index_ad(dataframe_dt): expected = np.array([2018.99657769, 2019.99657769, 2020.99931554, 2021.99657769, 2022.99657769]) assert np.allclose(time.values, expected, rtol=1e-05, atol=1e-08) + # check we can round-trip + result = tsbase.time_to_datetime(time.values, 0, 0, 'prograde') + assert np.abs(dataframe_dt.index - result).total_seconds().max() <= 1 + def test_convert_datetime_index_nondt_index(dataframe): time_unit = 'yr' @@ -143,4 +163,3 @@ def test_convert_datetime_index_nondt_index(dataframe): time_unit, time_name=time_name, ) - diff --git a/pyleoclim/utils/tsbase.py b/pyleoclim/utils/tsbase.py index ed983aca..359f4ee2 100644 --- a/pyleoclim/utils/tsbase.py +++ b/pyleoclim/utils/tsbase.py @@ -21,7 +21,8 @@ 'reduce_duplicated_timestamps', ] -SECONDS_PER_YEAR = 365.25 * 60 * 60 * 24 ## TODO: generalize to different calendars using cftime +# UDUNITS, see: http://cfconventions.org/cf-conventions/cf-conventions#time-coordinate +SECONDS_PER_YEAR = 31556925.974592 # 86400 * 365.24219878 MATCH_A = frozenset(['y', 'yr', 'yrs', 'year', 'years']) MATCH_KA = frozenset(['ka', 'ky', 'kyr', 'kyrs', 'kiloyear', 'kiloyr', 'kiloyrs']) @@ -82,8 +83,17 @@ def time_unit_to_datum_exp_dir(time_unit, time_name=None, verbose=False): return (datum, exponent, direction) def convert_datetime_index_to_time(datetime_index, time_unit, time_name): + """ + Convert a DatetimeIndex to time in a given unit. + + The general formula is: + + datetime_index = datum +/- time*10**exponent + + where we assume ``time`` to use the Gregorian calendar. If dealing with other + calendars, then conversions need to happen before reaching pyleoclim. + """ datum, exponent, direction = time_unit_to_datum_exp_dir(time_unit, time_name) - #import operator if direction == 'prograde': multiplier = 1 elif direction == 'retrograde': @@ -98,19 +108,21 @@ def convert_datetime_index_to_time(datetime_index, time_unit, time_name): "Only 'second' resolution is currently supported. 
" "Please cast to second resolution with `.as_unit('s')`" ) - year_diff = (datetime_index.year - int(datum)) - numpy_datetime_index = datetime_index.to_numpy() - years_floor = numpy_datetime_index.astype('datetime64[Y]').astype('datetime64[s]') - seconds_diff = (numpy_datetime_index - years_floor).astype('int') - diff = year_diff + seconds_diff / SECONDS_PER_YEAR - time = multiplier * diff / 10**exponent + + return pd.Index((multiplier * (datetime_index.to_numpy() - np.datetime64(str(datum), 's'))).astype(float) / (10**exponent*SECONDS_PER_YEAR)) - return time def time_to_datetime(time, datum=0, exponent=0, direction='prograde', unit='s'): ''' Converts a vector of time values to a pandas datetime object + The general formula is: + + datetime_index = datum +/- time*10**exponent + + where we assume ``time`` to use the Gregorian calendar. If dealing with other + calendars, then conversions need to happen before reaching pyleoclim. + Parameters ---------- time : array-like @@ -129,20 +141,16 @@ def time_to_datetime(time, datum=0, exponent=0, direction='prograde', unit='s'): Returns ------- index, a datetime64[unit] object - ''' + if direction not in ('prograde', 'retrograde'): + raise ValueError(f'Expected one of {"prograde", "retrograde"}, got {direction}') + if direction == 'prograde': op = operator.add elif direction == 'retrograde': op = operator.sub - else: - raise ValueError(f'Expected one of {"prograde", "retrograde"}, got {direction}') - - timedelta = np.array(time) * 10**exponent - years = timedelta.astype('int') - seconds = ((timedelta - timedelta.astype('int')) * SECONDS_PER_YEAR).astype('timedelta64[s]') # incorporate unit here - index = op(op(int(datum), years).astype(str).astype('datetime64[s]'), seconds) # incorporate unit here? + index = op(np.datetime64(str(datum), 's'), (time*SECONDS_PER_YEAR*10**exponent).astype('timedelta64[s]')) return index