Skip to content

Commit

Permalink
Merge pull request #339 from MarcoGorelli/sel
Browse files Browse the repository at this point in the history
Add sel method
  • Loading branch information
khider authored Feb 22, 2023
2 parents e606f8e + be184d6 commit 8883511
Show file tree
Hide file tree
Showing 2 changed files with 198 additions and 0 deletions.
148 changes: 148 additions & 0 deletions pyleoclim/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
How to create and manipulate such objects is described in a short example below, while `this notebook <https://nbviewer.jupyter.org/github/LinkedEarth/Pyleoclim_util/blob/master/example_notebooks/pyleoclim_ui_tutorial.ipynb>`_ demonstrates how to apply various Pyleoclim methods to Series objects.
"""

import datetime as dt
import operator
import re

Expand Down Expand Up @@ -2327,6 +2328,153 @@ def segment(self, factor=10):
else:
raise ValueError('No timeseries detected')
return res

def sel(self, value=None, time=None, tolerance=0):
    """
    Slice Series based on 'value' or 'time'.

    Exactly one of `value` or `time` is expected; passing both raises.

    Parameters
    ----------
    value : int, float, slice
        If int/float, then the Series will be sliced so that `self.value` is
        equal to `value` (+/- `tolerance`).
        If slice, then the Series will be sliced so `self.value` is between
        slice.start and slice.stop (+/- tolerance). Either bound may be
        `None` to leave that side open.
    time : int, float, slice
        If int/float, then the Series will be sliced so that `self.time` is
        equal to `time` (+/- `tolerance`).
        If slice of int/float, then the Series will be sliced so that
        `self.time` is between slice.start and slice.stop (+/- `tolerance`).
        If slice of `datetime` (or str containing datetime, such as `'2020-01-01'`),
        then the Series will be sliced so that `self.datetime_index` is
        between `time.start` and `time.stop`; `tolerance` is not applied
        to datetime/str bounds.
        Either bound may be `None` to leave that side open.
    tolerance : int, float, default 0.
        Widens the selection window on both sides: it is subtracted from
        the lower bound and added to the upper bound.

    Returns
    -------
    The result of slicing `self` according to `value` or `time`.

    Raises
    ------
    TypeError
        If both `value` and `time` are given, or if the given selector is
        not an int, a float, or a slice with supported bounds.

    Examples
    --------
    >>> ts = pyleo.Series(
    ...     time=np.array([1, 1.1, 2, 3]), value=np.array([4, .9, 6, 1]), time_unit='years BP'
    ... )
    >>> ts.sel(value=1)
    {'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},
             {2: 'clean_ts', 'applied': True, 'verbose': False})}
    None
    time [years BP]
    3.0    1.0
    Name: value, dtype: float64

    If you also want to include the value `0.9`, you could set `tolerance` to `.1`:

    >>> ts.sel(value=1, tolerance=.1)
    {'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},
             {2: 'clean_ts', 'applied': True, 'verbose': False})}
    None
    time [years BP]
    1.1    0.9
    3.0    1.0
    Name: value, dtype: float64

    You can also pass a `slice` to select a range of values:

    >>> ts.sel(value=slice(4, 6))
    {'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},
             {2: 'clean_ts', 'applied': True, 'verbose': False})}
    None
    time [years BP]
    1.0    4.0
    2.0    6.0
    Name: value, dtype: float64

    >>> ts.sel(value=slice(4, None))
    {'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},
             {2: 'clean_ts', 'applied': True, 'verbose': False})}
    None
    time [years BP]
    1.0    4.0
    2.0    6.0
    Name: value, dtype: float64

    >>> ts.sel(value=slice(None, 4))
    {'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},
             {2: 'clean_ts', 'applied': True, 'verbose': False})}
    None
    time [years BP]
    1.0    4.0
    1.1    0.9
    3.0    1.0
    Name: value, dtype: float64

    Similarly, you filter using `time` instead of `value`.
    """
    if value is not None and time is not None:
        raise TypeError("Cannot pass both `value` and `time`")

    number = (int, float)

    if value is not None:
        if isinstance(value, number):
            return self.pandas_method(
                lambda x: x[x.between(value - tolerance, value + tolerance)]
            )
        if isinstance(value, slice):
            start, stop = value.start, value.stop
            if isinstance(start, number) and isinstance(stop, number):
                return self.pandas_method(
                    lambda x: x[x.between(start - tolerance, stop + tolerance)]
                )
            if isinstance(start, number) and stop is None:
                return self.pandas_method(lambda x: x[x.ge(start - tolerance)])
            if isinstance(stop, number) and start is None:
                # Tolerance widens the window, so it is ADDED to an upper bound.
                return self.pandas_method(lambda x: x[x.le(stop + tolerance)])
        raise TypeError(f'Expected slice, int, or float, got: {type(value)}')

    if time is not None:
        if isinstance(time, number):
            return self.slice([time - tolerance, time + tolerance])
        if isinstance(time, slice):
            start, stop = time.start, time.stop

            def _keep(mask):
                # Half-open numeric ranges cannot go through self.slice (it
                # is called with a two-element timespan), so mask a copy.
                new = self.copy()
                new.time = new.time[mask]
                new.value = new.value[mask]
                return new

            # Numeric bounds: compare against self.time, widened by tolerance.
            if isinstance(start, number) and isinstance(stop, number):
                return self.slice([start - tolerance, stop + tolerance])
            if isinstance(start, number) and stop is None:
                return _keep(self.time >= start - tolerance)
            if isinstance(stop, number) and start is None:
                # Tolerance widens the window, so it is ADDED to an upper bound.
                return _keep(self.time <= stop + tolerance)

            # String bounds: parsed as second-resolution datetimes and
            # compared against the index handed over by pandas_method.
            if isinstance(start, str) and isinstance(stop, str):
                return self.pandas_method(
                    lambda x: x[(x.index >= np.datetime64(start, 's')) & (x.index <= np.datetime64(stop, 's'))]
                )
            if isinstance(start, str) and stop is None:
                return self.pandas_method(
                    lambda x: x[x.index >= np.datetime64(start, 's')]
                )
            if isinstance(stop, str) and start is None:
                return self.pandas_method(
                    lambda x: x[x.index <= np.datetime64(stop, 's')]
                )

            # datetime bounds: compared directly against the index.
            if isinstance(start, dt.datetime) and isinstance(stop, dt.datetime):
                return self.pandas_method(
                    lambda x: x[(x.index >= start) & (x.index <= stop)]
                )
            if isinstance(start, dt.datetime) and stop is None:
                return self.pandas_method(lambda x: x[x.index >= start])
            if isinstance(stop, dt.datetime) and start is None:
                return self.pandas_method(lambda x: x[x.index <= stop])
        raise TypeError("Expected int or float, or slice of int/float/datetime/str.")


def slice(self, timespan):
''' Slicing the timeseries with a timespan (tuple or list)
Expand Down
50 changes: 50 additions & 0 deletions pyleoclim/tests/test_core_Series.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
4. after `pip install pytest-xdist`, one may execute "pytest -n 4" to test in parallel with number of workers specified by `-n`
5. for more details, see https://docs.pytest.org/en/stable/usage.html
'''
import datetime as dt
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -417,6 +418,55 @@ def test_slice(self):
assert min(times) == 10
assert max(times) == 90

class TestSel:
    ''' Test Series.sel()
    '''

    # Columns of every parametrized case below, in order.
    _VALUE_FIELDS = ('value', 'expected_time', 'expected_value', 'tolerance')
    _TIME_FIELDS = ('time', 'expected_time', 'expected_value', 'tolerance')

    # Selections on `value`: scalar, closed slice, and half-open slices.
    _VALUE_CASES = [
        (1, np.array([3]), np.array([1]), 0),
        (1, np.array([1, 3]), np.array([4, 1]), 3),
        (slice(1, 4), np.array([1, 3]), np.array([4, 1]), 0),
        (slice(1, 4), np.array([1, 2, 3]), np.array([4, 6, 1]), 2),
        (slice(1, None), np.array([1, 2, 3]), np.array([4, 6, 1]), 0),
        (slice(None, 1), np.array([3]), np.array([1]), 0),
    ]

    # Selections on `time`: numeric, string and datetime bounds.
    _TIME_CASES = [
        (1, np.array([1]), np.array([4]), 0),
        (1, np.array([1, 2]), np.array([4, 6]), 1),
        (slice(1, 2), np.array([1, 2]), np.array([4, 6]), 0),
        (slice(1, 2), np.array([1, 2, 3]), np.array([4, 6, 1]), 1),
        (slice(1, None), np.array([1, 2, 3]), np.array([4, 6, 1]), 0),
        (slice(None, 1), np.array([1]), np.array([4]), 0),
        (slice('1948', '1949'), np.array([1, 2]), np.array([4, 6]), 0),
        (slice('1947', None), np.array([1, 2, 3]), np.array([4, 6, 1]), 0),
        (slice(None, '1948'), np.array([3]), np.array([1]), 0),
        (slice(dt.datetime(1948, 1, 1), dt.datetime(1949, 1, 1)), np.array([1, 2]), np.array([4, 6]), 0),
        (slice(dt.datetime(1947, 1, 1), None), np.array([1, 2, 3]), np.array([4, 6, 1]), 0),
        (slice(None, dt.datetime(1948, 1, 1)), np.array([3]), np.array([1]), 0),
    ]

    @staticmethod
    def _series(time, value):
        # Every Series in these tests shares the same time unit.
        return pyleo.Series(time=time, value=value, time_unit='years BP')

    @pytest.mark.parametrize(_VALUE_FIELDS, _VALUE_CASES)
    def test_value(self, value, expected_time, expected_value, tolerance):
        source = self._series(np.array([1, 2, 3]), np.array([4, 6, 1]))
        selected = source.sel(value=value, tolerance=tolerance)
        reference = self._series(expected_time, expected_value)
        # Only the first element returned by equals() is checked here.
        same_values = selected.equals(reference)[0]
        assert same_values

    @pytest.mark.parametrize(_TIME_FIELDS, _TIME_CASES)
    def test_time(self, time, expected_time, expected_value, tolerance):
        source = self._series(np.array([1, 2, 3]), np.array([4, 6, 1]))
        selected = source.sel(time=time, tolerance=tolerance)
        reference = self._series(expected_time, expected_value)
        # Only the first element returned by equals() is checked here.
        same_values = selected.equals(reference)[0]
        assert same_values

    def test_invalid(self):
        source = self._series(np.array([1, 2, 3]), np.array([4, 6, 1]))
        with pytest.raises(TypeError, match="Cannot pass both `value` and `time`"):
            source.sel(time=1, value=1)


class TestUiSeriesSurrogates:
''' Test Series.surrogates()
'''
Expand Down

0 comments on commit 8883511

Please sign in to comment.