Skip to content

Commit

Permalink
Add support for None in search query (#612)
Browse files Browse the repository at this point in the history
* Add support for None, NaN, and NA in search query - add tests

* run pre-commit
  • Loading branch information
aulemahal authored Jun 19, 2023
1 parent 23d80d6 commit e6b49a8
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 6 deletions.
2 changes: 2 additions & 0 deletions intake_esm/_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def search(
mask = df[column].str.contains(value, regex=False)
elif column_is_stringtype and is_pattern(value):
mask = df[column].str.contains(value, regex=True, case=True, flags=0)
elif pd.isna(value):
mask = df[column].isnull()
else:
mask = df[column] == value
local_mask = local_mask | mask
Expand Down
2 changes: 1 addition & 1 deletion intake_esm/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ def validate_query(cls, values):
raise ValueError(f'Column {key} not in columns {columns}')
_query = query.copy()
for key, value in _query.items():
if isinstance(value, (str, int, float, bool)):
if isinstance(value, (str, int, float, bool)) or value is None or value is pd.NA:
_query[key] = [value]

values['query'] = _query
Expand Down
6 changes: 5 additions & 1 deletion tests/test_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,11 @@ def test_esmcatmodel_unique_and_nunique(query, expected_unique_vals, expected_nu

@pytest.mark.parametrize(
'query, columns, require_all_on',
[({'foo': 1}, ['foo', 'bar'], ['bar']), ({'bar': 1}, ['foo', 'bar'], 'foo')],
[
({'foo': 1}, ['foo', 'bar'], ['bar']),
({'bar': 1}, ['foo', 'bar'], 'foo'),
({'foo': None}, ['foo', 'bar'], None),
],
)
def test_query_model(query, columns, require_all_on):
q = QueryModel(query=query, columns=columns, require_all_on=require_all_on)
Expand Down
22 changes: 18 additions & 4 deletions tests/test_search.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re

import numpy as np
import pandas as pd
import pytest

Expand Down Expand Up @@ -116,17 +117,30 @@ def test_is_pattern(value, expected):
{'A': 'NASA', 'B': 'foo', 'C': 'HiSt', 'D': 'tAs'},
],
),
({'A': None}, None, [{'A': None, 'B': None, 'C': 'exp', 'D': 'UA'}]),
({'A': np.nan}, None, [{'A': None, 'B': None, 'C': 'exp', 'D': 'UA'}]),
]


@pytest.mark.parametrize('query, require_all_on, expected', params)
def test_search(query, require_all_on, expected):
df = pd.DataFrame(
{
'A': ['NCAR', 'IPSL', 'IPSL', 'CSIRO', 'IPSL', 'NCAR', 'NOAA', 'NCAR', 'NASA'],
'B': ['CESM', 'FOO', 'FOO', 'BAR', 'FOO', 'CESM', 'GCM', 'WACM', 'foo'],
'C': ['hist', 'control', 'hist', 'control', 'hist', 'control', 'hist', 'hist', 'HiSt'],
'D': ['O2', 'O2', 'O2', 'O2', 'NO2', 'O2', 'O2', 'TA', 'tAs'],
'A': ['NCAR', 'IPSL', 'IPSL', 'CSIRO', 'IPSL', 'NCAR', 'NOAA', 'NCAR', 'NASA', None],
'B': ['CESM', 'FOO', 'FOO', 'BAR', 'FOO', 'CESM', 'GCM', 'WACM', 'foo', None],
'C': [
'hist',
'control',
'hist',
'control',
'hist',
'control',
'hist',
'hist',
'HiSt',
'exp',
],
'D': ['O2', 'O2', 'O2', 'O2', 'NO2', 'O2', 'O2', 'TA', 'tAs', 'UA'],
}
)
query_model = QueryModel(
Expand Down

0 comments on commit e6b49a8

Please sign in to comment.