Bump coverage
maxwelllevin committed Sep 26, 2023
1 parent 862b767 commit 5aaa10d
Showing 6 changed files with 146 additions and 51 deletions.
2 changes: 2 additions & 0 deletions .coveragerc
@@ -4,6 +4,8 @@ source =
    ./src
omit =
    ./env/*
+    _version.py
+    __main__.py

[report]
exclude_lines =
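For context, coverage.py reads `source` and `omit` from its `[run]` section, so the file presumably ends up as below once these two entries land (a sketch; the collapsed lines above are assumed unchanged). Omitting `_version.py` (presumably generated) and `__main__.py` (the CLI entry point) removes hard-to-test files from the denominator, which is what bumps the reported coverage:

[run]
source =
    ./src
omit =
    ./env/*
    _version.py
    __main__.py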
18 changes: 9 additions & 9 deletions Makefile
@@ -1,15 +1,15 @@
.PHONY: build
build:
-	rm -rf dist/
-	python -m build
-	pip install dist/*.whl
+	rm -rf dist/ \
+	&& python -m build \
+	&& pip install dist/*.whl

coverage:
-	coverage run -m pytest
-	coverage html
-	open htmlcov/index.html
+	coverage run -m pytest \
+	&& coverage html \
+	&& open htmlcov/index.html

format:
-	ruff . --fix --ignore E501 --per-file-ignores="__init__.py:F401"
-	isort .
-	black .
+	ruff . --fix --ignore E501 --per-file-ignores="__init__.py:F401" \
+	&& isort . \
+	&& black .
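A note on the recipe change: the backslash continuations fold each target into a single shell command whose steps are joined with `&&`, so a later step runs only if the earlier one exited 0. A rough Python equivalent of the build recipe (a sketch, not project code; `check=True` stops the chain on the first failure, much like `&&`):

import glob
import subprocess

subprocess.run(["rm", "-rf", "dist/"], check=True)     # step 1
subprocess.run(["python", "-m", "build"], check=True)  # step 2: only reached if step 1 succeeded
wheels = glob.glob("dist/*.whl")                       # the recipe's dist/*.whl glob
subprocess.run(["python", "-m", "pip", "install", *wheels], check=True)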
21 changes: 9 additions & 12 deletions src/ncconvert/utils.py
@@ -22,8 +22,7 @@ def _dump_metadata(dataset: xr.Dataset, filepath: str | Path) -> Path:
def _to_dataframe(
    dataset: xr.Dataset, filepath: str | Path, extension: str
) -> tuple[Path, pd.DataFrame]:
-    if not extension.startswith("."):
-        extension = "." + extension
+    extension = extension if extension.startswith(".") else "." + extension

    df = dataset.to_dataframe(dim_order=list(dataset.dims))

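The three `startswith` edits in this file swap a two-line if-block for an equivalent conditional expression. A quick sketch of the behavior, using a hypothetical helper name:

def _normalize(extension: str) -> str:
    # the one-liner the diff introduces
    return extension if extension.startswith(".") else "." + extension

assert _normalize("csv") == ".csv"
assert _normalize(".csv") == ".csv"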
@@ -35,8 +34,7 @@ def _to_dataframe_collection(
) -> tuple[tuple[Path, pd.DataFrame], ...]:
    outputs: list[tuple[Path, pd.DataFrame]] = []

-    if extension.startswith("."):
-        extension = extension[1:]
+    extension = extension[1:] if extension.startswith(".") else extension

    # Get variable dimension groupings
    dimension_groups: dict[tuple[str, ...], list[str]] = defaultdict(list)
@@ -64,8 +62,7 @@ def _to_dataframe_collection(
def _to_faceted_dim_dataframe(
    dataset: xr.Dataset, filepath: str | Path, extension: str
) -> tuple[Path, pd.DataFrame]:
-    if not extension.startswith("."):
-        extension = "." + extension
+    extension = extension if extension.startswith(".") else "." + extension

    # Get variable dimension groupings
    dimension_groups: dict[tuple[str, ...], list[str]] = defaultdict(list)
@@ -96,24 +93,24 @@ def _to_faceted_dim_dataframe(
    ds = dataset[["time"]].copy()
    for dims, var_list in dimension_groups.items():
        # simple case
-        if dims == ("time", ):
+        if dims == ("time",):
            ds.update(dataset[var_list])
            continue

        shape = dataset[var_list[0]].shape

        # If scalar, expand to make time the first dimension
        if not shape:
            _tmp = dataset[var_list].expand_dims({"time": dataset["time"]})
            ds.update(_tmp[var_list])
            continue

        _tmp = dataset[var_list]

        # If 1D, expand to make time a dimension (2D)
        if len(shape) == 1:
            _tmp = _tmp.expand_dims({"time": dataset["time"]})

        # For 2D, make time the first dimension and flatten the second
        new_dims = ("time", [d for d in dims if d != "time"][0])
        _tmp = _tmp.transpose(*new_dims)
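This loop normalizes every variable group to a 2-D (time, other) layout before flattening. A standalone sketch of the `expand_dims`/`transpose` steps on a height-only variable (toy values mirroring the test fixtures further down):

import xarray as xr

ds = xr.Dataset(
    data_vars={"other": ("height", [1, 2, 3, 4])},
    coords={"time": [0, 1, 2], "height": [0, 10, 20, 30]},
)
# 1-D with no time axis: broadcast along time, then put time first
tmp = ds[["other"]].expand_dims({"time": ds["time"]})
tmp = tmp.transpose("time", "height")
print(tmp["other"].shape)  # (3, 4): one row per time, one column per height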
@@ -136,15 +133,15 @@ def _flatten_dataset(ds: xr.Dataset, second_dim: str) -> xr.Dataset:
    Returns:
        xr.Dataset: The flattened dataset. Preserves attributes.
    """

    output = ds[["time"]]

    dim_values = ds[second_dim].values

    dim_units = ds[second_dim].attrs.get("units")
    if not dim_units or dim_units == "1":
        dim_units = ""

    dim_suffixes = [f"{dim_val}{dim_units}" for dim_val in dim_values]

    for var_name, data in ds.data_vars.items():
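A quick sketch of what the suffix logic above yields for the height coordinate used in the tests (values and units taken from the fixtures; each suffix presumably names one flattened column per height):

dim_values = [0, 10, 20, 30]   # ds["height"].values
dim_units = "m"                # ds["height"].attrs["units"]; missing or "1" becomes ""
dim_suffixes = [f"{v}{dim_units}" for v in dim_values]
print(dim_suffixes)  # ['0m', '10m', '20m', '30m']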
71 changes: 71 additions & 0 deletions test/conftest.py
@@ -54,3 +54,74 @@ def dataset() -> xr.Dataset:
"data_level": "c1",
},
)


+@pytest.fixture(autouse=True, scope="module")
+def bad_dataset() -> xr.Dataset:
+    return xr.Dataset(
+        coords={
+            "time": (
+                "time",
+                pd.date_range(
+                    "2022-04-05",
+                    "2022-04-06",
+                    periods=3 + 1,
+                    inclusive="left",
+                ),  # type: ignore
+                {"units": "Seconds since 1970-01-01 00:00:00"},
+            ),
+            "range": (
+                "range",
+                [1, 2, 3],
+                {},
+            ),
+            "height": (
+                "height",
+                [0, 10, 20, 30],
+                {"units": "m", "long_name": "Height AGL"},
+            ),
+        },
+        data_vars={
+            "temperature": (
+                ("range", "height"),
+                [
+                    [88, 80, 75, 70],
+                    [89, 81, 76, 71],
+                    [88.5, 81.5, 75.5, 69.5],
+                ],
+                {"units": "degF", "_FillValue": -9999.0},
+            ),
+            "too_large": (
+                ("time", "range", "height"),
+                [
+                    [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
+                    [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
+                    [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
+                ],
+                {"units": "1", "_FillValue": -9999.0},
+            ),
+            "humidity": (
+                "range",
+                [60.5, 65.5, 63],
+                {"units": "%", "_FillValue": -9999.0},
+            ),
+            "other": (
+                "height",
+                [1, 2, 3, 4],
+                {"units": "1", "_FillValue": -9999.0},
+            ),
+            "time_var": (
+                "time",
+                [1, 2, 3],
+                {"units": "1", "_FillValue": -9999.0},
+            ),
+        },
+        attrs={
+            "datastream": "bad.buoy.c1",
+            "title": "title",
+            "description": "description",
+            "location_id": "bad",
+            "dataset_name": "buoy",
+            "data_level": "c1",
+        },
+    )
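What makes this fixture "bad" for a time-indexed table: `temperature` has no time dimension at all, and `too_large` is 3-D, so neither maps cleanly onto (time, other) columns. A sketch of a test (hypothetical name) asserting those properties against the autouse fixture:

import xarray as xr


def test_bad_dataset_is_awkward(bad_dataset: xr.Dataset):
    assert "time" not in bad_dataset["temperature"].dims  # no time axis to index rows by
    assert len(bad_dataset["too_large"].dims) == 3        # too many dims to facet into 2-D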
53 changes: 29 additions & 24 deletions test/test_cli.py
@@ -37,30 +37,35 @@ def test_convert_cli(dataset: xr.Dataset):

    runner = CliRunner()

-    with runner.isolated_filesystem():
-        dataset.to_netcdf("test.20220405.000000.nc")
-        dataset.to_netcdf("test.20220405.001200.nc")
-        dataset.to_netcdf("test.20220406.000000.nc")
-        dataset.to_netcdf("test.20220406.001200.nc")
-        dataset.to_netcdf("test.20220410.000000.nc")
-        dataset.to_netcdf("test.20220410.001200.nc")
-        dataset.to_netcdf("test.20220420.000000.nc")
-        dataset.close()
+    for verbosity in ["--verbose", "--no-verbose"]:
+        with runner.isolated_filesystem():
+            dataset.to_netcdf("test.20220405.000000.nc")
+            dataset.to_netcdf("test.20220405.001200.nc")
+            dataset.to_netcdf("test.20220406.000000.nc")
+            dataset.to_netcdf("test.20220406.001200.nc")
+            dataset.to_netcdf("test.20220410.000000.nc")
+            dataset.to_netcdf("test.20220410.001200.nc")
+            dataset.to_netcdf("test.20220420.000000.nc")
+            dataset.close()

-        result = runner.invoke(
-            app,
-            args=(
-                "to_csv",
-                "test.2022040*.nc",
-                "test.2022041*.nc",
-                "test.20220420.000000.nc",
-                "--output-dir",
-                "outputs",
-                "--verbose",
-            ),
-        )
+            result = runner.invoke(
+                app,
+                args=(
+                    "to_csv",
+                    "test.2022040*.nc",
+                    "test.2022041*.nc",
+                    "test.20220420.000000.nc",
+                    "--output-dir",
+                    "outputs",
+                    verbosity,
+                ),
+            )

-        assert result.exit_code == 0
+            assert result.exit_code == 0
+            if verbosity == "--no-verbose":
+                assert result.stdout == ""
+            else:
+                assert result.stdout != ""

-        assert len(list(Path("./outputs").glob("*.csv"))) == 7
-        assert len(list(Path("./outputs").glob("*.json"))) == 7
+            assert len(list(Path("./outputs").glob("*.csv"))) == 7
+            assert len(list(Path("./outputs").glob("*.json"))) == 7
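For reference, a self-contained sketch of the `CliRunner` capture behavior the new assertions lean on, using a hypothetical toy command rather than the real ncconvert app:

import click
from click.testing import CliRunner


@click.command()
@click.option("--verbose/--no-verbose", default=True)
def convert(verbose: bool):
    if verbose:
        click.echo("converting...")


runner = CliRunner()
result = runner.invoke(convert, ["--no-verbose"])
assert result.exit_code == 0
assert result.stdout == ""  # --no-verbose prints nothing, as the test expects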
32 changes: 26 additions & 6 deletions test/test_csv.py
@@ -70,26 +70,46 @@ def test_csv_collection(dataset: xr.Dataset):
    os.remove(output_path)
    os.remove(metadata_path)

-def test_faceted_csv(dataset: xr.Dataset):
+
+def test_faceted_csv(dataset: xr.Dataset, bad_dataset: xr.Dataset):
    from ncconvert.csv import to_faceted_dim_csv

    filepath = Path(".tmp/data/faceted.csv")

    output_path, metadata_path = to_faceted_dim_csv(dataset, filepath)

    assert output_path == filepath
    assert metadata_path is not None
    assert metadata_path == filepath.with_suffix(".json")

-    df = pd.read_csv(output_path)
-
+    # cols=time, humidity, static, and [temperature, other]@each height
+    df = pd.read_csv(output_path)
    assert len(df.index) == len(dataset.time)
-    assert len(df.columns) == 2*len(dataset.height) + 3
+    assert len(df.columns) == 2 * len(dataset.height) + 3

    # should preserve metadata just like the others
    meta = json.loads(metadata_path.read_text())
    assert "datastream" in meta["attrs"]
    assert "time" in meta["coords"]

    os.remove(output_path)
    os.remove(metadata_path)

+    # test with the bad dataset.
+    # We should see some warnings in the logs
+    filepath = Path(".tmp/data/bad_faceted.csv")
+    output_path, metadata_path = to_faceted_dim_csv(bad_dataset, filepath)
+
+    assert output_path == filepath
+    assert metadata_path is not None
+    assert metadata_path == filepath.with_suffix(".json")
+
+    # cols=time, time_var, [humidity]@each range, [other]@each height
+    df = pd.read_csv(output_path)
+    assert len(df.index) == len(bad_dataset.time)
+    assert len(df.columns) == 2 + len(bad_dataset.height) + len(bad_dataset.range)
+
+    # should preserve metadata just like the others
+    meta = json.loads(metadata_path.read_text())
+    assert "datastream" in meta["attrs"]
+    assert "time" in meta["coords"]

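The column arithmetic in the two assertions decodes as follows (a worked sketch):

# good dataset: time + humidity + static, plus temperature and other
# faceted across 4 heights
assert 2 * 4 + 3 == 11
# bad dataset: time + time_var, humidity faceted across 3 ranges,
# other faceted across 4 heights
assert 2 + 4 + 3 == 9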
