Bump coverage
maxwelllevin committed Sep 26, 2023
1 parent 862b767 commit 5aaa10d
Showing 6 changed files with 146 additions and 51 deletions.
2 changes: 2 additions & 0 deletions .coveragerc
@@ -4,6 +4,8 @@ source =
    ./src
omit =
    ./env/*
+    _version.py
+    __main__.py

[report]
exclude_lines =
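For context, coverage.py reads `source` and `omit` from its `[run]` section, so the file presumably ends up as below once these two entries land (a sketch; the collapsed lines above are assumed unchanged). Omitting `_version.py` (presumably generated) and `__main__.py` (the CLI entry point) removes hard-to-test files from the denominator, which is what bumps the reported coverage:

[run]
source =
    ./src
omit =
    ./env/*
    _version.py
    __main__.py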
18 changes: 9 additions & 9 deletions Makefile
@@ -1,15 +1,15 @@
.PHONY: build
build:
-	rm -rf dist/
-	python -m build
-	pip install dist/*.whl
+	rm -rf dist/ \
+	&& python -m build \
+	&& pip install dist/*.whl

coverage:
-	coverage run -m pytest
-	coverage html
-	open htmlcov/index.html
+	coverage run -m pytest \
+	&& coverage html \
+	&& open htmlcov/index.html

format:
-	ruff . --fix --ignore E501 --per-file-ignores="__init__.py:F401"
-	isort .
-	black .
+	ruff . --fix --ignore E501 --per-file-ignores="__init__.py:F401" \
+	&& isort . \
+	&& black .
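A note on the recipe change: the backslash continuations fold each target into a single shell command whose steps are joined with `&&`, so a later step runs only if the earlier one exited 0. A rough Python equivalent of the build recipe (a sketch, not project code; `check=True` stops the chain on the first failure, much like `&&`):

import glob
import subprocess

subprocess.run(["rm", "-rf", "dist/"], check=True)     # step 1
subprocess.run(["python", "-m", "build"], check=True)  # step 2: only reached if step 1 succeeded
wheels = glob.glob("dist/*.whl")                       # the recipe's dist/*.whl glob
subprocess.run(["python", "-m", "pip", "install", *wheels], check=True)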
21 changes: 9 additions & 12 deletions src/ncconvert/utils.py
@@ -22,8 +22,7 @@ def _dump_metadata(dataset: xr.Dataset, filepath: str | Path) -> Path:
def _to_dataframe(
    dataset: xr.Dataset, filepath: str | Path, extension: str
) -> tuple[Path, pd.DataFrame]:
-    if not extension.startswith("."):
-        extension = "." + extension
+    extension = extension if extension.startswith(".") else "." + extension

    df = dataset.to_dataframe(dim_order=list(dataset.dims))

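The three `startswith` edits in this file swap a two-line if-block for an equivalent conditional expression. A quick sketch of the behavior, using a hypothetical helper name:

def _normalize(extension: str) -> str:
    # the one-liner the diff introduces
    return extension if extension.startswith(".") else "." + extension

assert _normalize("csv") == ".csv"
assert _normalize(".csv") == ".csv"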
@@ -35,8 +34,7 @@ def _to_dataframe_collection(
) -> tuple[tuple[Path, pd.DataFrame], ...]:
    outputs: list[tuple[Path, pd.DataFrame]] = []

-    if extension.startswith("."):
-        extension = extension[1:]
+    extension = extension[1:] if extension.startswith(".") else extension

    # Get variable dimension groupings
    dimension_groups: dict[tuple[str, ...], list[str]] = defaultdict(list)
@@ -64,8 +62,7 @@ def _to_dataframe_collection(
def _to_faceted_dim_dataframe(
    dataset: xr.Dataset, filepath: str | Path, extension: str
) -> tuple[Path, pd.DataFrame]:
-    if not extension.startswith("."):
-        extension = "." + extension
+    extension = extension if extension.startswith(".") else "." + extension

    # Get variable dimension groupings
    dimension_groups: dict[tuple[str, ...], list[str]] = defaultdict(list)
@@ -96,24 +93,24 @@ def _to_faceted_dim_dataframe(
    ds = dataset[["time"]].copy()
    for dims, var_list in dimension_groups.items():
        # simple case
-        if dims == ("time", ):
+        if dims == ("time",):
            ds.update(dataset[var_list])
            continue

        shape = dataset[var_list[0]].shape

        # If scalar, expand to make time the first dimension
        if not shape:
            _tmp = dataset[var_list].expand_dims({"time": dataset["time"]})
            ds.update(_tmp[var_list])
            continue

        _tmp = dataset[var_list]

        # If 1D, expand to make time a dimension (2D)
        if len(shape) == 1:
            _tmp = _tmp.expand_dims({"time": dataset["time"]})

        # For 2D, make time the first dimension and flatten the second
        new_dims = ("time", [d for d in dims if d != "time"][0])
        _tmp = _tmp.transpose(*new_dims)
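This loop normalizes every variable group to a 2-D (time, other) layout before flattening. A standalone sketch of the `expand_dims`/`transpose` steps on a height-only variable (toy values mirroring the test fixtures further down):

import xarray as xr

ds = xr.Dataset(
    data_vars={"other": ("height", [1, 2, 3, 4])},
    coords={"time": [0, 1, 2], "height": [0, 10, 20, 30]},
)
# 1-D with no time axis: broadcast along time, then put time first
tmp = ds[["other"]].expand_dims({"time": ds["time"]})
tmp = tmp.transpose("time", "height")
print(tmp["other"].shape)  # (3, 4): one row per time, one column per height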
@@ -136,15 +133,15 @@ def _flatten_dataset(ds: xr.Dataset, second_dim: str) -> xr.Dataset:
    Returns:
        xr.Dataset: The flattened dataset. Preserves attributes.
    """

    output = ds[["time"]]

    dim_values = ds[second_dim].values

    dim_units = ds[second_dim].attrs.get("units")
    if not dim_units or dim_units == "1":
        dim_units = ""

    dim_suffixes = [f"{dim_val}{dim_units}" for dim_val in dim_values]

    for var_name, data in ds.data_vars.items():
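A quick sketch of what the suffix logic above yields for the height coordinate used in the tests (values and units taken from the fixtures; each suffix presumably names one flattened column per height):

dim_values = [0, 10, 20, 30]   # ds["height"].values
dim_units = "m"                # ds["height"].attrs["units"]; missing or "1" becomes ""
dim_suffixes = [f"{v}{dim_units}" for v in dim_values]
print(dim_suffixes)  # ['0m', '10m', '20m', '30m']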
71 changes: 71 additions & 0 deletions test/conftest.py
@@ -54,3 +54,74 @@ def dataset() -> xr.Dataset:
"data_level": "c1",
},
)


+@pytest.fixture(autouse=True, scope="module")
+def bad_dataset() -> xr.Dataset:
+    return xr.Dataset(
+        coords={
+            "time": (
+                "time",
+                pd.date_range(
+                    "2022-04-05",
+                    "2022-04-06",
+                    periods=3 + 1,
+                    inclusive="left",
+                ),  # type: ignore
+                {"units": "Seconds since 1970-01-01 00:00:00"},
+            ),
+            "range": (
+                "range",
+                [1, 2, 3],
+                {},
+            ),
+            "height": (
+                "height",
+                [0, 10, 20, 30],
+                {"units": "m", "long_name": "Height AGL"},
+            ),
+        },
+        data_vars={
+            "temperature": (
+                ("range", "height"),
+                [
+                    [88, 80, 75, 70],
+                    [89, 81, 76, 71],
+                    [88.5, 81.5, 75.5, 69.5],
+                ],
+                {"units": "degF", "_FillValue": -9999.0},
+            ),
+            "too_large": (
+                ("time", "range", "height"),
+                [
+                    [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
+                    [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
+                    [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
+                ],
+                {"units": "1", "_FillValue": -9999.0},
+            ),
+            "humidity": (
+                "range",
+                [60.5, 65.5, 63],
+                {"units": "%", "_FillValue": -9999.0},
+            ),
+            "other": (
+                "height",
+                [1, 2, 3, 4],
+                {"units": "1", "_FillValue": -9999.0},
+            ),
+            "time_var": (
+                "time",
+                [1, 2, 3],
+                {"units": "1", "_FillValue": -9999.0},
+            ),
+        },
+        attrs={
+            "datastream": "bad.buoy.c1",
+            "title": "title",
+            "description": "description",
+            "location_id": "bad",
+            "dataset_name": "buoy",
+            "data_level": "c1",
+        },
+    )
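What makes this fixture "bad" for a time-indexed table: `temperature` has no time dimension at all, and `too_large` is 3-D, so neither maps cleanly onto (time, other) columns. A sketch of a test (hypothetical name) asserting those properties against the autouse fixture:

import xarray as xr


def test_bad_dataset_is_awkward(bad_dataset: xr.Dataset):
    assert "time" not in bad_dataset["temperature"].dims  # no time axis to index rows by
    assert len(bad_dataset["too_large"].dims) == 3        # too many dims to facet into 2-D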
53 changes: 29 additions & 24 deletions test/test_cli.py
@@ -37,30 +37,35 @@ def test_convert_cli(dataset: xr.Dataset):

    runner = CliRunner()

-    with runner.isolated_filesystem():
-        dataset.to_netcdf("test.20220405.000000.nc")
-        dataset.to_netcdf("test.20220405.001200.nc")
-        dataset.to_netcdf("test.20220406.000000.nc")
-        dataset.to_netcdf("test.20220406.001200.nc")
-        dataset.to_netcdf("test.20220410.000000.nc")
-        dataset.to_netcdf("test.20220410.001200.nc")
-        dataset.to_netcdf("test.20220420.000000.nc")
-        dataset.close()
+    for verbosity in ["--verbose", "--no-verbose"]:
+        with runner.isolated_filesystem():
+            dataset.to_netcdf("test.20220405.000000.nc")
+            dataset.to_netcdf("test.20220405.001200.nc")
+            dataset.to_netcdf("test.20220406.000000.nc")
+            dataset.to_netcdf("test.20220406.001200.nc")
+            dataset.to_netcdf("test.20220410.000000.nc")
+            dataset.to_netcdf("test.20220410.001200.nc")
+            dataset.to_netcdf("test.20220420.000000.nc")
+            dataset.close()

-        result = runner.invoke(
-            app,
-            args=(
-                "to_csv",
-                "test.2022040*.nc",
-                "test.2022041*.nc",
-                "test.20220420.000000.nc",
-                "--output-dir",
-                "outputs",
-                "--verbose",
-            ),
-        )
+            result = runner.invoke(
+                app,
+                args=(
+                    "to_csv",
+                    "test.2022040*.nc",
+                    "test.2022041*.nc",
+                    "test.20220420.000000.nc",
+                    "--output-dir",
+                    "outputs",
+                    verbosity,
+                ),
+            )

-        assert result.exit_code == 0
+            assert result.exit_code == 0
+            if verbosity == "--no-verbose":
+                assert result.stdout == ""
+            else:
+                assert result.stdout != ""

-        assert len(list(Path("./outputs").glob("*.csv"))) == 7
-        assert len(list(Path("./outputs").glob("*.json"))) == 7
+            assert len(list(Path("./outputs").glob("*.csv"))) == 7
+            assert len(list(Path("./outputs").glob("*.json"))) == 7
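For reference, a self-contained sketch of the `CliRunner` capture behavior the new assertions lean on, using a hypothetical toy command rather than the real ncconvert app:

import click
from click.testing import CliRunner


@click.command()
@click.option("--verbose/--no-verbose", default=True)
def convert(verbose: bool):
    if verbose:
        click.echo("converting...")


runner = CliRunner()
result = runner.invoke(convert, ["--no-verbose"])
assert result.exit_code == 0
assert result.stdout == ""  # --no-verbose prints nothing, as the test expects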
32 changes: 26 additions & 6 deletions test/test_csv.py
@@ -70,26 +70,46 @@ def test_csv_collection(dataset: xr.Dataset):
    os.remove(output_path)
    os.remove(metadata_path)

-def test_faceted_csv(dataset: xr.Dataset):
+
+def test_faceted_csv(dataset: xr.Dataset, bad_dataset: xr.Dataset):
    from ncconvert.csv import to_faceted_dim_csv

    filepath = Path(".tmp/data/faceted.csv")

    output_path, metadata_path = to_faceted_dim_csv(dataset, filepath)

    assert output_path == filepath
    assert metadata_path is not None
    assert metadata_path == filepath.with_suffix(".json")

-    df = pd.read_csv(output_path)
-
+    # cols=time, humidity, static, and [temperature, other]@each height
+    df = pd.read_csv(output_path)
    assert len(df.index) == len(dataset.time)
-    assert len(df.columns) == 2*len(dataset.height) + 3
+    assert len(df.columns) == 2 * len(dataset.height) + 3

    # should preserve metadata just like the others
    meta = json.loads(metadata_path.read_text())
    assert "datastream" in meta["attrs"]
    assert "time" in meta["coords"]

    os.remove(output_path)
    os.remove(metadata_path)

+    # test with the bad dataset.
+    # We should see some warnings in the logs
+    filepath = Path(".tmp/data/bad_faceted.csv")
+    output_path, metadata_path = to_faceted_dim_csv(bad_dataset, filepath)
+
+    assert output_path == filepath
+    assert metadata_path is not None
+    assert metadata_path == filepath.with_suffix(".json")
+
+    # cols=time, time_var, [humidity]@each range, [other]@each height
+    df = pd.read_csv(output_path)
+    assert len(df.index) == len(bad_dataset.time)
+    assert len(df.columns) == 2 + len(bad_dataset.height) + len(bad_dataset.range)
+
+    # should preserve metadata just like the others
+    meta = json.loads(metadata_path.read_text())
+    assert "datastream" in meta["attrs"]
+    assert "time" in meta["coords"]

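The column arithmetic in the two assertions decodes as follows (a worked sketch):

# good dataset: time + humidity + static, plus temperature and other
# faceted across 4 heights
assert 2 * 4 + 3 == 11
# bad dataset: time + time_var, humidity faceted across 3 ranges,
# other faceted across 4 heights
assert 2 + 4 + 3 == 9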
