diff --git a/odc/stats/plugins/lc_level34.py b/odc/stats/plugins/lc_level34.py index 238850d..ef48287 100644 --- a/odc/stats/plugins/lc_level34.py +++ b/odc/stats/plugins/lc_level34.py @@ -87,7 +87,7 @@ def classification(self, xx, class_def, con_cols, class_col): res = da.full(xx.level_3_4.shape, 0, dtype="uint8") for expression in expressions: - _log.info(expression) + _log.debug(expression) local_dict.update({"res": res}) res = expr_eval( expression, diff --git a/tests/test_lc_level3.py b/tests/test_lc_level3.py deleted file mode 100644 index 90ecbd7..0000000 --- a/tests/test_lc_level3.py +++ /dev/null @@ -1,107 +0,0 @@ -import numpy as np -import pandas as pd -import xarray as xr -import dask.array as da - -from odc.stats.plugins.l34_utils import lc_level3 -from odc.stats.plugins._utils import rasterize_vector_mask -from datacube.utils.geometry import GeoBox -from affine import Affine - -import pytest - -NODATA = 255 - -expected_l3_classes = [ - [111, 112, 215], - [124, 112, 215], - [220, 215, 216], - [220, 255, 220], -] - - -@pytest.fixture(scope="module") -def image_groups(): - l34 = np.array( - [ - [ - [110, 110, 210], - [124, 110, 210], - [221, 210, 210], - [223, np.nan, 223], - ] - ], - dtype="float32", - ) - - urban = np.array( - [ - [ - [215, 215, 215], - [216, 216, 215], - [116, 215, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - - cultivated = np.array( - [ - [ - [111, 112, 255], - [255, 112, 255], - [255, 255, 255], - [255, np.nan, np.nan], - ] - ], - dtype="float32", - ) - - tuples = [ - (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")), - ] - index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"]) - - affine = Affine.translation(10, 0) * Affine.scale( - (20 - 10) / l34.shape[2], (5 - 0) / l34.shape[1] - ) - geobox = GeoBox( - crs="epsg:3577", affine=affine, width=l34.shape[2], height=l34.shape[1] - ) - coords = geobox.xr_coords() - - data_vars = { - "level_3_4": xr.DataArray( - da.from_array(l34, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "artificial_surface": xr.DataArray( - da.from_array(urban, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "cultivated": xr.DataArray( - da.from_array(cultivated, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - } - xx = xr.Dataset(data_vars=data_vars, coords=coords) - xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec")) - return xx - - -def test_l3_classes(image_groups, urban_shape): - filter_expression = "mock > 9" - urban_mask = rasterize_vector_mask( - urban_shape, - image_groups.geobox.transform, - image_groups.artificial_surface.shape, - filter_expression=filter_expression, - threshold=0.3, - ) - - level3_classes = lc_level3.lc_level3(image_groups, urban_mask) - assert (level3_classes == expected_l3_classes).all() diff --git a/tests/test_lc_l34.py b/tests/test_lc_level34.py similarity index 58% rename from tests/test_lc_l34.py rename to tests/test_lc_level34.py index 54706d0..5f1113a 100644 --- a/tests/test_lc_l34.py +++ b/tests/test_lc_level34.py @@ -1,4 +1,7 @@ from odc.stats.plugins.lc_level34 import StatsLccsLevel4 +from odc.stats.plugins._utils import generate_numexpr_expressions + +import re import numpy as np import pandas as pd import xarray as xr @@ -6,7 +9,6 @@ from datacube.utils.geometry import GeoBox from affine import Affine - import pytest @@ -178,6 +180,21 @@ def test_l4_classes(image_groups, urban_shape): expected_l4 = [[95, 97, 93], [97, 96, 96], [100, 93, 93], [101, 101, 101]] stats_l4 = StatsLccsLevel4( measurements=["level3", "level4"], + class_def_path="s3://dea-public-data-dev/lccs_validation/c3/data_to_plot/" + "lccs_colour_scheme_golden_dark_au_c3.csv", + class_condition={ + "level3": ["level1", "artificial_surface", "cultivated"], + "level4": [ + "level1", + "level3", + "woody", + "water_season", + "water_frequency", + "pv_pc_50", + "bs_pc_50", + ], + }, + data_var_condition={"level1": "level_3_4"}, urban_mask=urban_shape, filter_expression="mock > 9", mask_threshold=0.3, @@ -186,3 +203,79 @@ def test_l4_classes(image_groups, urban_shape): assert (ds.level3.compute() == expected_l3).all() assert (ds.level4.compute() == expected_l4).all() + + +@pytest.mark.parametrize( + "rules_df, expected_expressions", + [ + # Test with range conditions + # when condition numbers are the same the order doesn't matter + ( + pd.DataFrame( + { + "condition_1": ["[5, 10)", "(1, 4]"], + "condition_2": ["==2", "!=2"], + "final_class": [1, 2], + } + ), + [ + "where((condition_1>1.0)&(condition_1<=4.0)&(condition_2!=2.0), 2, previous)", + "where((condition_1>=5.0)&(condition_1<10.0)&(condition_2==2.0), 1, previous)", + ], + ), + # Test with NaN + # when clause with smaller number of conditions always takes precedence + ( + pd.DataFrame( + { + "condition_1": ["[5, 10)", "nan"], + "condition_2": ["==2", "!=2"], + "final_class": [1, 2], + } + ), + [ + "where((condition_2!=2.0), 2, previous)", + "where((condition_1>=5.0)&(condition_1<10.0)&(condition_2==2.0), 1, previous)", + ], + ), + # Test with single value implying "==" and "255" + ( + pd.DataFrame( + { + "condition_1": ["3", "255"], + "condition_2": ["==2", "!=2"], + "final_class": [1, 2], + } + ), + [ + "where((condition_2!=2.0), 2, previous)", + "where((condition_1==3)&(condition_2==2.0), 1, previous)", + ], + ), + ], +) +def test_generate_numexpr_expressions(rules_df, expected_expressions): + con_cols = ["condition_1", "condition_2"] + class_col = "final_class" + + generated_expressions = generate_numexpr_expressions( + rules_df[con_cols + [class_col]], class_col, "previous" + ) + + def normalize_expression(expression): + match = re.match(r"where\((.*), (.*?), (.*?)\)", expression) + if match: + conditions, true_value, false_value = match.groups() + # Split conditions, sort them, and rejoin + sorted_conditions = "&".join(sorted(conditions.split("&"))) + return f"where({sorted_conditions}, {true_value}, {false_value})" + return expression + + normalized_expected = [normalize_expression(expr) for expr in expected_expressions] + normalized_generated = [ + normalize_expression(expr) for expr in generated_expressions + ] + + assert ( + normalized_generated == normalized_expected + ), f"Expected {expected_expressions}, but got {generated_expressions}"