diff --git a/odc/stats/plugins/lc_level34.py b/odc/stats/plugins/lc_level34.py
index 238850d..ef48287 100644
--- a/odc/stats/plugins/lc_level34.py
+++ b/odc/stats/plugins/lc_level34.py
@@ -87,7 +87,7 @@ def classification(self, xx, class_def, con_cols, class_col):
         res = da.full(xx.level_3_4.shape, 0, dtype="uint8")
 
         for expression in expressions:
-            _log.info(expression)
+            _log.debug(expression)
             local_dict.update({"res": res})
             res = expr_eval(
                 expression,
diff --git a/tests/test_lc_level3.py b/tests/test_lc_level3.py
deleted file mode 100644
index 90ecbd7..0000000
--- a/tests/test_lc_level3.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import numpy as np
-import pandas as pd
-import xarray as xr
-import dask.array as da
-
-from odc.stats.plugins.l34_utils import lc_level3
-from odc.stats.plugins._utils import rasterize_vector_mask
-from datacube.utils.geometry import GeoBox
-from affine import Affine
-
-import pytest
-
-NODATA = 255
-
-expected_l3_classes = [
-    [111, 112, 215],
-    [124, 112, 215],
-    [220, 215, 216],
-    [220, 255, 220],
-]
-
-
-@pytest.fixture(scope="module")
-def image_groups():
-    l34 = np.array(
-        [
-            [
-                [110, 110, 210],
-                [124, 110, 210],
-                [221, 210, 210],
-                [223, np.nan, 223],
-            ]
-        ],
-        dtype="float32",
-    )
-
-    urban = np.array(
-        [
-            [
-                [215, 215, 215],
-                [216, 216, 215],
-                [116, 215, 216],
-                [216, 216, 216],
-            ]
-        ],
-        dtype="uint8",
-    )
-
-    cultivated = np.array(
-        [
-            [
-                [111, 112, 255],
-                [255, 112, 255],
-                [255, 255, 255],
-                [255, np.nan, np.nan],
-            ]
-        ],
-        dtype="float32",
-    )
-
-    tuples = [
-        (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")),
-    ]
-    index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"])
-
-    affine = Affine.translation(10, 0) * Affine.scale(
-        (20 - 10) / l34.shape[2], (5 - 0) / l34.shape[1]
-    )
-    geobox = GeoBox(
-        crs="epsg:3577", affine=affine, width=l34.shape[2], height=l34.shape[1]
-    )
-    coords = geobox.xr_coords()
-
-    data_vars = {
-        "level_3_4": xr.DataArray(
-            da.from_array(l34, chunks=(1, -1, -1)),
-            dims=("spec", "y", "x"),
-            attrs={"nodata": 255},
-        ),
-        "artificial_surface": xr.DataArray(
-            da.from_array(urban, chunks=(1, -1, -1)),
-            dims=("spec", "y", "x"),
-            attrs={"nodata": 255},
-        ),
-        "cultivated": xr.DataArray(
-            da.from_array(cultivated, chunks=(1, -1, -1)),
-            dims=("spec", "y", "x"),
-            attrs={"nodata": 255},
-        ),
-    }
-    xx = xr.Dataset(data_vars=data_vars, coords=coords)
-    xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec"))
-    return xx
-
-
-def test_l3_classes(image_groups, urban_shape):
-    filter_expression = "mock > 9"
-    urban_mask = rasterize_vector_mask(
-        urban_shape,
-        image_groups.geobox.transform,
-        image_groups.artificial_surface.shape,
-        filter_expression=filter_expression,
-        threshold=0.3,
-    )
-
-    level3_classes = lc_level3.lc_level3(image_groups, urban_mask)
-    assert (level3_classes == expected_l3_classes).all()
diff --git a/tests/test_lc_l34.py b/tests/test_lc_level34.py
similarity index 58%
rename from tests/test_lc_l34.py
rename to tests/test_lc_level34.py
index 54706d0..5f1113a 100644
--- a/tests/test_lc_l34.py
+++ b/tests/test_lc_level34.py
@@ -1,4 +1,7 @@
 from odc.stats.plugins.lc_level34 import StatsLccsLevel4
+from odc.stats.plugins._utils import generate_numexpr_expressions
+
+import re
 import numpy as np
 import pandas as pd
 import xarray as xr
@@ -6,7 +9,6 @@
 from datacube.utils.geometry import GeoBox
 from affine import Affine
 
-
 import pytest
 
 
@@ -178,6 +180,21 @@ def test_l4_classes(image_groups, urban_shape):
     expected_l4 = [[95, 97, 93], [97, 96, 96], [100, 93, 93], [101, 101, 101]]
     stats_l4 = StatsLccsLevel4(
         measurements=["level3", "level4"],
+        class_def_path="s3://dea-public-data-dev/lccs_validation/c3/data_to_plot/"
+        "lccs_colour_scheme_golden_dark_au_c3.csv",
+        class_condition={
+            "level3": ["level1", "artificial_surface", "cultivated"],
+            "level4": [
+                "level1",
+                "level3",
+                "woody",
+                "water_season",
+                "water_frequency",
+                "pv_pc_50",
+                "bs_pc_50",
+            ],
+        },
+        data_var_condition={"level1": "level_3_4"},
         urban_mask=urban_shape,
         filter_expression="mock > 9",
         mask_threshold=0.3,
@@ -186,3 +203,79 @@ def test_l4_classes(image_groups, urban_shape):
 
     assert (ds.level3.compute() == expected_l3).all()
     assert (ds.level4.compute() == expected_l4).all()
+
+
+@pytest.mark.parametrize(  # each case pairs a rules table with its expected strings
+    "rules_df, expected_expressions",
+    [
+        # Range conditions: "[" / "]" are inclusive bounds, "(" / ")" are exclusive.
+        # When the number of conditions is the same, the order doesn't matter.
+        (
+            pd.DataFrame(
+                {
+                    "condition_1": ["[5, 10)", "(1, 4]"],
+                    "condition_2": ["==2", "!=2"],
+                    "final_class": [1, 2],
+                }
+            ),
+            [
+                "where((condition_1>1.0)&(condition_1<=4.0)&(condition_2!=2.0), 2, previous)",
+                "where((condition_1>=5.0)&(condition_1<10.0)&(condition_2==2.0), 1, previous)",
+            ],
+        ),
+        # "nan" in a condition column: that column adds no term to the rule.
+        # The rule with fewer conditions comes first in the expected list.
+        (
+            pd.DataFrame(
+                {
+                    "condition_1": ["[5, 10)", "nan"],
+                    "condition_2": ["==2", "!=2"],
+                    "final_class": [1, 2],
+                }
+            ),
+            [
+                "where((condition_2!=2.0), 2, previous)",
+                "where((condition_1>=5.0)&(condition_1<10.0)&(condition_2==2.0), 1, previous)",
+            ],
+        ),
+        # A bare value ("3") implies "=="; "255" adds no term, like "nan" above.
+        (
+            pd.DataFrame(
+                {
+                    "condition_1": ["3", "255"],
+                    "condition_2": ["==2", "!=2"],
+                    "final_class": [1, 2],
+                }
+            ),
+            [
+                "where((condition_2!=2.0), 2, previous)",
+                "where((condition_1==3)&(condition_2==2.0), 1, previous)",
+            ],
+        ),
+    ],
+)
+def test_generate_numexpr_expressions(rules_df, expected_expressions):
+    con_cols = ["condition_1", "condition_2"]  # columns holding rule conditions
+    class_col = "final_class"  # column holding the class each rule assigns
+
+    generated_expressions = generate_numexpr_expressions(
+        rules_df[con_cols + [class_col]], class_col, "previous"
+    )  # "previous" is the where() fallback in every expected string
+
+    def normalize_expression(expression):  # make "&" term order irrelevant
+        match = re.match(r"where\((.*), (.*?), (.*?)\)", expression)
+        if match:
+            conditions, true_value, false_value = match.groups()
+            # Sort the "&"-separated terms so the comparison ignores their order
+            sorted_conditions = "&".join(sorted(conditions.split("&")))
+            return f"where({sorted_conditions}, {true_value}, {false_value})"
+        return expression  # not a where(...) string: leave it untouched
+
+    normalized_expected = [normalize_expression(expr) for expr in expected_expressions]
+    normalized_generated = [
+        normalize_expression(expr) for expr in generated_expressions
+    ]
+
+    assert (
+        normalized_generated == normalized_expected  # compared in list order
+    ), f"Expected {expected_expressions}, but got {generated_expressions}"