diff --git a/scripts/v.dissolve/tests/conftest.py b/scripts/v.dissolve/tests/conftest.py new file mode 100644 index 00000000000..b74969999b1 --- /dev/null +++ b/scripts/v.dissolve/tests/conftest.py @@ -0,0 +1,248 @@ +"""Fixtures for v.dissolve tests""" + +from types import SimpleNamespace + +import pytest + +import grass.script as gs +import grass.script.setup as grass_setup + + +def updates_as_transaction(table, cat_column, column, column_quote, cats, values): + """Create SQL statement for categories and values for a given column""" + sql = ["BEGIN TRANSACTION"] + if column_quote: + quote = "'" + else: + quote = "" + for cat, value in zip(cats, values): + sql.append( + f"UPDATE {table} SET {column} = {quote}{value}{quote} " + f"WHERE {cat_column} = {cat};" + ) + sql.append("END TRANSACTION") + return "\n".join(sql) + + +def value_update_by_category(map_name, layer, column_name, cats, values): + """Update column value for multiple rows based on category""" + db_info = gs.vector_db(map_name)[layer] + table = db_info["table"] + database = db_info["database"] + driver = db_info["driver"] + cat_column = "cat" + column_type = gs.vector_columns(map_name, layer)[column_name] + column_quote = bool(column_type["type"] in ("CHARACTER", "TEXT")) + sql = updates_as_transaction( + table=table, + cat_column=cat_column, + column=column_name, + column_quote=column_quote, + cats=cats, + values=values, + ) + gs.write_command( + "db.execute", input="-", database=database, driver=driver, stdin=sql + ) + + +@pytest.fixture(scope="module") +def dataset(tmp_path_factory): + """Creates a session with a mapset which has vector with a float column""" + tmp_path = tmp_path_factory.mktemp("dataset") + location = "test" + point_map_name = "points" + map_name = "areas" + int_column_name = "int_value" + float_column_name = "double_value" + str_column_name = "str_value" + + cats = [1, 2, 3, 4, 5, 6] + int_values = [10, 10, 10, 5, 24, 5] + float_values = [100.78, 102.78, 109.78, 104.78, 103.78, 105.78] + str_values = ["apples", "oranges", "oranges", "plumbs", "oranges", "plumbs"] + num_points = len(cats) + + gs.core._create_location_xy(tmp_path, location) # pylint: disable=protected-access + with grass_setup.init(tmp_path / location): + gs.run_command("g.region", s=0, n=80, w=0, e=120, b=0, t=50, res=10, res3=10) + gs.run_command("v.random", output=point_map_name, npoints=num_points, seed=42) + gs.run_command("v.voronoi", input=point_map_name, output=map_name) + gs.run_command( + "v.db.addtable", + map=map_name, + columns=[ + f"{int_column_name} integer", + f"{float_column_name} double precision", + f"{str_column_name} text", + ], + ) + value_update_by_category( + map_name=map_name, + layer=1, + column_name=int_column_name, + cats=cats, + values=int_values, + ) + value_update_by_category( + map_name=map_name, + layer=1, + column_name=float_column_name, + cats=cats, + values=float_values, + ) + value_update_by_category( + map_name=map_name, + layer=1, + column_name=str_column_name, + cats=cats, + values=str_values, + ) + yield SimpleNamespace( + vector_name=map_name, + int_column_name=int_column_name, + int_values=int_values, + float_column_name=float_column_name, + float_values=float_values, + str_column_name=str_column_name, + str_column_values=str_values, + ) + + +@pytest.fixture(scope="module") +def discontinuous_dataset(tmp_path_factory): + """Creates a session with a mapset which has vector with a float column""" + tmp_path = tmp_path_factory.mktemp("discontinuous_dataset") + location = "test" + point_map_name = 
"points" + map_name = "areas" + int_column_name = "int_value" + float_column_name = "double_value" + str_column_name = "str_value" + + cats = [1, 2, 3, 4, 5, 6] + int_values = [10, 12, 10, 5, 24, 24] + float_values = [100.78, 102.78, 109.78, 104.78, 103.78, 105.78] + str_values = ["apples", "plumbs", "apples", "plumbs", "oranges", "oranges"] + num_points = len(cats) + + gs.core._create_location_xy(tmp_path, location) # pylint: disable=protected-access + with grass_setup.init(tmp_path / location): + gs.run_command("g.region", s=0, n=80, w=0, e=120, b=0, t=50, res=10, res3=10) + gs.run_command("v.random", output=point_map_name, npoints=num_points, seed=42) + gs.run_command("v.voronoi", input=point_map_name, output=map_name) + gs.run_command( + "v.db.addtable", + map=map_name, + columns=[ + f"{int_column_name} integer", + f"{float_column_name} double precision", + f"{str_column_name} text", + ], + ) + value_update_by_category( + map_name=map_name, + layer=1, + column_name=int_column_name, + cats=cats, + values=int_values, + ) + value_update_by_category( + map_name=map_name, + layer=1, + column_name=float_column_name, + cats=cats, + values=float_values, + ) + value_update_by_category( + map_name=map_name, + layer=1, + column_name=str_column_name, + cats=cats, + values=str_values, + ) + yield SimpleNamespace( + vector_name=map_name, + int_column_name=int_column_name, + int_values=int_values, + float_column_name=float_column_name, + float_values=float_values, + str_column_name=str_column_name, + str_column_values=str_values, + ) + + +@pytest.fixture(scope="module") +def dataset_layer_2(tmp_path_factory): + """Creates a session with a mapset which has vector with a float column""" + tmp_path = tmp_path_factory.mktemp("dataset_layer_2") + location = "test" + point_map_name = "points" + point_map_name_layer_2 = "points2" + map_name = "areas" + int_column_name = "int_value" + float_column_name = "double_value" + str_column_name = "str_value" + + cats = [1, 2, 3, 4, 5, 6] + int_values = [10, 10, 10, 5, 24, 5] + float_values = [100.78, 102.78, 109.78, 104.78, 103.78, 105.78] + str_values = ["apples", "oranges", "oranges", "plumbs", "oranges", "plumbs"] + num_points = len(cats) + + layer = 2 + + gs.core._create_location_xy(tmp_path, location) # pylint: disable=protected-access + with grass_setup.init(tmp_path / location): + gs.run_command("g.region", s=0, n=80, w=0, e=120, b=0, t=50, res=10, res3=10) + gs.run_command("v.random", output=point_map_name, npoints=num_points, seed=42) + gs.run_command( + "v.category", + input=point_map_name, + layer=[1, layer], + output=point_map_name_layer_2, + option="transfer", + ) + gs.run_command( + "v.voronoi", input=point_map_name_layer_2, layer=layer, output=map_name + ) + gs.run_command( + "v.db.addtable", + map=map_name, + layer=layer, + columns=[ + f"{int_column_name} integer", + f"{float_column_name} double precision", + f"{str_column_name} text", + ], + ) + value_update_by_category( + map_name=map_name, + layer=layer, + column_name=int_column_name, + cats=cats, + values=int_values, + ) + value_update_by_category( + map_name=map_name, + layer=layer, + column_name=float_column_name, + cats=cats, + values=float_values, + ) + value_update_by_category( + map_name=map_name, + layer=layer, + column_name=str_column_name, + cats=cats, + values=str_values, + ) + yield SimpleNamespace( + vector_name=map_name, + int_column_name=int_column_name, + int_values=int_values, + float_column_name=float_column_name, + float_values=float_values, + str_column_name=str_column_name, + 
str_column_values=str_values, + ) diff --git a/scripts/v.dissolve/tests/v_dissolve_aggregate_test.py b/scripts/v.dissolve/tests/v_dissolve_aggregate_test.py new file mode 100644 index 00000000000..1c2b6d45123 --- /dev/null +++ b/scripts/v.dissolve/tests/v_dissolve_aggregate_test.py @@ -0,0 +1,405 @@ +"""Test v.dissolve attribute aggregations""" + +import json +import statistics + +import pytest + +import grass.script as gs + + +@pytest.mark.parametrize( + "aggregate_methods", + [ + ["n"], + ["sum"], + ["range"], + ["min", "max", "mean", "variance"], + ["mean_abs", "stddev", "coeff_var"], + ], +) +def test_aggregate_methods(dataset, aggregate_methods): + """All aggregate methods are accepted and their columns generated""" + dissolved_vector = f"test_methods_{'_'.join(aggregate_methods)}" + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=dataset.float_column_name, + aggregate_method=aggregate_methods, + ) + columns = gs.vector_columns(dissolved_vector) + stats_columns = [ + f"{dataset.float_column_name}_{method}" for method in aggregate_methods + ] + assert sorted(columns.keys()) == sorted( + ["cat", dataset.str_column_name] + stats_columns + ) + + +def test_aggregate_two_columns(dataset): + """Aggregate stats for two columns are generated""" + dissolved_vector = "test_two_columns" + aggregate_methods = ["mean", "stddev"] + aggregate_columns = [dataset.float_column_name, dataset.int_column_name] + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=aggregate_columns, + aggregate_method=aggregate_methods, + ) + stats_columns = [ + f"{column}_{method}" + for method in aggregate_methods + for column in aggregate_columns + ] + columns = gs.vector_columns(dissolved_vector) + assert sorted(columns.keys()) == sorted( + ["cat", dataset.str_column_name] + stats_columns + ) + + +@pytest.mark.parametrize("backend", [None, "univar", "sql"]) +def test_aggregate_column_result(dataset, backend): + """Check resulting types and values of basic stats with different backends + + It assumes that the univar-like names are translated to SQLite names. 
+ """ + dissolved_vector = f"test_results_{backend}" + stats = ["sum", "n", "min", "max", "mean"] + stats_columns = [f"value_{method}" for method in stats] + aggregate_columns = [dataset.float_column_name] * len(stats) + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=aggregate_columns, + aggregate_method=stats, + result_column=stats_columns, + aggregate_backend=backend, + ) + + vector_info = gs.vector_info(dissolved_vector) + assert vector_info["level"] == 2 + assert vector_info["centroids"] == 3 + assert vector_info["areas"] == 3 + assert vector_info["num_dblinks"] == 1 + assert vector_info["attribute_primary_key"] == "cat" + + columns = gs.vector_columns(dissolved_vector) + assert len(columns) == len(stats_columns) + 2 + assert sorted(columns.keys()) == sorted( + ["cat", dataset.str_column_name] + stats_columns + ) + for stats_column in stats_columns: + assert stats_column in columns + column_info = columns[stats_column] + if stats_column.endswith("_n"): + correct_type = "integer" + else: + correct_type = "double precision" + assert ( + columns[stats_column]["type"].lower() == correct_type + ), f"{stats_column} has a wrong type" + assert dataset.str_column_name in columns + column_info = columns[dataset.str_column_name] + assert column_info["type"].lower() == "character" + + records = json.loads( + gs.read_command( + "v.db.select", + map=dissolved_vector, + format="json", + ) + )["records"] + ref_unique_values = set(dataset.str_column_values) + actual_values = [record[dataset.str_column_name] for record in records] + assert len(actual_values) == len(ref_unique_values) + assert set(actual_values) == ref_unique_values + + aggregate_n = [record["value_n"] for record in records] + assert sum(aggregate_n) == gs.vector_info(dataset.vector_name)["areas"] + assert sorted(aggregate_n) == [1, 2, 3] + aggregate_sum = [record["value_sum"] for record in records] + assert sorted(aggregate_sum) == [ + dataset.float_values[0], + pytest.approx(dataset.float_values[3] + dataset.float_values[5]), + pytest.approx( + dataset.float_values[1] + dataset.float_values[2] + dataset.float_values[4] + ), + ] + aggregate_max = [record["value_max"] for record in records] + assert sorted(aggregate_max) == [ + dataset.float_values[0], + pytest.approx(max([dataset.float_values[3], dataset.float_values[5]])), + pytest.approx( + max( + [ + dataset.float_values[1], + dataset.float_values[2], + dataset.float_values[4], + ] + ) + ), + ] + aggregate_min = [record["value_min"] for record in records] + assert sorted(aggregate_min) == [ + dataset.float_values[0], + pytest.approx( + min( + [ + dataset.float_values[1], + dataset.float_values[2], + dataset.float_values[4], + ] + ) + ), + pytest.approx(min([dataset.float_values[3], dataset.float_values[5]])), + ] + aggregate_mean = [record["value_mean"] for record in records] + assert sorted(aggregate_mean) == [ + dataset.float_values[0], + pytest.approx( + statistics.mean([dataset.float_values[3], dataset.float_values[5]]) + ), + pytest.approx( + statistics.mean( + [ + dataset.float_values[1], + dataset.float_values[2], + dataset.float_values[4], + ] + ) + ), + ] + + +def test_sqlite_agg_accepted(dataset): + """Numeric SQLite aggregate functions are accepted + + Additionally, it checks: + 1. generated column names + 2. types of columns + 3. 
aggregate counts + """ + dissolved_vector = "test_sqlite" + stats = ["avg", "count", "max", "min", "sum", "total"] + expected_stats_columns = [ + f"{dataset.float_column_name}_{method}" for method in stats + ] + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=dataset.float_column_name, + aggregate_method=stats, + aggregate_backend="sql", + ) + + vector_info = gs.vector_info(dissolved_vector) + assert vector_info["level"] == 2 + assert vector_info["centroids"] == 3 + assert vector_info["areas"] == 3 + assert vector_info["num_dblinks"] == 1 + assert vector_info["attribute_primary_key"] == "cat" + + columns = gs.vector_columns(dissolved_vector) + assert len(columns) == len(expected_stats_columns) + 2 + assert sorted(columns.keys()) == sorted( + ["cat", dataset.str_column_name] + expected_stats_columns + ), "Unexpected autogenerated column names" + for method, stats_column in zip(stats, expected_stats_columns): + assert stats_column in columns + column_info = columns[stats_column] + if method == "count": + correct_type = "integer" + else: + correct_type = "double precision" + assert ( + columns[stats_column]["type"].lower() == correct_type + ), f"{stats_column} has a wrong type" + assert dataset.str_column_name in columns + column_info = columns[dataset.str_column_name] + assert column_info["type"].lower() == "character" + + records = json.loads( + gs.read_command( + "v.db.select", + map=dissolved_vector, + format="json", + ) + )["records"] + ref_unique_values = set(dataset.str_column_values) + actual_values = [record[dataset.str_column_name] for record in records] + assert len(actual_values) == len(ref_unique_values) + assert set(actual_values) == ref_unique_values + + aggregate_n = [record[f"{dataset.float_column_name}_count"] for record in records] + assert sum(aggregate_n) == gs.vector_info(dataset.vector_name)["areas"] + assert sorted(aggregate_n) == [1, 2, 3] + + +def test_sqlite_concat(dataset): + """SQLite group concat text-returning aggregate function works""" + dissolved_vector = "test_sqlite_concat" + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=f"group_concat({dataset.int_column_name})", + result_column="concat_values text", + aggregate_backend="sql", + ) + records = json.loads( + gs.read_command( + "v.db.select", + map=dissolved_vector, + format="json", + ) + )["records"] + # Order of records is ignored - they are just sorted. + # Order within values of group_concat is defined as arbitrary by SQLite. 
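+ # Hence, the comparisons below sort the split values instead of comparing raw strings.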
+ expected_integers = sorted(["10", "10,10,24", "5,5"]) + actual_integers = sorted([record["concat_values"] for record in records]) + for expected, actual in zip(expected_integers, actual_integers): + assert sorted(expected.split(",")) == sorted(actual.split(",")) + + +def test_sqlite_concat_with_two_parameters(dataset): + """SQLite group concat text-returning two-parameter aggregate function works""" + dissolved_vector = "test_sqlite_concat_separator" + separator = "--+--" + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=f"group_concat({dataset.int_column_name}, '{separator}')", + result_column="concat_values text", + aggregate_backend="sql", + ) + records = json.loads( + gs.read_command( + "v.db.select", + map=dissolved_vector, + format="json", + ) + )["records"] + # Order of records is ignored - they are just sorted. + # Order within values of group_concat is defined as arbitrary by SQLite. + expected_integers = sorted(["10", "10,10,24", "5,5"]) + actual_integers = sorted([record["concat_values"] for record in records]) + for expected, actual in zip(expected_integers, actual_integers): + assert sorted(expected.split(",")) == sorted(actual.split(separator)) + + +def test_duplicate_columns_and_methods_accepted(dataset): + """Duplicate aggregate columns and methods are accepted and deduplicated""" + dissolved_vector = "test_duplicates" + stats = ["count", "count", "n", "min", "min", "n", "sum"] + expected_stats_columns = [ + f"{dataset.float_column_name}_{method}" + for method in ["count", "n", "min", "sum"] + ] + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=[dataset.float_column_name, dataset.float_column_name], + aggregate_method=stats, + aggregate_backend="sql", + ) + + vector_info = gs.vector_info(dissolved_vector) + assert vector_info["level"] == 2 + assert vector_info["centroids"] == 3 + assert vector_info["areas"] == 3 + assert vector_info["num_dblinks"] == 1 + assert vector_info["attribute_primary_key"] == "cat" + + columns = gs.vector_columns(dissolved_vector) + assert sorted(columns.keys()) == sorted( + ["cat", dataset.str_column_name] + expected_stats_columns + ), "Unexpected autogenerated column names" + + +def test_sql_expressions_accepted(dataset): + """Arbitrary SQL expressions are accepted for columns""" + dissolved_vector = "test_expressions" + aggregate_columns = ( + f"sum({dataset.float_column_name}), " + f"max({dataset.float_column_name}) - min({dataset.float_column_name}), " + f" count({dataset.float_column_name}) " + ) + result_columns = ( + " sum_of_values double, range_of_values double, count_of_rows integer" + ) + expected_stats_columns = ["sum_of_values", "range_of_values", "count_of_rows"] + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=aggregate_columns, + result_column=result_columns, + aggregate_backend="sql", + ) + + vector_info = gs.vector_info(dissolved_vector) + assert vector_info["level"] == 2 + assert vector_info["centroids"] == 3 + assert vector_info["areas"] == 3 + assert vector_info["num_dblinks"] == 1 + assert vector_info["attribute_primary_key"] == "cat" + + columns = gs.vector_columns(dissolved_vector) + assert sorted(columns.keys()) == sorted( + ["cat", dataset.str_column_name] + expected_stats_columns + ) + + +def 
test_no_methods_with_univar_and_result_columns_fail(dataset): + """Omitting methods, as allowed for the sql backend, is forbidden for univar""" + dissolved_vector = "test_no_method_univar_fails" + + aggregate_columns = dataset.float_column_name + result_columns = ( + "sum_of_values double,range_of_values double, count_of_rows integer" + ) + assert ( + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=aggregate_columns, + result_column=result_columns, + aggregate_backend="univar", + errors="status", + ) + != 0 + ) + + +def test_int_fails(dataset): + """An integer column fails with aggregates""" + dissolved_vector = "test_int" + assert ( + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.int_column_name, + output=dissolved_vector, + aggregate_column=dataset.float_column_name, + aggregate_method="n", + errors="status", + ) + != 0 + ) diff --git a/scripts/v.dissolve/tests/v_dissolve_geometry_test.py b/scripts/v.dissolve/tests/v_dissolve_geometry_test.py new file mode 100644 index 00000000000..71c950a2141 --- /dev/null +++ b/scripts/v.dissolve/tests/v_dissolve_geometry_test.py @@ -0,0 +1,59 @@ +"""Test v.dissolve with more advanced geometry""" + +import json + +import grass.script as gs + + +def test_dissolve_discontinuous_str(discontinuous_dataset): + """Dissolving of discontinuous areas results in a single attribute record + + Even when the areas are discontinuous, there should be only one row + in the attribute table. + This behavior is assumed by the attribute aggregation functionality. + """ + dataset = discontinuous_dataset + dissolved_vector = "test_discontinuous_str" + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + ) + + vector_info = gs.vector_info(dissolved_vector) + assert vector_info["level"] == 2 + assert vector_info["centroids"] == 5 + assert vector_info["areas"] == 5 + assert vector_info["num_dblinks"] == 1 + assert vector_info["attribute_primary_key"] == "cat" + # Reference values obtained by examining the result.
+ assert vector_info["north"] == 80 + assert vector_info["south"] == 0 + assert vector_info["east"] == 120 + assert vector_info["west"] == 0 + assert vector_info["nodes"] == 14 + assert vector_info["points"] == 0 + assert vector_info["lines"] == 0 + assert vector_info["boundaries"] == 18 + assert vector_info["islands"] == 1 + assert vector_info["primitives"] == 23 + assert vector_info["map3d"] == 0 + + columns = gs.vector_columns(dissolved_vector) + assert len(columns) == 2 + assert sorted(columns.keys()) == sorted(["cat", dataset.str_column_name]) + column_info = columns[dataset.str_column_name] + assert column_info["type"].lower() == "character" + + records = json.loads( + gs.read_command( + "v.db.select", + map=dissolved_vector, + format="json", + ) + )["records"] + ref_unique_values = set(dataset.str_column_values) + actual_values = [record[dataset.str_column_name] for record in records] + assert len(actual_values) == len(ref_unique_values) + assert set(actual_values) == ref_unique_values diff --git a/scripts/v.dissolve/tests/v_dissolve_layers_test.py b/scripts/v.dissolve/tests/v_dissolve_layers_test.py new file mode 100644 index 00000000000..a13dc93315a --- /dev/null +++ b/scripts/v.dissolve/tests/v_dissolve_layers_test.py @@ -0,0 +1,74 @@ +"""Tests of v.dissolve with layer other than 1""" + +import json + +import grass.script as gs + + +def test_layer_2(dataset_layer_2): + """Numeric SQLite aggregate functions are accepted + + Additionally, it checks: + 1. generated column names + 2. types of columns + 3. aggregate counts + """ + dataset = dataset_layer_2 + dissolved_vector = "test_sqlite" + stats = ["avg", "count", "max", "min", "sum", "total"] + expected_stats_columns = [ + f"{dataset.float_column_name}_{method}" for method in stats + ] + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + layer=2, + column=dataset.str_column_name, + output=dissolved_vector, + aggregate_column=dataset.float_column_name, + aggregate_method=stats, + aggregate_backend="sql", + ) + + vector_info = gs.vector_info(dissolved_vector, layer=2) + assert vector_info["level"] == 2 + assert vector_info["centroids"] == 3 + assert vector_info["areas"] == 3 + assert vector_info["num_dblinks"] == 1 + assert vector_info["attribute_primary_key"] == "cat" + + columns = gs.vector_columns(dissolved_vector, layer=2) + assert len(columns) == len(expected_stats_columns) + 2 + assert sorted(columns.keys()) == sorted( + ["cat", dataset.str_column_name] + expected_stats_columns + ), "Unexpected autogenerated column names" + for method, stats_column in zip(stats, expected_stats_columns): + assert stats_column in columns + column_info = columns[stats_column] + if method == "count": + correct_type = "integer" + else: + correct_type = "double precision" + assert ( + columns[stats_column]["type"].lower() == correct_type + ), f"{stats_column} has a wrong type" + assert dataset.str_column_name in columns + column_info = columns[dataset.str_column_name] + assert column_info["type"].lower() == "character" + + records = json.loads( + gs.read_command( + "v.db.select", + map=dissolved_vector, + layer=2, + format="json", + ) + )["records"] + ref_unique_values = set(dataset.str_column_values) + actual_values = [record[dataset.str_column_name] for record in records] + assert len(actual_values) == len(ref_unique_values) + assert set(actual_values) == ref_unique_values + + aggregate_n = [record[f"{dataset.float_column_name}_count"] for record in records] + assert sum(aggregate_n) == gs.vector_info(dataset.vector_name)["areas"] +
assert sorted(aggregate_n) == [1, 2, 3] diff --git a/scripts/v.dissolve/tests/v_dissolve_test.py b/scripts/v.dissolve/tests/v_dissolve_test.py new file mode 100644 index 00000000000..f5d579f5139 --- /dev/null +++ b/scripts/v.dissolve/tests/v_dissolve_test.py @@ -0,0 +1,82 @@ +"""Test v.dissolve geometry info and basic attributes""" + +import json + +import grass.script as gs + + +def test_dissolve_int(dataset): + """Dissolving works on integer column""" + dissolved_vector = "test_int" + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.int_column_name, + output=dissolved_vector, + ) + + vector_info = gs.vector_info(dissolved_vector) + assert vector_info["level"] == 2 + assert vector_info["centroids"] == 3 + assert vector_info["areas"] == 3 + assert vector_info["num_dblinks"] == 0 + # Reference values obtained by examining the result. + assert vector_info["north"] == 80 + assert vector_info["south"] == 0 + assert vector_info["east"] == 120 + assert vector_info["west"] == 0 + assert vector_info["nodes"] == 14 + assert vector_info["points"] == 0 + assert vector_info["lines"] == 0 + assert vector_info["boundaries"] == 16 + assert vector_info["islands"] == 1 + assert vector_info["primitives"] == 19 + assert vector_info["map3d"] == 0 + + +def test_dissolve_str(dataset): + """Dissolving works on string column and attributes are present""" + dissolved_vector = "test_str" + gs.run_command( + "v.dissolve", + input=dataset.vector_name, + column=dataset.str_column_name, + output=dissolved_vector, + ) + + vector_info = gs.vector_info(dissolved_vector) + assert vector_info["level"] == 2 + assert vector_info["centroids"] == 3 + assert vector_info["areas"] == 3 + assert vector_info["num_dblinks"] == 1 + assert vector_info["attribute_primary_key"] == "cat" + # Reference values obtained by examining the result. + assert vector_info["north"] == 80 + assert vector_info["south"] == 0 + assert vector_info["east"] == 120 + assert vector_info["west"] == 0 + assert vector_info["nodes"] == 13 + assert vector_info["points"] == 0 + assert vector_info["lines"] == 0 + assert vector_info["boundaries"] == 15 + assert vector_info["islands"] == 1 + assert vector_info["primitives"] == 18 + assert vector_info["map3d"] == 0 + + columns = gs.vector_columns(dissolved_vector) + assert len(columns) == 2 + assert sorted(columns.keys()) == sorted(["cat", dataset.str_column_name]) + column_info = columns[dataset.str_column_name] + assert column_info["type"].lower() == "character" + + records = json.loads( + gs.read_command( + "v.db.select", + map=dissolved_vector, + format="json", + ) + )["records"] + ref_unique_values = set(dataset.str_column_values) + actual_values = [record[dataset.str_column_name] for record in records] + assert len(actual_values) == len(ref_unique_values) + assert set(actual_values) == ref_unique_values diff --git a/scripts/v.dissolve/v.dissolve.html b/scripts/v.dissolve/v.dissolve.html index 5896b491aeb..e22290fa064 100644 --- a/scripts/v.dissolve/v.dissolve.html +++ b/scripts/v.dissolve/v.dissolve.html @@ -7,6 +7,104 @@
+ Figure: Areas with the same attribute value (first image) are + merged into one (second image) +
+ +Attributes of merged areas can be aggregated using various aggregation methods +such as sum and mean. The specific methods available depend +on the backend used for aggregation. Two aggregate backends (specified in +aggregate_backend) are available: univar and sql. +When univar is used, the available methods are the ones +which v.db.univar uses by default, +i.e., n, min, max, range, +mean, mean_abs, variance, stddev, +coeff_var, and sum. +When the sql backend is used, the available methods depend on the SQL +database backend used for the attribute table of the input vector. +For SQLite, at least the following +built-in aggregate functions are available: +count, min, max, +avg, sum, and total. +For PostgreSQL, the list of +aggregate functions +is much longer and includes, e.g., count, min, max, +avg, sum, stddev, and variance. +The sql aggregate backend, regardless of the underlying database, +will typically perform significantly better than the univar backend. + +
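+As a minimal illustration of addressing each backend from a script (a sketch
+using the grass.script API; the map name areas and the columns category and
+value are placeholders, not part of the module), the same dissolve can be run
+with either backend:
+
+import grass.script as gs
+
+# univar backend: v.db.univar-style method names
+gs.run_command("v.dissolve", input="areas", column="category",
+               output="dissolved_univar", aggregate_columns="value",
+               aggregate_methods=["mean", "stddev"],
+               aggregate_backend="univar")
+
+# sql backend: SQLite aggregate function names
+gs.run_command("v.dissolve", input="areas", column="category",
+               output="dissolved_sql", aggregate_columns="value",
+               aggregate_methods=["avg", "sum"], aggregate_backend="sql")
+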
+Aggregate methods are specified by name in aggregate_methods
+or using SQL syntax in aggregate_columns.
+If result_columns is provided including type information
+and the sql backend is used,
+aggregate_columns can contain SQL syntax specifying both columns
+and the functions applied, e.g.,
+aggregate_columns="sum(cows) / sum(animals)"
.
+In this case, aggregate_methods should be omitted.
+This provides the highest flexibility and is suitable for scripting.
+
+
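+For instance, reusing the hypothetical cows and animals columns from the
+expression above, a complete call could look like this in Python (a sketch
+using the grass.script API; the map name farms and the column owner are
+placeholders):
+
+import grass.script as gs
+
+# aggregate_methods is omitted; the SQL expression does all the work
+gs.run_command("v.dissolve", input="farms", column="owner",
+               output="farms_dissolved",
+               aggregate_columns="sum(cows) / sum(animals)",
+               result_columns="cow_share double precision",
+               aggregate_backend="sql")
+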
+ +The backend is, by default, determined automatically based on the requested +methods. Specifically, the sql backend is used by default, +but when a method is not one of the SQLite built-in aggregate functions +and, at the same time, is available with the univar backend, +the univar backend is used. +The default behavior is intended for interactive use and testing. +For scripting and other automated usage, specifying the backend explicitly +is strongly recommended. + 
+For convenience, certain methods, namely n, count, +mean, and avg, are converted to the name appropriate +for the selected backend. However, for scripting, specifying the appropriate +method (function) name for the backend is recommended because the conversion +is a heuristic which may change in the future. + 
+If only aggregate_columns is provided, methods default to +n, min, max, mean, and sum. +If the univar backend is specified, all the available methods +for the univar backend are used. + +
+ +If the result_columns is not provided, each method is applied to each +specified column, producing result columns for all combinations. These result +columns have auto-generated names based on the aggregate column and method. +If the result_columns is provided, each method is applied only once +to the matching column in the aggregate column list and the result will be +available under the name of the matching result column. In other words, the +number of items in aggregate_columns, aggregate_methods (unless omitted), +and result_columns needs to match, and no +combinations are created on the fly. +For scripting, it is recommended to specify all resulting column names, +while for interactive use, automatically created combinations are expected +to be beneficial, especially for exploratory analysis. + 
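+A short sketch of this one-to-one pairing in Python (the map and column
+names are placeholders):
+
+import grass.script as gs
+
+stats = ["sum", "n", "min", "max", "mean"]
+# one aggregate column entry and one result column per method
+gs.run_command("v.dissolve", input="areas", column="category",
+               output="dissolved",
+               aggregate_columns=["value"] * len(stats),
+               aggregate_methods=stats,
+               result_columns=[f"value_{method}" for method in stats])
+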
+The type of the result column is determined based on the selected method.
+For n and count, the type is INTEGER and for all other
+methods, it is DOUBLE. Aggregate methods which produce other types
+require the type to be specified as part of the result_columns.
+A type can be provided in result_columns using the SQL syntax
+name type, e.g., sum_of_values double precision.
+Type specification is mandatory when SQL syntax is used in
+aggregate_columns (and aggregate_methods is omitted).
+
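+For example, a text-returning aggregate such as group_concat needs an
+explicit type, here shown in Python (a sketch; the map and column names are
+placeholders):
+
+import grass.script as gs
+
+# group_concat returns text, so the result column must be typed explicitly
+gs.run_command("v.dissolve", input="areas", column="category",
+               output="dissolved_names",
+               aggregate_columns="group_concat(name)",
+               result_columns="names text",
+               aggregate_backend="sql")
+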
+v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities \ + aggregate_columns=ACRES +
+Since no aggregate methods were specified, the default methods were applied,
+producing the columns ACRES_n, ACRES_min, ACRES_max, ACRES_mean, and ACRES_sum.
+We can inspect the resulting table, e.g., the row for DOTURBAN_N == 'Wadesboro':
+
++v.db.select municipalities where="DOTURBAN_N == 'Wadesboro'" separator=tab +
+cat DOTURBAN_N ACRES_n ACRES_min ACRES_max ACRES_mean ACRES_sum +66 Wadesboro 2 634.987 3935.325 2285.156 4570.312 +
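+
+The same result can also be read programmatically, e.g., in Python using the
+JSON output of v.db.select (a sketch assuming the municipalities map created
+above):
+
+import json
+import grass.script as gs
+
+records = json.loads(
+    gs.read_command("v.db.select", map="municipalities", format="json")
+)["records"]
+for record in records:
+    print(record["DOTURBAN_N"], record["ACRES_sum"])
+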
+v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_2 \ + aggregate_columns=ACRES aggregate_methods=sum +
+v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_3 \ + aggregate_columns=ACRES,NEW_PERC_G aggregate_methods=sum,avg +
+The v.dissolve module will apply each aggregate method only to the +corresponding column when column names for the results are specified manually +with the result_columns option: + +
+v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_4 \ + aggregate_columns=ACRES,NEW_PERC_G aggregate_methods=sum,avg \ + result_columns=acres,new_perc_g +
+v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_5 \ + aggregate_columns=ACRES,DOTURBAN_N,TEXT_NAME aggregate_methods=sum,count,count \ + result_columns=acres,number_of_parts,named_parts +
+Modifying the previous example, we will now specify the SQL aggregate function calls
+explicitly instead of letting v.dissolve generate them for us.
+We will compute the sum of the ACRES column using sum(ACRES)
+(alternatively, we could use the SQLite-specific total(ACRES)
+which returns zero even when all values are NULL).
+Further, we will count the number of aggregated (i.e., dissolved) parts using
+count(*)
which counts all rows regardless of NULL values.
+Then, we will count all unique names of parts as distinguished by
+the MB_NAME column using count(distinct MB_NAME)
.
+Finally, we will collect all these names into a comma-separated list using
+group_concat(MB_NAME)
:
+
+
+v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_6 \ + aggregate_columns="total(ACRES),count(*),count(distinct MB_NAME),group_concat(MB_NAME)" \ + result_columns="acres REAL,named_parts INTEGER,unique_names INTEGER,names TEXT" +
+When working with general SQL syntax, v.dissolve turns off its checks for +the number of aggregate and result columns so that any SQL syntax can be used +for the aggregate columns. This also allows the use of functions with multiple +parameters, for example, specifying the separator to be used with group_concat: + 
+ v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_7 \ + aggregate_columns="group_concat(MB_NAME, ';')" \ + result_columns="names TEXT" +
+Again, we can inspect the resulting table, e.g., the row for DOTURBAN_N == 'Wadesboro':
+
++v.db.select municipalities_7 where="DOTURBAN_N == 'Wadesboro'" separator=tab +
+cat DOTURBAN_N names +66 Wadesboro Wadesboro;Lilesville +