Skip to content

Commit

Permalink
Merge pull request #138 from PDOK/PDOK-14940-check-spatialite-geometry
Browse files Browse the repository at this point in the history
Check for spatialite geometry
  • Loading branch information
damienmulder authored Feb 5, 2025
2 parents 635781d + 1307cf7 commit 8560cf9
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 19 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ The current checks are (see also the 'show-validations' command):
| RQ2 | Layers must have at least one feature. |
| RQ3 | _LEGACY:_ use RQ14 * Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON). |
| RQ4 | The geopackage should have no views defined. |
| RQ5 | _LEGACY:_ use RQ23 * Geometry should be valid. |
| RQ5 | _LEGACY:_ use RQ23 * Geometry should be valid and in GeoPackage format. |
| RQ6 | Column names must start with a letter, and valid characters are lowercase a-z, numbers or underscores. |
| RQ7 | Tables should have a feature id column with unique index. |
| RQ8 | Geopackage must conform to given JSON or YAML definitions. |
Expand All @@ -111,7 +111,7 @@ The current checks are (see also the 'show-validations' command):
| RQ16 | _LEGACY:_ use RQ21 * All layer and column names shall not be longer than 53 characters. |
| RQ21 | All layer and column names shall not be longer than 57 characters. |
| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. |
| RQ23 | Geometry should be valid and simple. |
| RQ23 | Geometry should be valid, simple and in GeoPackage format. |
| RQ24 | Geometry should not be null or empty (e.g. 'POINT EMPTY', WKT 'POINT(NaN NaN)'). |
| RC17 | It is recommended to name all GEOMETRY type columns 'geom'. |
| RC18 | It is recommended to give all GEOMETRY type columns the same name. |
Expand Down
36 changes: 27 additions & 9 deletions geopackage_validator/validations/geometry_valid_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,28 @@
from geopackage_validator.validations import validator
from geopackage_validator import utils

SQL_ONLY_VALID_TEMPLATE = """SELECT reason, count(reason) AS count, row_id
SQL_VALID_TEMPLATE_V0 = """SELECT reason, count(reason) AS count, row_id
FROM(
SELECT
CASE INSTR(ST_IsValidReason("{column_name}"), '[')
CASE ST_IsValid("{column_name}")
WHEN 0
THEN ST_IsValidReason("{column_name}")
ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '['))
THEN
CASE INSTR(ST_IsValidReason("{column_name}"), '[')
WHEN 0
THEN ST_IsValidReason("{column_name}")
ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '['))
END
ELSE
CASE
WHEN IsValidGPB("{column_name}") = 0
THEN 'Not GeoPackage geometry'
END
END AS reason,
cast(rowid AS INTEGER) AS row_id
FROM "{table_name}" WHERE ST_IsValid("{column_name}") = 0
FROM "{table_name}"
WHERE
ST_IsValid("{column_name}") = 0 OR
(IsValidGPB("{column_name}") = 0 AND ST_IsEmpty("{column_name}") = 0) -- Empty geometry is considered valid
)
GROUP BY reason;"""

Expand All @@ -27,13 +39,19 @@
ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '['))
END
ELSE
CASE ST_IsSimple("{column_name}")
WHEN 0
CASE
WHEN ST_IsSimple("{column_name}") = 0
THEN 'Not Simple'
WHEN IsValidGPB("{column_name}") = 0
THEN 'Not GeoPackage geometry'
END
END AS reason,
cast(rowid AS INTEGER) AS row_id
FROM "{table_name}" WHERE ST_IsValid("{column_name}") = 0 OR ST_IsSimple("{column_name}") = 0
FROM "{table_name}"
WHERE
ST_IsValid("{column_name}") = 0 OR
ST_IsSimple("{column_name}") = 0 OR
(IsValidGPB("{column_name}") = 0 AND ST_IsEmpty("{column_name}") = 0) -- Empty geometry is considered valid
)
GROUP BY reason;"""

Expand All @@ -58,7 +76,7 @@ class ValidGeometryValidatorV0(validator.Validator):
message = "Found invalid geometry in table: {table_name}, column {column_name}, reason: {reason}, {count} {count_label}, example id {row_id}"

def check(self) -> Iterable[str]:
result = query_geometry_valid(self.dataset, SQL_ONLY_VALID_TEMPLATE)
result = query_geometry_valid(self.dataset, SQL_VALID_TEMPLATE_V0)

return [
self.message.format(
Expand Down
Binary file added tests/data/test_geometry_spatialite.gpkg
Binary file not shown.
30 changes: 22 additions & 8 deletions tests/validations/test_geometry_valid_check.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
from geopackage_validator.utils import open_dataset
from geopackage_validator.validations.geometry_valid_check import (
query_geometry_valid,
SQL_ONLY_VALID_TEMPLATE,
SQL_VALID_TEMPLATE_V0,
SQL_VALID_TEMPLATE,
)


def test_with_gpkg_valid():
dataset = open_dataset("tests/data/test_geometry_valid.gpkg")
checks = list(query_geometry_valid(dataset, SQL_ONLY_VALID_TEMPLATE))
assert len(checks) == 1
assert checks[0][0] == "test_geometry_valid"
assert checks[0][1] == "geometry"
assert checks[0][2] == "Self-intersection"
assert checks[0][3] == 1
assert checks[0][4] == 1
for template in [SQL_VALID_TEMPLATE_V0, SQL_VALID_TEMPLATE]:
checks = list(query_geometry_valid(dataset, template))
assert len(checks) == 1
assert checks[0][0] == "test_geometry_valid"
assert checks[0][1] == "geometry"
assert checks[0][2] == "Self-intersection"
assert checks[0][3] == 1
assert checks[0][4] == 1


def test_with_gpkg_simple():
Expand Down Expand Up @@ -53,6 +54,19 @@ def test_with_gpkg_null():
assert len(checks) == 0


def test_with_gpkg_spatialite():
    """Check that both SQL templates flag the SpatiaLite fixture geometry.

    Each template is expected to yield exactly one result row whose reason
    is 'Not GeoPackage geometry'.
    """
    # spatialite geometries are not considered valid
    dataset = open_dataset("tests/data/test_geometry_spatialite.gpkg")
    for template in (SQL_VALID_TEMPLATE_V0, SQL_VALID_TEMPLATE):
        results = list(query_geometry_valid(dataset, template))
        assert len(results) == 1
        # NOTE(review): row layout presumably is
        # (table, column, reason, count, example row id) — confirm against
        # query_geometry_valid.
        first = results[0]
        assert first[0] == "test_geometry_spatialite"
        assert first[1] == "geom"
        assert first[2] == "Not GeoPackage geometry"
        assert first[3] == 1
        assert first[4] == 1


def test_with_gpkg_allcorrect():
dataset = open_dataset("tests/data/test_allcorrect.gpkg")
checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
Expand Down

0 comments on commit 8560cf9

Please sign in to comment.