12rambau · 12rambau · Aug 14, 2023 · Aug 14, 2023 · Aug 14, 2023 · Aug 14, 2023
diff --git a/pygadm/data/gadm_continent.json b/pygadm/data/gadm_continent.json
@@ -1,23 +1,23 @@
 {
     "africa": [
-        "DZA", "AGO", "BEN", "BWA", "BFA", "BDI", "CPV", "CMR", "CAF", "TCD", "COM", "COG", "COD", "DJI", "EGY", "GNQ", "ERI", "SWZ", "ETH", "GAB", "GMB", "GHA", "GIN", "GNB", "CIV", "KEN", "LSO", "LBR", "LBY", "MDG", "MWI", "MLI", "MRT", "MUS", "MYT", "MAR", "MOZ", "NAM", "NER", "NGA", "REU", "RWA", "STP", "SEN", "SYC", "SLE", "SOM", "ZAF", "SSD", "SDN", "TZA", "TGO", "TUN", "UGA", "ZMB", "ZWE"
+        "DZA", "AGO", "BEN", "BWA", "BFA", "BDI", "CPV", "CMR", "CAF", "ESH", "TCD", "COM", "COG", "COD", "DJI", "EGY", "GNQ", "ERI", "SWZ", "ETH", "GAB", "GMB", "GHA", "GIN", "GNB", "CIV", "KEN", "LSO", "LBR", "LBY", "MDG", "MWI", "MLI", "MRT", "MUS", "MYT", "MAR", "MOZ", "NAM", "NER", "NGA", "REU", "RWA", "STP", "SEN", "SYC", "SLE", "SOM", "ZAF", "SSD", "SDN", "TZA", "TGO", "TUN", "UGA", "ZMB", "ZWE"
     ],
     "asia": [
-        "AFG", "ARM", "AZE", "BGD", "BTN", "IOT", "BHR", "BRN", "KHM", "CHN", "CYP", "EGY", "GEO", "IND", "IDN", "IRN", "IRQ", "ISR", "JPN", "JOR", "KAZ", "KWT", "KGZ", "LAO", "LBN", "MYS", "MDV", "MNG", "MMR", "NPL", "PRK", "OMN", "PAK", "PSE", "PHL", "QAT", "RUS", "SAU", "SGP", "KOR", "LKA", "SYR", "TWN", "TJK", "TLS", "TUR", "TKM", "ARE", "UZB", "VNM", "YEM"
+        "AFG", "ARM", "AZE", "BGD", "BTN", "IOT", "BHR", "BRN", "KHM", "CHN", "IND", "IDN", "IRN", "IRQ", "ISR", "JPN", "JOR", "KAZ", "KWT", "KGZ", "LAO", "LBN", "MYS", "MDV", "MNG", "MMR", "NPL", "PRK", "OMN", "PAK", "PSE", "PHL", "QAT", "RUS", "SAU", "SGP", "KOR", "LKA", "SYR", "THA", "TWN", "TJK", "TLS", "TKM", "ARE", "UZB", "VNM", "YEM", "SJM", "XCA", "XPI", "Z01", "Z02", "Z03", "Z04", "Z05", "Z06", "Z07", "Z08", "Z09"
     ],
     "europe": [
-        "ALB", "AND", "ARM", "AUT", "AZE", "BLR", "BEL", "BIH", "BGR", "HRV", "CYP", "CZE", "DNK", "EST", "FIN", "FRA", "GEO", "DEU", "GRC", "HUN", "ISL", "IRL", "ITA", "KAZ", "LVA", "LIE", "LTU", "LUX", "MLT", "MDA", "MCO", "MNE", "NLD", "MKD", "NOR", "POL", "PRT", "ROU", "SMR", "SRB", "SVK", "SVN", "ESP", "SWE", "CHE", "TUR", "UKR", "GBR", "VAT"
+        "ALA", "ALB", "AND", "AUT", "BLR", "BEL", "BIH", "BGR", "HRV", "CYP", "CZE", "DNK", "EST", "FIN", "FRA", "FRO", "GEO", "GIB", "GGY", "DEU", "GRC", "HUN", "ISL", "IRL", "ITA", "LVA", "LIE", "LTU", "LUX", "MLT", "MDA", "MCO", "MNE", "NLD", "MKD", "NOR", "POL", "PRT", "ROU", "SMR", "SRB", "SVK", "SVN", "ESP", "SWE", "CHE", "TUR", "UKR", "GBR", "VAT", "IMN", "JEY", "XAD", "ZNC", "XKO"
     ],
     "north america": [
         "ATG", "BHS", "BRB", "BLZ", "CAN", "CRI", "CUB", "DMA", "DOM", "SLV", "GRD", "GTM", "HTI", "HND", "JAM", "MEX", "NIC", "PAN", "KNA", "LCA", "VCT", "TTO", "USA", "AIA", "ABW", "BMU", "BES", "VGB", "CYM", "XCL", "CUW", "GRL", "GLP", "MTQ", "MSR", "UMI", "PRI", "BLM", "MAF", "SPM", "SXM", "TCA", "VIR"
     ],
     "south america": [
-        "ARG", "BOL", "BRA", "CHL", "COL", "ECU", "GUY", "PRY", "PER", "SUR", "URY", "VEN", "BVT", "FLK", "GUF", "SGS"
+        "ARG", "BOL", "BRA", "CHL", "COL", "ECU", "GUY", "PRY", "PER", "SUR", "URY", "VEN", "BVT", "FLK", "GUF", "SGS", "SHN"
     ],
-    "oceania": [    
-        "AUS", "FJI", "KIR", "MHL", "FSM", "NRU", "NZL", "PLW", "PNG", "WSM", "SLB", "TON", "TUV", "VUT", "ASM", "COK", "PYF", "GUM", "NCL", "NIU", "NFK", "MNP", "PCN", "TKL", "WLF"
+    "oceania": [
+        "AUS", "CCK", "CXR", "FJI", "KIR", "MHL", "FSM", "NRU", "NZL", "PLW", "PNG", "WSM", "SLB", "TON", "TUV", "VUT", "ASM", "COK", "PYF", "GUM", "NCL", "NIU", "NFK", "MNP", "PCN", "TKL", "WLF", "HMD", "XSP"
     ],
     "antartica": [
-        "ATA"
+        "ATA", "ATF"
     ]
 }
diff --git a/pygadm/data/gadm_database.parquet b/pygadm/data/gadm_database.parquet
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,20 +42,16 @@ Download = "https://github.com/12rambau/pygadm/archive/${metadata:version}.tar.g
 dev = [
     "pre-commit",
     "commitizen",
-    "tqdm",
-    "mypy",
+    "nox"
 ]
 test = [
     "pytest",
     "pytest-sugar",
-    "pytest-icdiff",
     "pytest-cov",
-    "Flake8-pyproject",
-    "nox"
+    "pytest-regressions"
 ]
 doc = [
     "pydata-sphinx-theme",
-    "nox",
     "sphinx-copybutton",
     "jupyter-sphinx",
     "sphinx-design",

diff --git a/tests/test_continent.py b/tests/test_continent.py
@@ -0,0 +1,57 @@
+"""Tests of the continents submanagement."""
+import json
+from pathlib import Path
+
+import pandas as pd
+
+import pygadm
+
+continent_file = Path(__file__).parents[1] / "pygadm" / "data" / "gadm_continent.json"
+database_file = Path(__file__).parents[1] / "pygadm" / "data" / "gadm_database.parquet"
+
+
+def test_file():
+    """Assert the continent file exists."""
+    assert continent_file.is_file()
+
+
+def test_continent(dataframe_regression):
+    """Check that the continents work in the simplest case."""
+    gdf = pygadm.get_items(name="antartica")
+    df = pd.concat([gdf.GID_0, gdf.bounds], axis=1)
+    dataframe_regression.check(df)
+
+
+def test_duplication():
+    """Make sure there are no duplicates in the continent database."""
+    continent_dict = json.loads(continent_file.read_text())
+    duplicates = {}
+    for continent in continent_dict:
+        duplicates[continent] = set()
+        current_set = set(continent_dict[continent])
+        for other in continent_dict:
+            if other == continent:
+                continue
+            other_list = continent_dict[other]
+            intersection = current_set.intersection(other_list)
+            duplicates[continent] = duplicates[continent].union(intersection)
+
+    error = [f"{c}: [{','.join(d)}]" for c, d in duplicates.items()]
+    assert all([len(d) == 0 for c, d in duplicates.items()]), error
+
+
+def test_orphan():
+    """Check that all countries are in a continent."""
+    data = pd.read_parquet(database_file)
+    continent_dict = json.loads(continent_file.read_text())
+    countries = data.GID_0.unique()
+    orphan = []
+    for country in countries:
+        exist = False
+        for continent in continent_dict:
+            if country in continent_dict[continent]:
+                exist = True
+                break
+        if exist is False:
+            orphan.append(country)
+    assert len(orphan) == 0, ",".join(orphan)
diff --git a/tests/test_continent/test_continent.csv b/tests/test_continent/test_continent.csv
@@ -0,0 +1,3 @@
+,GID_0,minx,miny,maxx,maxy
+0,ATA,-180,-90,180,-59.593699999999998
+0,ATF,40.328200000000002,-50.018799999999999,77.603300000000004,-11.5137
diff --git a/tests/test_get_items.py b/tests/test_get_items.py
@@ -1,7 +1,6 @@
 """Tests of the ``get_items`` function."""
 
-import math
-
+import pandas as pd
 import pytest
 
 import pygadm
@@ -29,38 +28,32 @@ def test_non_existing():
         pygadm.get_items(admin="t0t0")
 
 
-def test_area():
-    """Request a known."""
-    bounds = [103.6091, 1.1664, 104.0858, 1.4714]
+def test_area(dataframe_regression):
+    """Request a known geometry."""
     gdf = pygadm.get_items(name="Singapore")
-    assert gdf.loc[0]["GID_0"] == "SGP"
-    assert all([math.isclose(b, t) for b, t in zip(gdf.total_bounds.tolist(), bounds)])
+    df = pd.concat([gdf.GID_0, gdf.bounds], axis=1)
+    dataframe_regression.check(df)
 
 
-def test_sub_content():
+def test_sub_content(dataframe_regression):
     """Request a sublevel."""
-    sublevels = ["Central", "East", "North", "North-East", "West"]
     gdf = pygadm.get_items(name="Singapore", content_level=1)
-    assert (gdf.GID_0 == "SGP").all()
-    assert len(gdf) == 5
-    assert sorted(gdf.NAME_1.to_list()) == sublevels
+    dataframe_regression.check(gdf[["NAME_1", "GID_0"]])
 
 
-def test_too_high():
+def test_too_high(data_regression):
     """Request a sublevel higher than available in the area."""
     with pytest.warns(UserWarning):
         gdf = pygadm.get_items(admin="SGP.1_1", content_level=0)
-        assert len(gdf) == 1
-        assert gdf.loc[0]["GID_1"] == "SGP.1_1"
+        data_regression.check(gdf.GID_1.tolist())
 
 
-def test_too_low():
+def test_too_low(data_regression):
     """Request a sublevel lower than available in the area."""
     # request a level too low
     with pytest.warns(UserWarning):
         gdf = pygadm.get_items(admin="SGP.1_1", content_level=3)
-        assert len(gdf) == 1
-        assert gdf.loc[0]["GID_1"] == "SGP.1_1"
+        data_regression.check(gdf.GID_1.tolist())
 
 
 def test_case_insensitive():
@@ -77,25 +70,18 @@ def test_duplicate_areas():
         pygadm.get_items(name="central")
 
 
-def test_multiple_input():
+def test_multiple_input(dataframe_regression):
     """Test when several geometries are requested at once."""
     gdf1 = pygadm.get_items(name=["france", "germany"])
-    assert len(gdf1) == 2
+    df = pd.concat([gdf1.GID_0, gdf1.bounds], axis=1)
+    dataframe_regression.check(df)
 
     gdf2 = pygadm.get_items(admin=["FRA", "DEU"])
-    assert len(gdf2) == 2
-
-
-def test_continent():
-    """Check that the continent are working."""
-    gdf = pygadm.get_items(name="antartica")
-    assert len(gdf) == 1
-    assert gdf.GID_0.to_list() == ["ATA"]
+    assert gdf2.equals(gdf1)
 
 
-def test_duplication():
+def test_duplication(data_regression):
     """Test that known duplication cases return the biggest AOI."""
     # italy is also a level 4 province of Bangladesh: BGD.5.4.6.6_1
     gdf = pygadm.get_items(name="Italy")
-    assert len(gdf) == 1
-    assert gdf.GID_0.to_list() == ["ITA"]
+    data_regression.check(gdf.GID_0.tolist())
diff --git a/tests/test_get_items/test_area.csv b/tests/test_get_items/test_area.csv
@@ -0,0 +1,2 @@
+,GID_0,minx,miny,maxx,maxy
+0,SGP,103.6091,1.1664000000000001,104.08580000000001,1.4714
diff --git a/tests/test_get_items/test_duplication.yml b/tests/test_get_items/test_duplication.yml
@@ -0,0 +1 @@
+- ITA
diff --git a/tests/test_get_items/test_multiple_input.csv b/tests/test_get_items/test_multiple_input.csv
@@ -0,0 +1,3 @@
+,GID_0,minx,miny,maxx,maxy
+0,FRA,-5.1437999999999997,41.333799999999997,9.5603999999999996,51.089399999999998
+0,DEU,5.8662999999999998,47.270800000000001,15.0418,55.0565
diff --git a/tests/test_get_items/test_sub_content.csv b/tests/test_get_items/test_sub_content.csv
@@ -0,0 +1,6 @@
+,NAME_1,GID_0
+0,Central,SGP
+1,East,SGP
+2,North,SGP
+3,North-East,SGP
+4,West,SGP
diff --git a/tests/test_get_items/test_too_high.yml b/tests/test_get_items/test_too_high.yml
@@ -0,0 +1 @@
+- SGP.1_1
diff --git a/tests/test_get_items/test_too_low.yml b/tests/test_get_items/test_too_low.yml
@@ -0,0 +1 @@
+- SGP.1_1
diff --git a/tests/test_get_names.py b/tests/test_get_names.py
@@ -26,35 +26,33 @@ def test_non_existing():
         pygadm.get_names(admin="t0t0")
 
 
-def test_area():
-    """Request a known."""
-    sublevels = ["Singapore"]
+def test_area(dataframe_regression):
+    """Request a known geometry."""
     df = pygadm.get_names(name="Singapore")
-    assert sorted(df.NAME_0.to_list()) == sublevels
+    dataframe_regression.check(df)
 
+    df_admin = pygadm.get_names(admin="SGP")
+    assert df_admin.equals(df)
 
-def test_sub_content():
+
+def test_sub_content(dataframe_regression):
     """Request a sublevel."""
-    sublevels = ["Central", "East", "North", "North-East", "West"]
     df = pygadm.get_names(name="Singapore", content_level=1)
-    assert sorted(df.NAME_1.to_list()) == sublevels
-    assert len(df) == 5
+    dataframe_regression.check(df)
 
 
-def test_too_high():
+def test_too_high(dataframe_regression):
     """Request a sublevel higher than available in the area."""
     with pytest.warns(UserWarning):
         df = pygadm.get_names(admin="SGP.1_1", content_level=0)
-        assert len(df) == 1
-        assert df.NAME_1.to_list() == ["Central"]
+        dataframe_regression.check(df)
 
 
-def test_too_low():
+def test_too_low(dataframe_regression):
     """Request a sublevel lower than available in the area."""
     with pytest.warns(UserWarning):
         df = pygadm.get_names(admin="SGP.1_1", content_level=3)
-        assert len(df) == 1
-        assert df.NAME_1.to_list() == ["Central"]
+        dataframe_regression.check(df)
 
 
 def test_case_insensitive():

diff --git a/tests/test_get_names/test_area.csv b/tests/test_get_names/test_area.csv
@@ -0,0 +1,2 @@
+,NAME_0,GID_0
+0,Singapore,SGP
diff --git a/tests/test_get_names/test_sub_content.csv b/tests/test_get_names/test_sub_content.csv
@@ -0,0 +1,6 @@
+,NAME_1,GID_1
+1,Central,SGP.1_1
+2,East,SGP.2_1
+3,North,SGP.3_1
+4,North-East,SGP.4_1
+5,West,SGP.5_1
diff --git a/tests/test_get_names/test_too_high.csv b/tests/test_get_names/test_too_high.csv
@@ -0,0 +1,2 @@
+,NAME_1,GID_1
+0,Central,SGP.1_1
diff --git a/tests/test_get_names/test_too_low.csv b/tests/test_get_names/test_too_low.csv
@@ -0,0 +1,2 @@
+,NAME_1,GID_1
+0,Central,SGP.1_1
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		,GID_0,minx,miny,maxx,maxy
		0,SGP,103.6091,1.1664000000000001,104.08580000000001,1.4714