diff --git a/pygadm/data/gadm_continent.json b/pygadm/data/gadm_continent.json index 64d4474..b4df226 100644 --- a/pygadm/data/gadm_continent.json +++ b/pygadm/data/gadm_continent.json @@ -1,23 +1,23 @@ { "africa": [ - "DZA", "AGO", "BEN", "BWA", "BFA", "BDI", "CPV", "CMR", "CAF", "TCD", "COM", "COG", "COD", "DJI", "EGY", "GNQ", "ERI", "SWZ", "ETH", "GAB", "GMB", "GHA", "GIN", "GNB", "CIV", "KEN", "LSO", "LBR", "LBY", "MDG", "MWI", "MLI", "MRT", "MUS", "MYT", "MAR", "MOZ", "NAM", "NER", "NGA", "REU", "RWA", "STP", "SEN", "SYC", "SLE", "SOM", "ZAF", "SSD", "SDN", "TZA", "TGO", "TUN", "UGA", "ZMB", "ZWE" + "DZA", "AGO", "BEN", "BWA", "BFA", "BDI", "CPV", "CMR", "CAF", "ESH", "TCD", "COM", "COG", "COD", "DJI", "EGY", "GNQ", "ERI", "SWZ", "ETH", "GAB", "GMB", "GHA", "GIN", "GNB", "CIV", "KEN", "LSO", "LBR", "LBY", "MDG", "MWI", "MLI", "MRT", "MUS", "MYT", "MAR", "MOZ", "NAM", "NER", "NGA", "REU", "RWA", "STP", "SEN", "SYC", "SLE", "SOM", "ZAF", "SSD", "SDN", "TZA", "TGO", "TUN", "UGA", "ZMB", "ZWE" ], "asia": [ - "AFG", "ARM", "AZE", "BGD", "BTN", "IOT", "BHR", "BRN", "KHM", "CHN", "CYP", "EGY", "GEO", "IND", "IDN", "IRN", "IRQ", "ISR", "JPN", "JOR", "KAZ", "KWT", "KGZ", "LAO", "LBN", "MYS", "MDV", "MNG", "MMR", "NPL", "PRK", "OMN", "PAK", "PSE", "PHL", "QAT", "RUS", "SAU", "SGP", "KOR", "LKA", "SYR", "TWN", "TJK", "TLS", "TUR", "TKM", "ARE", "UZB", "VNM", "YEM" + "AFG", "ARM", "AZE", "BGD", "BTN", "IOT", "BHR", "BRN", "KHM", "CHN", "IND", "IDN", "IRN", "IRQ", "ISR", "JPN", "JOR", "KAZ", "KWT", "KGZ", "LAO", "LBN", "MYS", "MDV", "MNG", "MMR", "NPL", "PRK", "OMN", "PAK", "PSE", "PHL", "QAT", "RUS", "SAU", "SGP", "KOR", "LKA", "SYR", "THA", "TWN", "TJK", "TLS", "TKM", "ARE", "UZB", "VNM", "YEM", "SJM", "XCA", "XPI", "Z01", "Z02", "Z03", "Z04", "Z05", "Z06", "Z07", "Z08", "Z09" ], "europe": [ - "ALB", "AND", "ARM", "AUT", "AZE", "BLR", "BEL", "BIH", "BGR", "HRV", "CYP", "CZE", "DNK", "EST", "FIN", "FRA", "GEO", "DEU", "GRC", "HUN", "ISL", "IRL", "ITA", "KAZ", "LVA", "LIE", "LTU", "LUX", "MLT", "MDA", "MCO", "MNE", "NLD", "MKD", "NOR", "POL", "PRT", "ROU", "SMR", "SRB", "SVK", "SVN", "ESP", "SWE", "CHE", "TUR", "UKR", "GBR", "VAT" + "ALA", "ALB", "AND", "AUT", "BLR", "BEL", "BIH", "BGR", "HRV", "CYP", "CZE", "DNK", "EST", "FIN", "FRA", "FRO", "GEO", "GIB", "GGY", "DEU", "GRC", "HUN", "ISL", "IRL", "ITA", "LVA", "LIE", "LTU", "LUX", "MLT", "MDA", "MCO", "MNE", "NLD", "MKD", "NOR", "POL", "PRT", "ROU", "SMR", "SRB", "SVK", "SVN", "ESP", "SWE", "CHE", "TUR", "UKR", "GBR", "VAT", "IMN", "JEY", "XAD", "ZNC", "XKO" ], "north america": [ "ATG", "BHS", "BRB", "BLZ", "CAN", "CRI", "CUB", "DMA", "DOM", "SLV", "GRD", "GTM", "HTI", "HND", "JAM", "MEX", "NIC", "PAN", "KNA", "LCA", "VCT", "TTO", "USA", "AIA", "ABW", "BMU", "BES", "VGB", "CYM", "XCL", "CUW", "GRL", "GLP", "MTQ", "MSR", "UMI", "PRI", "BLM", "MAF", "SPM", "SXM", "TCA", "VIR" ], "south america": [ - "ARG", "BOL", "BRA", "CHL", "COL", "ECU", "GUY", "PRY", "PER", "SUR", "URY", "VEN", "BVT", "FLK", "GUF", "SGS" + "ARG", "BOL", "BRA", "CHL", "COL", "ECU", "GUY", "PRY", "PER", "SUR", "URY", "VEN", "BVT", "FLK", "GUF", "SGS", "SHN" ], - "oceania": [ - "AUS", "FJI", "KIR", "MHL", "FSM", "NRU", "NZL", "PLW", "PNG", "WSM", "SLB", "TON", "TUV", "VUT", "ASM", "COK", "PYF", "GUM", "NCL", "NIU", "NFK", "MNP", "PCN", "TKL", "WLF" + "oceania": [ + "AUS", "CCK", "CXR", "FJI", "KIR", "MHL", "FSM", "NRU", "NZL", "PLW", "PNG", "WSM", "SLB", "TON", "TUV", "VUT", "ASM", "COK", "PYF", "GUM", "NCL", "NIU", "NFK", "MNP", "PCN", "TKL", "WLF", "HMD", "XSP" ], "antartica": [ - "ATA" + "ATA", "ATF" ] } \ No newline at end of file diff --git a/pygadm/data/gadm_database.parquet b/pygadm/data/gadm_database.parquet index 0bab6b8..762ac3a 100644 Binary files a/pygadm/data/gadm_database.parquet and b/pygadm/data/gadm_database.parquet differ diff --git a/pyproject.toml b/pyproject.toml index e137669..4b73c02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,20 +42,16 @@ Download = "https://github.com/12rambau/pygadm/archive/${metadata:version}.tar.g dev = [ "pre-commit", "commitizen", - "tqdm", - "mypy", + "nox" ] test = [ "pytest", "pytest-sugar", - "pytest-icdiff", "pytest-cov", - "Flake8-pyproject", - "nox" + "pytest-regressions" ] doc = [ "pydata-sphinx-theme", - "nox", "sphinx-copybutton", "jupyter-sphinx", "sphinx-design", diff --git a/tests/test_continent.py b/tests/test_continent.py new file mode 100644 index 0000000..daf4883 --- /dev/null +++ b/tests/test_continent.py @@ -0,0 +1,57 @@ +"""Tests of the continents submanagement.""" +import json +from pathlib import Path + +import pandas as pd + +import pygadm + +continent_file = Path(__file__).parents[1] / "pygadm" / "data" / "gadm_continent.json" +database_file = Path(__file__).parents[1] / "pygadm" / "data" / "gadm_database.parquet" + + +def test_file(): + """Assert the continent file exist.""" + assert continent_file.is_file() + + +def test_continent(dataframe_regression): + """Check that the continent are working on the simplest.""" + gdf = pygadm.get_items(name="antartica") + df = pd.concat([gdf.GID_0, gdf.bounds], axis=1) + dataframe_regression.check(df) + + +def test_duplication(): + """Make sure there are no duplicates in the continent database.""" + continent_dict = json.loads(continent_file.read_text()) + duplicates = {} + for continent in continent_dict: + duplicates[continent] = set() + current_set = set(continent_dict[continent]) + for other in continent_dict: + if other == continent: + continue + other_list = continent_dict[other] + intersection = current_set.intersection(other_list) + duplicates[continent] = duplicates[continent].union(intersection) + + error = [f"{c}: [{','.join(d)}]" for c, d in duplicates.items()] + assert all([len(d) == 0 for c, d in duplicates.items()]), error + + +def test_orphan(): + """Check that all countries are in a continent.""" + data = pd.read_parquet(database_file) + continent_dict = json.loads(continent_file.read_text()) + countries = data.GID_0.unique() + orphan = [] + for country in countries: + exist = False + for continent in continent_dict: + if country in continent_dict[continent]: + exist = True + break + if exist is False: + orphan.append(country) + assert len(orphan) == 0, ",".join(orphan) diff --git a/tests/test_continent/test_continent.csv b/tests/test_continent/test_continent.csv new file mode 100644 index 0000000..e4dbb4a --- /dev/null +++ b/tests/test_continent/test_continent.csv @@ -0,0 +1,3 @@ +,GID_0,minx,miny,maxx,maxy +0,ATA,-180,-90,180,-59.593699999999998 +0,ATF,40.328200000000002,-50.018799999999999,77.603300000000004,-11.5137 diff --git a/tests/test_get_items.py b/tests/test_get_items.py index 4fc2887..d8e3486 100644 --- a/tests/test_get_items.py +++ b/tests/test_get_items.py @@ -1,7 +1,6 @@ """Tests of the ``get_items`` function.""" -import math - +import pandas as pd import pytest import pygadm @@ -29,38 +28,32 @@ def test_non_existing(): pygadm.get_items(admin="t0t0") -def test_area(): - """Request a known.""" - bounds = [103.6091, 1.1664, 104.0858, 1.4714] +def test_area(dataframe_regression): + """Request a known geometry.""" gdf = pygadm.get_items(name="Singapore") - assert gdf.loc[0]["GID_0"] == "SGP" - assert all([math.isclose(b, t) for b, t in zip(gdf.total_bounds.tolist(), bounds)]) + df = pd.concat([gdf.GID_0, gdf.bounds], axis=1) + dataframe_regression.check(df) -def test_sub_content(): +def test_sub_content(dataframe_regression): """Request a sublevel.""" - sublevels = ["Central", "East", "North", "North-East", "West"] gdf = pygadm.get_items(name="Singapore", content_level=1) - assert (gdf.GID_0 == "SGP").all() - assert len(gdf) == 5 - assert sorted(gdf.NAME_1.to_list()) == sublevels + dataframe_regression.check(gdf[["NAME_1", "GID_0"]]) -def test_too_high(): +def test_too_high(data_regression): """Request a sublevel higher than available in the area.""" with pytest.warns(UserWarning): gdf = pygadm.get_items(admin="SGP.1_1", content_level=0) - assert len(gdf) == 1 - assert gdf.loc[0]["GID_1"] == "SGP.1_1" + data_regression.check(gdf.GID_1.tolist()) -def test_too_low(): +def test_too_low(data_regression): """Request a sublevel lower than available in the area.""" # request a level too low with pytest.warns(UserWarning): gdf = pygadm.get_items(admin="SGP.1_1", content_level=3) - assert len(gdf) == 1 - assert gdf.loc[0]["GID_1"] == "SGP.1_1" + data_regression.check(gdf.GID_1.tolist()) def test_case_insensitive(): @@ -77,25 +70,18 @@ def test_duplicate_areas(): pygadm.get_items(name="central") -def test_multiple_input(): +def test_multiple_input(dataframe_regression): """Test when several geometries are requested at once.""" gdf1 = pygadm.get_items(name=["france", "germany"]) - assert len(gdf1) == 2 + df = pd.concat([gdf1.GID_0, gdf1.bounds], axis=1) + dataframe_regression.check(df) gdf2 = pygadm.get_items(admin=["FRA", "DEU"]) - assert len(gdf2) == 2 - - -def test_continent(): - """Check that the continent are working.""" - gdf = pygadm.get_items(name="antartica") - assert len(gdf) == 1 - assert gdf.GID_0.to_list() == ["ATA"] + assert gdf2.equals(gdf1) -def test_duplication(): +def test_duplication(data_regression): """Test that known duplication cases return the biggest AOI.""" # italy is also a level 4 province of Bangladesh: BGD.5.4.6.6_1 gdf = pygadm.get_items(name="Italy") - assert len(gdf) == 1 - assert gdf.GID_0.to_list() == ["ITA"] + data_regression.check(gdf.GID_0.tolist()) diff --git a/tests/test_get_items/test_area.csv b/tests/test_get_items/test_area.csv new file mode 100644 index 0000000..bdf0fed --- /dev/null +++ b/tests/test_get_items/test_area.csv @@ -0,0 +1,2 @@ +,GID_0,minx,miny,maxx,maxy +0,SGP,103.6091,1.1664000000000001,104.08580000000001,1.4714 diff --git a/tests/test_get_items/test_duplication.yml b/tests/test_get_items/test_duplication.yml new file mode 100644 index 0000000..d3d8ef8 --- /dev/null +++ b/tests/test_get_items/test_duplication.yml @@ -0,0 +1 @@ +- ITA diff --git a/tests/test_get_items/test_multiple_input.csv b/tests/test_get_items/test_multiple_input.csv new file mode 100644 index 0000000..1d9e418 --- /dev/null +++ b/tests/test_get_items/test_multiple_input.csv @@ -0,0 +1,3 @@ +,GID_0,minx,miny,maxx,maxy +0,FRA,-5.1437999999999997,41.333799999999997,9.5603999999999996,51.089399999999998 +0,DEU,5.8662999999999998,47.270800000000001,15.0418,55.0565 diff --git a/tests/test_get_items/test_sub_content.csv b/tests/test_get_items/test_sub_content.csv new file mode 100644 index 0000000..0d03691 --- /dev/null +++ b/tests/test_get_items/test_sub_content.csv @@ -0,0 +1,6 @@ +,NAME_1,GID_0 +0,Central,SGP +1,East,SGP +2,North,SGP +3,North-East,SGP +4,West,SGP diff --git a/tests/test_get_items/test_too_high.yml b/tests/test_get_items/test_too_high.yml new file mode 100644 index 0000000..2a5e9eb --- /dev/null +++ b/tests/test_get_items/test_too_high.yml @@ -0,0 +1 @@ +- SGP.1_1 diff --git a/tests/test_get_items/test_too_low.yml b/tests/test_get_items/test_too_low.yml new file mode 100644 index 0000000..2a5e9eb --- /dev/null +++ b/tests/test_get_items/test_too_low.yml @@ -0,0 +1 @@ +- SGP.1_1 diff --git a/tests/test_get_names.py b/tests/test_get_names.py index 09e4feb..0f9249f 100644 --- a/tests/test_get_names.py +++ b/tests/test_get_names.py @@ -26,35 +26,33 @@ def test_non_existing(): pygadm.get_names(admin="t0t0") -def test_area(): - """Request a known.""" - sublevels = ["Singapore"] +def test_area(dataframe_regression): + """Request a known geometry.""" df = pygadm.get_names(name="Singapore") - assert sorted(df.NAME_0.to_list()) == sublevels + dataframe_regression.check(df) + df_admin = pygadm.get_names(admin="SGP") + assert df_admin.equals(df) -def test_sub_content(): + +def test_sub_content(dataframe_regression): """Request a sublevel.""" - sublevels = ["Central", "East", "North", "North-East", "West"] df = pygadm.get_names(name="Singapore", content_level=1) - assert sorted(df.NAME_1.to_list()) == sublevels - assert len(df) == 5 + dataframe_regression.check(df) -def test_too_high(): +def test_too_high(dataframe_regression): """Request a sublevel higher than available in the area.""" with pytest.warns(UserWarning): df = pygadm.get_names(admin="SGP.1_1", content_level=0) - assert len(df) == 1 - assert df.NAME_1.to_list() == ["Central"] + dataframe_regression.check(df) -def test_too_low(): +def test_too_low(dataframe_regression): """Request a sublevel lower than available in the area.""" with pytest.warns(UserWarning): df = pygadm.get_names(admin="SGP.1_1", content_level=3) - assert len(df) == 1 - assert df.NAME_1.to_list() == ["Central"] + dataframe_regression.check(df) def test_case_insensitive(): diff --git a/tests/test_get_names/test_area.csv b/tests/test_get_names/test_area.csv new file mode 100644 index 0000000..046b6e2 --- /dev/null +++ b/tests/test_get_names/test_area.csv @@ -0,0 +1,2 @@ +,NAME_0,GID_0 +0,Singapore,SGP diff --git a/tests/test_get_names/test_sub_content.csv b/tests/test_get_names/test_sub_content.csv new file mode 100644 index 0000000..6b8059c --- /dev/null +++ b/tests/test_get_names/test_sub_content.csv @@ -0,0 +1,6 @@ +,NAME_1,GID_1 +1,Central,SGP.1_1 +2,East,SGP.2_1 +3,North,SGP.3_1 +4,North-East,SGP.4_1 +5,West,SGP.5_1 diff --git a/tests/test_get_names/test_too_high.csv b/tests/test_get_names/test_too_high.csv new file mode 100644 index 0000000..60b038c --- /dev/null +++ b/tests/test_get_names/test_too_high.csv @@ -0,0 +1,2 @@ +,NAME_1,GID_1 +0,Central,SGP.1_1 diff --git a/tests/test_get_names/test_too_low.csv b/tests/test_get_names/test_too_low.csv new file mode 100644 index 0000000..60b038c --- /dev/null +++ b/tests/test_get_names/test_too_low.csv @@ -0,0 +1,2 @@ +,NAME_1,GID_1 +0,Central,SGP.1_1