Skip to content

Commit

Permalink
fix: NA county_code is valid, not NaN
Browse files (browse the repository at this point in the history)
  • Loading branch information
AlessandroLorenzi committed May 4, 2023
1 parent 3e3235b commit 0af9fe5
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 3 deletions.
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
36 changes: 33 additions & 3 deletions pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,27 @@
"ZA",
]

# Strings that pandas should interpret as missing values when the postal
# code data files are read with ``keep_default_na=False``.
#
# "NA" is deliberately absent from this list: it is a valid county code
# (Naples, Italy) and must be kept as a string rather than parsed as NaN.
# NOTE(review): the remaining entries appear to mirror pandas' built-in
# default NA markers -- confirm against the pandas version in use.
NA_VALUES = [
    "",
    "#N/A",
    "#N/A N/A",
    "#NA",
    "-1.#IND",
    "-1.#QNAN",
    "-NaN",
    "-nan",
    "1.#IND",
    "1.#QNAN",
    "<NA>",
    "N/A",
    "NULL",
    "NaN",
    "n/a",
    "nan",
    "null",
]


@contextlib.contextmanager
def _open_extract_url(url: str, country: str) -> Any:
Expand Down Expand Up @@ -231,7 +252,12 @@ def _get_data(country: str) -> Tuple[str, pd.DataFrame]:

data_path = os.path.join(STORAGE_DIR, country.upper() + ".txt")
if os.path.exists(data_path):
data = pd.read_csv(data_path, dtype={"postal_code": str})
data = pd.read_csv(
data_path,
dtype={"postal_code": str},
na_values=NA_VALUES,
keep_default_na=False,
)
else:
download_urls = [
val.format(country=country) for val in DOWNLOAD_URL
Expand All @@ -243,10 +269,11 @@ def _get_data(country: str) -> Tuple[str, pd.DataFrame]:
header=None,
names=DATA_FIELDS,
dtype={"postal_code": str},
na_values=NA_VALUES,
keep_default_na=False,
)
os.makedirs(STORAGE_DIR, exist_ok=True)
data.to_csv(data_path, index=None)

return data_path, data

def _index_postal_codes(self) -> pd.DataFrame:
Expand All @@ -255,7 +282,10 @@ def _index_postal_codes(self) -> pd.DataFrame:

if os.path.exists(data_path_unique):
data_unique = pd.read_csv(
data_path_unique, dtype={"postal_code": str}
data_path_unique,
dtype={"postal_code": str},
na_values=NA_VALUES,
keep_default_na=False,
)
else:
# group together places with the same postal code
Expand Down
8 changes: 8 additions & 0 deletions test_pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,14 @@ def test_query_location_exact():
assert res["state_name"].unique().tolist() == ["Île-de-France"]


def test_location_naples():
    """Check that the "NA" county code of Naples is kept as a string.

    Regression test for https://github.com/symerio/pgeocode/issues/73
    """
    geocoder = Nominatim("it")
    matches = geocoder.query_location("Napoli")

    county_names = matches["county_name"].unique().tolist()
    assert county_names == ["Napoli"]

    # The code must come back as the literal string "NA", not as NaN.
    county_codes = matches["county_code"].unique().tolist()
    assert county_codes == ["NA"]


def test_query_location_fuzzy():
pytest.importorskip("thefuzz")
nomi = Nominatim("fr")
Expand Down

0 comments on commit 0af9fe5

Please sign in to comment.