From c8c06602b3dc8ce2e5e2c22920612c2324a204a1 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sat, 21 Oct 2023 23:47:39 -0400 Subject: [PATCH 01/17] Fix isort linting error --- countrycode/countrycode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/countrycode/countrycode.py b/countrycode/countrycode.py index 10b5993..2fc3274 100755 --- a/countrycode/countrycode.py +++ b/countrycode/countrycode.py @@ -1,6 +1,6 @@ +import csv import os import re -import csv try: import polars as pl From 10190cb8319584fe228f02095d4ec9536f5515f1 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sat, 21 Oct 2023 23:49:07 -0400 Subject: [PATCH 02/17] Update countrycode formatting to pass black checks --- countrycode/countrycode.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/countrycode/countrycode.py b/countrycode/countrycode.py index 2fc3274..8d6ebd0 100755 --- a/countrycode/countrycode.py +++ b/countrycode/countrycode.py @@ -158,7 +158,7 @@ def replace_exact(sourcevar, origin, destination): for string in sourcevar: match_found = False for position, origin_i in enumerate(codelist[origin]): - if origin_i == '' or codelist[destination][position] == '': + if origin_i == "" or codelist[destination][position] == "": continue if string == origin_i: if codelist[destination][position].isdigit(): @@ -172,13 +172,12 @@ def replace_exact(sourcevar, origin, destination): return out - def replace_regex(sourcevar, origin, destination): sourcevar_unique = list(set(sourcevar)) o = [] d = [] for i, (val_origin, val_destination) in enumerate(zip(codelist[origin], codelist[destination])): - if val_origin != '' and val_destination != '': + if val_origin != "" and val_destination != "": o.append(re.compile(val_origin, flags=re.IGNORECASE)) d.append(val_destination) @@ -194,4 +193,4 @@ def replace_regex(sourcevar, origin, destination): result.append(None) mapping = dict(zip(sourcevar_unique, result)) out = [int(mapping[i]) if mapping[i] and mapping[i].isdigit() else mapping[i] for i in sourcevar] - return out \ No newline at end of file + return out From 5f441bc4e9b3eedb6ca220ba5accf793b29fe2b8 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sat, 21 Oct 2023 23:49:58 -0400 Subject: [PATCH 03/17] rename internal regex to polars --- tests/{test_regex_internal.py => test_regex_internal_polars.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_regex_internal.py => test_regex_internal_polars.py} (100%) diff --git a/tests/test_regex_internal.py b/tests/test_regex_internal_polars.py similarity index 100% rename from tests/test_regex_internal.py rename to tests/test_regex_internal_polars.py From 08d1152a6d2af3eabfb455787d3f284a9d0fb616 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sat, 21 Oct 2023 23:51:13 -0400 Subject: [PATCH 04/17] Skip internal tests without polars --- tests/test_regex_internal_polars.py | 50 +++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/tests/test_regex_internal_polars.py b/tests/test_regex_internal_polars.py index f76b47e..0b7c8a3 100644 --- a/tests/test_regex_internal_polars.py +++ b/tests/test_regex_internal_polars.py @@ -1,50 +1,80 @@ import os import pytest -import polars as pl -from countrycode import * -codelist = pl.read_csv("countrycode/data/codelist.csv") +from countrycode import countrycode + +try: + import polars as pl + _has_polars = True + + pkg_dir, pkg_filename = os.path.split(__file__) + pkg_dir = os.path.dirname(pkg_dir) + data_path = os.path.join(pkg_dir, "countrycode", "data", "codelist.csv") + codelist = pl.read_csv(data_path) +except ImportError: + _has_polars = False + from custom_strategies import codelist + +_regex_internal_skip_reason = "Test requires polars installation" + # Test all country names with iso3c codes are matched exactly once +@pytest.mark.skipif(not _has_polars, + reason=_regex_internal_skip_reason) def test_iso3c_match(): name = codelist.filter(pl.col("iso3c").is_not_null()) - iso3c_from_name = countrycode(name["country.name.en"], origin='country.name', destination = "iso3c") + iso3c_from_name = countrycode(name["country.name.en"], origin='country.name', destination="iso3c") assert len(iso3c_from_name) == len(set(iso3c_from_name)) + # Test iso3c-to-country.name-to-iso3c is internally consistent +@pytest.mark.skipif(not _has_polars, + reason=_regex_internal_skip_reason) def test_iso3c_consistency(): tmp = codelist.filter(pl.col("iso3c").is_not_null()) - a = countrycode(tmp["iso3c"], origin='iso3c', destination = "country.name") - b = countrycode(a, origin='country.name', destination = "iso3c") + a = countrycode(tmp["iso3c"], origin='iso3c', destination="country.name") + b = countrycode(a, origin='country.name', destination="iso3c") assert (b == tmp["iso3c"]).all() + # Test English regex vs. cldr.short. +@pytest.mark.skipif(not _has_polars, + reason=_regex_internal_skip_reason) def test_english_regex(): tmp = codelist.filter(pl.col("country.name.en").is_not_null()) tmp = tmp.with_columns( - test = countrycode(tmp["country.name.en"], origin="country.name.en", destination="cldr.short.en") + test=countrycode(tmp["country.name.en"], origin="country.name.en", destination="cldr.short.en") ) assert (tmp["test"] != tmp["cldr.short.en"]).any() == False + # Test Italian regex vs. cldr.short.it +@pytest.mark.skipif(not _has_polars, + reason=_regex_internal_skip_reason) def test_italian_regex(): tmp = codelist.filter(pl.col("country.name.it").is_not_null()) tmp = tmp.with_columns( - test = countrycode(tmp["country.name.it"], origin="country.name.it", destination="cldr.short.it") + test=countrycode(tmp["country.name.it"], origin="country.name.it", destination="cldr.short.it") ) assert (tmp["test"] != tmp["cldr.short.it"]).any() == False + # Test German regex vs. cldr.short.de +@pytest.mark.skipif(not _has_polars, + reason=_regex_internal_skip_reason) def test_german_regex(): tmp = codelist.filter(pl.col("country.name.de").is_not_null()) tmp = tmp.with_columns( - test = countrycode(tmp["country.name.de"], origin="country.name.de", destination="cldr.short.de") + test=countrycode(tmp["country.name.de"], origin="country.name.de", destination="cldr.short.de") ) assert (tmp["test"] != tmp["cldr.short.de"]).any() == False + # Test French regex vs. cldr.short.fr +@pytest.mark.skipif(not _has_polars, + reason=_regex_internal_skip_reason) def test_french_regex(): tmp = codelist.filter(pl.col("country.name.fr").is_not_null()) tmp = tmp.with_columns( - test = countrycode(tmp["country.name.fr"], origin="country.name.fr", destination="cldr.short.fr") + test=countrycode(tmp["country.name.fr"], origin="country.name.fr", destination="cldr.short.fr") ) assert (tmp["test"] != tmp["cldr.short.fr"]).any() == False \ No newline at end of file From 52683e4c73d176879fb4164b17d295d2509a795a Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:18:29 -0400 Subject: [PATCH 05/17] migrate polars-specific strategies to own file --- tests/custom_strategies_polars.py | 49 +++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/custom_strategies_polars.py diff --git a/tests/custom_strategies_polars.py b/tests/custom_strategies_polars.py new file mode 100644 index 0000000..6dfb9d7 --- /dev/null +++ b/tests/custom_strategies_polars.py @@ -0,0 +1,49 @@ +import string +import os +from typing import Optional, Union + +from hypothesis import strategies as st +from hypothesis.strategies import SearchStrategy + +try: + import polars as pl + pkg_dir, pkg_filename = os.path.split(__file__) + pkg_dir = os.path.dirname(pkg_dir) + data_path = os.path.join(pkg_dir, "countrycode", "data", "codelist.csv") + codelist = pl.read_csv(data_path) +except ImportError: + codelist = None + +def _select_codes(code="iso3c") -> list: + return codelist.get_column(code).drop_nulls().to_list() + + +def build_valid_code(code: str = "iso3c") -> SearchStrategy[str]: + """ + Builder function that returns a strategy to pick one of a valid 'code'. + """ + return st.sampled_from( + _select_codes(code) + ) + + +def select_filtered_row(column: str, column_value: str, target_col="country.name.en") -> Union[ + Optional[int], Optional[str]]: + """ + Function to return the following operation: + codelist.filter(pl.col(column) == column_value).item(0, target_col) + Args: + column: Column from codelist to filter + column_value: The value with which to filter the specified column + target_col: THe column to be selected + Returns: + The first cell of target_column after filtering column as equals to column_value + """ + return codelist.filter(pl.col(column) == column_value).item(0, target_col) + + +def build_invalid_code(code="iso3c") -> SearchStrategy[str]: + """ + Returns a string that is not represented in code within codelist + """ + return st.text(alphabet=string.printable, min_size=1, max_size=10).filter(lambda z: z not in _select_codes(code)) From 1a3b8f7b76813b7852ddb1f09dec41fa46d60e7e Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:20:56 -0400 Subject: [PATCH 06/17] Use module-level skipping for lack of polars installation --- tests/test_regex_internal_polars.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tests/test_regex_internal_polars.py b/tests/test_regex_internal_polars.py index 0b7c8a3..cc1b8e9 100644 --- a/tests/test_regex_internal_polars.py +++ b/tests/test_regex_internal_polars.py @@ -14,12 +14,13 @@ _has_polars = False from custom_strategies import codelist +if not _has_polars: + pytest.skip("Skipping tests that use pytest", allow_module_level = True) + _regex_internal_skip_reason = "Test requires polars installation" # Test all country names with iso3c codes are matched exactly once -@pytest.mark.skipif(not _has_polars, - reason=_regex_internal_skip_reason) def test_iso3c_match(): name = codelist.filter(pl.col("iso3c").is_not_null()) iso3c_from_name = countrycode(name["country.name.en"], origin='country.name', destination="iso3c") @@ -27,8 +28,6 @@ def test_iso3c_match(): # Test iso3c-to-country.name-to-iso3c is internally consistent -@pytest.mark.skipif(not _has_polars, - reason=_regex_internal_skip_reason) def test_iso3c_consistency(): tmp = codelist.filter(pl.col("iso3c").is_not_null()) a = countrycode(tmp["iso3c"], origin='iso3c', destination="country.name") @@ -37,8 +36,6 @@ def test_iso3c_consistency(): # Test English regex vs. cldr.short. -@pytest.mark.skipif(not _has_polars, - reason=_regex_internal_skip_reason) def test_english_regex(): tmp = codelist.filter(pl.col("country.name.en").is_not_null()) tmp = tmp.with_columns( @@ -48,8 +45,6 @@ def test_english_regex(): # Test Italian regex vs. cldr.short.it -@pytest.mark.skipif(not _has_polars, - reason=_regex_internal_skip_reason) def test_italian_regex(): tmp = codelist.filter(pl.col("country.name.it").is_not_null()) tmp = tmp.with_columns( @@ -59,8 +54,6 @@ def test_italian_regex(): # Test German regex vs. cldr.short.de -@pytest.mark.skipif(not _has_polars, - reason=_regex_internal_skip_reason) def test_german_regex(): tmp = codelist.filter(pl.col("country.name.de").is_not_null()) tmp = tmp.with_columns( @@ -70,8 +63,6 @@ def test_german_regex(): # Test French regex vs. cldr.short.fr -@pytest.mark.skipif(not _has_polars, - reason=_regex_internal_skip_reason) def test_french_regex(): tmp = codelist.filter(pl.col("country.name.fr").is_not_null()) tmp = tmp.with_columns( From 0625868b283031180e9d4240e9fcbbf78e755034 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:35:11 -0400 Subject: [PATCH 07/17] Move numeric testing to conversion and module level skipping in conversion testing --- tests/test_basic.py | 30 ++++++++++++++++++++++++------ tests/test_conversion.py | 21 +++++++++++++++++++++ 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/tests/test_basic.py b/tests/test_basic.py index b5d5933..ca320cb 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,18 +1,36 @@ +import os +import pytest + from hypothesis import given, example from countrycode import countrycode -from custom_strategies import build_valid_code, select_filtered_row +try: + from custom_strategies_polars import ( + build_valid_code as build_valid_code_polars, + select_filtered_row as select_filtered_row_polars + ) + + _has_polars = True + + pkg_dir, pkg_filename = os.path.split(__file__) + pkg_dir = os.path.dirname(pkg_dir) + data_path = os.path.join(pkg_dir, "countrycode", "data", "codelist.csv") + codelist = pl.read_csv(data_path) + +except ImportError: + _has_polars = False + from custom_strategies import codelist + +_regex_internal_skip_reason = "Test requires polars installation" """ Test to check that finding the iso3n representation of an iso3c row is equivalent to finding the corresponding cell in the countrycode dataframe. """ -@given(code_param=build_valid_code("iso3c")) -@example(code_param="CAN") -def test_numeric(code_param): - expected = select_filtered_row("iso3c", code_param, "iso3n") - assert countrycode(code_param, "iso3c", "iso3n") == expected + + + def test_basic_conversions(): diff --git a/tests/test_conversion.py b/tests/test_conversion.py index f2a231f..6fee009 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -1,8 +1,29 @@ +import pytest from hypothesis import given, example from countrycode import countrycode + +try: + import polars as pl + + _has_polars = True +except ImportError: + _has_polars = False + +_regex_internal_skip_reason = "Test requires polars installation" + +if not _has_polars: + pytest.skip("Skipping tests that use pytest", allow_module_level=True) + from custom_strategies import build_invalid_code, build_valid_code, select_filtered_row +@given(code_param=build_valid_code("iso3c")) +@example(code_param="CAN") +def test_numeric(code_param): + expected = select_filtered_row("iso3c", code_param, "iso3n") + assert countrycode(code_param, "iso3c", "iso3n") == expected + + """ Test to check that finding the country.name representation of an iso3c row is equivalent to finding the corresponding cell in the countrycode dataframe. From 6a9b42ee26c9b6c5270b418bc0e684bed00bfaa7 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:43:21 -0400 Subject: [PATCH 08/17] Update codelist tests to function with and without polars --- tests/test_codelist.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tests/test_codelist.py b/tests/test_codelist.py index 9088575..f43e1e6 100644 --- a/tests/test_codelist.py +++ b/tests/test_codelist.py @@ -1,14 +1,27 @@ import os -import polars as pl -pkg_dir, pkg_filename = os.path.split(__file__) -pkg_dir = os.path.dirname(pkg_dir) -data_path = os.path.join(pkg_dir, "countrycode", "data", "codelist.csv") -codelist = pl.read_csv(data_path) +import pytest +try: + import polars as pl + _has_polars = True -def test_codelist_dimensions(): + pkg_dir, pkg_filename = os.path.split(__file__) + pkg_dir = os.path.dirname(pkg_dir) + data_path = os.path.join(pkg_dir, "countrycode", "data", "codelist.csv") + codelist = pl.read_csv(data_path) +except ImportError: + _has_polars = False + from custom_strategies import codelist + +@pytest.mark.skipif(not _has_polars, reason=".Shape method assumes polars installation") +def test_codelist_dimensions_polars(): """ Unit test to validate the dimensions of the data. """ assert codelist.shape == (291, 624) + +@pytest.mark.skipif(_has_polars, reason="Test assumed dictionary representation of codelist") +def test_codelist(): + assert len(codelist.keys()) == 624 + assert all(len(codelist.get(key)) == 291 for key in codelist.keys()) From d1f0a7ff2c02c2f360c76508129a1d591f36f713 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:46:18 -0400 Subject: [PATCH 09/17] Cleanup of tests --- tests/test_corner_cases.py | 1 - tests/test_regex_external.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_corner_cases.py b/tests/test_corner_cases.py index a45cbb4..53c90c1 100644 --- a/tests/test_corner_cases.py +++ b/tests/test_corner_cases.py @@ -1,4 +1,3 @@ -import pytest from countrycode import countrycode diff --git a/tests/test_regex_external.py b/tests/test_regex_external.py index 5eef9f9..f10466c 100644 --- a/tests/test_regex_external.py +++ b/tests/test_regex_external.py @@ -1,14 +1,14 @@ -import pytest from countrycode import countrycode -def iso3c_of(name): - out = countrycode(sourcevar = name, origin = 'country.name', destination = 'iso3c') + +def iso3c_of(name): + out = countrycode(sourcevar=name, origin='country.name', destination='iso3c') if out is None: out = "" return out -def test_known_variants(): +def test_known_variants(): assert iso3c_of('Aruba') == 'ABW' assert iso3c_of('Afghanistan') == 'AFG' assert iso3c_of('Angola') == 'AGO' From 3b4b12b0b0d69c03a3c69205972d45582d5e6dd2 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:47:21 -0400 Subject: [PATCH 10/17] Clean up imports in basic testing --- tests/test_basic.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/tests/test_basic.py b/tests/test_basic.py index ca320cb..11c583b 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,38 +1,9 @@ -import os -import pytest - -from hypothesis import given, example - from countrycode import countrycode -try: - from custom_strategies_polars import ( - build_valid_code as build_valid_code_polars, - select_filtered_row as select_filtered_row_polars - ) - - _has_polars = True - - pkg_dir, pkg_filename = os.path.split(__file__) - pkg_dir = os.path.dirname(pkg_dir) - data_path = os.path.join(pkg_dir, "countrycode", "data", "codelist.csv") - codelist = pl.read_csv(data_path) - -except ImportError: - _has_polars = False - from custom_strategies import codelist - -_regex_internal_skip_reason = "Test requires polars installation" - """ Test to check that finding the iso3n representation of an iso3c row is equivalent to finding the corresponding cell in the countrycode dataframe. """ - - - - - def test_basic_conversions(): def name_of(iso3c_code): return countrycode(iso3c_code, origin='iso3c', destination='country.name') From 3b127e7a05f138e3d1bed51d04d9b99a7f8a5e2e Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:49:37 -0400 Subject: [PATCH 11/17] Update ignore to disregard hypothesis and pytest cache --- .gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 91daabf..0d0b86e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,10 @@ __pypackages__/ # Testing .hypothesis/ -.pytest_cache/ \ No newline at end of file +.pytest_cache/ + +## Ignore hypothesis data +.hypothesis + + ## ignore caching from tests +.pytest_cache \ No newline at end of file From 43a1df3923c0e6fb3dc494a7a6e76fc6bbfe087c Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:50:19 -0400 Subject: [PATCH 12/17] Update gitignore to remove error --- .gitignore | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.gitignore b/.gitignore index 0d0b86e..ea01b40 100644 --- a/.gitignore +++ b/.gitignore @@ -13,9 +13,3 @@ __pypackages__/ # Testing .hypothesis/ .pytest_cache/ - -## Ignore hypothesis data -.hypothesis - - ## ignore caching from tests -.pytest_cache \ No newline at end of file From 4624e28e3f98eff795c9f738aeceaad4230f7dd6 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:53:23 -0400 Subject: [PATCH 13/17] Update testing workflow to use polars --- .github/workflows/test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2d6bd0d..4f84a5b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,3 +35,11 @@ jobs: - name: Run pytest run: | pytest + + - name: Install polars + run: | + poetry install --all-extras + + - name: Repeat tests with polars support + run: | + pytest From 867f73d9c06554a824f1da9b253864f094961c83 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:59:06 -0400 Subject: [PATCH 14/17] Add non-polars custom strategies --- tests/custom_strategies.py | 45 ++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/tests/custom_strategies.py b/tests/custom_strategies.py index 72f33b5..b9fbbd9 100644 --- a/tests/custom_strategies.py +++ b/tests/custom_strategies.py @@ -1,3 +1,4 @@ +import csv import string import os from typing import Optional, Union @@ -5,16 +6,40 @@ from hypothesis import strategies as st from hypothesis.strategies import SearchStrategy -import polars as pl - pkg_dir, pkg_filename = os.path.split(__file__) pkg_dir = os.path.dirname(pkg_dir) data_path = os.path.join(pkg_dir, "countrycode", "data", "codelist.csv") -codelist = pl.read_csv(data_path) +with open(data_path) as f: + rows = csv.reader(f) + codelist = {col[0]: list(col[1:]) for col in zip(*rows)} + + +def empty_string_to_null(s: str) -> Optional[str]: + """ + Helper function to convert empty strings to `None`. Diract extraction from + the `codelist` dictionary stores empty values as `""` while + `countrycode` represents those values as None + Args: + s: A string + Returns: `None` is the string is empty, otherwise the function will return + the input string `s`. -def _select_codes(code="iso3c") -> list: - return codelist.get_column(code).drop_nulls().to_list() + """ + if s == "": + return None + return s + +def _select_codes(code: str = "iso3c") -> list: + """ + Select all distinct values for a given column `code` from codelist + Args: + code: String representation of a column in `codelist` representing the field + of distinct values you wish to access + + Returns: An array of non-empty values of the `code` column + """ + return list(filter(lambda z: z != "", codelist.get(code))) def build_valid_code(code: str = "iso3c") -> SearchStrategy[str]: @@ -26,19 +51,21 @@ def build_valid_code(code: str = "iso3c") -> SearchStrategy[str]: ) -def select_filtered_row(column: str, column_value: str, target_col="country.name.en") -> Union[ +def select_filtered_row(input_column: str, column_value: str, target_col="country.name.en") -> Union[ Optional[int], Optional[str]]: """ - Function to return the following operation: + Function to return the `target_col` row that matches the `column_value` value of `column` + Assuming `codelist` is from the `polars` package: codelist.filter(pl.col(column) == column_value).item(0, target_col) Args: - column: Column from codelist to filter + input_column: Column from codelist to filter column_value: The value with which to filter the specified column target_col: THe column to be selected Returns: The first cell of target_column after filtering column as equals to column_value """ - return codelist.filter(pl.col(column) == column_value).item(0, target_col) + input_value_idx = codelist.get(input_column).index(column_value) + return codelist.get(target_col)[input_value_idx] def build_invalid_code(code="iso3c") -> SearchStrategy[str]: From f5b9a006e49ba9e3e296b8665e0e2163b11960f7 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Sun, 22 Oct 2023 00:59:20 -0400 Subject: [PATCH 15/17] Migrate prior custom strategies to use polars --- tests/custom_strategies_polars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/custom_strategies_polars.py b/tests/custom_strategies_polars.py index 6dfb9d7..c6483b6 100644 --- a/tests/custom_strategies_polars.py +++ b/tests/custom_strategies_polars.py @@ -12,7 +12,7 @@ data_path = os.path.join(pkg_dir, "countrycode", "data", "codelist.csv") codelist = pl.read_csv(data_path) except ImportError: - codelist = None + pass def _select_codes(code="iso3c") -> list: return codelist.get_column(code).drop_nulls().to_list() From d4520c8ec4aa051f3e7332d98dd9e7a455ace638 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Wed, 1 Nov 2023 21:56:07 -0400 Subject: [PATCH 16/17] Update skip reason in polars testing --- tests/test_regex_internal_polars.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_regex_internal_polars.py b/tests/test_regex_internal_polars.py index cc1b8e9..14771a4 100644 --- a/tests/test_regex_internal_polars.py +++ b/tests/test_regex_internal_polars.py @@ -4,6 +4,7 @@ try: import polars as pl + _has_polars = True pkg_dir, pkg_filename = os.path.split(__file__) @@ -12,13 +13,12 @@ codelist = pl.read_csv(data_path) except ImportError: _has_polars = False - from custom_strategies import codelist - -if not _has_polars: - pytest.skip("Skipping tests that use pytest", allow_module_level = True) _regex_internal_skip_reason = "Test requires polars installation" +if not _has_polars: + pytest.skip(_regex_internal_skip_reason, allow_module_level=True) + # Test all country names with iso3c codes are matched exactly once def test_iso3c_match(): @@ -68,4 +68,4 @@ def test_french_regex(): tmp = tmp.with_columns( test=countrycode(tmp["country.name.fr"], origin="country.name.fr", destination="cldr.short.fr") ) - assert (tmp["test"] != tmp["cldr.short.fr"]).any() == False \ No newline at end of file + assert (tmp["test"] != tmp["cldr.short.fr"]).any() == False From 39c4bbd3a851a8b66e7324b96cb5da62460527a0 Mon Sep 17 00:00:00 2001 From: "Francis R. Kovacs" Date: Wed, 1 Nov 2023 21:59:08 -0400 Subject: [PATCH 17/17] move existing conversion to polars --- tests/{test_conversion.py => test_conversion_polars.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename tests/{test_conversion.py => test_conversion_polars.py} (91%) diff --git a/tests/test_conversion.py b/tests/test_conversion_polars.py similarity index 91% rename from tests/test_conversion.py rename to tests/test_conversion_polars.py index 6fee009..a7900c4 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion_polars.py @@ -13,9 +13,9 @@ _regex_internal_skip_reason = "Test requires polars installation" if not _has_polars: - pytest.skip("Skipping tests that use pytest", allow_module_level=True) + pytest.skip(_regex_internal_skip_reason, allow_module_level=True) -from custom_strategies import build_invalid_code, build_valid_code, select_filtered_row +from custom_strategies_polars import build_invalid_code, build_valid_code, select_filtered_row @given(code_param=build_valid_code("iso3c")) @example(code_param="CAN")