# Maps a canonical race name to the keyword lists that identify its synonyms.
# Each inner list is one alternative spelling, split into the keywords that
# `normalize_race_known_synonyms` looks for.
_NORMALIZED_RACES: dict[str, list[list[str]]] = {
    "DONIBANE ZIBURUKO ESTROPADAK": [["SAN", "JUAN", "LUZ"]],
}


def normalize_race_known_synonyms(name: str) -> str:
    """Replace a race name with its canonical form when it is a known synonym.

    A name matches an entry of ``_NORMALIZED_RACES`` when either:

    * the name is a substring of the entry's keywords joined by single
      spaces (e.g. ``"SAN JUAN"`` inside ``"SAN JUAN LUZ"``), or
    * every keyword of the entry occurs somewhere in the name
      (e.g. ``"BANDERA SAN JUAN DE LUZ"``).

    Args:
        name: Race name to check against the known synonyms.

    Returns:
        The canonical name of the first matching entry, or ``name`` unchanged
        when nothing matches or when ``name`` is empty / whitespace-only.
    """
    # An empty or blank string is a substring of every joined keyword list,
    # so without this guard a blank name would be rewritten to the first
    # canonical race instead of being left alone.
    if not name.strip():
        return name

    for synonym, values in _NORMALIZED_RACES.items():
        for value in values:
            # NOTE(review): the substring check still accepts short fragments
            # such as "SAN"; kept as-is because partial names may be relied
            # on by callers -- confirm before tightening.
            if name in " ".join(value) or all(v in name for v in value):
                return synonym
    return name