Skip to content

Commit

Permalink
feat: more checks for TIME_TRIAL races
Browse files Browse the repository at this point in the history
  • Loading branch information
iagocanalejas committed Apr 8, 2024
1 parent fc91e18 commit 3dd9f1d
Show file tree
Hide file tree
Showing 12 changed files with 66 additions and 60 deletions.
2 changes: 1 addition & 1 deletion fixtures/html/traineras_details.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ <h1>BANDERA TERESA HERRERA 2012</h1>
<div>
<div>
<div>
<h2>A Coruña - 11-08-2012 (17:30) - 5556 metros - SM</h2>
<h2>A Coruña - 12-08-2012 (17:30) - 5556 metros - SM</h2>
</div>
<div>
<table>
Expand Down
2 changes: 1 addition & 1 deletion rscraping/clients/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pyutils.shortcuts import only_one_not_none
from pyutils.strings import roman_to_int
from rscraping.data.functions import is_female
from rscraping.data.checks import is_female
from rscraping.data.models import Datasource, Lineup, Race, RaceName
from rscraping.parsers.df import (
COLUMN_CLUB,
Expand Down
55 changes: 55 additions & 0 deletions rscraping/data/checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from datetime import date

from pyutils.strings import remove_symbols
from rscraping.data.constants import SYNONYM_FEMALE, SYNONYM_MEMORIAL, SYNONYMS


def should_be_time_trial(name: str, date: date) -> bool:
    """Tell whether a race has to be classified as a time trial.

    A race qualifies when ``is_play_off(name)`` holds, or when ``name``
    contains both "TERESA" and "HERRERA" as whitespace-separated words and
    ``date`` falls on ISO weekday 6 (Saturday).

    :param name: race name to inspect.
    :param date: day on which the race takes place.
    :return: True when the race should be treated as a time trial.
    """
    if is_play_off(name):
        return True

    words = name.split()
    if "TERESA" not in words or "HERRERA" not in words:
        return False

    # The weekday is only consulted once the name has matched.
    return date.isoweekday() == 6


def is_play_off(name: str) -> bool:
    """Return True when both "PLAY" and "OFF" occur somewhere in ``name``.

    This is a case-sensitive substring search, so the two fragments do not
    need to be separate words (e.g. "PLAYOFF" matches).
    """
    return all(fragment in name for fragment in ("PLAY", "OFF"))


def is_memorial(name: str) -> bool:
    """Return True when any memorial synonym is a word of ``name``.

    Words are the whitespace-separated tokens of ``name``; the synonyms are
    read from ``SYNONYMS[SYNONYM_MEMORIAL]`` and compared for exact equality.
    """
    synonyms = SYNONYMS[SYNONYM_MEMORIAL]
    words = name.split()
    for synonym in synonyms:
        if synonym in words:
            return True
    return False


def is_female(name: str) -> bool:
    """Return True when any female synonym is a word of ``name``.

    Words are the whitespace-separated tokens of ``name``; the synonyms are
    read from ``SYNONYMS[SYNONYM_FEMALE]`` and compared for exact equality.
    """
    synonyms = SYNONYMS[SYNONYM_FEMALE]
    words = name.split()
    for synonym in synonyms:
        if synonym in words:
            return True
    return False


def is_branch_club(name: str, letter: str = "B") -> bool:
    """Return True when ``letter`` is a standalone token of the club name.

    ``name`` is passed through ``remove_symbols`` and upper-cased before it
    is split on whitespace; ``letter`` is compared exactly as given.

    :param name: club name to inspect.
    :param letter: token to look for, "B" by default.
    """
    tokens = remove_symbols(name).upper().split()
    return letter in tokens


def is_act(name: str, is_female: bool = False) -> bool:
    """Return True when ``name`` matches the ACT naming patterns.

    With ``is_female`` set, only the substring "EUSKOTREN" is looked for.
    Otherwise the name matches when it contains the words "EUSKO" and
    "LABEL", the word "ACT", or the substring "EUSKOLABEL".

    :param name: league or race name to inspect.
    :param is_female: switch to the "EUSKOTREN" check.
    """
    if is_female:
        return "EUSKOTREN" in name

    words = name.split()
    if "EUSKO" in words and "LABEL" in words:
        return True
    return "ACT" in words or "EUSKOLABEL" in name


def is_lgt(name: str, letter: str = "A") -> bool:
    """Return True when ``name`` refers to the LGT league of a given letter.

    The name matches when it contains both "LGT" and ``letter`` as
    whitespace-separated words, or when it contains the substring
    "LGT" immediately followed by ``letter``.

    :param name: league or race name to inspect.
    :param letter: one of "A", "B" or "F".
    :raises ValueError: when ``letter`` is not one of the accepted values.
    """
    if letter not in ("A", "B", "F"):
        raise ValueError(f"Invalid letter: {letter}")

    words = name.split()
    return ("LGT" in words and letter in words) or f"LGT{letter}" in name


def is_arc(name: str, category: int = 1) -> bool:
    """Return True when ``name`` contains the ARC token for ``category``.

    Category 1 looks for the word "ARC" and category 2 for the word "ARC2"
    among the whitespace-separated tokens of ``name``.

    :param name: league or race name to inspect.
    :param category: 1 or 2.
    :raises ValueError: when ``category`` is neither 1 nor 2.
    """
    if category == 1:
        token = "ARC"
    elif category == 2:
        token = "ARC2"
    else:
        raise ValueError(f"Invalid category: {category}")
    return token in name.split()


def is_ete(name: str) -> bool:
    """Return True when "ETE" is one of the whitespace-separated words of ``name``."""
    return any(word == "ETE" for word in name.split())
49 changes: 0 additions & 49 deletions rscraping/data/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,58 +3,9 @@
import sys
from typing import Any

from pyutils.strings import remove_symbols
from rscraping.data.constants import SYNONYM_FEMALE, SYNONYM_MEMORIAL, SYNONYMS
from rscraping.data.models import Lineup, Race


def is_play_off(name: str) -> bool:
    """Return True when both "PLAY" and "OFF" occur somewhere in ``name``.

    This is a case-sensitive substring search, so the two fragments do not
    need to be separate words (e.g. "PLAYOFF" matches).
    """
    return all(fragment in name for fragment in ("PLAY", "OFF"))


def is_memorial(name: str) -> bool:
    """Return True when any memorial synonym is a word of ``name``.

    Words are the whitespace-separated tokens of ``name``; the synonyms are
    read from ``SYNONYMS[SYNONYM_MEMORIAL]`` and compared for exact equality.
    """
    synonyms = SYNONYMS[SYNONYM_MEMORIAL]
    words = name.split()
    for synonym in synonyms:
        if synonym in words:
            return True
    return False


def is_female(name: str) -> bool:
    """Return True when any female synonym is a word of ``name``.

    Words are the whitespace-separated tokens of ``name``; the synonyms are
    read from ``SYNONYMS[SYNONYM_FEMALE]`` and compared for exact equality.
    """
    synonyms = SYNONYMS[SYNONYM_FEMALE]
    words = name.split()
    for synonym in synonyms:
        if synonym in words:
            return True
    return False


def is_branch_club(name: str, letter: str = "B") -> bool:
    """Return True when ``letter`` is a standalone token of the club name.

    ``name`` is passed through ``remove_symbols`` and upper-cased before it
    is split on whitespace; ``letter`` is compared exactly as given.

    :param name: club name to inspect.
    :param letter: token to look for, "B" by default.
    """
    tokens = remove_symbols(name).upper().split()
    return letter in tokens


def is_act(name: str, is_female: bool = False) -> bool:
    """Return True when ``name`` matches the ACT naming patterns.

    With ``is_female`` set, only the substring "EUSKOTREN" is looked for.
    Otherwise the name matches when it contains the words "EUSKO" and
    "LABEL", the word "ACT", or the substring "EUSKOLABEL".

    :param name: league or race name to inspect.
    :param is_female: switch to the "EUSKOTREN" check.
    """
    if is_female:
        return "EUSKOTREN" in name

    words = name.split()
    if "EUSKO" in words and "LABEL" in words:
        return True
    return "ACT" in words or "EUSKOLABEL" in name


def is_lgt(name: str, letter: str = "A") -> bool:
    """Return True when ``name`` refers to the LGT league of a given letter.

    The name matches when it contains both "LGT" and ``letter`` as
    whitespace-separated words, or when it contains the substring
    "LGT" immediately followed by ``letter``.

    :param name: league or race name to inspect.
    :param letter: one of "A", "B" or "F".
    :raises ValueError: when ``letter`` is not one of the accepted values.
    """
    if letter not in ("A", "B", "F"):
        raise ValueError(f"Invalid letter: {letter}")

    words = name.split()
    return ("LGT" in words and letter in words) or f"LGT{letter}" in name


def is_arc(name: str, category: int = 1) -> bool:
    """Return True when ``name`` contains the ARC token for ``category``.

    Category 1 looks for the word "ARC" and category 2 for the word "ARC2"
    among the whitespace-separated tokens of ``name``.

    :param name: league or race name to inspect.
    :param category: 1 or 2.
    :raises ValueError: when ``category`` is neither 1 nor 2.
    """
    if category == 1:
        token = "ARC"
    elif category == 2:
        token = "ARC2"
    else:
        raise ValueError(f"Invalid category: {category}")
    return token in name.split()


def is_ete(name: str) -> bool:
    """Return True when "ETE" is one of the whitespace-separated words of ``name``."""
    return any(word == "ETE" for word in name.split())


def expand_path(path: str, valid_files: list[str]) -> list[str]:
def is_valid(file: str) -> bool:
_, extension = os.path.splitext(file)
Expand Down
2 changes: 1 addition & 1 deletion rscraping/data/normalization/clubs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re

from pyutils.strings import match_normalization, remove_parenthesis, whitespaces_clean
from rscraping.data.functions import is_branch_club
from rscraping.data.checks import is_branch_club

_ENTITY_TITLES_SHORT = [
"CR",
Expand Down
2 changes: 1 addition & 1 deletion rscraping/data/normalization/leagues.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pyutils.strings import match_normalization
from rscraping.data.functions import is_act, is_arc, is_ete, is_lgt, is_play_off
from rscraping.data.checks import is_act, is_arc, is_ete, is_lgt, is_play_off

__LEAGUES_MAP = {
"LIGA GALEGA DE TRAIÑAS": [["LGT"]],
Expand Down
2 changes: 1 addition & 1 deletion rscraping/data/normalization/races.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
roman_to_int,
whitespaces_clean,
)
from rscraping.data.functions import is_play_off
from rscraping.data.checks import is_play_off

_MISSPELLINGS = {
"": ["RECICLAMOS LA LUZ", " AE ", "EXCMO", "ILTMO"],
Expand Down
2 changes: 1 addition & 1 deletion rscraping/parsers/html/act.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from parsel.selector import Selector

from pyutils.strings import find_date, remove_parenthesis, whitespaces_clean
from rscraping.data.checks import is_play_off
from rscraping.data.constants import (
CATEGORY_ABSOLUT,
GENDER_FEMALE,
Expand All @@ -15,7 +16,6 @@
RACE_TIME_TRIAL,
RACE_TRAINERA,
)
from rscraping.data.functions import is_play_off
from rscraping.data.models import Datasource, Participant, Race, RaceName
from rscraping.data.normalization.clubs import normalize_club_name
from rscraping.data.normalization.races import (
Expand Down
2 changes: 1 addition & 1 deletion rscraping/parsers/html/arc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from parsel.selector import Selector

from pyutils.strings import remove_parenthesis, whitespaces_clean
from rscraping.data.checks import is_play_off
from rscraping.data.constants import (
CATEGORY_ABSOLUT,
GENDER_FEMALE,
Expand All @@ -16,7 +17,6 @@
RACE_TIME_TRIAL,
RACE_TRAINERA,
)
from rscraping.data.functions import is_play_off
from rscraping.data.models import Datasource, Lineup, Participant, Race, RaceName
from rscraping.data.normalization.clubs import normalize_club_name
from rscraping.data.normalization.races import (
Expand Down
2 changes: 1 addition & 1 deletion rscraping/parsers/html/lgt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from parsel.selector import Selector

from pyutils.strings import whitespaces_clean
from rscraping.data.checks import is_female, is_play_off
from rscraping.data.constants import (
CATEGORY_ABSOLUT,
GENDER_FEMALE,
Expand All @@ -18,7 +19,6 @@
SYNONYM_FEMALE,
SYNONYMS,
)
from rscraping.data.functions import is_female, is_play_off
from rscraping.data.models import Datasource, Participant, Race, RaceName
from rscraping.data.normalization import (
find_race_sponsor,
Expand Down
4 changes: 2 additions & 2 deletions rscraping/parsers/html/traineras.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from parsel.selector import Selector

from pyutils.strings import find_date, whitespaces_clean
from rscraping.data.checks import should_be_time_trial
from rscraping.data.constants import (
CATEGORY_ABSOLUT,
CATEGORY_SCHOOL,
Expand All @@ -17,7 +18,6 @@
RACE_TIME_TRIAL,
RACE_TRAINERA,
)
from rscraping.data.functions import is_play_off
from rscraping.data.models import Datasource, Lineup, Participant, Race, RaceName
from rscraping.data.normalization import (
find_league,
Expand Down Expand Up @@ -81,7 +81,7 @@ def parse_race(self, selector: Selector, *, race_id: str, table: int | None = No

participants = self.get_participants(selector, table)
ttype = self.get_type(participants)
ttype = ttype if not is_play_off(name) else RACE_TIME_TRIAL
ttype = ttype if not should_be_time_trial(name, t_date) else RACE_TIME_TRIAL
category = self.get_category(selector)

race = Race(
Expand Down
2 changes: 1 addition & 1 deletion tests/parsers/html/traineras_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def test_get_number_of_pages(self):

_RACE_1 = Race(
name="BANDERA TERESA HERRERA",
date="11/08/2012",
date="12/08/2012",
day=1,
modality=RACE_TRAINERA,
type=RACE_CONVENTIONAL,
Expand Down

0 comments on commit 3dd9f1d

Please sign in to comment.