Skip to content

Commit

Permalink
fix: traineras edition retrieval not taking female races into account
Browse files (browse the repository at this point in the history)
  • Loading branch information
iagocanalejas committed Apr 10, 2024
1 parent c2f7c7f commit 6b58ef9
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 7 deletions.
6 changes: 3 additions & 3 deletions rscraping/clients/traineras.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import requests
from parsel.selector import Selector

from rscraping.data.constants import CATEGORY_SCHOOL, CATEGORY_VETERAN, HTTP_HEADERS
from rscraping.data.constants import CATEGORY_SCHOOL, CATEGORY_VETERAN, GENDER_FEMALE, HTTP_HEADERS
from rscraping.data.models import Datasource, Lineup, Race, RaceName
from rscraping.parsers.html import TrainerasHtmlParser

Expand Down Expand Up @@ -87,7 +87,6 @@ def get_race_ids_by_flag(self, flag_id: str, is_female: bool = False) -> Generat
content = Selector(requests.get(url=url, headers=HTTP_HEADERS()).content.decode("utf-8"))
yield from self._html_parser.parse_flag_race_ids(content, is_female=is_female)


@override
def get_race_by_id(self, race_id: str, **kwargs) -> Race | None:
"""
Expand All @@ -114,7 +113,8 @@ def get_race_by_id(self, race_id: str, **kwargs) -> Race | None:

# the first flag should be an exact match of the given one, so we can use it to get the editions
content = Selector(requests.get(url=flag_urls[0], headers=HTTP_HEADERS()).content.decode("utf-8"))
edition = next((e for (y, e) in self._html_parser.parse_flag_editions(content) if y == race.year), None)
editions = self._html_parser.parse_flag_editions(content, is_female=race.gender == GENDER_FEMALE)
edition = next((e for (y, e) in editions if y == race.year), None)
if edition:
race.normalized_names = [(n[0], edition) for n in race.normalized_names]

Expand Down
2 changes: 1 addition & 1 deletion rscraping/parsers/html/traineras.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def parse_rower_race_ids(self, selector: Selector, year: str | None = None) -> G
def parse_searched_flag_urls(self, selector: Selector) -> list[str]:
return selector.xpath("/html/body/div[1]/div[2]/div/div/div[*]/div/div/div[2]/h5/a/@href").getall()

def parse_flag_editions(self, selector: Selector, is_female: bool = False) -> Generator[tuple[int, int], Any, Any]:
def parse_flag_editions(self, selector: Selector, is_female: bool) -> Generator[tuple[int, int], Any, Any]:
table = selector.xpath(f"/html/body/main/div/div/div/div[{2 if is_female else 1}]/div/table").get(None)
if table:
for row in Selector(table).xpath("//*/tr").getall()[1:]:
Expand Down
6 changes: 3 additions & 3 deletions tests/parsers/html/traineras_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ def test_parse_flag_race_ids(self):
selector = Selector(file.read())
male_ids = self.parser.parse_flag_race_ids(selector, is_female=False)
female_ids = self.parser.parse_flag_race_ids(selector, is_female=True)
self.assertEqual(list(male_ids), ['2476', '2477', '5814'])
self.assertEqual(list(female_ids), ['2508', '5815'])
self.assertEqual(list(male_ids), ["2476", "2477", "5814"])
self.assertEqual(list(female_ids), ["2508", "5815"])

def test_parse_club_race_ids(self):
with open(os.path.join(self.fixtures, "traineras_club.html")) as file:
Expand All @@ -95,7 +95,7 @@ def test_parse_search_flags(self):
def test_parse_flag_editions(self):
with open(os.path.join(self.fixtures, "traineras_flag_editions.html")) as file:
content = Selector(file.read())
male_editions = self.parser.parse_flag_editions(content)
male_editions = self.parser.parse_flag_editions(content, is_female=False)
female_editions = self.parser.parse_flag_editions(content, is_female=True)
self.assertEqual(list(male_editions), [(2007, 1), (2008, 2), (2011, 3), (2023, 14)])
self.assertEqual(list(female_editions), [(2016, 1), (2017, 2), (2023, 8)])
Expand Down

0 comments on commit 6b58ef9

Please sign in to comment.