fix: traineras edition retrieval not taking female races into account

iagocanalejas · Apr 10, 2024 · 6b58ef9
1 parent c2f7c7f
commit 6b58ef9
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 7 deletions.
diff --git a/rscraping/clients/traineras.py b/rscraping/clients/traineras.py
@@ -5,7 +5,7 @@
 import requests
 from parsel.selector import Selector
 
-from rscraping.data.constants import CATEGORY_SCHOOL, CATEGORY_VETERAN, HTTP_HEADERS
+from rscraping.data.constants import CATEGORY_SCHOOL, CATEGORY_VETERAN, GENDER_FEMALE, HTTP_HEADERS
 from rscraping.data.models import Datasource, Lineup, Race, RaceName
 from rscraping.parsers.html import TrainerasHtmlParser
 
@@ -87,7 +87,6 @@ def get_race_ids_by_flag(self, flag_id: str, is_female: bool = False) -> Generat
         content = Selector(requests.get(url=url, headers=HTTP_HEADERS()).content.decode("utf-8"))
         yield from self._html_parser.parse_flag_race_ids(content, is_female=is_female)
 
-
     @override
     def get_race_by_id(self, race_id: str, **kwargs) -> Race | None:
         """
@@ -114,7 +113,8 @@ def get_race_by_id(self, race_id: str, **kwargs) -> Race | None:
 
         # the first flag should be an exact match of the given one, so we can use it to get the editions
         content = Selector(requests.get(url=flag_urls[0], headers=HTTP_HEADERS()).content.decode("utf-8"))
-        edition = next((e for (y, e) in self._html_parser.parse_flag_editions(content) if y == race.year), None)
+        editions = self._html_parser.parse_flag_editions(content, is_female=race.gender == GENDER_FEMALE)
+        edition = next((e for (y, e) in editions if y == race.year), None)
         if edition:
             race.normalized_names = [(n[0], edition) for n in race.normalized_names]
 

diff --git a/rscraping/parsers/html/traineras.py b/rscraping/parsers/html/traineras.py
@@ -178,7 +178,7 @@ def parse_rower_race_ids(self, selector: Selector, year: str | None = None) -> G
     def parse_searched_flag_urls(self, selector: Selector) -> list[str]:
         return selector.xpath("/html/body/div[1]/div[2]/div/div/div[*]/div/div/div[2]/h5/a/@href").getall()
 
-    def parse_flag_editions(self, selector: Selector, is_female: bool = False) -> Generator[tuple[int, int], Any, Any]:
+    def parse_flag_editions(self, selector: Selector, is_female: bool) -> Generator[tuple[int, int], Any, Any]:
         table = selector.xpath(f"/html/body/main/div/div/div/div[{2 if is_female else 1}]/div/table").get(None)
         if table:
             for row in Selector(table).xpath("//*/tr").getall()[1:]:

diff --git a/tests/parsers/html/traineras_parser_test.py b/tests/parsers/html/traineras_parser_test.py
@@ -74,8 +74,8 @@ def test_parse_flag_race_ids(self):
             selector = Selector(file.read())
             male_ids = self.parser.parse_flag_race_ids(selector, is_female=False)
             female_ids = self.parser.parse_flag_race_ids(selector, is_female=True)
-        self.assertEqual(list(male_ids), ['2476', '2477', '5814'])
-        self.assertEqual(list(female_ids), ['2508', '5815'])
+        self.assertEqual(list(male_ids), ["2476", "2477", "5814"])
+        self.assertEqual(list(female_ids), ["2508", "5815"])
 
     def test_parse_club_race_ids(self):
         with open(os.path.join(self.fixtures, "traineras_club.html")) as file:
@@ -95,7 +95,7 @@ def test_parse_search_flags(self):
     def test_parse_flag_editions(self):
         with open(os.path.join(self.fixtures, "traineras_flag_editions.html")) as file:
             content = Selector(file.read())
-            male_editions = self.parser.parse_flag_editions(content)
+            male_editions = self.parser.parse_flag_editions(content, is_female=False)
             female_editions = self.parser.parse_flag_editions(content, is_female=True)
         self.assertEqual(list(male_editions), [(2007, 1), (2008, 2), (2011, 3), (2023, 14)])
         self.assertEqual(list(female_editions), [(2016, 1), (2017, 2), (2023, 8)])