Skip to content

Commit

Permalink
change to 'new' project structure
Browse files (browse the repository at this point in the history)
  • Loading branch information
iagocanalejas committed Oct 21, 2023
1 parent aa8fc57 commit d707102
Show file tree
Hide file tree
Showing 34 changed files with 188 additions and 186 deletions.
3 changes: 0 additions & 3 deletions .gitmodules

This file was deleted.

1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ repos:
rev: v3.10.1
hooks:
- id: pyupgrade
args: [--py311-plus]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.282
Expand Down
7 changes: 3 additions & 4 deletions downloadimages.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import os
import sys
import time
from typing import List, Optional

import requests
from parsel import Selector
Expand All @@ -22,7 +21,7 @@


# this only works with traineras.es
def main(rower_id: str, club_name: str, year: Optional[str] = None, output: str = "./out"):
def main(rower_id: str, club_name: str, year: str | None = None, output: str = "./out"):
client = Client(source=Datasource.TRAINERAS) # type: ignore
parser: TrainerasHtmlParser = client._html_parser # type: ignore

Expand All @@ -32,7 +31,7 @@ def main(rower_id: str, club_name: str, year: Optional[str] = None, output: str

t_date = find_date(selector.xpath(f"/html/body/div[1]/main/div/div/div/div[{1}]/h2/text()").get(""))
t_date = t_date.strftime("%d%m%Y") if t_date else None
participants: List[Selector] = parser.get_participants(selector, day=1)
participants: list[Selector] = parser.get_participants(selector, day=1)
for participant in participants:
if (
club_name.upper() not in parser.get_club_name(participant)
Expand All @@ -44,7 +43,7 @@ def main(rower_id: str, club_name: str, year: Optional[str] = None, output: str
time.sleep(20)


def retrieve_images(url: str, t_date: Optional[str], output: str):
def retrieve_images(url: str, t_date: str | None, output: str):
content = requests.get(url=url, headers=HTTP_HEADERS).content.decode("utf-8")
selector = Selector(content)

Expand Down
3 changes: 1 addition & 2 deletions fillforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import os
import sys
from datetime import datetime
from typing import List

from rscraping.builder import (
PdfItem,
Expand All @@ -22,7 +21,7 @@

def main(
signed_on: str,
types: List[str],
types: list[str],
image_path: str,
use_preset: bool,
prompt_entity: bool,
Expand Down
3 changes: 1 addition & 2 deletions findrace.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import os
import sys
from typing import Optional

from rscraping import find_race
from rscraping.data.functions import save_csv
Expand All @@ -31,7 +30,7 @@ def _parse_arguments():
return parser.parse_args()


def main(race_id: str, datasource: str, is_female: bool, with_lineups: bool, save: bool, day: Optional[int]):
def main(race_id: str, datasource: str, is_female: bool, with_lineups: bool, save: bool, day: int | None):
if not Datasource.has_value(datasource):
raise ValueError(f"invalid datasource={datasource}")

Expand Down
5 changes: 2 additions & 3 deletions parseimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import os
import sys
from typing import List

from rscraping import parse_race_image
from rscraping.data.functions import expand_path, save_csv
Expand All @@ -25,11 +24,11 @@ def _parse_arguments():
return parser.parse_args()


def main(paths: List[str], datasource: str, header_size: int = 3, allow_plot: bool = False, save: bool = False):
def main(paths: list[str], datasource: str, header_size: int = 3, allow_plot: bool = False, save: bool = False):
if not Datasource.is_OCR(datasource):
raise ValueError(f"invalid datasource={datasource}")

parsed_items: List[Race] = []
parsed_items: list[Race] = []
for path in paths:
parsed_items.extend(
parse_race_image(
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
2 changes: 2 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-r requirements.txt
pytest
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
fillpdf==0.7.2
inquirer==3.1.3
matplotlib==3.8.0
numpy==1.26.0
numpy==1.26.1
opencv-python==4.8.1.78
openpyxl==3.1.2
pandas==2.1.1
parsel==1.8.1
PyMuPDF==1.23.4
pytesseract==0.3.10
pytest==7.4.2
pyutils @ git+https://github.com/iagocanalejas/pyutils.git@master
reportlab~=4.0.5
requests==2.31.0
Expand Down
9 changes: 5 additions & 4 deletions rscraping/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from typing import Any, Generator, List, Optional
from typing import Any, List, Optional
from collections.abc import Generator

from pyutils.strings import normalize_synonyms, remove_conjunctions, remove_symbols
from rscraping.clients import Client
Expand All @@ -15,9 +16,9 @@ def find_race(
race_id: str,
datasource: Datasource,
is_female: bool,
day: Optional[int] = None,
day: int | None = None,
with_lineup: bool = False,
) -> Optional[Race]:
) -> Race | None:
"""
Find a race based on the provided parameters.
Expand Down Expand Up @@ -100,7 +101,7 @@ def find_lineup(race_id: str, datasource: Datasource, is_female: bool) -> Genera
return client.get_lineup_by_race_id(race_id)


def lemmatize(phrase: str, lang: str = "es") -> List[str]:
def lemmatize(phrase: str, lang: str = "es") -> list[str]:
phrase = normalize_synonyms(phrase, SYNONYMS)
phrase = remove_symbols(remove_conjunctions(phrase))
return list(set(text_lemmatizer(phrase, lang=lang)))
69 changes: 34 additions & 35 deletions rscraping/builder/_item.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

import inquirer

Expand All @@ -9,40 +8,40 @@

@dataclass
class PdfItem:
name: Optional[str] = None
surname: Optional[str] = None
nif: Optional[str] = None
gender: Optional[str] = None
birth: Optional[str] = None
nationality: Optional[str] = None
category: Optional[str] = None

address: Optional[str] = None
address_number: Optional[str] = None
postal_code: Optional[str] = None
town: Optional[str] = None
state: Optional[str] = None
country: Optional[str] = None
phone: Optional[str] = None
email: Optional[str] = None

entity: Optional[str] = None
entity_town: Optional[str] = None
entity_state: Optional[str] = None

sign_in: Optional[str] = None
sign_on_day: Optional[str] = None
sign_on_month: Optional[str] = None
sign_on_year: Optional[str] = None

parent_name: Optional[str] = None
parent_surname: Optional[str] = None
parent_dni: Optional[str] = None
parent_category: Optional[str] = None
is_rower: Optional[bool] = False

is_coach: Optional[bool] = False
is_directive: Optional[bool] = False
name: str | None = None
surname: str | None = None
nif: str | None = None
gender: str | None = None
birth: str | None = None
nationality: str | None = None
category: str | None = None

address: str | None = None
address_number: str | None = None
postal_code: str | None = None
town: str | None = None
state: str | None = None
country: str | None = None
phone: str | None = None
email: str | None = None

entity: str | None = None
entity_town: str | None = None
entity_state: str | None = None

sign_in: str | None = None
sign_on_day: str | None = None
sign_on_month: str | None = None
sign_on_year: str | None = None

parent_name: str | None = None
parent_surname: str | None = None
parent_dni: str | None = None
parent_category: str | None = None
is_rower: bool | None = False

is_coach: bool | None = False
is_directive: bool | None = False

@classmethod
def preset(cls) -> "PdfItem":
Expand Down
3 changes: 1 addition & 2 deletions rscraping/builder/fegar.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
import os
from typing import Optional

from fillpdf import fillpdfs

Expand All @@ -13,7 +12,7 @@


def fill_fegar_form(
data: PdfItem, with_parent: bool, images_folder: Optional[str] = None, remove_temp_files: bool = True
data: PdfItem, with_parent: bool, images_folder: str | None = None, remove_temp_files: bool = True
):
logging.info("fegar:: starting fegar form")

Expand Down
5 changes: 3 additions & 2 deletions rscraping/clients/_client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod
from collections.abc import Generator
from datetime import date
from typing import Any, Generator, Optional
from typing import Any

import requests
from parsel import Selector
Expand Down Expand Up @@ -40,7 +41,7 @@ def validate_year_or_raise_exception(self, year: int):
if year < since or year > today:
raise ValueError(f"invalid 'year', available values are [{since}, {today}]")

def get_race_by_id(self, race_id: str, **kwargs) -> Optional[Race]:
def get_race_by_id(self, race_id: str, **kwargs) -> Race | None:
url = self.get_race_details_url(race_id, is_female=self._is_female)
race = self._html_parser.parse_race(
selector=Selector(requests.get(url=url, headers=HTTP_HEADERS).content.decode("utf-8")),
Expand Down
3 changes: 2 additions & 1 deletion rscraping/clients/act.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Any, Generator, override
from collections.abc import Generator
from typing import Any, override

import fitz
import requests
Expand Down
3 changes: 2 additions & 1 deletion rscraping/clients/arc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Any, Generator, override
from collections.abc import Generator
from typing import Any, override

import requests
from parsel import Selector
Expand Down
9 changes: 5 additions & 4 deletions rscraping/clients/lgt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections.abc import Generator
from datetime import date
from typing import Any, Dict, Generator, Optional, override
from typing import Any, override

import requests
from fitz import fitz
Expand Down Expand Up @@ -109,7 +110,7 @@ def get_calendar_selector() -> Selector:
return Selector(requests.post(url=url, headers=HTTP_HEADERS, data=data).content.decode("utf-8"))

@override
def get_race_by_id(self, race_id: str, **kwargs) -> Optional[Race]:
def get_race_by_id(self, race_id: str, **kwargs) -> Race | None:
if race_id in self._excluded_ids:
return None

Expand Down Expand Up @@ -210,9 +211,9 @@ def get_race_ids_by_rower(self, **_):
# UTILS #
####################################################

_RACE_YEARS: Dict[str, Optional[int]] = {}
_RACE_YEARS: dict[str, int | None] = {}

def _get_race_year(self, race_id: str) -> Optional[int]:
def _get_race_year(self, race_id: str) -> int | None:
if race_id in self._RACE_YEARS:
return self._RACE_YEARS[race_id]

Expand Down
5 changes: 3 additions & 2 deletions rscraping/clients/traineras.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Any, Generator, Optional, override
from collections.abc import Generator
from typing import Any, override

import requests
from parsel import Selector
Expand Down Expand Up @@ -57,7 +58,7 @@ def get_race_ids_by_year(self, year: int, **_) -> Generator[str, Any, Any]:
for page in self.get_pages(year):
yield from self._html_parser.parse_race_ids(page, is_female=self._is_female)

def get_race_ids_by_rower(self, rower_id: str, year: Optional[str] = None, **_) -> Generator[str, Any, Any]:
def get_race_ids_by_rower(self, rower_id: str, year: str | None = None, **_) -> Generator[str, Any, Any]:
content = requests.get(url=self.get_rower_url(rower_id), headers=HTTP_HEADERS).content.decode("utf-8")
yield from self._html_parser.parse_rower_race_ids(Selector(content), year=year)

Expand Down
5 changes: 2 additions & 3 deletions rscraping/data/functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import csv
import os
from typing import List

from pyutils.strings import remove_symbols
from rscraping.data.models import Lineup, Race
Expand All @@ -19,7 +18,7 @@ def is_branch_club(name: str, letter: str = "B") -> bool:
return any(e == letter for e in clean_name.upper().split())


def expand_path(path: str, valid_files: List[str]) -> List[str]:
def expand_path(path: str, valid_files: list[str]) -> list[str]:
def is_valid(file: str) -> bool:
_, extension = os.path.splitext(file)
return extension.upper() in valid_files
Expand All @@ -28,7 +27,7 @@ def is_valid(file: str) -> bool:
return [f for f in files if is_valid(f)]


def save_csv(lineups: List[Race] | List[Lineup], file_name: str):
def save_csv(lineups: list[Race] | list[Lineup], file_name: str):
if not len(lineups):
return

Expand Down
Loading

0 comments on commit d707102

Please sign in to comment.