diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 9f8089b..ed60a56 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -3,7 +3,7 @@ name: Docker Compose Build and Test
 on:
   push:
     branches:
-      - vds_parse
+      - all-testing
 
 jobs:
   build-redis:
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 15758a0..90940cd 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -1,13 +1,12 @@
-name: Pylint
+name: Pylint on PR and pushes to all-testing
 
 on:
   pull_request:
     branches:
-      - '**' # This will match pushes to all branches, including merges
+      - '**'
   push:
     branches:
-      - develop
-      - main
+      - all-testing
 
 jobs:
   build:
@@ -18,13 +17,14 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install pylint
+        pip install -r */requirements.txt
     - name: Analysing the code with pylint
       run: |
         pylint $(git ls-files '*.py')
diff --git a/parser/parse_edu.py b/parser/parse_edu.py
index 83d3664..874f03a 100644
--- a/parser/parse_edu.py
+++ b/parser/parse_edu.py
@@ -1,21 +1,29 @@
-from dotenv import dotenv_values
-
 """
-Gettin login and pass from .env file to edu.21-school.ru
-File format:
-    LOGIN=login
-    PASSWORD=pass
+## Log in and parse the school edu site.
 """
+
+import time
+import logging
+from dotenv import dotenv_values
+
 from selenium import webdriver
+from selenium.common.exceptions import (
+    NoSuchElementException,
+    ElementNotInteractableException,
+)
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.by import By
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service
-from parse_raw_from_html import *
-import time
+from parse_raw_from_html import parse_raw_data_from_cluster
+
+logging.basicConfig(level=logging.WARNING)
 
 
 def create_chromedriver():
+    """
+    ### Create a Chrome driver for the browser
+    """
     chromedriver_path = "./venv/chromedriver"
     chrome_service = Service(chromedriver_path)
     chrome_options = Options()
@@ -30,7 +38,7 @@ def create_chromedriver():
 
 def auth_edu(driver):
     """
-    Log in to the edu website
+    ### Log in to the edu website
     """
     try:
         driver.get("https://edu.21-school.ru/campus")
@@ -42,13 +50,13 @@
         time.sleep(0.5)
         password_field.send_keys(Keys.ENTER)
         time.sleep(3)
-    except Exception as ex:
-        print(ex)
+    except (NoSuchElementException, ElementNotInteractableException) as ex:
+        logging.error("An error occurred while trying to log in: %s", ex)
 
 
 def displaying_floors(driver):
     """
-    Unfolding the floor block
+    ### Unfolding the floor block
     """
     try:
         floor2_t = driver.find_element(
@@ -67,13 +75,13 @@
             By.XPATH, '//*[@id="root"]/div[2]/div/div[2]/div[2]/div[1]/button/div'
         ).click()
         time.sleep(1)
-    except Exception as ex:
-        print(ex)
+    except (NoSuchElementException, ElementNotInteractableException) as ex:
+        logging.error("An error occurred while trying to display floors: %s", ex)
 
 
-def parse_each_cluster(driver):
+def parse_each_cluster(driver) -> set[tuple]:
     """
-    Open each cluster, and parse peers from html
+    ### Open each cluster and parse peers from the HTML
     """
 
     all_peers = set()
@@ -88,7 +96,6 @@
     }
 
     try:
-        for cluster_name in clusters_xpaths_dct:
+        for cluster_name, cluster_xpath in clusters_xpaths_dct.items():
             print(f"start parse {cluster_name}")
-            cluster_xpath = clusters_xpaths_dct[cluster_name]
             driver.find_element(By.XPATH, cluster_xpath).click()
@@ -96,21 +103,21 @@
             html = driver.find_element(By.TAG_NAME, "body").get_attribute("innerHTML")
             peers_from_this_cluster = parse_raw_data_from_cluster(cluster_name, html)
             if not peers_from_this_cluster:
-                print(
-                    f"\033[91m cluster {cluster_name} empty or failed load cluster data \033[0m"
+                logging.warning(
+                    "cluster %s empty or failed to load cluster data", cluster_name
                 )
             all_peers.update(peers_from_this_cluster)
 
-        print(f"\nPeers counter from all clusters {len(all_peers)} at {time.ctime()}\n")
-
-    except Exception as ex:
-        print(ex)
+    except (NoSuchElementException, ElementNotInteractableException) as ex:
+        logging.error("An error occurred while parsing clusters: %s", ex)
 
-    finally:
-        return all_peers
+    return all_peers
 
 
-def login_and_parse_campus_map():
+def login_and_parse_campus_map() -> set[tuple]:
+    """
+    ### Entry point for parsing edu
+    """
 
     driver = create_chromedriver()
     auth_edu(driver)
@@ -124,4 +131,3 @@
 
 if __name__ == "__main__":
     login_and_parse_campus_map()
-    # time_test_parse()
diff --git a/parser/parse_raw_from_html.py b/parser/parse_raw_from_html.py
index d2df892..c544444 100644
--- a/parser/parse_raw_from_html.py
+++ b/parser/parse_raw_from_html.py
@@ -1,4 +1,8 @@
-import re, json
+"""
+## Parse raw data from HTML and convert it to JSON
+"""
+import re
+import json
 from datetime import datetime
 
 
@@ -35,15 +39,12 @@
 
             peers.add((match.group(1), cluster_name, row_letter, row_number))
 
-    # here heed logging to log file
-    print(f"{len(peers)} peers in {cluster_name}")
-
     return peers
 
 
-def convert_to_json(parsed_data: set[tuple]):
+def convert_to_json(parsed_data: set[tuple]) -> str:
     """
-    Convert a set of tuples to JSON.
+    ### Convert a set of tuples to JSON.
"peers": { "peer_nick": { "status": "val" @@ -53,8 +54,8 @@ def convert_to_json(parsed_data: set[tuple]): "time": "val", }, """ - # if not isinstance(parsed_data, set) or not len(parsed_data): - # return None + if not parsed_data or not isinstance(parsed_data, set) : + return {} data_as_dict = {"peers": {}} for parsed_nick, parsed_cluster, parsed_row, parsed_col in parsed_data: @@ -67,9 +68,7 @@ def convert_to_json(parsed_data: set[tuple]): "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), } } - data_as_dict["peers"].update(temp_peer_dict) - json_parsed_data = json.dumps(data_as_dict) return json_parsed_data diff --git a/parser/parser_main.py b/parser/parser_main.py index 3d62fa4..b41cbe0 100644 --- a/parser/parser_main.py +++ b/parser/parser_main.py @@ -1,11 +1,14 @@ -from parse_edu import * -from parser_sender import * +""" +# Entery point to parse service +## Parse and send data to DB API +""" + +from parse_edu import login_and_parse_campus_map +from parse_raw_from_html import convert_to_json +from parser_sender import update_peers -# отдельный поточек блин блинский пончик if __name__ == "__main__": temp_data = login_and_parse_campus_map() temp_json = convert_to_json(temp_data) update_peers(temp_json) - - \ No newline at end of file diff --git a/parser/parser_sender.py b/parser/parser_sender.py index 2a5c9ce..4771381 100644 --- a/parser/parser_sender.py +++ b/parser/parser_sender.py @@ -1,7 +1,11 @@ +""" +## Sending json to DB API +""" + import requests -def update_peers(data_in_json): +def update_peers(data_in_json: dict): """ ## Sending parsed data to redis API """ @@ -12,9 +16,10 @@ def update_peers(data_in_json): try: response = requests.post( url=url_to_redis_api, - json=data_in_json, + json=data_in_json, headers=headers, + timeout=10, ) - response.raise_for_status() + response.raise_for_status() except requests.exceptions.RequestException as e: print(f"An error occurred: {e}") diff --git a/parser/readme_parser.md b/parser/readme_parser.md index c7dddda..b7c5467 100644 --- a/parser/readme_parser.md +++ b/parser/readme_parser.md @@ -29,18 +29,15 @@ data_to_return = ``` ## DOCs +Хочу в парсере при установке контейнера скачивать не самый новый хром и драйвер, а один и тот же. + Вебдрайвер должен лежать в `.venv/` -вебдрайвер для линукс +Вебдрайвер для линукс https://storage.googleapis.com/chrome-for-testing-public/125.0.6422.60/linux64/chrome-linux64.zip - -crontab -*/1 * * * * cd /home/jenniffr/friends_bot/ && . /home/jenniffr/friends_bot/venv/bin/activate && /home/jenniffr/friends_bot/venv/bin/python3 /home/jenniffr/friends_bot/main.py >> /home/jenniffr/friends_bot/cron.log - -scp jenniffr@87.242.85.185:/home/jenniffr/friends_bot/cron.log ./ -## Parser ожидаемые проблемы. +## Parser ожидаемые проблемы. Для покрытия тестами 1. Не хватило ресурсов и программа не выполнилась за минуту. 2. Не ответил сайт @@ -50,11 +47,3 @@ scp jenniffr@87.242.85.185:/home/jenniffr/friends_bot/cron.log ./ 6. Не удалось загрузить класстер 7. Не удалось распрасить данные класстера. 8. Все класстеры пусты. 
- - - - -на псевдокоде нужно чтобы создаешь поток -создаешь лок для потока -в потоке пушишь на ручку -снимаешь лок diff --git a/parser/tests/test_parser.py b/parser/tests/test_parser.py index 3cfe34e..cb49a9f 100644 --- a/parser/tests/test_parser.py +++ b/parser/tests/test_parser.py @@ -1,4 +1,10 @@ -def time_test_parse(): - start = time.time() - login_and_parse_campus_map() - print(f"parse take {time.time() - start} seconds") \ No newline at end of file +""" +## Tests for parser service +""" + +# import time + +# def time_test_parse(): +# start = time.time() +# login_and_parse_campus_map() +# print(f"parse take {time.time() - start} seconds") diff --git a/redis/backup_redis.py b/redis/backup_redis.py deleted file mode 100644 index 73a7872..0000000 --- a/redis/backup_redis.py +++ /dev/null @@ -1,2 +0,0 @@ -if __name__ == "__main__": - pass \ No newline at end of file diff --git a/redis/docker-compose.yml b/redis/docker-compose.yml deleted file mode 100644 index 8dca41f..0000000 --- a/redis/docker-compose.yml +++ /dev/null @@ -1,13 +0,0 @@ -version: '3.8' - -services: - redis: - image: redis:latest - container_name: redis-container - ports: - - "6379:6379" - volumes: - - redis-data:/data - -volumes: - redis-data: diff --git a/redis/initialize_redis.py b/redis/initialize_redis.py deleted file mode 100644 index 22caaf2..0000000 --- a/redis/initialize_redis.py +++ /dev/null @@ -1,24 +0,0 @@ -import redis -from datetime import datetime - -def initialize_redis(): - - client = redis.Redis(host="localhost", port=6379, db=0) - - friends_data = {"tg_id": {"peer_nick", "peer_nick"}} - for key, value in friends_data.items(): - client.hset(key, value) - - sessions_data = { - "peer_nick": { - "status": "1", - "cluster": "init", - "row": "a", - "col": "1", - "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - } - } - for key, value in sessions_data.items(): - client.hset(key, value) - - print("Data has been initialized in Redis") diff --git a/redis/main_redis.py b/redis/main_redis.py deleted file mode 100644 index 1038f6e..0000000 --- a/redis/main_redis.py +++ /dev/null @@ -1,8 +0,0 @@ -from initialize_redis import * - -if __name__ == "__main__": - initialize_redis() - client = redis.Redis(host="localhost", port=6379, db=0) - client.hgetall("tg_id") - client.hgetall("peer_nick") - \ No newline at end of file diff --git a/redis/readme_redis.md b/redis/readme_redis.md deleted file mode 100644 index 9bf80a0..0000000 --- a/redis/readme_redis.md +++ /dev/null @@ -1,17 +0,0 @@ -## Redis container - -## DBs - -``` -"tg_id":["peer_nick", "peer_nick"], - -"peer_nick":[ - "status": "val" - "cluster": "val", - "row": "val", - "col": "val", - "time": "val", - ], - - -``` \ No newline at end of file
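A minimal sketch (not part of the patch) of how the pinned driver described in `readme_parser.md` could be fetched into `./venv/`, which is where `create_chromedriver()` looks for it (`chromedriver_path = "./venv/chromedriver"`; note the readme says `.venv/` while the code uses `./venv/`). The `chromedriver-linux64` URL and the archive member path are assumptions based on the Chrome-for-Testing layout of the `chrome-linux64` link in the readme; verify them before relying on this.

```python
"""Sketch: download a pinned chromedriver build into ./venv/ (assumed URL layout)."""
import io
import os
import stat
import zipfile
import urllib.request

# Pinned version taken from readme_parser.md; the chromedriver-linux64 path is an
# assumption mirroring the chrome-linux64 link, not a verified URL.
VERSION = "125.0.6422.60"
DRIVER_ZIP_URL = (
    "https://storage.googleapis.com/chrome-for-testing-public/"
    f"{VERSION}/linux64/chromedriver-linux64.zip"
)


def install_pinned_chromedriver(dest_dir: str = "./venv") -> str:
    """Fetch the pinned chromedriver zip and unpack the binary into dest_dir."""
    with urllib.request.urlopen(DRIVER_ZIP_URL) as resp:
        archive = zipfile.ZipFile(io.BytesIO(resp.read()))
    os.makedirs(dest_dir, exist_ok=True)
    target = os.path.join(dest_dir, "chromedriver")
    # Assumed member name: the archive stores the binary under chromedriver-linux64/.
    with archive.open("chromedriver-linux64/chromedriver") as src, open(target, "wb") as dst:
        dst.write(src.read())
    # Make the unpacked binary executable so Selenium's Service can start it.
    os.chmod(target, os.stat(target).st_mode | stat.S_IEXEC)
    return target


if __name__ == "__main__":
    print(install_pinned_chromedriver())
```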
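Also for reference: the parse → convert → send flow that `parser_main.py` wires together, shown on a hand-made peer tuple. The nick-keyed payload layout follows the docstring schema in `convert_to_json`; the sample nick and values are made up, and `update_peers` is left commented out because it POSTs to the redis API (`url_to_redis_api`), which has to be running.

```python
"""Sketch: the parse -> convert -> send flow from parser_main.py, on a hand-made peer."""
import json

from parse_raw_from_html import convert_to_json
from parser_sender import update_peers

# One (nick, cluster, row, col) tuple, the shape parse_each_cluster() collects.
sample_peers = {("somenick", "c1", "a", "3")}

# convert_to_json returns a JSON string shaped like the docstring schema:
# {"peers": {"<nick>": {"status": ..., "cluster": ..., "row": ..., "col": ..., "time": ...}}}
payload = convert_to_json(sample_peers)
print(json.dumps(json.loads(payload), indent=2))

# update_peers() POSTs the payload to the redis API; only call it when that service is up.
# update_peers(payload)
```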
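Finally, a hedged pytest-style sketch for `parser/tests/test_parser.py`, covering item 8 of the expected-problems list ("all clusters are empty") plus the normal case. It assumes the patched `convert_to_json` (empty input yields an empty JSON object, peers are keyed by nick under `"peers"` with a `"cluster"` field, per the docstring schema) and that `parser/` is on `PYTHONPATH` when the tests run; `somenick` is a made-up nick.

```python
"""Sketch: pytest checks for convert_to_json, per the readme's expected-problems list."""
import json

from parse_raw_from_html import convert_to_json


def test_all_clusters_empty_returns_empty_json():
    # Expected-problems item 8: every cluster came back empty.
    assert json.loads(convert_to_json(set())) == {}


def test_single_peer_lands_under_peers():
    payload = json.loads(convert_to_json({("somenick", "c1", "a", "3")}))
    assert "somenick" in payload["peers"]
    assert payload["peers"]["somenick"]["cluster"] == "c1"
```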