Merge pull request #10 from DrVeles/drveles-parser
Drveles parser testing pylint
drveles authored Jun 1, 2024
2 parents c90cca0 + 168d578 commit a206488
Showing 13 changed files with 79 additions and 134 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -3,7 +3,7 @@ name: Docker Compose Build and Test
on:
push:
branches:
- vds_parse
- all-testing

jobs:
build-redis:
10 changes: 5 additions & 5 deletions .github/workflows/pylint.yml
@@ -1,13 +1,12 @@
name: Pylint
name: Pylint on PRs and pushes to all-testing

on:
pull_request:
branches:
- '**' # This will match pushes to all branches, including merges
- '**'
push:
branches:
- develop
- main
- all-testing

jobs:
build:
@@ -18,13 +17,14 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint
pip install -r */requirements.txt
- name: Analysing the code with pylint
run: |
pylint $(git ls-files '*.py')
61 changes: 34 additions & 27 deletions parser/parse_edu.py
@@ -1,21 +1,29 @@
from dotenv import dotenv_values

"""
Gettin login and pass from .env file to edu.21-school.ru
File format:
LOGIN=login
PASSWORD=pass
## Log in to and parse the school edu site.
"""

import time
import logging
from dotenv import dotenv_values

from selenium import webdriver
from selenium.common.exceptions import (
NoSuchElementException,
ElementNotInteractableException,
)
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from parse_raw_from_html import *
import time
from parse_raw_from_html import parse_raw_data_from_cluster

logging.basicConfig(level=logging.ERROR)


def create_chromedriver():
"""
### Create driver for browser
"""
chromedriver_path = "./venv/chromedriver"
chrome_service = Service(chromedriver_path)
chrome_options = Options()
@@ -30,7 +38,7 @@ def create_chromedriver():

def auth_edu(driver):
"""
Log in to the edu website
### Log in to the edu website
"""
try:
driver.get("https://edu.21-school.ru/campus")
@@ -42,13 +50,13 @@
time.sleep(0.5)
password_field.send_keys(Keys.ENTER)
time.sleep(3)
except Exception as ex:
print(ex)
except (NoSuchElementException, ElementNotInteractableException) as ex:
logging.error("An error occurred while trying to log in: %s", ex)


def displaying_floors(driver):
"""
Unfolding the floor block
### Unfolding the floor block
"""
try:
floor2_t = driver.find_element(
@@ -67,13 +75,13 @@
By.XPATH, '//*[@id="root"]/div[2]/div/div[2]/div[2]/div[1]/button/div'
).click()
time.sleep(1)
except Exception as ex:
print(ex)
except (NoSuchElementException, ElementNotInteractableException) as ex:
logging.error("An error occurred while trying to display floors: %s", ex)


def parse_each_cluster(driver):
def parse_each_cluster(driver) -> set[tuple]:
"""
Open each cluster, and parse peers from html
### Open each cluster, and parse peers from html
"""

all_peers = set()
@@ -88,29 +96,29 @@
}
try:

for cluster_name in clusters_xpaths_dct:
for cluster_name in clusters_xpaths_dct.items():
print(f"start parse {cluster_name}")
cluster_xpath = clusters_xpaths_dct[cluster_name]
driver.find_element(By.XPATH, cluster_xpath).click()
time.sleep(4.5)
html = driver.find_element(By.TAG_NAME, "body").get_attribute("innerHTML")
peers_from_this_cluster = parse_raw_data_from_cluster(cluster_name, html)
if not peers_from_this_cluster:
print(
f"\033[91m cluster {cluster_name} empty or failed load cluster data \033[0m"
logging.warning(
"cluster %s empty or failed to load cluster data", cluster_name
)
all_peers.update(peers_from_this_cluster)

print(f"\nPeers counter from all clusters {len(all_peers)} at {time.ctime()}\n")

except Exception as ex:
print(ex)
except (NoSuchElementException, ElementNotInteractableException) as ex:
logging.error("An error occurred while parsing clusters: %s", ex)

finally:
return all_peers
return all_peers


def login_and_parse_campus_map():
def login_and_parse_campus_map() -> set[tuple]:
"""
### Entry point to parse edu
"""
driver = create_chromedriver()

auth_edu(driver)
@@ -124,4 +132,3 @@ def login_and_parse_campus_map():

if __name__ == "__main__":
login_and_parse_campus_map()
# time_test_parse()
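A note on the loop change in `parse_each_cluster` above: `dict.items()` yields `(key, value)` pairs, so iterating it and then indexing `clusters_xpaths_dct[cluster_name]` with that pair would raise a `KeyError`. Below is a minimal sketch (not part of this commit) of how the loop is usually written once `.items()` is adopted; imports and exception types are those already used in parse_edu.py, and the XPath value is a placeholder.

```python
def parse_each_cluster(driver) -> set[tuple]:
    """Open each cluster and parse peers from its HTML (sketch of the loop only)."""
    all_peers = set()
    clusters_xpaths_dct = {"cluster-1": '//*[@id="root"]/...'}  # placeholder XPaths
    try:
        # Unpack both the cluster name and its XPath in the loop header,
        # so no separate clusters_xpaths_dct[...] lookup is needed.
        for cluster_name, cluster_xpath in clusters_xpaths_dct.items():
            driver.find_element(By.XPATH, cluster_xpath).click()
            time.sleep(4.5)
            html = driver.find_element(By.TAG_NAME, "body").get_attribute("innerHTML")
            peers = parse_raw_data_from_cluster(cluster_name, html)
            if not peers:
                logging.warning(
                    "cluster %s empty or failed to load cluster data", cluster_name
                )
            all_peers.update(peers)
    except (NoSuchElementException, ElementNotInteractableException) as ex:
        logging.error("An error occurred while parsing clusters: %s", ex)
    return all_peers
```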
19 changes: 9 additions & 10 deletions parser/parse_raw_from_html.py
@@ -1,4 +1,8 @@
import re, json
"""
## Parse raw data from .html data and convert to .json
"""
import re
import json
from datetime import datetime


@@ -35,15 +39,12 @@ def parse_raw_data_from_cluster(cluster_name, cluster_data) -> set:

peers.add((match.group(1), cluster_name, row_letter, row_number))

# here heed logging to log file
print(f"{len(peers)} peers in {cluster_name}")

return peers


def convert_to_json(parsed_data: set[tuple]):
def convert_to_json(parsed_data: set[tuple]) -> dict[dict[dict]]:
"""
Convert a set of tuples to JSON.
### Convert a set of tuples to JSON.
"peers": {
"peer_nick": {
"status": "val"
@@ -53,8 +54,8 @@ def convert_to_json(parsed_data: set[tuple]):
"time": "val",
},
"""
# if not isinstance(parsed_data, set) or not len(parsed_data):
# return None
if not parsed_data or not isinstance(parsed_data, set):
return {}
data_as_dict = {"peers": {}}

for parsed_nick, parsed_cluster, parsed_row, parsed_col in parsed_data:
@@ -67,9 +68,7 @@
"time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
}
}

data_as_dict["peers"].update(temp_peer_dict)

json_parsed_data = json.dumps(data_as_dict)

return json_parsed_data
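For reference, a small worked example of the conversion (an illustration, not repository code — the nicknames and cluster name are made up). Note that the function body ends with `json.dumps(...)`, so it returns a JSON string rather than the nested dict the new `dict[dict[dict]]` annotation suggests; `-> str` would describe the visible behaviour more precisely.

```python
import json

from parse_raw_from_html import convert_to_json

# Hypothetical parser output: (nick, cluster, row_letter, row_number) tuples.
parsed = {("alice", "cluster-1", "a", "3"), ("bob", "cluster-1", "c", "7")}

result = convert_to_json(parsed)   # a JSON string produced by json.dumps
data = json.loads(result)
print(sorted(data["peers"]))       # ['alice', 'bob'], each entry carrying
                                   # cluster/row/col and a timestamp
```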
13 changes: 8 additions & 5 deletions parser/parser_main.py
@@ -1,11 +1,14 @@
from parse_edu import *
from parser_sender import *
"""
# Entry point to parse service
## Parse and send data to DB API
"""

from parse_edu import login_and_parse_campus_map
from parse_raw_from_html import convert_to_json
from parser_sender import update_peers


# a separate little thread for this, darn it
if __name__ == "__main__":
temp_data = login_and_parse_campus_map()
temp_json = convert_to_json(temp_data)
update_peers(temp_json)


11 changes: 8 additions & 3 deletions parser/parser_sender.py
@@ -1,7 +1,11 @@
"""
## Sending json to DB API
"""

import requests


def update_peers(data_in_json):
def update_peers(data_in_json: dict):
"""
## Sending parsed data to redis API
"""
@@ -12,9 +16,10 @@ def update_peers(data_in_json):
try:
response = requests.post(
url=url_to_redis_api,
json=data_in_json,
json=data_in_json,
headers=headers,
timeout=10,
)
response.raise_for_status()
response.raise_for_status()
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
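A usage note, offered as an assumption rather than a reading of the commit: `requests.post(..., json=...)` serialises its argument itself, so `update_peers` pairs most naturally with the dict form of the payload; if the string already produced by `json.dumps` is passed through `json=`, it gets encoded a second time. A minimal sketch of the two options (the URL, headers and payload below are placeholders):

```python
import json
import requests

url_to_redis_api = "http://localhost:8000/peers"         # placeholder endpoint
headers = {"Content-Type": "application/json"}
payload = {"peers": {"alice": {"status": "on_campus"}}}  # hypothetical data

# Option 1: pass the dict and let requests serialise it once via json=.
requests.post(url=url_to_redis_api, json=payload, headers=headers, timeout=10)

# Option 2: if the payload is already a JSON string (e.g. the json.dumps
# output from convert_to_json), send it as the raw request body instead.
requests.post(url=url_to_redis_api, data=json.dumps(payload),
              headers=headers, timeout=10)
```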
19 changes: 4 additions & 15 deletions parser/readme_parser.md
@@ -29,18 +29,15 @@ data_to_return =
```
## DOCs

In the parser, when the container is built, I want to download the same pinned Chrome and driver every time, not the newest one.

The webdriver must live in `.venv/`

webdriver for Linux
Webdriver for Linux
https://storage.googleapis.com/chrome-for-testing-public/125.0.6422.60/linux64/chrome-linux64.zip

<!-- - Cron or systemctl to auto-start the parser. -->
crontab
*/1 * * * * cd /home/jenniffr/friends_bot/ && . /home/jenniffr/friends_bot/venv/bin/activate && /home/jenniffr/friends_bot/venv/bin/python3 /home/jenniffr/friends_bot/main.py >> /home/jenniffr/friends_bot/cron.log

scp jenniffr@87.242.85.185:/home/jenniffr/friends_bot/cron.log ./

## Parser: expected problems.
## Parser: expected problems. For test coverage

1. Not enough resources, so the program did not finish within a minute.
2. The site did not respond
@@ -50,11 +47,3 @@
6. Failed to load a cluster
7. Failed to parse the cluster data.
8. All clusters are empty.




in pseudocode: you create a thread
create a lock for the thread
in the thread, push to the API endpoint
release the lock
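The removed note above sketches pushing to the endpoint from a separate thread while holding a lock. A minimal Python sketch of that pattern (an assumption, not project code; `update_peers` is the sender from parser_sender.py and the payload here is a placeholder):

```python
import threading

from parser_sender import update_peers  # assumes the parser modules are importable

send_lock = threading.Lock()
temp_json = {"peers": {}}  # placeholder payload


def push_to_api(json_payload):
    """Push parsed data to the DB API from a worker thread."""
    with send_lock:                 # take the lock for the thread
        update_peers(json_payload)  # push to the endpoint
    # the lock is released automatically when the with-block exits


worker = threading.Thread(target=push_to_api, args=(temp_json,), daemon=True)
worker.start()
worker.join()
```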
14 changes: 10 additions & 4 deletions parser/tests/test_parser.py
@@ -1,4 +1,10 @@
def time_test_parse():
start = time.time()
login_and_parse_campus_map()
print(f"parse take {time.time() - start} seconds")
"""
## Tests for parser service
"""

# import time

# def time_test_parse():
# start = time.time()
# login_and_parse_campus_map()
# print(f"parse take {time.time() - start} seconds")
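As a starting point for the coverage cases listed in readme_parser.md, a minimal pytest-style sketch (an assumption, not part of this commit; it presumes the parser directory is on the import path) exercising the new empty-input guard in `convert_to_json`:

```python
from parse_raw_from_html import convert_to_json


def test_convert_to_json_rejects_empty_input():
    # Covers the "all clusters are empty" case: the new guard returns {}
    # instead of building a JSON payload.
    assert convert_to_json(set()) == {}
    assert convert_to_json(None) == {}
```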
2 changes: 0 additions & 2 deletions redis/backup_redis.py

This file was deleted.

13 changes: 0 additions & 13 deletions redis/docker-compose.yml

This file was deleted.

24 changes: 0 additions & 24 deletions redis/initialize_redis.py

This file was deleted.

8 changes: 0 additions & 8 deletions redis/main_redis.py

This file was deleted.

17 changes: 0 additions & 17 deletions redis/readme_redis.md

This file was deleted.
