-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
162 lines (125 loc) · 5.74 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import asyncio
import base64
import logging
from pathlib import Path
import aiohttp
import jdatetime
import telegram
from bs4 import BeautifulSoup
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
from decouple import config
# Configuration
# Every setting is loaded through python-decouple's `config`, so credentials
# and tokens live outside the source file.
SHIFTBOARD_USERNAME = config("SHIFTBOARD_USERNAME")
SHIFTBOARD_PASSWORD = config("SHIFTBOARD_PASSWORD")
# Chat that receives the shift notification and the bot token used to send it.
MY_TELEGRAM_CHAT_ID = config("MY_TELEGRAM_CHAT_ID")
TELEGRAMBOT_TOKEN = config("TELEGRAMBOT_TOKEN")
# The same URL is used both to fetch the login form and to post credentials.
SHIFTBOARD_LOGIN_URL = config("SHIFTBOARD_LOGIN_URL")
# Key and IV are stored as text in the settings and converted to bytes here.
# NOTE(review): the IV is a fixed setting that is passed to every
# encrypt_text() call — confirm that reusing one IV is acceptable here.
AES_KEY = config("THE_AES_KEY").encode() # Ensure your AES key is 16, 24, or 32 bytes long
AES_IV = config("THE_AES_IV").encode() # Ensure your AES IV is 16 bytes long
# Basic configuration for logging
logging.basicConfig(
    level=logging.INFO,  # Set the logging level to INFO
    format='%(asctime)s - %(levelname)s - %(message)s',  # Custom log format
    handlers=[
        # The script logs Persian text, so the file encoding is pinned to
        # UTF-8 instead of relying on the platform default (which cannot
        # represent those characters on some systems).
        logging.FileHandler('app.log', encoding='utf-8'),  # Log to a file
        logging.StreamHandler(),  # Log to the console
    ],
)
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
# AES Encryption
def encrypt_text(text, key, iv):
    """Encrypt *text* with AES-CBC and return it as a base64 string.

    Args:
        text: Plain string to encrypt; it is encoded to bytes and padded to
            the AES block size before encryption.
        key: AES key as bytes.
        iv: Initialisation vector as bytes.

    Returns:
        A base64 string whose decoded bytes are the IV followed by the
        ciphertext, so ``decrypt_text`` can recover the IV from the payload.
    """
    cbc = AES.new(key, AES.MODE_CBC, iv)
    padded_plaintext = pad(text.encode(), AES.block_size)
    # The IV is stored in front of the ciphertext so that it travels with it.
    token = base64.b64encode(iv + cbc.encrypt(padded_plaintext))
    return token.decode("utf-8")
# AES Decryption
def decrypt_text(encrypted_text, key):
    """Decrypt a base64 payload laid out as IV followed by AES-CBC ciphertext.

    Args:
        encrypted_text: Base64 text whose decoded bytes start with a 16-byte
            IV, followed by the ciphertext.
        key: AES key as bytes.

    Returns:
        The decrypted, unpadded plaintext decoded as UTF-8.
    """
    raw = base64.b64decode(encrypted_text)
    # First 16 bytes are the IV; everything after is the ciphertext.
    iv, ciphertext = raw[:16], raw[16:]
    padded_plaintext = AES.new(key, AES.MODE_CBC, iv).decrypt(ciphertext)
    return unpad(padded_plaintext, AES.block_size).decode("utf-8")
# Get text content while keeping spaces and dashes
def get_readable_text(element):
    """Return the stripped text of every direct child of *element*, in order.

    Args:
        element: A parsed node exposing ``contents``; each child must provide
            ``get_text(strip=True)``.

    Returns:
        A list with one string per child. Children without text yield an
        empty string; nothing is filtered out here.
    """
    texts = []
    for child in element.contents:
        texts.append(child.get_text(strip=True))
    return texts
# Translation table from ASCII digits to Persian digits. Built once at import
# time rather than on every call to latin_to_persian().
_LATIN_TO_PERSIAN_DIGITS = str.maketrans("0123456789", "۰۱۲۳۴۵۶۷۸۹")


def latin_to_persian(text):
    """Return *text* with the ASCII digits 0-9 replaced by Persian digits.

    Args:
        text: Any string; characters other than ASCII digits are left as-is.

    Returns:
        A new string of the same length with only the digits converted.
    """
    return text.translate(_LATIN_TO_PERSIAN_DIGITS)
def _find_upcoming_shifts(soup, today):
    """Collect ``(date, td)`` pairs for table cells holding a shift after *today*.

    A cell qualifies when one of its ``panel-title`` spans parses as a
    ``%Y-%m-%d`` date later than *today*, the cell does not contain the
    "positions are in draft state" notice, and at least one of its links is
    labelled as a night shift or a day shift.
    """
    upcoming = []
    for td_element in soup.find_all("td"):
        for date_span in td_element.find_all("span", class_="panel-title"):
            try:
                date = jdatetime.datetime.strptime(date_span.get_text(strip=True), "%Y-%m-%d").date()
            except ValueError:
                # Not every panel title is a date; skip the ones that are not.
                continue
            if date <= today:
                continue
            if td_element.find(string="موقعیت ها در وضعیت پیش نویس هستند."):
                continue
            has_shift_link = any(
                anchor.get_text(strip=True) in ("شیفت شب", "شیفت روز")
                for anchor in td_element.find_all("a")
            )
            if has_shift_link:
                upcoming.append((date, td_element))
    return upcoming


def _format_shift_message(shift_cell):
    """Build the Telegram HTML message for a single shift table cell.

    Raises:
        RuntimeError: If the cell lacks the expected details panel or yields
            fewer text fragments than the message layout needs.
    """
    import html  # local import: only needed to escape scraped text for Telegram HTML

    panel_titles_text = [
        title.get_text(strip=True)
        for title in shift_cell.find_all("span", class_="panel-title")
    ]
    target_div = shift_cell.find("div", class_="panel border-top-xlg border-top-green alpha-green")
    if target_div is None:
        raise RuntimeError("Shiftboard markup changed: shift details panel was not found.")

    # Turn dash icons into text so they survive the text extraction below.
    for icon in target_div.find_all("i", class_="icon-dash"):
        icon.replace_with(" - ")

    parts = [text for text in panel_titles_text + get_readable_text(target_div) if text]
    # Indices 0..8 are addressed below (index 3 is intentionally unused).
    if len(parts) < 9:
        raise RuntimeError(
            f"Shiftboard markup changed: expected at least 9 text fragments, got {len(parts)}."
        )

    # Convert digits to Persian, zero-pad each component to two characters and
    # reverse the component order of the dash-separated date.
    date = "-".join(
        component.rjust(2, "۰")
        for component in latin_to_persian(parts[2]).split("-")[::-1]
    )

    # Scraped text is untrusted: escape &, < and > so that it cannot break or
    # inject markup in a message sent with parse_mode="HTML".
    title, first_line, second_line = (
        html.escape(line, quote=False)
        for line in (
            f"{parts[0]} {parts[1]} {date}",
            f"{' '.join(parts[5:8])} {parts[4]}",
            f"{' '.join(parts[9:])} {parts[8]}",
        )
    )
    return (
        f"<b>{title}</b>"
        f"\n<blockquote><b>{first_line}</b></blockquote>"
        f"\n<blockquote><b>{second_line}</b></blockquote>"
    )


async def get_new_shiftboard(session):
    """Log in to the shiftboard and describe the nearest upcoming shift.

    The login page is fetched first to obtain its ``_token`` CSRF value, then
    the credentials are posted to the same URL and the returned page is
    scanned for shift cells dated after today (Jalali calendar).

    Args:
        session: HTTP client session providing ``get`` and ``post`` as async
            context managers (an ``aiohttp.ClientSession`` in ``main``). The
            same session is used for both requests.

    Returns:
        An HTML-formatted message string for the earliest upcoming shift, or
        ``None`` when no upcoming shift is found.

    Raises:
        RuntimeError: If the login page has no CSRF token input, or the shift
            cell does not have the expected structure.
    """
    async with session.get(SHIFTBOARD_LOGIN_URL) as response:
        login_page = await response.text()
        logger.info("Shiftboard get login page responsed: %d", response.status)

    token_input = BeautifulSoup(login_page, "html.parser").find("input", {"name": "_token"})
    if token_input is None:
        raise RuntimeError("Shiftboard login page has no '_token' input; cannot log in.")
    csrf_token = token_input["value"]

    payload = {
        "name": SHIFTBOARD_USERNAME,
        "password": SHIFTBOARD_PASSWORD,
        "_token": csrf_token,
    }
    headers = {"X-XSRF-TOKEN": csrf_token}
    async with session.post(SHIFTBOARD_LOGIN_URL, data=payload, headers=headers) as response:
        response_content = await response.text()
        logger.info("Shiftboard login process responsed: %d", response.status)

    soup = BeautifulSoup(response_content, "html.parser")
    today = jdatetime.datetime.today().date()
    remained_shifts = _find_upcoming_shifts(soup, today)
    logger.info("Shiftboard had %d shifts in total.", len(remained_shifts))
    if not remained_shifts:
        return None

    # Earliest date wins; on ties min() keeps the first cell in page order.
    _, nearest_shift = min(remained_shifts, key=lambda pair: pair[0])
    return _format_shift_message(nearest_shift)
async def main():
    """Fetch the nearest shift and notify via Telegram when it has changed.

    The last announced board is kept encrypted in ``shiftboard.txt``. The
    freshly scraped board is compared with it; when they differ (or the file
    is missing or cannot be decrypted) the new board is sent to Telegram and
    then stored as the new reference.
    """
    shiftboard_path = Path("shiftboard.txt")

    async with aiohttp.ClientSession() as session:
        new_board = await get_new_shiftboard(session)
    if not new_board:
        return

    try:
        # Read and decrypt the existing board file if it exists
        old_board = decrypt_text(shiftboard_path.read_text(), AES_KEY)
    except (FileNotFoundError, ValueError):
        # No stored board, or one that cannot be decoded/decrypted: treat the
        # scraped board as new.
        old_board = None

    if new_board == old_board:
        logger.info("Shiftboard doesn't have new shift.")
        return

    # Send the new board data to Telegram
    bot = telegram.Bot(token=TELEGRAMBOT_TOKEN)
    async with bot:
        await bot.send_message(chat_id=MY_TELEGRAM_CHAT_ID, text=new_board, parse_mode="HTML")
    # The %s placeholder is required: without it the extra argument makes
    # logging fail to format the record and the message is never logged.
    logger.info("Message has been sent:\n%s", BeautifulSoup(new_board, "html.parser").get_text())

    # Persist only after a successful send, so a failed delivery is retried
    # on the next run instead of being recorded as already announced.
    shiftboard_path.write_text(encrypt_text(new_board, AES_KEY, AES_IV))
# Run the scraper once when the file is executed as a script; importing the
# module does not trigger a run.
if __name__ == "__main__":
    asyncio.run(main())