This repository has been archived by the owner on Jan 4, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
run.py
555 lines (510 loc) · 22.7 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
from __future__ import annotations
from datetime import date, datetime, timedelta
from inspect import cleandoc
from typing import Union
import asyncio
import cmd
import getpass
import mmlsattendance
import os
import re
# import logging
# logging.basicConfig(level=logging.INFO, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
PRINT_ATTENDANCE_LIST = False
def print_subjects(i_courses: mmlsattendance.Courses) -> None:
num_pad = len(str(len(i_courses.subjects)))
space = ' ' * num_pad + ' ' * 2
cat_space = space + ' ' * 3
for subject_no, subject in enumerate(i_courses.subjects, 1):
subj_no = str(subject_no).rjust(num_pad)
print(f"{subj_no}. {subject.code} - {subject.name}")
print(f"{space}> Subject ID: {subject.id}")
print(f"{space}> Coordinator ID: {subject.coordinator_id}")
print(f"{cat_space}Sel Class Class ID")
for char_id, kelas in enumerate(subject.classes, ord("a")):
x = "X" if kelas.selected else " "
print(f"{space}{chr(char_id)}. [{x}]{kelas.code:>6}{kelas.id:>9}")
if subject_no != len(i_courses.subjects):
print()
def print_attendance(d_form: mmlsattendance.DetailedAttendanceForm) -> None:
print()
print(
f"[{d_form.class_date} {d_form.start_time[:-3]}-{d_form.end_time[:-3]}] "
f"{d_form.subject_code} - {d_form.subject_name} ({d_form.class_code})"
)
print(d_form.attendance_url)
if PRINT_ATTENDANCE_LIST:
if d_form.attendance_list_url:
print(d_form.attendance_list_url)
def change_selection(args: str, op: Union[bool, None]) -> bool:
"""
Parses selection command arguments, creates a dict of sets -- where key is subject index and value is a set of class
index -- and iterates through each item and its set elements which through it does select, deselect or toggle
operation to classes at their index.
"""
args_list = args.lower().split() # E.g. ['1ab', '2ac', '3', '4a']
if args_list and args_list[0] == 'all':
for kelas in courses.classes:
kelas.selected = op if op is not None else not kelas.selected
else:
op_dict = {} # op_dict = {subject_index: {class_index, ...}, ...}
for arg in args_list:
re_obj = re.match("[0-9]+", arg[0])
if re_obj is None:
continue
sub_idx = int(re_obj[0]) - 1
if sub_idx < 0:
continue
re_obj = re.search("[a-z]+", arg)
letters = re_obj[0] if re_obj is not None else ''
class_choices = [ord(char) - ord('a') for char in letters]
op_dict[sub_idx] = set(class_choices)
if not op_dict:
return False
subjects = courses.subjects
for sub_idx, class_set in op_dict.items():
if not class_set:
try:
for kelas in subjects[sub_idx].classes:
kelas.selected = op if op is not None else not kelas.selected
except IndexError:
pass
else:
for cls_idx in class_set:
try:
subjects[sub_idx].classes[cls_idx].selected = op if op is not None else not \
subjects[sub_idx].classes[cls_idx].selected
except IndexError:
pass
return True
class Prompt(cmd.Cmd):
nohelp = "No help on '%s'."
user_id: Union[str, None] = None
def do_login(self, args: str):
"""
Description:
Log in to MMLS and MMU Mobile and load subjects and classes.
Usage:
login [student_id]
"""
user_id = args.split()[0] if args else input("Student ID: ")
password = getpass.getpass()
if loop.run_until_complete(courses.load_online(user_id, password)):
print("Success.")
self.user_id = user_id
else:
print("Invalid student ID or password.")
def do_save(self, args: str):
"""
Description:
Save current subjects and classes.
Usage:
save
"""
if self.user_id is None:
print("Log in to use this command.")
return
if os.path.exists("courses.json"):
os.remove("courses.json")
with open("courses.json", "w") as f_file:
f_file.write(self.user_id + "\n")
f_file.write(courses.json())
print("Courses saved.")
def do_load(self, args: str):
"""
Description:
Load saved subjects and classes.
Usage:
load
"""
try:
with open("courses.json", "r+") as f_file:
self.user_id = f_file.readline()
courses.load_json(f_file.read())
print("Courses loaded.")
except FileNotFoundError:
print("No courses were saved.")
def do_print(self, args: str):
"""
Description:
Display stored subjects, classes and selection.
Usage:
print
"""
print_subjects(courses)
def do_autoselect(self, args: str):
"""
Description:
Auto-select classes that the student has registered for.
Usage:
autoselect
"""
if self.user_id is not None:
loop.run_until_complete(courses.autoselect_classes(self.user_id))
print_subjects(courses)
else:
print("Log in to use this command.")
def do_select(self, args: str):
"""
Description:
Add selection of classes.
Usage:
select [number]+[character]* ... ...
select all
"""
if not change_selection(args, True):
print("Invalid command. Enter 'help select' for command help.")
return
print_subjects(courses)
def do_deselect(self, args: str):
"""
Description:
Remove selection of classes.
Usage:
deselect [number]+[character]* ... ...
deselect all
"""
if not change_selection(args, False):
print("Invalid command. Enter 'help deselect' for command help.")
return
print_subjects(courses)
def do_toggle(self, args: str):
"""
Description:
Toggle selection of classes.
Usage:
toggle [number]+[character]* ... ...
toggle all
"""
if not change_selection(args, None):
print("Invalid command. Enter 'help toggle' for command help.")
return
print_subjects(courses)
def do_search(self, args: str):
"""
Description:
Search for attendance links in a specified range.
Usage:
search {date|fastdate} <start_date> <end_date>
search {date|fastdate} [date]
...if date is not provided, uses current date.
search timetable <start_id> <end_id>
Addendum:
Dates need to be entered in ISO format, YYYY-MM-DD.
Using 'fastdate' is faster than 'date', but might miss some attendance URLs.
"""
i_cmd = ''.join(args.split()[:1])
args = ' '.join(args.split()[1:])
if not courses.selected_classes:
print('No classes selected for searching.')
return
elif not i_cmd or not (i_cmd == 'fastdate' or i_cmd == 'date' or i_cmd == 'timetable'):
print("Invalid command. Enter 'help search' for command help.")
return
# =============== search date <start_date> <end_date> ===============
elif i_cmd == 'date' or i_cmd == 'fastdate':
args_list = args.split()
try:
if len(args_list) > 2:
print("Too many arguments. Enter 'help search' for command help.")
return
elif len(args_list) == 1:
start_date = end_date = date.fromisoformat(args_list[0])
elif len(args_list) == 0:
start_date = end_date = (datetime.utcnow() + timedelta(hours=8)).date()
else:
start_date = date.fromisoformat(args_list[0])
end_date = date.fromisoformat(args_list[1])
except ValueError as err:
print(f"{err}.")
return
if len(args_list) == 2:
print(f"Searching classes from {start_date.isoformat()} to {end_date.isoformat()}.")
else:
print(f"Searching classes in {start_date.isoformat()}.")
fast = True if i_cmd == 'fastdate' else False
async def scrape_and_print():
async for result in scraper.scrape_date(courses, start_date, end_date, fast=fast):
print_attendance(result)
loop.run_until_complete(scrape_and_print())
# =============== search timetable <start_id> <end_id> ===============
elif i_cmd == 'timetable':
args_list = args.split()
if not len(args_list) == 2:
print("Expected two arguments. Enter 'help search' for command help.")
return
try:
start_timetable_id = int(args_list[0])
end_timetable_id = int(args_list[1])
except ValueError as err:
print(f"Value error: {err}. Enter 'help search' for command help.")
return
print(f"Searching classes from {start_timetable_id} to {end_timetable_id}.")
async def scrape_and_print():
async for result in scraper.scrape(courses, start_timetable_id, end_timetable_id):
print_attendance(result)
loop.run_until_complete(scrape_and_print())
def do_cache(self, args: str):
"""
Description:
Cache attendances in a specified timetable ID range.
Usage:
cache <start_id> <end_id>
cache all
"""
args_list = args.split()
try:
if len(args_list) == 1:
if args_list[0] == 'all':
start_ttid = end_ttid = None
else:
print(f"Invalid argument. Enter 'help cache' for command help.")
return
elif len(args_list) == 2:
start_ttid = int(args_list[0])
end_ttid = int(args_list[1])
elif len(args_list) == 0:
print(f"Missing arguments. Enter 'help cache' for command help.")
return
else:
print(f"Too many arguments. Enter 'help cache' for command help.")
return
except ValueError:
print(f"Value error. Enter 'help cache' for command help.")
return
num_cached = len(scraper.attendance_cache)
print('This could take a while. Working...')
scraper.attendance_cache = loop.run_until_complete(
mmlsattendance.update_cache(scraper.attendance_cache,
update_cached=True, start_timetable_id=start_ttid, end_timetable_id=end_ttid)
)
new_num_cached = len(scraper.attendance_cache)
print(f"Cached: {new_num_cached} (+{new_num_cached - num_cached})")
print('Done!')
def do_cached(self, args: str):
"""
Description:
Print the number of cached attendances.
Usage:
cached
"""
print(f'Total cached: {len(scraper.attendance_cache)}')
def do_exit(self, args: str):
"""
Description:
Terminate this script.
Usage:
exit
"""
print('Exiting.')
exit()
def do_guided(self, args: str):
"""
Description:
Start a guided setup for typical attendance scraping.
Usage:
guided
"""
def ask_yes_no(question):
while True:
decision = input(f"{question} (y/n): ")
if decision.lower() == 'y':
return True
if decision.lower() == 'n':
return False
print("Invalid input.")
print(cleandoc(
"""
How do you want to scrape attendance links?:
1. Retrieve classes via MMLS login and search by fastdate.*
2. Retrieve classes via MMLS login and search by date.
2. Retrieve classes via MMLS login and search by range of timetable_id.
* Unreliable in the first three trimester days and in some cases.
If no links were caught use the second option instead.
"""
))
while True:
try:
what_to_do = int(input("\nChoice: "))
if not 0 < what_to_do < 4:
raise ValueError
break
except ValueError:
print("Invalid input.")
if self.user_id is None:
self.do_login('')
if self.user_id is None:
return
self.do_print("")
if ask_yes_no("Auto-select your registered classes?"):
self.do_autoselect('')
if ask_yes_no("Edit class selection?"):
self.do_print('')
while True:
try:
sub_idx = int(input("Select which subject: ")) - 1
class_choices = input("Toggle which classes?: ").replace(',', '').split(' ')
class_indexes = [ord(char) - ord('a') for char in class_choices]
for index in class_indexes:
courses.subjects[sub_idx].classes[index].selected = \
not courses.subjects[sub_idx].classes[index].selected
self.do_print('')
except (ValueError, TypeError):
print("Invalid input.")
if not ask_yes_no("Continue editing?"):
break
if what_to_do == 1:
start_date = input("Search from what date? YYYY-MM-DD: ")
end_date = input("Until what date? YYYY-MM-DD: ")
self.do_search(f"fastdate {start_date} {end_date}")
elif what_to_do == 2:
start_date = input("Search from what date? YYYY-MM-DD: ")
end_date = input("Until what date? YYYY-MM-DD: ")
self.do_search(f"date {start_date} {end_date}")
elif what_to_do == 3:
start_timetable_id = input("Define beginning of timetable_id range: ")
end_timetable_id = input("Define end of timetable_id range: ")
self.do_search(f"timetable {start_timetable_id} {end_timetable_id}")
def default(self, line: str):
print(f"Command not found: '{line}'. Enter 'help' to list commands.")
def emptyline(self):
print("Hello?")
def help_manual(self):
print(cleandoc(
"""
Preface: The 'guided' command is a wrapper for commands described under this
entry. As such, it may be simpler to use that command instead.
There are three main steps to start scraping for attendance links: Setting
up required information, selecting what to search, and lastly, starting the
attendance search itself.
First step: Setting up required information.
————————————————————————————————————————————————————————————————————————————————
Commands: login
Usage: login [student_id]
The script requires certain information before it could start. The script
is able to programmatically obtain required information by simply logging in to
MMLS.
Second step: Displaying and selecting what to search.
————————————————————————————————————————————————————————————————————————————————
Commands: print, select, deselect, toggle, and autoselect.
Usage: print
select|deselect|toggle <i.e. '1a 2abc 3' and 'all'>
autoselect
After the script has logged in and parsed the required information, a list
of subjects and its classes could be displayed by using the 'print' command.
There will be selection boxes accompanying the class entries which signifies
whether the class will be searched for its attendance links. Altering class
selections can be done using the 'select', 'deselect', and 'toggle' command. On
the other hand, the 'autoselect' command is available which can automatically
select registered classes.
Third step: Starting attendance link scraping.
————————————————————————————————————————————————————————————————————————————————
Commands: search fastdate, search date and search timetable.
Usage: search {date|fastdate} <start_date> <end_date>
search {date|fastdate} [date]
...if date is not provided, uses current date.
search timetable <start_timetable_id> <end_timetable_id>
The way this script works is by iterating through timetable IDs for selected
class IDs then formats them into an attendance link if the timetable ID belongs
to one of the selected classes. A date search can be conducted by using the
'search date' and 'search fastdate' command with date entered in the format of
yyyy-mm-dd. 'fastdate' searches for the first occurence and last occurence of
the timetable IDs which falls within the specified date range and uses those as
the timetable ID search range. As it is using binary search for the exact
timetable ID that is at the beginning and the end of date range, it is
unreliable when it encounters timetable IDs that are not sorted by date. To
mitigate this, use 'date' subcommand instead which looks for a timetable ID 1-2
months prior to the specified date. Although much slower, this makes it, with a
high degree of confidence, all timetable IDs that falls within the specified
date range are parsed and checked. Alternatively, one could skip the date search
by manually providing the timetable ID range to search using the 'search
timetable' command. Found attendance links are displayed as it is searched.
"""
))
def do_help(self, arg: str):
"""
Description:
List available commands or provide command help.
Usage:
help [command]
"""
if arg:
# XXX check arg syntax
try:
func = getattr(self, 'help_' + arg)
except AttributeError:
try:
# EDIT: Use inspect.cleandoc() as cmd module's do_help() does not de-indent docstrings by default.
doc = cleandoc(getattr(self, 'do_' + arg).__doc__)
if doc:
self.stdout.write("%s\n" % str(doc))
return
except AttributeError:
pass
self.stdout.write("%s\n" % str(self.nohelp % (arg,)))
return
func()
else:
names = self.get_names()
cmds_doc = []
cmds_undoc = []
_help = {}
for name in names:
if name[:5] == 'help_':
_help[name[5:]] = 1
names.sort()
# There can be duplicates if routines overridden
prevname = ''
for name in names:
if name[:3] == 'do_':
if name == prevname:
continue
prevname = name
_cmd = name[3:]
if _cmd in _help:
cmds_doc.append(_cmd)
del _help[_cmd]
elif getattr(self, name).__doc__:
cmds_doc.append(_cmd)
else:
cmds_undoc.append(_cmd)
self.stdout.write("%s\n" % str(self.doc_leader))
self.print_topics(self.doc_header, cmds_doc, 15, 80)
self.print_topics(self.misc_header, list(_help.keys()), 15, 80)
self.print_topics(self.undoc_header, cmds_undoc, 15, 80)
if __name__ == '__main__':
loop = asyncio.get_event_loop()
courses = mmlsattendance.Courses()
scraper = mmlsattendance.Scraper()
prompt = Prompt()
prompt.prompt = '> '
prompt.ruler = '—'
prompt.intro = cleandoc("""
_____ _____ __ _____
| | | | | __| .\\\\To start:
| | | | | | | |__|__ | > guided
|_|_|_|_|_|_|_____|_____|_
| _ | |_| |_ ___ ___ _| |___ ___ ___ ___
| | _| _| -_| | . | .'| | _| -_|
|__|__|_| |_| |___|_|_|___|__,|_|_|___|___|
| __|___ ___ ___ ___ ___ ___
|__ | _| _| .'| . | -_| _|
|_____|___|_| |__,| _|___|_|
|_|
Enter 'help' or '?' to list commands.
""")
try:
with open("attendance_cache.json", "r+") as file:
scraper.attendance_cache = file
except FileNotFoundError:
pass
try:
prompt.cmdloop()
except KeyboardInterrupt:
print()
prompt.do_exit("")
finally:
if os.path.exists("attendance_cache.json"):
os.remove("attendance_cache.json")
with open("attendance_cache.json", "w") as file:
file.write(scraper.attendance_cache_json())