-
Notifications
You must be signed in to change notification settings - Fork 0
/
ProgramFinder.py
41 lines (30 loc) · 961 Bytes
/
ProgramFinder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from lxml import etree
from Program import *
from Finder import *
class ProgramFinder(Finder):
    """Collect the programs linked from a page into ``self.programs``.

    Construction triggers the extraction right away: the page returned by
    ``self.request()`` is parsed and every ``<a>`` that is a direct child of
    an ``<li>`` becomes a ``Program`` keyed by its stripped link text.

    NOTE(review): ``self.request`` and ``self.domain`` are not defined in
    this class; presumably they are provided by ``Finder`` -- confirm.
    """

    def __init__(self, domain):
        """Initialise the parent ``Finder`` with *domain* and run extraction."""
        Finder.__init__(self, domain)
        self.programs = {}  # program name -> Program instance
        # Data Extraction
        self.data_extraction()

    # Getter
    def get_programs(self):
        """Return the dict mapping program name to its ``Program`` instance."""
        return self.programs

    # Parent Class Implementation
    def data_extraction(self):
        """Parse the requested page and fill ``self.programs``.

        For each ``//li/a`` anchor:

        * an ``href`` starting with ``"http"`` is used as the URL unchanged
          and the program is created with ``visible=False``;
        * any other ``href`` is appended to ``self.domain`` and the program
          is created with ``visible=True``.

        Anchors without an ``href`` attribute or without leading text are
        skipped, so one malformed link no longer aborts the whole scan.
        When two anchors share the same stripped text, the later one
        replaces the earlier entry.
        """
        html = self.request()
        tree = etree.HTML(html)
        if tree is None:
            # Defensive: no root element means there is nothing to scan.
            return

        for anchor in tree.xpath(u'//li/a'):
            link = anchor.attrib.get("href")
            text = anchor.text
            if link is None or text is None:
                # e.g. <a name="top"> or <a><img/></a>: no usable URL/name.
                continue

            if link.startswith("http"):
                url = link
                visible = False
            else:
                url = self.domain + link
                visible = True

            # The strip set is newline, carriage return, tab, space and the
            # apostrophe character (the quotes inside the literal are
            # stripped too); kept exactly as in the original.
            name = text.strip("\n\r\t' '")
            self.programs[name] = Program(name, url, visible)

    def data_construction(self):
        """Do nothing; this class has no construction step."""
        return