-
Notifications
You must be signed in to change notification settings - Fork 0
/
webclass.py
113 lines (88 loc) · 2.36 KB
/
webclass.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/python
# One-line description of the module; printed when the file is run as a script.
Usage = """Website class to allow automation of webpage making. Simple html based."""
# NOTE(review): subprocess is not referenced by any code visible in this file.
import subprocess
# urllib2 is bound to the name `urllib`, so every `urllib.*` call in this
# file (Request, urlopen, HTTPError, URLError) is a urllib2 call.
import urllib2 as urllib
# Module metadata.
__author__ = "Jonny Elliott"
__copyright__ = "Copyright 2011"
__credits__ = ""
__license__ = "GPL"
__version__ = "0.0"
__maintainer__ = "Jonny Elliott"
__email__ = "jonnyelliott@mpe.mpg.de"
__status__ = "Prototype"
class webpage(object):
    """Minimal HTML page builder.

    Collects a title and an ordered list of body fragments, then writes
    them out as a simple HTML document with buildPage().
    """

    def __init__(self, title=""):
        """Create an empty page with the given title."""
        self._title = title
        self._html = ""
        self._header = ""
        # Always a list, so addBody/buildPage never have to special-case
        # the empty page.
        self._body = []

    def setTitle(self, title):
        """Replace the page title."""
        self._title = title

    def addBody(self, body, link=False):
        """Append one fragment to the page body.

        body -- text to add; it is inserted verbatim (not HTML-escaped).
        link -- when true, emit an anchor whose target and label are both
                `body`; otherwise wrap `body` in a paragraph.
        """
        if link:
            # Quote the href so URLs containing spaces or '=' stay intact.
            fragment = '<a href="%s">%s</a>' % (body, body)
        else:
            # One-element tuple: keeps formatting correct even if `body`
            # is itself a tuple.
            fragment = "<p>%s</p>" % (body,)
        self._body.append(fragment)

    def buildPage(self, outname):
        """Write the page to the file `outname`, overwriting it if present.

        The document is written without line breaks: the title, then every
        body fragment in insertion order inside <body>...</body>.
        """
        parts = ["<html>", " <title> %s </title>" % (self._title,), " <body>"]
        parts.extend(" %s" % (fragment,) for fragment in self._body)
        parts.append(" </body>")
        parts.append("</html>")
        # The context manager closes the file even if the write fails.
        with open(outname, "w") as webout:
            webout.write("".join(parts))
class CopiedWebPage(webpage):
    """A webpage whose raw content is copied from a URL into a local file."""

    def __init__(self, title=""):
        """Create a page with no content and no file name set."""
        # Run the base initialiser so the inherited title/body attributes
        # exist and setTitle/addBody/buildPage work on this subclass too.
        super(CopiedWebPage, self).__init__(title)
        self._content = []
        self._filename = ""

    # Accessor methods (kept for existing callers).
    def getContent(self):
        """Return the lines stored by the last parseWebPage() call."""
        return self._content

    def setFileName(self, filename):
        """Set the local file used to store the downloaded page."""
        self._filename = filename

    def getFileName(self):
        """Return the local file name used to store the downloaded page."""
        return self._filename

    def getWebPage(self, url=""):
        """Download `url` into the configured file unless it already exists.

        Returns True only when a new copy was downloaded and written.
        Returns False when the file could already be opened (nothing is
        fetched) or when the request failed with an HTTP/URL error, which
        is reported on stdout.
        """
        # An existing, readable file counts as "already downloaded".
        try:
            with open(self.getFileName(), "r"):
                return False
        except IOError:
            pass

        try:
            download = urllib.urlopen(urllib.Request(url))
            try:
                # Read everything before touching the output file so a
                # failed transfer never leaves a truncated file behind
                # that later calls would treat as already downloaded.
                payload = download.read()
            finally:
                download.close()
        # HTTPError is a subclass of URLError, so it must be caught first.
        except urllib.HTTPError as e:
            print("HTTP Error: %s %s" % (e.code, url))
            return False
        except urllib.URLError as e:
            print("URL Error: %s %s" % (e.reason, url))
            return False

        # Binary mode: store the response bytes exactly as received.
        with open(self.getFileName(), "wb") as webwrite:
            webwrite.write(payload)
        return True

    def parseWebPage(self):
        """Load the downloaded file into the content list, one entry per line.

        No real parsing into title/body is done yet (TBC); the raw lines
        are stored as-is and are available through getContent().
        """
        with open(self.getFileName(), "r") as page_file:
            self._content = page_file.readlines()
# Running the module directly just shows its description.
if __name__ == "__main__":
    print(Usage)