-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlib.py
106 lines (104 loc) · 3.93 KB
/
lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import urllib
import time
import urllib2
import smtplib
def _fetchPage(link, limit):
    """Download ``link`` and return its text, or "" after ``limit`` failed tries."""
    # urllib2 exists only on Python 2; fall back to the module that replaced
    # it so the download works on either interpreter.
    try:
        import urllib2 as url_lib
    except ImportError:
        import urllib.request as url_lib

    for tries in range(limit):
        try:
            page = url_lib.urlopen(url_lib.Request(link)).read()
        except Exception:
            # Deliberately not a bare ``except``: Ctrl-C and SystemExit must
            # still be able to stop the retry loop.
            print("Can't retrieve content, attempt no.{0}".format(tries))
            # Wait a little before trying again (pointless after the last try).
            if tries + 1 < limit:
                time.sleep(1)
        else:
            # Python 3 hands back bytes; the parsing below works on text.
            if not isinstance(page, str):
                page = page.decode("utf-8", "replace")
            return page
    return ""


def _extractPosts(page, url_to_find):
    """Return ``(titles, links, index)`` for each ``url_to_find`` link in ``page``.

    ``index`` is the offset right after the start of the last link found
    (0 when the page holds no link).
    """
    titles = []
    links = []
    index = 0
    found_at = page.find(url_to_find, index)
    # str.find() reports "not found" as -1; offset 0 is a real match.
    while found_at != -1:
        # The link stops one character before the next space, which drops
        # the character (normally the closing quote) sitting in between.
        link = page[found_at: page.find(" ", found_at) - 1]
        # The post's name is what follows the first "/" at or after offset 49
        # of the link (presumably past the fixed URL prefix and post id --
        # TODO confirm for other sites).
        content = link[link.find("/", 49) + 1:]
        # Turn the URL separators back into spaces.
        content = content.replace("_", " ").replace("/", " ")
        titles.append(content)
        links.append(link)
        # Step past this match so the next search finds the following link.
        index = found_at + 1
        found_at = page.find(url_to_find, index)
    return titles, links, index


def _nextPageLink(page, index):
    """Return the "https" URL following the next-button marker, or "" if absent."""
    button_at = page.find('<span class="next-button">', index)
    if button_at == -1:
        return ""
    start = page.find("https", button_at)
    if start == -1:
        return ""
    # The URL ends at the quote that closes the attribute holding it.
    end = page.find('"', start)
    if end == -1:
        return ""
    return page[start:end]


def getPosts(url, url_to_find, num_of_pages, limit_config):
    """Collect post titles and links from consecutive listing pages.

    Starting at ``url``, each page is downloaded (with up to ``limit_config``
    attempts), scanned for links that begin with ``url_to_find``, and the
    "https" link found after the ``<span class="next-button">`` marker becomes
    the next page to download. The link chosen for the following page is
    printed after every page.

    Args:
        url: Address of the first page to download.
        url_to_find: Prefix identifying a post link inside the page.
        num_of_pages: Number of pages to walk through.
        limit_config: Maximum download attempts per page; any value ``int()``
            accepts.

    Returns:
        A ``(titles, links)`` tuple of two parallel lists. Every page adds a
        ``"--Page no. N--"`` marker to both lists, followed by one entry per
        post link found on that page.
    """
    limit = int(limit_config)
    titles = []
    links = []
    current_page = 1
    current_link = url
    while current_page < num_of_pages + 1:
        # An empty link means the previous page offered no "next" link (or
        # could not be downloaded). There is nothing to request, so skip the
        # retries and their pauses; the page still gets its marker below.
        page = _fetchPage(current_link, limit) if current_link else ""
        # Debugging reference point showing where each page's entries begin.
        marker = "--Page no. {0}--".format(current_page)
        titles.append(marker)
        links.append(marker)
        page_titles, page_links, index = _extractPosts(page, url_to_find)
        titles.extend(page_titles)
        links.extend(page_links)
        # The next button sits after the posts, so search from the last one.
        current_link = _nextPageLink(page, index)
        print(current_link)
        current_page += 1
    return (titles, links)
def formatFixer(title):
    """Rebuild the bracketed tags of a title that was flattened into a URL.

    The first word becomes an upper-case ``[COUNTRY-STATE]`` tag, and the
    markers for the "have" and "want" sections become ``[H]`` and ``[W]``.
    For example ``"usaca h paypal w gpu"`` becomes
    ``"[USA-CA][H] paypal [W] gpu"``.

    The ``h`` / ``w`` markers are matched in lower case only.

    Args:
        title: Title text with its words separated by spaces.

    Returns:
        The title with the location, have and want tags restored.
    """
    # Split at the first space. A title without any space is all location;
    # slicing with the -1 that find() returns would chop its last character.
    space_at = title.find(" ")
    if space_at == -1:
        head, rest = title, ""
    else:
        head, rest = title[:space_at], title[space_at:]

    # Country and state, upper-cased and wrapped in brackets.
    country = "[" + head.upper() + "]"
    # Separate the country from the state: [USACA] -> [USA-CA]
    if "USA" in country:
        country = country.replace("USA", "USA-")
    elif "CA" in country:
        country = country.replace("CA", "CA-")

    # Have
    if " h " in rest:
        rest = rest.replace(" h ", "[H] ")
    elif "H]" in country:
        # The marker was glued to the location word: [USA-CAH] -> [USA-CA][H]
        country = country.replace("H]", "][H]")
    elif "h" in rest:
        # Last resort: tag the first "h" found anywhere in the text.
        rest = rest.replace("h", "[H] ", 1)

    # Want
    # The membership test matters: a bare ``rest.find(" w ")`` is truthy for
    # -1 ("not found"), which made the two fallbacks below unreachable.
    if " w " in rest:
        rest = rest.replace(" w ", " [W] ")
    elif "W]" in country:
        # Mirrors the have case: [USA-CAW] -> [USA-CA][W]
        country = country.replace("W]", "][W]")
    elif "w" in rest:
        # Last resort: tag the first "w" found anywhere in the text.
        rest = rest.replace("w", "[W]", 1)

    return country + rest
def sendEmail(eServer, eUser, ePass, eContent):
    """Send ``eContent`` from ``eUser`` to ``eUser`` over SMTP-over-SSL.

    Connects to ``eServer`` on port 465, logs in and sends the message.
    Any failure is reported by printing "Couldn't send email" rather than
    by raising.

    Args:
        eServer: Host name of the SMTP server.
        eUser: Login name; also used as both sender and recipient address.
        ePass: Password for ``eUser``.
        eContent: The message to send.
    """
    try:
        emailServer = smtplib.SMTP_SSL(eServer, 465)
        try:
            emailServer.login(eUser, ePass)
            emailServer.sendmail(eUser, eUser, eContent)
        finally:
            # End the session and release the socket whether or not the
            # message went out.
            emailServer.quit()
    except Exception:
        # Best effort by design, but not a bare ``except``: Ctrl-C and
        # SystemExit should still get through.
        print("Couldn't send email")