-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
58 lines (42 loc) · 1.42 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from lxml import html
import sqlite3
import requests
import time
import config
# Feature switch read by scrape(): new listings are only e-mailed when this
# is True.
sendEmails = False

# SQLite store of listing links that have already been seen. The connection
# and cursor are shared module-wide; the table is created on first run.
conn = sqlite3.connect("base.db")
c = conn.cursor()
c.execute("CREATE TABLE IF NOT EXISTS listings (link text)")
def scrape():
    """Fetch the search page and record listings that have not been seen before.

    Downloads ``config.search_url``, selects every
    ``<tr class="item imageitem">`` row and takes the first ``*/a/@href``
    value of each row as that listing's link. Links that are not yet in the
    ``listings`` table are inserted and collected as plain strings. When at
    least one new link was found and the module-level ``sendEmails`` flag is
    true, the new links are handed to ``OutputList``.

    Uses the module-level SQLite handles ``conn`` and ``c``.

    Returns:
        None.

    Raises:
        requests.RequestException: if the page cannot be fetched, including
            when the server does not answer within the timeout.
    """
    print('Scraping')
    # Bound the wait so an unresponsive server cannot hang the scraper forever.
    page = requests.get(config.search_url, timeout=30)
    tree = html.fromstring(page.content)
    rows = tree.xpath('//tr[@class="item imageitem"]')

    newListItems = []
    for row in rows:
        hrefs = row.xpath("*/a/@href")
        if not hrefs:
            # A row without any link has nothing to store or report.
            continue
        # xpath() returns a list. Binding that list directly only works when
        # it holds exactly one value, so bind the first href explicitly.
        link = str(hrefs[0])
        c.execute('SELECT * FROM listings WHERE link=?', (link,))
        if c.fetchone() is None:
            newListItems.append(link)
            c.execute('INSERT INTO listings VALUES (?)', (link,))

    if newListItems:
        # One commit persists every insert made during this pass.
        conn.commit()
        print('New links found. Sending email')
        if sendEmails:
            OutputList(newListItems)
    else:
        print('No new links found')
def OutputList(list):
    """Send the given listing links as the body of one e-mail.

    Args:
        list: Iterable of links. Each item may be a link string or a
            sequence of link strings (the shape of an ``xpath()`` result).
            The parameter name shadows the builtin ``list``; it is kept so
            existing keyword callers continue to work.

    Returns:
        None.
    """
    lines = []
    for item in list:
        if isinstance(item, str):
            lines.append(item)
        else:
            lines.extend(str(part) for part in item)
    # Passing a list as a form value makes requests emit one "text" field per
    # element. Join the links into a single newline-separated body instead.
    SendEmail("\n".join(lines))
def SendEmail(text):
    """Send a notification e-mail through the Mailgun HTTP API.

    Posts to the ``/messages`` endpoint of the domain given by
    ``config.request_url``, authenticating as ``api`` with
    ``config.api_key``. Sender, recipient and subject are fixed.

    Args:
        text: Value sent as the message's ``text`` form field.

    Returns:
        The ``requests.Response`` of the POST. The status code is not
        checked here.

    Raises:
        requests.RequestException: if the request cannot be completed,
            including when Mailgun does not answer within the timeout.
    """
    domain = str(config.request_url)
    return requests.post(
        "https://api.mailgun.net/v3/" + domain + "/messages",
        auth=("api", config.api_key),
        data={
            "from": "Boplats Scraper <postmaster@" + domain + ">",
            "to": "Patrik Olsson <patrik.m.olsson@gmail.com>",
            "subject": "Ny lägenhet!",
            "text": text,
        },
        # Without a timeout a stalled connection would block the caller forever.
        timeout=30,
    )
# Poll forever: run one scrape pass, then wait before the next one.
while True:
    try:
        scrape()
    except requests.RequestException as exc:
        # A transient network failure must not end the long-running loop;
        # report it and try again on the next cycle.
        print('Scrape failed: ' + str(exc))
    time.sleep(60 * 60 * 2)  # Every two hours