-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrap.py
127 lines (99 loc) · 4.45 KB
/
scrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#! venv/Scripts/python
import sqlite3
import logging
import re
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from time import sleep
class Scrap():
    """Copy prices scraped from source pages into a shop's admin product forms.

    Work items come from a local SQLite file named ``database``:

    * ``admins`` rows -- accessed positionally: index 0 is matched against
      ``products.admin_id``, index 2 is the URL loaded for login, index 3 and
      4 are typed into the ``email`` and ``passwd`` fields.
    * ``products`` rows -- accessed positionally: index 2 and 3 appear in the
      log line, index 3 is also the value used for the admin id filter,
      index 4 is the page to scrape and index 5 the XPath of the price node.

    NOTE(review): the CSS selectors look like a PrestaShop back office --
    confirm against the target shop.
    """

    def __init__(self):
        """Configure file logging, open the SQLite database, start Chrome."""
        logging.basicConfig(filename='.log', filemode='w', format='%(asctime)s - %(message)s', level=logging.INFO)
        self.con = sqlite3.connect('database')
        self.cur = self.con.cursor()
        self.initBrowser()

    def initBrowser(self):
        """Start a maximized Chrome session and store it on ``self.driver``."""
        options = webdriver.ChromeOptions()
        # options.add_argument('--headless')
        options.add_argument('--start-maximized')
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        # options.add_experimental_option('useAutomationExtension', False)
        # NOTE(review): the ``executable_path`` keyword is only accepted by
        # older Selenium releases -- confirm the pinned version before upgrading.
        self.driver = webdriver.Chrome(executable_path=r'.\driver\chromedriver.exe', options=options)

    def _waitClickable(self, by, value, timeout=10):
        """Block until the located element is clickable and return it."""
        return WebDriverWait(self.driver, timeout).until(
            EC.element_to_be_clickable((by, value))
        )

    def run(self):
        """Process every product of every admin once."""
        # Fetch all admin rows up front: ``self.cur`` is reused for the
        # per-admin product query below, and executing a new statement on a
        # cursor throws away the rows still pending from the previous one.
        admins = self.cur.execute('SELECT * FROM admins').fetchall()
        for admin in admins:
            # Parameters must be a sequence of values. A bare string would be
            # read as one binding per character and fail for ids >= 10.
            products = self.cur.execute(
                'SELECT * FROM products WHERE admin_id = ?', (str(admin[0]),)
            ).fetchall()
            self.goToProductsPage(admin)
            for product in products:
                if self.crawl(admin, product):
                    logging.info('The %s => %s updated.', product[3], product[2])
                # crawl() leaves the focus on a secondary (possibly closed)
                # tab, so return to the admin tab before the next product.
                self.switchToAdminTab()

    def goToProductsPage(self, admin):
        """Log in with the admin's credentials and open the product list.

        ``admin[2]`` is loaded as the login URL, ``admin[3]`` and ``admin[4]``
        are typed into the ``email`` and ``passwd`` inputs.
        """
        self.driver.get(admin[2])
        # Email field
        self.driver.find_element(By.ID, 'email').send_keys(admin[3])
        # Passwd field
        passwd = self.driver.find_element(By.ID, 'passwd')
        passwd.send_keys(admin[4])
        passwd.submit()
        # Catalog link, products link, then open and reset the category
        # filter -- each step waits for its element to become clickable.
        for selector in (
            '#subtab-AdminCatalog > .link',
            '#subtab-AdminProducts > .link',
            '#product_catalog_category_tree_filter',
            '#product_catalog_category_tree_filter_reset',
        ):
            self._waitClickable(By.CSS_SELECTOR, selector).click()

    def crawl(self, admin, product):
        """Scrape one product's price and write it into its admin edit form.

        Returns True when the edit form was submitted, False when the scraped
        text contained no ASCII digits and the update was skipped.
        ``admin`` is accepted for interface compatibility and is not used.
        """
        # Scrape in a named tab; window.open() with the same name reuses it.
        self.driver.execute_script('window.open("about:blank", "scrapTab")')
        self.driver.switch_to.window('scrapTab')
        self.driver.get(product[4])
        # Keep only ASCII digits (separators and currency text are dropped).
        price = re.sub("[^0-9]", "", self.driver.find_element(By.XPATH, product[5]).text)
        # Switch to admin products page
        self.switchToAdminTab()
        if not price:
            # Submitting an empty value would wipe the stored price.
            logging.warning('No price found for %s => %s, skipped.', product[3], product[2])
            return False
        # Filter product
        self.filterProduct(product[3])
        # Open editing product page
        productUrl = self.driver.find_element(By.CSS_SELECTOR, 'a.product-edit').get_attribute('href')
        self.driver.execute_script('window.open("about:blank", "productEdit")')
        self.driver.switch_to.window('productEdit')
        self.driver.get(productUrl)
        priceField = self.driver.find_element(By.ID, 'form_step1_price_ttc_shortcut')
        priceField.clear()
        priceField.send_keys(price)
        submitBtn = self.driver.find_element(By.CSS_SELECTOR, '#submit')
        if submitBtn.is_displayed():
            submitBtn.click()
        else:
            # Fall back to the form's primary submit button.
            self.driver.find_element(By.CSS_SELECTOR, '#form button.btn-primary[type="submit"]').click()
        # Give the submit a moment before closing the edit tab.
        sleep(1)
        self.driver.close()
        return True

    def switchToAdminTab(self):
        """Focus the first window handle, where the admin product list lives."""
        self.driver.switch_to.window(self.driver.window_handles[0])

    def filterProduct(self, id):
        """Filter the admin product list down to the single product ``id``.

        Any active filter is reset first, then ``id`` is entered as both the
        minimum and maximum of the id range filter.
        """
        reset = self.driver.find_element(By.CSS_SELECTOR, 'button[name="products_filter_reset"]')
        if reset.is_displayed():
            reset.click()
        self._waitClickable(By.ID, 'filter_column_id_product_min').send_keys(id)
        self._waitClickable(By.ID, 'filter_column_id_product_max').send_keys(id)
        self._waitClickable(By.CSS_SELECTOR, 'button[name="products_filter_submit"]').click()
if __name__ == '__main__':
    # Re-run the price sync every 30 minutes until the process is stopped.
    scrap = Scrap()
    try:
        while True:
            scrap.run()
            sleep(30 * 60)
    finally:
        # The loop only ends through an exception (including Ctrl+C), so
        # release the browser session and the database connection here
        # instead of leaving them open.
        try:
            scrap.driver.quit()
        finally:
            scrap.con.close()