-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjekunauto.py
60 lines (48 loc) · 1.43 KB
/
jekunauto.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
'''
2017/3/20
Scrape the service product items and their prices from the jekunauto.com website.
No login is required.
Requires the third-party packages lxml and requests.
'''
import requests
from time import sleep
from lxml import etree
# Page whose <div class="jk-list-wrap"> elements hold the category links.
CATEGORY_INDEX_URL = "http://www.jekunauto.com/product/category/index"
# Goods endpoint; the service id is appended directly to this prefix.
GOODS_API_URL = "http://www.jekunauto.com/v1/goods?service_id="
# Seconds before a request is abandoned; without it requests.get can block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10
# Seconds to pause after each goods request.
REQUEST_DELAY = 3
# File that receives one "name<TAB>price" line per scraped item.
RESULT_PATH = 'result'


def as_text(value):
    """Return ``value`` as text, rendering ``None`` as ``'null'``.

    The earlier implementation rewrote a bare ``null`` in the response into
    the string ``"null"`` before evaluating it, so ``'null'`` is kept as the
    textual form of a missing value in the printed and saved output.
    """
    return 'null' if value is None else str(value)


def service_id_from_href(href):
    """Return the service id contained in a category link, or ``None``.

    The id is the piece at index 3 after splitting ``href`` on ``'/'``
    (for example ``'/a/b/42'`` yields ``'42'``).  ``None`` is returned when
    ``href`` is missing/empty or has fewer than four pieces, instead of
    raising ``AttributeError`` / ``IndexError``.
    """
    if not href:
        return None
    pieces = href.split('/')
    if len(pieces) <= 3:
        return None
    return pieces[3]


def extract_goods(payload):
    """Return ``(goods_name, price)`` text pairs from a decoded goods response.

    ``payload`` is the decoded response mapping.  Its ``'data'`` entry is
    expected to be a list of mappings that each carry ``'goods_name'`` and
    ``'preferential_price'``; a missing or null ``'data'`` yields an empty
    list.

    Raises:
        KeyError: if an entry lacks ``'goods_name'`` or
            ``'preferential_price'``.
    """
    entries = payload.get('data') or []
    return [
        (as_text(entry['goods_name']), as_text(entry['preferential_price']))
        for entry in entries
    ]


def fetch_categories():
    """Download the category index and return ``(name, href)`` pairs.

    One pair is produced for every ``<a>`` that is a direct child of a
    ``<div class="jk-list-wrap">`` element; ``name`` is the anchor text and
    ``href`` its ``href`` attribute (either may be ``None``).
    """
    response = requests.get(CATEGORY_INDEX_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    root = etree.HTML(response.text)
    categories = []
    for wrapper in root.xpath('//div[@class="jk-list-wrap"]'):
        for anchor in wrapper.findall('./a'):
            categories.append((anchor.text, anchor.get('href')))
    return categories


def fetch_goods(service_id):
    """Request the goods of one service and return ``(name, price)`` pairs."""
    response = requests.get(GOODS_API_URL + str(service_id),
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # NOTE(security): the body is decoded as JSON.  It was previously passed
    # to eval() after textual replacement of true/null, which executed
    # whatever the server returned, broke on `false`, and altered any value
    # that happened to contain the substrings "true" or "null".
    return extract_goods(response.json())


def main():
    """Scrape every category, print each item, then write the result file.

    The result file is written only after all categories were processed,
    using UTF-8 so that non-ASCII goods names are stored regardless of the
    platform's default encoding.
    """
    rows = []
    for category_name, href in fetch_categories():
        print(category_name)
        service_id = service_id_from_href(href)
        if service_id is None:
            # Link does not carry an id; nothing to request for it.
            continue
        for goods_name, price in fetch_goods(service_id):
            # len(rows) is the running item index across all categories.
            print('\t%d\t%s\t-----\t%s' % (len(rows), goods_name, price))
            rows.append((goods_name, price))
        # Pause between goods requests to avoid hammering the site.
        sleep(REQUEST_DELAY)
    with open(RESULT_PATH, 'w', encoding='utf-8') as result_file:
        result_file.writelines("%s\t%s\n" % row for row in rows)


if __name__ == "__main__":
    main()