-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
110 lines (94 loc) · 4.63 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import re

import requests
from bs4 import BeautifulSoup
_FLIPKART_BASE = "https://www.flipkart.com"
_FLIPKART_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever


def _flipkart_text(tag):
    """Return the stripped text of *tag*, or '' when the lookup returned None."""
    return tag.text.strip() if tag is not None else ''


def _flipkart_link(anchor):
    """Return the absolute product URL built from *anchor*'s href, or ''."""
    if anchor is None:
        return ''
    href = anchor.attrs.get('href')
    return _FLIPKART_BASE + href if href is not None else ''


def _flipkart_image_urls(page_text):
    """Return one image URL (or '') per ``keySpecs...jpeg`` span in *page_text*.

    The spans are taken in page order, so entry ``i`` is paired with the
    ``i``-th product card by the caller.  A span that does not contain the
    expected ``["..","..","..","..",".."...url":"...`` shape yields ''.
    """
    urls = []
    for blob in re.findall(r'keySpecs(.*?)jpeg', page_text):
        details = re.search(
            r'\["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)".*url":"(.*)', blob)
        if details is None:
            urls.append('')
            continue
        # The URL template carries {@width}/{@height} placeholders; request
        # a 250x250 image and restore the 'jpeg' suffix the outer regex ate.
        sized = re.sub('{@width}|{@height}', '250', details.group(6))
        urls.append(sized + 'jpeg')
    return urls


def _flipkart_rows(product_name):
    """Fetch the Flipkart search page for *product_name* and parse its cards.

    Returns a list of rows shaped
    ``[name, price, rating, no_of_ratings, site, image_url, product_url]``.
    Cards without a name or price node are skipped; every other missing
    field becomes ''.

    NOTE(review): the CSS class names below are copied from the original
    code; whether they still match the live site is not verifiable here.
    """
    site = 'Flipkart'
    # Let requests build the query string so spaces, '&', '#', ... in the
    # product name are encoded instead of corrupting the URL.
    result = requests.get(
        _FLIPKART_BASE + "/search",
        params={"q": product_name},
        timeout=_FLIPKART_TIMEOUT,
    )
    soup = BeautifulSoup(result.content, 'html.parser')
    # Scan the page text once rather than once per product card.
    image_urls = _flipkart_image_urls(result.text)
    rows = []

    def image_for(index):
        return image_urls[index] if index < len(image_urls) else ''

    primary_cards = soup.find_all(class_='_31qSD5')
    if primary_cards:
        # Primary markup: the card element itself carries the href.
        for i, card in enumerate(primary_cards):
            name_tag = card.find(class_='_3wU53n')
            price_tag = card.find(class_='_1vC4OE _2rQ-NK')
            if name_tag is None or price_tag is None:
                continue
            count_tag = card.find('span', class_='_38sUEc')
            count = None
            if count_tag is not None:
                # Keep only the text that precedes the word "Ratings".
                count = re.search('(.*)Ratings', count_tag.text)
            rows.append([
                _flipkart_text(name_tag),
                _flipkart_text(price_tag),
                _flipkart_text(card.find('div', class_='hGSR34 _2beYZw')),
                count.group(1).strip() if count is not None else '',
                site,
                image_for(i),
                _flipkart_link(card),
            ])
        return rows

    # Fallback markup, used only when no primary cards were found.
    for i, card in enumerate(soup.find_all('div', class_='_3liAhj _1R0K0g')):
        name_tag = card.find(class_='_2cLu-l')
        price_tag = card.find(class_='_1vC4OE')
        if name_tag is None or price_tag is None:
            continue
        count_tag = card.find(class_='_38sUEc')
        rows.append([
            _flipkart_text(name_tag),
            _flipkart_text(price_tag),
            _flipkart_text(card.find(class_='hGSR34 _2beYZw')),
            # Here the count is wrapped in parentheses, e.g. "(1,234)".
            count_tag.text.strip('()') if count_tag is not None else '',
            site,
            image_for(i),
            _flipkart_link(card.find(class_='Zhf2z-')),
        ])
    return rows


def flipkart(product_name):
    """Scrape Flipkart search results for *product_name*.

    Args:
        product_name: Free-text search query.

    Returns:
        Always ``True``.  The parsed rows are not returned, to stay
        compatible with existing callers; call ``_flipkart_rows`` to get
        the scraped data.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    _flipkart_rows(product_name)
    return True
_AMAZON_BASE = "https://www.amazon.in"
_AMAZON_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever
_AMAZON_HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}


def _amazon_rows(product_name):
    """Fetch the Amazon.in search page for *product_name* and parse its items.

    Returns a list of rows shaped
    ``[name, price, rating, no_of_ratings, site, image_url, product_url]``
    (the same layout the Flipkart scraper in this file builds).  Rating,
    rating count and image URL are not scraped here and are always ''.
    Items without a price or title node are skipped.

    NOTE(review): the CSS class names below are copied from the original
    code; whether they still match the live site is not verifiable here.
    """
    site = 'Amazon'
    # The original built "s?k=<name>" but never appended it, so only the
    # home page was fetched.  Let requests build and encode the query.
    r = requests.get(
        _AMAZON_BASE + "/s",
        params={"k": product_name},
        headers=_AMAZON_HEADERS,
        timeout=_AMAZON_TIMEOUT,
    )
    driver = BeautifulSoup(r.content, "html5lib")
    rows = []
    if driver.find(class_='s-main-slot') is None:
        # No results container on the page: nothing to parse.
        return rows

    for item in driver.find_all(class_='s-result-item'):
        price_tag = item.find(class_='a-price-whole')
        name_tag = item.find(class_="a-size-medium a-color-base a-text-normal")
        # A result item may lack a price or title node; skip it rather than
        # crash on ``None.text``.
        if price_tag is None or name_tag is None:
            continue
        link_tag = item.find(class_='a-link-normal')
        href = link_tag.attrs.get('href') if link_tag is not None else None
        prod_url = _AMAZON_BASE + href if href is not None else ''
        rows.append([
            name_tag.text.strip(),
            price_tag.text.strip(),
            '',
            '',
            site,
            '',
            prod_url,
        ])
    return rows


def amazon(product_name):
    """Scrape Amazon.in search results for *product_name*.

    Args:
        product_name: Free-text search query.

    Returns:
        ``None``, as before.  The parsed rows are not returned, to stay
        compatible with existing callers; call ``_amazon_rows`` to get
        the scraped data.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    _amazon_rows(product_name)
# Script driver: read one search term from stdin, then query each store
# in turn (Flipkart first, then Amazon).
k = input()
for scrape_store in (flipkart, amazon):
    scrape_store(k)