-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
104 lines (81 loc) · 3.6 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import csv
import logging
import os
import re
from urllib.parse import quote_plus
from urllib.request import urlopen as uReq

import pymongo
import requests
from bs4 import BeautifulSoup as bs
from flask import Flask, render_template, request,jsonify
from flask_cors import CORS,cross_origin
# Route INFO-and-above records from the root logger to the file scrapper.log.
logging.basicConfig(
    filename="scrapper.log",
    level=logging.INFO,
)

# Flask application object that the route decorators in this file register on.
app = Flask(__name__)
@app.route('/', methods=['GET','POST'])
@cross_origin()
def homepage():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
# SECURITY(review): this connection string embeds a username and password in
# source control. It is kept only as a fallback so existing deployments keep
# working; set the MONGODB_URI environment variable, rotate this credential,
# and then delete the literal.
_FALLBACK_MONGO_URI = "mongodb+srv://Abhijit:blW8IFOK5O8j2GaZ@cluster0.jsdabxs.mongodb.net/?retryWrites=true&w=majority"

# Upper bound, in seconds, for each outbound HTTP request so that a stalled
# remote server cannot block the request handler indefinitely.
_HTTP_TIMEOUT_SECONDS = 10

# Exceptions raised when an expected tag is missing from the scraped markup
# (attribute access on None, indexing an empty result list).
_MISSING_MARKUP_ERRORS = (AttributeError, IndexError, TypeError)


def _extract_review(commentbox, product):
    """Build one review record from a single review container tag.

    Each field is extracted independently; when the expected markup is
    missing, the field falls back to a placeholder instead of aborting the
    whole scrape or reusing the value of the previous review.

    Args:
        commentbox: BeautifulSoup tag for one review container.
        product: Search string stored under the "Product" key.

    Returns:
        dict with the keys "Product", "Name", "Rating", "CommentHead" and
        "Comment".
    """
    try:
        name = commentbox.div.div.find_all('p', {'class': '_2sc7ZR _2V5EHH'})[0].text
    except _MISSING_MARKUP_ERRORS:
        name = 'No Name'
        logging.info("name")

    try:
        rating = commentbox.div.div.div.div.text
    except _MISSING_MARKUP_ERRORS:
        rating = 'No Rating'
        logging.info("rating")

    try:
        commentHead = commentbox.div.div.div.p.text
    except _MISSING_MARKUP_ERRORS:
        commentHead = 'No Comment Heading'
        logging.info(commentHead)

    try:
        comtag = commentbox.div.div.find_all('div', {'class': ''})
        custComment = comtag[0].div.text
    except _MISSING_MARKUP_ERRORS as e:
        custComment = 'No Comment'
        logging.info(e)

    return {"Product": product, "Name": name, "Rating": rating, "CommentHead": commentHead,
            "Comment": custComment}


def _write_reviews_csv(product, reviews):
    """Write the header line and one CSV row per review to ``<product>.csv``.

    The file name is derived from user input, so every character other than
    word characters, "." and "-" is replaced with "_" to keep the file inside
    the working directory.
    """
    safe_stem = re.sub(r"[^\w.-]", "_", product)
    with open(safe_stem + ".csv", "w", encoding="utf-8", newline="") as fw:
        fw.write("Product, Customer Name, Rating, Heading, Comment \n")
        writer = csv.writer(fw, lineterminator="\n")
        writer.writerows(
            [r["Product"], r["Name"], r["Rating"], r["CommentHead"], r["Comment"]]
            for r in reviews
        )


def _store_reviews(reviews):
    """Insert the review records into the MongoDB ``Product_review`` collection.

    Does nothing for an empty list, because ``insert_many`` rejects one.
    The connection string is read from the MONGODB_URI environment variable
    when set, otherwise the legacy built-in value is used.
    """
    if not reviews:
        return
    mongo_uri = os.environ.get("MONGODB_URI", _FALLBACK_MONGO_URI)
    # The context manager closes the client even when the insert raises.
    with pymongo.MongoClient(mongo_uri) as client:
        db = client['Product_review_Scraping']
        review_coll = db['Product_review']
        review_coll.insert_many(reviews)


@app.route("/review" , methods = ['POST' , 'GET'])
@cross_origin()
def index():
    """Scrape Flipkart reviews for the submitted search term and render them.

    GET requests render ``index.html``. POST requests read the ``content``
    form field, search Flipkart for it, open the first product of the result
    page, collect its review containers, save them to a CSV file and to
    MongoDB, and render ``results.html``.

    Returns:
        The rendered template, or the plain string 'something is wrong' when
        any step of the scrape or the persistence fails.
    """
    if request.method != 'POST':
        return render_template('index.html')

    try:
        searchString = request.form['content'].replace(" ","")

        # Percent-encode the term so characters such as "&" or non-ASCII
        # text cannot break or alter the query string.
        flipkart_url = "https://www.flipkart.com/search?q=" + quote_plus(searchString)
        with uReq(flipkart_url, timeout=_HTTP_TIMEOUT_SECONDS) as uClient:
            flipkartPage = uClient.read()
        flipkart_html = bs(flipkartPage, "html.parser")

        # Drop the first three and last five matching containers; presumably
        # these are non-product rows of the result page -- verify against the
        # current Flipkart markup.
        bigboxes = flipkart_html.find_all("div", {"class": "_1AtVbE col-12-12"})
        del bigboxes[0:3]
        del bigboxes[-5:]
        if not bigboxes:
            logging.info("no product container found for %s", searchString)
            return 'something is wrong'
        box = bigboxes[0]
        productLink = "https://www.flipkart.com" + box.div.div.div.a['href']

        prodRes = requests.get(productLink, timeout=_HTTP_TIMEOUT_SECONDS)
        prod_html = bs(prodRes.text, "html.parser")
        commentboxes = prod_html.find_all('div', {'class': "_16PBlm"})

        reviews = [_extract_review(commentbox, searchString) for commentbox in commentboxes]
        logging.info("log my final result {}".format(reviews))

        _write_reviews_csv(searchString, reviews)
        _store_reviews(reviews)

        # The last container is left out of the rendered list, as in the
        # original code; presumably it is not an actual review -- TODO confirm.
        return render_template('results.html', reviews=reviews[:-1])
    except Exception:
        # Top-level boundary of the handler: record the traceback and give
        # the client the same generic message as before.
        logging.exception("review scraping failed")
        return 'something is wrong'
# Script entry point: start Flask's built-in server on the loopback interface
# with debug mode enabled when this file is executed directly.
if __name__ == "__main__":
    app.run(
        host="127.0.0.1",
        debug=True,
    )