-
Notifications
You must be signed in to change notification settings - Fork 0
/
webScrapping.py
304 lines (248 loc) · 8.54 KB
/
webScrapping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
import wikipedia
import webbrowser
import requests
from bs4 import BeautifulSoup
import threading
import smtplib
import urllib.request
import os
from geopy.geocoders import Nominatim
from geopy.distance import great_circle
class COVID:
    """Scrape and cache COVID-19 headline counters from worldometers.info.

    Every counter is stored as the stripped text of the page's
    ``maincounter-number`` element and stays ``'Not Available'`` until a
    refresh succeeds.
    """

    WORLD_URL = 'https://www.worldometers.info/coronavirus/'
    INDIA_URL = 'https://www.worldometers.info/coronavirus/country/india/'
    # Seconds to wait for the site; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.total = 'Not Available'
        self.deaths = 'Not Available'
        self.recovered = 'Not Available'
        self.totalIndia = 'Not Available'
        self.deathsIndia = 'Not Available'
        self.recoveredIndia = 'Not Available'

    def _fetchCounters(self, url):
        """Download ``url`` and return the text of each main counter, in page order."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')
        return [div.text.strip()
                for div in soup.find_all('div', class_='maincounter-number')]

    def covidUpdate(self):
        """Refresh the worldwide counters (cases, deaths, recovered).

        If the page does not expose at least three counters the previously
        stored values are kept instead of raising IndexError.
        """
        counters = self._fetchCounters(self.WORLD_URL)
        if len(counters) >= 3:
            self.total, self.deaths, self.recovered = counters[:3]

    def covidUpdateIndia(self):
        """Refresh the India counters (cases, deaths, recovered).

        If the page does not expose at least three counters the previously
        stored values are kept instead of raising IndexError.
        """
        counters = self._fetchCounters(self.INDIA_URL)
        if len(counters) >= 3:
            self.totalIndia, self.deathsIndia, self.recoveredIndia = counters[:3]

    def totalCases(self, india_bool):
        """Return the cached case count for India if ``india_bool`` is truthy, else worldwide."""
        return self.totalIndia if india_bool else self.total

    def totalDeaths(self, india_bool):
        """Return the cached death count for India if ``india_bool`` is truthy, else worldwide."""
        return self.deathsIndia if india_bool else self.deaths

    def totalRecovery(self, india_bool):
        """Return the cached recovery count for India if ``india_bool`` is truthy, else worldwide."""
        return self.recoveredIndia if india_bool else self.recovered

    def symptoms(self):
        """Return a new list of numbered symptom descriptions."""
        return [
            '1. Fever',
            '2. Coughing',
            '3. Shortness of breath',
            '4. Trouble breathing',
            '5. Fatigue',
            '6. Chills, sometimes with shaking',
            '7. Body aches',
            '8. Headache',
            '9. Sore throat',
            '10. Loss of smell or taste',
            '11. Nausea',
            '12. Diarrhea',
        ]

    def prevention(self):
        """Return a new list of numbered prevention guidelines."""
        return [
            '1. Clean your hands often. Use soap and water, or an alcohol-based hand rub.',
            '2. Maintain a safe distance from anyone who is coughing or sneezing.',
            '3. Wear a mask when physical distancing is not possible.',
            '4. Don’t touch your eyes, nose or mouth.',
            '5. Cover your nose and mouth with your bent elbow or a tissue when you cough or sneeze.',
            '6. Stay home if you feel unwell.',
            '7. If you have a fever, cough and difficulty breathing, seek medical attention.',
        ]
def wikiResult(query):
    """Return a two-sentence Wikipedia summary for a spoken query.

    The command words 'wikipedia' and 'search' are removed from ``query``
    first; if nothing but whitespace remains, the query falls back to
    "wikipedia". Any exception raised while fetching the summary is
    swallowed and the fallback text "Desired Result Not Found" is returned.
    """
    for commandWord in ('wikipedia', 'search'):
        query = query.replace(commandWord, '')
    if not query.split():
        query = "wikipedia"
    try:
        summary = wikipedia.summary(query, sentences=2)
    except Exception:
        return "Desired Result Not Found"
    return summary
class WEATHER:
    """Scrape and cache the current conditions shown on weather.com."""

    WEATHER_URL = 'https://weather.com/en-IN/weather/today/'
    # NOTE(review): this looks like a build-generated CSS class name, so it
    # probably needs updating whenever the site's markup changes - confirm.
    CONDITION_CLASS = 'CurrentConditions--phraseValue--2xXSr'
    # Seconds to wait for the site; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Spoken form looks like: "Currently in Lucknow, its 26 with Haze"
        self.tempValue = ''
        self.city = ''
        self.currCondition = ''
        self.speakResult = ''

    @staticmethod
    def _cityFromHeading(heading):
        """Return the city part of a page heading shaped like 'City, Region Weather'.

        The word 'Weather' is removed and everything before the first comma
        is kept. A heading without a comma is returned whole (slicing with
        ``find(',')`` would silently drop its last character).
        """
        return heading.replace('Weather', '').partition(',')[0].strip()

    def updateWeather(self):
        """Download the weather page and refresh city, temperature and condition.

        Each attribute is only overwritten when its element is present in the
        page, so earlier values survive a partial parse. Network errors from
        ``requests`` propagate to the caller.
        """
        # The page is requested without coordinates, so no IP-geolocation
        # lookup is performed here.
        response = requests.get(self.WEATHER_URL, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.find('h1')
        if heading is not None:
            self.city = self._cityFromHeading(heading.text)

        temperature = soup.find('span', attrs={'data-testid': 'TemperatureValue'})
        if temperature is not None:
            # Drop the final character (presumably the degree sign).
            self.tempValue = temperature.text[:-1]

        condition = soup.find('div', class_=self.CONDITION_CLASS)
        if condition is not None:
            self.currCondition = condition.text

    def weather(self):
        """Build the spoken summary and return the cached weather details.

        Returns:
            list: ``[tempValue, currCondition, weekday name, city, speakResult]``.
        """
        from datetime import datetime

        today = datetime.today().strftime('%A')
        self.speakResult = "Currently in {}, its {} degree, with a {}".format(
            self.city, self.tempValue, self.currCondition)
        return [self.tempValue, self.currCondition, today, self.city, self.speakResult]
# Shared module-level caches read by covid() and weather() below.
# Constructing them only sets placeholder values; dataUpdate() fills them in.
c = COVID()
w = WEATHER()
def dataUpdate():
    """Refresh the shared caches: world COVID, India COVID, then weather.

    The steps run in that order; an exception from one step propagates and
    the remaining steps are not run.
    """
    refreshSteps = (c.covidUpdate, c.covidUpdateIndia, w.updateWeather)
    for refresh in refreshSteps:
        refresh()
##### WEATHER #####
def weather():
    """Return the weather report list built by the shared WEATHER instance."""
    report = w.weather()
    return report
### COVID ###
def covid(query):
    """Answer a COVID-related query from the shared COVID cache.

    Figures are for India when 'india' occurs in ``query``, otherwise
    worldwide. Keywords are checked in this order: statistics/report,
    symptom, prevent/measure/precaution, recov, death; anything else gets
    the total case count.

    Returns:
        Either ``[heading, list_of_lines]`` (statistics, symptoms,
        prevention) or a single sentence string.
    """
    india_bool = "india" in query

    if "statistic" in query or 'report' in query:
        figures = [
            "Total cases: " + c.totalCases(india_bool),
            "Total Recovery: " + c.totalRecovery(india_bool),
            "Total Deaths: " + c.totalDeaths(india_bool),
        ]
        return ["Here are the statistics...", figures]

    if "symptom" in query:
        return ["Here are the Symptoms...", c.symptoms()]

    if any(keyword in query for keyword in ("prevent", "measure", "precaution")):
        return ["Here are the some of preventions from COVID-19:", c.prevention()]

    if "recov" in query:
        return "Total Recovery is: " + c.totalRecovery(india_bool)

    if "death" in query:
        return "Total Deaths are: " + c.totalDeaths(india_bool)

    return "Total Cases are: " + c.totalCases(india_bool)
def latestNews(news=5):
    """Scrape the newest headlines from indianexpress.com.

    Args:
        news: Maximum number of headlines to return. Values of zero or less
            return empty lists without contacting the site.

    Returns:
        tuple: ``(headlines, headlineLinks)`` - two parallel lists holding
        each headline's text and its link.

    Title blocks that contain no anchor, or whose anchor has no ``href``,
    are skipped rather than raising. Network errors from ``requests``
    propagate to the caller.
    """
    headlines = []
    headlineLinks = []
    if news <= 0:
        return headlines, headlineLinks

    response = requests.get('https://indianexpress.com/latest-news/', timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')

    for div in soup.find_all('div', {'class': 'title'}):
        anchor = div.find('a')
        if anchor is None or not anchor.get('href'):
            continue
        headlineLinks.append(anchor['href'])
        headlines.append(anchor.text)
        if len(headlines) >= news:
            break
    return headlines, headlineLinks
def maps(text):
    """Open Google Maps at the place named in a spoken command.

    The command words 'maps', 'map' and 'google' are removed from ``text``;
    the remainder is trimmed and percent-encoded so that spaces and
    characters such as '/', '?', '#' or '&' cannot corrupt the URL path.
    """
    from urllib.parse import quote

    # 'maps' must be removed before 'map', otherwise a stray 's' is left behind.
    for commandWord in ('maps', 'map', 'google'):
        text = text.replace(commandWord, '')
    place = quote(text.strip(), safe='')
    openWebsite('https://www.google.com/maps/place/' + place)
def giveDirections(startingPoint, destinationPoint):
    """Open Google Maps directions between two places and return their distance.

    When ``startingPoint`` contains the word 'current', the start is
    reverse-geocoded from the 'loc' field returned by ipinfo.io; otherwise
    it is geocoded from the text. The destination is always geocoded.

    Returns:
        str: Great-circle distance in kilometres, rounded to two decimals,
        followed by 'KM'.

    NOTE(review): the geocoder may return None when nothing matches, in
    which case the attribute access below fails - confirm desired handling.
    """
    locator = Nominatim(user_agent='assistant')
    if 'current' in startingPoint:
        ipInfo = requests.get("https://ipinfo.io/").json()
        origin = locator.reverse(ipInfo['loc'])
    else:
        origin = locator.geocode(startingPoint)
    destination = locator.geocode(destinationPoint)

    # The resolved addresses, with spaces turned into '+', form the URL path.
    originSlug = origin.address.replace(' ', '+')
    destinationSlug = destination.address.replace(' ', '+')
    openWebsite('https://www.google.co.in/maps/dir/' + originSlug + '/' + destinationSlug + '/')

    kilometres = great_circle(
        (origin.latitude, origin.longitude),
        (destination.latitude, destination.longitude),
    ).km
    return str(round(kilometres, 2)) + 'KM'
def openWebsite(url='https://www.google.com/'):
    """Open ``url`` with the ``webbrowser`` module (Google's home page by default)."""
    webbrowser.open(url)
def jokes():
    """Return the text of the first ``<p>`` on the icanhazdadjoke.com home page.

    Any exception raised while reading that paragraph propagates unchanged
    to the caller.
    """
    page = requests.get('https://icanhazdadjoke.com/')
    soup = BeautifulSoup(page.content, 'html.parser')
    firstParagraph = soup.find('p')
    return firstParagraph.text
def youtube(query):
    """Open the top YouTube search hit for a spoken command and return "Enjoy".

    The phrases 'play', 'on youtube' and 'youtube' are each replaced by a
    space before searching. Two progress messages are printed along the way.
    """
    from youtube_search import YoutubeSearch

    for filler in ('play', 'on youtube', 'youtube'):
        query = query.replace(filler, ' ')

    print("Pahuch Gya")  # progress trace (Hindi, roughly "reached here")
    matches = YoutubeSearch(query, max_results=1).to_dict()
    print("Link mil gya")  # progress trace (Hindi, roughly "got the link")

    videoId = matches[0]['id']
    webbrowser.open('https://www.youtube.com/watch?v=' + videoId)
    return "Enjoy"
def googleSearch(query):
    """Open a Google web or image search for a spoken command.

    An image search (``tbm=isch``) is requested when 'image' occurs in
    ``query``. The command words 'images', 'image', 'search' and 'show' are
    removed, surrounding and repeated whitespace is collapsed, and the
    parameters are URL-encoded so characters such as '&', '#' or '+' in the
    search text cannot break the query string.

    Returns:
        str: The fixed acknowledgement "Here you go...".
    """
    from urllib.parse import urlencode

    wantsImages = 'image' in query
    # 'images' must be removed before 'image', otherwise a stray 's' is left behind.
    for commandWord in ('images', 'image', 'search', 'show'):
        query = query.replace(commandWord, '')

    params = {'q': ' '.join(query.split())}
    if wantsImages:
        params['tbm'] = 'isch'

    webbrowser.open("https://www.google.com/search?" + urlencode(params))
    return "Here you go..."
def sendWhatsapp(phone_no='', message='', country_code='+91', wait_seconds=10):
    """Open WhatsApp Web with a pre-filled message and press Enter.

    Args:
        phone_no: Recipient number without the country code.
        message: Text to pre-fill; it is percent-encoded so spaces, '&',
            '#' and newlines survive the trip through the URL.
        country_code: Prefix placed before ``phone_no`` (defaults to '+91').
        wait_seconds: Seconds to wait for the page before pressing Enter.

    After the wait, a simulated Enter key press is sent to whatever window
    has keyboard focus (presumably to submit the pre-filled message).
    """
    import time
    from urllib.parse import quote, urlencode
    from pynput.keyboard import Key, Controller

    # quote_via=quote encodes spaces as %20 and the leading '+' as %2B;
    # a raw '+' in a query string would otherwise be read as a space.
    params = urlencode(
        {'phone': country_code + str(phone_no), 'text': message},
        quote_via=quote,
    )
    webbrowser.open('https://web.whatsapp.com/send?' + params)

    time.sleep(wait_seconds)
    keyboard = Controller()
    keyboard.press(Key.enter)
    # Release the key so it is not left logically held down.
    keyboard.release(Key.enter)
def email(rec_email=None, text="Hello, It's ALICIAA here...", sub='ALICIAA'):
    """Send a plain-text e-mail through Gmail's SMTP server.

    Args:
        rec_email: Recipient address. Nothing is sent (and None is returned)
            unless it is a non-empty string containing '@gmail.com'.
        text: Message body.
        sub: Subject line.

    The sender account is read from the environment variables
    ``ASSISTANT_EMAIL_ADDRESS`` and ``ASSISTANT_EMAIL_PASSWORD`` so that
    credentials are never stored in source code.

    Raises:
        RuntimeError: If either environment variable is missing or empty.
        smtplib.SMTPException: If connecting, logging in or sending fails.
    """
    if not rec_email or '@gmail.com' not in rec_email:
        return

    from email.message import EmailMessage

    sender = os.environ.get('ASSISTANT_EMAIL_ADDRESS')
    password = os.environ.get('ASSISTANT_EMAIL_PASSWORD')
    if not sender or not password:
        raise RuntimeError(
            "Set ASSISTANT_EMAIL_ADDRESS and ASSISTANT_EMAIL_PASSWORD to send e-mail")

    # EmailMessage handles header formatting and non-ASCII body text.
    message = EmailMessage()
    message['Subject'] = sub
    message['From'] = sender
    message['To'] = rec_email
    message.set_content(text)

    # The context manager closes the connection even when sending fails.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.starttls()
        server.login(sender, password)
        server.send_message(message)
    print("Sent")
def downloadImage(query, n=4, folder='Downloads'):
    """Download the first ``n`` Google Images thumbnails for a spoken command.

    Args:
        query: Spoken command; the words 'images', 'image', 'search' and
            'show' are removed before searching.
        n: Number of images to save.
        folder: Directory the files are written to; created if missing.

    Files are saved as ``0.jpg``, ``1.jpg``, ... inside ``folder``. ``<img>``
    tags without a ``src`` attribute are skipped. Network and download
    errors propagate to the caller.
    """
    from urllib.parse import urlencode

    # 'images' must be removed before 'image', otherwise a stray 's' is left behind.
    for commandWord in ('images', 'image', 'search', 'show'):
        query = query.replace(commandWord, '')

    # Encode the parameters so spaces and '&', '#', '+' cannot break the URL.
    searchUrl = "https://www.google.com/search?" + urlencode(
        {'tbm': 'isch', 'q': ' '.join(query.split())})
    response = requests.get(searchUrl, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')

    # This class name is tied to the current results markup
    # (an older one was 't0fcAb'); update it when no images are found.
    imgTags = soup.find_all('img', class_='yWs4tf')

    os.makedirs(folder, exist_ok=True)

    count = 0
    for tag in imgTags:
        if count == n:
            break
        source = tag.get('src')
        if not source:
            continue
        urllib.request.urlretrieve(source, os.path.join(folder, str(count) + '.jpg'))
        count += 1
        print('Downloaded', count)