-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindiarailinfo.py
88 lines (56 loc) · 2.01 KB
/
indiarailinfo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import requests
# Browser configuration: tolerate certificate errors, use an incognito
# profile, and run Chrome without opening a visible window.
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')
# Path to the chromedriver executable; adjust it for the local machine.
# The options object must be handed to the driver explicitly, otherwise the
# flags configured above are silently ignored.
driver = webdriver.Chrome('./chromedriver', options=options)
url = "https://indiarailinfo.com/trains/passenger/0/0/0/0"
driver.get(url)
# Accumulates one list of cell texts per scraped row (appended to by
# process_divs) and is later turned into the output DataFrame.
main = []
def process_divs(all_line, l):
    """Append one row of cell texts to the module-level ``main`` list.

    For each of the first ``l`` entries of ``all_line`` the ``.text`` of
    every item yielded by iterating that entry is collected; empty strings
    are replaced with the literal ``'NULL'``. The resulting list is appended
    to ``main`` as a single row.

    Args:
        all_line: Indexable sequence; each entry is iterable and yields
            objects exposing a ``.text`` attribute.
        l: Number of leading entries of ``all_line`` to process.

    Returns:
        None. Rows are accumulated in ``main`` as a side effect.
    """
    for index in range(l):
        cells = all_line[index]
        # Read every text first, then substitute the placeholder for blanks.
        texts = [cell.text for cell in cells]
        row = ['NULL' if text == "" else text for text in texts]
        main.append(row)
# Column labels for the output table. pandas requires every row collected in
# ``main`` to have exactly this many values.
col = ['No', 'Name', 'Type', 'Zone', 'TTChange', 'Date From', 'Date To', 'From', 'Dep', 'To', 'Arr',
       'Duration', 'Halts', 'Dep Days', 'Classes', 'Distance', 'Speed', 'Return']

# Zero-based page counter; it is interpolated into the listing URL below.
count = 0
try:
    while True:
        # find_element_by_class_name raises NoSuchElementException when the
        # button is absent (it never returns a falsy value), so the exception
        # is the signal that the last page has been reached.
        try:
            next_btn = driver.find_element_by_class_name('nextbtn')
        except NoSuchElementException:
            print('Next page does not exists')
            break
        next_btn.click()

        url = 'https://indiarailinfo.com/trains/passenger/0/' + str(count) + '/0/0'
        print(url)
        # A timeout keeps a stalled connection from hanging the scrape forever.
        page = requests.get(url, timeout=30)
        soup = BeautifulSoup(page.text, 'html.parser')
        data = soup.find('div', attrs={'class': 'srhres newbg inline alt'})
        if data is None:
            # soup.find returns None when the results container is missing;
            # stop cleanly instead of failing with AttributeError.
            print('No results container found on ' + url)
            break
        all_line = data.find_all('div', attrs={'style': 'line-height:20px;'})
        process_divs(all_line, len(all_line))

        # Rewrite the CSV after every page so partial results survive a crash.
        df = pd.DataFrame(main, columns=col)
        df.to_csv('railway_data.csv')
        print(df)
        print('\nPage No ... ' + str(count))
        count += 1
finally:
    # Release the browser exactly once, whether the loop ended normally or
    # an unexpected error propagated.
    driver.quit()