-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWeb_scraping_bestiario_golarion.py
60 lines (41 loc) · 1.37 KB
/
Web_scraping_bestiario_golarion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import re
# Scrape the monster database table from the Golarion wiki and save it as CSV.
#
# The script opens the wiki page in Chrome, reads every <tr> of the table
# body, normalises a few columns and writes the result with pandas.

CHROMEDRIVER_PATH = "C:/Users/scutt/OneDrive - studenti.unimi.it/Python/Web Scraping/chromedriver.exe"
DATABASE_URL = "https://golarion.altervista.org/wiki/Database_Mostri"
TABLE_XPATH = '//*[@id="wiki_table_filter"]/tbody'
OUTPUT_CSV = 'C:/Users/scutt/OneDrive - studenti.unimi.it/Python/Web Scraping/bestiario.csv'

# Output column names, in the same order as the table cells that feed them.
COLUMNS = ['Nome', 'Gs', 'Terreno', 'Clima', 'Tipo', 'Sottotipo', 'Fonte']

# An integer optionally followed by "/<integer>" (e.g. "1/2", "7", "10").
# The previous pattern used [1-9]+, which cut "10" down to "1" and "20" to "2".
GS_PATTERN = re.compile(r"\d+(?:/\d+)?")

# Print a progress line every this many table rows.
PROGRESS_EVERY = 25


def _extract_gs(text):
    """Return the first integer or fraction found in *text*.

    When *text* contains no digits at all, the stripped text is returned
    unchanged instead of raising, so one odd cell cannot abort the scrape.
    """
    match = GS_PATTERN.search(text)
    return match.group(0) if match else text.strip()


def _blank_if_dash(text):
    """Return an empty string when the cell is the "-" placeholder."""
    return "" if text == "-" else text


rows = []

# NOTE(review): passing the chromedriver path positionally is only accepted by
# older Selenium releases; newer ones expect a Service object - confirm the
# installed version before upgrading.
driver = webdriver.Chrome(CHROMEDRIVER_PATH)
try:
    driver.get(DATABASE_URL)
    # find_element("xpath", ...) is the locator form available on both old
    # and new Selenium versions ("xpath" is the value of By.XPATH).
    table = driver.find_element("xpath", TABLE_XPATH)
    for i, row in enumerate(table.find_elements("xpath", './tr')):
        if i % PROGRESS_EVERY == 0:
            print('row', i)

        # Read each cell's text exactly once: every .text access is a
        # round-trip to the browser.
        cells = [td.text for td in row.find_elements("xpath", './td')]
        if len(cells) < len(COLUMNS):
            # Rows without a full set of <td> cells (e.g. header or spacer
            # rows) carry no monster data.
            continue

        nome, gs, terreno, clima, tipo, sottotipo, fonte = cells[:len(COLUMNS)]
        rows.append({
            'Nome': nome,
            'Gs': _extract_gs(gs),
            'Terreno': terreno,
            'Clima': _blank_if_dash(clima),
            'Tipo': tipo,
            'Sottotipo': _blank_if_dash(sottotipo),
            'Fonte': fonte,
        })
finally:
    # Always shut the browser session down, even if scraping failed midway.
    driver.quit()

# Passing columns= keeps the header row and column order even when no rows
# were scraped.
df = pd.DataFrame(rows, columns=COLUMNS)
df.to_csv(OUTPUT_CSV, index=False, encoding='utf-8')