-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
88 lines (76 loc) · 2.99 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import csv
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Proceedings index page the scrape starts from.
PROCEEDINGS_URL = "https://dl.acm.org/conference/cpr/proceedings"
# Element clicked before the proceedings entries are read (presumably it
# expands the list -- confirm against the live page).
PROCEEDINGS_TOGGLE_XPATH = '//*[@id="conference-C-0"]/div/span/span'
PROCEEDINGS_ENTRY_XPATH = '//*[@id="conference-C-0"]/div/ul/li'
# Collapsible sections on a single proceedings page.
SECTION_XPATH = '//*[@id="pb-page-content"]/div/main/div[4]/div/div[2]/div[1]/div/div[2]/div/div/div'
# Anchor text that marks an abstract as truncated on the listing page.
MORE_LINK_TEXT = '… (More)'
# Column order of the checkpoint CSV files.
COLUMNS = ['title', 'author', 'date', 'doi', 'ab']
# Proceedings links with a smaller index are skipped. Raise this to resume an
# interrupted run; rows scraped before the restart are not reloaded.
START_INDEX = 0
# Pause (seconds) used to let the page settle after navigation / tab changes.
PAGE_SETTLE_SECONDS = 2.0
# Implicit wait (seconds) applied to every element lookup.
IMPLICIT_WAIT_SECONDS = 10


def _best_effort(lookup):
    """Return ``lookup()``, or ``''`` if Selenium raises while evaluating it."""
    try:
        return lookup()
    except WebDriverException:
        return ''


def _expand_sections(driver):
    """Click every section toggle whose ``aria-expanded`` attribute is 'false'."""
    for section in driver.find_elements(By.XPATH, SECTION_XPATH):
        toggle = section.find_element(By.TAG_NAME, 'a')
        if toggle.get_attribute('aria-expanded') == 'false':
            toggle.click()


def _read_full_abstract(driver, url):
    """Load *url* in a temporary tab and return its abstract paragraph text.

    The temporary tab is always closed and focus is always returned to the
    window that was active on entry, even when the lookup fails.

    Returns:
        The abstract text, or ``None`` if no new tab could be opened.

    Raises:
        WebDriverException: if the page or its abstract cannot be read.
    """
    origin = driver.current_window_handle
    known = set(driver.window_handles)
    driver.execute_script("window.open('');")
    opened = [handle for handle in driver.window_handles if handle not in known]
    if not opened:
        return None
    driver.switch_to.window(opened[0])
    try:
        driver.get(url)
        time.sleep(PAGE_SETTLE_SECONDS)
        section = driver.find_element(By.CLASS_NAME, 'abstractSection')
        return section.find_element(By.TAG_NAME, 'p').text
    finally:
        driver.close()
        time.sleep(PAGE_SETTLE_SECONDS)
        driver.switch_to.window(origin)


def _abstract_for(driver, award):
    """Return the abstract for one listing item.

    The text shown on the listing page is used as-is unless it carries the
    truncation link, in which case the item's own page is opened to read the
    full abstract. Returns ``''`` when the item has no abstract paragraph or
    the full abstract cannot be fetched.
    """
    try:
        paragraph = award.find_element(By.TAG_NAME, 'p')
        snippet = paragraph.text
        anchors = paragraph.find_elements(By.TAG_NAME, 'a')
        truncated = any(anchor.text == MORE_LINK_TEXT for anchor in anchors)
    except WebDriverException:
        return ''
    if not truncated:
        # Short abstracts have no "More" link; keep the text already read.
        return snippet
    print('true')
    try:
        detail_url = award.find_element(By.TAG_NAME, 'a').get_attribute('href')
        print(detail_url)
        full_text = _read_full_abstract(driver, detail_url)
    except WebDriverException:
        return ''
    if full_text is None:
        return ''
    print(full_text)
    return full_text


def _date_and_doi(award):
    """Return ``(date, doi)`` from the item's detail spans, '' where missing."""
    try:
        detail = award.find_element(By.CLASS_NAME, 'issue-item__detail')
        spans = detail.find_elements(By.TAG_NAME, 'span')
        date = spans[0].text if len(spans) > 0 else ''
        doi = spans[2].text if len(spans) > 2 else ''
    except WebDriverException:
        return '', ''
    return date, doi


def _scrape_item(driver, award):
    """Build one ``[title, author, date, doi, abstract]`` row for an item."""
    title = _best_effort(
        lambda: award.find_element(By.CLASS_NAME, 'issue-item__title')
        .find_element(By.TAG_NAME, 'a').text)
    author = _best_effort(
        lambda: award.find_element(By.TAG_NAME, 'ul')
        .find_element(By.TAG_NAME, 'a').get_attribute('title'))
    date, doi = _date_and_doi(award)
    return [title, author, date, doi, _abstract_for(driver, award)]


def main():
    """Scrape every proceedings page and write a cumulative CSV per page.

    After each proceedings page, all rows collected so far are written to
    ``f<index>.csv`` in the working directory, where ``<index>`` is the
    position of that page in the proceedings list.
    """
    table = []
    driver = webdriver.Chrome()
    try:
        driver.get(PROCEEDINGS_URL)
        driver.implicitly_wait(IMPLICIT_WAIT_SECONDS)
        driver.find_element(By.XPATH, PROCEEDINGS_TOGGLE_XPATH).click()
        entries = driver.find_elements(By.XPATH, PROCEEDINGS_ENTRY_XPATH)
        print(entries)
        links = [
            entry.find_element(By.TAG_NAME, 'a').get_attribute('href')
            for entry in entries
        ]
        for index, link in enumerate(links):
            if index < START_INDEX:
                continue
            print(link)
            print(str(index) + ' / ' + str(len(links)))
            driver.get(link)
            _expand_sections(driver)
            time.sleep(PAGE_SETTLE_SECONDS)
            awards = driver.find_elements(By.CLASS_NAME, 'issue-item__content')
            # A separate name for the item position keeps `index` intact for
            # the checkpoint file name below.
            for position, award in enumerate(awards):
                print(str(position) + ' / ' + str(len(awards)))
                row = _scrape_item(driver, award)
                table.append(row)
                print(row[0])
            df = pd.DataFrame(table, columns=COLUMNS)
            df.to_csv('f' + str(index) + '.csv', index=False)
    finally:
        # Always release the browser, including on errors and Ctrl-C.
        driver.quit()


if __name__ == "__main__":
    main()