-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02-sessions.r
60 lines (45 loc) · 1.59 KB
/
02-sessions.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
library(tidyverse)
library(rvest)
fs::dir_create("data")

# Parse one saved session page into a tibble with one row per abstract link.
#
# `path` is the path to a single HTML file. The result has the columns
# `session` (the path itself), `title`, `chairs`, `discussants` (first match
# of each selector, NA when nothing matches) and `abstract` (the `href` of
# every link whose target contains '/abstract').
read_session <- function(path) {
  h <- read_html(path)

  abstracts <- html_nodes(h, xpath = "//a[contains(@href, '/abstract')]") %>%
    html_attr("href")

  # `tibble()` recycles the length-1 session fields to the length of
  # `abstract`; with zero abstract links that length is 0 and the whole
  # session would silently disappear, so keep it with a missing abstract
  if (length(abstracts) == 0) {
    abstracts <- NA_character_
  }

  tibble::tibble(
    session = path,
    title = html_node(h, "div[class='session_link']") %>%
      html_text(),
    chairs = html_node(
      h,
      xpath = "//div[contains(text(), 'Chair/s')]/following::div"
    ) %>%
      html_text(),
    discussants = html_node(
      h,
      xpath = "//div[contains(text(), 'Discussant/s')]/following::div"
    ) %>%
      html_text(),
    abstract = abstracts
  )
}

# plain character paths, so that `session` is an ordinary character column
session_files <- as.character(fs::dir_ls("html/sessions", regexp = "session"))

# Parse every page, then stack the pieces once instead of re-binding a growing
# table on each iteration. Pages are stacked last-file-first, i.e. the row
# order obtained by prepending each page's rows to the running result.
d <- bind_rows(lapply(rev(session_files), read_session))
# Tidy the scraped columns in place:
# - collapse runs of whitespace in the names of chairs and discussants
# - reduce `session` and `abstract` to their last path component
d <- mutate(
  d,
  chairs = str_squish(chairs),
  discussants = str_squish(discussants),
  session = fs::path_file(session),
  abstract = fs::path_file(abstract)
)
# Open the table in the data viewer only when the script is run from an
# interactive session; batch runs skip the viewer and carry on
if (interactive()) {
  View(d)
}
# Session-level table: drop the per-abstract column and collapse the rows
# that were repeated once per abstract
# n = 131 unique sessions
s <- distinct(select(d, -abstract))

# session titles are NOT unique
s$title[duplicated(s$title)]

# chairs and discussants are missing for special (non-paper-based) panels,
# and some individuals chaired or discussed multiple times; there is also a
# special case of 'Shared by Panellists' in `chairs` and `discussants`
count(s, chairs, sort = TRUE)
count(s, discussants, sort = TRUE)

# `str_detect()` returns NA for sessions with missing chairs/discussants, and
# indexing with a logical NA yields an NA element per such session; `which()`
# keeps only the positions that are actually TRUE

# only n = 1 case of multiple chairs
s$chairs[which(str_detect(s$chairs, ","))]

# n = 5 cases of multiple discussants (beyond 'Shared by Panellists' cases)
s$discussants[which(str_detect(s$discussants, ","))]
# Open question: chairs and discussants come without any affiliation. Could
# it be recovered from the abstracts, for those who also authored a paper?

# Uneconomical export: `d` has one row per abstract, so the session-level
# columns (title, chairs, discussants) are repeated on each of those rows
sessions_tsv <- "data/sessions.tsv"
readr::write_tsv(d, sessions_tsv)

# wip