forked from hadley/web-scraping
-
Notifications
You must be signed in to change notification settings - Fork 0
/
forbes-live.R
30 lines (19 loc) · 1.05 KB
/
forbes-live.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
library(rvest)
# Static site -------------------------------------------------------------
# read_html() parses only the HTML the server sends back; no JavaScript on
# the page is executed.
html <- read_html("https://www.forbes.com/top-colleges/")
# Probe for the table rows, then for the table itself.
html_elements(html, ".TopColleges2023_tableRow__BYOSU")
html_elements(html, ".TopColleges2023_table__Ffgau")
# Where is the data???
# Dynamic site ------------------------------------------------------------
html <- read_html_live("https://www.forbes.com/top-colleges/")
# read_html_live() drives a real Chrome browser in the background. Call
# $view() to watch it. The live object also offers commands for simulating
# a human visitor (e.g. clicking buttons or typing text).
html$view()
rows <- html_elements(html, ".TopColleges2023_tableRow__BYOSU")
# Two alternative selectors that do not depend on the suspicious random
# suffix in the class name
html_elements(html, '[role="row"]')
html_elements(html, '[class^="TopColleges2023_tableRow"]')
# Pull one value out of every table row. html_element() (singular) returns
# exactly one result per input row (a missing node for rows without a match),
# so each vector below stays aligned with `rows`.

# Organization name for each row (presumably the college name -- confirm
# against the rendered page).
rows |>
  html_element(".TopColleges2023_organizationName__J1lEV") |>
  html_text()

# Text of the ".grant-aid" cell converted to numeric; parse_number() keeps the
# first number it finds and ignores surrounding non-numeric characters.
rows |>
  html_element(".grant-aid") |>
  html_text() |>
  readr::parse_number()