-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.js
65 lines (56 loc) · 1.84 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/** @format */
const axios = require("axios");
const cheerio = require("cheerio");
const countryList = require("./countryList").countryList;
const defaultUrl =
"https://en.wikipedia.org/wiki/Travel_restrictions_related_to_the_2019%E2%80%9320_coronavirus_pandemic";
// Index every country name by its position in countryList so that a name
// can be checked with a single property lookup instead of an array scan.
const dict = {};
countryList.forEach(function(countryName, position) {
  dict[countryName] = position;
});
/**
 * Fetches a travel-restrictions page and collects, for every country whose
 * name is a key of the module-level `dict`, the restriction notes found in the
 * <ul> lists that follow the page's <h3> headings.
 *
 * @param {string} [url=defaultUrl] - Address of the page to fetch.
 * @returns {Promise<Object<string, string[]>|undefined>} Resolves to an object
 *   mapping a country title to its list of distinct restriction texts. When
 *   the request or the parsing fails, the error is logged with console.error
 *   and the promise resolves to undefined.
 */
module.exports.scrapeData = function(url = defaultUrl) {
  return axios(url)
    .then(response => {
      const $ = cheerio.load(response.data);
      const countriesBannedFlights = {};

      // Step through the siblings that follow the <h3> selection, stopping
      // when there is nothing left or when an <h2> is reached.
      for (
        let elementToTraverse = $("h3").next();
        elementToTraverse.length !== 0 &&
        elementToTraverse.prop("tagName") !== "H2";
        elementToTraverse = elementToTraverse.next()
      ) {
        // Only bulleted lists carry per-country entries.
        if (elementToTraverse.prop("tagName") !== "UL") continue;

        elementToTraverse.children().each((index, childElement) => {
          const listItem = $(childElement);
          // The title attribute of the item's first link is used as the country name.
          const countryTitle = listItem.find("a").attr("title");
          if (countryTitle === undefined) return;

          // Own-property check: a truthiness test on the stored index would
          // drop the country at index 0 and would accept inherited keys such
          // as "constructor".
          if (!Object.prototype.hasOwnProperty.call(dict, countryTitle)) return;

          const travelInfo = listItem
            .text()
            .replace(/\[([0-9]+)\]/g, "") // strip bracketed numeric markers such as "[12]"
            .replace(`${countryTitle}:`, "") // strip the leading "<country>:" label
            .trim();

          const knownInfo = countriesBannedFlights[countryTitle];
          if (knownInfo === undefined) {
            // First entry for this country. When the item text is nothing but
            // the country name, a fixed generic message is stored instead.
            countriesBannedFlights[countryTitle] =
              travelInfo === countryTitle
                ? [
                    "Persons Travelling from South Korea, Japan or China have entry bans."
                  ]
                : [travelInfo];
            return;
          }

          // Later entries: skip duplicates and items that only repeat the name.
          if (travelInfo !== countryTitle && !knownInfo.includes(travelInfo)) {
            knownInfo.push(travelInfo);
          }
        });
      }

      return countriesBannedFlights;
    })
    .catch(console.error);
};