-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgetJobs.js
68 lines (52 loc) · 1.93 KB
/
getJobs.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
const puppeteer = require('puppeteer');
const dotenv = require('dotenv');
// Let dotenv populate process.env first, so the lookup below can see its values.
dotenv.config();

// Address of the job listings page to scrape, taken from the JOBSURL environment variable.
const { JOBSURL: jobsurl } = process.env;

console.log('starting');
module.exports = async function getJobs(){
function extractItems(){
const extractedElements = document.querySelectorAll('.oracletaleocwsv2-accordion-head');
const items = [];
for (let element of extractedElements) {
const [title, division, location, id] = element.innerText.split('\n')
items.push({title, division, location, id});
}
return items;
}
async function scrapeInfiniteScrollItems(
page,
extractItems,
itemTargetCount,
scrollDelay = 1000,
){
let items = [];
try {
let previousHeight;
while (items.length < itemTargetCount) {
items = await page.evaluate(extractItems);
previousHeight = await page.evaluate('document.body.scrollHeight');
await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
await page.waitFor(scrollDelay);
}
} catch (e) {
//nothing here, waitForFunction has to timeout
}
return items;
}
const browser = await puppeteer.launch({
// executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
// userDataDir : '/Users/jtarver/Library/Application Support/Google/Chrome',
headless: true
});
const page = await browser.newPage();
page.setViewport({width: 1280, height: 926});
await page.goto(jobsurl);
console.log('opened page');
await page.waitFor(5000);
const items = await scrapeInfiniteScrollItems(page, extractItems, 100);
console.log('done');
await browser.close();
return items;
};
// Start a scrape as soon as this module is loaded. The result is discarded
// here; the rejection handler keeps a failed scrape from surfacing as an
// unhandled promise rejection and records the failure in the exit code.
module.exports().catch((err) => {
  console.error('getJobs failed:', err);
  process.exitCode = 1;
});