// scrape-profile-usernames-search.js
const puppeteer = require('puppeteer');
const { URL } = require('url');
const { loginToLinkedIn, writeInFile } = require('./utils');
/**
 * Collect the profile usernames linked from the search results in the current document.
 *
 * The function reads the global `document`, so it is meant to be executed in
 * the page context (it is passed to `puppeteerPage.evaluate`). Keep it
 * self-contained: everything it needs is declared inside its body.
 *
 * @returns {string[]} The `<username>` segment of every `www.linkedin.com/in/<username>`
 *   result link, in DOM order. Links that are not profile URLs are skipped.
 */
const scrapePageProfileUsernamesEvaluator = () => Array.from(
  document.querySelectorAll('.search-result__info .search-result__result-link'),
)
  // Dots are escaped and the capture stops at the first `/`, `?` or `#`, so
  // only the username segment is kept whatever follows it in the URL.
  .map((userElement) => /www\.linkedin\.com\/in\/([^\/?#]+)/.exec(userElement.href))
  // `exec` yields null for a non-profile link: drop it instead of throwing.
  .filter((profileMatch) => profileMatch !== null)
  .map((profileMatch) => profileMatch[1]);
/**
 * Collect profile usernames from consecutive search result pages.
 *
 * Starting at `pageIndex`, each page is loaded by setting the `page` query
 * parameter on a private copy of `searchUrl`. Browsing stops at the first
 * page on which no username is found.
 *
 * @param {object} puppeteerPage - Page object used to navigate (`goto`), wait
 *   (`waitForSelector`) and run the username evaluator (`evaluate`).
 * @param {URL} searchUrl - Search URL to paginate. It is copied, so the
 *   caller's instance is left untouched.
 * @param {number} [pageIndex=1] - Number of the first page to scrape.
 * @param {string[]} [usernamesResults=[]] - Usernames already collected; they
 *   come first in the returned list and the array itself is not modified.
 * @returns {Promise<string[]>} `usernamesResults` followed by every username found.
 */
const scrapeProfileUsernames = async (
  puppeteerPage,
  searchUrl,
  pageIndex = 1,
  usernamesResults = [],
) => {
  // Work on copies so that neither the caller's URL nor its array is mutated
  const pageUrl = new URL(searchUrl.href);
  const collectedUsernames = [...usernamesResults];

  // The loop ends from the inside, on the first page without any username
  for (let currentPage = pageIndex; ; currentPage += 1) {
    // Set page number query param in the search URL
    pageUrl.searchParams.set('page', currentPage);
    console.info(`Scrape page ${pageUrl}`);

    // Go to the results page and wait for it to be rendered
    await puppeteerPage.goto(pageUrl.href);
    await puppeteerPage.waitForSelector('.search-results-page');

    // Retrieve profile usernames in the current page
    const pageUsernames = await puppeteerPage.evaluate(scrapePageProfileUsernamesEvaluator);

    // No new username: this is treated as the end of the results
    if (!pageUsernames || pageUsernames.length === 0) {
      console.log(`No username found on page ${pageUrl}`);
      return collectedUsernames;
    }

    console.log(`Found ${pageUsernames.length} usernames on page ${pageUrl}:`, pageUsernames);
    collectedUsernames.push(...pageUsernames);
  }
};
/**
 * Script entry point: log in, scrape every profile username reachable from
 * the given search URL, and write them to `./profile-usernames.json`.
 *
 * Reads the session cookie from `process.argv[2]` and the search URL from
 * `process.argv[3]`.
 *
 * @returns {Promise<void>} Settles once the results are written and the browser is closed.
 * @throws {Error} 'Missing input params' when either argument is absent.
 * @throws {TypeError} When the search URL argument cannot be parsed by `URL`.
 */
const run = async () => {
  // Script params, validated before launching anything so that bad input
  // cannot leave a browser process running.
  const [, , sessionCookie, rawSearchUrl] = process.argv;
  if (!sessionCookie || !rawSearchUrl) {
    throw new Error('Missing input params');
  }
  const searchUrl = new URL(rawSearchUrl);

  // Initialize puppeteer
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: {
      width: 1000,
      height: 3000,
    },
  });

  try {
    // Login to LinkedIn
    await loginToLinkedIn(browser, sessionCookie);

    // Scrape all profile usernames from the given search URL
    const page = await browser.newPage();
    const usernames = await scrapeProfileUsernames(page, searchUrl);
    console.log('Results ->', usernames);

    // Write results in a file. Awaiting is harmless if `writeInFile` is
    // synchronous, and surfaces its failure here if it returns a promise.
    await writeInFile(
      './profile-usernames.json',
      { results: usernames, offset: 0, limit: 100 },
    );

    await page.close();
  } finally {
    // Close the browser whether the steps above succeeded or threw
    await browser.close();
  }
};
// Start the script. A failed run is reported and flagged with a non-zero exit
// status instead of being left as an unhandled promise rejection.
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});