-
Notifications
You must be signed in to change notification settings - Fork 406
/
redis-cache.js
27 lines (24 loc) · 1.01 KB
/
redis-cache.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
const HCCrawler = require('headless-chrome-crawler');
const RedisCache = require('headless-chrome-crawler/cache/redis');
// Single Redis-backed cache instance handed to every crawler created by launch().
// NOTE(review): assumes a Redis server is reachable at this host/port — confirm for your environment.
const cache = new RedisCache({
  host: '127.0.0.1',
  port: 6379,
});
/**
 * Launches a crawler wired to the module-level `cache`.
 *
 * Every successful request is reported on stdout as `Requested <url>.`.
 *
 * @param {boolean} persistCache - Forwarded to HCCrawler.launch; when true the
 *   cache won't be cleared when the crawler is closed.
 * @returns {*} Whatever HCCrawler.launch returns (callers await it to obtain the crawler).
 */
function launch(persistCache) {
  const reportSuccess = (result) => {
    const { url } = result.options;
    console.log(`Requested ${url}.`);
  };

  const crawlerOptions = {
    onSuccess: reportSuccess,
    cache,
    persistCache,
  };

  return HCCrawler.launch(crawlerOptions);
}
/**
 * Demo entry point: runs two crawl sessions back to back against the same cache.
 *
 * Session 1 is launched with persistCache = true, so its cache survives close().
 * Session 2 is launched with persistCache = false and re-queues a URL that
 * session 1 already queued.
 *
 * Each crawler is closed in a `finally` block so that a failing queue()/onIdle()
 * does not leave the launched crawler open, and any failure is reported and
 * turned into a non-zero exit code instead of an unhandled promise rejection.
 */
(async () => {
  const crawler1 = await launch(true); // Launch the crawler with persisting cache
  try {
    await crawler1.queue('https://example.com/');
    await crawler1.queue('https://example.net/');
    await crawler1.onIdle();
  } finally {
    await crawler1.close(); // Close the crawler but cache won't be cleared
  }

  const crawler2 = await launch(false); // Launch the crawler again without persisting cache
  try {
    await crawler2.queue('https://example.net/'); // This queue won't be requested because cache remains
    await crawler2.queue('https://example.org/');
    await crawler2.onIdle();
  } finally {
    await crawler2.close();
  }
})().catch((error) => {
  // Surface the failure and signal it to the shell without killing pending cleanup.
  console.error(error);
  process.exitCode = 1;
});