-
Notifications
You must be signed in to change notification settings - Fork 0
/
extractHotelDetail.ts
77 lines (72 loc) · 2.16 KB
/
extractHotelDetail.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import { config } from "dotenv";
import { scrapeHotel } from "./src/@booking/accommodation/accommodationScrap";
import {createWriteStream, existsSync, mkdirSync} from "fs"
import path from "path";
const { createConnection } = require("typeorm");
config();
/**
 * One entry of the `links.json` file consumed by this script.
 *
 * `link` is handed to `scrapeHotel`; `typeCode` and `typeName` are copied
 * unchanged onto every scraped record that is written out.
 */
interface OverralLink {
  /** URL of the accommodation page to scrape. */
  link: string;
  /** Accommodation-type code attached to the scraped record. */
  typeCode: string;
  /** Accommodation-type name attached to the scraped record. */
  typeName: string;
}
// Output folder for this run: dist/<QUERY>, created at import time if it does
// not exist yet. QUERY comes from the environment (loaded by dotenv above);
// path.join throws a TypeError when it is undefined.
const directory = path.join("dist",process.env.QUERY);
if (!existsSync(directory)) {
mkdirSync(directory, { recursive: true });
}
// Write stream of the chunk file currently being produced. Assigned in
// scrapData and written to by scrapeWithDelay.
let ws;
// True until the first record of the current chunk file has been written;
// scrapeWithDelay uses it to decide whether a "," separator is needed.
let isFirstWrite = true;
/**
 * Splits `arr` into consecutive slices of at most `size` elements, keeping
 * the original order. The last slice may be shorter; an empty input yields
 * an empty result. The input array is not modified.
 *
 * @param arr - Items to split.
 * @param size - Maximum number of items per chunk. Must be >= 1; fractional
 *   values are truncated towards zero.
 * @returns A new array of chunks.
 * @throws RangeError when `size` is NaN or smaller than 1. Such values would
 *   otherwise never advance the loop index and the loop would not terminate.
 */
function chunkArray<T>(arr: readonly T[], size: number): T[][] {
  // `!(size >= 1)` also rejects NaN and undefined, which a plain `size < 1` would let through.
  if (!(size >= 1)) {
    throw new RangeError(`chunkArray: size must be a number >= 1, received ${size}`);
  }
  const step = Math.floor(size);
  const chunks: T[][] = [];
  for (let start = 0; start < arr.length; start += step) {
    chunks.push(arr.slice(start, start + step));
  }
  return chunks;
}
/**
 * Scrapes every link listed in `./dist/<QUERY>/links.json` and stores the
 * results as JSON arrays, one file per chunk of links:
 * `dist/<QUERY>/accommodations_<n>.json` (n starts at 1).
 *
 * Environment:
 * - `QUERY`     - name of the sub-folder under `dist`.
 * - `CHUNKSIZE` - number of links per output file (defaults to 5).
 *
 * Errors raised while loading the links or scraping are logged with
 * `console.log` and stop the run; they are not rethrown. The chunk file that
 * was open when the error happened is still closed with `]`, so it stays
 * valid JSON containing the records written so far.
 *
 * NOTE(review): the links file is resolved by `import()` relative to this
 * module, while the output folder is relative to the working directory -
 * confirm both point at the same `dist`.
 */
const scrapData = async () => {
  const directory = path.join("dist", process.env.QUERY);
  if (!existsSync(directory)) {
    mkdirSync(directory, { recursive: true });
  }
  try {
    const links = (await import(`./dist/${process.env.QUERY}/links.json`))
      .default as OverralLink[];
    const CHUNKSIZE = Number(process.env.CHUNKSIZE ?? "5");
    // A zero, negative or non-numeric chunk size would make the chunking loop misbehave.
    if (!Number.isInteger(CHUNKSIZE) || CHUNKSIZE < 1) {
      throw new Error(
        `CHUNKSIZE must be a positive integer, received "${process.env.CHUNKSIZE}"`
      );
    }
    const linkChunks = chunkArray(links, CHUNKSIZE);
    let chunkIndex = 1;
    for (const chunk of linkChunks) {
      // Keep a local handle: the module-level `ws` is reassigned on every
      // iteration, so callbacks must not reach the stream through it.
      const stream = createWriteStream(
        path.join(directory, `accommodations_${chunkIndex}.json`),
        { flags: "w" }
      );
      ws = stream;
      isFirstWrite = true;
      // Writes issued before the file is open are buffered in order, so the
      // bracket can be written right away instead of waiting for "ready".
      stream.write("[");
      try {
        await scrapeWithDelay(chunk);
      } finally {
        // Close the JSON array and wait until everything is flushed and the
        // stream has finished, even when scraping failed part-way through.
        await new Promise<void>((resolve, reject) => {
          stream.once("error", reject);
          stream.once("finish", () => resolve());
          stream.end("]");
        });
      }
      chunkIndex++;
    }
  } catch (error) {
    console.log(error);
  }
};
/**
 * Scrapes the given links one after another and appends each result to the
 * module-level write stream `ws` as a JSON object.
 *
 * Every record is the scraped accommodation extended with the link's
 * `typeCode` and `typeName`. Records are separated by "," - the separator is
 * skipped for the first record, tracked through the module-level
 * `isFirstWrite` flag. Each record is also logged to the console, and the
 * function pauses 100 ms after every link.
 *
 * @param links - Entries with `link`, `typeCode` and `typeName`.
 */
async function scrapeWithDelay(links) {
  for (const { link, typeCode, typeName } of links) {
    const scraped = await scrapeHotel(link);
    const record = { ...scraped, typeCode, typeName };

    if (isFirstWrite) {
      // Nothing precedes the first record, so no separator is written.
      isFirstWrite = false;
    } else {
      ws.write(",");
    }

    ws.write(JSON.stringify(record));
    console.log(record);

    // Short pause between two consecutive scrapes.
    await delay(100);
  }
}
/**
 * Returns a promise that is resolved (with no value) by a `setTimeout`
 * timer scheduled for `ms` milliseconds.
 *
 * @param ms - Timer duration in milliseconds.
 */
export function delay(ms) {
  return new Promise(function (done) {
    setTimeout(done, ms);
  });
}
// Script entry point: start scraping as soon as this module is loaded.
// The returned promise is not awaited here.
scrapData();