Skip to content

Commit

Permalink
fix(crawler): Only update the database if full page archival is enabled
Browse files (browse the repository at this point in the history)
  • Loading branch information
MohamedBassem committed May 26, 2024
1 parent 9198c1b commit 9d89f98
Showing 1 changed file with 19 additions and 19 deletions.
38 changes: 19 additions & 19 deletions apps/workers/crawlerWorker.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -388,9 +388,6 @@ async function archiveWebpage(
userId: string,
jobId: string,
) {
if (!serverConfig.crawler.fullPageArchive) {
return;
}
logger.info(`[Crawler][${jobId}] Will attempt to archive page ...`);
const urlParsed = new URL(url);
const baseUrl = `${urlParsed.protocol}//${urlParsed.host}`;
Expand Down Expand Up @@ -499,22 +496,25 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
});

// Do the archival as a separate last step as it has the potential for failure
const fullPageArchiveAssetId = await archiveWebpage(
htmlContent,
browserUrl,
userId,
jobId,
);
await db
.update(bookmarkLinks)
.set({
fullPageArchiveAssetId,
})
.where(eq(bookmarkLinks.id, bookmarkId));

if (oldFullPageArchiveAssetId) {
deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
() => ({}),
if (serverConfig.crawler.fullPageArchive) {
const fullPageArchiveAssetId = await archiveWebpage(
htmlContent,
browserUrl,
userId,
jobId,
);

await db
.update(bookmarkLinks)
.set({
fullPageArchiveAssetId,
})
.where(eq(bookmarkLinks.id, bookmarkId));

if (oldFullPageArchiveAssetId) {
deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
() => ({}),
);
}
}
}

0 comments on commit 9d89f98

Please sign in to comment.