From 212cae6b7381316731158f63a7f454d7ad233784 Mon Sep 17 00:00:00 2001
From: gazconroy
Date: Mon, 18 Nov 2024 16:53:03 +0000
Subject: [PATCH 1/2] fix(specs): additional safetyChecks (#4128)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Gary Conroy
Co-authored-by: Clément Vannicatte
---
Review notes (this area is not part of the commit message):
  * Hand-written spec change: rewords two descriptions and adds the
    `maxFailedUrls` property to `beforeIndexPublishing`.
  * NOTE(review): `maxFailedUrls` is described as a number of pages but is
    typed `number` with no bounds, unlike `maxLostRecordsPercentage` above it.
    Confirm whether `integer` with a `minimum` was intended.

 specs/crawler/common/schemas/configuration.yml | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/specs/crawler/common/schemas/configuration.yml b/specs/crawler/common/schemas/configuration.yml
index be07a16902..58e8fa797c 100644
--- a/specs/crawler/common/schemas/configuration.yml
+++ b/specs/crawler/common/schemas/configuration.yml
@@ -444,14 +444,14 @@ extraParameters:
 
 safetyChecks:
   type: object
-  description: Safety checks for ensuring data integrity between crawls.
+  description: Checks to ensure the crawl was successful.
   properties:
     beforeIndexPublishing:
       $ref: '#/beforeIndexPublishing'
 
 beforeIndexPublishing:
   type: object
-  description: Checks triggered after the crawl finishes and before the records are added to the Algolia index.
+  description: These checks are triggered after the crawl finishes but before the records are added to the Algolia index.
   properties:
     maxLostRecordsPercentage:
       type: number
@@ -464,6 +464,11 @@ beforeIndexPublishing:
       minimum: 1
       maximum: 100
       default: 10
+    maxFailedUrls:
+      type: number
+      description: |
+        Stops the crawler if a specified number of pages fail to crawl.
+        If undefined, the crawler won't stop if it encounters such errors.
 
 schedule:
   type: string

From aae4ddb7dfe79b7e44e5735250ce45f966dd3132 Mon Sep 17 00:00:00 2001
From: algolia-bot
Date: Mon, 18 Nov 2024 17:06:50 +0000
Subject: [PATCH 2/2] fix(specs): additional safetyChecks (#4128) (generated) [skip ci]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: gazconroy
Co-authored-by: Gary Conroy
Co-authored-by: Clément Vannicatte
---
Review notes (this area is not part of the commit message):
  * Marked "(generated)" in the subject: both bundled specs receive the same
    two hunks, carrying the same description rewording and the same
    `maxFailedUrls` property as PATCH 1/2.

 specs/bundled/crawler.doc.yml | 11 ++++++++---
 specs/bundled/crawler.yml     | 11 ++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/specs/bundled/crawler.doc.yml b/specs/bundled/crawler.doc.yml
index 634ff565e0..961eab5566 100644
--- a/specs/bundled/crawler.doc.yml
+++ b/specs/bundled/crawler.doc.yml
@@ -2881,8 +2881,8 @@ components:
     beforeIndexPublishing:
       type: object
       description: >-
-        Checks triggered after the crawl finishes and before the records are
-        added to the Algolia index.
+        These checks are triggered after the crawl finishes but before the
+        records are added to the Algolia index.
       properties:
         maxLostRecordsPercentage:
           type: number
@@ -2900,9 +2900,14 @@ components:
           minimum: 1
           maximum: 100
           default: 10
+        maxFailedUrls:
+          type: number
+          description: |
+            Stops the crawler if a specified number of pages fail to crawl.
+            If undefined, the crawler won't stop if it encounters such errors.
     safetyChecks:
       type: object
-      description: Safety checks for ensuring data integrity between crawls.
+      description: Checks to ensure the crawl was successful.
       properties:
         beforeIndexPublishing:
           $ref: '#/components/schemas/beforeIndexPublishing'
diff --git a/specs/bundled/crawler.yml b/specs/bundled/crawler.yml
index 5ea6f3d2c7..db1019540d 100644
--- a/specs/bundled/crawler.yml
+++ b/specs/bundled/crawler.yml
@@ -2881,8 +2881,8 @@ components:
     beforeIndexPublishing:
       type: object
       description: >-
-        Checks triggered after the crawl finishes and before the records are
-        added to the Algolia index.
+        These checks are triggered after the crawl finishes but before the
+        records are added to the Algolia index.
       properties:
         maxLostRecordsPercentage:
           type: number
@@ -2900,9 +2900,14 @@ components:
           minimum: 1
           maximum: 100
           default: 10
+        maxFailedUrls:
+          type: number
+          description: |
+            Stops the crawler if a specified number of pages fail to crawl.
+            If undefined, the crawler won't stop if it encounters such errors.
     safetyChecks:
       type: object
-      description: Safety checks for ensuring data integrity between crawls.
+      description: Checks to ensure the crawl was successful.
       properties:
         beforeIndexPublishing:
           $ref: '#/components/schemas/beforeIndexPublishing'