From 6740757818836529515641106de0c549f4ca89bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Daoust?=
Date: Wed, 19 Jun 2024 13:02:43 +0200
Subject: [PATCH] Force full crawl on spec when previous crawl reported an
 error (#1602)

Reffy happily reused the result of a previous crawl when a spec wasn't
modified, even though that result indicated an error.

In most cases, the error is a network error, and reusing the result is
actually quite fine because that result dates back to a crawl where the
spec could be fetched without error, and we should just forget about the
error. In some cases though, the error is about something else, and
reusing the result while pretending the error is gone would be wrong.

These cases are not straightforward to distinguish. This update does not
try to be smart: it simply forces a full crawl on specs for which the
previous result contains an error.
---
 src/lib/specs-crawler.js | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/lib/specs-crawler.js b/src/lib/specs-crawler.js
index f5fc7e07..3618b5f8 100644
--- a/src/lib/specs-crawler.js
+++ b/src/lib/specs-crawler.js
@@ -84,8 +84,15 @@ async function crawlSpec(spec, crawlOptions) {
     try {
         const fallback = crawlOptions.fallbackData?.results?.find(s => s.url === spec.url);
         let cacheInfo = {};
-        if (crawlOptions.fallbackData?.crawler === `reffy-${reffyVersion}`) {
-            cacheInfo = Object.assign({}, fallback?.crawlCacheInfo);
+        if (fallback && !fallback.error &&
+            crawlOptions.fallbackData?.crawler === `reffy-${reffyVersion}`) {
+            // Note: we don't want to reuse the previous crawl results if
+            // there was an error because we don't really know whether these
+            // results come from that previous crawl (in which case we should
+            // crawl the spec again), or from an earlier crawl where
+            // everything went fine (in which case we could reuse the results
+            // if the spec wasn't updated in the meantime).
+            cacheInfo = Object.assign({}, fallback.crawlCacheInfo);
         }
         let result = null;
         if (crawlOptions.useCrawl) {
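
To see how the new guard plays out, here is a minimal standalone sketch of
the relevant logic. The `reffyVersion` value, the URLs and the two fallback
entries are made up for the example; only the guard itself mirrors the patch:

    // Minimal sketch, not part of the patch (Node.js >= 14 for `?.`).
    const reffyVersion = '17.0.0';  // assumed version for the example

    const crawlOptions = {
      fallbackData: {
        crawler: `reffy-${reffyVersion}`,
        results: [
          // Previous crawl succeeded: cache info may be reused, so the
          // spec only gets re-crawled if it was modified since.
          { url: 'https://example.org/spec-ok/',
            crawlCacheInfo: { lastModified: 'Mon, 17 Jun 2024 10:00:00 GMT' } },
          // Previous crawl reported an error: cache info is not reused,
          // which forces a full crawl of the spec.
          { url: 'https://example.org/spec-ko/',
            error: 'Loading timed out' }
        ]
      }
    };

    for (const spec of [{ url: 'https://example.org/spec-ok/' },
                        { url: 'https://example.org/spec-ko/' }]) {
      const fallback = crawlOptions.fallbackData?.results?.find(s => s.url === spec.url);
      let cacheInfo = {};
      if (fallback && !fallback.error &&
          crawlOptions.fallbackData?.crawler === `reffy-${reffyVersion}`) {
        cacheInfo = Object.assign({}, fallback.crawlCacheInfo);
      }
      console.log(spec.url,
        Object.keys(cacheInfo).length ? 'conditional crawl' : 'full crawl');
    }
    // Prints:
    //   https://example.org/spec-ok/ conditional crawl
    //   https://example.org/spec-ko/ full crawl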