From 6740757818836529515641106de0c549f4ca89bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Daoust?=
Date: Wed, 19 Jun 2024 13:02:43 +0200
Subject: [PATCH] Force full crawl on spec when previous crawl reported an
 error (#1602)

Reffy happily reused the result of a previous crawl when a spec wasn't
modified, even though that result indicated an error.

In most cases, the error is a network error, and reusing the result is
actually quite fine because that result dates back to a crawl where the
spec could be fetched without error, and we should just forget about the
error. In some cases though, the error is about something else, and
reusing the result while pretending the error is gone would be wrong.

These cases are not straightforward to distinguish. This update does not
try to be smart: it simply forces a full crawl on specs for which the
previous result contains an error.
---
 src/lib/specs-crawler.js | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/lib/specs-crawler.js b/src/lib/specs-crawler.js
index f5fc7e07..3618b5f8 100644
--- a/src/lib/specs-crawler.js
+++ b/src/lib/specs-crawler.js
@@ -84,8 +84,15 @@ async function crawlSpec(spec, crawlOptions) {
     try {
         const fallback = crawlOptions.fallbackData?.results?.find(s => s.url === spec.url);
         let cacheInfo = {};
-        if (crawlOptions.fallbackData?.crawler === `reffy-${reffyVersion}`) {
-            cacheInfo = Object.assign({}, fallback?.crawlCacheInfo);
+        if (fallback && !fallback.error &&
+            crawlOptions.fallbackData?.crawler === `reffy-${reffyVersion}`) {
+            // Note: we don't want to reuse the previous crawl results if
+            // there was an error because we don't really know whether these
+            // results come from that previous crawl (in which case we should
+            // crawl the spec again), or from an earlier crawl where
+            // everything went fine (in which case we could reuse the results
+            // if the spec wasn't updated in the meantime).
+            cacheInfo = Object.assign({}, fallback.crawlCacheInfo);
         }
         let result = null;
         if (crawlOptions.useCrawl) {
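
To see how the new guard plays out, here is a minimal standalone sketch of
the relevant logic. The `reffyVersion` value, the URLs and the two fallback
entries are made up for the example; only the guard itself mirrors the patch:

    // Minimal sketch, not part of the patch (Node.js >= 14 for `?.`).
    const reffyVersion = '17.0.0';  // assumed version for the example

    const crawlOptions = {
      fallbackData: {
        crawler: `reffy-${reffyVersion}`,
        results: [
          // Previous crawl succeeded: cache info may be reused, so the
          // spec only gets re-crawled if it was modified since.
          { url: 'https://example.org/spec-ok/',
            crawlCacheInfo: { lastModified: 'Mon, 17 Jun 2024 10:00:00 GMT' } },
          // Previous crawl reported an error: cache info is not reused,
          // which forces a full crawl of the spec.
          { url: 'https://example.org/spec-ko/',
            error: 'Loading timed out' }
        ]
      }
    };

    for (const spec of [{ url: 'https://example.org/spec-ok/' },
                        { url: 'https://example.org/spec-ko/' }]) {
      const fallback = crawlOptions.fallbackData?.results?.find(s => s.url === spec.url);
      let cacheInfo = {};
      if (fallback && !fallback.error &&
          crawlOptions.fallbackData?.crawler === `reffy-${reffyVersion}`) {
        cacheInfo = Object.assign({}, fallback.crawlCacheInfo);
      }
      console.log(spec.url,
        Object.keys(cacheInfo).length ? 'conditional crawl' : 'full crawl');
    }
    // Prints:
    //   https://example.org/spec-ok/ conditional crawl
    //   https://example.org/spec-ko/ full crawl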