From 01aac8a98aaf43331f0c7244f80ef87f6cc9918d Mon Sep 17 00:00:00 2001 From: Benjamin Ooghe-Tabanou Date: Fri, 10 Sep 2021 18:04:36 +0200 Subject: [PATCH] fix stack when resolving urls from scrapy --- hyphe_backend/crawler/hcicrawler/pipelines.py | 2 +- hyphe_backend/crawler/hcicrawler/resolver.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/hyphe_backend/crawler/hcicrawler/pipelines.py b/hyphe_backend/crawler/hcicrawler/pipelines.py index e360f0af..04f5c8e6 100644 --- a/hyphe_backend/crawler/hcicrawler/pipelines.py +++ b/hyphe_backend/crawler/hcicrawler/pipelines.py @@ -92,7 +92,7 @@ def process_item(self, item, spider): lru = url_to_lru_clean(rurl, TLDS_TREE) spider.resolved_links[url] = lru except Exception, e: - spider.log("Error resolving redirects from URL %s: %s %s" % (url, type(e), e), logging.INFO) + spider.log("Error resolving redirects for URL %s (found into %s): %s %s" % (url, item['url'], type(e), e), logging.WARNING) lrulinks.append(lru) item["lrulinks"] = lrulinks returnValue(item) diff --git a/hyphe_backend/crawler/hcicrawler/resolver.py b/hyphe_backend/crawler/hcicrawler/resolver.py index 93b2c51a..6bba6612 100644 --- a/hyphe_backend/crawler/hcicrawler/resolver.py +++ b/hyphe_backend/crawler/hcicrawler/resolver.py @@ -5,8 +5,6 @@ from twisted.web.client import Agent, ProxyAgent, RedirectAgent, _HTTP11ClientFactory _HTTP11ClientFactory.noisy = False -from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory - class ResolverAgent(RedirectAgent): def __init__(self, redirectLimit=5, connectTimeout=30, proxy=None): @@ -18,7 +16,7 @@ def __init__(self, redirectLimit=5, connectTimeout=30, proxy=None): raise TypeError("ResolverAgent's proxy argument need to be a dict with fields host and port") agent = ProxyAgent(endpoint) else: - agent = Agent(reactor, connectTimeout=connectTimeout, contextFactory=ScrapyClientContextFactory) + agent = Agent(reactor, connectTimeout=connectTimeout) RedirectAgent.__init__(self, agent, redirectLimit=redirectLimit) @defer.inlineCallbacks