From a51458d4166434b72607e5e477b9e386cf8dde75 Mon Sep 17 00:00:00 2001 From: slh <835908688@qq.com> Date: Sun, 18 Dec 2022 13:50:08 +0800 Subject: [PATCH] fix regex bug --- weibospider/spiders/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weibospider/spiders/search.py b/weibospider/spiders/search.py index a8fbcfdf..482b6f18 100644 --- a/weibospider/spiders/search.py +++ b/weibospider/spiders/search.py @@ -41,7 +41,7 @@ def parse(self, response, **kwargs): 网页解析 """ html = response.text - tweet_ids = re.findall(r'\d+/(.*?)\?refer_flag=1001030103_\'\)">复制微博地址', html) + tweet_ids = re.findall(r'\d+/(.*?)\?refer_flag=1001030103_" ', html) for tweet_id in tweet_ids: url = f"https://weibo.com/ajax/statuses/show?id={tweet_id}" yield Request(url, callback=self.parse_tweet, meta=response.meta)