From b716a00da5e35f4afb663eaff36e374fac891993 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 28 Nov 2024 09:53:17 +0530 Subject: [PATCH 1/3] Update indian express --- recipes/economist_world_ahead.recipe | 2 +- recipes/indian_express.recipe | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe index 03c051906541..d54afffead55 100644 --- a/recipes/economist_world_ahead.recipe +++ b/recipes/economist_world_ahead.recipe @@ -28,7 +28,7 @@ def process_node(node): if ntype == 'PARAGRAPH': if node.get('textHtml'): return f'

{node.get("textHtml")}

' - return f'

{node.get("tex", "")}

' + return f'

{node.get("text", "")}

' elif ntype == 'IMAGE': alt = "" if node.get("altText") is None else node.get("altText") cap = "" diff --git a/recipes/indian_express.recipe b/recipes/indian_express.recipe index 91394fef1969..8d6392636ca9 100644 --- a/recipes/indian_express.recipe +++ b/recipes/indian_express.recipe @@ -45,7 +45,7 @@ class IndianExpress(BasicNewsRecipe): 'digital-subscriber-only h-text-widget ie-premium ie-first-publish adboxtop adsizes immigrationimg ' 'next-story-wrap ie-ie-share next-story-box brand-logo quote_section ie-customshare osv-ad-class ' 'custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec o-story-list subscriber_hide ' - 'author-social author-follow author-img premium_widget_below_article' + 'author-social author-follow author-img premium_widget_below_article author-block' ) ] @@ -136,9 +136,13 @@ class IndianExpress(BasicNewsRecipe): return citem['content'].replace('300', '600') def preprocess_html(self, soup): - if h2 := soup.find(attrs={'itemprop': 'description'}): + if h2 := (soup.find(attrs={"itemprop": "description"}) or soup.find(**classes("synopsis"))):: h2.name = 'p' h2['id'] = 'sub-d' + for span in soup.findAll( + "span", attrs={"class": ["ie-custom-caption", "custom-caption"]} + ): + span["id"] = "img-cap" for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'] if span := soup.find('span', content=True, attrs={'itemprop': 'dateModified'}): From ea2d3f9edd30168a85ac13ac5af8b2d4defa3317 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 28 Nov 2024 09:54:00 +0530 Subject: [PATCH 2/3] ... --- recipes/indian_express.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/indian_express.recipe b/recipes/indian_express.recipe index 8d6392636ca9..cd94f157cc2c 100644 --- a/recipes/indian_express.recipe +++ b/recipes/indian_express.recipe @@ -136,7 +136,7 @@ class IndianExpress(BasicNewsRecipe): return citem['content'].replace('300', '600') def preprocess_html(self, soup): - if h2 := (soup.find(attrs={"itemprop": "description"}) or soup.find(**classes("synopsis"))):: + if h2 := (soup.find(attrs={"itemprop": "description"}) or soup.find(**classes("synopsis"))): h2.name = 'p' h2['id'] = 'sub-d' for span in soup.findAll( From 0adbd16f214df5f8490496992e02a0aeeb754643 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 28 Nov 2024 10:23:58 +0530 Subject: [PATCH 3/3] ... --- recipes/economist_world_ahead.recipe | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe index d54afffead55..a3706a6b3073 100644 --- a/recipes/economist_world_ahead.recipe +++ b/recipes/economist_world_ahead.recipe @@ -49,6 +49,7 @@ def process_node(node): print('** ', ntype) return '' + def safe_dict(data, *names): ans = data for x in names: @@ -187,15 +188,14 @@ class EconomistWorld(BasicNewsRecipe): } def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'Mozilla/5.0 (Linux; Android 14; 330333QCG Build/AP1A.140705.005; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/125.0.6422.165 Mobile Safari/537.36 Lamarr/3.37.0-3037003 (android)' # noqa + kwargs['user_agent'] = ( + 'Mozilla/5.0 (Linux; Android 14; 330333QCG Build/AP1A.140705.005; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/125.0.6422.165 Mobile Safari/537.36 Lamarr/3.37.0-3037003 (android)' # noqa + ) br = BasicNewsRecipe.get_browser(self, *args, **kwargs) - br.addheaders += [ - ('x-requested-with', 'com.economist.lamarr') - ] + br.addheaders += [('x-requested-with', 'com.economist.lamarr')] return br def economist_test_article(self): - self.cover_url = None return [('Articles', [{'title':'test', 'url':'https://www.economist.com/the-world-ahead/2024/11/20/ten-business-trends-for-2025-and-forecasts-for-15-industries' }])] @@ -257,9 +257,12 @@ class EconomistWorld(BasicNewsRecipe): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) root_ = parse(raw) if '/interactive/' in url: - return '

' + root_.xpath('//h1')[0].text + '

' \ - + 'This article is supposed to be read in a browser' \ - + '
' + return ( + '

' + + root_.xpath('//h1')[0].text + '

' + + 'This article is supposed to be read in a browser' + + '
' + ) script = root_.xpath('//script[@id="__NEXT_DATA__"]')