From 4aca29b7b44e3c68f70db0f939360e28b8e4e5a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 1 May 2022 15:39:43 +0200 Subject: [PATCH] [naverwebtoon] support (best)challenge comics (closes #2542) and update URL pattern to match URLs without '.nhn' --- gallery_dl/extractor/naverwebtoon.py | 53 +++++++++++++++++++++------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py index 348f6a149f..eadd4602b1 100644 --- a/gallery_dl/extractor/naverwebtoon.py +++ b/gallery_dl/extractor/naverwebtoon.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2021 Seonghyeon Cho +# Copyright 2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,8 +11,10 @@ from .common import GalleryExtractor, Extractor, Message from .. import text +import re -BASE_PATTERN = r"(?:https?://)?comic\.naver\.com/webtoon" +BASE_PATTERN = (r"(?:https?://)?comic\.naver\.com" + r"/(webtoon|challenge|bestChallenge)") class NaverwebtoonBase(): @@ -25,19 +28,33 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor): directory_fmt = ("{category}", "{comic}") filename_fmt = "{episode:>03}-{num:>02}.{extension}" archive_fmt = "{title_id}_{episode}_{num}" - pattern = BASE_PATTERN + r"/detail\.nhn\?([^#]+)" + pattern = BASE_PATTERN + r"/detail(?:\.nhn)?\?([^#]+)" test = ( - (("https://comic.naver.com/webtoon/detail.nhn?" - "titleId=26458&no=1&weekday=tue"), { + (("https://comic.naver.com/webtoon/detail" + "?titleId=26458&no=1&weekday=tue"), { "url": "47a956ba8c7a837213d5985f50c569fcff986f75", "content": "3806b6e8befbb1920048de9888dfce6220f69a60", "count": 14 }), + (("https://comic.naver.com/challenge/detail" + "?titleId=765124&no=1"), { + "pattern": r"https://image-comic\.pstatic\.net/nas" + r"/user_contents_data/challenge_comic/2021/01/19" + r"/342586/upload_7149856273586337846\.jpeg", + "count": 1, + }), + (("https://comic.naver.com/bestChallenge/detail.nhn" + "?titleId=771467&no=3"), { + "pattern": r"https://image-comic\.pstatic\.net/nas" + r"/user_contents_data/challenge_comic/2021/04/28" + r"/345534/upload_3617293622396203109\.jpeg", + "count": 1, + }), ) def __init__(self, match): - query = match.group(1) - url = "{}/webtoon/detail.nhn?{}".format(self.root, query) + path, query = match.groups() + url = "{}/{}/detail?{}".format(self.root, path, query) GalleryExtractor.__init__(self, match, url) query = text.parse_query(query) @@ -70,22 +87,31 @@ def images(page): class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor): subcategory = "comic" categorytransfer = True - pattern = (BASE_PATTERN + r"/list\.nhn\?([^#]+)") + pattern = (BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)") test = ( - ("https://comic.naver.com/webtoon/list.nhn?titleId=22073", { + ("https://comic.naver.com/webtoon/list?titleId=22073", { "pattern": NaverwebtoonEpisodeExtractor.pattern, "count": 32, }), + ("https://comic.naver.com/challenge/list?titleId=765124", { + "pattern": NaverwebtoonEpisodeExtractor.pattern, + "count": 25, + }), + ("https://comic.naver.com/bestChallenge/list.nhn?titleId=789786", { + "pattern": NaverwebtoonEpisodeExtractor.pattern, + "count": ">= 12", + }), ) def __init__(self, match): Extractor.__init__(self, match) - query = text.parse_query(match.group(1)) + self.path, query = match.groups() + query = text.parse_query(query) self.title_id = query.get("titleId") self.page_no = text.parse_int(query.get("page"), 1) def items(self): - url = self.root + "/webtoon/list.nhn" + url = "{}/{}/list".format(self.root, self.path) params = {"titleId": self.title_id, "page": self.page_no} data = {"_extractor": NaverwebtoonEpisodeExtractor} @@ -103,7 +129,8 @@ def items(self): def get_episode_urls(self, page): """Extract and return all episode urls in page""" return [ - self.root + "/webtoon/detail.nhn?" + query - for query in text.extract_iter( - page, '