Skip to content

Commit

Permalink
[idolcomplex] fix extraction & update URL patterns (mikf#5002)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mikf authored and bradenhilton committed Feb 5, 2024
1 parent 621c495 commit 260dcd4
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 10 deletions.
17 changes: 10 additions & 7 deletions gallery_dl/extractor/idolcomplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ def __init__(self, match):
self.start_post = 0

def _init(self):
self.find_pids = re.compile(
r" href=[\"#]/\w\w/posts/([0-9a-f]+)"
).findall
self.find_tags = re.compile(
r'tag-type-([^"]+)">\s*<div [^>]+>\s*<a href="/\?tags=([^"]+)'
r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
).findall

def items(self):
Expand Down Expand Up @@ -149,8 +152,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
example = "https://idol.sankakucomplex.com/?tags=TAGS"
pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
per_page = 20

def __init__(self, match):
Expand Down Expand Up @@ -196,7 +199,8 @@ def post_ids(self):
page = self.request(self.root, params=params, retries=10).text
pos = ((page.find('id="more-popular-posts-link"') + 1) or
(page.find('<span class="thumb') + 1))
yield from text.extract_iter(page, ' href="/posts/', '"', pos)

yield from self.find_pids(page, pos)

next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
if not next_url:
Expand All @@ -218,7 +222,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pools?/show/(\d+)"
pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
example = "https://idol.sankakucomplex.com/pools/show/12345"
per_page = 24

Expand All @@ -242,8 +246,7 @@ def post_ids(self):
while True:
page = self.request(url, params=params, retries=10).text
pos = page.find('id="pool-show"') + 1
post_ids = list(text.extract_iter(
page, ' href="/posts/', '"', pos))
post_ids = self.find_pids(page, pos)

yield from post_ids
if len(post_ids) < self.per_page:
Expand Down
36 changes: 33 additions & 3 deletions test/results/idolcomplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,32 @@

__tests__ = (
{
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos",
"#url" : "https://idol.sankakucomplex.com/en/posts?tags=lyumos",
"#category": ("booru", "idolcomplex", "tag"),
"#class" : idolcomplex.IdolcomplexTagExtractor,
"#pattern" : r"https://i[sv]\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
"#range" : "18-22",
"#count" : 5,
},

{
"#url" : "https://idol.sankakucomplex.com/posts/?tags=lyumos",
"#category": ("booru", "idolcomplex", "tag"),
"#class" : idolcomplex.IdolcomplexTagExtractor,
},

{
"#url" : "https://idol.sankakucomplex.com/en/?tags=lyumos",
"#category": ("booru", "idolcomplex", "tag"),
"#class" : idolcomplex.IdolcomplexTagExtractor,
},

{
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos",
"#category": ("booru", "idolcomplex", "tag"),
"#class" : idolcomplex.IdolcomplexTagExtractor,
},

{
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos+wreath&page=3&next=694215",
"#category": ("booru", "idolcomplex", "tag"),
Expand All @@ -30,22 +48,28 @@
"#count" : 3,
},

{
"#url" : "https://idol.sankakucomplex.com/en/pools/show/145",
"#category": ("booru", "idolcomplex", "pool"),
"#class" : idolcomplex.IdolcomplexPoolExtractor,
},

{
"#url" : "https://idol.sankakucomplex.com/pool/show/145",
"#category": ("booru", "idolcomplex", "pool"),
"#class" : idolcomplex.IdolcomplexPoolExtractor,
},

{
"#url" : "https://idol.sankakucomplex.com/en/posts/show/509eccbba54a43cea6b275a65b93c51d",
"#url" : "https://idol.sankakucomplex.com/en/posts/509eccbba54a43cea6b275a65b93c51d",
"#category": ("booru", "idolcomplex", "post"),
"#class" : idolcomplex.IdolcomplexPostExtractor,
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",

"created_at" : "2017-11-24 17:01:27.696",
"date" : "dt:2017-11-24 17:01:27",
"extension" : "jpg",
"file_url" : r"re:https://is\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
"file_url" : r"re:https://i[sv]\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
"filename" : "509eccbba54a43cea6b275a65b93c51d",
"height" : 683,
"id" : 694215,
Expand All @@ -62,6 +86,12 @@
"width" : 1024,
},

{
"#url" : "https://idol.sankakucomplex.com/en/posts/show/509eccbba54a43cea6b275a65b93c51d",
"#category": ("booru", "idolcomplex", "post"),
"#class" : idolcomplex.IdolcomplexPostExtractor,
},

{
"#url" : "https://idol.sankakucomplex.com/posts/509eccbba54a43cea6b275a65b93c51d",
"#category": ("booru", "idolcomplex", "post"),
Expand Down

0 comments on commit 260dcd4

Please sign in to comment.