Skip to content

Commit

Permalink
[kemonoparty] add 'revision_hash' metadata (mikf#4706, mikf#4727, mik…
Browse files Browse the repository at this point in the history
…f#5013)

A SHA1 hexdigest of other relevant metadata fields like
title, content, file and attachment URLs.

This value does NOT reflect which revisions are listed on the website.
Neither does 'edited', nor any other metadata field or combination of fields.
  • Loading branch information
mikf committed Jan 15, 2024
1 parent 4d6ec69 commit 3d68eda
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
26 changes: 22 additions & 4 deletions gallery_dl/extractor/kemonoparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
"""Extractors for https://kemono.party/"""

from .common import Extractor, Message
from .. import text, exception
from .. import text, util, exception
from ..cache import cache, memcache
import itertools
import json
import re

BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)"
Expand All @@ -37,10 +38,14 @@ def __init__(self, match):
Extractor.__init__(self, match)

def _init(self):
    """Set up per-instance options and helper callables."""
    # value of the 'revisions' config option
    self.revisions = self.config("revisions")
    self._prepare_ddosguard_cookies()

    # Finds src="..." values, with or without a kemono/coomer host prefix,
    # and captures either an '/inline/...' path or a
    # '/xx/yy/<64 hex digits>.<ext>' path.
    inline_pattern = re.compile(
        r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
        r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)')
    self._find_inline = inline_pattern.findall

    # Compact JSON with sorted keys, so that equal data always serializes
    # to the same string.
    encoder = json.JSONEncoder(
        ensure_ascii=False, check_circular=False,
        sort_keys=True, separators=(",", ":"))
    self._json_dumps = encoder.encode

def items(self):
find_hash = re.compile(HASH_PATTERN).match
Expand Down Expand Up @@ -223,11 +228,23 @@ def _post_revisions(self, url):

idx = len(revs)
for rev in revs:
rev["revision_hash"] = self._revision_hash(rev)
rev["revision_index"] = idx
idx -= 1

return revs

def _revision_hash(self, revision):
    """Return a SHA1 digest over the content-relevant fields of 'revision'.

    The digest is computed from a JSON serialization of 'revision'
    after dropping the 'revision_id', 'added', 'next' and 'prev' keys
    as well as the 'name' of the file and of every attachment.

    'revision' itself, including its nested 'file' and 'attachments'
    objects, is left unmodified.

    Raises KeyError if 'revision' has no 'file' or 'attachments' key.
    """
    rev = revision.copy()
    rev.pop("revision_id", None)
    rev.pop("added", None)
    rev.pop("next", None)
    rev.pop("prev", None)

    # dict.copy() is shallow: 'file' and 'attachments' are still shared
    # with the caller's object, so copy them before removing 'name'.
    # Otherwise the names would vanish from the post's own metadata.
    rev["file"] = rev["file"].copy()
    rev["file"].pop("name", None)
    rev["attachments"] = [a.copy() for a in rev["attachments"]]
    for a in rev["attachments"]:
        a.pop("name", None)

    return util.sha1(self._json_dumps(rev))


def _validate(response):
return (response.headers["content-length"] != "9" or
Expand All @@ -252,13 +269,13 @@ def posts(self):
url = self.api_url
params = text.parse_query(self.query)
params["o"] = text.parse_int(params.get("o"))
revisions = self.config("revisions")

while True:
posts = self.request(url, params=params).json()

if revisions:
if self.revisions:
for post in posts:
post["revision_hash"] = self._revision_hash(post)
post["revision_id"] = 0
post_url = "{}/post/{}".format(self.api_url, post["id"])
try:
Expand Down Expand Up @@ -296,7 +313,8 @@ def __init__(self, match):
def posts(self):
if not self.revision:
post = self.request(self.api_url).json()
if self.config("revisions"):
if self.revisions:
post["revision_hash"] = self._revision_hash(post)
post["revision_id"] = 0
try:
revs = self._post_revisions(self.api_url)
Expand Down
2 changes: 2 additions & 0 deletions test/results/kemonoparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@

"revision_id": 142470,
"revision_index": 2,
"revision_hash": "e0e93281495e151b11636c156e52bfe9234c2a40",
},

{
Expand All @@ -190,6 +191,7 @@

"revision_id": range(134996, 3052965),
"revision_index": range(1, 9),
"revision_hash": r"re:^[0-9a-f]{40}$",
},


Expand Down

0 comments on commit 3d68eda

Please sign in to comment.