Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[poringa] add support #4962

Merged
merged 1 commit into from
Dec 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,12 @@ Consider all listed sites to potentially be NSFW.
<td>Posts, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Poringa</td>
<td>http://www.poringa.net/</td>
<td>Posts Images, Search Results, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Porn Image</td>
<td>https://porn-images-xxx.com/</td>
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
"pixnet",
"plurk",
"poipiku",
"poringa",
"pornhub",
"pornpics",
"postmill",
Expand Down
129 changes: 129 additions & 0 deletions gallery_dl/extractor/poringa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for http://www.poringa.net/"""

from .common import Extractor, Message
from .. import text, exception
from ..cache import cache
import itertools

# Regex prefix shared by the extractor URL patterns below:
# optional "http://" or "https://", optional "www.", then the site's domain.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?poringa\.net"


class PoringaExtractor(Extractor):
    """Base class for poringa extractors

    Subclasses override posts() to supply the post IDs to process;
    items() then fetches every post page and yields its images.
    """
    category = "poringa"
    directory_fmt = ("{category}", "{user}", "{post_id}")
    filename_fmt = "{post_id}_{title}_{filename}.{extension}"
    # A post usually holds several images. Including the per-image index
    # keeps archive IDs unique per file instead of per post.
    archive_fmt = "{post_id}_{num}"
    root = "http://www.poringa.net"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # First capture group of the subclass pattern:
        # a post ID, a user name, or a search query.
        self.item = match.group(1)
        # True until the cached cookies have been loaded by request()
        self.__cookies = True

    def items(self):
        """Yield a Directory message and one Url message per image
        for every post ID returned by posts()

        Posts whose page cannot be fetched are logged and skipped.
        """
        for post_id in self.posts():
            url = "{}/posts/imagenes/{}".format(self.root, post_id)

            try:
                page = self.request(url).text
            except exception.HttpError as exc:
                self.log.warning(
                    "Unable to fetch posts for '%s' (%s)", post_id, exc)
                continue

            title, pos = text.extract(
                page, 'property="og:title" content="', '"')

            # The author link is searched for after the "main-info" block.
            # Use find() so a page without that block falls through to the
            # default user below instead of raising ValueError.
            pos = page.find('<div class="main-info', pos)
            if pos < 0:
                user = None
            else:
                user, pos = text.extract(
                    page, 'href="http://www.poringa.net/', '"', pos)

            if not user:
                user = "poringa"

            data = {
                "post_id"      : post_id,
                # 'title' is None when the og:title tag is missing
                "title"        : text.unescape(title or ""),
                "user"         : text.unquote(user),
                "_http_headers": {"Referer": url},
            }

            yield Message.Directory, data

            main_post = text.extr(
                page, 'property="dc:content" role="main">', '</div>')
            images = text.extract_iter(
                main_post,
                '<img class="imagen" border="0" src="',
                '"',
            )
            # "num" is the 1-based position of the image inside the post
            for data["num"], image_url in enumerate(images, 1):
                yield Message.Url, image_url, text.nameext_from_url(
                    image_url, data)

    def posts(self):
        """Return an iterable of post IDs; empty in the base class"""
        return ()

    def request(self, url, **kwargs):
        """Send a request, retrying while the site serves its
        'Please wait a few moments' page

        Cookies stored in _cookie_cache are loaded once per extractor
        instance, and any cookies set by a response are written back.

        Raises exception.HttpError if the wait page is still returned
        after 5 attempts.
        """
        if self.__cookies:
            self.__cookies = False
            self.cookies_update(_cookie_cache())

        for _ in range(5):
            response = Extractor.request(self, url, **kwargs)
            if response.cookies:
                _cookie_cache.update("", response.cookies)
            # Only the first 600 bytes are inspected for the wait-page title
            if response.content.find(
                    b"<title>Please wait a few moments</title>", 0, 600) < 0:
                return response
            self.sleep(5.0, "check")

        # Do not fall through and return None: callers access '.text'
        # on the result and would fail with an AttributeError.
        raise exception.HttpError(
            "Still receiving the 'Please wait a few moments' page "
            "after 5 attempts", response)

    def _pagination(self, url, params):
        """Yield unique post IDs from consecutive result pages of 'url'

        The page number is sent as the "p" query parameter, starting at 1.
        """
        for params["p"] in itertools.count(1):
            page = self.request(url, params=params).text

            # NOTE(review): assumes 'pattern' has been compiled into a
            # regex object by the time this runs -- confirm.
            posts_ids = PoringaPostExtractor.pattern.findall(page)
            # Remove duplicate IDs while preserving their order
            posts_ids = list(dict.fromkeys(posts_ids))
            yield from posts_ids

            # Fewer than 19 IDs is treated as the last page
            # (presumably the site's page size -- TODO confirm)
            if len(posts_ids) < 19:
                return


class PoringaPostExtractor(PoringaExtractor):
    """Extractor for posts on poringa.net"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/posts/imagenes/(\d+)/[a-zA-Z0-9_-]+\.html"
    example = "http://www.poringa.net/posts/imagenes/12/TITLE.html"

    def posts(self):
        """Return a 1-tuple holding the post ID captured from the URL"""
        post_id = self.item
        return (post_id,)


class PoringaUserExtractor(PoringaExtractor):
    """Extractor for poringa.net user URLs

    Posts are collected by sending the user name from the URL
    as the query of the site's "/buscar/" page.
    """
    subcategory = "user"
    pattern = BASE_PATTERN + r"/([a-zA-Z0-9_-]+)$"
    example = "http://www.poringa.net/USER"

    def posts(self):
        """Return an iterator over the post IDs found for the user name"""
        search_url = self.root + "/buscar/"
        query = {"q": text.unquote(self.item)}
        return self._pagination(search_url, query)


class PoringaSearchExtractor(PoringaExtractor):
    """Extractor for search results on poringa.net"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/buscar/\?&?q=([^&#]+)"
    example = "http://www.poringa.net/buscar/?q=QUERY"

    def posts(self):
        """Return an iterator over the post IDs matching the search query"""
        search_url = "{}/buscar/".format(self.root)
        query = {"q": text.unquote(self.item)}
        return self._pagination(search_url, query)


@cache()
def _cookie_cache():
    """Return an empty tuple as the default cookie collection

    PoringaExtractor.request() stores response cookies in this cache
    through its update() method and reads them back by calling it.
    """
    return tuple()
3 changes: 3 additions & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@
"sketch": "Sketch",
"work": "individual Images",
},
"poringa": {
"post": "Posts Images",
},
"pornhub": {
"gifs": "",
},
Expand Down
47 changes: 47 additions & 0 deletions test/results/poringa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import poringa


# Expected results for the poringa extractors, one dict per test URL.
# Keys starting with "#" look like test-runner directives and the other keys
# like expected metadata values -- presumably; verify against the test runner.
__tests__ = (
# Single post
# NOTE(review): this "#pattern" matches the post page URL itself, while the
# user/search cases below match img-N.poringa.net image URLs -- confirm which
# URLs the test runner compares "#pattern" against.
{
"#url" : "http://www.poringa.net/posts/imagenes/3051081/Turrita-alto-ojete.html",
"#category": ("", "poringa", "post"),
"#class" : poringa.PoringaPostExtractor,
"#pattern" : r"http://www\.poringa\.net/posts/imagenes/3051081/[a-zA-Z0-9_-]+\.html",

"post_id" : "3051081",
"title" : "turrita alto ojete...",
"user" : "vipower1top",
},

# Second single post (same "#pattern" remark as above)
{
"#url" : "http://www.poringa.net/posts/imagenes/3095554/Otra-culona-de-instagram.html",
"#category": ("", "poringa", "post"),
"#class" : poringa.PoringaPostExtractor,
"#pattern" : r"http://www\.poringa\.net/posts/imagenes/3095554/[a-zA-Z0-9_-]+\.html",

"post_id" : "3095554",
"title" : "Otra culona de instagram",
"user" : "Expectro007",
},

# User URL
# NOTE(review): inside the character class "[a-zA-Z0-9/{2}]" the characters
# "{", "2" and "}" are literals, not a repetition count -- confirm this is
# what was intended.
{
"#url" : "http://www.poringa.net/Expectro007",
"#category": ("", "poringa", "user"),
"#class" : poringa.PoringaUserExtractor,
"#pattern" : r"https?://img-[0-9]\.poringa\.net/poringa/img/[a-zA-Z0-9/{2}]{12}[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+\.jpg",
},

# Search URL using the "?&q=" query form (same character-class remark as above)
{
"#url" : "http://www.poringa.net/buscar/?&q=yuslopez",
"#category": ("", "poringa", "search"),
"#class" : poringa.PoringaSearchExtractor,
"#pattern" : r"https?://img-[0-9]\.poringa\.net/poringa/img/[a-zA-Z0-9/{2}]{12}[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+\.jpg",
},

)
Loading