Add ids to Filter
Kevo committed Aug 28, 2021
1 parent ae1affe commit ffd54ff
Showing 4 changed files with 61 additions and 17 deletions.
35 changes: 28 additions & 7 deletions pyflarum/extensions/absolutely_all.py
@@ -1,15 +1,19 @@
from typing import Generator, Optional, TYPE_CHECKING
if TYPE_CHECKING:
    from ..flarum.core.notifications import Notifications
-    from ..flarum.core.discussions import Discussions, Discussion
-    from ..flarum.core.posts import Posts, Post
+    from ..flarum.core.discussions import Discussions
+    from ..flarum.core.posts import Posts
    from ..flarum.core.users import Users


+import warnings
+
+
from . import ExtensionMixin
from ..session import FlarumUser

from ..flarum.core.filters import Filter
+from ..flarum.core.discussions import Discussion


AUTHOR = 'skevo'
@@ -127,21 +131,38 @@ def absolutely_all_notifications(self, filter: Optional[Filter]=None) -> Generat
                break


-    def get_all_posts_from_discussion(self, discussion: 'Discussion') -> Generator['Post', None, None]:
+    def get_all_posts_from_discussion(self, discussion: Discussion, at_once: int=50, force: bool=False) -> Generator['Posts', None, None]:
        """
-            This makes an additional API request for every individual post to fetch the full post data from a long discussion.
-            Sadly, the additional requests are needed because only post IDs are present in the relationship data of the discussion.
+            This fetches all posts from a long discussion where only post IDs are present.
+            First, a list of all IDs is created from the API response. Then, the IDs are broken into chunks of size `at_once` and
+            yielded as `Posts`.

            I recommend putting a delay between `next()` calls, to prevent a "429 Rate Limited" error on forums that are protected from flooding.
+            Use `force=True` to bypass `at_once` being capped at 50, if it is more than 50.
        """

+        if not isinstance(discussion, Discussion):
+            raise TypeError("`discussion` parameter must be an instance of `Discussion`.")
+
+
+        if at_once > 50 and not force:
+            at_once = 50
+            warnings.warn("`at_once` was capped at 50, because Flarum (by default/currently) doesn't support fetching more than 50 entries at once from the API. Use `force=True` to bypass.")
+
+
        raw_posts = discussion.relationships.get("posts", {}).get("data", []) # type: list[dict]
+        post_ids = [] # type: list[int]


        for raw_post in raw_posts:
            post_id = raw_post.get("id", None) # type: Optional[int]

            if post_id:
-                yield self.get_post_by_id(post_id)
+                post_ids.append(post_id)
+
+
+        for i in range(0, len(post_ids), at_once):
+            yield self.get_posts(filter=Filter(limit=at_once, ids=post_ids[i:i + at_once]))
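
For context, a minimal usage sketch of the new chunked generator (assuming a logged-in `FlarumUser` named `USER` and discussion ID 7585, both taken from the test file below); the `time.sleep()` delay follows the docstring's advice to space out `next()` calls:

import time

discussion = USER.get_discussion_by_id(7585)  # ID taken from the test below

# Each yielded `posts` is one bulk of up to `at_once` full posts,
# fetched in a single API request via `Filter(ids=...)`:
for posts in USER.get_all_posts_from_discussion(discussion, at_once=50):
    for post in posts:
        print(post.url)

    time.sleep(1)  # pause between bulk requests to avoid "429 Rate Limited"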



28 changes: 24 additions & 4 deletions pyflarum/flarum/core/filters.py
@@ -1,4 +1,5 @@
-from typing import Literal, Optional
+from typing import Literal, Optional, Iterable
+import warnings


__all__ = ['Filter']
@@ -12,9 +13,24 @@ class Filter:
    It allows you to filter discussions without having to manually specify URL parameters.
    """

-    def __init__(self, order_by: Optional[Literal['commentCount', '-commentCount', 'createdAt', '-createdAt']]=None, query: Optional[str]=None, limit: int=20, page: Optional[int]=None, include: Optional[list[str]]=None, additional_data: Optional[dict]=None):
+    def __init__(self, order_by: Optional[Literal['commentCount', '-commentCount', 'createdAt', '-createdAt']]=None, query: Optional[str]=None, ids: Optional[Iterable['str | int']]=None, limit: int=20, page: Optional[int]=None, include: Optional[list[str]]=None, additional_data: Optional[dict]=None):
+        """
+        - `order_by` - gets passed into the `?sort=` parameter. Common values are `commentCount`, `createdAt` and their reversed/negated values (prefixed with `-`).
+        - `query` - the search query, passed into `?filter[q]=`. This can be a string. Flarum's search bar uses this. Gambits such as `author:username` are supported.
+        - `ids` - fetches entries with specific IDs, passed into `?filter[id]=`. This is an iterable that is then converted into a comma-separated string.
+        - `limit` - the limit of entries to fetch. Flarum (by default) allows a maximum of 50 entries to be fetched at once. Passed into `?page[limit]=`.
+        - `page` - fetch a specific page of entries. This is actually an offset, determined by multiplying `page` by `limit` (see above).
+        - `include` - include specific related data. See [included data](https://cwkevo.github.io/pyflarum/docs/#included-data). You will likely never use this.
+        - `additional_data` - a `dict` (`parameter: value`) of additional search parameters that you might want to use. This can be used to overwrite previous filters.
+        """
+
+        if limit > 50:
+            warnings.warn(f"The maximum number of entries fetchable by one bulk API request in Flarum is 50 by default, but you specified {limit}. This is a Flarum limitation to prevent flooding, and I can't bypass it. Please lower the value to 50 or below.")
+
+
        self.order_by = order_by
        self.query = query
+        self.ids = ids
        self.limit = limit
        self.page = page
        self.include = include
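
A short sketch of how these parameters might be combined (the values are illustrative, not from this commit):

from pyflarum.flarum.core.filters import Filter

# newest discussions first, 30 entries per page:
newest = Filter(order_by='-createdAt', limit=30)

# search gambit, second page (with the default limit of 20, this skips 2 * 20 = 40 entries):
by_author = Filter(query='author:username', page=2)

# specific entries by ID - both strings and ints work, since they are joined into one string:
by_ids = Filter(ids=[1, '2', 3])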
@@ -27,9 +43,10 @@ def __init__(self, order_by: Optional[Literal['commentCount', '-commentCount', '
    def to_dict(self) -> dict:
        """
        Converts the filter to a `dict`, so that
-        it can be sent to the API.
+        it can be sent to the API (with the `requests` module, see ["Passing parameters in URLs"](https://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls)).

-        An extension might add additional data during runtime.
+        An extension might add additional filter data after the filter was initialized
+        (for example: `absolutely_all` needs to update the page number to continuously yield results).
        """

__data = {}
@@ -39,6 +56,9 @@

        if self.query:
            __data["filter[q]"] = self.query

+        if self.ids:
+            __data["filter[id]"] = ','.join(map(str, self.ids))

        if self.limit:
            __data["page[limit]"] = self.limit
6 changes: 4 additions & 2 deletions tests/absolutely_all_posts_from_discussion.py
@@ -17,5 +17,7 @@
if __name__ == "__main__":
    discussion = USER.get_discussion_by_id(7585)

-    for post in USER.get_all_posts_from_discussion(discussion):
-        print(post.number, '\n', post.contentHtml, '\n\n')
+    for posts in USER.get_all_posts_from_discussion(discussion):
+        for post in posts:
+            print(post.url)
+        print('-' * 20)
9 changes: 5 additions & 4 deletions tests/scrap_all_discussions_and_posts.py
@@ -37,10 +37,11 @@ def scrap_all(file_name: 'str | bytes | Path'="scrapped.html"):
data = f"""<div id="d-{discussion.id}" style="margin: 2rem; background-color: lavender; border: 3px double skyblue; padding: 1rem; border-radius: 2rem;"><h1><a href="{discussion.url}">Discussion #{discussion.id}</a></h1><div id="d-{discussion.id}-posts">\n"""
scrap.write(data)

for post in USER.get_all_posts_from_discussion(discussion):
if post.contentHtml:
data = f"""<div id="d-{discussion.id}-{post.number}"><h3><a href="{post.url}">Post #{post.number} in discussion #{discussion.id}</a></h3><p><i>By: {post.get_author().username} @ {post.createdAt}</i></p><div>{post.contentHtml}</div><hr/><br/></div>\n"""
scrap.write(data)
for posts in USER.get_all_posts_from_discussion(discussion):
for post in posts:
if post.contentHtml:
data = f"""<div id="d-{discussion.id}-{post.number}"><h3><a href="{post.url}">Post #{post.number} in discussion #{discussion.id}</a></h3><p><i>By: {post.get_author().username} @ {post.createdAt}</i></p><div>{post.contentHtml}</div><hr/><br/></div>\n"""
scrap.write(data)

scrap.write("""</div></div>""")

