Add ids to Filter
Kevo committed Aug 28, 2021
1 parent ae1affe commit ffd54ff
Showing 4 changed files with 61 additions and 17 deletions.
35 changes: 28 additions & 7 deletions pyflarum/extensions/absolutely_all.py
@@ -1,15 +1,19 @@
from typing import Generator, Optional, TYPE_CHECKING
if TYPE_CHECKING:
    from ..flarum.core.notifications import Notifications
-    from ..flarum.core.discussions import Discussions, Discussion
-    from ..flarum.core.posts import Posts, Post
+    from ..flarum.core.discussions import Discussions
+    from ..flarum.core.posts import Posts
    from ..flarum.core.users import Users


+import warnings
+
+
from . import ExtensionMixin
from ..session import FlarumUser

from ..flarum.core.filters import Filter
+from ..flarum.core.discussions import Discussion


AUTHOR = 'skevo'
@@ -127,21 +131,38 @@ def absolutely_all_notifications(self, filter: Optional[Filter]=None) -> Generat
                break


-    def get_all_posts_from_discussion(self, discussion: 'Discussion') -> Generator['Post', None, None]:
+    def get_all_posts_from_discussion(self, discussion: Discussion, at_once: int=50, force: bool=False) -> Generator['Posts', None, None]:
        """
-            This makes an additional API request for every individual post to fetch the full post data from a long discussion.
-            Sadly, the additional requests are needed because only post IDs are present in the relationship data of the discussion.
+            This fetches all posts from a long discussion where only post IDs are present.
+            First, a list of all IDs is created from the API response. Then, the IDs are broken into chunks of size `at_once` and
+            yielded as `Posts`.

            I recommend putting a delay between `next()` calls, to prevent a "429 Rate Limited" error on forums that are protected from flooding.
+            Use `force=True` to bypass `at_once` being capped at 50, if it is more than 50.
        """

+        if not isinstance(discussion, Discussion):
+            raise TypeError("`discussion` parameter must be an instance of `Discussion`.")
+
+
+        if at_once > 50 and not force:
+            at_once = 50
+            warnings.warn("`at_once` was capped at 50, because Flarum (by default/currently) doesn't support fetching more than 50 entries at once from the API. Use `force=True` to bypass.")
+
+
        raw_posts = discussion.relationships.get("posts", {}).get("data", []) # type: list[dict]
+        post_ids = [] # type: list[int]


        for raw_post in raw_posts:
            post_id = raw_post.get("id", None) # type: Optional[int]

            if post_id:
-                yield self.get_post_by_id(post_id)
+                post_ids.append(post_id)
+
+
+        for i in range(0, len(post_ids), at_once):
+            yield self.get_posts(filter=Filter(limit=at_once, ids=post_ids[i:i + at_once]))
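
For context, a minimal usage sketch of the new chunked generator (assuming a logged-in `FlarumUser` named `USER` and discussion ID 7585, both taken from the test file below); the `time.sleep()` delay follows the docstring's advice to space out `next()` calls:

import time

discussion = USER.get_discussion_by_id(7585)  # ID taken from the test below

# Each yielded `posts` is one bulk of up to `at_once` full posts,
# fetched in a single API request via `Filter(ids=...)`:
for posts in USER.get_all_posts_from_discussion(discussion, at_once=50):
    for post in posts:
        print(post.url)

    time.sleep(1)  # pause between bulk requests to avoid "429 Rate Limited"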



28 changes: 24 additions & 4 deletions pyflarum/flarum/core/filters.py
@@ -1,4 +1,5 @@
-from typing import Literal, Optional
+from typing import Literal, Optional, Iterable
+import warnings


__all__ = ['Filter']
@@ -12,9 +13,24 @@ class Filter:
    It allows you to filter discussions without having to manually specify URL parameters.
    """

-    def __init__(self, order_by: Optional[Literal['commentCount', '-commentCount', 'createdAt', '-createdAt']]=None, query: Optional[str]=None, limit: int=20, page: Optional[int]=None, include: Optional[list[str]]=None, additional_data: Optional[dict]=None):
+    def __init__(self, order_by: Optional[Literal['commentCount', '-commentCount', 'createdAt', '-createdAt']]=None, query: Optional[str]=None, ids: Optional[Iterable['str | int']]=None, limit: int=20, page: Optional[int]=None, include: Optional[list[str]]=None, additional_data: Optional[dict]=None):
+        """
+        - `order_by` - gets passed into the `?sort=` parameter. Common values are `commentCount`, `createdAt` and their reversed/negated values (prefixed with `-`).
+        - `query` - the search query, passed into `?filter[q]=`. This can be a string. Flarum's search bar uses this. Gambits such as `author:username` are supported.
+        - `ids` - fetches entries with specific IDs, passed into `?filter[id]=`. This is an iterable that is then converted into a comma-separated string.
+        - `limit` - the limit of entries to fetch. Flarum (by default) allows a maximum of 50 entries to be fetched at once. Passed into `?page[limit]=`.
+        - `page` - fetch a specific page of entries. This is actually an offset, determined by multiplying `page` by `limit` (see above).
+        - `include` - include specific related data. See [included data](https://cwkevo.github.io/pyflarum/docs/#included-data). You will likely never use this.
+        - `additional_data` - a `dict` (`parameter: value`) of additional search parameters that you might want to use. This can be used to overwrite previous filters.
+        """
+
+        if limit > 50:
+            warnings.warn(f"The maximum number of entries fetchable by one bulk API request in Flarum is 50 by default, but you specified {limit}. This is a Flarum limitation to prevent flooding, and I can't bypass it. Please lower the value to 50 or below.")
+
+
        self.order_by = order_by
        self.query = query
+        self.ids = ids
        self.limit = limit
        self.page = page
        self.include = include
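
A short sketch of how these parameters might be combined (the values are illustrative, not from this commit):

from pyflarum.flarum.core.filters import Filter

# newest discussions first, 30 entries per page:
newest = Filter(order_by='-createdAt', limit=30)

# search gambit, second page (with the default limit of 20, this skips 2 * 20 = 40 entries):
by_author = Filter(query='author:username', page=2)

# specific entries by ID - both strings and ints work, since they are joined into one string:
by_ids = Filter(ids=[1, '2', 3])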
@@ -27,9 +43,10 @@ def __init__(self, order_by: Optional[Literal['commentCount', '-commentCount', '
    def to_dict(self) -> dict:
        """
        Converts the filter to a `dict`, so that
-        it can be sent to the API.
+        it can be sent to the API (with the `requests` module, see ["Passing parameters in URLs"](https://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls)).

-        An extension might add additional data during runtime.
+        An extension might add additional filter data after the filter was initialized
+        (for example: `absolutely_all` needs to update the page number to continuously yield results).
        """

__data = {}
@@ -39,6 +56,9 @@

        if self.query:
            __data["filter[q]"] = self.query

+        if self.ids:
+            __data["filter[id]"] = ','.join(map(str, self.ids))

        if self.limit:
            __data["page[limit]"] = self.limit
6 changes: 4 additions & 2 deletions tests/absolutely_all_posts_from_discussion.py
@@ -17,5 +17,7 @@
if __name__ == "__main__":
    discussion = USER.get_discussion_by_id(7585)

-    for post in USER.get_all_posts_from_discussion(discussion):
-        print(post.number, '\n', post.contentHtml, '\n\n')
+    for posts in USER.get_all_posts_from_discussion(discussion):
+        for post in posts:
+            print(post.url)
+        print('-' * 20)
9 changes: 5 additions & 4 deletions tests/scrap_all_discussions_and_posts.py
@@ -37,10 +37,11 @@ def scrap_all(file_name: 'str | bytes | Path'="scrapped.html"):
data = f"""<div id="d-{discussion.id}" style="margin: 2rem; background-color: lavender; border: 3px double skyblue; padding: 1rem; border-radius: 2rem;"><h1><a href="{discussion.url}">Discussion #{discussion.id}</a></h1><div id="d-{discussion.id}-posts">\n"""
scrap.write(data)

for post in USER.get_all_posts_from_discussion(discussion):
if post.contentHtml:
data = f"""<div id="d-{discussion.id}-{post.number}"><h3><a href="{post.url}">Post #{post.number} in discussion #{discussion.id}</a></h3><p><i>By: {post.get_author().username} @ {post.createdAt}</i></p><div>{post.contentHtml}</div><hr/><br/></div>\n"""
scrap.write(data)
for posts in USER.get_all_posts_from_discussion(discussion):
for post in posts:
if post.contentHtml:
data = f"""<div id="d-{discussion.id}-{post.number}"><h3><a href="{post.url}">Post #{post.number} in discussion #{discussion.id}</a></h3><p><i>By: {post.get_author().username} @ {post.createdAt}</i></p><div>{post.contentHtml}</div><hr/><br/></div>\n"""
scrap.write(data)

scrap.write("""</div></div>""")

