langchain-ai · dev2049 · May 30, 2023 · May 16, 2023 · May 16, 2023 · May 17, 2023
diff --git a/docs/modules/indexes/document_loaders/examples/trello.ipynb b/docs/modules/indexes/document_loaders/examples/trello.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Trello\n",
+    "\n",
+    ">[Trello](https://www.atlassian.com/software/trello) is a web-based project management and collaboration tool that allows individuals and teams to organize and track their tasks and projects. It provides a visual interface known as a \"board\" where users can create lists and cards to represent their tasks and activities.\n",
+    "\n",
+    "The TrelloLoader allows you to load cards from a Trello board and is implemented on top of [py-trello](https://pypi.org/project/py-trello/)\n",
+    "\n",
+    "This currently supports `api_key/token` only.\n",
+    "\n",
+    "1. Credentials generation: https://trello.com/power-ups/admin/\n",
+    "\n",
+    "2. Click on the manual token generation link to get the token.\n",
+    "\n",
+    "To specify the API key and token you can either set the environment variables ``TRELLO_API_KEY`` and ``TRELLO_TOKEN`` or you can pass ``api_key`` and ``token`` directly into the `from_credentials` convenience constructor method.\n",
+    "\n",
+    "This loader allows you to provide the board name to pull in the corresponding cards into Document objects.\n",
+    "\n",
+    "Notice that the board \"name\" is also called \"title\" in the official documentation:\n",
+    "\n",
+    "https://support.atlassian.com/trello/docs/changing-a-boards-title-and-description/\n",
+    "\n",
+    "You can also specify several load parameters to include / remove different fields both from the document page_content properties and metadata.\n",
+    "\n",
+    "## Features\n",
+    "- Load cards from a Trello board.\n",
+    "- Filter cards based on their status (open or closed).\n",
+    "- Include card names, comments, and checklists in the loaded documents.\n",
+    "- Customize the additional metadata fields to include in the document.\n",
+    "\n",
+    "By default, all card fields are included in the full text page_content and in the metadata accordingly.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "#!pip install py-trello beautifulsoup4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "········\n",
+      "········\n"
+     ]
+    }
+   ],
+   "source": [
+    "# If you have already set the API key and token using environment variables,\n",
+    "# you can skip this cell and comment out the `api_key` and `token` named arguments\n",
+    "# in the initialization steps below.\n",
+    "from getpass import getpass\n",
+    "\n",
+    "API_KEY = getpass()\n",
+    "TOKEN = getpass()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Review Tech partner pages\n",
+      "Comments:\n",
+      "{'title': 'Review Tech partner pages', 'id': '6475357890dc8d17f73f2dcc', 'url': 'https://trello.com/c/b0OTZwkZ/1-review-tech-partner-pages', 'labels': ['Demand Marketing'], 'list': 'Done', 'closed': False, 'due_date': ''}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.document_loaders import TrelloLoader\n",
+    "\n",
+    "# Get the open cards from \"Awesome Board\"\n",
+    "loader = TrelloLoader.from_credentials(\n",
+    "    \"Awesome Board\",\n",
+    "    api_key=API_KEY,\n",
+    "    token=TOKEN,\n",
+    "    card_filter=\"open\",\n",
+    "    )\n",
+    "documents = loader.load()\n",
+    "\n",
+    "print(documents[0].page_content)\n",
+    "print(documents[0].metadata)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Review Tech partner pages\n",
+      "Comments:\n",
+      "{'title': 'Review Tech partner pages', 'id': '6475357890dc8d17f73f2dcc', 'url': 'https://trello.com/c/b0OTZwkZ/1-review-tech-partner-pages', 'list': 'Done'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get all the cards from \"Awesome Board\" but only include the\n",
+    "# card list(column) as extra metadata.\n",
+    "loader = TrelloLoader.from_credentials(\n",
+    "    \"Awesome Board\",\n",
+    "    api_key=API_KEY,\n",
+    "    token=TOKEN,\n",
+    "    extra_metadata=(\"list\"),\n",
+    ")\n",
+    "documents = loader.load()\n",
+    "\n",
+    "print(documents[0].page_content)\n",
+    "print(documents[0].metadata)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the cards from the \"test\" board and exclude the card name,\n",
+    "# checklist and comments from the Document page_content text.\n",
+    "loader = TrelloLoader.from_credentials(\n",
+    "    \"test\",\n",
+    "    api_key=API_KEY,\n",
+    "    token=TOKEN,\n",
+    "    include_card_name= False,\n",
+    "    include_checklist= False,\n",
+    "    include_comments= False,\n",
+    ")\n",
+    "documents = loader.load()\n",
+    "\n",
+    "print(\"Document: \" + documents[0].page_content)\n",
+    "print(documents[0].metadata)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "cc99336516f23363341912c6723b01ace86f02e26b4290be1efc0677e2e2ec24"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py
@@ -92,6 +92,7 @@
 from langchain.document_loaders.text import TextLoader
 from langchain.document_loaders.tomarkdown import ToMarkdownLoader
 from langchain.document_loaders.toml import TomlLoader
+from langchain.document_loaders.trello import TrelloLoader
 from langchain.document_loaders.twitter import TwitterTweetLoader
 from langchain.document_loaders.unstructured import (
     UnstructuredAPIFileIOLoader,
@@ -201,6 +202,7 @@
     "StripeLoader",
     "TextLoader",
     "TomlLoader",
+    "TrelloLoader",
     "TwitterTweetLoader",
     "UnstructuredAPIFileIOLoader",
     "UnstructuredAPIFileLoader",

diff --git a/langchain/document_loaders/trello.py b/langchain/document_loaders/trello.py
@@ -0,0 +1,168 @@
+"""Loader that loads cards from Trello"""
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+from langchain.utils import get_from_env
+
+if TYPE_CHECKING:
+    from trello import Board, Card, TrelloClient
+
+
+class TrelloLoader(BaseLoader):
+    """Trello loader. Reads all cards from a Trello board."""
+
+    def __init__(
+        self,
+        client: TrelloClient,
+        board_name: str,
+        *,
+        include_card_name: bool = True,
+        include_comments: bool = True,
+        include_checklist: bool = True,
+        card_filter: Literal["closed", "open", "all"] = "all",
+        extra_metadata: Tuple[str, ...] = ("due_date", "labels", "list", "closed"),
+    ):
+        """Initialize Trello loader.
+
+        Args:
+            client: Trello API client.
+            board_name: The name of the Trello board.
+            include_card_name: Whether to include the name of the card in the document.
+            include_comments: Whether to include the comments on the card in the
+                document.
+            include_checklist: Whether to include the checklist on the card in the
+                document.
+            card_filter: Filter on card status. Valid values are "closed", "open",
+                "all".
+            extra_metadata: List of additional metadata fields to include as document
+                metadata.Valid values are "due_date", "labels", "list", "closed".
+
+        """
+        self.client = client
+        self.board_name = board_name
+        self.include_card_name = include_card_name
+        self.include_comments = include_comments
+        self.include_checklist = include_checklist
+        self.extra_metadata = extra_metadata
+        self.card_filter = card_filter
+
+    @classmethod
+    def from_credentials(
+        cls,
+        board_name: str,
+        *,
+        api_key: Optional[str] = None,
+        token: Optional[str] = None,
+        **kwargs: Any,
+    ) -> TrelloLoader:
+        """Convenience constructor that builds TrelloClient init param for you.
+
+        Args:
+            board_name: The name of the Trello board.
+            api_key: Trello API key. Can also be specified as environment variable
+                TRELLO_API_KEY.
+            token: Trello token. Can also be specified as environment variable
+                TRELLO_TOKEN.
+            include_card_name: Whether to include the name of the card in the document.
+            include_comments: Whether to include the comments on the card in the
+                document.
+            include_checklist: Whether to include the checklist on the card in the
+                document.
+            card_filter: Filter on card status. Valid values are "closed", "open",
+                "all".
+            extra_metadata: List of additional metadata fields to include as document
+                metadata.Valid values are "due_date", "labels", "list", "closed".
+        """
+
+        try:
+            from trello import TrelloClient  # type: ignore
+        except ImportError as ex:
+            raise ImportError(
+                "Could not import trello python package. "
+                "Please install it with `pip install py-trello`."
+            ) from ex
+        api_key = api_key or get_from_env("api_key", "TRELLO_API_KEY")
+        token = token or get_from_env("token", "TRELLO_TOKEN")
+        client = TrelloClient(api_key=api_key, token=token)
+        return cls(client, board_name, **kwargs)
+
+    def load(self) -> List[Document]:
+        """Loads all cards from the specified Trello board.
+
+        You can filter the cards, metadata and text included by using the optional
+            parameters.
+
+         Returns:
+            A list of documents, one for each card in the board.
+        """
+        try:
+            from bs4 import BeautifulSoup  # noqa: F401
+        except ImportError as ex:
+            raise ImportError(
+                "`beautifulsoup4` package not found, please run"
+                " `pip install beautifulsoup4`"
+            ) from ex
+
+        board = self._get_board()
+        # Create a dictionary with the list IDs as keys and the list names as values
+        list_dict = {list_item.id: list_item.name for list_item in board.list_lists()}
+        # Get Cards on the board
+        cards = board.get_cards(card_filter=self.card_filter)
+        return [self._card_to_doc(card, list_dict) for card in cards]
+
+    def _get_board(self) -> Board:
+        # Find the first board with a matching name
+        board = next(
+            (b for b in self.client.list_boards() if b.name == self.board_name), None
+        )
+        if not board:
+            raise ValueError(f"Board `{self.board_name}` not found.")
+        return board
+
+    def _card_to_doc(self, card: Card, list_dict: dict) -> Document:
+        from bs4 import BeautifulSoup  # type: ignore
+
+        text_content = ""
+        if self.include_card_name:
+            text_content = card.name + "\n"
+        if card.description.strip():
+            text_content += BeautifulSoup(card.description, "lxml").get_text()
+        if self.include_checklist:
+            # Get all the checklist items on the card
+            for checklist in card.checklists:
+                if checklist.items:
+                    items = [
+                        f"{item['name']}:{item['state']}" for item in checklist.items
+                    ]
+                    text_content += f"\n{checklist.name}\n" + "\n".join(items)
+
+        if self.include_comments:
+            # Get all the comments on the card
+            comments = [
+                BeautifulSoup(comment["data"]["text"], "lxml").get_text()
+                for comment in card.comments
+            ]
+            text_content += "Comments:" + "\n".join(comments)
+
+        # Default metadata fields
+        metadata = {
+            "title": card.name,
+            "id": card.id,
+            "url": card.url,
+        }
+
+        # Extra metadata fields. Card object is not subscriptable.
+        if "labels" in self.extra_metadata:
+            metadata["labels"] = [label.name for label in card.labels]
+        if "list" in self.extra_metadata:
+            if card.list_id in list_dict:
+                metadata["list"] = list_dict[card.list_id]
+        if "closed" in self.extra_metadata:
+            metadata["closed"] = card.closed
+        if "due_date" in self.extra_metadata:
+            metadata["due_date"] = card.due_date
+
+        return Document(page_content=text_content, metadata=metadata)