docstrings: document_loaders consistency 2 (#9148)

This is Part 2. See #9139 (Part 1).
langchain-ai · Aug 11, 2023 · 19f5047 · 19f5047
1 parent 1b58460
commit 19f5047
Show file tree

Hide file tree

Showing 72 changed files with 114 additions and 144 deletions.
diff --git a/libs/langchain/langchain/document_loaders/hugging_face_dataset.py b/libs/langchain/langchain/document_loaders/hugging_face_dataset.py
@@ -1,12 +1,11 @@
-"""Loads HuggingFace datasets."""
 from typing import Iterator, List, Mapping, Optional, Sequence, Union
 
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
 
 
 class HuggingFaceDatasetLoader(BaseLoader):
-    """Load Documents from the Hugging Face Hub."""
+    """Load from `Hugging Face Hub` datasets."""
 
     def __init__(
         self,

diff --git a/libs/langchain/langchain/document_loaders/ifixit.py b/libs/langchain/langchain/document_loaders/ifixit.py
@@ -1,4 +1,3 @@
-"""Loads iFixit data."""
 from typing import List, Optional
 
 import requests
@@ -11,7 +10,7 @@
 
 
 class IFixitLoader(BaseLoader):
-    """Load iFixit repair guides, device wikis and answers.
+    """Load `iFixit` repair guides, device wikis and answers.
 
     iFixit is the largest, open repair community on the web. The site contains nearly
     100k repair manuals, 200k Questions & Answers on 42k devices, and all the data is

diff --git a/libs/langchain/langchain/document_loaders/image.py b/libs/langchain/langchain/document_loaders/image.py
@@ -1,11 +1,10 @@
-"""Loads image files."""
 from typing import List
 
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
 
 
 class UnstructuredImageLoader(UnstructuredFileLoader):
-    """Loader that uses Unstructured to load PNG and JPG files.
+    """Load `PNG` and `JPG` files using `Unstructured`.
 
     You can run the loader in one of two modes: "single" and "elements".
     If you use "single" mode, the document will be returned as a single

diff --git a/libs/langchain/langchain/document_loaders/image_captions.py b/libs/langchain/langchain/document_loaders/image_captions.py
@@ -1,9 +1,3 @@
-"""Loads image captions.
-
-By default, the loader utilizes the pre-trained BLIP image captioning model.
-https://huggingface.co/Salesforce/blip-image-captioning-base
-
-"""
 from typing import Any, List, Tuple, Union
 
 import requests
@@ -13,7 +7,12 @@
 
 
 class ImageCaptionLoader(BaseLoader):
-    """Loads the captions of an image"""
+    """Load image captions.
+
+    By default, the loader utilizes the pre-trained
+    Salesforce BLIP image captioning model.
+    https://huggingface.co/Salesforce/blip-image-captioning-base
+    """
 
     def __init__(
         self,

diff --git a/libs/langchain/langchain/document_loaders/imsdb.py b/libs/langchain/langchain/document_loaders/imsdb.py
@@ -1,12 +1,11 @@
-"""Loads IMSDb."""
 from typing import List
 
 from langchain.docstore.document import Document
 from langchain.document_loaders.web_base import WebBaseLoader
 
 
 class IMSDbLoader(WebBaseLoader):
-    """Loads IMSDb webpages."""
+    """Load `IMSDb` webpages."""
 
     def load(self) -> List[Document]:
         """Load webpage."""

diff --git a/libs/langchain/langchain/document_loaders/iugu.py b/libs/langchain/langchain/document_loaders/iugu.py
@@ -1,4 +1,3 @@
-"""Loader that fetches data from IUGU"""
 import json
 import urllib.request
 from typing import List, Optional
@@ -17,7 +16,7 @@
 
 
 class IuguLoader(BaseLoader):
-    """Loader that fetches data from IUGU."""
+    """Load from `IUGU`."""
 
     def __init__(self, resource: str, api_token: Optional[str] = None) -> None:
         """Initialize the IUGU resource.

diff --git a/libs/langchain/langchain/document_loaders/joplin.py b/libs/langchain/langchain/document_loaders/joplin.py
@@ -11,8 +11,7 @@
 
 
 class JoplinLoader(BaseLoader):
-    """
-    Loader that fetches notes from Joplin.
+    """Load notes from `Joplin`.
 
     In order to use this loader, you need to have Joplin running with the
     Web Clipper enabled (look for "Web Clipper" in the app settings).

diff --git a/libs/langchain/langchain/document_loaders/json_loader.py b/libs/langchain/langchain/document_loaders/json_loader.py
@@ -1,4 +1,3 @@
-"""Loads data from JSON."""
 import json
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Union
@@ -8,7 +7,7 @@
 
 
 class JSONLoader(BaseLoader):
-    """Loads a JSON file using a jq schema.
+    """Load a `JSON` file using a `jq` schema.
 
     Example:
         [{"text": ...}, {"text": ...}, {"text": ...}] -> schema = .[].text

diff --git a/libs/langchain/langchain/document_loaders/larksuite.py b/libs/langchain/langchain/document_loaders/larksuite.py
@@ -1,4 +1,3 @@
-"""Loads LarkSuite (FeiShu) document json dump."""
 import json
 import urllib.request
 from typing import Any, Iterator, List
@@ -8,7 +7,7 @@
 
 
 class LarkSuiteDocLoader(BaseLoader):
-    """Loads LarkSuite (FeiShu) document."""
+    """Load from `LarkSuite` (`FeiShu`)."""
 
     def __init__(self, domain: str, access_token: str, document_id: str):
         """Initialize with domain, access_token (tenant / user), and document_id.

diff --git a/libs/langchain/langchain/document_loaders/markdown.py b/libs/langchain/langchain/document_loaders/markdown.py
@@ -1,11 +1,10 @@
-"""Loads Markdown files."""
 from typing import List
 
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
 
 
 class UnstructuredMarkdownLoader(UnstructuredFileLoader):
-    """Loader that uses Unstructured to load markdown files.
+    """Load `Markdown` files using `Unstructured`.
 
     You can run the loader in one of two modes: "single" and "elements".
     If you use "single" mode, the document will be returned as a single

diff --git a/libs/langchain/langchain/document_loaders/mastodon.py b/libs/langchain/langchain/document_loaders/mastodon.py
@@ -1,4 +1,3 @@
-"""Mastodon document loader."""
 from __future__ import annotations
 
 import os
@@ -23,7 +22,7 @@ def _dependable_mastodon_import() -> mastodon:
 
 
 class MastodonTootsLoader(BaseLoader):
-    """Mastodon toots loader."""
+    """Load `Mastodon` 'toots'."""
 
     def __init__(
         self,

diff --git a/libs/langchain/langchain/document_loaders/max_compute.py b/libs/langchain/langchain/document_loaders/max_compute.py
@@ -8,7 +8,7 @@
 
 
 class MaxComputeLoader(BaseLoader):
-    """Loads a query result from Alibaba Cloud MaxCompute table into documents."""
+    """Load from `Alibaba Cloud MaxCompute` table."""
 
     def __init__(
         self,

diff --git a/libs/langchain/langchain/document_loaders/mediawikidump.py b/libs/langchain/langchain/document_loaders/mediawikidump.py
@@ -1,4 +1,3 @@
-"""Load Data from a MediaWiki dump xml."""
 import logging
 from pathlib import Path
 from typing import List, Optional, Sequence, Union
@@ -10,8 +9,8 @@
 
 
 class MWDumpLoader(BaseLoader):
-    """
-    Load MediaWiki dump from XML file
+    """Load `MediaWiki` dump from an `XML` file.
+
     Example:
         .. code-block:: python
 

diff --git a/libs/langchain/langchain/document_loaders/mhtml.py b/libs/langchain/langchain/document_loaders/mhtml.py
@@ -1,5 +1,3 @@
-"""Load MHTML files, enriching metadata with page title."""
-
 import email
 import logging
 from typing import Dict, List, Union
@@ -11,7 +9,7 @@
 
 
 class MHTMLLoader(BaseLoader):
-    """Loader that uses beautiful soup to parse HTML files."""
+    """Parse `MHTML` files with `BeautifulSoup`."""
 
     def __init__(
         self,

diff --git a/libs/langchain/langchain/document_loaders/modern_treasury.py b/libs/langchain/langchain/document_loaders/modern_treasury.py
@@ -1,4 +1,3 @@
-"""Loader that fetches data from Modern Treasury"""
 import json
 import urllib.request
 from base64 import b64encode
@@ -27,7 +26,7 @@
 
 
 class ModernTreasuryLoader(BaseLoader):
-    """Loader that fetches data from Modern Treasury."""
+    """Load from `Modern Treasury`."""
 
     def __init__(
         self,

diff --git a/libs/langchain/langchain/document_loaders/news.py b/libs/langchain/langchain/document_loaders/news.py
@@ -9,7 +9,7 @@
 
 
 class NewsURLLoader(BaseLoader):
-    """Loader that uses newspaper to load news articles from URLs.
+    """Load news articles from URLs using the `newspaper` library.
 
     Args:
         urls: URLs to load. Each is loaded into its own document.

diff --git a/libs/langchain/langchain/document_loaders/notebook.py b/libs/langchain/langchain/document_loaders/notebook.py
@@ -70,7 +70,7 @@ def remove_newlines(x: Any) -> Any:
 
 
 class NotebookLoader(BaseLoader):
-    """Loads .ipynb notebook files."""
+    """Load `Jupyter notebook` (.ipynb) files."""
 
     def __init__(
         self,
@@ -80,7 +80,7 @@ def __init__(
         remove_newline: bool = False,
         traceback: bool = False,
     ):
-        """Initialize with path.
+        """Initialize with a path.
 
         Args:
             path: The path to load the notebook from.

diff --git a/libs/langchain/langchain/document_loaders/notion.py b/libs/langchain/langchain/document_loaders/notion.py
@@ -1,4 +1,3 @@
-"""Loads Notion directory dump."""
 from pathlib import Path
 from typing import List
 
@@ -7,7 +6,7 @@
 
 
 class NotionDirectoryLoader(BaseLoader):
-    """Loads Notion directory dump."""
+    """Load `Notion directory` dump."""
 
     def __init__(self, path: str):
         """Initialize with a file path."""

diff --git a/libs/langchain/langchain/document_loaders/notiondb.py b/libs/langchain/langchain/document_loaders/notiondb.py
@@ -1,5 +1,3 @@
-"""Notion DB loader for langchain"""
-
 from typing import Any, Dict, List, Optional
 
 import requests
@@ -14,7 +12,7 @@
 
 
 class NotionDBLoader(BaseLoader):
-    """Notion DB Loader.
+    """Load from `Notion DB`.
 
     Reads content from pages within a Notion Database.
     Args:

diff --git a/libs/langchain/langchain/document_loaders/nuclia.py b/libs/langchain/langchain/document_loaders/nuclia.py
@@ -1,4 +1,3 @@
-"""Extract text from any file type."""
 import json
 import uuid
 from typing import List
@@ -9,7 +8,7 @@
 
 
 class NucliaLoader(BaseLoader):
-    """Extract text from any file type."""
+    """Load from any file type using `Nuclia Understanding API`."""
 
     def __init__(self, path: str, nuclia_tool: NucliaUnderstandingAPI):
         self.nua = nuclia_tool

diff --git a/libs/langchain/langchain/document_loaders/obs_directory.py b/libs/langchain/langchain/document_loaders/obs_directory.py
@@ -7,7 +7,7 @@
 
 
 class OBSDirectoryLoader(BaseLoader):
-    """Loading logic for loading documents from Huawei OBS."""
+    """Load from `Huawei OBS directory`."""
 
     def __init__(
         self,

diff --git a/libs/langchain/langchain/document_loaders/obs_file.py b/libs/langchain/langchain/document_loaders/obs_file.py
@@ -10,7 +10,7 @@
 
 
 class OBSFileLoader(BaseLoader):
-    """Loader for Huawei OBS file."""
+    """Load from the `Huawei OBS file`."""
 
     def __init__(
         self,

diff --git a/libs/langchain/langchain/document_loaders/obsidian.py b/libs/langchain/langchain/document_loaders/obsidian.py
@@ -1,4 +1,3 @@
-"""Loads Obsidian directory dump."""
 import re
 from pathlib import Path
 from typing import List
@@ -8,7 +7,7 @@
 
 
 class ObsidianLoader(BaseLoader):
-    """Loads Obsidian files from disk."""
+    """Load `Obsidian` files from directory."""
 
     FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL)
 

diff --git a/libs/langchain/langchain/document_loaders/odt.py b/libs/langchain/langchain/document_loaders/odt.py
@@ -1,4 +1,3 @@
-"""Loads OpenOffice ODT files."""
 from typing import Any, List
 
 from langchain.document_loaders.unstructured import (
@@ -8,7 +7,8 @@
 
 
 class UnstructuredODTLoader(UnstructuredFileLoader):
-    """Loader that uses unstructured to load OpenOffice ODT files.
+    """Load `OpenOffice ODT` files using `Unstructured`.
+
     You can run the loader in one of two modes: "single" and "elements".
     If you use "single" mode, the document will be returned as a single
     langchain Document object. If you use "elements" mode, the unstructured

diff --git a/libs/langchain/langchain/document_loaders/onedrive.py b/libs/langchain/langchain/document_loaders/onedrive.py
@@ -60,7 +60,7 @@ def fetch_mime_types(self) -> Dict[str, str]:
 
 
 class OneDriveLoader(BaseLoader, BaseModel):
-    """Loads data from OneDrive."""
+    """Load from `Microsoft OneDrive`."""
 
     settings: _OneDriveSettings = Field(default_factory=_OneDriveSettings)
     """ The settings for the OneDrive API client."""

diff --git a/libs/langchain/langchain/document_loaders/onedrive_file.py b/libs/langchain/langchain/document_loaders/onedrive_file.py
@@ -16,7 +16,7 @@
 
 
 class OneDriveFileLoader(BaseLoader, BaseModel):
-    """Loads a file from OneDrive."""
+    """Load a file from `Microsoft OneDrive`."""
 
     file: File = Field(...)
     """The file to load."""

diff --git a/libs/langchain/langchain/document_loaders/open_city_data.py b/libs/langchain/langchain/document_loaders/open_city_data.py
@@ -5,7 +5,7 @@
 
 
 class OpenCityDataLoader(BaseLoader):
-    """Loads Open City data."""
+    """Load from `Open City`."""
 
     def __init__(self, city_id: str, dataset_id: str, limit: int):
         """Initialize with dataset_id.

diff --git a/libs/langchain/langchain/document_loaders/org_mode.py b/libs/langchain/langchain/document_loaders/org_mode.py
@@ -1,4 +1,3 @@
-"""Loads Org-Mode files."""
 from typing import Any, List
 
 from langchain.document_loaders.unstructured import (
@@ -8,7 +7,8 @@
 
 
 class UnstructuredOrgModeLoader(UnstructuredFileLoader):
-    """Loader that uses unstructured to load Org-Mode files.
+    """Load `Org-Mode` files using `Unstructured`.
+
     You can run the loader in one of two modes: "single" and "elements".
     If you use "single" mode, the document will be returned as a single
     langchain Document object. If you use "elements" mode, the unstructured