Skip to content

Commit

Permalink
docstrings: document_loaders consistency 2 (#9148)
Browse files Browse the repository at this point in the history
This is Part 2. See #9139 (Part 1).
  • Loading branch information
leo-gan authored Aug 11, 2023
1 parent 1b58460 commit 19f5047
Show file tree
Hide file tree
Showing 72 changed files with 114 additions and 144 deletions.
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""Loads HuggingFace datasets."""
from typing import Iterator, List, Mapping, Optional, Sequence, Union

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class HuggingFaceDatasetLoader(BaseLoader):
"""Load Documents from the Hugging Face Hub."""
"""Load from `Hugging Face Hub` datasets."""

def __init__(
self,
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/ifixit.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loads iFixit data."""
from typing import List, Optional

import requests
Expand All @@ -11,7 +10,7 @@


class IFixitLoader(BaseLoader):
"""Load iFixit repair guides, device wikis and answers.
"""Load `iFixit` repair guides, device wikis and answers.
iFixit is the largest, open repair community on the web. The site contains nearly
100k repair manuals, 200k Questions & Answers on 42k devices, and all the data is
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/image.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""Loads image files."""
from typing import List

from langchain.document_loaders.unstructured import UnstructuredFileLoader


class UnstructuredImageLoader(UnstructuredFileLoader):
"""Loader that uses Unstructured to load PNG and JPG files.
"""Load `PNG` and `JPG` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
Expand Down
13 changes: 6 additions & 7 deletions libs/langchain/langchain/document_loaders/image_captions.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
"""Loads image captions.
By default, the loader utilizes the pre-trained BLIP image captioning model.
https://huggingface.co/Salesforce/blip-image-captioning-base
"""
from typing import Any, List, Tuple, Union

import requests
Expand All @@ -13,7 +7,12 @@


class ImageCaptionLoader(BaseLoader):
"""Loads the captions of an image"""
"""Load image captions.
By default, the loader utilizes the pre-trained
Salesforce BLIP image captioning model.
https://huggingface.co/Salesforce/blip-image-captioning-base
"""

def __init__(
self,
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/imsdb.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""Loads IMSDb."""
from typing import List

from langchain.docstore.document import Document
from langchain.document_loaders.web_base import WebBaseLoader


class IMSDbLoader(WebBaseLoader):
"""Loads IMSDb webpages."""
"""Load `IMSDb` webpages."""

def load(self) -> List[Document]:
"""Load webpage."""
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/iugu.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loader that fetches data from IUGU"""
import json
import urllib.request
from typing import List, Optional
Expand All @@ -17,7 +16,7 @@


class IuguLoader(BaseLoader):
"""Loader that fetches data from IUGU."""
"""Load from `IUGU`."""

def __init__(self, resource: str, api_token: Optional[str] = None) -> None:
"""Initialize the IUGU resource.
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/joplin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@


class JoplinLoader(BaseLoader):
"""
Loader that fetches notes from Joplin.
"""Load notes from `Joplin`.
In order to use this loader, you need to have Joplin running with the
Web Clipper enabled (look for "Web Clipper" in the app settings).
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/json_loader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loads data from JSON."""
import json
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
Expand All @@ -8,7 +7,7 @@


class JSONLoader(BaseLoader):
"""Loads a JSON file using a jq schema.
"""Load a `JSON` file using a `jq` schema.
Example:
[{"text": ...}, {"text": ...}, {"text": ...}] -> schema = .[].text
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/larksuite.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loads LarkSuite (FeiShu) document json dump."""
import json
import urllib.request
from typing import Any, Iterator, List
Expand All @@ -8,7 +7,7 @@


class LarkSuiteDocLoader(BaseLoader):
"""Loads LarkSuite (FeiShu) document."""
"""Load from `LarkSuite` (`FeiShu`)."""

def __init__(self, domain: str, access_token: str, document_id: str):
"""Initialize with domain, access_token (tenant / user), and document_id.
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/markdown.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""Loads Markdown files."""
from typing import List

from langchain.document_loaders.unstructured import UnstructuredFileLoader


class UnstructuredMarkdownLoader(UnstructuredFileLoader):
"""Loader that uses Unstructured to load markdown files.
"""Load `Markdown` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/mastodon.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Mastodon document loader."""
from __future__ import annotations

import os
Expand All @@ -23,7 +22,7 @@ def _dependable_mastodon_import() -> mastodon:


class MastodonTootsLoader(BaseLoader):
"""Mastodon toots loader."""
"""Load the `Mastodon` 'toots'."""

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion libs/langchain/langchain/document_loaders/max_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


class MaxComputeLoader(BaseLoader):
"""Loads a query result from Alibaba Cloud MaxCompute table into documents."""
"""Load from `Alibaba Cloud MaxCompute` table."""

def __init__(
self,
Expand Down
5 changes: 2 additions & 3 deletions libs/langchain/langchain/document_loaders/mediawikidump.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Load Data from a MediaWiki dump xml."""
import logging
from pathlib import Path
from typing import List, Optional, Sequence, Union
Expand All @@ -10,8 +9,8 @@


class MWDumpLoader(BaseLoader):
"""
Load MediaWiki dump from XML file
"""Load `MediaWiki` dump from an `XML` file.
Example:
.. code-block:: python
Expand Down
4 changes: 1 addition & 3 deletions libs/langchain/langchain/document_loaders/mhtml.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
"""Load MHTML files, enriching metadata with page title."""

import email
import logging
from typing import Dict, List, Union
Expand All @@ -11,7 +9,7 @@


class MHTMLLoader(BaseLoader):
"""Loader that uses beautiful soup to parse HTML files."""
"""Parse `MHTML` files with `BeautifulSoup`."""

def __init__(
self,
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/modern_treasury.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loader that fetches data from Modern Treasury"""
import json
import urllib.request
from base64 import b64encode
Expand Down Expand Up @@ -27,7 +26,7 @@


class ModernTreasuryLoader(BaseLoader):
"""Loader that fetches data from Modern Treasury."""
"""Load from `Modern Treasury`."""

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion libs/langchain/langchain/document_loaders/news.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


class NewsURLLoader(BaseLoader):
"""Loader that uses newspaper to load news articles from URLs.
"""Load news articles from URLs using `newspaper`.
Args:
urls: URLs to load. Each is loaded into its own document.
Expand Down
4 changes: 2 additions & 2 deletions libs/langchain/langchain/document_loaders/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def remove_newlines(x: Any) -> Any:


class NotebookLoader(BaseLoader):
"""Loads .ipynb notebook files."""
"""Load `Jupyter notebook` (.ipynb) files."""

def __init__(
self,
Expand All @@ -80,7 +80,7 @@ def __init__(
remove_newline: bool = False,
traceback: bool = False,
):
"""Initialize with path.
"""Initialize with a path.
Args:
path: The path to load the notebook from.
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/notion.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loads Notion directory dump."""
from pathlib import Path
from typing import List

Expand All @@ -7,7 +6,7 @@


class NotionDirectoryLoader(BaseLoader):
"""Loads Notion directory dump."""
"""Load `Notion directory` dump."""

def __init__(self, path: str):
"""Initialize with a file path."""
Expand Down
4 changes: 1 addition & 3 deletions libs/langchain/langchain/document_loaders/notiondb.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
"""Notion DB loader for langchain"""

from typing import Any, Dict, List, Optional

import requests
Expand All @@ -14,7 +12,7 @@


class NotionDBLoader(BaseLoader):
"""Notion DB Loader.
"""Load from `Notion DB`.
Reads content from pages within a Notion Database.
Args:
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/nuclia.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Extract text from any file type."""
import json
import uuid
from typing import List
Expand All @@ -9,7 +8,7 @@


class NucliaLoader(BaseLoader):
"""Extract text from any file type."""
"""Load from any file type using `Nuclia Understanding API`."""

def __init__(self, path: str, nuclia_tool: NucliaUnderstandingAPI):
self.nua = nuclia_tool
Expand Down
2 changes: 1 addition & 1 deletion libs/langchain/langchain/document_loaders/obs_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


class OBSDirectoryLoader(BaseLoader):
"""Loading logic for loading documents from Huawei OBS."""
"""Load from `Huawei OBS directory`."""

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion libs/langchain/langchain/document_loaders/obs_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


class OBSFileLoader(BaseLoader):
"""Loader for Huawei OBS file."""
"""Load from the `Huawei OBS file`."""

def __init__(
self,
Expand Down
3 changes: 1 addition & 2 deletions libs/langchain/langchain/document_loaders/obsidian.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loads Obsidian directory dump."""
import re
from pathlib import Path
from typing import List
Expand All @@ -8,7 +7,7 @@


class ObsidianLoader(BaseLoader):
"""Loads Obsidian files from disk."""
"""Load `Obsidian` files from directory."""

FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL)

Expand Down
4 changes: 2 additions & 2 deletions libs/langchain/langchain/document_loaders/odt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loads OpenOffice ODT files."""
from typing import Any, List

from langchain.document_loaders.unstructured import (
Expand All @@ -8,7 +7,8 @@


class UnstructuredODTLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load OpenOffice ODT files.
"""Load `OpenOffice ODT` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured
Expand Down
2 changes: 1 addition & 1 deletion libs/langchain/langchain/document_loaders/onedrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def fetch_mime_types(self) -> Dict[str, str]:


class OneDriveLoader(BaseLoader, BaseModel):
"""Loads data from OneDrive."""
"""Load from `Microsoft OneDrive`."""

settings: _OneDriveSettings = Field(default_factory=_OneDriveSettings)
""" The settings for the OneDrive API client."""
Expand Down
2 changes: 1 addition & 1 deletion libs/langchain/langchain/document_loaders/onedrive_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


class OneDriveFileLoader(BaseLoader, BaseModel):
"""Loads a file from OneDrive."""
"""Load a file from `Microsoft OneDrive`."""

file: File = Field(...)
"""The file to load."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class OpenCityDataLoader(BaseLoader):
"""Loads Open City data."""
"""Load from `Open City`."""

def __init__(self, city_id: str, dataset_id: str, limit: int):
"""Initialize with dataset_id.
Expand Down
4 changes: 2 additions & 2 deletions libs/langchain/langchain/document_loaders/org_mode.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
"""Loads Org-Mode files."""
from typing import Any, List

from langchain.document_loaders.unstructured import (
Expand All @@ -8,7 +7,8 @@


class UnstructuredOrgModeLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load Org-Mode files.
"""Load `Org-Mode` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured
Expand Down
Loading

0 comments on commit 19f5047

Please sign in to comment.