diff --git a/src/GOES_DL/datasource/__init__.py b/src/GOES_DL/datasource/__init__.py index c3820de..ee8b09d 100644 --- a/src/GOES_DL/datasource/__init__.py +++ b/src/GOES_DL/datasource/__init__.py @@ -15,9 +15,12 @@ A datasource object that caches the files. DatasourceHTTP A datasource object for an HTTP server. +DownloaderRepository + Manage file operations for the downloader object. """ from .datasource import Datasource as Datasource from .datasource_aws import DatasourceAWS as DatasourceAWS from .datasource_cache import DatasourceCache as DatasourceCache from .datasource_http import DatasourceHTTP as DatasourceHTTP +from .datasource_repository import DatasourceRepository as DatasourceRepository diff --git a/src/GOES_DL/datasource/datasource_aws.py b/src/GOES_DL/datasource/datasource_aws.py index 8d47a0f..3865922 100644 --- a/src/GOES_DL/datasource/datasource_aws.py +++ b/src/GOES_DL/datasource/datasource_aws.py @@ -5,22 +5,25 @@ DatasourceAWS: Handle AWS-based data sources. """ -from typing import Any, overload +from pathlib import Path +from typing import Any, Literal from urllib.parse import ParseResult import boto3 from botocore import UNSIGNED from botocore.client import ClientError, Config +from mypy_boto3_s3.client import S3Client from ..dataset import ProductLocator from ..utils.url import url -from .datasource import Datasource +from .datasource_base import DatasourceBase from .datasource_cache import DatasourceCache +from .datasource_repository import DatasourceRepository -AWS_CLIENT: str = "s3" +AWS_CLIENT: Literal["s3"] = "s3" -class DatasourceAWS(Datasource): +class DatasourceAWS(DatasourceBase): """ Handle AWS-based data sources. @@ -29,67 +32,51 @@ class DatasourceAWS(Datasource): location. The base URL of the datasource is the URL of the AWS S3 bucket. - Parameters - ---------- - locator : tuple[str, ...] 
| ProductLocator - A `ProductLocator` object or a tuple of strings containing - the base URL and an optional region where the S3 bucket is - located. E.g. "us-west-1", "us-east-1", "eu-west-1", etc. If - None, the default region is used. - Attributes ---------- - base_url : str - The base URL of the datasource. This is the URL where the - datasource is located. The base URL is used to build the full - URL to the files and directories. bucket_name : str The name of the AWS S3 bucket. - base_path : str - The base path of the AWS S3 bucket. s3_client : boto3.Client The AWS S3 client. - cached : dict[str, list[str]] - The cached file lists in the datasource, organised by folder. Methods ------- - bucket_exists(bucket_name: str) -> bool - Check if the bucket exists. - clear_cache(dir_path: str = "") -> None - Clear the cache. - get_client() -> Any - Get the AWS S3 client. get_file(file_path: str) -> Any Download a file into memory. - get_folder_path(dir_path: str) -> str - Get the folder path. listdir(dir_path: str) -> list[str] List the contents of a directory. - object_exists(bucket_name: str, object_path: str) -> bool - Check if the object exists. - - Raises - ------ - ValueError - If the bucket does not exist or the user has no access. """ - @overload - def __init__( - self, locator: tuple[str, ...], cache: DatasourceCache | None = None - ) -> None: ... - - @overload - def __init__( - self, locator: ProductLocator, cache: DatasourceCache | None = None - ) -> None: ... + bucket_name: str + s3_client: S3Client def __init__( self, locator: ProductLocator | tuple[str, ...], - cache: DatasourceCache | None = None, + repository: str | Path | DatasourceRepository | None = None, + cache: float | DatasourceCache | None = None, ) -> None: + """ + Initialize the DatasourceAWS object. + + Parameters + ---------- + locator : ProductLocator | tuple[str, ...] 
+ A `ProductLocator` object or a tuple of strings containing + the base URL and an optional region where the S3 bucket is + located. E.g. "us-west-1", "us-east-1", "eu-west-1", etc. If + None, the default region is used. + repository : str | Path | DatasourceRepository, optional + The directory where the files will be stored, by default + None. + cache : float | DatasourceCache, optional + The cache expiration time in seconds, by default None. + + Raises + ------ + ValueError + If the bucket does not exist or the user has no access. + """ base_url: str region: str | None if isinstance(locator, ProductLocator): @@ -101,62 +88,17 @@ def __init__( bucket_name: str = url_parts.netloc - self.s3_client: Any = self._get_client(region) + self.s3_client: S3Client = self._get_client(region) if not self._bucket_exists(bucket_name): raise ValueError( f"Bucket '{bucket_name}' does not exist or you have no access." ) - super().__init__(base_url) + super().__init__(base_url, repository, cache) self.bucket_name: str = bucket_name - self.cache: DatasourceCache = cache or DatasourceCache() - - @overload - @staticmethod - def create( - locator: ProductLocator, life_time: float | None = None - ) -> "DatasourceAWS": ... - - @overload - @staticmethod - def create( - locator: tuple[str, ...], life_time: float | None = None - ) -> "DatasourceAWS": ... - - @staticmethod - def create( - locator: tuple[str, ...] | ProductLocator, - life_time: float | None = None, - ) -> "DatasourceAWS": - """ - Create a new AWS-based datasource. - - Create a new AWS-based datasource with a base URL or a - ProductLocator object. - - Parameters - ---------- - locator : str - The base URL of a HTTP folder or a `ProductLocator` object. - life_time : float, optional - The cache life time in seconds, by default None. - - Returns - ------- - DatasourceHTTP - A new `DatasourceHTTP` object. - - Raises - ------ - ValueError - If the resource does not exist or the user has no access. 
- """ - cache = DatasourceCache(life_time) - return DatasourceAWS(locator, cache) - def _bucket_exists(self, bucket_name: str) -> bool: """ Check if the bucket exists. @@ -183,7 +125,7 @@ def _bucket_exists(self, bucket_name: str) -> bool: return True @staticmethod - def _get_client(region: str | None) -> Any: + def _get_client(region: str | None) -> S3Client: """ Get the AWS S3 client. @@ -234,13 +176,20 @@ def get_file(self, file_path: str) -> bytes: RuntimeError If the file cannot be retrieved. """ - folder_path: str = self.get_item_path(file_path) + local_file = self.repository.get_item(file_path) + + if local_file is not None: + return local_file + + folder_path: str = self._get_item_path(file_path) try: - response: Any = self.s3_client.get_object( + response = self.s3_client.get_object( Bucket=self.bucket_name, Key=folder_path ) - return bytes(response["Body"].read()) + content = response["Body"].read() + self.repository.add_item(file_path, content) + return content except ClientError as exc: message: str = f"Unable to retrieve the file '{file_path}': {exc}" @@ -254,7 +203,7 @@ def _url_join(head: str, tail: str) -> str: return f"{head}/{tail}" return head + tail - def get_item_path(self, dir_path: str) -> str: + def _get_item_path(self, dir_path: str) -> str: """ Get the folder path. @@ -301,7 +250,7 @@ def listdir(self, dir_path: str) -> list[str]: if cached_list is not None: return cached_list - folder_path: str = self.get_item_path(dir_path) + folder_path: str = self._get_item_path(dir_path) paginator: Any = self.s3_client.get_paginator("list_objects_v2") pages: Any = paginator.paginate( diff --git a/src/GOES_DL/datasource/datasource_base.py b/src/GOES_DL/datasource/datasource_base.py new file mode 100644 index 0000000..86661b7 --- /dev/null +++ b/src/GOES_DL/datasource/datasource_base.py @@ -0,0 +1,67 @@ +""" +Extend the Datasource interface with cache and repository support. + +Classes: + DatasourceBase: Extend the Datasource interface. 
class DatasourceBase(Datasource):
    """
    Extend the Datasource interface with cache and repository support.

    Normalise the `repository` and `cache` constructor arguments so
    that subclasses can always rely on `self.repository` being a
    `DatasourceRepository` and `self.cache` being a `DatasourceCache`.

    Attributes
    ----------
    cache : DatasourceCache
        The cache for the datasource.
    repository : DatasourceRepository
        The repository for the datasource.
    """

    cache: DatasourceCache
    repository: DatasourceRepository

    def __init__(
        self,
        base_url: str,
        repository: str | Path | DatasourceRepository | None,
        cache: float | DatasourceCache | None,
    ) -> None:
        """
        Initialize the DatasourceBase.

        Parameters
        ----------
        base_url : str
            The base URL for the datasource.
        repository : str | Path | DatasourceRepository | None
            The repository for the datasource. If a path (string or
            `Path`) is provided, a `DatasourceRepository` is created
            with it as base path. If `None` is provided, the current
            directory (".") is used as base path.
        cache : float | DatasourceCache | None
            The cache for the datasource. If a number is provided (an
            `int` is accepted wherever a `float` is), it is used as the
            life time passed to a new `DatasourceCache`. If `None` is
            provided, a life time of 0.0 seconds is used, which the
            original documentation describes as "no caching".
        """
        super().__init__(base_url)

        if repository is None:
            repository = "."
        if isinstance(repository, (str, Path)):
            repository = DatasourceRepository(repository)
        self.repository = repository

        if cache is None:
            cache = 0.0
        # Accept ints as well: `cache=60` satisfies the `float` annotation
        # but fails `isinstance(cache, float)`, which previously left a
        # bare int in `self.cache` instead of a `DatasourceCache`.
        if isinstance(cache, (int, float)):
            cache = DatasourceCache(float(cache))
        self.cache = cache
- def __init__( self, locator: str | ProductLocator, - cache: DatasourceCache | None = None, + repository: str | Path | DatasourceRepository | None = None, + cache: float | DatasourceCache | None = None, ) -> None: - if isinstance(locator, ProductLocator): - base_url: str = locator.get_base_url("HTTP")[0] - else: - base_url = locator + """ + Initialize the DatasourceHTTP object. + + Parameters + ---------- + locator : str | ProductLocator + The base URL of a HTTP-based data sources or a `ProductLocator` + object. + repository : str | Path | DatasourceRepository, optional + The directory where the files will be stored, by default + None. + cache : float | DatasourceCache, optional + The cache expiration time in seconds, by default None. + + Raises + ------ + ValueError + If the resource does not exist or the user has no access. + """ + base_url: str = ( + locator + if isinstance(locator, str) + else locator.get_base_url("HTTP")[0] + ) url_parts: ParseResult = url.parse(base_url) host_name: str = url_parts.netloc - base_path: str = url_parts.path + base_path = url_parts.path if not self._host_exists(host_name): raise ValueError( f"Host '{host_name}' does not exist or is out of service." ) - if not self._path_exists(base_url): raise ValueError( f"Path '{base_path}' does not exist or you have no access." ) - super().__init__(base_url) - - self.cache: DatasourceCache = cache or DatasourceCache() - - @overload - @staticmethod - def create( - locator: ProductLocator, life_time: float | None = None - ) -> "DatasourceHTTP": ... - - @overload - @staticmethod - def create( - locator: str, life_time: float | None = None - ) -> "DatasourceHTTP": ... - - @staticmethod - def create( - locator: str | ProductLocator, - life_time: float | None = None, - ) -> "DatasourceHTTP": - """ - Create a new HTTP datasource. - - Create a new HTTP datasource with a base URL or a ProductLocator - object. 
class DatasourceRepository:
    """
    Keep downloaded objects in a local file repository.

    Every operation is delegated to an underlying `FileRepository`.

    Attributes
    ----------
    repository : FileRepository
        The underlying file repository object.
    """

    repository: FileRepository

    def __init__(
        self, repository: str | Path | FileRepository | None = None
    ) -> None:
        """Set up the backing file repository.

        Parameters
        ----------
        repository : str | Path | FileRepository, optional
            Either a ready-made `FileRepository` instance, which is used
            as is, or a base directory that is forwarded unchanged to
            the `FileRepository` constructor (including `None`, the
            default).
        """
        backend = (
            repository
            if isinstance(repository, FileRepository)
            else FileRepository(repository)
        )
        self.repository = backend

    def add_item(self, file_path: str, file: bytes) -> None:
        """
        Store a new file in the repository.

        Parameters
        ----------
        file_path : str
            The path, within the repository, where the content goes.
        file : bytes
            The content to be written.

        Raises
        ------
        ValueError
            If `has_item` reports that `file_path` is already stored.
        """
        already_stored = self.has_item(file_path)
        if already_stored:
            raise ValueError(f"File '{file_path}' already in repository.")
        self.repository.save_file(file, file_path)

    def get_item(self, file_path: str) -> bytes | None:
        """
        Fetch the content of a stored file.

        Parameters
        ----------
        file_path : str
            The path to the file within the repository.

        Returns
        -------
        bytes or None
            The stored content, or None when `has_item` reports that
            the file is not in the repository.
        """
        if not self.has_item(file_path):
            return None
        return self.repository.read_file(file_path)

    def has_item(self, file_path: str) -> bool:
        """
        Tell whether a file is stored in the repository.

        Parameters
        ----------
        file_path : str
            The path to the file within the repository.

        Returns
        -------
        bool
            The result of the underlying repository's `is_file` check.
        """
        stored = self.repository.is_file(file_path)
        return stored
+ """ + return self.repository.is_file(file_path) diff --git a/src/GOES_DL/utils/__init__.py b/src/GOES_DL/utils/__init__.py index 58b9c71..110edd7 100644 --- a/src/GOES_DL/utils/__init__.py +++ b/src/GOES_DL/utils/__init__.py @@ -1,2 +1,3 @@ -from .url import url as url +from .file_repository import FileRepository as FileRepository from .headers import RequestHeaders as RequestHeaders +from .url import url as url diff --git a/src/GOES_DL/utils/file_repository.py b/src/GOES_DL/utils/file_repository.py new file mode 100644 index 0000000..f9fcbc6 --- /dev/null +++ b/src/GOES_DL/utils/file_repository.py @@ -0,0 +1,477 @@ +""" +Provide local repository mechanisms for managing file operations. + +Classes: + FileRepository: A local repository for managing file + operations. +""" + +import shutil +from pathlib import Path + + +class FileRepository: + """ + A local file repository for managing file operations. + + Provide methods to handle file and directory operations such as + listing, adding, deleting, moving, and searching files within a + specified base directory. + + Attributes + ---------- + base_directory : Path + The base directory where the repository is created. + """ + + base_directory: Path + + def __init__(self, base_directory: str | Path | None = None) -> None: + """Initialize the local file repository in a base directory. + + Initialize the repository with a base directory where downloaded + files will be stored. Create a new directory at the given path + if it does not exist; any missing parents of this path are + created as needed. + + Parameters + ---------- + base_directory : str | Path, optional + The base directory where the repository will be created. If + not specified, the current working directory is used, by + default None. + + Raises + ------ + NotADirectoryError + If the given path already exists in the file system and is + not a directory. + """ + if base_directory is None: + base_directory = "." 
+ self.base_directory = Path(base_directory) + if not self.base_directory.exists(): + self.base_directory.mkdir(parents=True) + elif not self.base_directory.is_dir(): + raise NotADirectoryError( + f"The path '{base_directory}' is not a directory." + ) + + def add_file( + self, + source_path: str | Path, + target_directory: str | Path = "", + move: bool = False, + ) -> None: + """Add a file to the repository by copying it or moving it. + + Copy or move a file from an external location to the specified + directory within the repository. + + `FileExistsError` is raised if the file already exists in the + target directory. `FileNotFoundError` if the source file does + not exist or is not a file. If the target path already exists in + the file system and is not a directory, `NotADirectoryError` is + raised. + + Parameters + ---------- + source_path : str | Path + The path to the file to be added. + target_directory : str | Path, optional + The directory to add the file to, by default "". + move : bool + Whether to move the file instead of copying it, optional, by + default False. + """ + source_path = Path(source_path) + target_path: Path = self._make_target_path( + source_path, target_directory + ) + if move: + shutil.move(source_path, target_path) + else: + shutil.copy2(source_path, target_path) + + def create_directory(self, directory: str | Path) -> None: + """Create a new directory in the repository. + + Create a new directory inside the repository provided that it + does not already exist. + + Parameters + ---------- + directory : str | Path + The name or relative path of the directory to create. + + Raises + ------ + FileExistsError + If the directory already exists. + """ + dir_path: Path = self.base_directory / directory + if not dir_path.exists(): + dir_path.mkdir(parents=True) + else: + raise FileExistsError( + f"The directory '{dir_path}' already exists." 
+ ) + + def delete_directory(self, directory: str | Path) -> None: + """Delete an empty directory from the repository. + + Remove an existing empty directory within the repository. + + Parameters + ---------- + directory : str | Path + The name or relative path of the directory to delete. + + Raises + ------ + NotADirectoryError + If the directory does not exist. + OSError + If the directory is not empty or cannot be deleted. + """ + dir_path: Path = self.base_directory / directory + if not dir_path.is_dir(): + raise NotADirectoryError( + f"The directory '{dir_path}' does not exist " + "or is not a directory." + ) + try: + dir_path.rmdir() + except OSError as e: + raise OSError( + f"The directory '{dir_path}' " + "is not empty or cannot be deleted." + ) from e + + def delete_file( + self, file_name: str | Path, directory: str | Path = "" + ) -> None: + """Delete a file from the repository. + + Delete a file by name within a directory in the repository. + + Parameters + ---------- + file_name : str | Path + The name or relative path of the file to delete. + directory : str | Path, optional + The directory where the file is located, by default "". + + Raises + ------ + FileNotFoundError + If the file does not exist or is not a file. + """ + file_path: Path = self.base_directory / directory / file_name + if file_path.is_file(): + file_path.unlink() + else: + raise FileNotFoundError( + f"The file '{file_path}' does not exist " "or is not a file." + ) + + def get_full_path( + self, file_name: str | Path, directory: str | Path = "" + ) -> str: + """Get the full path to a file in the repository. + + Get the full path to a file within a directory in the + repository. + + Parameters + ---------- + file_name : str | Path + The name or relative path of the file. + directory : str | Path, optional + The directory where the file is located, by default "". + + Returns + ------- + str + The full path to the file. 
+ """ + return str(self.base_directory / directory / file_name) + + def is_directory( + self, path_name: str | Path, directory: str | Path = "" + ) -> bool: + """Check if a path name from the repository is a directory. + + Check if a path name exists within the repository and is a + directory. + + Parameters + ---------- + path_name : str | Path + The name or relative path name of an object to check. + directory : str | Path, optional + The directory where the object is located, by default "". + + Returns + ------- + bool + True if the file exists, False otherwise. + """ + directory_path: Path = self.base_directory / directory / path_name + return directory_path.is_dir() + + def is_file( + self, path_name: str | Path, directory: str | Path = "" + ) -> bool: + """Check if a path name from the repository is a file. + + Check if a path name exists within a directory in the repository + and is a file. + + Parameters + ---------- + path_name : str | Path + The name or relative path name of an object to check. + directory : str | Path, optional + The directory where the object is located, by default "". + + Returns + ------- + bool + True if the file exists, False otherwise. + """ + file_path: Path = self.base_directory / directory / path_name + return file_path.is_file() + + def list_files(self, directory: str | Path = "") -> list[str]: + """List all files in the given directory. + + List all files within a given directory or the root directory if + not specified. + + Parameters + ---------- + directory : str | Path, optional + The name or relative path of the directory to list files + from, by default "". + + Returns + ------- + list[str] + A list of file names in the given directory. + + Raises + ------ + NotADirectoryError + If the given directory does not exist or is not a directory. 
+ """ + dir_path: Path = self.base_directory / directory + if dir_path.is_dir(): + return [item.name for item in dir_path.iterdir() if item.is_file()] + raise NotADirectoryError( + f"The directory '{dir_path}' does not exist or is not a directory." + ) + + def move_file( + self, + file_name: str | Path, + source_directory: str | Path, + target_directory: str | Path, + ) -> None: + """Move a file within the repository. + + Move a file from one directory to another within the repository. + + `FileExistsError` is raised if the file already exists in the + target directory. `FileNotFoundError` if the source file does + not exist or is not a file. If the target path already exists in + the file system and is not a directory, `NotADirectoryError` is + raised. + + Parameters + ---------- + file_name : str | Path + The name or relative path of the file to move. + source_directory : str | Path + The directory where the file is currently located. + target_directory : str | Path + The directory to move the file to. + """ + source_path: Path = self.base_directory / source_directory / file_name + target_path: Path = self._make_target_path( + source_path, target_directory + ) + shutil.move(source_path, target_path) + + def path_exists( + self, path_name: str | Path, directory: str | Path = "" + ) -> bool: + """Check if a path exists in the repository. + + Check if a given path exists within the repository. + + Parameters + ---------- + path_name : str | Path + The name or relative path name of an object to check. + directory : str | Path, optional + The directory where the object is located, by default "". + + Returns + ------- + bool + True if the file exists, False otherwise. + """ + object_path: Path = self.base_directory / directory / path_name + return object_path.exists() + + def read_file( + self, file_name: str | Path, directory: str | Path = "" + ) -> bytes: + """Read the content of a file in the repository. 
+ + Read the content of a file within a directory in the repository. + + Parameters + ---------- + file_name : str | Path + The name or relative path of the file to read. + directory : str | Path, optional + The directory where the file is located, by default "". + + Returns + ------- + bytes + The content of the file as a byte string. + + Raises + ------ + FileNotFoundError + If the file does not exist or is not a file. + """ + file_path: Path = self.base_directory / directory / file_name + if file_path.is_file(): + with open(file_path, "rb") as file: + return file.read() + raise FileNotFoundError( + f"The file '{file_path}' does not exist or is not a file." + ) + + def save_file( + self, content: bytes, file_name: str | Path, directory: str | Path = "" + ) -> None: + """Save content to a file in the repository. + + Save the given content to a file within a directory in the + repository. The relative path will be recreated within the + repository to mirror the source path. + + Parameters + ---------- + content : bytes + The content to save to the file. + file_name : str | Path + The name or relative path of the file to save the content. + directory : str | Path, optional + The directory to save the file to, by default "". + + Raises + ------ + FileExistsError + If the file already exists in the target directory or if the + given target path already exists in the file system and is + not a directory (via `mkdir`). + """ + file_path: Path = self.base_directory / directory / file_name + file_path.parent.mkdir(parents=True, exist_ok=True) + if file_path.exists(): + raise FileExistsError( + f"The file '{file_path}' already exists in the " + "target directory." + ) + with open(file_path, "wb") as file: + file.write(content) + + def search_files( + self, pattern: str, directory: str | Path = "" + ) -> list[str]: + """Search for files that match a pattern. 
+ + Search for files in a given directory within the repository by a + search pattern; for example, "*.txt" to search for text files. + + Parameters + ---------- + pattern : str + The pattern to search for. + directory : str | Path, optional + The name or relative path of the directory to search files + from, by default "". + + Returns + ------- + list[str] + A list of file names that match the pattern. + + Raises + ------ + NotADirectoryError + If the directory does not exist or is not a directory. + """ + dir_path: Path = self.base_directory / directory + if dir_path.is_dir(): + return [item.name for item in dir_path.glob(pattern)] + raise NotADirectoryError( + f"The directory '{dir_path}' does not exist " + "or is not a directory." + ) + + def _make_target_path( + self, source_path: Path, target_directory: str | Path + ) -> Path: + """Create the target directory for adding a file. + + Any missing parents of the target directory path are created as + needed. + + Parameters + ---------- + source_path : Path + The path to the source file. + target_directory : str | Path + The directory to add the file to. + + Returns + ------- + Path + The target directory path. + + Raises + ------ + FileExistsError + If the file already exists in the target directory. + FileNotFoundError + If the source file does not exist or is not a file. + NotADirectoryError + If the target path already exists in the file system and is + not a directory. + """ + target_path: Path = self.base_directory / target_directory + if not source_path.exists() or not source_path.is_file(): + raise FileNotFoundError( + f"The file '{source_path}' does not exist or is not a file." + ) + try: + target_path.mkdir(parents=True, exist_ok=True) + except FileExistsError as exc: + raise NotADirectoryError( + f"The path '{target_path}' already exists in the " + "file system and is not a directory." 
+ ) from exc + destination: Path = target_path / source_path.name + if destination.exists(): + raise FileExistsError( + f"The file '{destination}' already exists in the " + "target directory." + ) + return target_path