feat(JobAttachments)!: Add 'last seen on S3' cache (#172)
Breaking changes:
- Mostly, this is a significant change in behaviour (the addition of the S3 check cache) and warrants a new version.
- The S3AssetUploader.upload_assets public function was changed to accommodate the S3 cache path. Customers whose custom Python scripts use the S3AssetUploader may need to update their scripts to specify the cache path.
- list_object_threshold was removed from the CLI config.

Signed-off-by: Caden Marofke <marofke@amazon.com>
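For customers updating custom scripts, the practical question is what value to pass for the new cache path. As the CacheDB code later in this diff shows, when no directory is given the caches default to a directory under the user's home; the snippet below only illustrates computing that default location, it is not the uploader's API.

```python
import os

# Default location of the new caches (mirrors CacheDB.get_default_cache_db_file_dir below):
# $HOME/.deadline/job_attachments/<cache_name>.db
home = os.environ.get("HOME")
default_cache_dir = os.path.join(home, ".deadline", "job_attachments") if home else None
print(default_cache_dir)
```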
Showing 21 changed files with 903 additions and 480 deletions.
@@ -0,0 +1,15 @@
```python
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

from .cache_db import CacheDB, CONFIG_ROOT, COMPONENT_NAME
from .hash_cache import HashCache, HashCacheEntry
from .s3_check_cache import S3CheckCache, S3CheckCacheEntry

__all__ = [
    "CacheDB",
    "CONFIG_ROOT",
    "COMPONENT_NAME",
    "HashCache",
    "HashCacheEntry",
    "S3CheckCache",
    "S3CheckCacheEntry",
]
```
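These re-exports let callers import the cache classes from the package itself rather than from the individual modules. The package path in the one-liner below is an assumption based on the relative imports above; only the exported names are confirmed by the diff.

```python
# Package path assumed; only the exported names come from the __init__ above.
from deadline.job_attachments.caches import HashCache, HashCacheEntry, S3CheckCache
```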
@@ -0,0 +1,96 @@
```python
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

"""
Module for defining a local cache file.
"""

import logging
import os
from abc import ABC
from threading import Lock
from typing import Optional

from ..exceptions import JobAttachmentsError

CONFIG_ROOT = ".deadline"
COMPONENT_NAME = "job_attachments"

logger = logging.getLogger("Deadline")


class CacheDB(ABC):
    """
    Abstract base class for connecting to a local SQLite cache database.
    This class is intended to always be used with a context manager to properly
    close the connection to the cache database.
    """

    def __init__(
        self, cache_name: str, table_name: str, create_query: str, cache_dir: Optional[str] = None
    ) -> None:
        if not cache_name or not table_name or not create_query:
            raise JobAttachmentsError("Constructor strings for CacheDB cannot be empty.")
        self.cache_name: str = cache_name
        self.table_name: str = table_name
        self.create_query: str = create_query

        try:
            # SQLite is included in Python installers, but might not exist if building python from source.
            import sqlite3  # noqa

            self.enabled = True
        except ImportError:
            logger.warn(f"SQLite was not found, {cache_name} will not be used.")
            self.enabled = False
            return

        if cache_dir is None:
            cache_dir = self.get_default_cache_db_file_dir()
        if cache_dir is None:
            raise JobAttachmentsError(
                f"No default cache path found. Please provide a directory for {self.cache_name}."
            )
        os.makedirs(cache_dir, exist_ok=True)
        self.cache_dir: str = os.path.join(cache_dir, f"{self.cache_name}.db")
        self.db_lock = Lock()

    def __enter__(self):
        """Called when entering the context manager."""
        if self.enabled:
            import sqlite3

            try:
                self.db_connection: sqlite3.Connection = sqlite3.connect(
                    self.cache_dir, check_same_thread=False
                )
            except sqlite3.OperationalError as oe:
                raise JobAttachmentsError(
                    f"Could not access cache file in {self.cache_dir}"
                ) from oe

            try:
                self.db_connection.execute(f"SELECT * FROM {self.table_name}")
            except Exception:
                # DB file doesn't have our table, so we need to create it
                logger.info(
                    f"No cache entries for the current library version were found. Creating a new cache for {self.cache_name}"
                )
                self.db_connection.execute(self.create_query)
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Called when exiting the context manager."""
        if self.enabled:
            self.db_connection.close()

    @classmethod
    def get_default_cache_db_file_dir(cls) -> Optional[str]:
        """
        Gets the expected directory for the cache database file based on OS environment variables.
        If a directory cannot be found, defaults to the working directory.
        """
        default_path = os.environ.get("HOME")
        if default_path:
            default_path = os.path.join(default_path, CONFIG_ROOT, COMPONENT_NAME)
        return default_path
```
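The docstring above insists on context-manager use, and concrete caches are expected to subclass CacheDB with their own table name and schema. Here is a minimal sketch of that pattern; the ExampleCache class, its table name and schema, and the package import path are assumptions for illustration only, not part of this commit.

```python
# Sketch of a CacheDB subclass; table name, schema, and import path are invented for illustration.
from typing import Optional

from deadline.job_attachments.caches import CacheDB  # package path assumed


class ExampleCache(CacheDB):
    CACHE_NAME = "example_cache"

    def __init__(self, cache_dir: Optional[str] = None) -> None:
        super().__init__(
            cache_name=self.CACHE_NAME,
            table_name="entriesV1",
            create_query="CREATE TABLE entriesV1(key text primary key, value text)",
            cache_dir=cache_dir,
        )


# Entering the context opens the SQLite file (creating the table on first use);
# exiting closes the connection. If sqlite3 is unavailable, `enabled` is False
# and the cache is a no-op.
with ExampleCache() as cache:
    if cache.enabled:
        with cache.db_lock, cache.db_connection:
            cache.db_connection.execute(
                "INSERT OR REPLACE INTO entriesV1 VALUES(?, ?)", ["some/key", "some value"]
            )
```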
@@ -0,0 +1,91 @@
```python
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

"""
Module for accessing the local file hash cache.
"""

import logging
from dataclasses import dataclass
from typing import Any, Dict, Optional

from .cache_db import CacheDB
from ..asset_manifests.hash_algorithms import HashAlgorithm


logger = logging.getLogger("Deadline")


@dataclass
class HashCacheEntry:
    """Represents an entry in the local hash-cache database"""

    file_path: str
    hash_algorithm: HashAlgorithm
    file_hash: str
    last_modified_time: str

    def to_dict(self) -> Dict[str, Any]:
        return {
            "file_path": self.file_path,
            "hash_algorithm": self.hash_algorithm.value,
            "file_hash": self.file_hash,
            "last_modified_time": self.last_modified_time,
        }


class HashCache(CacheDB):
    """
    Class used to store and retrieve entries in the local file hash cache.
    This class is intended to always be used with a context manager to properly
    close the connection to the hash cache database.
    This class also automatically locks when doing writes, so it can be called
    by multiple threads.
    """

    CACHE_NAME = "hash_cache"
    CACHE_DB_VERSION = 2

    def __init__(self, cache_dir: Optional[str] = None) -> None:
        table_name: str = f"hashesV{self.CACHE_DB_VERSION}"
        create_query: str = f"CREATE TABLE hashesV{self.CACHE_DB_VERSION}(file_path text primary key, hash_algorithm text secondary key, file_hash text, last_modified_time timestamp)"
        super().__init__(
            cache_name=self.CACHE_NAME,
            table_name=table_name,
            create_query=create_query,
            cache_dir=cache_dir,
        )

    def get_entry(
        self, file_path_key: str, hash_algorithm: HashAlgorithm
    ) -> Optional[HashCacheEntry]:
        """
        Returns an entry from the hash cache, if it exists.
        """
        if not self.enabled:
            return None

        with self.db_lock, self.db_connection:
            entry_vals = self.db_connection.execute(
                f"SELECT * FROM {self.table_name} WHERE file_path=? AND hash_algorithm=?",
                [file_path_key, hash_algorithm.value],
            ).fetchone()
            if entry_vals:
                return HashCacheEntry(
                    file_path=entry_vals[0],
                    hash_algorithm=HashAlgorithm(entry_vals[1]),
                    file_hash=entry_vals[2],
                    last_modified_time=str(entry_vals[3]),
                )
            else:
                return None

    def put_entry(self, entry: HashCacheEntry) -> None:
        """Inserts or replaces an entry into the hash cache database after acquiring the lock."""
        if self.enabled:
            with self.db_lock, self.db_connection:
                self.db_connection.execute(
                    f"INSERT OR REPLACE INTO {self.table_name} VALUES(:file_path, :hash_algorithm, :file_hash, :last_modified_time)",
                    entry.to_dict(),
                )
```
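A hedged sketch of how get_entry and put_entry combine in practice: the import paths are assumptions based on the relative imports in this diff, and the hash_file callable stands in for the library's real hashing helper, which is not part of this hunk. Only the HashCache/HashCacheEntry usage mirrors the code above.

```python
import os

# Package/module paths below are assumptions based on the relative imports in this diff.
from deadline.job_attachments.caches import HashCache, HashCacheEntry
from deadline.job_attachments.asset_manifests.hash_algorithms import HashAlgorithm


def cached_hash(path: str, hash_algorithm: HashAlgorithm, hash_file) -> str:
    """Return the file's hash, re-hashing only when its mtime changed.

    `hash_file` is a hypothetical callable (path, algorithm) -> hex digest; the real
    hashing helper lives elsewhere in the library.
    """
    mtime = str(os.stat(path).st_mtime_ns)
    with HashCache() as cache:
        entry = cache.get_entry(path, hash_algorithm)
        if entry and entry.last_modified_time == mtime:
            return entry.file_hash  # cache hit: skip hashing entirely
        file_hash = hash_file(path, hash_algorithm)
        cache.put_entry(
            HashCacheEntry(
                file_path=path,
                hash_algorithm=hash_algorithm,
                file_hash=file_hash,
                last_modified_time=mtime,
            )
        )
        return file_hash
```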