Skip to content

Commit

Permalink
Plain filesystem storage (#516)
Browse files Browse the repository at this point in the history
* Minimize public interface of SwiftManager

* Add support for FilesystemStorage and SwiftStorage
  • Loading branch information
jennydaman authored Jun 5, 2023
1 parent ce549dd commit df388da
Show file tree
Hide file tree
Showing 29 changed files with 621 additions and 159 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ dc.out
celerybeat.pid
swarm/prod/secrets/
kubernetes/prod/base/secrets/
venv
15 changes: 13 additions & 2 deletions chris_backend/config/settings/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import ldap
from django_auth_ldap.config import LDAPSearch
from .common import * # noqa
from core.swiftmanager import SwiftManager
from core.storage import verify_storage_connection

# Normally you should not import ANYTHING from Django directly
# into your settings, but ImproperlyConfigured is an exception.
Expand Down Expand Up @@ -81,6 +81,12 @@
'propagate': False # required to avoid double logging with root logger
}

# Storage Settings
#
# To use local storage:
# DEFAULT_FILE_STORAGE = 'django.core.files.storage.FileSystemStorage'
# MEDIA_ROOT = '/var/chris'
#
# Swift service settings
DEFAULT_FILE_STORAGE = 'swift.storage.SwiftStorage'
SWIFT_AUTH_URL = 'http://swift_service:8080/auth/v1.0'
Expand All @@ -91,7 +97,12 @@
'key': SWIFT_KEY,
'authurl': SWIFT_AUTH_URL}
try:
    # Fail fast while settings are being loaded if the configured storage
    # backend cannot be reached or is misconfigured.
    verify_storage_connection(
        DEFAULT_FILE_STORAGE=DEFAULT_FILE_STORAGE,
        # MEDIA_ROOT may not be defined in this module's namespace, hence the
        # tolerant globals() lookup instead of a direct name reference.
        MEDIA_ROOT=globals().get('MEDIA_ROOT', None),
        SWIFT_CONTAINER_NAME=SWIFT_CONTAINER_NAME,
        SWIFT_CONNECTION_PARAMS=SWIFT_CONNECTION_PARAMS,
    )
except Exception as e:
    # Chain the original exception so its traceback is kept for debugging.
    raise ImproperlyConfigured(str(e)) from e

Expand Down
32 changes: 22 additions & 10 deletions chris_backend/config/settings/production.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django_auth_ldap.config import LDAPSearch
from .common import * # noqa
from environs import Env, EnvValidationError
from core.swiftmanager import SwiftManager
from core.storage import verify_storage_connection

# Normally you should not import ANYTHING from Django directly
# into your settings, but ImproperlyConfigured is an exception.
Expand Down Expand Up @@ -55,16 +55,28 @@ def get_secret(setting, secret_type=env):

# SWIFT SERVICE CONFIGURATION
# ------------------------------------------------------------------------------
# The storage backend is selected by the DEFAULT_FILE_STORAGE secret; only the
# settings belonging to the selected backend are read.
DEFAULT_FILE_STORAGE = get_secret('DEFAULT_FILE_STORAGE')

if DEFAULT_FILE_STORAGE == 'django.core.files.storage.FileSystemStorage':
    MEDIA_ROOT = get_secret('MEDIA_ROOT')
    _storage_settings = {'MEDIA_ROOT': MEDIA_ROOT}
elif DEFAULT_FILE_STORAGE == 'swift.storage.SwiftStorage':
    SWIFT_AUTH_URL = get_secret('SWIFT_AUTH_URL')
    SWIFT_USERNAME = get_secret('SWIFT_USERNAME')
    SWIFT_KEY = get_secret('SWIFT_KEY')
    SWIFT_CONTAINER_NAME = get_secret('SWIFT_CONTAINER_NAME')
    SWIFT_CONNECTION_PARAMS = {'user': SWIFT_USERNAME,
                               'key': SWIFT_KEY,
                               'authurl': SWIFT_AUTH_URL}
    _storage_settings = {
        'SWIFT_CONTAINER_NAME': SWIFT_CONTAINER_NAME,
        'SWIFT_CONNECTION_PARAMS': SWIFT_CONNECTION_PARAMS,
    }
else:
    # Report the real problem instead of a cryptic AttributeError raised later
    # from inside the storage helpers.
    raise ImproperlyConfigured(
        f'Unsupported DEFAULT_FILE_STORAGE: {DEFAULT_FILE_STORAGE!r}'
    )


def verify_storage():
    """
    Check that the configured storage backend is usable.

    DEFAULT_FILE_STORAGE is always forwarded, because the storage helpers
    select the manager implementation from it.
    """
    verify_storage_connection(
        DEFAULT_FILE_STORAGE=DEFAULT_FILE_STORAGE,
        **_storage_settings
    )


try:
    verify_storage()
except Exception as e:
    # Chain the original exception so its traceback is kept for debugging.
    raise ImproperlyConfigured(str(e)) from e

Expand Down
22 changes: 22 additions & 0 deletions chris_backend/core/storage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
A module for interfacing with file storage backends.
File storage backends are "services" which store arbitrary data identified by path-like strings.
Examples include OpenStack Swift object storage, AWS S3, NooBaa on OpenShift, or of course,
a literal UNIX-y filesystem.
ChRIS files are immutable, so file storage services can be optimized for WORM
(write-once, read-many) workloads.
Note to developers: historically, *ChRIS* was tightly-coupled to OpenStack Swift, hence
variable and function names use Swift terminology.
"""
from typing import Dict

from .storagemanager import StorageManager
from .swiftmanager import SwiftManager
from .plain_fs import FilesystemManager
from .helpers import connect_storage, verify_storage_connection


__all__ = ['StorageManager', 'SwiftManager', 'FilesystemManager', 'connect_storage', 'verify_storage_connection']
64 changes: 64 additions & 0 deletions chris_backend/core/storage/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import unittest.mock
from contextlib import contextmanager
from tempfile import TemporaryDirectory
from typing import Any, ContextManager, Dict, Iterator

from core.storage.storagemanager import StorageManager
from core.storage.swiftmanager import SwiftManager
from core.storage.plain_fs import FilesystemManager


def connect_storage(settings) -> StorageManager:
    """
    Build the storage manager matching the backend named in the settings.

    :param settings: django.conf.settings object (or any object exposing the
                     same attributes)
    :returns: a manager for the storage configured by settings
    :raises ValueError: if the configured backend is not a supported one
    """
    backend = __get_storage_name(settings)

    if backend == 'FileSystemStorage':
        return FilesystemManager(settings.MEDIA_ROOT)

    if backend == 'SwiftStorage':
        container = settings.SWIFT_CONTAINER_NAME
        connection_params = settings.SWIFT_CONNECTION_PARAMS
        return SwiftManager(container, connection_params)

    raise ValueError(f'Unsupported storage system: {backend}')


def verify_storage_connection(**kwargs) -> None:
    """
    Check that a storage backend described by keyword settings is usable.

    The keyword arguments are exposed as attributes of a settings-like object,
    a ``StorageManager`` is created from it, and
    ``StorageManager.create_container`` is called on that manager.
    Any exception raised along the way (bad configuration or failed
    connection) propagates to the caller.
    """
    connect_storage(_DummySettings(kwargs)).create_container()


@contextmanager
def mock_storage(target_settings) -> Iterator[FilesystemManager]:
    """
    For testing only.

    Uses ``unittest.mock.patch.multiple`` to configure a given settings object
    to use a temporary directory for ChRIS files storage. Both the patch and
    the temporary directory are undone/removed when the context exits.

    :param target_settings: a django.conf settings object
    :returns: a context manager yielding a FilesystemManager for the
              temporary directory
    """
    with TemporaryDirectory() as tmp_dir:
        # Only the last dotted component of DEFAULT_FILE_STORAGE is used to
        # pick the manager, so a placeholder module name is sufficient here.
        overrides = {
            'DEFAULT_FILE_STORAGE': 'fake.FileSystemStorage',
            'MEDIA_ROOT': tmp_dir,
        }
        with unittest.mock.patch.multiple(target_settings, **overrides):
            yield FilesystemManager(tmp_dir)


class _DummySettings:
    """
    Minimal stand-in for a settings object.

    Every key of the given dictionary becomes an attribute of the instance,
    so the object can be handed to code that reads settings by attribute.
    """

    def __init__(self, settings_dict: Dict[str, Any]):
        # Values are arbitrary objects (e.g. connection parameter dicts or
        # None), not only strings.
        for k, v in settings_dict.items():
            setattr(self, k, v)


def __get_storage_name(settings: Any) -> str:
    """
    Return the last dotted component of ``settings.DEFAULT_FILE_STORAGE``,
    e.g. ``'SwiftStorage'`` for ``'swift.storage.SwiftStorage'``.
    """
    dotted_path = settings.DEFAULT_FILE_STORAGE
    _, _, class_name = dotted_path.rpartition('.')
    return class_name
57 changes: 57 additions & 0 deletions chris_backend/core/storage/plain_fs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from pathlib import Path
from typing import Union, List, AnyStr, Optional

from core.storage.storagemanager import StorageManager


class FilesystemManager(StorageManager):
    """
    The simplest manager, something everyone has, the one you can trust...

    ``FilesystemManager`` is for storing files on disk as-is, no magic involved.

    More technically, ``FilesystemManager`` methods adapt method calls of
    ``pathlib`` to the ``StorageManager`` interface.
    This code can be used as a reference for how to implement ``StorageManager``
    for other file storage services.

    All path arguments are interpreted relative to the base directory given
    to the constructor.
    """

    def __init__(self, base: Union[str, Path]):
        """
        :param base: top-level directory under which all files are stored
        """
        self.__base = Path(base)

    def create_container(self) -> None:
        """
        Create the base directory (and any missing parents) if it does not exist.
        """
        self.__base.mkdir(exist_ok=True, parents=True)

    def ls(self, path_prefix: str) -> List[str]:
        """
        :returns: all files found recursively under the given directory, as
                  paths relative to the base directory, so that they can be
                  passed back to the other methods of this manager.
        """
        all_paths = (self.__base / path_prefix).rglob('*')
        return [str(p.relative_to(self.__base)) for p in all_paths if p.is_file()]

    def path_exists(self, path: str) -> bool:
        """
        :returns: True if path exists (whether it be a directory OR file)
        """
        return (self.__base / path).exists()

    def obj_exists(self, file_path: str) -> bool:
        """
        :returns: True if given path is an existing file
        """
        return (self.__base / file_path).is_file()

    def upload_obj(self, file_path: str, contents: AnyStr, content_type: Optional[str] = None):
        """
        Write file data to the given path, creating parent directories as needed.

        :param file_path: file path to write to
        :param contents: file data, either ``str`` or bytes
        :param content_type: optional media type; accepted for interface
                             compatibility. How the data is written is decided
                             by the type of ``contents``, so a mismatch between
                             the declared media type and the payload type
                             cannot cause a ``TypeError``.
        """
        dst = (self.__base / file_path)
        dst.parent.mkdir(exist_ok=True, parents=True)

        if isinstance(contents, str):
            dst.write_text(contents)
        else:
            dst.write_bytes(contents)

    def download_obj(self, file_path: str) -> AnyStr:
        """
        :returns: the raw bytes of the file at the given path
        """
        return (self.__base / file_path).read_bytes()

    def copy_obj(self, src: str, dst: str) -> None:
        """
        Make ``dst`` refer to the same data as ``src`` by creating a hard link.

        Parent directories of ``dst`` are created as needed.
        """
        # Local import: os is not among this module's top-level imports.
        import os

        src_path = self.__base / src
        dst_path = self.__base / dst
        dst_path.parent.mkdir(exist_ok=True, parents=True)
        # os.link is used instead of Path.link_to, which was deprecated in
        # Python 3.10 and removed in Python 3.12.
        os.link(src_path, dst_path)

    def delete_obj(self, file_path: str) -> None:
        """
        Remove the file at the given path.
        """
        (self.__base / file_path).unlink()
70 changes: 70 additions & 0 deletions chris_backend/core/storage/storagemanager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import abc
from typing import List, AnyStr, Optional


class StorageManager(abc.ABC):
    """
    Interface between ChRIS and its file storage backend.

    The methods declared here are helpers for browsing stored files and
    reading/writing file data, analogous to the ``ls``, ``stat``, and ``cat``
    commands.

    Only ``create_container`` is declared abstract; the remaining methods have
    empty default bodies (returning ``None``) which concrete managers are
    expected to override.
    """

    @abc.abstractmethod
    def create_container(self) -> None:
        """
        Create the container where all ChRIS file data is to be stored.

        What a "container" is depends on the backend: a container for Swift,
        a bucket for S3, and simply the top-level/parent directory for a
        plain filesystem.
        """

    def ls(self, path_prefix: str) -> List[str]:
        """
        List stored files.

        :param path_prefix: prefix which the listed paths are under
        :returns: a list of all files under the given path prefix.
        """

    def path_exists(self, path: str) -> bool:
        """
        Check for the existence of a path of any kind.

        :returns: True if path exists (whether it be a directory OR file)
        """

    def obj_exists(self, file_path: str) -> bool:
        """
        Check for the existence of a file.

        :returns: True if given path is an existing file
        """

    def upload_obj(self, file_path: str, contents: AnyStr, content_type: Optional[str] = None):
        """
        Upload file data to the storage service.

        :param file_path: file path to upload to
        :param contents: file data
        :param content_type: optional media type, e.g. "text/plain"
        """

    def download_obj(self, file_path: str) -> AnyStr:
        """
        Download file data from the storage service.

        :param file_path: path of the file to retrieve
        """

    def copy_obj(self, src: str, dst: str) -> None:
        """
        Copy data to a new path.

        Implementations may create links or shallow copies instead of a real
        copy, for efficiency.
        """

    def delete_obj(self, file_path: str) -> None:
        """
        Delete data from the given path.
        """
Loading

0 comments on commit df388da

Please sign in to comment.