Skip to content

Commit

Permalink
Merge pull request #142 from jbernal0019/master
Browse files Browse the repository at this point in the history
 Implement ChRIS links
  • Loading branch information
jbernal0019 authored Mar 1, 2024
2 parents 94462c4 + 47b73b7 commit adf9363
Show file tree
Hide file tree
Showing 12 changed files with 677 additions and 72 deletions.
40 changes: 24 additions & 16 deletions make.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#
# make.sh [-h] [-i] [-s] [-N] [-U] \
# [-O <swarm|kubernetes>] \
# [-F <swift|filesystem|zipfile>] \
# [-F <swift|filesystem|fslink|zipfile>] \
# [-S <storeBase>] \
# [local|fnndsc[:dev]]
#
Expand All @@ -32,6 +32,11 @@
#
# unmake.sh -N -F filesystem; sudo rm -fr CHRIS_REMOTE_FS; rm -fr CHRIS_REMOTE_FS; make.sh -N -F filesystem
#
# Run full pfcon instantiation operating in-network on Swarm using mounted filesystem
# with ChRIS links storage:
#
# unmake.sh -N -F fslink; sudo rm -fr CHRIS_REMOTE_FS; rm -fr CHRIS_REMOTE_FS; make.sh -N -F fslink
#
# Skip the intro:
#
# unmake.sh ; sudo rm -fr CHRIS_REMOTE_FS; rm -fr CHRIS_REMOTE_FS; make.sh -s
Expand Down Expand Up @@ -66,9 +71,9 @@
# Optionally set pfcon to operate in-network mode (using a swift storage instead of
# a zip file).
#
# -F <swift|filesystem|zipfile>
# -F <swift|filesystem|fslink|zipfile>
#
# Explicitly set the storage environment. This option must be swift or filesystem
# Explicitly set the storage environment. This option must be swift, fslink or filesystem
# for pfcon operating in-network mode. For pfcon operating in out-of-network mode
# it must be set to zipfile (default).
#
Expand Down Expand Up @@ -101,11 +106,11 @@ source ./cparse.sh

declare -i STEP=0
ORCHESTRATOR=swarm
STORAGE=zipfile
STORAGE_ENV=zipfile
HERE=$(pwd)

# Print the command-line synopsis of make.sh and terminate the script with
# exit status 1. Takes no arguments.
print_usage () {
echo "Usage: ./make.sh [-h] [-i] [-s] [-N] [-F <swift|filesystem|zipfile>] [-U] [-O <swarm|kubernetes>] [-S <storeBase>] [local|fnndsc[:dev]]"
echo "Usage: ./make.sh [-h] [-i] [-s] [-N] [-F <swift|filesystem|fslink|zipfile>] [-U] [-O <swarm|kubernetes>] [-S <storeBase>] [local|fnndsc[:dev]]"
exit 1
}

Expand All @@ -119,8 +124,8 @@ while getopts ":hsiNUF:O:S:" opt; do
;;
N) b_pfconInNetwork=1
;;
F) STORAGE=$OPTARG
if ! [[ "$STORAGE" =~ ^(swift|filesystem|zipfile)$ ]]; then
F) STORAGE_ENV=$OPTARG
if ! [[ "$STORAGE_ENV" =~ ^(swift|filesystem|fslink|zipfile)$ ]]; then
echo "Invalid value for option -- F"
print_usage
fi
Expand Down Expand Up @@ -175,15 +180,16 @@ title -d 1 "Setting global exports..."
fi
if (( b_pfconInNetwork )) ; then
echo -e "PFCON_INNETWORK=True" | ./boxes.sh
if [[ $STORAGE == 'zipfile' ]]; then
echo -e "Need to pass '-F <swift|filesystem>' when PFCON_INNETWORK=True" | ./boxes.sh
# In-network mode cannot run on the default 'zipfile' storage: abort and tell
# the user which storage environments are accepted with -F.
if [[ $STORAGE_ENV == 'zipfile' ]]; then
    echo -e "Need to pass '-F <swift|filesystem|fslink>' when PFCON_INNETWORK=True" | ./boxes.sh
    exit 1
fi
else
echo -e "PFCON_INNETWORK=False" | ./boxes.sh
fi
echo -e "ORCHESTRATOR=$ORCHESTRATOR" | ./boxes.sh
echo -e "STORAGE=$STORAGE" | ./boxes.sh
echo -e "exporting STORAGE_ENV=$STORAGE_ENV" | ./boxes.sh
export STORAGE_ENV=$STORAGE_ENV
echo -e "exporting STOREBASE=$STOREBASE " | ./boxes.sh
export STOREBASE=$STOREBASE
export SOURCEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
Expand All @@ -210,9 +216,9 @@ windowBottom
title -d 1 "Building :dev"
cd $HERE
if (( b_pfconInNetwork )) ; then
if [[ $STORAGE == 'swift' ]]; then
if [[ $STORAGE_ENV == 'swift' ]]; then
CMD="docker compose -f swarm/docker-compose_dev_innetwork.yml build"
elif [[ $STORAGE == 'filesystem' ]]; then
else
CMD="docker compose -f swarm/docker-compose_dev_innetwork_fs.yml build"
fi
else
Expand Down Expand Up @@ -259,10 +265,10 @@ windowBottom
title -d 1 "Starting pfcon containerized dev environment on $ORCHESTRATOR"
if [[ $ORCHESTRATOR == swarm ]]; then
if (( b_pfconInNetwork )) ; then
if [[ $STORAGE == 'swift' ]]; then
if [[ $STORAGE_ENV == 'swift' ]]; then
echo "docker stack deploy -c swarm/docker-compose_dev_innetwork.yml pfcon_dev_stack" | ./boxes.sh ${LightCyan}
docker stack deploy -c swarm/docker-compose_dev_innetwork.yml pfcon_dev_stack
elif [[ $STORAGE == 'filesystem' ]]; then
else
echo "docker stack deploy -c swarm/docker-compose_dev_innetwork_fs.yml pfcon_dev_stack" | ./boxes.sh ${LightCyan}
docker stack deploy -c swarm/docker-compose_dev_innetwork_fs.yml pfcon_dev_stack
fi
Expand Down Expand Up @@ -306,10 +312,12 @@ if (( ! b_skipUnitTests )) ; then
sleep 5
if [[ $ORCHESTRATOR == swarm ]]; then
if (( b_pfconInNetwork )) ; then
if [[ $STORAGE == 'swift' ]]; then
if [[ $STORAGE_ENV == 'swift' ]]; then
docker exec $pfcon_dev pytest tests/test_resources_innetwork.py --color=yes
elif [[ $STORAGE == 'filesystem' ]]; then
elif [[ $STORAGE_ENV == 'filesystem' ]]; then
docker exec $pfcon_dev pytest tests/test_resources_innetwork_fs.py --color=yes
elif [[ $STORAGE_ENV == 'fslink' ]]; then
docker exec $pfcon_dev pytest tests/test_resources_innetwork_fslink.py --color=yes
fi
else
docker exec $pfcon_dev pytest tests/test_resources.py --color=yes
Expand Down
76 changes: 75 additions & 1 deletion pfcon/base_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
"""

import logging
import os
import abc
import shutil
import shutil, errno


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -38,3 +39,76 @@ def delete_data(self, job_dir):
Delete job data from the local storage.
"""
shutil.rmtree(job_dir)

def process_chrislink_files(self, job_incoming_dir):
    """
    Expand every ChRIS link file found under the job incoming directory.

    The per-run bookkeeping attributes are reset, the directory tree is processed
    by ``_process_chrislink_files`` and, afterwards, the top-level folder of each
    linked path is removed from the incoming directory unless that top-level
    folder is itself a linked path.

    Returns the number of ChRIS link files that were processed.
    """
    # fresh state for this run
    self.job_incoming_dir = job_incoming_dir
    self._linked_paths = set()
    self._nlinks = 0
    self._already_copied_src_set = set()

    self._process_chrislink_files(job_incoming_dir)

    # first path component of every path a link pointed to
    top_level_names = {linked.split('/', 1)[0] for linked in self._linked_paths}

    for name in top_level_names:
        if name in self._linked_paths:
            continue  # the top folder is itself linked data
        self.deletesrc(os.path.join(job_incoming_dir, name))

    return self._nlinks

def _process_chrislink_files(self, dir):
"""
Recursively expand (substitute by actual folders) and remove ChRIS link files.
"""
for root, dirs, files in os.walk(dir):
for filename in files:
if filename.endswith('.chrislink'):
link_file_path = os.path.join(root, filename)

if not link_file_path.startswith(tuple(self._already_copied_src_set)): # only expand a link once
with open(link_file_path, 'rb') as f:
rel_path = f.read().decode().strip()
abs_path = os.path.join(self.job_incoming_dir, rel_path)

if os.path.isfile(abs_path):
rel_path = os.path.dirname(rel_path)
abs_path = os.path.dirname(abs_path)

source_trace_dir = rel_path.replace('/', '_')
dst_path = os.path.join(root, source_trace_dir)

if not os.path.isdir(dst_path): # only copy once to a dest path
self.copysrc(abs_path, dst_path)
self._already_copied_src_set.add(abs_path)
self._process_chrislink_files(dst_path) # recursive call

self._linked_paths.add(rel_path)

os.remove(link_file_path)
self._nlinks += 1

@staticmethod
def copysrc(src, dst):
try:
shutil.copytree(src, dst)
except OSError as e:
if e.errno in (errno.ENOTDIR, errno.EINVAL):
shutil.copy(src, dst)
else:
raise

@staticmethod
def deletesrc(src):
try:
shutil.rmtree(src)
except OSError as e:
if e.errno in (errno.ENOTDIR, errno.EINVAL):
os.remove(src)
else:
raise
2 changes: 1 addition & 1 deletion pfcon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self):

if self.PFCON_INNETWORK:
self.STORAGE_ENV = env('STORAGE_ENV', 'swift')
if self.STORAGE_ENV not in ('swift', 'filesystem'):
if self.STORAGE_ENV not in ('swift', 'filesystem', 'fslink'):
raise ValueError(f"Unsupported value '{self.STORAGE_ENV}' for STORAGE_ENV")
else:
self.STORAGE_ENV = env('STORAGE_ENV', 'zipfile')
Expand Down
8 changes: 5 additions & 3 deletions pfcon/filesystem_storage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Handle filesystem-based (eg. mounted directory) storage. This is used when pfcon is
in-network and configured to directly access the data from a filesystem.
Handle filesystem-based storage. This is used when pfcon is in-network and configured
to directly access the data from a ChRIS shared filesystem. It assumes that both the
input (read-only) and the output (read-write) directories in the shared storage are
directly mounted into the plugin container.
"""

import logging
Expand All @@ -24,7 +26,7 @@ def __init__(self, config):

self.fs_mount_base_dir = config.get('STOREBASE_MOUNT')

def store_data(self, job_id, job_incoming_dir, data=None, **kwargs):
def store_data(self, job_id, job_incoming_dir, data, **kwargs):
"""
Count the number of files in the specified job incoming directory.
"""
Expand Down
114 changes: 114 additions & 0 deletions pfcon/fslink_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""
Handle filesystem-based storage. This is used when pfcon is in-network and configured
to directly access the data from a ChRIS shared filesystem. It only assumes that the
output (read-write) directory in the shared storage is directly mounted into the plugin
container. Unlike the 'filesystem' storage this supports ChRIS links.
"""

import logging
import datetime
import os
import shutil


from .filesystem_storage import FileSystemStorage


logger = logging.getLogger(__name__)


class FSLinkStorage(FileSystemStorage):
    """
    In-network filesystem storage with support for ChRIS link files.

    Job input data is copied from the shared filesystem rooted at
    ``self.fs_mount_base_dir`` into the job incoming directory, following every
    '.chrislink' file found along the way.
    """

    def store_data(self, job_id, job_incoming_dir, data, **kwargs):
        """
        Copy all the files from the filesystem tree under each input folder (storage
        prefix) in the specified data list into the specified job incoming directory.

        Files located under an input folder are copied relative to that folder. Files
        reached through ChRIS links keep their full storage path below the incoming
        directory, which is where ``process_chrislink_files`` looks for them.

        ``kwargs`` must contain 'job_output_path' (KeyError otherwise); link files are
        not allowed to point to it or to any of its ancestors (ValueError).

        Returns a dictionary with the keys 'jid', 'nfiles' (number of copied files,
        link files excluded), 'timestamp' and 'path'.
        """
        self.job_id = job_id
        self.job_output_path = kwargs['job_output_path']

        all_file_paths = set()

        for storage_path in data:
            storage_path = storage_path.strip('/')
            file_paths = set()
            visited_paths = set()

            self._find_all_file_paths(storage_path, file_paths, visited_paths)

            for f_path in file_paths:
                if f_path in all_file_paths:  # copy a given file only once
                    continue

                fs_file_path = os.path.join(self.fs_mount_base_dir, f_path)
                rel_file_path = self._strip_storage_prefix(f_path, storage_path)
                local_file_path = os.path.join(job_incoming_dir, rel_file_path)

                try:
                    shutil.copy(fs_file_path, local_file_path)
                except FileNotFoundError:
                    # Most likely the destination folder does not exist yet. With
                    # exist_ok the retry re-raises the real FileNotFoundError when it
                    # is the source file that is missing.
                    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
                    shutil.copy(fs_file_path, local_file_path)

                all_file_paths.add(f_path)

        nfiles = len(all_file_paths)
        logger.info(f'{nfiles} files fetched from the filesystem for job {job_id}')

        # link files are expanded and removed, so they don't count as input files
        nlinks = self.process_chrislink_files(job_incoming_dir)
        nfiles -= nlinks

        return {
            'jid': job_id,
            'nfiles': nfiles,
            'timestamp': f'{datetime.datetime.now()}',
            'path': job_incoming_dir
        }

    def delete_data(self, job_dir):
        """
        Delete job data from the local storage.
        """
        shutil.rmtree(job_dir)

    @staticmethod
    def _is_same_or_descendant(path, ancestor):
        """
        Return True if ``path`` is ``ancestor`` or lies below it, comparing whole path
        components ('a/bc' is not below 'a/b'). An empty ancestor contains every path.
        """
        if not ancestor:
            return True
        return (path.rstrip('/') + '/').startswith(ancestor.rstrip('/') + '/')

    @staticmethod
    def _strip_storage_prefix(f_path, storage_path):
        """
        Return ``f_path`` relative to ``storage_path`` when it is that path or lies
        below it; otherwise return ``f_path`` unchanged. Leading slashes are removed.
        """
        if storage_path and (f_path == storage_path
                             or f_path.startswith(storage_path + '/')):
            f_path = f_path[len(storage_path):]
        return f_path.lstrip('/')

    def _find_all_file_paths(self, storage_path, file_paths, visited_paths):
        """
        Find all file paths under the passed storage path (prefix) by
        recursively following ChRIS links. The resulting set of file paths is given
        by the file_paths set argument.

        ``visited_paths`` collects the storage paths already explored; a path that is
        one of them, or lies below one of them, is not explored again.

        Raises ValueError if a link file points to the job output dir or to any of its
        ancestors. Errors raised while reading a link file are logged and re-raised.
        """
        if any(self._is_same_or_descendant(storage_path, seen)
               for seen in visited_paths):
            return  # already covered, this also avoids infinite loops

        visited_paths.add(storage_path)
        job_id = self.job_id
        job_output_path = self.job_output_path
        fs_abs_path = os.path.join(self.fs_mount_base_dir, storage_path)

        if os.path.isfile(fs_abs_path):
            l_ls = [fs_abs_path]
        else:
            l_ls = [os.path.join(root, filename)
                    for root, _, files in os.walk(fs_abs_path)
                    for filename in files]

        for abs_file_path in l_ls:
            if abs_file_path.endswith('.chrislink'):
                try:
                    with open(abs_file_path, 'rb') as f:
                        linked_path = f.read().decode().strip()
                except Exception as e:
                    logger.error(f'Failed to read file {abs_file_path} for '
                                 f'job {job_id}, detail: {str(e)}')
                    raise

                if f'{job_output_path}/'.startswith(linked_path.rstrip('/') + '/'):
                    # link files are not allowed to point to the job output dir or
                    # any of its ancestors
                    logger.error(f'Found invalid input path {linked_path} for job '
                                 f'{job_id} pointing to an ancestor of the job '
                                 f'output dir: {job_output_path}')
                    raise ValueError(f'Invalid input path: {linked_path}')

                self._find_all_file_paths(linked_path, file_paths,
                                          visited_paths)  # recursive call

            # every file, link files included, is recorded relative to the mount dir
            file_paths.add(abs_file_path.replace(self.fs_mount_base_dir, '',
                                                 1).lstrip('/'))
Loading

0 comments on commit adf9363

Please sign in to comment.