Skip to content

Commit

Permalink
Merge branch 'feature/tmp_data_management' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
eboileau committed May 23, 2024
2 parents 4c04350 + 7edb867 commit 832eb50
Show file tree
Hide file tree
Showing 12 changed files with 288 additions and 160 deletions.
4 changes: 2 additions & 2 deletions docker/app_container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ COPY client/dist/. /frontend/
COPY docker/app_container/files/. /app/

RUN apt-get update \
&& apt-get install -y bedtools \
&& apt-get install -y bedtools findutils \
&& useradd app \
&& mkdir -p /uploads /app/venv \
&& chown -R app /uploads /install /app/venv \
Expand All @@ -27,4 +27,4 @@ USER root

RUN rm -rf /install /var/lib/apt/lists/* /tmp/*

ENTRYPOINT /app/entry_point.py
ENTRYPOINT /app/entry_point.py
2 changes: 2 additions & 0 deletions docker/app_container/files/entry_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def write_env_file():
IMPORT_PATH=/import
DATA_PATH=/data
FRONTEND_PATH=/frontend
BEDTOOLS_TMP_PATH=/tmp/bedtools
""",
file=fp,
)
Expand All @@ -42,6 +43,7 @@ def get_secret(path):


write_env_file()
system("cd /app && /app/mini_cron.sh &")
system(
f"exec su - app /app/run_flask.sh {environ.get('HTTP_WORKER_PROCESSES')} {environ.get('HTTP_WORKER_TIMEOUT', 30)}"
)
25 changes: 25 additions & 0 deletions docker/app_container/files/mini_cron.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
#
# mini_cron.sh - minimal stand-in for cron.
#
# Every $sleep_seconds seconds, delete regular files below $UPLOAD_PATH and
# $BEDTOOLS_TMP_PATH whose modification time is more than
# $upload_max_age_minutes minutes in the past. Both paths are read from the
# .env file in the current working directory.

set -e -u

. .env

sleep_seconds=600
upload_max_age_minutes=60

# Use ${VAR:-} so that an unset variable reaches the friendly message below
# instead of tripping 'set -u' with a bare "unbound variable" error.
if [[ -z ${UPLOAD_PATH:-} ]]
then
    echo 'mini_cron.sh: No $UPLOAD_PATH - aborting.' >&2
    exit 1
fi
if [[ -z ${BEDTOOLS_TMP_PATH:-} ]]
then
    echo 'mini_cron.sh: No $BEDTOOLS_TMP_PATH - aborting.' >&2
    exit 1
fi

while true
do
    # find exits non-zero on any error (e.g. a start directory that does not
    # exist, or a file that vanishes while being examined). Without the
    # '||' branch, 'set -e' would end the loop for good after one bad pass.
    find "$UPLOAD_PATH" "$BEDTOOLS_TMP_PATH" -type f -mmin "+${upload_max_age_minutes}" -delete \
        || echo "mini_cron.sh: cleanup pass reported errors - retrying in ${sleep_seconds}s." >&2
    sleep "$sleep_seconds"
done
101 changes: 27 additions & 74 deletions server/src/scimodom/api/bam_file.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,43 @@
import re

from flask import Blueprint, request, Response
from flask_cors import cross_origin
from flask_jwt_extended import jwt_required, get_jwt_identity
from sqlalchemy.exc import NoResultFound

from scimodom.services.dataset import get_dataset_service
from flask_jwt_extended import jwt_required

from scimodom.api.helpers import (
get_validate_dataset,
get_user_with_write_permission_on_dataset,
get_valid_bam_file,
ClientResponseException,
validate_request_size,
)
from scimodom.services.file import get_file_service, FileTooLarge
from scimodom.services.permission import get_permission_service
from scimodom.services.user import get_user_service, NoSuchUser

bam_file_api = Blueprint("bam_file_api", __name__)

VALID_DATASET_ID_REGEXP = re.compile(r"\A[a-zA-Z0-9]+\Z")
VALID_FILENAME_REGEXP = re.compile(r"\A[a-zA-Z0-9.,_-]+\Z")

BUFFER_SIZE = 1024 * 1024
MAX_BAM_FILE_SIZE = 1024 * 1024 * 1024


@bam_file_api.route("/all/<dataset_id>", methods=["GET"])
@cross_origin(supports_credentials=True)
def list_bam_files(dataset_id: str):
    """List the BAM files attached to a dataset.

    Returns whatever the file service's ``get_bam_file_list`` yields for the
    dataset, or the error response carried by ``ClientResponseException``
    when the dataset ID does not validate.
    """
    # NOTE(review): the POST and DELETE handlers in this module call
    # get_validate_dataset() as "return the dataset or raise
    # ClientResponseException"; this handler follows the same contract
    # instead of unpacking a (dataset, error, status) tuple. Confirm against
    # scimodom.api.helpers.
    try:
        dataset = get_validate_dataset(dataset_id)
    except ClientResponseException as e:
        return e.response_tupel

    file_service = get_file_service()
    return file_service.get_bam_file_list(dataset)


def _get_dataset_or_error(dataset_id):
    """Resolve a dataset ID, reporting failure as a value instead of raising.

    Returns a ``(dataset, message, status)`` triple:

    * ``(dataset, None, None)`` when the lookup succeeds,
    * ``(None, "Bad dataset ID", 400)`` when the ID does not match
      ``VALID_DATASET_ID_REGEXP``,
    * ``(None, "Unknown dataset", 404)`` when the dataset service raises
      ``NoResultFound``.
    """
    if VALID_DATASET_ID_REGEXP.match(dataset_id) is None:
        return None, "Bad dataset ID", 400

    service = get_dataset_service()
    try:
        found = service.get_by_id(dataset_id)
    except NoResultFound:
        return None, "Unknown dataset", 404
    return found, None, None


@bam_file_api.route("/<dataset_id>/<name>", methods=["POST"])
@cross_origin(supports_credentials=True)
@jwt_required()
def post_bam_file(dataset_id: str, name: str):
dataset, error, status = _get_dataset_or_error(dataset_id)
if dataset is None:
return {"message": error}, status
user, error, status = _get_user_with_write_permission_or_error(dataset)
if user is None:
return {"message": error}, status
if (
request.content_length is not None
and request.content_length > MAX_BAM_FILE_SIZE
):
return {"message": f"File too large (max. {MAX_BAM_FILE_SIZE} bytes)"}, 413
try:
dataset = get_validate_dataset(dataset_id)
_ = get_user_with_write_permission_on_dataset(dataset)
validate_request_size(MAX_BAM_FILE_SIZE)
except ClientResponseException as e:
return e.response_tupel

file_service = get_file_service()
try:
Expand All @@ -66,31 +49,14 @@ def post_bam_file(dataset_id: str, name: str):
return {"message": "OK"}, 200


def _get_user_with_write_permission_or_error(dataset):
    """Return the JWT-identified user if they may change ``dataset``.

    Returns a ``(user, message, status)`` triple:

    * ``(user, None, None)`` when the user exists and the permission service
      allows them to change the dataset,
    * ``(None, "Unknown user", 404)`` when the user service raises
      ``NoSuchUser`` for the JWT identity,
    * ``(None, "Not your dataset", 401)`` when permission is denied.
    """
    email = get_jwt_identity()
    user_service = get_user_service()
    permission_service = get_permission_service()

    try:
        user = user_service.get_user_by_email(email)
    except NoSuchUser:
        return None, "Unknown user", 404

    if permission_service.may_change_dataset(user, dataset):
        return user, None, None
    # The first element must be None, not False: callers detect failure with
    # `user is None`, so returning False let a denied request fall through
    # as if it had been authorised.
    return None, "Not your dataset", 401


@bam_file_api.route("/<dataset_id>/<name>", methods=["GET"])
@cross_origin(supports_credentials=True)
def get_bam_file(dataset_id: str, name: str):
dataset, error, status = _get_dataset_or_error(dataset_id)
if dataset is None:
return {"message": error}, status
bam_file, error, status = _get_bam_file_or_error(dataset, name)
if bam_file is None:
return {"message": error}, status
try:
dataset, error, status = get_validate_dataset(dataset_id)
bam_file, error, status = get_valid_bam_file(dataset, name)
except ClientResponseException as e:
return e.response_tupel

file_service = get_file_service()

Expand All @@ -111,29 +77,16 @@ def generate():
)


def _get_bam_file_or_error(dataset, name):
    """Resolve a BAM file of ``dataset`` by name, reporting failure as a value.

    Returns a ``(bam_file, message, status)`` triple:

    * ``(bam_file, None, None)`` when the lookup succeeds,
    * ``(None, "Bad file name", 400)`` when ``name`` does not match
      ``VALID_FILENAME_REGEXP``,
    * ``(None, "Unknown file name", 404)`` when the file service raises
      ``NoResultFound``.
    """
    if VALID_FILENAME_REGEXP.match(name) is None:
        return None, "Bad file name", 400

    service = get_file_service()
    try:
        found = service.get_bam_file(dataset, name)
    except NoResultFound:
        return None, "Unknown file name", 404
    return found, None, None


@bam_file_api.route("/<dataset_id>/<name>", methods=["DELETE"])
@cross_origin(supports_credentials=True)
@jwt_required()
def delete_bam_file(dataset_id: str, name: str):
dataset, error, status = _get_dataset_or_error(dataset_id)
if dataset is None:
return {"message": error}, status
user, error, status = _get_user_with_write_permission_or_error(dataset)
if user is None:
return {"message": error}, status
bam_file, error, status = _get_bam_file_or_error(dataset, name)
if bam_file is None:
return {"message": error}, status
try:
dataset = get_validate_dataset(dataset_id)
_ = get_user_with_write_permission_on_dataset(dataset)
bam_file = get_valid_bam_file(dataset, name)
except ClientResponseException as e:
return e.response_tupel

file_service = get_file_service()
file_service.remove_bam_file(bam_file)
Expand Down
33 changes: 24 additions & 9 deletions server/src/scimodom/api/dataset.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
import logging
from pathlib import Path
from typing import get_args

from flask import Blueprint, request
from flask_cors import cross_origin
from flask_jwt_extended import jwt_required, get_jwt_identity

from scimodom.api.helpers import (
get_valid_dataset_id_list_from_request_parameter,
get_valid_tmp_file_id_from_request_parameter,
get_valid_boolean_from_request_parameter,
ClientResponseException,
)
from scimodom.config import Config
from scimodom.services.comparison import (
get_comparison_service,
FailedUploadError,
NoRecordsFoundError,
ComparisonService,
)
from scimodom.services.dataset import get_dataset_service
from scimodom.services.user import get_user_service
Expand Down Expand Up @@ -40,15 +48,15 @@ def list_mine():
def compare():
"""Compare dataset (Compare View)."""

def is_true(value):
return value.lower() == "true"

reference_ids = request.args.getlist("reference", type=str)
comparison_ids = request.args.getlist("comparison", type=str)
upload_id = request.args.get("upload", type=str)
operation = request.args.get("operation", type=str)
is_strand = request.args.get("strand", type=is_true)
is_euf = request.args.get("euf", type=is_true)
try:
reference_ids = get_valid_dataset_id_list_from_request_parameter("reference")
comparison_ids = get_valid_dataset_id_list_from_request_parameter("comparison")
upload_id = get_valid_tmp_file_id_from_request_parameter("upload")
operation = _get_operation()
is_strand = get_valid_boolean_from_request_parameter("strand", default=False)
is_euf = get_valid_boolean_from_request_parameter("is_euf", default=False)
except ClientResponseException as e:
return e.response_tupel

comparison_service = get_comparison_service(operation, is_strand)
if upload_id:
Expand Down Expand Up @@ -81,3 +89,10 @@ def is_true(value):
"Contact the system administrator."
)
}, 500


def _get_operation():
    """Return the ``operation`` query parameter after validating it.

    The value is read from the current request's query string and must be one
    of the type arguments of ``ComparisonService.OPERATIONS`` as reported by
    ``typing.get_args`` (presumably a ``Literal`` of operation names - confirm
    in the comparison service).

    Raises:
        ClientResponseException: constructed with ``400`` and
            ``"Unsupported operation"`` when the parameter is missing or is
            not one of the supported values.
    """
    requested = request.args.get("operation", type=str)
    supported = get_args(ComparisonService.OPERATIONS)
    if requested in supported:
        return requested
    raise ClientResponseException(400, "Unsupported operation")
Loading

0 comments on commit 832eb50

Please sign in to comment.