Skip to content

Commit

Permalink
Setup Azure delivery to use data factory
Browse files Browse the repository at this point in the history
Use compatible psycopg2 package.
Adds transfer_uuid for securing azure deliveries.

Upgrade to python 3.8
  Python 3.9 had an error building greenlet

Upgrade gunicorn for async keyword bug
  Fixes error described here: benoitc/gunicorn#1823

Use Storage-as-a-Service to check recipient container url

Checks that the container URL entered in the acceptance web form is
owned by the recipient (the current user). This is done using the
SaaS `/api/FileSystems/{account}/{container}` API endpoint.
Currently this endpoint fails with a 500 for storage accounts
that it cannot find.
See microsoft/storage-as-a-service#111
  • Loading branch information
johnbradley committed Aug 29, 2022
1 parent d1276ad commit d6bc6f1
Show file tree
Hide file tree
Showing 19 changed files with 569 additions and 560 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.6.8
FROM python:3.8
MAINTAINER dan.leehr@duke.edu

# Set timezone
Expand Down
4 changes: 3 additions & 1 deletion d4s2/settings_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,6 @@
# Deployment-specific settings read from D4S2_* environment variables.
# os.getenv returns None for any variable that is not set, unless a default is given.
# NOTE(review): this span comes from a diff hunk (3 additions, 1 deletion), so one of
# these assignments may no longer exist in the committed file — confirm before relying on it.
USERNAME_EMAIL_HOST = os.getenv('D4S2_USERNAME_EMAIL_HOST')
DIRECTORY_SERVICE_TOKEN = os.getenv('D4S2_DIRECTORY_SERVICE_TOKEN')
DIRECTORY_SERVICE_URL = os.getenv('D4S2_DIRECTORY_SERVICE_URL')
# Falls back to the literal 'azcopy' when D4S2_AZCOPY_COMMAND is unset.
AZCOPY_COMMAND = os.getenv('D4S2_AZCOPY_COMMAND','azcopy')
# Presumably the endpoint of the data factory transfer pipeline — TODO confirm against its consumer.
TRANSFER_PIPELINE_URL = os.getenv('D4S2_TRANSFER_PIPELINE_URL')
# Storage-as-a-Service URL and key; note the env var names omit the AZURE_ prefix.
AZURE_SAAS_URL = os.getenv('D4S2_SAAS_URL')
AZURE_SAAS_KEY = os.getenv('D4S2_SAAS_KEY')
2 changes: 1 addition & 1 deletion d4s2_api/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class S3DeliveryAdmin(SimpleHistoryAdmin):

def restart_transfer(modeladmin, request, queryset):
    """Invoke the TransferFunctions transfer calls for each delivery in ``queryset``.

    The (modeladmin, request, queryset) signature looks like a Django admin
    action — TODO confirm where it is registered. ``modeladmin`` and
    ``request`` are not used by the body.
    """
    for delivery in queryset:
        # NOTE(review): this hunk is recorded as "1 addition & 1 deletion", so the two
        # calls below are most likely the removed line followed by its replacement,
        # not two calls that both run — confirm against the committed file.
        TransferFunctions.restart_transfer(delivery.id)
        TransferFunctions.transfer_delivery(delivery.id)


class AzDeliveryAdmin(SimpleHistoryAdmin):
Expand Down
25 changes: 25 additions & 0 deletions d4s2_api/migrations/0044_auto_20220608_1539.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2022-06-08 15:39
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter the ``storage`` field on both email template set models."""

    dependencies = [('d4s2_api', '0043_auto_20220323_2036')]

    # One AlterField per model; every operation gets its own CharField instance.
    operations = [
        migrations.AlterField(
            model_name=target_model,
            name='storage',
            field=models.CharField(
                choices=[('dds', 'Duke Data Service'), ('azure', 'Azure Blob Storage'), ('s3', 'S3')],
                default='dds',
                max_length=64,
            ),
        )
        for target_model in ('emailtemplateset', 'useremailtemplateset')
    ]
25 changes: 25 additions & 0 deletions d4s2_api/migrations/0045_auto_20220613_1550.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2022-06-13 15:50
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``fund_code`` field to AzDelivery and its historical model."""

    dependencies = [('d4s2_api', '0044_auto_20220608_1539')]

    # One AddField per model; every operation gets its own CharField instance.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='fund_code',
            field=models.CharField(
                blank=True,
                help_text='Fund code used to bill storage costs.',
                max_length=255,
            ),
        )
        for target_model in ('azdelivery', 'historicalazdelivery')
    ]
25 changes: 25 additions & 0 deletions d4s2_api/migrations/0046_auto_20220622_1922.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2022-06-22 19:22
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``transfer_uuid`` field to AzDelivery and its historical model."""

    dependencies = [('d4s2_api', '0045_auto_20220613_1550')]

    # One AddField per model; every operation gets its own UUIDField instance.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='transfer_uuid',
            field=models.UUIDField(
                help_text='UUID field used with transfer webhook.',
                null=True,
            ),
        )
        for target_model in ('azdelivery', 'historicalazdelivery')
    ]
12 changes: 10 additions & 2 deletions d4s2_api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,13 +522,21 @@ class AzDelivery(DeliveryBase):
# Array of strings (each up to 255 characters); may be left blank.
# NOTE(review): `default=[]` is a single list object shared by every instance; Django's
# ArrayField documentation recommends a callable such as `list`. Changing it alters
# the migration state, so it is only flagged here.
share_user_ids = ArrayField(models.CharField(max_length=255), blank=True, default=[])
# Integer state constrained to AzTransferStates.CHOICES; new rows start at AzTransferStates.NEW.
transfer_state = models.IntegerField(choices=AzTransferStates.CHOICES, default=AzTransferStates.NEW,
help_text='State within transfer')

# Optional free-text fund code (blank allowed, up to 255 characters).
fund_code = models.CharField(max_length=255, help_text='Fund code used to bill storage costs.', blank=True)
# Nullable UUID; the transfer-result endpoint looks deliveries up by pk together with this value.
transfer_uuid = models.UUIDField(null=True, help_text='UUID field used with transfer webhook.',)

def get_simple_project_name(self):
    """Return the final component of the source project's path."""
    source_path = self.source_project.path
    return os.path.basename(source_path)

def make_project_url(self):
    """Return the URL of the delivery's current project, or "" when there is none.

    Delegates to ``make_project_url()`` on the object returned by
    ``self.get_current_project()``.

    :return: the project's URL, or an empty string when
        ``get_current_project()`` returns a falsy value.
    """
    # Look the project up once and guard against a missing project. An
    # unconditional `return self.get_current_project().make_project_url()`
    # would raise AttributeError on None and make this guard unreachable.
    project = self.get_current_project()
    if project:
        return project.make_project_url()
    return ""

def update_destination(self, container_url):
    """Create a destination AzContainerPath for this delivery and save the delivery.

    The new record reuses the source project's path and uses ``container_url``
    as its container URL.
    """
    new_destination = AzContainerPath.objects.create(
        path=self.source_project.path,
        container_url=container_url,
    )
    self.destination_project = new_destination
    self.save()

def get_current_project(self):
if self.state == State.ACCEPTED:
Expand Down
56 changes: 51 additions & 5 deletions d4s2_api_v2/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
DDSProjectPermissionSerializer, DDSDeliveryPreviewSerializer, DDSAuthProviderSerializer, DDSAffiliateSerializer, \
AddUserSerializer, DDSProjectSummarySerializer, EmailTemplateSetSerializer, EmailTemplateSerializer, \
DukeUserSerializer, AzDeliverySerializer, AzDeliveryUpdateSerializer, AzStorageConfigSerializer, AzDeliverySummarySerializer, \
AzDeliveryPreviewSerializer, StorageTypes
AzDeliveryPreviewSerializer, StorageTypes, AzTransferSerializer
from d4s2_api.models import DDSDelivery, S3Endpoint, S3User, S3UserTypes, S3Bucket, S3Delivery, EmailTemplateSet, \
EmailTemplate
from d4s2_api_v1.api import AlreadyNotifiedException, get_force_param, build_accept_url, DeliveryViewSet, \
Expand All @@ -22,9 +22,11 @@
from d4s2_api_v2.models import DDSDeliveryPreview, AzDeliveryPreview
from d4s2_api.models import AzDelivery, State, AzStorageConfig
from switchboard.userservice import get_users_for_query, get_user_for_netid, get_netid_from_user
from switchboard.azure_util import project_exists, AzMessageFactory, create_project_summary
from switchboard.azure_util import AzMessageFactory, create_project_summary, get_container_details
from django.core.signing import Signer, BadSignature
from rest_framework.authtoken.models import Token
from rest_framework.views import APIView
from switchboard.azure_util import AzureTransfer


class DataServiceUnavailable(APIException):
Expand Down Expand Up @@ -419,6 +421,7 @@ def current_user(self, request):
serializer = DukeUserSerializer(person)
return Response(serializer.data, status=status.HTTP_200_OK)


class AzDeliveryViewSet(ModelWithEmailTemplateSetMixin, mixins.CreateModelMixin,
mixins.RetrieveModelMixin, mixins.ListModelMixin, mixins.UpdateModelMixin,
viewsets.GenericViewSet):
Expand Down Expand Up @@ -451,16 +454,21 @@ def get_queryset(self):

def before_saving_new_model(self, serializer):
    """Validate a new Azure delivery before it is saved.

    Raises ValidationError when:
      * get_container_details() returns nothing for the source container URL,
      * the container's 'owner' differs from the requesting user's username,
      * AzDelivery.get_incomplete_delivery() finds a delivery for the same
        from_netid / source container URL / source path,
      * project_exists() is falsy for the source container URL and path.

    NOTE(review): this span comes from a diff hunk; the trailing
    project_exists() check appears to be the code that the container-details
    check replaced — confirm which statements exist in the committed file.

    :param serializer: serializer whose validated_data holds "from_netid" and
        a "source_project" mapping with "container_url" and "path".
    """
    validated_data = serializer.validated_data
    source_container_url = validated_data["source_project"]["container_url"]
    # Ownership check: the container must be known and owned by the requesting user.
    container_details = get_container_details(container_url=source_container_url)
    if not container_details:
        raise ValidationError(f"Data Delivery Error: Unable to find project {source_container_url} in Storage-as-a-Service.")
    # Assumes the details mapping always has an 'owner' key — a missing key raises KeyError.
    container_owner = container_details['owner']
    if container_owner != self.request.user.username:
        raise ValidationError(f"Data Delivery Error: This project is owned by {container_owner} not you({self.request.user.username}).")

    # Refuse to create a second delivery while one for the same project is still incomplete.
    existing_delivery = AzDelivery.get_incomplete_delivery(
        from_netid=validated_data["from_netid"],
        source_container_url=validated_data["source_project"]["container_url"],
        source_path=validated_data["source_project"]["path"]
    )
    if existing_delivery:
        raise ValidationError("Data Delivery Error: An active delivery for this project already exists.")
    source_project = validated_data["source_project"]
    if not project_exists(source_project["container_url"], source_project["path"]):
        raise ValidationError("Data Delivery Error: Unable to find project {}.".format(source_project["path"]))

@action(detail=True, methods=['POST'])
def send(self, request, pk=None):
Expand Down Expand Up @@ -537,3 +545,41 @@ def create(self, request, *args, **kwargs):
serializer = self.get_serializer(instance=delivery_preview)
headers = self.get_success_headers(serializer.data)
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)


class AzTransferListView(APIView):
    """
    Record a transfer result.

    POST accepts the fields validated by AzTransferSerializer: ``delivery_id``
    and ``transfer_uuid`` identify the delivery, ``error_message`` (optional)
    marks the transfer as failed, and ``manifest`` (optional) is recorded
    when the transfer succeeded.
    """
    permission_classes = [permissions.IsAuthenticated]

    def post(self, request, format=None):
        """Record the outcome of a transfer for one delivery.

        :return: 200 with the serialized payload once the failure or the
            completion has been recorded; 400 when no delivery matches
            delivery_id + transfer_uuid or the delivery is not in the
            TRANSFERRING state; 401 when the user is not in the
            'transfer_poster' group.
        :raises: the serializer's validation error (via ``raise_exception``)
            when the payload itself is invalid.
        """
        # User must be in the 'transfer_poster' group to post transfers.
        # exists() asks the database a yes/no question instead of loading rows.
        if not request.user.groups.filter(name="transfer_poster").exists():
            # NOTE(review): the user is authenticated here, so 403 would be more
            # accurate; 401 is kept so existing clients see the same status.
            return Response(status=status.HTTP_401_UNAUTHORIZED)

        serializer = AzTransferSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        validated_data = serializer.validated_data
        delivery_id = validated_data["delivery_id"]
        transfer_uuid = validated_data["transfer_uuid"]
        error_message = validated_data.get("error_message")
        file_manifest = validated_data.get("manifest")

        # Imported locally so this block does not depend on the module's import
        # list; aliased because `ValidationError` is already used in this module.
        from django.core.exceptions import ValidationError as DjangoValidationError
        try:
            # Make sure the transfer_uuid matches our delivery
            delivery = AzDelivery.objects.get(pk=delivery_id, transfer_uuid=transfer_uuid)
        except (AzDelivery.DoesNotExist, ValueError, DjangoValidationError):
            # A malformed id or UUID makes the lookup itself raise; answer with
            # the same 400 as "no such delivery" rather than a server error.
            msg = f"Unable to find delivery for delivery_id:{delivery_id} and transfer_uuid:{transfer_uuid}"
            return Response(msg, status=status.HTTP_400_BAD_REQUEST)

        transfer = AzureTransfer(delivery.id)
        if error_message:
            # A reported failure is recorded whatever state the delivery is in.
            transfer.set_failed_and_record_message(error_message)
            return Response(serializer.data, status=status.HTTP_200_OK)
        if delivery.state != State.TRANSFERRING:
            return Response(f"Delivery {delivery_id} not in TRANSFERRING state.",
                            status=status.HTTP_400_BAD_REQUEST)

        # Success path: store the manifest, complete the delivery, notify both parties.
        transfer.record_object_manifest(file_manifest)
        transfer.mark_complete()
        transfer.email_sender()
        transfer.email_recipient()
        return Response(serializer.data, status=status.HTTP_200_OK)
9 changes: 8 additions & 1 deletion d4s2_api_v2/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ class Meta:
resource_name = 'az-deliveries'
fields = ('id', 'source_project', 'from_netid', 'destination_project', 'to_netid', 'state', 'user_message',
'share_user_ids', 'decline_reason', 'performed_by', 'delivery_email_text', 'email_template_set',
'complete', 'status', 'outgoing', 'last_updated_on', 'url')
'complete', 'status', 'outgoing', 'last_updated_on', 'url', 'fund_code')
read_only_fields = ('decline_reason', 'performed_by', 'delivery_email_text', 'email_template_set', 'status',
'outgoing', 'last_updated_on', 'url')

Expand Down Expand Up @@ -351,3 +351,10 @@ class AzDeliveryPreviewSerializer(serializers.Serializer):

class Meta:
resource_name = 'az-delivery-preview'


class AzTransferSerializer(serializers.Serializer):
    """Validate the payload posted to record the result of an Azure transfer.

    ``transfer_uuid`` and ``delivery_id`` are required; ``error_message`` and
    ``manifest`` are optional.
    """
    # AzDelivery.transfer_uuid is a UUIDField, so malformed values are rejected
    # here as a validation error instead of failing later in the database lookup.
    transfer_uuid = serializers.UUIDField(required=True)
    # NOTE(review): kept as text; presumably the delivery's primary key —
    # tighten to the pk's type once that is confirmed.
    delivery_id = serializers.CharField(required=True)
    error_message = serializers.CharField(required=False)
    manifest = serializers.JSONField(required=False)
Loading

0 comments on commit d6bc6f1

Please sign in to comment.