diff --git a/.github/workflows/auto-add-issues.yml b/.github/workflows/auto-add-issues.yml index 79cbb207d..6eca60f09 100644 --- a/.github/workflows/auto-add-issues.yml +++ b/.github/workflows/auto-add-issues.yml @@ -11,7 +11,7 @@ jobs: name: Add issue to project runs-on: ubuntu-latest steps: - - uses: actions/add-to-project@v0.6.0 + - uses: actions/add-to-project@v1.0.2 with: project-url: https://github.com/orgs/dandi/projects/16 github-token: ${{ secrets.AUTO_ADD_ISSUES }} diff --git a/.github/workflows/cli-integration.yml b/.github/workflows/cli-integration.yml index 7853a2628..b459b60da 100644 --- a/.github/workflows/cli-integration.yml +++ b/.github/workflows/cli-integration.yml @@ -84,12 +84,12 @@ jobs: - name: Dump Docker Compose logs if: failure() run: | - docker-compose \ + docker compose \ -f "$pythonLocation/lib/python${{ matrix.python }}/site-packages/dandi/tests/data/dandiarchive-docker/docker-compose.yml" \ logs --timestamps - name: Shut down Docker Compose run: | - docker-compose \ + docker compose \ -f "$pythonLocation/lib/python${{ matrix.python }}/site-packages/dandi/tests/data/dandiarchive-docker/docker-compose.yml" \ down -v diff --git a/CHANGELOG.md b/CHANGELOG.md index c18591607..b40505585 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,260 @@ +# v0.3.99 (Tue Sep 10 2024) + +#### 🐛 Bug Fix + +- Manually configure celery to retry connections on startup [#2026](https://github.com/dandi/dandi-archive/pull/2026) ([@danlamanna](https://github.com/danlamanna)) + +#### Authors: 1 + +- Dan LaManna ([@danlamanna](https://github.com/danlamanna)) + +--- + +# v0.3.98 (Tue Sep 10 2024) + +#### 🐛 Bug Fix + +- admin view: Also show (list) zarr for Assets view [#2017](https://github.com/dandi/dandi-archive/pull/2017) ([@yarikoptic](https://github.com/yarikoptic)) + +#### Authors: 1 + +- Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic)) + +--- + +# v0.3.97 (Tue Sep 10 2024) + +#### 🐛 Bug Fix + +- Disable GUI "Unembargo" button if there are active uploads [#2015](https://github.com/dandi/dandi-archive/pull/2015) ([@jjnesbitt](https://github.com/jjnesbitt)) + +#### Authors: 1 + +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) + +--- + +# v0.3.96 (Mon Sep 09 2024) + +#### 🐛 Bug Fix + +- Use bulk_update for updating blob download counts [#2025](https://github.com/dandi/dandi-archive/pull/2025) ([@danlamanna](https://github.com/danlamanna)) + +#### Authors: 1 + +- Dan LaManna ([@danlamanna](https://github.com/danlamanna)) + +--- + +# v0.3.95 (Tue Aug 27 2024) + +#### 🐛 Bug Fix + +- Respond with 409 when creating duplicate asset blobs [#2011](https://github.com/dandi/dandi-archive/pull/2011) ([@danlamanna](https://github.com/danlamanna)) +- Apply new `ruff` rules [#2009](https://github.com/dandi/dandi-archive/pull/2009) ([@mvandenburgh](https://github.com/mvandenburgh)) +- Add Neurosift service for AVI files [#1983](https://github.com/dandi/dandi-archive/pull/1983) ([@magland](https://github.com/magland)) + +#### 🏠 Internal + +- [gh-actions](deps): Bump actions/add-to-project from 0.6.0 to 1.0.2 [#1962](https://github.com/dandi/dandi-archive/pull/1962) ([@dependabot[bot]](https://github.com/dependabot[bot])) + +#### Authors: 4 + +- [@dependabot[bot]](https://github.com/dependabot[bot]) +- Dan LaManna ([@danlamanna](https://github.com/danlamanna)) +- Jeremy Magland ([@magland](https://github.com/magland)) +- Mike VanDenburgh ([@mvandenburgh](https://github.com/mvandenburgh)) + +--- + +# v0.3.94 (Fri Aug 16 2024) + +#### 🐛 Bug Fix + +- Fix admin access to embargoed asset blobs [#2004](https://github.com/dandi/dandi-archive/pull/2004) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Re-validate version metadata during unembargo [#1989](https://github.com/dandi/dandi-archive/pull/1989) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Separate core model logic from top-level asset service layer functions [#1991](https://github.com/dandi/dandi-archive/pull/1991) ([@jjnesbitt](https://github.com/jjnesbitt)) + +#### Authors: 1 + +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) + +--- + +# v0.3.93 (Wed Aug 14 2024) + +#### 🐛 Bug Fix + +- Empty commit to trigger release process [#2005](https://github.com/dandi/dandi-archive/pull/2005) ([@waxlamp](https://github.com/waxlamp)) +- Upgrade oauth client package [#1998](https://github.com/dandi/dandi-archive/pull/1998) ([@mvandenburgh](https://github.com/mvandenburgh)) +- Merge remote-tracking branch 'origin/master' into audit-backend [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Replace `docker-compose` with `docker compose` to fix newly failing CI tests [#1999](https://github.com/dandi/dandi-archive/pull/1999) ([@waxlamp](https://github.com/waxlamp)) +- Merge remote-tracking branch 'origin/fix-docker-compose' into audit-backend [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Add test for publish_dandiset audit record [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Disable complexity warning on users view [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Add explanatory comment [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Use nested transaction to handle integrity error [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Split long tests up into individual tests [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- bugfix: Pass correct and stringified ID values in asset/zarr records [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- bugfix: Include correct asset in update_asset record [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Add tests [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Fix invocation of unembargo routines in tests [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Remove duplicate task launch line [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Remove references to deleted model fields [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Use audit service in zarr views [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Eliminate the need for manually calling .save() [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Create a service layer module for audit [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Use the `user` fixture directly [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Reformat long line [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Suppress ruff warnings for complexity and number of arguments [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Report list of paths to zarr chunk audit records [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Report live metadata to audit records [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Move "set new owners" operation inside of transaction [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Remove Dandiset add_owner() and remove_owner() methods [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Apply formatting to new migration [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Add explanatory comments for weird char field length limits [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Rename upload_zarr to upload_zarr_chunks [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Fix tests broken by signature changes [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Add migration for AuditRecord [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Generate delete_dandiset audit record [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Generate publish_dandiset audit record [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Generate unembargo_dandiset audit record [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Generate [create|upload|finalize|delete]_zarr audit records [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Generate [add|update|remove]_asset audit records [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Generate update_metadata audit record [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Generate change_owners audit record [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Generate create_dandiset audit record [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Add admin model for AuditRecord [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) +- Add AuditRecord model [#1886](https://github.com/dandi/dandi-archive/pull/1886) ([@waxlamp](https://github.com/waxlamp)) + +#### Authors: 2 + +- Mike VanDenburgh ([@mvandenburgh](https://github.com/mvandenburgh)) +- Roni Choudhury ([@waxlamp](https://github.com/waxlamp)) + +--- + +# v0.3.92 (Tue Jul 30 2024) + +#### 🐛 Bug Fix + +- Add retries to sha256 checksum calculation task [#1937](https://github.com/dandi/dandi-archive/pull/1937) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Contact owner [#1840](https://github.com/dandi/dandi-archive/pull/1840) ([@marySalvi](https://github.com/marySalvi) [@mvandenburgh](https://github.com/mvandenburgh)) + +#### Authors: 3 + +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) +- Mary Salvi ([@marySalvi](https://github.com/marySalvi)) +- Mike VanDenburgh ([@mvandenburgh](https://github.com/mvandenburgh)) + +--- + +# v0.3.91 (Wed Jul 24 2024) + +#### 🐛 Bug Fix + +- Fix N query problem with VersionStatusFilter [#1986](https://github.com/dandi/dandi-archive/pull/1986) ([@jjnesbitt](https://github.com/jjnesbitt)) + +#### Authors: 1 + +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) + +--- + +# v0.3.90 (Mon Jul 22 2024) + +#### 🐛 Bug Fix + +- Automate dandiset unembargo [#1965](https://github.com/dandi/dandi-archive/pull/1965) ([@jjnesbitt](https://github.com/jjnesbitt)) + +#### Authors: 1 + +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) + +--- + +# v0.3.89 (Fri Jul 19 2024) + +#### 🐛 Bug Fix + +- Add dashboard link to view to emit user list as Mailchimp-compatible CSV [#1979](https://github.com/dandi/dandi-archive/pull/1979) ([@waxlamp](https://github.com/waxlamp)) + +#### Authors: 1 + +- Roni Choudhury ([@waxlamp](https://github.com/waxlamp)) + +--- + +# v0.3.88 (Mon Jul 15 2024) + +#### 🐛 Bug Fix + +- Bump dandischema to 0.10.2 (schema version 0.6.8) [#1976](https://github.com/dandi/dandi-archive/pull/1976) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Pin updated dependencies [#1977](https://github.com/dandi/dandi-archive/pull/1977) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Suppress lint error (SIM103) [#1973](https://github.com/dandi/dandi-archive/pull/1973) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Remove File to avoid confusion [#1972](https://github.com/dandi/dandi-archive/pull/1972) ([@yarikoptic](https://github.com/yarikoptic)) + +#### Authors: 2 + +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) +- Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic)) + +--- + +# v0.3.87 (Fri Jun 21 2024) + +#### 🐛 Bug Fix + +- Lock dandisets during un-embargo [#1957](https://github.com/dandi/dandi-archive/pull/1957) ([@jjnesbitt](https://github.com/jjnesbitt)) + +#### Authors: 1 + +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) + +--- + +# v0.3.86 (Tue Jun 18 2024) + +#### 🐛 Bug Fix + +- Restrict updates to metadata `access` field [#1954](https://github.com/dandi/dandi-archive/pull/1954) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Fix race condition in sha256 calculation task [#1936](https://github.com/dandi/dandi-archive/pull/1936) ([@mvandenburgh](https://github.com/mvandenburgh)) + +#### Authors: 2 + +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) +- Mike VanDenburgh ([@mvandenburgh](https://github.com/mvandenburgh)) + +--- + +# v0.3.85 (Mon Jun 03 2024) + +:tada: This release contains work from a new contributor! :tada: + +Thank you, Ben Dichter ([@bendichter](https://github.com/bendichter)), for all your work! + +#### 🐛 Bug Fix + +- Empty PR to trigger a release [#1951](https://github.com/dandi/dandi-archive/pull/1951) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Only use custom pagination class for asset list endpoint [#1947](https://github.com/dandi/dandi-archive/pull/1947) ([@jjnesbitt](https://github.com/jjnesbitt)) +- In 1.14.3 it became client_config and .config was announced deprecated [#1946](https://github.com/dandi/dandi-archive/pull/1946) ([@yarikoptic](https://github.com/yarikoptic)) +- neurosift external service for .nwb.lindi.json [#1922](https://github.com/dandi/dandi-archive/pull/1922) ([@magland](https://github.com/magland)) +- Fix documentation for server downtime message var [#1927](https://github.com/dandi/dandi-archive/pull/1927) ([@jjnesbitt](https://github.com/jjnesbitt)) +- Revert "Add `workflow_dispatch` trigger to staging deploy workflow" [#1930](https://github.com/dandi/dandi-archive/pull/1930) ([@jjnesbitt](https://github.com/jjnesbitt)) + +#### 📝 Documentation + +- add handbook to welcome email [#1945](https://github.com/dandi/dandi-archive/pull/1945) ([@bendichter](https://github.com/bendichter)) + +#### Authors: 4 + +- Ben Dichter ([@bendichter](https://github.com/bendichter)) +- Jacob Nesbitt ([@jjnesbitt](https://github.com/jjnesbitt)) +- Jeremy Magland ([@magland](https://github.com/magland)) +- Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic)) + +--- + # v0.3.84 (Mon Apr 29 2024) #### 🐛 Bug Fix diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index a6efafff4..3484b8c9d 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -4,25 +4,25 @@ This is the simplest configuration for developers to start with. ### Initial Setup -1. Run `docker-compose run --rm django ./manage.py migrate` -2. Run `docker-compose run --rm django ./manage.py createcachetable` -3. Run `docker-compose run --rm django ./manage.py createsuperuser --email $(git config user.email)` +1. Run `docker compose run --rm django ./manage.py migrate` +2. Run `docker compose run --rm django ./manage.py createcachetable` +3. Run `docker compose run --rm django ./manage.py createsuperuser --email $(git config user.email)` and follow the prompts to create your own user. This sets your username to your git email to ensure parity with how GitHub logins work. You can also replace the command substitution expression with a literal email address, or omit the `--email` option entirely to run the command in interactive mode. -4. Run `docker-compose run --rm django ./manage.py create_dev_dandiset --owner $(git config user.email)` +4. Run `docker compose run --rm django ./manage.py create_dev_dandiset --owner $(git config user.email)` to create a dummy dandiset to start working with. ### Run Application -1. Run `docker-compose up` +1. Run `docker compose up` 2. Access the site, starting at http://localhost:8000/admin/ 3. When finished, use `Ctrl+C` ### Application Maintenance Occasionally, new package dependencies or schema changes will necessitate maintenance. To non-destructively update your development stack at any time: -1. Run `docker-compose pull` -2. Run `docker-compose build --pull --no-cache` -3. Run `docker-compose run --rm django ./manage.py migrate` +1. Run `docker compose pull` +2. Run `docker compose build --pull --no-cache` +3. Run `docker compose run --rm django ./manage.py migrate` ## Develop Natively (advanced) This configuration still uses Docker to run attached services in the background, @@ -30,7 +30,7 @@ but allows developers to run Python code on their native system. ### Initial Setup 1. Install [Docker](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) -2. Run `docker-compose -f ./docker-compose.yml up -d` +2. Run `docker compose -f ./docker-compose.yml up -d` 3. Install Python 3.11 4. Install [`psycopg2` build prerequisites](https://www.psycopg.org/docs/install.html#build-prerequisites). @@ -50,20 +50,20 @@ but allows developers to run Python code on their native system. to create a dummy dandiset to start working with. ### Run Application -1. Ensure `docker-compose -f ./docker-compose.yml up -d` is still active +1. Ensure `docker compose -f ./docker-compose.yml up -d` is still active 2. Run: 1. `source ./dev/export-env.sh` 2. `./manage.py runserver` 3. Run in a separate terminal: 1. `source ./dev/export-env.sh` 2. `celery --app dandiapi.celery worker --loglevel INFO --without-heartbeat -Q celery,calculate_sha256,ingest_zarr_archive -B` -4. When finished, run `docker-compose stop` +4. When finished, run `docker compose stop` ## Remap Service Ports (optional) Attached services may be exposed to the host system via alternative ports. Developers who work on multiple software projects concurrently may find this helpful to avoid port conflicts. -To do so, before running any `docker-compose` commands, set any of the environment variables: +To do so, before running any `docker compose` commands, set any of the environment variables: * `DOCKER_POSTGRES_PORT` * `DOCKER_RABBITMQ_PORT` * `DOCKER_MINIO_PORT` @@ -87,7 +87,7 @@ To install the tox pytest dependencies into your environment, run `pip install - These are useful for IDE autocompletion or if you want to run `pytest` directly (not recommended). When running the "Develop with Docker" configuration, all tox commands must be run as -`docker-compose run --rm django tox`; extra arguments may also be appended to this form. +`docker compose run --rm django tox`; extra arguments may also be appended to this form. ### Running Tests Run `tox` to launch the full test suite. diff --git a/dandiapi/analytics/tasks/__init__.py b/dandiapi/analytics/tasks/__init__.py index 1e1ba3d16..3314bc305 100644 --- a/dandiapi/analytics/tasks/__init__.py +++ b/dandiapi/analytics/tasks/__init__.py @@ -10,6 +10,7 @@ from django.db.models.aggregates import Max from django.db.models.expressions import F from django.db.utils import IntegrityError +from more_itertools import batched from s3logparse import s3logparse from dandiapi.analytics.models import ProcessedS3Log @@ -80,10 +81,19 @@ def process_s3_log_file_task(s3_log_key: str) -> None: logger.info('Already processed log file %s', s3_log_key) return + # we need to store all of the fully hydrated blob objects in memory in order to use + # bulk_update, but this turns out to not be very costly. 1,000 blobs use about ~8kb + # of memory. + asset_blobs = [] + + # batch the blob queries to avoid a large WHERE IN clause + for batch in batched(download_counts, 1_000): + asset_blobs += AssetBlob.objects.filter(blob__in=batch) + + for asset_blob in asset_blobs: + asset_blob.download_count = F('download_count') + download_counts[asset_blob.blob] + # note this task is run serially per log file. this is to avoid the contention between # multiple log files trying to update the same blobs. this serialization is enforced through # the task queue configuration. - for blob, download_count in download_counts.items(): - AssetBlob.objects.filter(blob=blob).update( - download_count=F('download_count') + download_count - ) + AssetBlob.objects.bulk_update(asset_blobs, ['download_count'], batch_size=1_000) diff --git a/dandiapi/analytics/tests/test_download_counts.py b/dandiapi/analytics/tests/test_download_counts.py index efde429c7..679c21478 100644 --- a/dandiapi/analytics/tests/test_download_counts.py +++ b/dandiapi/analytics/tests/test_download_counts.py @@ -8,12 +8,12 @@ from dandiapi.api.storage import create_s3_storage, get_boto_client -@pytest.fixture() +@pytest.fixture def s3_log_bucket(): return create_s3_storage(settings.DANDI_DANDISETS_LOG_BUCKET_NAME).bucket_name -@pytest.fixture() +@pytest.fixture def s3_log_file(s3_log_bucket, asset_blob): s3 = get_boto_client() @@ -44,7 +44,7 @@ def s3_log_file(s3_log_bucket, asset_blob): s3.delete_object(Bucket=s3_log_bucket, Key=log_file_name) -@pytest.mark.django_db() +@pytest.mark.django_db def test_processing_s3_log_files(s3_log_file, asset_blob): collect_s3_log_records_task() asset_blob.refresh_from_db() @@ -53,7 +53,7 @@ def test_processing_s3_log_files(s3_log_file, asset_blob): assert asset_blob.download_count == 1 -@pytest.mark.django_db() +@pytest.mark.django_db def test_processing_s3_log_files_idempotent(s3_log_file, asset_blob): # this tests that the outer task which collects the log files to process is # idempotent, in other words, it uses StartAfter correctly. @@ -66,7 +66,7 @@ def test_processing_s3_log_files_idempotent(s3_log_file, asset_blob): assert asset_blob.download_count == 1 -@pytest.mark.django_db() +@pytest.mark.django_db def test_processing_s3_log_file_task_idempotent(s3_log_file, asset_blob): # this tests that the inner task which processes a single log file is # idempotent, utilizing the unique constraint on ProcessedS3Log correctly. diff --git a/dandiapi/api/admin.py b/dandiapi/api/admin.py index 936c561ea..f7fbbf62f 100644 --- a/dandiapi/api/admin.py +++ b/dandiapi/api/admin.py @@ -19,6 +19,7 @@ from dandiapi.api.models import ( Asset, AssetBlob, + AuditRecord, Dandiset, Upload, UserMetadata, @@ -135,12 +136,17 @@ class VersionStatusFilter(admin.SimpleListFilter): title = 'status' parameter_name = 'status' - def lookups(self, request, model_admin): - qs = model_admin.get_queryset(request) - for status in qs.values_list('status', flat=True).distinct(): - count = qs.filter(status=status).count() - if count: - yield (status, f'{status} ({count})') + def lookups(self, *args, **kwargs): + # The queryset for VersionAdmin contains unnecessary data, + # so just use base queryset from Version.objects + qs = ( + Version.objects.values_list('status') + .distinct() + .annotate(total=Count('status')) + .order_by() + ) + for status, count in qs: + yield (status, f'{status} ({count})') def queryset(self, request, queryset): status = self.value() @@ -210,6 +216,7 @@ class AssetAdmin(admin.ModelAdmin): 'asset_id', 'path', 'blob', + 'zarr', 'status', 'size', 'modified', @@ -223,3 +230,39 @@ class AssetAdmin(admin.ModelAdmin): class UploadAdmin(admin.ModelAdmin): list_display = ['id', 'upload_id', 'blob', 'etag', 'upload_id', 'size', 'created'] list_display_links = ['id', 'upload_id'] + + +@admin.register(AuditRecord) +class AuditRecordAdmin(admin.ModelAdmin): + actions = None + date_hierarchy = 'timestamp' + search_fields = [ + 'dandiset_id', + 'username', + 'record_type', + ] + list_display = [ + 'id', + 'timestamp', + 'dandiset', + 'record_type', + 'details', + 'username', + ] + + @admin.display(description='Dandiset', ordering='dandiset_id') + def dandiset(self, obj): + return format_html( + '{}', + reverse('admin:api_dandiset_change', args=(obj.dandiset_id,)), + f'{obj.dandiset_id:06}', + ) + + def has_add_permission(self, request): + return False + + def has_change_permission(self, request, obj=None): + return False + + def has_delete_permission(self, request, obj=None): + return False diff --git a/dandiapi/api/mail.py b/dandiapi/api/mail.py index 9f05acdb5..7b0a5b4d1 100644 --- a/dandiapi/api/mail.py +++ b/dandiapi/api/mail.py @@ -43,8 +43,18 @@ def user_greeting_name(user: User, socialaccount: SocialAccount = None) -> str: return social_user['username'] -def build_message(subject: str, message: str, to: list[str], html_message: str | None = None): - email_message = mail.EmailMultiAlternatives(subject=subject, body=message, to=to) +def build_message( # noqa: PLR0913 + to: list[str], + subject: str, + message: str, + html_message: str | None = None, + cc: list[str] | None = None, + bcc: list[str] | None = None, + reply_to: list[str] | None = None, +): + email_message = mail.EmailMultiAlternatives( + subject=subject, body=message, to=to, cc=cc, bcc=bcc, reply_to=reply_to + ) if html_message is not None: email_message.attach_alternative(html_message, 'text/html') return email_message @@ -182,31 +192,6 @@ def send_pending_users_message(users: Iterable[User]): connection.send_messages(messages) -def build_dandisets_to_unembargo_message(dandisets: Iterable[Dandiset]): - dandiset_context = [ - { - 'identifier': ds.identifier, - 'owners': [user.username for user in ds.owners], - 'asset_count': ds.draft_version.asset_count, - 'size': ds.draft_version.size, - } - for ds in dandisets - ] - render_context = {**BASE_RENDER_CONTEXT, 'dandisets': dandiset_context} - return build_message( - subject='DANDI: New Dandisets to un-embargo', - message=render_to_string('api/mail/dandisets_to_unembargo.txt', render_context), - to=[settings.DANDI_DEV_EMAIL], - ) - - -def send_dandisets_to_unembargo_message(dandisets: Iterable[Dandiset]): - logger.info('Sending dandisets to un-embargo message to devs at %s', settings.DANDI_DEV_EMAIL) - messages = [build_dandisets_to_unembargo_message(dandisets)] - with mail.get_connection() as connection: - connection.send_messages(messages) - - def build_dandiset_unembargoed_message(dandiset: Dandiset): dandiset_context = { 'identifier': dandiset.identifier, @@ -219,7 +204,7 @@ def build_dandiset_unembargoed_message(dandiset: Dandiset): } html_message = render_to_string('api/mail/dandiset_unembargoed.html', render_context) return build_message( - subject='Your Dandiset has been un-embargoed!', + subject='Your Dandiset has been unembargoed!', message=strip_tags(html_message), html_message=html_message, to=[owner.email for owner in dandiset.owners], @@ -227,9 +212,34 @@ def build_dandiset_unembargoed_message(dandiset: Dandiset): def send_dandiset_unembargoed_message(dandiset: Dandiset): + logger.info('Sending dandisets unembargoed message to dandiset %s owners.', dandiset.identifier) + messages = [build_dandiset_unembargoed_message(dandiset)] + with mail.get_connection() as connection: + connection.send_messages(messages) + + +def build_dandiset_unembargo_failed_message(dandiset: Dandiset): + dandiset_context = { + 'identifier': dandiset.identifier, + } + + render_context = {**BASE_RENDER_CONTEXT, 'dandiset': dandiset_context} + html_message = render_to_string('api/mail/dandiset_unembargo_failed.html', render_context) + return build_message( + subject=f'DANDI: Unembargo failed for dandiset {dandiset.identifier}', + message=strip_tags(html_message), + html_message=html_message, + to=[owner.email for owner in dandiset.owners], + bcc=[settings.DANDI_DEV_EMAIL], + reply_to=[ADMIN_EMAIL], + ) + + +def send_dandiset_unembargo_failed_message(dandiset: Dandiset): logger.info( - 'Sending dandisets un-embargoed message to dandiset %s owners.', dandiset.identifier + 'Sending dandiset unembargo failed message for dandiset %s to dandiset owners and devs', + dandiset.identifier, ) - messages = [build_dandiset_unembargoed_message(dandiset)] + messages = [build_dandiset_unembargo_failed_message(dandiset)] with mail.get_connection() as connection: connection.send_messages(messages) diff --git a/dandiapi/api/migrations/0008_migrate_embargoed_data.py b/dandiapi/api/migrations/0008_migrate_embargoed_data.py index 13943aeaf..d454fe3fb 100644 --- a/dandiapi/api/migrations/0008_migrate_embargoed_data.py +++ b/dandiapi/api/migrations/0008_migrate_embargoed_data.py @@ -21,7 +21,7 @@ def migrate_embargoed_blob(embargoed_blob): # as the above case, but between an embargoed and open dandiset, instead of two # embargoed dandisets. # - # In case #3, the asset will effectively be un-embargoed. + # In case #3, the asset will effectively be unembargoed. existing_blob = AssetBlob.objects.filter( etag=embargoed_blob.etag, size=embargoed_blob.size ).first() diff --git a/dandiapi/api/migrations/0010_auditrecord.py b/dandiapi/api/migrations/0010_auditrecord.py new file mode 100644 index 000000000..a58f12eb3 --- /dev/null +++ b/dandiapi/api/migrations/0010_auditrecord.py @@ -0,0 +1,51 @@ +# Generated by Django 4.1.13 on 2024-07-24 01:30 +from __future__ import annotations + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('api', '0009_remove_embargoedassetblob_dandiset_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='AuditRecord', + fields=[ + ( + 'id', + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name='ID' + ), + ), + ('timestamp', models.DateTimeField(auto_now_add=True)), + ('dandiset_id', models.IntegerField()), + ('username', models.CharField(max_length=39)), + ('user_email', models.CharField(max_length=254)), + ('user_fullname', models.CharField(max_length=301)), + ( + 'record_type', + models.CharField( + choices=[ + ('create_dandiset', 'create_dandiset'), + ('change_owners', 'change_owners'), + ('update_metadata', 'update_metadata'), + ('add_asset', 'add_asset'), + ('update_asset', 'update_asset'), + ('remove_asset', 'remove_asset'), + ('create_zarr', 'create_zarr'), + ('upload_zarr_chunks', 'upload_zarr_chunks'), + ('delete_zarr_chunks', 'delete_zarr_chunks'), + ('finalize_zarr', 'finalize_zarr'), + ('unembargo_dandiset', 'unembargo_dandiset'), + ('publish_dandiset', 'publish_dandiset'), + ('delete_dandiset', 'delete_dandiset'), + ], + max_length=32, + ), + ), + ('details', models.JSONField(blank=True)), + ], + ), + ] diff --git a/dandiapi/api/models/__init__.py b/dandiapi/api/models/__init__.py index d3f0c7a6c..082c7bb95 100644 --- a/dandiapi/api/models/__init__.py +++ b/dandiapi/api/models/__init__.py @@ -2,6 +2,7 @@ from .asset import Asset, AssetBlob from .asset_paths import AssetPath, AssetPathRelation +from .audit import AuditRecord from .dandiset import Dandiset from .oauth import StagingApplication from .upload import Upload @@ -13,6 +14,7 @@ 'AssetBlob', 'AssetPath', 'AssetPathRelation', + 'AuditRecord', 'Dandiset', 'StagingApplication', 'Upload', diff --git a/dandiapi/api/models/asset.py b/dandiapi/api/models/asset.py index e2158560b..7fc106ae4 100644 --- a/dandiapi/api/models/asset.py +++ b/dandiapi/api/models/asset.py @@ -218,7 +218,10 @@ def is_different_from( if self.path != path: return True - return self.metadata != metadata + if self.metadata != metadata: # noqa: SIM103 + return True + + return False @staticmethod def dandi_asset_id(asset_id: str | uuid.UUID): diff --git a/dandiapi/api/models/audit.py b/dandiapi/api/models/audit.py new file mode 100644 index 000000000..56d5254f7 --- /dev/null +++ b/dandiapi/api/models/audit.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from typing import Literal, get_args + +from django.db import models + +AuditRecordType = Literal[ + 'create_dandiset', + 'change_owners', + 'update_metadata', + 'add_asset', + 'update_asset', + 'remove_asset', + 'create_zarr', + 'upload_zarr_chunks', + 'delete_zarr_chunks', + 'finalize_zarr', + 'unembargo_dandiset', + 'publish_dandiset', + 'delete_dandiset', +] +AUDIT_RECORD_CHOICES = [(t, t) for t in get_args(AuditRecordType)] + + +class AuditRecord(models.Model): + timestamp = models.DateTimeField(auto_now_add=True) + dandiset_id = models.IntegerField() + + # GitHub enforces a 39 character limit on usernames (see, e.g., + # https://docs.github.com/en/enterprise-cloud@latest/admin/managing-iam/iam-configuration-reference/username-considerations-for-external-authentication). + username = models.CharField(max_length=39) + + # According to RFC 5321 (https://www.rfc-editor.org/rfc/rfc5321.txt), + # section 4.5.3.1.3, an email address "path" is limited to 256 octets, + # including the surrounding angle brackets. Without the brackets, that + # leaves 254 characters for the email address itself. + user_email = models.CharField(max_length=254) + + # The signup questionnaire imposes a 150 character limit on both first and + # last names; together with a space to separate them, that makes a 301 + # character limit on the full name. + user_fullname = models.CharField(max_length=301) + record_type = models.CharField(max_length=32, choices=AUDIT_RECORD_CHOICES) + details = models.JSONField(blank=True) + + def __str__(self): + return f'{self.record_type}/{self.dandiset_id:06}' diff --git a/dandiapi/api/models/dandiset.py b/dandiapi/api/models/dandiset.py index 91267427b..97cb8bff3 100644 --- a/dandiapi/api/models/dandiset.py +++ b/dandiapi/api/models/dandiset.py @@ -58,6 +58,14 @@ def identifier(self) -> str: # Compare against None, to allow id 0 return f'{self.id:06}' if self.id is not None else '' + @property + def embargoed(self) -> bool: + return self.embargo_status == self.EmbargoStatus.EMBARGOED + + @property + def unembargo_in_progress(self) -> bool: + return self.embargo_status == self.EmbargoStatus.UNEMBARGOING + @property def most_recent_published_version(self): return self.versions.exclude(version='draft').order_by('modified').last() @@ -91,16 +99,6 @@ def set_owners(self, new_owners): # Return the owners added/removed so they can be emailed return removed_owners, added_owners - def add_owner(self, new_owner): - old_owners = get_users_with_perms(self, only_with_perms_in=['owner']) - if new_owner not in old_owners: - assign_perm('owner', new_owner, self) - - def remove_owner(self, owner): - owners = get_users_with_perms(self, only_with_perms_in=['owner']) - if owner in owners: - remove_perm('owner', owner, self) - @classmethod def published_count(cls): """Return the number of Dandisets with published Versions.""" diff --git a/dandiapi/api/models/version.py b/dandiapi/api/models/version.py index d52cba3be..c134b7285 100644 --- a/dandiapi/api/models/version.py +++ b/dandiapi/api/models/version.py @@ -3,6 +3,7 @@ import datetime import logging +from dandischema.models import AccessType from django.conf import settings from django.contrib.postgres.indexes import HashIndex from django.core.validators import RegexValidator @@ -70,6 +71,10 @@ def size(self): or 0 ) + @property + def active_uploads(self): + return self.dandiset.uploads.count() if self.version == 'draft' else 0 + @property def publishable(self) -> bool: if self.status != Version.Status.VALID: @@ -165,7 +170,41 @@ def strip_metadata(cls, metadata): 'publishedBy', 'manifestLocation', ] - return {key: metadata[key] for key in metadata if key not in computed_fields} + stripped = {key: metadata[key] for key in metadata if key not in computed_fields} + + # Strip the status and schemaKey fields, as modifying them is not supported + if ( + 'access' in stripped + and isinstance(stripped['access'], list) + and len(stripped['access']) + and isinstance(stripped['access'][0], dict) + ): + stripped['access'][0].pop('schemaKey', None) + stripped['access'][0].pop('status', None) + + return stripped + + def _populate_access_metadata(self): + default_access = [{}] + access = self.metadata.get('access', default_access) + + # Ensure access is a non-empty list + if not (isinstance(access, list) and access): + access = default_access + + # Ensure that every item in access is a dict + access = [x for x in access if isinstance(x, dict)] or default_access + + # Set first access item + access[0] = { + **access[0], + 'schemaKey': 'AccessRequirements', + 'status': AccessType.EmbargoedAccess.value + if self.dandiset.embargoed + else AccessType.OpenAccess.value, + } + + return access def _populate_metadata(self): from dandiapi.api.manifests import manifest_location @@ -187,6 +226,7 @@ def _populate_metadata(self): f'{self.dandiset.identifier}/{self.version}' ), 'dateCreated': self.dandiset.created.isoformat(), + 'access': self._populate_access_metadata(), } if 'assetsSummary' not in metadata: diff --git a/dandiapi/api/services/asset/__init__.py b/dandiapi/api/services/asset/__init__.py index 6c0ee60ed..5ea352e8f 100644 --- a/dandiapi/api/services/asset/__init__.py +++ b/dandiapi/api/services/asset/__init__.py @@ -3,11 +3,13 @@ from typing import TYPE_CHECKING from django.db import transaction +from django.utils import timezone from dandiapi.api.asset_paths import add_asset_paths, delete_asset_paths, get_conflicting_paths from dandiapi.api.models.asset import Asset, AssetBlob from dandiapi.api.models.dandiset import Dandiset from dandiapi.api.models.version import Version +from dandiapi.api.services import audit from dandiapi.api.services.asset.exceptions import ( AssetAlreadyExistsError, AssetPathConflictError, @@ -43,6 +45,39 @@ def _create_asset( return asset +def _add_asset_to_version( + *, + version: Version, + asset_blob: AssetBlob | None, + zarr_archive: ZarrArchive | None, + metadata: dict, +) -> Asset: + path = metadata['path'] + asset = _create_asset( + path=path, asset_blob=asset_blob, zarr_archive=zarr_archive, metadata=metadata + ) + version.assets.add(asset) + add_asset_paths(asset, version) + + # Trigger a version metadata validation, as saving the version might change the metadata + Version.objects.filter(id=version.id).update( + status=Version.Status.PENDING, modified=timezone.now() + ) + + return asset + + +def _remove_asset_from_version(*, asset: Asset, version: Version): + # Remove asset paths and asset itself from version + delete_asset_paths(asset, version) + version.assets.remove(asset) + + # Trigger a version metadata validation, as saving the version might change the metadata + Version.objects.filter(id=version.id).update( + status=Version.Status.PENDING, modified=timezone.now() + ) + + def change_asset( # noqa: PLR0913 *, user, @@ -84,19 +119,20 @@ def change_asset( # noqa: PLR0913 raise AssetAlreadyExistsError with transaction.atomic(): - remove_asset_from_version(user=user, asset=asset, version=version) - - new_asset = add_asset_to_version( - user=user, + _remove_asset_from_version(asset=asset, version=version) + new_asset = _add_asset_to_version( version=version, asset_blob=new_asset_blob, zarr_archive=new_zarr_archive, metadata=new_metadata, ) + # Set previous asset and save new_asset.previous = asset new_asset.save() + audit.update_asset(dandiset=version.dandiset, user=user, asset=new_asset) + return new_asset, True @@ -135,33 +171,27 @@ def add_asset_to_version( if zarr_archive and zarr_archive.dandiset != version.dandiset: raise ZarrArchiveBelongsToDifferentDandisetError - # Creating an asset in an OPEN dandiset that points to an embargoed blob results in that - # blob being un-embargoed - unembargo_asset_blob = ( - asset_blob is not None - and asset_blob.embargoed - and version.dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN - ) with transaction.atomic(): - if asset_blob and unembargo_asset_blob: + # Creating an asset in an OPEN dandiset that points to an embargoed blob results in that + # blob being unembargoed + if ( + asset_blob is not None + and asset_blob.embargoed + and version.dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN + ): asset_blob.embargoed = False asset_blob.save() + transaction.on_commit( + lambda: remove_asset_blob_embargoed_tag_task.delay(blob_id=asset_blob.blob_id) + ) - asset = _create_asset( - path=path, asset_blob=asset_blob, zarr_archive=zarr_archive, metadata=metadata + asset = _add_asset_to_version( + version=version, + asset_blob=asset_blob, + zarr_archive=zarr_archive, + metadata=metadata, ) - version.assets.add(asset) - add_asset_paths(asset, version) - - # Trigger a version metadata validation, as saving the version might change the metadata - version.status = Version.Status.PENDING - # Save the version so that the modified field is updated - version.save() - - # Perform this after the above transaction has finished, to ensure we only operate on - # un-embargoed asset blobs - if asset_blob and unembargo_asset_blob: - remove_asset_blob_embargoed_tag_task.delay(blob_id=asset_blob.blob_id) + audit.add_asset(dandiset=version.dandiset, user=user, asset=asset) return asset @@ -173,13 +203,7 @@ def remove_asset_from_version(*, user, asset: Asset, version: Version) -> Versio raise DraftDandisetNotModifiableError with transaction.atomic(): - # Remove asset paths and asset itself from version - delete_asset_paths(asset, version) - version.assets.remove(asset) - - # Trigger a version metadata validation, as saving the version might change the metadata - version.status = Version.Status.PENDING - # Save the version so that the modified field is updated - version.save() + _remove_asset_from_version(asset=asset, version=version) + audit.remove_asset(dandiset=version.dandiset, user=user, asset=asset) return version diff --git a/dandiapi/api/services/audit/__init__.py b/dandiapi/api/services/audit/__init__.py new file mode 100644 index 000000000..adad72b1a --- /dev/null +++ b/dandiapi/api/services/audit/__init__.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from dandiapi.api.models.audit import AuditRecord + +if TYPE_CHECKING: + from django.contrib.auth.models import User + + from dandiapi.api.models.asset import Asset + from dandiapi.api.models.audit import AuditRecordType + from dandiapi.api.models.dandiset import Dandiset + from dandiapi.zarr.models import ZarrArchive + + +def _make_audit_record( + *, dandiset: Dandiset, user: User, record_type: AuditRecordType, details: dict +) -> AuditRecord: + audit_record = AuditRecord( + dandiset_id=dandiset.id, + username=user.username, + user_email=user.email, + user_fullname=f'{user.first_name} {user.last_name}', + record_type=record_type, + details=details, + ) + audit_record.save() + + return audit_record + + +def create_dandiset(*, dandiset: Dandiset, user: User, metadata: dict, embargoed: bool): + details = { + 'metadata': metadata, + 'embargoed': embargoed, + } + return _make_audit_record( + dandiset=dandiset, user=user, record_type='create_dandiset', details=details + ) + + +def change_owners( + *, dandiset: Dandiset, user: User, removed_owners: list[User], added_owners: list[User] +): + def glean_user_info(user: User): + return { + 'username': user.username, + 'email': user.email, + 'name': f'{user.first_name} {user.last_name}', + } + + details = { + 'removed_owners': [glean_user_info(u) for u in removed_owners], + 'added_owners': [glean_user_info(u) for u in added_owners], + } + return _make_audit_record( + dandiset=dandiset, user=user, record_type='change_owners', details=details + ) + + +def update_metadata(*, dandiset: Dandiset, user: User, metadata: dict) -> AuditRecord: + details = {'metadata': metadata} + return _make_audit_record( + dandiset=dandiset, user=user, record_type='update_metadata', details=details + ) + + +def _asset_details(asset: Asset) -> dict: + checksum = (asset.blob and asset.blob.etag) or (asset.zarr and asset.zarr.checksum) + + return { + 'path': asset.path, + 'asset_blob_id': asset.blob and str(asset.blob.blob_id), + 'zarr_archive_id': asset.zarr and str(asset.zarr.zarr_id), + 'asset_id': str(asset.asset_id), + 'checksum': checksum, + 'metadata': asset.metadata, + } + + +def add_asset(*, dandiset: Dandiset, user: User, asset: Asset) -> AuditRecord: + details = _asset_details(asset) + return _make_audit_record( + dandiset=dandiset, user=user, record_type='add_asset', details=details + ) + + +def update_asset(*, dandiset: Dandiset, user: User, asset: Asset) -> AuditRecord: + details = _asset_details(asset) + return _make_audit_record( + dandiset=dandiset, user=user, record_type='update_asset', details=details + ) + + +def remove_asset(*, dandiset: Dandiset, user: User, asset: Asset) -> AuditRecord: + details = { + 'path': asset.path, + 'asset_id': str(asset.asset_id), + } + return _make_audit_record( + dandiset=dandiset, user=user, record_type='remove_asset', details=details + ) + + +def create_zarr(*, dandiset: Dandiset, user: User, zarr_archive: ZarrArchive) -> AuditRecord: + details = { + 'zarr_id': str(zarr_archive.zarr_id), + 'name': zarr_archive.name, + } + return _make_audit_record( + dandiset=dandiset, user=user, record_type='create_zarr', details=details + ) + + +def upload_zarr_chunks( + *, dandiset: Dandiset, user: User, zarr_archive: ZarrArchive, paths: list[str] +) -> AuditRecord: + details = { + 'zarr_id': str(zarr_archive.zarr_id), + 'paths': paths, + } + return _make_audit_record( + dandiset=dandiset, user=user, record_type='upload_zarr_chunks', details=details + ) + + +def delete_zarr_chunks( + *, dandiset: Dandiset, user: User, zarr_archive: ZarrArchive, paths: list[str] +) -> AuditRecord: + details = { + 'zarr_id': str(zarr_archive.zarr_id), + 'paths': paths, + } + return _make_audit_record( + dandiset=dandiset, user=user, record_type='delete_zarr_chunks', details=details + ) + + +def finalize_zarr(*, dandiset: Dandiset, user: User, zarr_archive: ZarrArchive) -> AuditRecord: + details = { + 'zarr_id': str(zarr_archive.zarr_id), + } + return _make_audit_record( + dandiset=dandiset, user=user, record_type='finalize_zarr', details=details + ) + + +def unembargo_dandiset(*, dandiset: Dandiset, user: User) -> AuditRecord: + return _make_audit_record( + dandiset=dandiset, user=user, record_type='unembargo_dandiset', details={} + ) + + +def publish_dandiset(*, dandiset: Dandiset, user: User, version: str) -> AuditRecord: + details = { + 'version': version, + } + return _make_audit_record( + dandiset=dandiset, user=user, record_type='publish_dandiset', details=details + ) + + +def delete_dandiset(*, dandiset: Dandiset, user: User): + return _make_audit_record( + dandiset=dandiset, user=user, record_type='delete_dandiset', details={} + ) diff --git a/dandiapi/api/services/dandiset/__init__.py b/dandiapi/api/services/dandiset/__init__.py index 0cf80d82c..20067be60 100644 --- a/dandiapi/api/services/dandiset/__init__.py +++ b/dandiapi/api/services/dandiset/__init__.py @@ -1,10 +1,13 @@ from __future__ import annotations from django.db import transaction +from guardian.shortcuts import assign_perm from dandiapi.api.models.dandiset import Dandiset from dandiapi.api.models.version import Version +from dandiapi.api.services import audit from dandiapi.api.services.dandiset.exceptions import DandisetAlreadyExistsError +from dandiapi.api.services.embargo.exceptions import DandisetUnembargoInProgressError from dandiapi.api.services.exceptions import AdminOnlyOperationError, NotAllowedError from dandiapi.api.services.version.metadata import _normalize_version_metadata @@ -35,7 +38,7 @@ def create_dandiset( dandiset = Dandiset(id=identifier, embargo_status=embargo_status) dandiset.full_clean() dandiset.save() - dandiset.add_owner(user) + assign_perm('owner', user, dandiset) draft_version = Version( dandiset=dandiset, name=version_name, @@ -45,6 +48,10 @@ def create_dandiset( draft_version.full_clean(validate_constraints=False) draft_version.save() + audit.create_dandiset( + dandiset=dandiset, user=user, metadata=draft_version.metadata, embargoed=embargo + ) + return dandiset, draft_version @@ -55,9 +62,15 @@ def delete_dandiset(*, user, dandiset: Dandiset) -> None: raise NotAllowedError('Cannot delete dandisets with published versions.') if dandiset.versions.filter(status=Version.Status.PUBLISHING).exists(): raise NotAllowedError('Cannot delete dandisets that are currently being published.') + if dandiset.unembargo_in_progress: + raise DandisetUnembargoInProgressError # Delete all versions first, so that AssetPath deletion is cascaded # through versions, rather than through zarrs directly with transaction.atomic(): + # Record the audit event first so that the AuditRecord instance has a + # chance to grab the Dandiset information before it is destroyed. + audit.delete_dandiset(dandiset=dandiset, user=user) + dandiset.versions.all().delete() dandiset.delete() diff --git a/dandiapi/api/services/dandiset/exceptions.py b/dandiapi/api/services/dandiset/exceptions.py index a71088c1b..361d2e08b 100644 --- a/dandiapi/api/services/dandiset/exceptions.py +++ b/dandiapi/api/services/dandiset/exceptions.py @@ -7,10 +7,3 @@ class DandisetAlreadyExistsError(DandiError): http_status_code = status.HTTP_400_BAD_REQUEST - - -class UnauthorizedEmbargoAccessError(DandiError): - http_status_code = status.HTTP_401_UNAUTHORIZED - message = ( - 'Authentication credentials must be provided when attempting to access embargoed dandisets' - ) diff --git a/dandiapi/api/services/embargo/__init__.py b/dandiapi/api/services/embargo/__init__.py index dd07e04da..e2f671998 100644 --- a/dandiapi/api/services/embargo/__init__.py +++ b/dandiapi/api/services/embargo/__init__.py @@ -1,25 +1,39 @@ from __future__ import annotations from concurrent.futures import ThreadPoolExecutor +import logging from typing import TYPE_CHECKING from botocore.config import Config from django.conf import settings from django.db import transaction +from more_itertools import chunked -from dandiapi.api.mail import send_dandisets_to_unembargo_message -from dandiapi.api.models import Asset, AssetBlob, Dandiset, Version +from dandiapi.api.mail import send_dandiset_unembargoed_message +from dandiapi.api.models import AssetBlob, Dandiset, Version +from dandiapi.api.services import audit from dandiapi.api.services.asset.exceptions import DandisetOwnerRequiredError +from dandiapi.api.services.exceptions import DandiError +from dandiapi.api.services.metadata import validate_version_metadata from dandiapi.api.storage import get_boto_client +from dandiapi.api.tasks import unembargo_dandiset_task -from .exceptions import AssetBlobEmbargoedError, DandisetNotEmbargoedError +from .exceptions import ( + AssetBlobEmbargoedError, + AssetTagRemovalError, + DandisetActiveUploadsError, + DandisetNotEmbargoedError, +) if TYPE_CHECKING: from django.contrib.auth.models import User - from django.db.models import QuerySet from mypy_boto3_s3 import S3Client +logger = logging.getLogger(__name__) +ASSET_BLOB_TAG_REMOVAL_CHUNK_SIZE = 5000 + + def _delete_asset_blob_tags(client: S3Client, blob: str): client.delete_object_tagging( Bucket=settings.DANDI_DANDISETS_BUCKET_NAME, @@ -29,34 +43,74 @@ def _delete_asset_blob_tags(client: S3Client, blob: str): # NOTE: In testing this took ~2 minutes for 100,000 files def _remove_dandiset_asset_blob_embargo_tags(dandiset: Dandiset): - # First we need to generate a CSV manifest containing all asset blobs that need to be untaged - embargoed_asset_blobs = AssetBlob.objects.filter( - embargoed=True, assets__versions__dandiset=dandiset - ).values_list('blob', flat=True) - client = get_boto_client(config=Config(max_pool_connections=100)) - with ThreadPoolExecutor(max_workers=100) as e: - for blob in embargoed_asset_blobs: - e.submit(_delete_asset_blob_tags, client=client, blob=blob) + embargoed_asset_blobs = ( + AssetBlob.objects.filter(embargoed=True, assets__versions__dandiset=dandiset) + .values_list('blob', flat=True) + .iterator(chunk_size=ASSET_BLOB_TAG_REMOVAL_CHUNK_SIZE) + ) + + # Chunk the blobs so we're never storing a list of all embargoed blobs + chunks = chunked(embargoed_asset_blobs, ASSET_BLOB_TAG_REMOVAL_CHUNK_SIZE) + for chunk in chunks: + with ThreadPoolExecutor(max_workers=100) as e: + futures = [ + e.submit(_delete_asset_blob_tags, client=client, blob=blob) for blob in chunk + ] + + # Check if any failed and raise exception if so + failed = [blob for i, blob in enumerate(chunk) if futures[i].exception() is not None] + if failed: + raise AssetTagRemovalError('Some blobs failed to remove tags', blobs=failed) @transaction.atomic() -def _unembargo_dandiset(dandiset: Dandiset): - # NOTE: Before proceeding, all asset blobs must have their embargoed tags removed in s3 +def unembargo_dandiset(ds: Dandiset, user: User): + """Unembargo a dandiset by copying all embargoed asset blobs to the public bucket.""" + logger.info('Unembargoing Dandiset %s', ds.identifier) + logger.info('\t%s assets', ds.draft_version.assets.count()) + + if ds.embargo_status != Dandiset.EmbargoStatus.UNEMBARGOING: + raise DandiError( + message=f'Expected dandiset state {Dandiset.EmbargoStatus.UNEMBARGOING}, found {ds.embargo_status}', # noqa: E501 + http_status_code=500, + ) + if ds.uploads.all().exists(): + raise DandisetActiveUploadsError(http_status_code=500) + + # Remove tags in S3 + logger.info('Removing tags...') + _remove_dandiset_asset_blob_embargo_tags(ds) + + # Update embargoed flag on asset blobs + updated = AssetBlob.objects.filter(embargoed=True, assets__versions__dandiset=ds).update( + embargoed=False + ) + logger.info('Updated %s asset blobs', updated) + + # Set status to OPEN + Dandiset.objects.filter(pk=ds.pk).update(embargo_status=Dandiset.EmbargoStatus.OPEN) + logger.info('Dandiset embargo status updated') - draft_version: Version = dandiset.draft_version - embargoed_assets: QuerySet[Asset] = draft_version.assets.filter(blob__embargoed=True) - AssetBlob.objects.filter(assets__in=embargoed_assets).update(embargoed=False) + # Fetch version to ensure changed embargo_status is included + # Save version to update metadata through populate_metadata + v = Version.objects.select_for_update().get(dandiset=ds, version='draft') + v.status = Version.Status.PENDING + v.save() + logger.info('Version metadata updated') - # Update draft version metadata - draft_version.metadata['access'] = [ - {'schemaKey': 'AccessRequirements', 'status': 'dandi:OpenAccess'} - ] - draft_version.save() + # Pre-emptively validate version metadata, so that old validation + # errors don't show up once un-embargo is finished + validate_version_metadata(version=v) + logger.info('Version metadata validated') - # Set access on dandiset - dandiset.embargo_status = Dandiset.EmbargoStatus.OPEN - dandiset.save() + # Notify owners of completed unembargo + send_dandiset_unembargoed_message(ds) + logger.info('Dandiset owners notified.') + + logger.info('...Done') + + audit.unembargo_dandiset(dandiset=ds, user=user) def remove_asset_blob_embargoed_tag(asset_blob: AssetBlob) -> None: @@ -67,18 +121,19 @@ def remove_asset_blob_embargoed_tag(asset_blob: AssetBlob) -> None: _delete_asset_blob_tags(client=get_boto_client(), blob=asset_blob.blob.name) -def unembargo_dandiset(*, user: User, dandiset: Dandiset): - """Unembargo a dandiset by copying all embargoed asset blobs to the public bucket.""" +def kickoff_dandiset_unembargo(*, user: User, dandiset: Dandiset): + """Set dandiset status to kickoff unembargo.""" if dandiset.embargo_status != Dandiset.EmbargoStatus.EMBARGOED: raise DandisetNotEmbargoedError if not user.has_perm('owner', dandiset): raise DandisetOwnerRequiredError - # A scheduled task will pick up any new dandisets with this status and email the admins to - # initiate the un-embargo process - dandiset.embargo_status = Dandiset.EmbargoStatus.UNEMBARGOING - dandiset.save() + if dandiset.uploads.count(): + raise DandisetActiveUploadsError - # Send initial email to ensure it's seen in a timely manner - send_dandisets_to_unembargo_message(dandisets=[dandiset]) + with transaction.atomic(): + Dandiset.objects.filter(pk=dandiset.pk).update( + embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + transaction.on_commit(lambda: unembargo_dandiset_task.delay(dandiset.pk, user.id)) diff --git a/dandiapi/api/services/embargo/exceptions.py b/dandiapi/api/services/embargo/exceptions.py index b9d58aa2f..1bdc0fb25 100644 --- a/dandiapi/api/services/embargo/exceptions.py +++ b/dandiapi/api/services/embargo/exceptions.py @@ -13,3 +13,26 @@ class AssetBlobEmbargoedError(DandiError): class DandisetNotEmbargoedError(DandiError): http_status_code = status.HTTP_400_BAD_REQUEST message = 'Dandiset not embargoed' + + +class DandisetActiveUploadsError(DandiError): + http_status_code = status.HTTP_400_BAD_REQUEST + message = 'Dandiset unembargo not allowed with active uploads' + + +class DandisetUnembargoInProgressError(DandiError): + http_status_code = status.HTTP_400_BAD_REQUEST + message = 'Dandiset modification not allowed during unembargo' + + +class UnauthorizedEmbargoAccessError(DandiError): + http_status_code = status.HTTP_401_UNAUTHORIZED + message = ( + 'Authentication credentials must be provided when attempting to access embargoed dandisets' + ) + + +class AssetTagRemovalError(Exception): + def __init__(self, message: str, blobs: list[str]) -> None: + super().__init__(message) + self.blobs = blobs diff --git a/dandiapi/api/services/metadata/__init__.py b/dandiapi/api/services/metadata/__init__.py index 985c04ff8..0656916a1 100644 --- a/dandiapi/api/services/metadata/__init__.py +++ b/dandiapi/api/services/metadata/__init__.py @@ -172,7 +172,7 @@ def _get_version_validation_result( # If the version has since been modified, return early if current_version.status != Version.Status.PENDING: logger.info( - 'Skipping validation for version %s due to concurrent modification', version_id + 'Skipping validation for version with a status of %s', current_version.status ) return diff --git a/dandiapi/api/services/publish/__init__.py b/dandiapi/api/services/publish/__init__.py index a41ead487..882fa0c7c 100644 --- a/dandiapi/api/services/publish/__init__.py +++ b/dandiapi/api/services/publish/__init__.py @@ -4,12 +4,14 @@ from typing import TYPE_CHECKING from dandischema.metadata import aggregate_assets_summary, validate +from django.contrib.auth.models import User from django.db import transaction from more_itertools import ichunked from dandiapi.api import doi from dandiapi.api.asset_paths import add_version_asset_paths from dandiapi.api.models import Asset, Dandiset, Version +from dandiapi.api.services import audit from dandiapi.api.services.exceptions import NotAllowedError from dandiapi.api.services.publish.exceptions import ( DandisetAlreadyPublishedError, @@ -22,7 +24,6 @@ from dandiapi.api.tasks import write_manifest_files if TYPE_CHECKING: - from django.contrib.auth.models import User from django.db.models import QuerySet @@ -42,17 +43,18 @@ def publish_asset(*, asset: Asset) -> None: locked_asset.save() -def _lock_dandiset_for_publishing(*, user: User, dandiset: Dandiset) -> None: +def _lock_dandiset_for_publishing(*, user: User, dandiset: Dandiset) -> None: # noqa: C901 """ Prepare a dandiset to be published by locking it and setting its status to PUBLISHING. This function MUST be called before _publish_dandiset is called. """ - if ( - not user.has_perm('owner', dandiset) - or dandiset.embargo_status != Dandiset.EmbargoStatus.OPEN - ): + if not user.has_perm('owner', dandiset): raise NotAllowedError + + if dandiset.embargo_status != Dandiset.EmbargoStatus.OPEN: + raise NotAllowedError('Operation only allowed on OPEN dandisets', 400) + if dandiset.zarr_archives.exists() or dandiset.embargoed_zarr_archives.exists(): raise NotAllowedError('Cannot publish dandisets which contain zarrs', 400) @@ -103,7 +105,7 @@ def _build_publishable_version_from_draft(draft_version: Version) -> Version: return publishable_version -def _publish_dandiset(dandiset_id: int) -> None: +def _publish_dandiset(dandiset_id: int, user_id: int) -> None: """ Publish a dandiset. @@ -189,10 +191,15 @@ def _create_doi(version_id: int): transaction.on_commit(lambda: _create_doi(new_version.id)) + user = User.objects.get(id=user_id) + audit.publish_dandiset( + dandiset=new_version.dandiset, user=user, version=new_version.version + ) + def publish_dandiset(*, user: User, dandiset: Dandiset) -> None: from dandiapi.api.tasks import publish_dandiset_task with transaction.atomic(): _lock_dandiset_for_publishing(user=user, dandiset=dandiset) - transaction.on_commit(lambda: publish_dandiset_task.delay(dandiset.id)) + transaction.on_commit(lambda: publish_dandiset_task.delay(dandiset.id, user.id)) diff --git a/dandiapi/api/services/version/metadata.py b/dandiapi/api/services/version/metadata.py index bc04c8114..bd82bcc40 100644 --- a/dandiapi/api/services/version/metadata.py +++ b/dandiapi/api/services/version/metadata.py @@ -32,13 +32,6 @@ def _normalize_version_metadata( 'includeInCitation': True, }, ], - # TODO: move this into dandischema - 'access': [ - { - 'schemaKey': 'AccessRequirements', - 'status': 'dandi:EmbargoedAccess' if embargo else 'dandi:OpenAccess', - } - ], **version_metadata, } # Run the version_metadata through the pydantic model to automatically include any boilerplate diff --git a/dandiapi/api/storage.py b/dandiapi/api/storage.py index dcaa517ea..0fa1e1a34 100644 --- a/dandiapi/api/storage.py +++ b/dandiapi/api/storage.py @@ -155,7 +155,7 @@ class TimeoutS3Storage(S3Storage): def __init__(self, **settings): super().__init__(**settings) - self.config = self.config.merge( + self.client_config = self.client_config.merge( Config(connect_timeout=5, read_timeout=5, retries={'max_attempts': 2}) ) diff --git a/dandiapi/api/tasks/__init__.py b/dandiapi/api/tasks/__init__.py index eb3a6ef99..2b4496c74 100644 --- a/dandiapi/api/tasks/__init__.py +++ b/dandiapi/api/tasks/__init__.py @@ -1,9 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from celery import shared_task +from celery.exceptions import SoftTimeLimitExceeded from celery.utils.log import get_task_logger +from django.contrib.auth.models import User from dandiapi.api.doi import delete_doi +from dandiapi.api.mail import send_dandiset_unembargo_failed_message from dandiapi.api.manifests import ( write_assets_jsonld, write_assets_yaml, @@ -12,6 +17,10 @@ write_dandiset_yaml, ) from dandiapi.api.models import Asset, AssetBlob, Version +from dandiapi.api.models.dandiset import Dandiset + +if TYPE_CHECKING: + from uuid import UUID logger = get_task_logger(__name__) @@ -24,16 +33,21 @@ def remove_asset_blob_embargoed_tag_task(blob_id: str) -> None: remove_asset_blob_embargoed_tag(asset_blob) -@shared_task(queue='calculate_sha256', soft_time_limit=86_400) -def calculate_sha256(blob_id: str) -> None: +@shared_task( + queue='calculate_sha256', + soft_time_limit=86_400, # 24 hours + autoretry_for=(SoftTimeLimitExceeded,), + retry_backoff=True, + max_retries=3, +) +def calculate_sha256(blob_id: str | UUID) -> None: asset_blob = AssetBlob.objects.get(blob_id=blob_id) - logger.info('Found AssetBlob %s', blob_id) + logger.info('Calculating sha256 checksum for asset blob %s', blob_id) sha256 = asset_blob.blob.storage.sha256_checksum(asset_blob.blob.name) # TODO: Run dandi-cli validation - asset_blob.sha256 = sha256 - asset_blob.save() + AssetBlob.objects.filter(blob_id=blob_id).update(sha256=sha256) @shared_task(soft_time_limit=180) @@ -71,7 +85,22 @@ def delete_doi_task(doi: str) -> None: @shared_task -def publish_dandiset_task(dandiset_id: int): +def publish_dandiset_task(dandiset_id: int, user_id: int): from dandiapi.api.services.publish import _publish_dandiset - _publish_dandiset(dandiset_id=dandiset_id) + _publish_dandiset(dandiset_id=dandiset_id, user_id=user_id) + + +@shared_task(soft_time_limit=1200) +def unembargo_dandiset_task(dandiset_id: int, user_id: int): + from dandiapi.api.services.embargo import unembargo_dandiset + + ds = Dandiset.objects.get(pk=dandiset_id) + user = User.objects.get(id=user_id) + + # If the unembargo fails for any reason, send an email, but continue the error propagation + try: + unembargo_dandiset(ds, user) + except Exception: + send_dandiset_unembargo_failed_message(ds) + raise diff --git a/dandiapi/api/tasks/scheduled.py b/dandiapi/api/tasks/scheduled.py index bba10340d..2b4a100e9 100644 --- a/dandiapi/api/tasks/scheduled.py +++ b/dandiapi/api/tasks/scheduled.py @@ -20,10 +20,9 @@ from django.db.models.query_utils import Q from dandiapi.analytics.tasks import collect_s3_log_records_task -from dandiapi.api.mail import send_dandisets_to_unembargo_message, send_pending_users_message +from dandiapi.api.mail import send_pending_users_message from dandiapi.api.models import UserMetadata, Version from dandiapi.api.models.asset import Asset -from dandiapi.api.models.dandiset import Dandiset from dandiapi.api.services.metadata import version_aggregate_assets_summary from dandiapi.api.services.metadata.exceptions import VersionMetadataConcurrentlyModifiedError from dandiapi.api.tasks import ( @@ -112,14 +111,6 @@ def send_pending_users_email() -> None: send_pending_users_message(pending_users) -@shared_task(soft_time_limit=20) -def send_dandisets_to_unembargo_email() -> None: - """Send an email to admins listing dandisets that have requested un-embargo.""" - dandisets = Dandiset.objects.filter(embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING) - if dandisets.exists(): - send_dandisets_to_unembargo_message(dandisets) - - @shared_task(soft_time_limit=60) def refresh_materialized_view_search() -> None: """ @@ -148,9 +139,6 @@ def register_scheduled_tasks(sender: Celery, **kwargs): # Send daily email to admins containing a list of users awaiting approval sender.add_periodic_task(crontab(hour=0, minute=0), send_pending_users_email.s()) - # Send daily email to admins containing a list of dandisets to un-embargo - sender.add_periodic_task(crontab(hour=0, minute=0), send_dandisets_to_unembargo_email.s()) - # Refresh the materialized view used by asset search every 10 mins. sender.add_periodic_task(timedelta(minutes=10), refresh_materialized_view_search.s()) diff --git a/dandiapi/api/templates/api/mail/approved_user_message.txt b/dandiapi/api/templates/api/mail/approved_user_message.txt index 7b85297ee..4c1f73de4 100644 --- a/dandiapi/api/templates/api/mail/approved_user_message.txt +++ b/dandiapi/api/templates/api/mail/approved_user_message.txt @@ -6,6 +6,7 @@ to start creating dandisets and uploading data. Please use the following links to post any questions or issues. +DANDI Handbook: https://www.dandiarchive.org/handbook Discussions: https://github.com/dandi/helpdesk/discussions Issues: https://github.com/dandi/helpdesk/issues/new/choose YouTube: https://www.youtube.com/@dandiarchive diff --git a/dandiapi/api/templates/api/mail/dandiset_unembargo_failed.html b/dandiapi/api/templates/api/mail/dandiset_unembargo_failed.html new file mode 100644 index 000000000..7a4b1356c --- /dev/null +++ b/dandiapi/api/templates/api/mail/dandiset_unembargo_failed.html @@ -0,0 +1,7 @@ +There was an error during the unembargo of dandiset +{{dandiset.identifier }}. This has been reported to the developers, and will be +investigated as soon as possible. We hope to have this resolved soon! Please avoid making any major changes to this +dandiset in the meantime, as that may hinder or delay the process of resolving this issue. +If in two business days your dandiset is still embargoed, please reply to this email. +


+You are receiving this email because you are an owner of this dandiset. \ No newline at end of file diff --git a/dandiapi/api/templates/api/mail/dandiset_unembargoed.html b/dandiapi/api/templates/api/mail/dandiset_unembargoed.html index 978dea42f..236a5281b 100644 --- a/dandiapi/api/templates/api/mail/dandiset_unembargoed.html +++ b/dandiapi/api/templates/api/mail/dandiset_unembargoed.html @@ -1,4 +1,4 @@ -Dandiset {{ dandiset.identifier }} has been un-embargoed, and is now open for +Dandiset {{ dandiset.identifier }} has been unembargoed, and is now open for public access.

You are receiving this email because you are an owner of this dandiset. diff --git a/dandiapi/api/templates/api/mail/dandisets_to_unembargo.txt b/dandiapi/api/templates/api/mail/dandisets_to_unembargo.txt deleted file mode 100644 index 3676f7721..000000000 --- a/dandiapi/api/templates/api/mail/dandisets_to_unembargo.txt +++ /dev/null @@ -1,10 +0,0 @@ -{% autoescape off %} -The following new dandisets are awaiting un-embargo: - -{% for ds in dandisets %} -Dandiset ID: {{ ds.identifier }} -Owners: {{ ds.owners }} -Number of Assets: {{ ds.asset_count }} -Total data size: {{ ds.size }} -{% endfor %} -{% endautoescape %} diff --git a/dandiapi/api/templates/api/mail/registered_message.txt b/dandiapi/api/templates/api/mail/registered_message.txt index aa994d55e..b35668b89 100644 --- a/dandiapi/api/templates/api/mail/registered_message.txt +++ b/dandiapi/api/templates/api/mail/registered_message.txt @@ -14,6 +14,7 @@ registered with our Slack workspace. Please use the following links to post any questions or issues. +DANDI Handbook: https://www.dandiarchive.org/handbook Discussions: https://github.com/dandi/helpdesk/discussions Issues: https://github.com/dandi/helpdesk/issues/new/choose YouTube: https://www.youtube.com/@dandiarchive diff --git a/dandiapi/api/templates/dashboard/base.html b/dandiapi/api/templates/dashboard/base.html index 16e353803..3cbceb8be 100644 --- a/dandiapi/api/templates/dashboard/base.html +++ b/dandiapi/api/templates/dashboard/base.html @@ -16,6 +16,7 @@

{{ site_header|default:_('D Dashboard Home Users Admin + Mailchimp CSV {% if title %} › {{ title }}{% endif %} {% endblock %} diff --git a/dandiapi/api/tests/factories.py b/dandiapi/api/tests/factories.py index a32573e6e..7b3f3e09a 100644 --- a/dandiapi/api/tests/factories.py +++ b/dandiapi/api/tests/factories.py @@ -5,6 +5,7 @@ from allauth.socialaccount.models import SocialAccount from dandischema.digests.dandietag import DandiETag +from dandischema.models import AccessType from django.conf import settings from django.contrib.auth.models import User from django.core import files as django_files @@ -95,10 +96,19 @@ def metadata(self): 'schemaVersion': settings.DANDI_SCHEMA_VERSION, 'schemaKey': 'Dandiset', 'description': faker.Faker().sentence(), + 'access': [ + { + 'schemaKey': 'AccessRequirements', + 'status': AccessType.EmbargoedAccess.value + if self.dandiset.embargoed + else AccessType.OpenAccess.value, + } + ], 'contributor': [ { 'name': f'{faker.Faker().last_name()}, {faker.Faker().first_name()}', 'roleName': ['dcite:ContactPerson'], + 'email': faker.Faker().email(), 'schemaKey': 'Person', } ], diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index ea70ad969..c2f8889a4 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -26,7 +26,7 @@ # Model tests -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_no_blob_zarr(draft_asset_factory): asset = draft_asset_factory() @@ -38,7 +38,7 @@ def test_asset_no_blob_zarr(draft_asset_factory): assert 'blob-xor-zarr' in str(excinfo.value) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_blob_and_zarr(draft_asset, zarr_archive): # An integrity error is thrown by the constraint that both blob and zarr cannot both be defined draft_asset.zarr = zarr_archive @@ -48,7 +48,7 @@ def test_asset_blob_and_zarr(draft_asset, zarr_archive): assert 'blob-xor-zarr' in str(excinfo.value) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_path(api_client, draft_version_factory, asset_factory): # Initialize version and contained assets version: Version = draft_version_factory() @@ -69,7 +69,7 @@ def test_asset_rest_path(api_client, draft_version_factory, asset_factory): assert val['aggregate_files'] == 1 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_path_not_found(api_client, draft_version_factory, asset_factory): # Initialize version and contained assets version: Version = draft_version_factory() @@ -89,7 +89,7 @@ def test_asset_rest_path_not_found(api_client, draft_version_factory, asset_fact assert resp.json() == {'detail': 'Specified path not found.'} -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_s3_url(asset_blob): signed_url = asset_blob.blob.url s3_url = asset_blob.s3_url @@ -97,7 +97,7 @@ def test_asset_s3_url(asset_blob): assert signed_url.split('?')[0] == s3_url -@pytest.mark.django_db() +@pytest.mark.django_db def test_publish_asset(draft_asset: Asset): draft_asset_id = draft_asset.asset_id draft_blob = draft_asset.blob @@ -139,7 +139,7 @@ def test_publish_asset(draft_asset: Asset): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_total_size( draft_version_factory, asset_factory, asset_blob_factory, zarr_archive_factory ): @@ -176,7 +176,7 @@ def test_asset_total_size( # supported, ATM they are not and tested by test_zarr_rest_create_embargoed_dandiset -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_full_metadata(draft_asset_factory): raw_metadata = { 'foo': 'bar', @@ -202,7 +202,7 @@ def test_asset_full_metadata(draft_asset_factory): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_full_metadata_zarr(draft_asset_factory, zarr_archive): raw_metadata = { 'foo': 'bar', @@ -233,7 +233,7 @@ def test_asset_full_metadata_zarr(draft_asset_factory, zarr_archive): # API Tests -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_list(api_client, version, asset, asset_factory): version.assets.add(asset) @@ -260,7 +260,7 @@ def test_asset_rest_list(api_client, version, asset, asset_factory): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_list_include_metadata(api_client, version, asset, asset_factory): version.assets.add(asset) @@ -301,7 +301,7 @@ def test_asset_rest_list_include_metadata(api_client, version, asset, asset_fact 'no-match', ], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_list_path_filter(api_client, version, asset_factory, path, result_indices): assets = [ asset_factory(path='foo.txt'), @@ -348,7 +348,7 @@ def test_asset_rest_list_path_filter(api_client, version, asset_factory, path, r ], ids=['created', '-created', 'modified', '-modified', 'path', '-path'], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_list_ordering(api_client, version, asset_factory, order_param, ordering): # Create asset B first so that the path ordering is different from the created ordering. b = asset_factory(path='b') @@ -369,7 +369,7 @@ def test_asset_rest_list_ordering(api_client, version, asset_factory, order_para assert result_paths == ordering -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_ordering(api_client, version, asset_factory): # The default collation will ignore special characters, including slashes, on the first pass. If # there are ties, it uses these characters to break ties. This means that in the below example, @@ -388,7 +388,7 @@ def test_asset_path_ordering(api_client, version, asset_factory): assert asset_listing[1].pk == b.pk -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_retrieve(api_client, version, asset, asset_factory): version.assets.add(asset) @@ -404,7 +404,7 @@ def test_asset_rest_retrieve(api_client, version, asset, asset_factory): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_retrieve_no_sha256(api_client, version, asset): version.assets.add(asset) # Remove the sha256 from the factory asset @@ -420,7 +420,67 @@ def test_asset_rest_retrieve_no_sha256(api_client, version, asset): ) -@pytest.mark.django_db() +@pytest.mark.django_db +def test_asset_rest_retrieve_embargoed_admin( + api_client, + draft_version_factory, + draft_asset_factory, + admin_user, + storage, + monkeypatch, +): + monkeypatch.setattr(AssetBlob.blob.field, 'storage', storage) + + api_client.force_authenticate(user=admin_user) + version = draft_version_factory(dandiset__embargo_status=Dandiset.EmbargoStatus.EMBARGOED) + ds = version.dandiset + + # Create an extra asset so that there are multiple assets to filter down + asset = draft_asset_factory(blob__embargoed=True) + version.assets.add(asset) + + # Asset View + r = api_client.get(f'/api/assets/{asset.asset_id}/') + assert r.status_code == 200 + + # Nested Asset View + r = api_client.get( + f'/api/dandisets/{ds.identifier}/versions/{version.version}/assets/{asset.asset_id}/' + ) + assert r.status_code == 200 + + +@pytest.mark.django_db +def test_asset_rest_download_embargoed_admin( + api_client, + draft_version_factory, + draft_asset_factory, + admin_user, + storage, + monkeypatch, +): + monkeypatch.setattr(AssetBlob.blob.field, 'storage', storage) + + api_client.force_authenticate(user=admin_user) + version = draft_version_factory(dandiset__embargo_status=Dandiset.EmbargoStatus.EMBARGOED) + ds = version.dandiset + + # Create an extra asset so that there are multiple assets to filter down + asset = draft_asset_factory(blob__embargoed=True) + version.assets.add(asset) + + # Asset View + r = api_client.get(f'/api/assets/{asset.asset_id}/download/') + assert r.status_code == 302 + + # Nested Asset View + r = api_client.get( + f'/api/dandisets/{ds.identifier}/versions/{version.version}/assets/{asset.asset_id}/download/' + ) + assert r.status_code == 302 + + +@pytest.mark.django_db def test_asset_rest_info(api_client, version, asset): version.assets.add(asset) @@ -439,7 +499,7 @@ def test_asset_rest_info(api_client, version, asset): } -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( ('status', 'validation_error'), [ @@ -465,7 +525,7 @@ def test_asset_rest_validation(api_client, version, asset, status, validation_er } -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create(api_client, user, draft_version, asset_blob): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -537,7 +597,7 @@ def test_asset_create(api_client, user, draft_version, asset_blob): ('foo/.bar', 200), ], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_path_validation( api_client, user, draft_version, asset_blob, path, expected_status_code ): @@ -560,7 +620,7 @@ def test_asset_create_path_validation( assert resp.status_code == expected_status_code, resp.data -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_conflicting_path(api_client, user, draft_version, asset_blob): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -601,7 +661,7 @@ def test_asset_create_conflicting_path(api_client, user, draft_version, asset_bl ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_embargo( api_client, user, draft_version_factory, dandiset_factory, embargoed_asset_blob ): @@ -636,12 +696,41 @@ def test_asset_create_embargo( assert draft_version.status == Version.Status.PENDING -@pytest.mark.django_db() +@pytest.mark.django_db +def test_asset_create_unembargo_in_progress( + api_client, user, draft_version_factory, dandiset_factory, embargoed_asset_blob +): + dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING) + draft_version = draft_version_factory(dandiset=dandiset) + + assign_perm('owner', user, draft_version.dandiset) + api_client.force_authenticate(user=user) + + path = 'test/create/asset.txt' + metadata = { + 'encodingFormat': 'application/x-nwb', + 'path': path, + 'meta': 'data', + 'foo': ['bar', 'baz'], + '1': 2, + } + + resp = api_client.post( + f'/api/dandisets/{draft_version.dandiset.identifier}' + f'/versions/{draft_version.version}/assets/', + {'metadata': metadata, 'blob_id': embargoed_asset_blob.blob_id}, + format='json', + ) + + assert resp.status_code == 400 + + +@pytest.mark.django_db(transaction=True) def test_asset_create_embargoed_asset_blob_open_dandiset( api_client, user, draft_version, embargoed_asset_blob, mocker ): # Ensure that creating an asset in an open dandiset that points to an embargoed asset blob - # results in that asset blob being un-embargoed + # results in that asset blob being unembargoed assert draft_version.dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN assert embargoed_asset_blob.embargoed @@ -679,7 +768,7 @@ def test_asset_create_embargoed_asset_blob_open_dandiset( assert draft_version.status == Version.Status.PENDING -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_zarr(api_client, user, draft_version, zarr_archive): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -776,7 +865,7 @@ def test_asset_create_zarr_validated( assert asset2.status == Asset.Status.VALID -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_zarr_wrong_dandiset( api_client, user, draft_version, zarr_archive_factory, dandiset_factory ): @@ -805,7 +894,7 @@ def test_asset_create_zarr_wrong_dandiset( assert resp.json() == 'The zarr archive belongs to a different dandiset' -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_no_blob_or_zarr(api_client, user, draft_version): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -829,7 +918,7 @@ def test_asset_create_no_blob_or_zarr(api_client, user, draft_version): assert resp.json() == {'blob_id': ['Exactly one of blob_id or zarr_id must be specified.']} -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_blob_and_zarr(api_client, user, draft_version, asset_blob, zarr_archive): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -853,7 +942,7 @@ def test_asset_create_blob_and_zarr(api_client, user, draft_version, asset_blob, assert resp.json() == {'blob_id': ['Exactly one of blob_id or zarr_id must be specified.']} -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_no_valid_blob(api_client, user, draft_version): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -871,7 +960,7 @@ def test_asset_create_no_valid_blob(api_client, user, draft_version): assert resp.status_code == 404 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_no_path(api_client, user, draft_version, asset_blob): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -888,7 +977,7 @@ def test_asset_create_no_path(api_client, user, draft_version, asset_blob): assert resp.data == {'metadata': ['No path specified in metadata.']}, resp.data -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_not_an_owner(api_client, user, version): api_client.force_authenticate(user=user) @@ -902,7 +991,7 @@ def test_asset_create_not_an_owner(api_client, user, version): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_published_version(api_client, user, published_version, asset): assign_perm('owner', user, published_version.dandiset) api_client.force_authenticate(user=user) @@ -923,7 +1012,7 @@ def test_asset_create_published_version(api_client, user, published_version, ass assert resp.data == 'Only draft versions can be modified.' -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_create_existing_path(api_client, user, draft_version, asset_blob, asset_factory): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -949,7 +1038,7 @@ def test_asset_create_existing_path(api_client, user, draft_version, asset_blob, assert resp.status_code == 409 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_rename(api_client, user, draft_version, asset_blob): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -975,7 +1064,7 @@ def test_asset_rest_rename(api_client, user, draft_version, asset_blob): assert resp.json()['asset_id'] != str(asset.asset_id) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_update(api_client, user, draft_version, asset, asset_blob): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -1036,7 +1125,7 @@ def test_asset_rest_update(api_client, user, draft_version, asset, asset_blob): assert draft_version.status == Version.Status.PENDING -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_update_embargo(api_client, user, draft_version, asset, embargoed_asset_blob): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -1092,7 +1181,36 @@ def test_asset_rest_update_embargo(api_client, user, draft_version, asset, embar assert draft_version.status == Version.Status.PENDING -@pytest.mark.django_db() +@pytest.mark.django_db +def test_asset_rest_update_unembargo_in_progress( + api_client, user, draft_version_factory, asset, embargoed_asset_blob +): + draft_version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + assign_perm('owner', user, draft_version.dandiset) + api_client.force_authenticate(user=user) + draft_version.assets.add(asset) + + new_path = 'test/asset/rest/update.txt' + new_metadata = { + 'encodingFormat': 'application/x-nwb', + 'path': new_path, + 'foo': 'bar', + 'num': 123, + 'list': ['a', 'b', 'c'], + } + + resp = api_client.put( + f'/api/dandisets/{draft_version.dandiset.identifier}/' + f'versions/{draft_version.version}/assets/{asset.asset_id}/', + {'metadata': new_metadata, 'blob_id': embargoed_asset_blob.blob_id}, + format='json', + ) + assert resp.status_code == 400 + + +@pytest.mark.django_db def test_asset_rest_update_zarr( api_client, user, @@ -1161,7 +1279,7 @@ def test_asset_rest_update_zarr( assert draft_version.status == Version.Status.PENDING -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_update_unauthorized(api_client, draft_version, asset): draft_version.assets.add(asset) new_metadata = asset.metadata @@ -1177,7 +1295,7 @@ def test_asset_rest_update_unauthorized(api_client, draft_version, asset): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_update_not_an_owner(api_client, user, draft_version, asset): api_client.force_authenticate(user=user) draft_version.assets.add(asset) @@ -1195,7 +1313,7 @@ def test_asset_rest_update_not_an_owner(api_client, user, draft_version, asset): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_update_published_version(api_client, user, published_version, asset): assign_perm('owner', user, published_version.dandiset) api_client.force_authenticate(user=user) @@ -1213,7 +1331,7 @@ def test_asset_rest_update_published_version(api_client, user, published_version assert resp.data == 'Only draft versions can be modified.' -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_update_to_existing(api_client, user, draft_version, asset_factory): old_asset = asset_factory() existing_asset = asset_factory() @@ -1234,7 +1352,7 @@ def test_asset_rest_update_to_existing(api_client, user, draft_version, asset_fa assert resp.status_code == 409 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_delete(api_client, user, draft_version, asset): assign_perm('owner', user, draft_version.dandiset) draft_version.assets.add(asset) @@ -1266,7 +1384,24 @@ def test_asset_rest_delete(api_client, user, draft_version, asset): assert draft_version.status == Version.Status.PENDING -@pytest.mark.django_db() +@pytest.mark.django_db +def test_asset_rest_delete_unembargo_in_progress(api_client, user, draft_version_factory, asset): + draft_version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + assign_perm('owner', user, draft_version.dandiset) + draft_version.assets.add(asset) + + # Make request + api_client.force_authenticate(user=user) + response = api_client.delete( + f'/api/dandisets/{draft_version.dandiset.identifier}/' + f'versions/{draft_version.version}/assets/{asset.asset_id}/' + ) + assert response.status_code == 400 + + +@pytest.mark.django_db def test_asset_rest_delete_zarr( api_client, user, @@ -1296,7 +1431,7 @@ def test_asset_rest_delete_zarr( assert resp.status_code == 204 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_delete_zarr_modified( api_client, user, @@ -1355,7 +1490,7 @@ def test_asset_rest_delete_zarr_modified( assert resp.status_code == 204 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_delete_not_an_owner(api_client, user, version, asset): api_client.force_authenticate(user=user) version.assets.add(asset) @@ -1369,7 +1504,7 @@ def test_asset_rest_delete_not_an_owner(api_client, user, version, asset): assert asset in Asset.objects.all() -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_rest_delete_published_version(api_client, user, published_version, asset): api_client.force_authenticate(user=user) assign_perm('owner', user, published_version.dandiset) @@ -1383,7 +1518,7 @@ def test_asset_rest_delete_published_version(api_client, user, published_version assert response.data == 'Only draft versions can be modified.' -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_download(api_client, storage, version, asset): # Pretend like AssetBlob was defined with the given storage AssetBlob.blob.field.storage = storage @@ -1409,7 +1544,7 @@ def test_asset_download(api_client, storage, version, asset): assert download.content == reader.read() -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_download_embargo( authenticated_api_client, user, @@ -1455,7 +1590,7 @@ def test_asset_download_embargo( assert download.content == reader.read() -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_download_zarr(api_client, version, asset_factory, zarr_archive): asset = asset_factory(blob=None, zarr=zarr_archive) version.assets.add(asset) @@ -1467,7 +1602,7 @@ def test_asset_download_zarr(api_client, version, asset_factory, zarr_archive): assert response.status_code == 400 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_direct_download(api_client, storage, version, asset): # Pretend like AssetBlob was defined with the given storage AssetBlob.blob.field.storage = storage @@ -1490,7 +1625,7 @@ def test_asset_direct_download(api_client, storage, version, asset): assert download.content == reader.read() -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_direct_download_zarr(api_client, version, asset_factory, zarr_archive): asset = asset_factory(blob=None, zarr=zarr_archive) version.assets.add(asset) @@ -1499,7 +1634,7 @@ def test_asset_direct_download_zarr(api_client, version, asset_factory, zarr_arc assert response.status_code == 400 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_direct_download_head(api_client, storage, version, asset): # Pretend like AssetBlob was defined with the given storage AssetBlob.blob.field.storage = storage @@ -1522,14 +1657,14 @@ def test_asset_direct_download_head(api_client, storage, version, asset): assert download.content == reader.read() -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_direct_metadata(api_client, asset): assert ( json.loads(api_client.get(f'/api/assets/{asset.asset_id}/').content) == asset.full_metadata ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_direct_info(api_client, asset): assert api_client.get(f'/api/assets/{asset.asset_id}/info/').json() == { 'asset_id': str(asset.asset_id), @@ -1543,7 +1678,7 @@ def test_asset_direct_info(api_client, asset): } -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( ('glob_pattern', 'expected_paths'), [ diff --git a/dandiapi/api/tests/test_asset_paths.py b/dandiapi/api/tests/test_asset_paths.py index 3d8f5b1ff..3bd35bcf4 100644 --- a/dandiapi/api/tests/test_asset_paths.py +++ b/dandiapi/api/tests/test_asset_paths.py @@ -19,7 +19,7 @@ from dandiapi.api.tasks import publish_dandiset_task -@pytest.fixture() +@pytest.fixture def ingested_asset(draft_version_factory, asset_factory) -> Asset: asset: Asset = asset_factory() version: Version = draft_version_factory() @@ -43,7 +43,7 @@ def test_extract_paths(path, expected): assert extract_paths(path) == expected -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_add_asset(draft_version_factory, asset_factory): # Create asset with version asset: Asset = asset_factory() @@ -84,7 +84,7 @@ def test_asset_path_add_asset(draft_version_factory, asset_factory): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_add_asset_idempotent(draft_version_factory, asset_factory): # Create asset with version asset: Asset = asset_factory() @@ -101,7 +101,7 @@ def test_asset_path_add_asset_idempotent(draft_version_factory, asset_factory): assert path.aggregate_size == asset.size -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_add_asset_conflicting_path(draft_version_factory, asset_factory): # Create asset with version asset1: Asset = asset_factory() @@ -122,7 +122,7 @@ def test_asset_path_add_asset_conflicting_path(draft_version_factory, asset_fact assert version.asset_paths.filter(asset__isnull=False).count() == 1 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_add_version_asset_paths(draft_version_factory, asset_factory): # Create asset with version version: Version = draft_version_factory() @@ -156,7 +156,7 @@ def test_asset_path_add_version_asset_paths(draft_version_factory, asset_factory ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_add_version_asset_paths_idempotent(draft_version_factory, asset_factory): # Create asset with version version: Version = draft_version_factory() @@ -178,7 +178,7 @@ def test_asset_path_add_version_asset_paths_idempotent(draft_version_factory, as assert path.aggregate_size == path.asset.size -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_add_asset_shared_paths(draft_version_factory, asset_factory): # Create asset with version version: Version = draft_version_factory() @@ -197,7 +197,7 @@ def test_asset_path_add_asset_shared_paths(draft_version_factory, asset_factory) assert path.aggregate_files == 2 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_delete_asset(ingested_asset): asset = ingested_asset version = ingested_asset.versions.first() @@ -210,7 +210,7 @@ def test_asset_path_delete_asset(ingested_asset): assert not AssetPath.objects.filter(path=asset.path, version=version).exists() -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_delete_asset_idempotent(ingested_asset): asset = ingested_asset version = ingested_asset.versions.first() @@ -220,7 +220,7 @@ def test_asset_path_delete_asset_idempotent(ingested_asset): delete_asset_paths(asset, version) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_update_asset(draft_version_factory, asset_factory): # Create asset with version version: Version = draft_version_factory() @@ -248,7 +248,7 @@ def test_asset_path_update_asset(draft_version_factory, asset_factory): assert AssetPath.objects.filter(path=path, version=version).exists() -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_delete_asset_shared_paths( draft_version_factory, asset_factory, asset_blob_factory ): @@ -272,7 +272,7 @@ def test_asset_path_delete_asset_shared_paths( assert path.aggregate_files == 1 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_search_asset_paths(draft_version_factory, asset_factory): version: Version = draft_version_factory() assets = [asset_factory(path=path) for path in ['foo/bar.txt', 'foo/baz.txt', 'bar/foo.txt']] @@ -294,8 +294,8 @@ def test_asset_path_search_asset_paths(draft_version_factory, asset_factory): assert all(x.asset is not None for x in qs) -@pytest.mark.django_db() -def test_asset_path_publish_version(draft_version_factory, asset_factory): +@pytest.mark.django_db +def test_asset_path_publish_version(draft_version_factory, asset_factory, user): version: Version = draft_version_factory() asset = asset_factory(path='foo/bar.txt', status=Asset.Status.VALID) version.assets.add(asset) @@ -309,7 +309,7 @@ def test_asset_path_publish_version(draft_version_factory, asset_factory): version.save() # Publish - publish_dandiset_task(version.dandiset.id) + publish_dandiset_task(version.dandiset.id, user.id) # Get published version published_version = ( @@ -325,7 +325,7 @@ def test_asset_path_publish_version(draft_version_factory, asset_factory): AssetPath.objects.get(path=path, version=published_version) -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_get_root_paths(draft_version_factory, asset_factory): version = draft_version_factory() version.assets.add(asset_factory(path='a')) @@ -337,7 +337,7 @@ def test_asset_path_get_root_paths(draft_version_factory, asset_factory): assert get_root_paths(version).count() == 4 -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_path_get_root_paths_many(draft_version_factory, asset_factory): version = draft_version_factory() version.assets.add(asset_factory(path='a')) diff --git a/dandiapi/api/tests/test_audit.py b/dandiapi/api/tests/test_audit.py new file mode 100644 index 000000000..6562755b6 --- /dev/null +++ b/dandiapi/api/tests/test_audit.py @@ -0,0 +1,445 @@ +from __future__ import annotations + +import base64 +import hashlib +from typing import TYPE_CHECKING + +from guardian.shortcuts import assign_perm +import pytest + +from dandiapi.api.asset_paths import add_version_asset_paths +from dandiapi.api.models import AuditRecord, Dandiset +from dandiapi.api.services.metadata import validate_asset_metadata, validate_version_metadata +from dandiapi.api.storage import get_boto_client +from dandiapi.zarr.models import ZarrArchive + +if TYPE_CHECKING: + from django.contrib.auth import User + + from dandiapi.api.models.audit import AuditRecordType + + +def verify_model_properties(rec: AuditRecord, user: User): + """Assert the correct content of the common AuditRecord fields.""" + assert rec.username == user.username + assert rec.user_email == user.email + assert rec.user_fullname == f'{user.first_name} {user.last_name}' + + +def get_latest_audit_record(*, dandiset: Dandiset, record_type: AuditRecordType): + """Get the most recent AuditRecord associated with a given Dandiset and record type.""" + record = ( + AuditRecord.objects.filter(dandiset_id=dandiset.id, record_type=record_type) + .order_by('-timestamp') + .first() + ) + assert record is not None + + return record + + +def create_dandiset( + api_client, + *, + user: User, + name: str | None = None, + metadata: dict | None = None, + embargoed: bool = False, +) -> Dandiset: + """Create a Dandiset for testing through the REST API.""" + name = name or 'Test Dandiset' + metadata = metadata or {} + + api_client.force_authenticate(user=user) + resp = api_client.post( + f'/api/dandisets/{"?embargo=true" if embargoed else ""}', + {'name': name, 'metadata': metadata}, + format='json', + ) + assert resp.status_code == 200 + + dandiset_id = int(resp.json()['identifier']) + return Dandiset.objects.get(pk=dandiset_id) + + +@pytest.mark.django_db +def test_audit_create_dandiset(api_client, user): + """Test create_dandiset audit record.""" + # Create a Dandiset with specified name and metadata. + name = 'Dandiset Extraordinaire' + metadata = {'foo': 'bar'} + dandiset = create_dandiset(api_client, user=user, name=name, metadata=metadata) + + # Verify the create_dandiset audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='create_dandiset') + verify_model_properties(rec, user) + assert rec.details['embargoed'] is False + for key, value in metadata.items(): + assert rec.details['metadata'][key] == value + assert rec.details['metadata']['name'] == name + + +@pytest.mark.django_db +def test_audit_change_owners(api_client, user_factory, draft_version): + """Test the change_owners audit record.""" + # Create some users. + alice = user_factory() + bob = user_factory() + charlie = user_factory() + + dandiset = draft_version.dandiset + assign_perm('owner', alice, dandiset) + + # Change the owners. + new_owners = [bob, charlie] + api_client.force_authenticate(user=alice) + resp = api_client.put( + f'/api/dandisets/{dandiset.identifier}/users/', + [{'username': u.username} for u in new_owners], + format='json', + ) + assert resp.status_code == 200 + + # Verify the change_owners audit record. + def user_info(u): + return { + 'username': u.username, + 'email': u.email, + 'name': f'{u.first_name} {u.last_name}', + } + + rec = get_latest_audit_record(dandiset=dandiset, record_type='change_owners') + verify_model_properties(rec, alice) + assert rec.details == { + 'added_owners': [user_info(u) for u in [bob, charlie]], + 'removed_owners': [user_info(u) for u in [alice]], + } + + +@pytest.mark.django_db +def test_audit_update_metadata(api_client, draft_version, user): + # Create a Dandiset. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + + # Edit its metadata. + metadata = draft_version.metadata + metadata['foo'] = 'bar' + + api_client.force_authenticate(user=user) + resp = api_client.put( + f'/api/dandisets/{dandiset.identifier}/versions/draft/', + { + 'name': 'baz', + 'metadata': metadata, + }, + format='json', + ) + assert resp.status_code == 200 + + # Verify the update_metadata audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='update_metadata') + verify_model_properties(rec, user) + metadata = rec.details['metadata'] + assert metadata['name'] == 'baz' + assert metadata['foo'] == 'bar' + + +@pytest.mark.django_db +def test_audit_delete_dandiset(api_client, user, draft_version): + # Create a Dandiset. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + + # Delete the dandiset. + api_client.force_authenticate(user=user) + resp = api_client.delete(f'/api/dandisets/{dandiset.identifier}/') + assert resp.status_code == 204 + + # Verify the delete_dandiset audit record + rec = get_latest_audit_record(dandiset=dandiset, record_type='delete_dandiset') + verify_model_properties(rec, user) + assert rec.details == {} + + +@pytest.mark.django_db(transaction=True) +def test_audit_unembargo(api_client, user): + """Test the unembargo audit record.""" + # Create an embargoed Dandiset. + dandiset = create_dandiset(api_client, user=user, embargoed=True) + + # Verify the create_dandiset record's embargoed field. + rec = get_latest_audit_record(dandiset=dandiset, record_type='create_dandiset') + verify_model_properties(rec, user) + assert rec.details['embargoed'] is True + + # Unembargo the Dandiset. + resp = api_client.post(f'/api/dandisets/{dandiset.identifier}/unembargo/') + assert resp.status_code == 200 + + # Verify the unembargo_dandiset audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='unembargo_dandiset') + verify_model_properties(rec, user) + assert rec.details == {} + + +@pytest.mark.django_db +def test_audit_add_asset(api_client, user, draft_version, asset_blob_factory): + # Create a Dandiset. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + + # Add a new asset. + blob = asset_blob_factory() + path = 'foo/bar.txt' + api_client.force_authenticate(user=user) + resp = api_client.post( + f'/api/dandisets/{dandiset.identifier}/versions/draft/assets/', + { + 'blob_id': str(blob.blob_id), + 'metadata': { + 'path': path, + }, + }, + ) + assert resp.status_code == 200 + + # Verify add_asset audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='add_asset') + verify_model_properties(rec, user) + assert rec.details['path'] == path + assert rec.details['asset_blob_id'] == blob.blob_id + + +@pytest.mark.django_db +def test_audit_update_asset( + api_client, user, draft_version, asset_blob_factory, draft_asset_factory +): + # Create a Dandiset with an asset. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + + path = 'foo/bar.txt' + asset = draft_asset_factory(path=path) + asset.versions.add(draft_version) + + # Replace the asset. + asset_id = asset.asset_id + blob = asset_blob_factory() + api_client.force_authenticate(user=user) + resp = api_client.put( + f'/api/dandisets/{dandiset.identifier}/versions/draft/assets/{asset_id}/', + { + 'blob_id': str(blob.blob_id), + 'metadata': { + 'path': path, + }, + }, + ) + assert resp.status_code == 200 + + # Verify update_asset audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='update_asset') + verify_model_properties(rec, user) + assert rec.details['path'] == path + assert rec.details['asset_blob_id'] == blob.blob_id + + +@pytest.mark.django_db +def test_audit_remove_asset( + api_client, user, draft_version, asset_blob_factory, draft_asset_factory +): + # Create a Dandiset with an asset. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + + path = 'foo/bar.txt' + asset = draft_asset_factory(path=path) + asset.versions.add(draft_version) + + # Delete the asset. + asset_id = asset.asset_id + api_client.force_authenticate(user=user) + resp = api_client.delete( + f'/api/dandisets/{dandiset.identifier}/versions/draft/assets/{asset_id}/', + ) + assert resp.status_code == 204 + + # Verify remove_asset audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='remove_asset') + verify_model_properties(rec, user) + assert rec.details['path'] == path + assert rec.details['asset_id'] == str(asset_id) + + +@pytest.mark.django_db(transaction=True) +def test_audit_publish_dandiset( + api_client, user, dandiset_factory, draft_version_factory, draft_asset_factory +): + # Create a Dandiset whose draft version has one asset. + dandiset = dandiset_factory() + assign_perm('owner', user, dandiset) + draft_version = draft_version_factory(dandiset=dandiset) + draft_asset = draft_asset_factory() + draft_version.assets.add(draft_asset) + add_version_asset_paths(draft_version) + + # Validate the asset and then the draft version (to make it publishable + # through the API). + validate_asset_metadata(asset=draft_asset) + validate_version_metadata(version=draft_version) + + # Publish the Dandiset. + api_client.force_authenticate(user=user) + resp = api_client.post( + f'/api/dandisets/{dandiset.identifier}/versions/draft/publish/', + ) + assert resp.status_code == 202 + + # Verify publish_dandiset audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='publish_dandiset') + verify_model_properties(rec, user) + assert rec.details['version'] == dandiset.most_recent_published_version.version + + +@pytest.mark.django_db +def test_audit_zarr_create(api_client, user, draft_version): + # Create a Dandiset. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + + # Create a Zarr archive. + api_client.force_authenticate(user=user) + resp = api_client.post( + '/api/zarr/', + { + 'name': 'Test Zarr', + 'dandiset': dandiset.identifier, + }, + ) + assert resp.status_code == 200 + + # Verify the create_zarr audit record. + zarr_id = resp.json()['zarr_id'] + + rec = get_latest_audit_record(dandiset=dandiset, record_type='create_zarr') + verify_model_properties(rec, user) + assert rec.details['name'] == 'Test Zarr' + assert rec.details['zarr_id'] == zarr_id + + +@pytest.mark.django_db +def test_audit_upload_zarr_chunks(api_client, user, draft_version, zarr_archive_factory, storage): + ZarrArchive.storage = storage + + # Create a Dandiset and a Zarr archive. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + zarr = zarr_archive_factory(dandiset=dandiset) + + # Request some chunk uploads. + b64hash = base64.b64encode(hashlib.md5(b'a').hexdigest().encode()) + paths = ['a.txt', 'b.txt', 'c.txt'] + api_client.force_authenticate(user=user) + resp = api_client.post( + f'/api/zarr/{zarr.zarr_id}/files/', + [{'path': path, 'base64md5': b64hash} for path in paths], + ) + assert resp.status_code == 200 + + # Verify the upload_zarr_chunks audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='upload_zarr_chunks') + verify_model_properties(rec, user) + assert rec.details['zarr_id'] == zarr.zarr_id + assert rec.details['paths'] == paths + + +@pytest.mark.django_db +def test_audit_finalize_zarr( + api_client, user, draft_version, zarr_archive_factory, storage, settings +): + ZarrArchive.storage = storage + + # Create a Dandiset and a Zarr archive. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + zarr = zarr_archive_factory(dandiset=dandiset) + + # Request some chunk uploads. + b64hash = base64.b64encode(hashlib.md5(b'a').hexdigest().encode()) + paths = ['a.txt', 'b.txt', 'c.txt'] + api_client.force_authenticate(user=user) + resp = api_client.post( + f'/api/zarr/{zarr.zarr_id}/files/', + [{'path': path, 'base64md5': b64hash} for path in paths], + ) + assert resp.status_code == 200 + + # Upload to the presigned URLs. + boto = get_boto_client() + zarr_archive = ZarrArchive.objects.get(zarr_id=zarr.zarr_id) + for path in paths: + boto.put_object( + Bucket=settings.DANDI_DANDISETS_BUCKET_NAME, Key=zarr_archive.s3_path(path), Body=b'a' + ) + + # Finalize the zarr. + resp = api_client.post( + f'/api/zarr/{zarr.zarr_id}/finalize/', + ) + assert resp.status_code == 204 + + # Verify the finalize_zarr audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='finalize_zarr') + verify_model_properties(rec, user) + assert rec.details['zarr_id'] == zarr.zarr_id + + +@pytest.mark.django_db +def test_audit_delete_zarr_chunks( + api_client, user, draft_version, zarr_archive_factory, storage, settings +): + ZarrArchive.storage = storage + + # Create a Dandiset and a Zarr archive. + dandiset = draft_version.dandiset + assign_perm('owner', user, dandiset) + zarr = zarr_archive_factory(dandiset=dandiset) + + # Request some chunk uploads. + b64hash = base64.b64encode(hashlib.md5(b'a').hexdigest().encode()) + paths = ['a.txt', 'b.txt', 'c.txt'] + api_client.force_authenticate(user=user) + resp = api_client.post( + f'/api/zarr/{zarr.zarr_id}/files/', + [{'path': path, 'base64md5': b64hash} for path in paths], + ) + assert resp.status_code == 200 + + # Upload to the presigned URLs. + boto = get_boto_client() + zarr_archive = ZarrArchive.objects.get(zarr_id=zarr.zarr_id) + for path in paths: + boto.put_object( + Bucket=settings.DANDI_DANDISETS_BUCKET_NAME, Key=zarr_archive.s3_path(path), Body=b'a' + ) + + # Finalize the zarr. + resp = api_client.post( + f'/api/zarr/{zarr.zarr_id}/finalize/', + ) + assert resp.status_code == 204 + + # Delete some zarr chunks. + deleted = ['b.txt', 'c.txt'] + resp = api_client.delete( + f'/api/zarr/{zarr.zarr_id}/files/', + [{'path': path} for path in deleted], + ) + assert resp.status_code == 204 + + # Verify the delete_zarr_chunks audit record. + rec = get_latest_audit_record(dandiset=dandiset, record_type='delete_zarr_chunks') + verify_model_properties(rec, user) + assert rec.details['zarr_id'] == zarr.zarr_id + assert rec.details['paths'] == deleted diff --git a/dandiapi/api/tests/test_auth.py b/dandiapi/api/tests/test_auth.py index 24cd3a9f5..f9a13ad08 100644 --- a/dandiapi/api/tests/test_auth.py +++ b/dandiapi/api/tests/test_auth.py @@ -4,24 +4,24 @@ from rest_framework.authtoken.models import Token -@pytest.fixture() +@pytest.fixture def token(user) -> Token: return Token.objects.get(user=user) -@pytest.mark.django_db() +@pytest.mark.django_db def test_auth_token_retrieve(api_client, user, token): api_client.force_authenticate(user=user) assert api_client.get('/api/auth/token/').data == token.key -@pytest.mark.django_db() +@pytest.mark.django_db def test_auth_token_retrieve_unauthorized(api_client, user, token): assert api_client.get('/api/auth/token/').status_code == 401 -@pytest.mark.django_db() +@pytest.mark.django_db def test_auth_token_refresh(api_client, user, token): api_client.force_authenticate(user=user) @@ -32,6 +32,6 @@ def test_auth_token_refresh(api_client, user, token): assert new_token_key == new_token.key -@pytest.mark.django_db() +@pytest.mark.django_db def test_auth_token_reset_unauthorized(api_client, user, token): assert api_client.post('/api/auth/token/').status_code == 401 diff --git a/dandiapi/api/tests/test_create_dev_dandiset.py b/dandiapi/api/tests/test_create_dev_dandiset.py index 6a738bf50..b3204870b 100644 --- a/dandiapi/api/tests/test_create_dev_dandiset.py +++ b/dandiapi/api/tests/test_create_dev_dandiset.py @@ -6,7 +6,7 @@ from dandiapi.api.models import Asset, AssetBlob, Dandiset, Version -@pytest.mark.django_db() +@pytest.mark.django_db def test_create_dev_dandiset(user): create_dev_dandiset('--name', 'My Test Dandiset', '--owner', user.email) diff --git a/dandiapi/api/tests/test_dandiset.py b/dandiapi/api/tests/test_dandiset.py index 34e656d2d..1dbf86355 100644 --- a/dandiapi/api/tests/test_dandiset.py +++ b/dandiapi/api/tests/test_dandiset.py @@ -13,7 +13,7 @@ from .fuzzy import DANDISET_ID_RE, DANDISET_SCHEMA_ID_RE, TIMESTAMP_RE, UTC_ISO_TIMESTAMP_RE -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_identifier(dandiset): assert int(dandiset.identifier) == dandiset.id @@ -24,7 +24,7 @@ def test_dandiset_identifer_missing(dandiset_factory): assert dandiset.identifier == '' -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_published_count( dandiset_factory, draft_version_factory, published_version_factory ): @@ -52,7 +52,7 @@ def test_dandiset_published_count( ('UNEMBARGOING', 'not-owner', False), ], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_manager_visible_to( dandiset_factory, user_factory, embargo_status, user_status, visible ): @@ -64,7 +64,7 @@ def test_dandiset_manager_visible_to( assert list(Dandiset.objects.visible_to(user)) == ([dandiset] if visible else []) -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_list(api_client, user, dandiset): # Test un-authenticated request assert api_client.get('/api/dandisets/', {'draft': 'true', 'empty': 'true'}).json() == { @@ -113,7 +113,7 @@ def test_dandiset_rest_list(api_client, user, dandiset): 'draft-false-empty-false', ], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_versions( api_client, dandiset_factory, @@ -203,7 +203,7 @@ def expected_serialization(dandiset: Dandiset): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_list_for_user(api_client, user, dandiset_factory): dandiset = dandiset_factory() # Create an extra dandiset that should not be included in the response @@ -228,7 +228,7 @@ def test_dandiset_rest_list_for_user(api_client, user, dandiset_factory): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_retrieve(api_client, dandiset): assert api_client.get(f'/api/dandisets/{dandiset.identifier}/').data == { 'identifier': dandiset.identifier, @@ -246,7 +246,7 @@ def test_dandiset_rest_retrieve(api_client, dandiset): [choice[0] for choice in Dandiset.EmbargoStatus.choices], ids=[choice[1] for choice in Dandiset.EmbargoStatus.choices], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_embargo_access( api_client, user_factory, dandiset_factory, embargo_status: str ): @@ -321,7 +321,7 @@ def test_dandiset_rest_embargo_access( ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_create(api_client, user): user.first_name = 'John' user.last_name = 'Doe' @@ -343,6 +343,7 @@ def test_dandiset_rest_create(api_client, user): 'version': 'draft', 'name': name, 'asset_count': 0, + 'active_uploads': 0, 'size': 0, 'dandiset': { 'identifier': DANDISET_ID_RE, @@ -412,7 +413,7 @@ def test_dandiset_rest_create(api_client, user): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_create_with_identifier(api_client, admin_user): admin_user.first_name = 'John' admin_user.last_name = 'Doe' @@ -436,6 +437,7 @@ def test_dandiset_rest_create_with_identifier(api_client, admin_user): 'version': 'draft', 'name': name, 'asset_count': 0, + 'active_uploads': 0, 'size': 0, 'dandiset': { 'identifier': identifier, @@ -505,7 +507,7 @@ def test_dandiset_rest_create_with_identifier(api_client, admin_user): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_create_with_contributor(api_client, admin_user): admin_user.first_name = 'John' admin_user.last_name = 'Doe' @@ -543,6 +545,7 @@ def test_dandiset_rest_create_with_contributor(api_client, admin_user): 'version': 'draft', 'name': name, 'asset_count': 0, + 'active_uploads': 0, 'size': 0, 'dandiset': { 'identifier': identifier, @@ -611,7 +614,7 @@ def test_dandiset_rest_create_with_contributor(api_client, admin_user): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_create_embargoed(api_client, user): user.first_name = 'John' user.last_name = 'Doe' @@ -633,6 +636,7 @@ def test_dandiset_rest_create_embargoed(api_client, user): 'version': 'draft', 'name': name, 'asset_count': 0, + 'active_uploads': 0, 'size': 0, 'dandiset': { 'identifier': DANDISET_ID_RE, @@ -702,7 +706,7 @@ def test_dandiset_rest_create_embargoed(api_client, user): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_create_with_duplicate_identifier(api_client, admin_user, dandiset): api_client.force_authenticate(user=admin_user) name = 'Test Dandiset' @@ -718,7 +722,7 @@ def test_dandiset_rest_create_with_duplicate_identifier(api_client, admin_user, assert response.data == f'Dandiset {identifier} already exists' -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_create_with_invalid_identifier(api_client, admin_user): api_client.force_authenticate(user=admin_user) name = 'Test Dandiset' @@ -734,24 +738,32 @@ def test_dandiset_rest_create_with_invalid_identifier(api_client, admin_user): assert response.data == f'Invalid Identifier {identifier}' +@pytest.mark.django_db @pytest.mark.parametrize( - ('embargo_status'), - [choice[0] for choice in Dandiset.EmbargoStatus.choices], - ids=[choice[1] for choice in Dandiset.EmbargoStatus.choices], + ('embargo_status', 'success'), + [ + (Dandiset.EmbargoStatus.OPEN, True), + (Dandiset.EmbargoStatus.EMBARGOED, True), + (Dandiset.EmbargoStatus.UNEMBARGOING, False), + ], ) -@pytest.mark.django_db() -def test_dandiset_rest_delete(api_client, draft_version_factory, user, embargo_status): - draft_version = draft_version_factory(dandiset__embargo_status=embargo_status) +def test_dandiset_rest_delete(api_client, draft_version_factory, user, embargo_status, success): api_client.force_authenticate(user=user) - assign_perm('owner', user, draft_version.dandiset) + # Ensure that open or embargoed dandisets can be deleted + draft_version = draft_version_factory(dandiset__embargo_status=embargo_status) + assign_perm('owner', user, draft_version.dandiset) response = api_client.delete(f'/api/dandisets/{draft_version.dandiset.identifier}/') - assert response.status_code == 204 - assert not Dandiset.objects.all() + if success: + assert response.status_code == 204 + assert not Dandiset.objects.all() + else: + assert response.status_code >= 400 + assert Dandiset.objects.count() == 1 -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_delete_with_zarrs( api_client, draft_version, @@ -773,7 +785,7 @@ def test_dandiset_rest_delete_with_zarrs( assert not Dandiset.objects.all() -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_delete_not_an_owner(api_client, draft_version, user): api_client.force_authenticate(user=user) @@ -783,7 +795,7 @@ def test_dandiset_rest_delete_not_an_owner(api_client, draft_version, user): assert draft_version.dandiset in Dandiset.objects.all() -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_delete_published(api_client, published_version, user): api_client.force_authenticate(user=user) assign_perm('owner', user, published_version.dandiset) @@ -795,7 +807,7 @@ def test_dandiset_rest_delete_published(api_client, published_version, user): assert published_version.dandiset in Dandiset.objects.all() -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_delete_published_admin(api_client, published_version, admin_user): api_client.force_authenticate(user=admin_user) @@ -806,7 +818,7 @@ def test_dandiset_rest_delete_published_admin(api_client, published_version, adm assert published_version.dandiset in Dandiset.objects.all() -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_get_owners(api_client, dandiset, social_account): assign_perm('owner', social_account.user, dandiset) @@ -817,28 +829,41 @@ def test_dandiset_rest_get_owners(api_client, dandiset, social_account): { 'username': social_account.extra_data['login'], 'name': social_account.extra_data['name'], + 'email': None, } ] -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_get_owners_no_social_account(api_client, dandiset, user): assign_perm('owner', user, dandiset) resp = api_client.get(f'/api/dandisets/{dandiset.identifier}/users/') assert resp.status_code == 200 - assert resp.data == [{'username': user.username, 'name': f'{user.first_name} {user.last_name}'}] + assert resp.data == [ + { + 'username': user.username, + 'name': f'{user.first_name} {user.last_name}', + 'email': None, + } + ] -@pytest.mark.django_db() +@pytest.mark.parametrize( + 'embargo_status', + [Dandiset.EmbargoStatus.OPEN, Dandiset.EmbargoStatus.EMBARGOED], +) +@pytest.mark.django_db def test_dandiset_rest_change_owner( api_client, - draft_version, + draft_version_factory, user_factory, social_account_factory, mailoutbox, + embargo_status, ): + draft_version = draft_version_factory(dandiset__embargo_status=embargo_status) dandiset = draft_version.dandiset user1 = user_factory() user2 = user_factory() @@ -857,6 +882,7 @@ def test_dandiset_rest_change_owner( { 'username': social_account2.extra_data['login'], 'name': social_account2.extra_data['name'], + 'email': social_account2.extra_data['email'], } ] assert list(dandiset.owners) == [user2] @@ -868,7 +894,38 @@ def test_dandiset_rest_change_owner( assert mailoutbox[1].to == [user2.email] -@pytest.mark.django_db() +@pytest.mark.django_db +def test_dandiset_rest_change_owners_unembargo_in_progress( + api_client, + draft_version_factory, + user_factory, + social_account_factory, +): + """Test that a dandiset undergoing unembargo prevents user modification.""" + draft_version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + dandiset = draft_version.dandiset + user1 = user_factory() + user2 = user_factory() + social_account1 = social_account_factory(user=user1) + social_account2 = social_account_factory(user=user2) + assign_perm('owner', user1, dandiset) + api_client.force_authenticate(user=user1) + + resp = api_client.put( + f'/api/dandisets/{dandiset.identifier}/users/', + [ + {'username': social_account1.extra_data['login']}, + {'username': social_account2.extra_data['login']}, + ], + format='json', + ) + + assert resp.status_code == 400 + + +@pytest.mark.django_db def test_dandiset_rest_add_owner( api_client, draft_version, @@ -898,10 +955,12 @@ def test_dandiset_rest_add_owner( { 'username': social_account1.extra_data['login'], 'name': social_account1.extra_data['name'], + 'email': social_account1.extra_data['email'], }, { 'username': social_account2.extra_data['login'], 'name': social_account2.extra_data['name'], + 'email': social_account2.extra_data['email'], }, ] assert list(dandiset.owners) == [user1, user2] @@ -911,7 +970,7 @@ def test_dandiset_rest_add_owner( assert mailoutbox[0].to == [user2.email] -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_remove_owner( api_client, draft_version, @@ -938,6 +997,7 @@ def test_dandiset_rest_remove_owner( { 'username': social_account1.extra_data['login'], 'name': social_account1.extra_data['name'], + 'email': social_account1.extra_data['email'], } ] assert list(dandiset.owners) == [user1] @@ -947,7 +1007,7 @@ def test_dandiset_rest_remove_owner( assert mailoutbox[0].to == [user2.email] -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_not_an_owner(api_client, dandiset, user): api_client.force_authenticate(user=user) @@ -959,7 +1019,7 @@ def test_dandiset_rest_not_an_owner(api_client, dandiset, user): assert resp.status_code == 403 -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_delete_all_owners_fails(api_client, dandiset, user): assign_perm('owner', user, dandiset) api_client.force_authenticate(user=user) @@ -973,7 +1033,7 @@ def test_dandiset_rest_delete_all_owners_fails(api_client, dandiset, user): assert resp.data == ['Cannot remove all draft owners'] -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_add_owner_does_not_exist(api_client, dandiset, user): assign_perm('owner', user, dandiset) api_client.force_authenticate(user=user) @@ -988,7 +1048,7 @@ def test_dandiset_rest_add_owner_does_not_exist(api_client, dandiset, user): assert resp.data == [f'User {fake_name} not found'] -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_add_malformed(api_client, dandiset, user): assign_perm('owner', user, dandiset) api_client.force_authenticate(user=user) @@ -1002,17 +1062,17 @@ def test_dandiset_rest_add_malformed(api_client, dandiset, user): assert resp.data == [{'username': ['This field is required.']}] -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_search_no_query(api_client): assert api_client.get('/api/dandisets/').data['results'] == [] -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_search_empty_query(api_client): assert api_client.get('/api/dandisets/', {'search': ''}).data['results'] == [] -@pytest.mark.django_db() +@pytest.mark.django_db def test_dandiset_rest_search_identifier(api_client, draft_version): results = api_client.get( '/api/dandisets/', @@ -1027,7 +1087,7 @@ def test_dandiset_rest_search_identifier(api_client, draft_version): assert results[0]['draft_version']['name'] == draft_version.name -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( 'contributors', [None, 'string', 1, [], {}], diff --git a/dandiapi/api/tests/test_embargo.py b/dandiapi/api/tests/test_embargo.py index ff249c30e..206510e8e 100644 --- a/dandiapi/api/tests/test_embargo.py +++ b/dandiapi/api/tests/test_embargo.py @@ -61,7 +61,7 @@ def embargo_status(request): ), ], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_embargo_visibility( api_client, user, @@ -89,7 +89,7 @@ def test_embargo_visibility( assert response.status_code == 403 -@pytest.mark.django_db() +@pytest.mark.django_db def test_remove_asset_blob_embargoed_tag_fails_on_embargod(embargoed_asset_blob, asset_blob): with pytest.raises(AssetBlobEmbargoedError): remove_asset_blob_embargoed_tag(embargoed_asset_blob) diff --git a/dandiapi/api/tests/test_garbage.py b/dandiapi/api/tests/test_garbage.py index 0a191cef9..853c7b46d 100644 --- a/dandiapi/api/tests/test_garbage.py +++ b/dandiapi/api/tests/test_garbage.py @@ -9,7 +9,7 @@ from dandiapi.api.models import Asset, Version -@pytest.mark.django_db() +@pytest.mark.django_db def test_stale_assets(version: Version, draft_asset_factory, published_asset_factory): stale_date = timezone.now() - timedelta(days=8) diff --git a/dandiapi/api/tests/test_manifests.py b/dandiapi/api/tests/test_manifests.py index a148479f3..bd9ac342c 100644 --- a/dandiapi/api/tests/test_manifests.py +++ b/dandiapi/api/tests/test_manifests.py @@ -21,7 +21,7 @@ from django.core.files.storage import Storage -@pytest.mark.django_db() +@pytest.mark.django_db def test_write_dandiset_jsonld(storage: Storage, version: Version): # Pretend like AssetBlob was defined with the given storage # The task piggybacks off of the AssetBlob storage to write the yamls @@ -38,7 +38,7 @@ def test_write_dandiset_jsonld(storage: Storage, version: Version): assert f.read() == expected -@pytest.mark.django_db() +@pytest.mark.django_db def test_write_assets_jsonld(storage: Storage, version: Version, asset_factory): # Pretend like AssetBlob was defined with the given storage # The task piggybacks off of the AssetBlob storage to write the yamls @@ -58,7 +58,7 @@ def test_write_assets_jsonld(storage: Storage, version: Version, asset_factory): assert f.read() == expected -@pytest.mark.django_db() +@pytest.mark.django_db def test_write_collection_jsonld(storage: Storage, version: Version, asset): # Pretend like AssetBlob was defined with the given storage # The task piggybacks off of the AssetBlob storage to write the yamls @@ -85,7 +85,7 @@ def test_write_collection_jsonld(storage: Storage, version: Version, asset): assert f.read() == expected -@pytest.mark.django_db() +@pytest.mark.django_db def test_write_dandiset_yaml(storage: Storage, version: Version): # Pretend like AssetBlob was defined with the given storage # The task piggybacks off of the AssetBlob storage to write the yamls @@ -102,7 +102,7 @@ def test_write_dandiset_yaml(storage: Storage, version: Version): assert f.read() == expected -@pytest.mark.django_db() +@pytest.mark.django_db def test_write_assets_yaml(storage: Storage, version: Version, asset_factory): # Pretend like AssetBlob was defined with the given storage # The task piggybacks off of the AssetBlob storage to write the yamls @@ -122,7 +122,7 @@ def test_write_assets_yaml(storage: Storage, version: Version, asset_factory): assert f.read() == expected -@pytest.mark.django_db() +@pytest.mark.django_db def test_write_dandiset_yaml_already_exists(storage: Storage, version: Version): # Pretend like AssetBlob was defined with the given storage # The task piggybacks off of the AssetBlob storage to write the yamls @@ -142,7 +142,7 @@ def test_write_dandiset_yaml_already_exists(storage: Storage, version: Version): assert f.read() == expected -@pytest.mark.django_db() +@pytest.mark.django_db def test_write_assets_yaml_already_exists(storage: Storage, version: Version, asset_factory): # Pretend like AssetBlob was defined with the given storage # The task piggybacks off of the AssetBlob storage to write the yamls diff --git a/dandiapi/api/tests/test_pagination.py b/dandiapi/api/tests/test_pagination.py index bc890be81..f6ec4ffcd 100644 --- a/dandiapi/api/tests/test_pagination.py +++ b/dandiapi/api/tests/test_pagination.py @@ -3,66 +3,7 @@ import pytest -@pytest.mark.django_db() -def test_dandiset_pagination(api_client, dandiset_factory): - endpoint = '/api/dandisets/' - for _ in range(10): - dandiset_factory() - - # First page - resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5}).json() - assert resp['count'] == 10 - assert resp['next'] is not None - page_one = resp['results'] - assert len(page_one) == 5 - - # Second page - resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5, 'page': 2}).json() - assert resp['count'] is None - assert resp['next'] is None - page_two = resp['results'] - assert len(page_two) == 5 - - # Full page - resp = api_client.get(endpoint, {'order': 'id', 'page_size': 100}).json() - assert resp['count'] == 10 - assert resp['next'] is None - full_page = resp['results'] - assert len(full_page) == 10 - - # Assert full list is ordered the same as both paginated lists - assert full_page == page_one + page_two - - -@pytest.mark.django_db() -def test_version_pagination(api_client, dandiset, published_version_factory): - endpoint = f'/api/dandisets/{dandiset.identifier}/versions/' - - for _ in range(10): - published_version_factory(dandiset=dandiset) - - resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5}).json() - - assert resp['count'] == 10 - page_one = resp['results'] - assert len(page_one) == 5 - - resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5, 'page': 2}).json() - assert resp['count'] is None - assert resp['next'] is None - page_two = resp['results'] - assert len(page_two) == 5 - - resp = api_client.get(endpoint, {'order': 'created', 'page_size': 100}).json() - assert resp['count'] == 10 - assert resp['next'] is None - full_page = resp['results'] - assert len(full_page) == 10 - - assert full_page == page_one + page_two - - -@pytest.mark.django_db() +@pytest.mark.django_db def test_asset_pagination(api_client, version, asset_factory): endpoint = f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/' @@ -92,30 +33,3 @@ def test_asset_pagination(api_client, version, asset_factory): # Assert full list is ordered the same as both paginated lists assert full_page == page_one + page_two - - -@pytest.mark.django_db() -def test_zarr_pagination(api_client, zarr_archive_factory): - endpoint = '/api/zarr/' - - for _ in range(10): - zarr_archive_factory() - - resp = api_client.get(endpoint, {'page_size': 5}).json() - assert resp['count'] == 10 - page_one = resp['results'] - assert len(page_one) == 5 - - resp = api_client.get(endpoint, {'page_size': 5, 'page': 2}).json() - assert resp['count'] is None - assert resp['next'] is None - page_two = resp['results'] - assert len(page_two) == 5 - - resp = api_client.get(endpoint, {'page_size': 100}).json() - assert resp['count'] == 10 - assert resp['next'] is None - full_page = resp['results'] - assert len(full_page) == 10 - - assert full_page == page_one + page_two diff --git a/dandiapi/api/tests/test_permission.py b/dandiapi/api/tests/test_permission.py index 14ea28dc2..af744ce2f 100644 --- a/dandiapi/api/tests/test_permission.py +++ b/dandiapi/api/tests/test_permission.py @@ -67,7 +67,7 @@ ('post', '/api/zarr/{zarr.zarr_id}/files/', True), ], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_approved_or_readonly( api_client, user, diff --git a/dandiapi/api/tests/test_search.py b/dandiapi/api/tests/test_search.py index bd5511774..d94cef3c9 100644 --- a/dandiapi/api/tests/test_search.py +++ b/dandiapi/api/tests/test_search.py @@ -8,7 +8,7 @@ from rest_framework.test import APIClient -@pytest.mark.django_db() +@pytest.mark.django_db def test_search_rest(api_client: APIClient) -> None: resp = api_client.get('/api/dandisets/search/') assert resp.status_code == 200 diff --git a/dandiapi/api/tests/test_stats.py b/dandiapi/api/tests/test_stats.py index 946f95102..d1579fab6 100644 --- a/dandiapi/api/tests/test_stats.py +++ b/dandiapi/api/tests/test_stats.py @@ -6,7 +6,7 @@ from dandiapi.api.models import UserMetadata -@pytest.mark.django_db() +@pytest.mark.django_db def test_stats_baseline(api_client): assert api_client.get('/api/stats/').data == { 'dandiset_count': 0, @@ -16,7 +16,7 @@ def test_stats_baseline(api_client): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_stats_draft(api_client, dandiset): stats = api_client.get('/api/stats/').data @@ -24,7 +24,7 @@ def test_stats_draft(api_client, dandiset): assert stats['published_dandiset_count'] == 0 -@pytest.mark.django_db() +@pytest.mark.django_db def test_stats_published(api_client, published_version_factory): published_version_factory() stats = api_client.get('/api/stats/').data @@ -67,7 +67,7 @@ def test_stats_user(api_client, user_factory): assert stats['user_count'] == approved_user_count -@pytest.mark.django_db() +@pytest.mark.django_db def test_stats_asset(api_client, version, asset): version.assets.add(asset) stats = api_client.get('/api/stats/').data @@ -75,7 +75,7 @@ def test_stats_asset(api_client, version, asset): assert stats['size'] == asset.size -@pytest.mark.django_db() +@pytest.mark.django_db def test_stats_embargoed_asset(api_client, version, asset_factory, embargoed_asset_blob_factory): embargoed_asset = asset_factory() embargoed_asset.blob = embargoed_asset_blob_factory() @@ -85,7 +85,7 @@ def test_stats_embargoed_asset(api_client, version, asset_factory, embargoed_ass assert stats['size'] == embargoed_asset.size -@pytest.mark.django_db() +@pytest.mark.django_db def test_stats_embargoed_and_regular_blobs( api_client, version, asset_factory, asset_blob_factory, embargoed_asset_blob_factory ): diff --git a/dandiapi/api/tests/test_tasks.py b/dandiapi/api/tests/test_tasks.py index 8e5fe6aa8..3cb632bb9 100644 --- a/dandiapi/api/tests/test_tasks.py +++ b/dandiapi/api/tests/test_tasks.py @@ -22,7 +22,7 @@ from rest_framework.test import APIClient -@pytest.mark.django_db() +@pytest.mark.django_db def test_calculate_checksum_task(storage: Storage, asset_blob_factory): # Pretend like AssetBlob was defined with the given storage AssetBlob.blob.field.storage = storage @@ -40,7 +40,7 @@ def test_calculate_checksum_task(storage: Storage, asset_blob_factory): assert asset_blob.sha256 == sha256 -@pytest.mark.django_db() +@pytest.mark.django_db def test_calculate_checksum_task_embargo( storage: Storage, embargoed_asset_blob_factory, monkeypatch ): @@ -60,7 +60,7 @@ def test_calculate_checksum_task_embargo( assert asset_blob.sha256 == sha256 -@pytest.mark.django_db() +@pytest.mark.django_db def test_write_manifest_files(storage: Storage, version: Version, asset_factory): # Pretend like AssetBlob was defined with the given storage # The task piggybacks off of the AssetBlob storage to write the yamls @@ -100,7 +100,7 @@ def test_write_manifest_files(storage: Storage, version: Version, asset_factory) assert storage.exists(collection_jsonld_path) -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_asset_metadata(draft_asset: Asset): tasks.validate_asset_metadata_task(draft_asset.id) @@ -110,7 +110,7 @@ def test_validate_asset_metadata(draft_asset: Asset): assert draft_asset.validation_errors == [] -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_asset_metadata_malformed_schema_version(draft_asset: Asset): draft_asset.metadata['schemaVersion'] = 'xxx' draft_asset.save() @@ -127,7 +127,7 @@ def test_validate_asset_metadata_malformed_schema_version(draft_asset: Asset): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_asset_metadata_no_encoding_format(draft_asset: Asset): del draft_asset.metadata['encodingFormat'] draft_asset.save() @@ -142,7 +142,7 @@ def test_validate_asset_metadata_no_encoding_format(draft_asset: Asset): ] -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_asset_metadata_no_digest(draft_asset: Asset): draft_asset.blob.sha256 = None draft_asset.blob.save() @@ -160,7 +160,7 @@ def test_validate_asset_metadata_no_digest(draft_asset: Asset): ] -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_asset_metadata_malformed_keywords(draft_asset: Asset): draft_asset.metadata['keywords'] = 'foo' draft_asset.save() @@ -175,7 +175,7 @@ def test_validate_asset_metadata_malformed_keywords(draft_asset: Asset): ] -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_asset_metadata_saves_version(draft_asset: Asset, draft_version: Version): draft_version.assets.add(draft_asset) @@ -191,7 +191,7 @@ def test_validate_asset_metadata_saves_version(draft_asset: Asset, draft_version assert draft_version.modified != old_datetime -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_version_metadata(draft_version: Version, asset: Asset): draft_version.assets.add(asset) @@ -210,7 +210,27 @@ def test_validate_version_metadata(draft_version: Version, asset: Asset): assert draft_version.modified > old_modified -@pytest.mark.django_db() +@pytest.mark.django_db +def test_validate_version_metadata_non_pending_version(draft_version: Version, asset: Asset): + # Bypass .save to manually set an older timestamp, set status to INVALID + old_modified = timezone.now() - datetime.timedelta(minutes=10) + updated = Version.objects.filter(id=draft_version.id).update( + modified=old_modified, status=Version.Status.INVALID, validation_errors=['foobar'] + ) + assert updated == 1 + + draft_version.refresh_from_db() + old_validation_errors = draft_version.validation_errors + tasks.validate_version_metadata_task(draft_version.id) + draft_version.refresh_from_db() + + # Assert fields not updated + assert draft_version.status == Version.Status.INVALID + assert draft_version.validation_errors == old_validation_errors + assert draft_version.modified == old_modified + + +@pytest.mark.django_db def test_validate_version_metadata_no_side_effects(draft_version: Version, asset: Asset): draft_version.assets.add(asset) @@ -232,7 +252,7 @@ def test_validate_version_metadata_no_side_effects(draft_version: Version, asset assert old_data == new_data -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_version_metadata_malformed_schema_version(draft_version: Version, asset: Asset): draft_version.assets.add(asset) @@ -250,7 +270,7 @@ def test_validate_version_metadata_malformed_schema_version(draft_version: Versi ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_version_metadata_no_description(draft_version: Version, asset: Asset): draft_version.assets.add(asset) @@ -267,7 +287,7 @@ def test_validate_version_metadata_no_description(draft_version: Version, asset: ] -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_version_metadata_malformed_license(draft_version: Version, asset: Asset): draft_version.assets.add(asset) @@ -284,7 +304,7 @@ def test_validate_version_metadata_malformed_license(draft_version: Version, ass ] -@pytest.mark.django_db() +@pytest.mark.django_db def test_validate_version_metadata_no_assets( draft_version: Version, ): @@ -301,7 +321,7 @@ def test_validate_version_metadata_no_assets( ] -@pytest.mark.django_db() +@pytest.mark.django_db def test_publish_task( api_client: APIClient, user: User, @@ -327,7 +347,7 @@ def test_publish_task( starting_version_count = draft_version.dandiset.versions.count() with django_capture_on_commit_callbacks(execute=True): - tasks.publish_dandiset_task(draft_version.dandiset.id) + tasks.publish_dandiset_task(draft_version.dandiset.id, user.id) assert draft_version.dandiset.versions.count() == starting_version_count + 1 diff --git a/dandiapi/api/tests/test_unembargo.py b/dandiapi/api/tests/test_unembargo.py index ac41a6c41..ebd86b336 100644 --- a/dandiapi/api/tests/test_unembargo.py +++ b/dandiapi/api/tests/test_unembargo.py @@ -1,14 +1,31 @@ from __future__ import annotations +from typing import TYPE_CHECKING + +import dandischema from guardian.shortcuts import assign_perm import pytest from dandiapi.api.models.dandiset import Dandiset -from dandiapi.api.services.embargo import AssetBlobEmbargoedError, remove_asset_blob_embargoed_tag -from dandiapi.api.tasks.scheduled import send_dandisets_to_unembargo_email +from dandiapi.api.models.version import Version +from dandiapi.api.services.embargo import ( + AssetBlobEmbargoedError, + _remove_dandiset_asset_blob_embargo_tags, + remove_asset_blob_embargoed_tag, + unembargo_dandiset, +) +from dandiapi.api.services.embargo.exceptions import ( + AssetTagRemovalError, + DandisetActiveUploadsError, +) +from dandiapi.api.services.exceptions import DandiError +from dandiapi.api.tasks import unembargo_dandiset_task + +if TYPE_CHECKING: + from dandiapi.api.models.asset import AssetBlob -@pytest.mark.django_db() +@pytest.mark.django_db def test_remove_asset_blob_embargoed_tag_fails_on_embargod(embargoed_asset_blob, asset_blob): with pytest.raises(AssetBlobEmbargoedError): remove_asset_blob_embargoed_tag(embargoed_asset_blob) @@ -17,25 +34,236 @@ def test_remove_asset_blob_embargoed_tag_fails_on_embargod(embargoed_asset_blob, remove_asset_blob_embargoed_tag(asset_blob) -@pytest.mark.django_db() -def test_unembargo_dandiset_sends_emails( - api_client, user, dandiset, draft_version_factory, mailoutbox +@pytest.mark.django_db +def test_kickoff_dandiset_unembargo_dandiset_not_embargoed( + api_client, user, dandiset_factory, draft_version_factory ): + dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.OPEN) draft_version_factory(dandiset=dandiset) - assign_perm('owner', user, dandiset) api_client.force_authenticate(user=user) - dandiset.embargo_status = Dandiset.EmbargoStatus.EMBARGOED - dandiset.save() + resp = api_client.post(f'/api/dandisets/{dandiset.identifier}/unembargo/') + assert resp.status_code == 400 + + +@pytest.mark.django_db +def test_kickoff_dandiset_unembargo_not_owner( + api_client, user, dandiset_factory, draft_version_factory +): + dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED) + draft_version_factory(dandiset=dandiset) + api_client.force_authenticate(user=user) + + resp = api_client.post(f'/api/dandisets/{dandiset.identifier}/unembargo/') + assert resp.status_code == 403 + + +@pytest.mark.django_db +def test_kickoff_dandiset_unembargo_active_uploads( + api_client, user, dandiset_factory, draft_version_factory, upload_factory +): + dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED) + draft_version_factory(dandiset=dandiset) + assign_perm('owner', user, dandiset) + api_client.force_authenticate(user=user) + # Test that active uploads prevent unembargp + upload_factory(dandiset=dandiset) resp = api_client.post(f'/api/dandisets/{dandiset.identifier}/unembargo/') + assert resp.status_code == 400 + + +# transaction=True required due to how `kickoff_dandiset_unembargo` calls `unembargo_dandiset_task` +@pytest.mark.django_db(transaction=True) +def test_kickoff_dandiset_unembargo(api_client, user, draft_version_factory, mailoutbox, mocker): + draft_version = draft_version_factory(dandiset__embargo_status=Dandiset.EmbargoStatus.EMBARGOED) + ds: Dandiset = draft_version.dandiset + + assign_perm('owner', user, ds) + api_client.force_authenticate(user=user) + + # mock this task to check if called + patched_task = mocker.patch('dandiapi.api.services.embargo.unembargo_dandiset_task') + + resp = api_client.post(f'/api/dandisets/{ds.identifier}/unembargo/') assert resp.status_code == 200 - # Simulate the scheduled task calling this function - send_dandisets_to_unembargo_email() + ds.refresh_from_db() + assert ds.embargo_status == Dandiset.EmbargoStatus.UNEMBARGOING + + # Check that unembargo dandiset task was delayed + assert len(patched_task.mock_calls) == 1 + assert str(patched_task.mock_calls[0]) == f'call.delay({ds.pk}, {user.id})' + + +@pytest.mark.django_db +def test_unembargo_dandiset_not_unembargoing(draft_version_factory, user, api_client): + draft_version = draft_version_factory(dandiset__embargo_status=Dandiset.EmbargoStatus.EMBARGOED) + ds: Dandiset = draft_version.dandiset + + assign_perm('owner', user, ds) + api_client.force_authenticate(user=user) + + with pytest.raises(DandiError): + unembargo_dandiset(ds, user) + + +@pytest.mark.django_db +def test_unembargo_dandiset_uploads_exist(draft_version_factory, upload_factory, user, api_client): + draft_version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + ds: Dandiset = draft_version.dandiset + + assign_perm('owner', user, ds) + api_client.force_authenticate(user=user) + + upload_factory(dandiset=ds) + with pytest.raises(DandisetActiveUploadsError): + unembargo_dandiset(ds, user) + + +@pytest.mark.django_db +def test_remove_dandiset_asset_blob_embargo_tags_chunks( + draft_version_factory, + asset_factory, + embargoed_asset_blob_factory, + mocker, +): + delete_asset_blob_tags_mock = mocker.patch( + 'dandiapi.api.services.embargo._delete_asset_blob_tags' + ) + chunk_size = mocker.patch('dandiapi.api.services.embargo.ASSET_BLOB_TAG_REMOVAL_CHUNK_SIZE', 2) + + draft_version: Version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + ds: Dandiset = draft_version.dandiset + for _ in range(chunk_size + 1): + asset = asset_factory(blob=embargoed_asset_blob_factory()) + draft_version.assets.add(asset) + + _remove_dandiset_asset_blob_embargo_tags(dandiset=ds) + + # Assert that _delete_asset_blob_tags was called chunk_size +1 times, to ensure that it works + # correctly across chunks + assert len(delete_asset_blob_tags_mock.mock_calls) == chunk_size + 1 + + +@pytest.mark.django_db +def test_delete_asset_blob_tags_fails( + draft_version_factory, + asset_factory, + embargoed_asset_blob_factory, + mocker, +): + mocker.patch('dandiapi.api.services.embargo._delete_asset_blob_tags', side_effect=ValueError) + draft_version: Version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + ds: Dandiset = draft_version.dandiset + asset = asset_factory(blob=embargoed_asset_blob_factory()) + draft_version.assets.add(asset) + + # Check that if an exception within `_delete_asset_blob_tags` is raised, it's propagated upwards + # as an AssetTagRemovalError + with pytest.raises(AssetTagRemovalError): + _remove_dandiset_asset_blob_embargo_tags(dandiset=ds) + + +@pytest.mark.django_db +def test_unembargo_dandiset( + draft_version_factory, + asset_factory, + embargoed_asset_blob_factory, + mocker, + mailoutbox, + user_factory, +): + draft_version: Version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + ds: Dandiset = draft_version.dandiset + owners = [user_factory() for _ in range(5)] + for user in owners: + assign_perm('owner', user, ds) + + embargoed_blob: AssetBlob = embargoed_asset_blob_factory() + asset = asset_factory(blob=embargoed_blob) + draft_version.assets.add(asset) + assert embargoed_blob.embargoed + + # Patch this function to check if it's been called, since we can't test the tagging directly + patched = mocker.patch('dandiapi.api.services.embargo._delete_asset_blob_tags') + + unembargo_dandiset(ds, owners[0]) + patched.assert_called_once() + + embargoed_blob.refresh_from_db() + ds.refresh_from_db() + draft_version.refresh_from_db() + assert not embargoed_blob.embargoed + assert ds.embargo_status == Dandiset.EmbargoStatus.OPEN + assert ( + draft_version.metadata['access'][0]['status'] + == dandischema.models.AccessType.OpenAccess.value + ) + + # Check that a correct email exists + assert mailoutbox + assert 'has been unembargoed' in mailoutbox[0].subject + payload = mailoutbox[0].message().get_payload()[0].get_payload() + assert ds.identifier in payload + assert 'has been unembargoed' in payload + + # Check that the email was sent to all owners + owner_email_set = {user.email for user in owners} + mailoutbox_to_email_set = set(mailoutbox[0].to) + assert owner_email_set == mailoutbox_to_email_set + + +@pytest.mark.django_db +def test_unembargo_dandiset_validate_version_metadata( + draft_version_factory, asset_factory, user, mocker +): + from dandiapi.api.services import embargo as embargo_service + + draft_version: Version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + ds: Dandiset = draft_version.dandiset + assign_perm('owner', user, ds) + + draft_version.validation_errors = ['error ajhh'] + draft_version.status = Version.Status.INVALID + draft_version.save() + draft_version.assets.add(asset_factory()) + + # Spy on the imported function in the embargo service + validate_version_spy = mocker.spy(embargo_service, 'validate_version_metadata') + + unembargo_dandiset(ds, user=user) + + assert validate_version_spy.call_count == 1 + draft_version.refresh_from_db() + assert not draft_version.validation_errors + + +@pytest.mark.django_db +def test_unembargo_dandiset_task_failure(draft_version_factory, mailoutbox, user, api_client): + # Intentionally set the status to embargoed so the task will fail + draft_version = draft_version_factory(dandiset__embargo_status=Dandiset.EmbargoStatus.EMBARGOED) + ds: Dandiset = draft_version.dandiset + + assign_perm('owner', user, ds) + api_client.force_authenticate(user=user) + + with pytest.raises(DandiError): + unembargo_dandiset_task.delay(ds.pk, user.id) assert mailoutbox - assert 'un-embargo' in mailoutbox[0].subject - assert dandiset.identifier in mailoutbox[0].message().get_payload() - assert user.username in mailoutbox[0].message().get_payload() + assert 'Unembargo failed' in mailoutbox[0].subject + payload = mailoutbox[0].message().get_payload()[0].get_payload() + assert ds.identifier in payload + assert 'error during the unembargo' in payload diff --git a/dandiapi/api/tests/test_upload.py b/dandiapi/api/tests/test_upload.py index e8a7493f8..8e6eaa462 100644 --- a/dandiapi/api/tests/test_upload.py +++ b/dandiapi/api/tests/test_upload.py @@ -16,7 +16,7 @@ def mb(bytes_size: int) -> int: return bytes_size * 2**20 -@pytest.mark.django_db() +@pytest.mark.django_db def test_blob_read(api_client, asset_blob): assert api_client.post( '/api/blobs/digest/', @@ -30,7 +30,7 @@ def test_blob_read(api_client, asset_blob): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_blob_read_sha256(api_client, asset_blob): assert api_client.post( '/api/blobs/digest/', @@ -44,7 +44,7 @@ def test_blob_read_sha256(api_client, asset_blob): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_blob_read_bad_algorithm(api_client, asset_blob): resp = api_client.post( '/api/blobs/digest/', @@ -55,7 +55,7 @@ def test_blob_read_bad_algorithm(api_client, asset_blob): assert resp.data == 'Unsupported Digest Algorithm. Supported: dandi:dandi-etag, dandi:sha2-256' -@pytest.mark.django_db() +@pytest.mark.django_db def test_blob_read_does_not_exist(api_client): resp = api_client.post( '/api/blobs/digest/', @@ -65,7 +65,7 @@ def test_blob_read_does_not_exist(api_client): assert resp.status_code == 404 -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize('embargoed', [True, False]) def test_upload_initialize(api_client, user, dandiset_factory, embargoed): dandiset = dandiset_factory( @@ -109,7 +109,26 @@ def test_upload_initialize(api_client, user, dandiset_factory, embargoed): assert upload.blob.name == f'test-prefix/blobs/{upload_id[:3]}/{upload_id[3:6]}/{upload_id}' -@pytest.mark.django_db() +@pytest.mark.django_db +def test_upload_initialize_unembargo_in_progress(api_client, user, dandiset_factory): + dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING) + api_client.force_authenticate(user=user) + assign_perm('owner', user, dandiset) + + content_size = 123 + resp = api_client.post( + '/api/uploads/initialize/', + { + 'contentSize': content_size, + 'digest': {'algorithm': 'dandi:dandi-etag', 'value': 'f' * 32 + '-1'}, + 'dandiset': dandiset.identifier, + }, + format='json', + ) + assert resp.status_code == 400 + + +@pytest.mark.django_db def test_upload_initialize_existing_asset_blob(api_client, user, dandiset, asset_blob): api_client.force_authenticate(user=user) assign_perm('owner', user, dandiset) @@ -129,7 +148,7 @@ def test_upload_initialize_existing_asset_blob(api_client, user, dandiset, asset assert not Upload.objects.all().exists() -@pytest.mark.django_db() +@pytest.mark.django_db def test_upload_initialize_not_an_owner(api_client, user, dandiset): api_client.force_authenticate(user=user) @@ -148,7 +167,7 @@ def test_upload_initialize_not_an_owner(api_client, user, dandiset): assert not Upload.objects.all().exists() -@pytest.mark.django_db() +@pytest.mark.django_db def test_upload_initialize_embargo_not_an_owner(api_client, user, dandiset_factory): api_client.force_authenticate(user=user) dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED) @@ -170,7 +189,7 @@ def test_upload_initialize_embargo_not_an_owner(api_client, user, dandiset_facto assert not Upload.objects.all().exists() -@pytest.mark.django_db() +@pytest.mark.django_db def test_upload_initialize_embargo_existing_asset_blob( api_client, user, dandiset_factory, asset_blob ): @@ -194,7 +213,7 @@ def test_upload_initialize_embargo_existing_asset_blob( assert not Upload.objects.all().exists() -@pytest.mark.django_db() +@pytest.mark.django_db def test_upload_initialize_embargo_existing_embargoed_asset_blob( api_client, user, dandiset_factory, embargoed_asset_blob_factory ): @@ -219,7 +238,7 @@ def test_upload_initialize_embargo_existing_embargoed_asset_blob( assert not Upload.objects.all().exists() -@pytest.mark.django_db() +@pytest.mark.django_db def test_upload_initialize_unauthorized(api_client): assert ( api_client.post( @@ -505,16 +524,10 @@ def test_upload_validate_wrong_etag(api_client, user, upload): def test_upload_validate_existing_assetblob(api_client, user, upload, asset_blob_factory): api_client.force_authenticate(user=user) - asset_blob = asset_blob_factory(etag=upload.etag, size=upload.size) + asset_blob_factory(etag=upload.etag, size=upload.size) resp = api_client.post(f'/api/uploads/{upload.upload_id}/validate/') - assert resp.status_code == 200 - assert resp.data == { - 'blob_id': str(asset_blob.blob_id), - 'etag': asset_blob.etag, - 'sha256': asset_blob.sha256, - 'size': asset_blob.size, - } + assert resp.status_code == 409 assert AssetBlob.objects.all().count() == 1 assert not Upload.objects.all().exists() @@ -530,16 +543,10 @@ def test_upload_validate_embargo_existing_assetblob( embargoed_upload = embargoed_upload_factory(dandiset=dandiset) # The upload should recognize this preexisting AssetBlob and use it instead - asset_blob = asset_blob_factory(etag=embargoed_upload.etag, size=embargoed_upload.size) + asset_blob_factory(etag=embargoed_upload.etag, size=embargoed_upload.size) resp = api_client.post(f'/api/uploads/{embargoed_upload.upload_id}/validate/') - assert resp.status_code == 200 - assert resp.data == { - 'blob_id': str(asset_blob.blob_id), - 'etag': asset_blob.etag, - 'sha256': asset_blob.sha256, - 'size': asset_blob.size, - } + assert resp.status_code == 409 assert AssetBlob.objects.all().count() == 1 @@ -555,17 +562,9 @@ def test_upload_validate_embargo_existing_embargoed_assetblob( # The upload should recognize this preexisting embargoed AssetBlob and use it instead # This only works because the embargoed asset blob belongs to the same dandiset - embargoed_asset_blob = embargoed_asset_blob_factory( - etag=embargoed_upload.etag, size=embargoed_upload.size - ) + embargoed_asset_blob_factory(etag=embargoed_upload.etag, size=embargoed_upload.size) resp = api_client.post(f'/api/uploads/{embargoed_upload.upload_id}/validate/') - assert resp.status_code == 200 - assert resp.data == { - 'blob_id': str(embargoed_asset_blob.blob_id), - 'etag': embargoed_asset_blob.etag, - 'sha256': embargoed_asset_blob.sha256, - 'size': embargoed_asset_blob.size, - } + assert resp.status_code == 409 assert AssetBlob.objects.all().count() == 1 diff --git a/dandiapi/api/tests/test_users.py b/dandiapi/api/tests/test_users.py index feac3af91..75af4dfb4 100644 --- a/dandiapi/api/tests/test_users.py +++ b/dandiapi/api/tests/test_users.py @@ -31,7 +31,7 @@ def serialize_social_account(social_account): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_registration_email_content( social_account: SocialAccount, mailoutbox: list[EmailMessage], api_client: APIClient ): @@ -71,7 +71,7 @@ def test_user_registration_email_content( (UserMetadata.Status.APPROVED, 0), ], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_registration_email_count( social_account: SocialAccount, mailoutbox: list[EmailMessage], @@ -90,7 +90,7 @@ def test_user_registration_email_count( assert len(mailoutbox) == email_count -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_me(api_client, social_account): api_client.force_authenticate(user=social_account.user) @@ -100,7 +100,7 @@ def test_user_me(api_client, social_account): ).data == serialize_social_account(social_account) -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_me_admin(api_client, admin_user, social_account_factory): api_client.force_authenticate(user=admin_user) social_account = social_account_factory(user=admin_user) @@ -112,7 +112,7 @@ def test_user_me_admin(api_client, admin_user, social_account_factory): ).data == serialize_social_account(social_account) -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_search(api_client, social_account, social_account_factory): api_client.force_authenticate(user=social_account.user) @@ -128,7 +128,7 @@ def test_user_search(api_client, social_account, social_account_factory): ).data == [serialize_social_account(social_account)] -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_search_prefer_social(api_client, user_factory, social_account): api_client.force_authenticate(user=social_account.user) @@ -149,7 +149,7 @@ def test_user_search_prefer_social(api_client, user_factory, social_account): ).data == [user_to_dict(user)] -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_search_blank_username(api_client, user): api_client.force_authenticate(user=user) @@ -163,7 +163,7 @@ def test_user_search_blank_username(api_client, user): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_search_no_matches(api_client, user): api_client.force_authenticate(user=user) @@ -177,7 +177,7 @@ def test_user_search_no_matches(api_client, user): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_search_multiple_matches(api_client, user, user_factory, social_account_factory): api_client.force_authenticate(user=user) @@ -200,7 +200,7 @@ def test_user_search_multiple_matches(api_client, user, user_factory, social_acc ).data == [serialize_social_account(social_account) for social_account in social_accounts[:3]] -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_search_limit_enforced(api_client, user, user_factory, social_account_factory): api_client.force_authenticate(user=user) @@ -217,7 +217,7 @@ def test_user_search_limit_enforced(api_client, user, user_factory, social_accou ] -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_search_extra_data(api_client, user, social_account, social_account_factory): """Test that searched keyword isn't caught by a different field in `extra_data`.""" api_client.force_authenticate(user=user) @@ -279,7 +279,7 @@ def test_user_search_extra_data(api_client, user, social_account, social_account ), ], ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_user_status( api_client: APIClient, user: User, @@ -312,7 +312,7 @@ def test_user_status( assert response.data == expected_search_results_value -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( ('questions', 'querystring', 'expected_status_code'), [ @@ -334,7 +334,7 @@ def test_user_questionnaire_view( assertContains(resp, question['question']) -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( ('email', 'expected_status'), [ diff --git a/dandiapi/api/tests/test_version.py b/dandiapi/api/tests/test_version.py index 9c530999d..a85445164 100644 --- a/dandiapi/api/tests/test_version.py +++ b/dandiapi/api/tests/test_version.py @@ -4,11 +4,13 @@ from time import sleep from typing import TYPE_CHECKING +from dandischema.models import AccessType from django.conf import settings from freezegun import freeze_time from guardian.shortcuts import assign_perm import pytest +from dandiapi.api.models.dandiset import Dandiset from dandiapi.api.services.metadata import version_aggregate_assets_summary from dandiapi.api.services.metadata.exceptions import VersionMetadataConcurrentlyModifiedError @@ -25,7 +27,7 @@ from .fuzzy import TIMESTAMP_RE, URN_RE, UTC_ISO_TIMESTAMP_RE, VERSION_ID_RE -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_next_published_version_nosave(dandiset): # Without saving, the output should be reproducible version_str_1 = Version.next_published_version(dandiset) @@ -34,7 +36,7 @@ def test_version_next_published_version_nosave(dandiset): assert version_str_1 == VERSION_ID_RE -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_next_published_version_save(mocker, dandiset, published_version_factory): # Given an existing version at the current time, a different one should be allocated next_published_version_spy = mocker.spy(Version, 'next_published_version') @@ -45,7 +47,7 @@ def test_version_next_published_version_save(mocker, dandiset, published_version assert version_1.version != version_str_2 -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_next_published_version_simultaneous_save( dandiset_factory, published_version_factory, @@ -61,7 +63,7 @@ def test_version_next_published_version_simultaneous_save( assert version_1.version == version_2.version -@pytest.mark.django_db() +@pytest.mark.django_db def test_draft_version_metadata_computed(draft_version: Version): original_metadata = {'schemaVersion': settings.DANDI_SCHEMA_VERSION} draft_version.metadata = original_metadata @@ -84,6 +86,7 @@ def test_draft_version_metadata_computed(draft_version: Version): ), 'repository': settings.DANDI_WEB_APP_URL, 'dateCreated': draft_version.dandiset.created.isoformat(), + 'access': [{'schemaKey': 'AccessRequirements', 'status': AccessType.OpenAccess.value}], '@context': f'https://raw.githubusercontent.com/dandi/schema/master/releases/{settings.DANDI_SCHEMA_VERSION}/context.json', 'assetsSummary': { 'numberOfBytes': 0, @@ -96,7 +99,7 @@ def test_draft_version_metadata_computed(draft_version: Version): assert draft_version.metadata == expected_metadata -@pytest.mark.django_db() +@pytest.mark.django_db def test_published_version_metadata_computed(published_version: Version): original_metadata = {'schemaVersion': settings.DANDI_SCHEMA_VERSION} published_version.metadata = original_metadata @@ -127,6 +130,7 @@ def test_published_version_metadata_computed(published_version: Version): ), 'repository': settings.DANDI_WEB_APP_URL, 'dateCreated': published_version.dandiset.created.isoformat(), + 'access': [{'schemaKey': 'AccessRequirements', 'status': AccessType.OpenAccess.value}], '@context': f'https://raw.githubusercontent.com/dandi/schema/master/releases/{settings.DANDI_SCHEMA_VERSION}/context.json', 'assetsSummary': { 'numberOfBytes': 0, @@ -139,7 +143,7 @@ def test_published_version_metadata_computed(published_version: Version): assert published_version.metadata == expected_metadata -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_metadata_citation_draft(draft_version): name = draft_version.metadata['name'].rstrip('.') year = datetime.datetime.now(datetime.UTC).year @@ -153,7 +157,7 @@ def test_version_metadata_citation_draft(draft_version): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_metadata_citation_published(published_version): name = published_version.metadata['name'].rstrip('.') year = datetime.datetime.now(datetime.UTC).year @@ -164,7 +168,7 @@ def test_version_metadata_citation_published(published_version): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_metadata_citation_no_contributors(version): version.metadata['contributor'] = [] version.save() @@ -176,7 +180,7 @@ def test_version_metadata_citation_no_contributors(version): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_metadata_citation_contributor_not_in_citation(version): version.metadata['contributor'] = [ {'name': 'Jane Doe'}, @@ -191,7 +195,7 @@ def test_version_metadata_citation_contributor_not_in_citation(version): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_metadata_citation_contributor(version): version.metadata['contributor'] = [{'name': 'Doe, Jane', 'includeInCitation': True}] version.save() @@ -203,7 +207,7 @@ def test_version_metadata_citation_contributor(version): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_metadata_citation_multiple_contributors(version): version.metadata['contributor'] = [ {'name': 'John Doe', 'includeInCitation': True}, @@ -219,7 +223,7 @@ def test_version_metadata_citation_multiple_contributors(version): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_metadata_context(version): version.metadata['schemaVersion'] = '6.6.6' version.save() @@ -229,7 +233,7 @@ def test_version_metadata_context(version): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_metadata_assets_summary_missing(version, asset): version.assets.add(asset) @@ -237,7 +241,7 @@ def test_version_metadata_assets_summary_missing(version, asset): version.save() -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_aggregate_assets_summary_valid_assets(draft_version, draft_asset_factory): valid_asset = draft_asset_factory(status=Asset.Status.VALID) invalid_asset = draft_asset_factory(status=Asset.Status.INVALID) @@ -247,7 +251,7 @@ def test_version_aggregate_assets_summary_valid_assets(draft_version, draft_asse assert draft_version.metadata['assetsSummary']['numberOfFiles'] == 1 -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_valid_with_valid_asset(version, asset): version.assets.add(asset) @@ -259,7 +263,7 @@ def test_version_valid_with_valid_asset(version, asset): assert version.publishable -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( 'status', [ @@ -275,7 +279,7 @@ def test_version_invalid(version, status): assert not version.publishable -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( 'status', [ @@ -296,7 +300,7 @@ def test_version_valid_with_invalid_asset(version, asset, status): assert not version.publishable -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_publish_version(draft_version, asset): # Normally the publish endpoint would inject a doi, so we must do it manually fake_doi = 'doi' @@ -352,7 +356,7 @@ def test_version_publish_version(draft_version, asset): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_aggregate_assets_summary(draft_version_factory, draft_asset_factory): version = draft_version_factory(status=Version.Status.VALID) asset = draft_asset_factory(status=Asset.Status.VALID) @@ -366,7 +370,7 @@ def test_version_aggregate_assets_summary(draft_version_factory, draft_asset_fac assert version.metadata['assetsSummary']['schemaKey'] == 'AssetsSummary' -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_aggregate_assets_summary_metadata_modified( draft_version_factory, draft_asset_factory ): @@ -380,7 +384,7 @@ def test_version_aggregate_assets_summary_metadata_modified( version_aggregate_assets_summary(version) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_size( version, asset_factory, @@ -396,7 +400,7 @@ def test_version_size( assert version.size == 700 -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_list(api_client, version, draft_version_factory): # Create an extra version so that there are multiple versions to filter down draft_version_factory() @@ -419,6 +423,7 @@ def test_version_rest_list(api_client, version, draft_version_factory): 'created': TIMESTAMP_RE, 'modified': TIMESTAMP_RE, 'asset_count': 0, + 'active_uploads': 0, 'size': 0, 'status': 'Pending', } @@ -426,7 +431,7 @@ def test_version_rest_list(api_client, version, draft_version_factory): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_retrieve(api_client, version, draft_version_factory): # Create an extra version so that there are multiple versions to filter down draft_version_factory() @@ -439,7 +444,7 @@ def test_version_rest_retrieve(api_client, version, draft_version_factory): ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_info(api_client, version): assert api_client.get( f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/info/' @@ -456,6 +461,7 @@ def test_version_rest_info(api_client, version): 'created': TIMESTAMP_RE, 'modified': TIMESTAMP_RE, 'asset_count': 0, + 'active_uploads': 0, 'metadata': version.metadata, 'size': version.size, 'status': 'Pending', @@ -465,7 +471,7 @@ def test_version_rest_info(api_client, version): } -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( 'asset_status', [Asset.Status.PENDING, Asset.Status.VALIDATING, Asset.Status.VALID, Asset.Status.INVALID], @@ -506,6 +512,7 @@ def test_version_rest_info_with_asset( 'created': TIMESTAMP_RE, 'modified': TIMESTAMP_RE, 'asset_count': 1, + 'active_uploads': 0, 'metadata': version.metadata, 'size': version.size, 'status': Asset.Status.VALID, @@ -515,7 +522,7 @@ def test_version_rest_info_with_asset( } -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_update(api_client, user, draft_version): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -556,6 +563,7 @@ def test_version_rest_update(api_client, user, draft_version): 'url': url, 'repository': settings.DANDI_WEB_APP_URL, 'dateCreated': UTC_ISO_TIMESTAMP_RE, + 'access': [{'schemaKey': 'AccessRequirements', 'status': AccessType.OpenAccess.value}], 'citation': f'{new_name} ({year}). (Version draft) [Data set]. DANDI archive. {url}', 'assetsSummary': { 'numberOfBytes': 0, @@ -581,6 +589,7 @@ def test_version_rest_update(api_client, user, draft_version): 'created': TIMESTAMP_RE, 'modified': TIMESTAMP_RE, 'asset_count': draft_version.asset_count, + 'active_uploads': draft_version.active_uploads, 'metadata': saved_metadata, 'size': draft_version.size, 'status': 'Pending', @@ -600,7 +609,33 @@ def test_version_rest_update(api_client, user, draft_version): assert draft_version.status == Version.Status.PENDING -@pytest.mark.django_db() +@pytest.mark.django_db +def test_version_rest_update_unembargo_in_progress(api_client, user, draft_version_factory): + draft_version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + assign_perm('owner', user, draft_version.dandiset) + api_client.force_authenticate(user=user) + + new_name = 'A unique and special name!' + new_metadata = { + '@context': ( + 'https://raw.githubusercontent.com/dandi/schema/master/releases/' + f'{settings.DANDI_SCHEMA_VERSION}/context.json' + ), + 'schemaVersion': settings.DANDI_SCHEMA_VERSION, + 'num': 123, + } + + resp = api_client.put( + f'/api/dandisets/{draft_version.dandiset.identifier}/versions/{draft_version.version}/', + {'metadata': new_metadata, 'name': new_name}, + format='json', + ) + assert resp.status_code == 400 + + +@pytest.mark.django_db def test_version_rest_update_published_version(api_client, user, published_version): assign_perm('owner', user, published_version.dandiset) api_client.force_authenticate(user=user) @@ -618,7 +653,7 @@ def test_version_rest_update_published_version(api_client, user, published_versi assert resp.data == 'Only draft versions can be modified.' -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_update_not_an_owner(api_client, user, version): api_client.force_authenticate(user=user) @@ -635,7 +670,95 @@ def test_version_rest_update_not_an_owner(api_client, user, version): ) -@pytest.mark.django_db() +@pytest.mark.parametrize( + ('access'), + [ + 'some value', + 123, + None, + [], + ['a', 'b'], + ['a', 'b', {}], + [{'schemaKey': 'AccessRequirements', 'status': 'foobar'}], + ], +) +@pytest.mark.django_db +def test_version_rest_update_access_values(api_client, user, draft_version, access): + assign_perm('owner', user, draft_version.dandiset) + api_client.force_authenticate(user=user) + + new_metadata = {**draft_version.metadata, 'access': access} + resp = api_client.put( + f'/api/dandisets/{draft_version.dandiset.identifier}/versions/{draft_version.version}/', + {'metadata': new_metadata, 'name': draft_version.name}, + format='json', + ) + assert resp.status_code == 200 + draft_version.refresh_from_db() + + access = draft_version.metadata['access'] + assert access != new_metadata['access'] + assert access[0]['schemaKey'] == 'AccessRequirements' + assert ( + access[0]['status'] == AccessType.EmbargoedAccess.value + if draft_version.dandiset.embargoed + else AccessType.OpenAccess.value + ) + + +@pytest.mark.django_db +def test_version_rest_update_access_missing(api_client, user, draft_version): + assign_perm('owner', user, draft_version.dandiset) + api_client.force_authenticate(user=user) + + # Check that the field missing entirely is also okay + new_metadata = {**draft_version.metadata} + new_metadata.pop('access', None) + + resp = api_client.put( + f'/api/dandisets/{draft_version.dandiset.identifier}/versions/{draft_version.version}/', + {'metadata': new_metadata, 'name': draft_version.name}, + format='json', + ) + assert resp.status_code == 200 + draft_version.refresh_from_db() + assert 'access' in draft_version.metadata + access = draft_version.metadata['access'] + assert access[0]['schemaKey'] == 'AccessRequirements' + assert ( + access[0]['status'] == AccessType.EmbargoedAccess.value + if draft_version.dandiset.embargoed + else AccessType.OpenAccess.value + ) + + +@pytest.mark.django_db +def test_version_rest_update_access_valid(api_client, user, draft_version): + assign_perm('owner', user, draft_version.dandiset) + api_client.force_authenticate(user=user) + + # Check that extra fields persist + new_metadata = {**draft_version.metadata, 'access': [{'extra': 'field'}]} + resp = api_client.put( + f'/api/dandisets/{draft_version.dandiset.identifier}/versions/{draft_version.version}/', + {'metadata': new_metadata, 'name': draft_version.name}, + format='json', + ) + assert resp.status_code == 200 + draft_version.refresh_from_db() + access = draft_version.metadata['access'] + assert access != new_metadata['access'] + assert access[0]['schemaKey'] == 'AccessRequirements' + assert ( + access[0]['status'] == AccessType.EmbargoedAccess.value + if draft_version.dandiset.embargoed + else AccessType.OpenAccess.value + ) + + assert access[0]['extra'] == 'field' + + +@pytest.mark.django_db def test_version_rest_publish( api_client: APIClient, user: User, @@ -669,7 +792,37 @@ def test_version_rest_publish( assert draft_version.status == Version.Status.PUBLISHING -@pytest.mark.django_db() +@pytest.mark.django_db +def test_version_rest_publish_embargo(api_client: APIClient, user: User, draft_version_factory): + draft_version = draft_version_factory(dandiset__embargo_status=Dandiset.EmbargoStatus.EMBARGOED) + assign_perm('owner', user, draft_version.dandiset) + api_client.force_authenticate(user=user) + + resp = api_client.post( + f'/api/dandisets/{draft_version.dandiset.identifier}' + f'/versions/{draft_version.version}/publish/' + ) + assert resp.status_code == 400 + + +@pytest.mark.django_db +def test_version_rest_publish_unembargo_in_progress( + api_client: APIClient, user: User, draft_version_factory +): + draft_version = draft_version_factory( + dandiset__embargo_status=Dandiset.EmbargoStatus.UNEMBARGOING + ) + assign_perm('owner', user, draft_version.dandiset) + api_client.force_authenticate(user=user) + + resp = api_client.post( + f'/api/dandisets/{draft_version.dandiset.identifier}' + f'/versions/{draft_version.version}/publish/' + ) + assert resp.status_code == 400 + + +@pytest.mark.django_db def test_version_rest_publish_zarr( api_client, user: User, @@ -707,7 +860,7 @@ def test_version_rest_publish_zarr( assert resp.json() == 'Cannot publish dandisets which contain zarrs' -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_publish_not_an_owner(api_client, user, version, asset): api_client.force_authenticate(user=user) version.assets.add(asset) @@ -718,7 +871,7 @@ def test_version_rest_publish_not_an_owner(api_client, user, version, asset): assert resp.status_code == 403 -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_publish_not_a_draft(api_client, user, published_version, asset): assign_perm('owner', user, published_version.dandiset) api_client.force_authenticate(user=user) @@ -731,7 +884,7 @@ def test_version_rest_publish_not_a_draft(api_client, user, published_version, a assert resp.status_code == 405 -@pytest.mark.django_db() +@pytest.mark.django_db @pytest.mark.parametrize( ('status', 'expected_data', 'expected_status_code'), [ @@ -774,7 +927,7 @@ def test_version_rest_publish_invalid( assert resp.data == expected_data -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_update_no_changed_metadata( api_client: APIClient, admin_user, draft_version: Version ): @@ -796,7 +949,7 @@ def test_version_rest_update_no_changed_metadata( assert draft_version.modified == old_modified_time -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_delete_published_not_admin(api_client, user, published_version): assign_perm('owner', user, published_version.dandiset) api_client.force_authenticate(user=user) @@ -809,7 +962,7 @@ def test_version_rest_delete_published_not_admin(api_client, user, published_ver assert published_version in Version.objects.all() -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_delete_published_admin(api_client, admin_user, published_version): api_client.force_authenticate(user=admin_user) response = api_client.delete( @@ -820,7 +973,7 @@ def test_version_rest_delete_published_admin(api_client, admin_user, published_v assert not Version.objects.all() -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_delete_draft_not_admin(api_client, user, draft_version): assign_perm('owner', user, draft_version.dandiset) api_client.force_authenticate(user=user) @@ -832,7 +985,7 @@ def test_version_rest_delete_draft_not_admin(api_client, user, draft_version): assert draft_version in Version.objects.all() -@pytest.mark.django_db() +@pytest.mark.django_db def test_version_rest_delete_draft_admin(api_client, admin_user, draft_version): api_client.force_authenticate(user=admin_user) response = api_client.delete( diff --git a/dandiapi/api/views/__init__.py b/dandiapi/api/views/__init__.py index 7254ba465..4e7b90cdc 100644 --- a/dandiapi/api/views/__init__.py +++ b/dandiapi/api/views/__init__.py @@ -3,7 +3,7 @@ from .asset import AssetViewSet, NestedAssetViewSet from .auth import auth_token_view, authorize_view, user_questionnaire_form_view from .dandiset import DandisetViewSet -from .dashboard import DashboardView, user_approval_view +from .dashboard import DashboardView, mailchimp_csv_view, user_approval_view from .info import info_view from .root import root_content_view from .stats import stats_view @@ -25,6 +25,7 @@ 'authorize_view', 'auth_token_view', 'blob_read_view', + 'mailchimp_csv_view', 'upload_initialize_view', 'upload_complete_view', 'upload_validate_view', diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index 4595b9ebd..814c98dea 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -9,6 +9,7 @@ remove_asset_from_version, ) from dandiapi.api.services.asset.exceptions import DraftDandisetNotModifiableError +from dandiapi.api.services.embargo.exceptions import DandisetUnembargoInProgressError from dandiapi.zarr.models import ZarrArchive try: @@ -46,7 +47,7 @@ VERSIONS_DANDISET_PK_PARAM, VERSIONS_VERSION_PARAM, ) -from dandiapi.api.views.pagination import DandiPagination +from dandiapi.api.views.pagination import DandiPagination, LazyPagination from dandiapi.api.views.serializers import ( AssetDetailSerializer, AssetDownloadQueryParameterSerializer, @@ -83,23 +84,32 @@ def raise_if_unauthorized(self): # We need to check the dandiset to see if it's embargoed, and if so whether or not the # user has ownership asset_id = self.kwargs.get('asset_id') - if asset_id is not None: - asset = get_object_or_404(Asset.objects.select_related('blob'), asset_id=asset_id) - # TODO: When EmbargoedZarrArchive is implemented, check that as well - if asset.blob and asset.blob.embargoed: - if not self.request.user.is_authenticated: - # Clients must be authenticated to access it - raise NotAuthenticated - - # User must be an owner on any of the dandisets this asset belongs to - asset_dandisets = Dandiset.objects.filter(versions__in=asset.versions.all()) - asset_dandisets_owned_by_user = DandisetUserObjectPermission.objects.filter( - content_object__in=asset_dandisets, - user=self.request.user, - permission__codename='owner', - ) - if not asset_dandisets_owned_by_user.exists(): - raise PermissionDenied + if asset_id is None: + return + + asset = get_object_or_404(Asset.objects.select_related('blob'), asset_id=asset_id) + + # TODO: When EmbargoedZarrArchive is implemented, check that as well + if not (asset.blob and asset.blob.embargoed): + return + + # Clients must be authenticated to access it + if not self.request.user.is_authenticated: + raise NotAuthenticated + + # Admins are allowed to access any embargoed asset blob + if self.request.user.is_superuser: + return + + # User must be an owner on any of the dandisets this asset belongs to + asset_dandisets = Dandiset.objects.filter(versions__in=asset.versions.all()) + asset_dandisets_owned_by_user = DandisetUserObjectPermission.objects.filter( + content_object__in=asset_dandisets, + user=self.request.user, + permission__codename='owner', + ) + if not asset_dandisets_owned_by_user.exists(): + raise PermissionDenied def get_queryset(self): self.raise_if_unauthorized() @@ -318,11 +328,14 @@ def validation(self, request, **kwargs): ) def create(self, request, versions__dandiset__pk, versions__version): version: Version = get_object_or_404( - Version, + Version.objects.select_related('dandiset'), dandiset=versions__dandiset__pk, version=versions__version, ) + if version.dandiset.unembargo_in_progress: + raise DandisetUnembargoInProgressError + serializer = AssetRequestSerializer(data=self.request.data) serializer.is_valid(raise_exception=True) @@ -351,12 +364,14 @@ def create(self, request, versions__dandiset__pk, versions__version): def update(self, request, versions__dandiset__pk, versions__version, **kwargs): """Update the metadata of an asset.""" version: Version = get_object_or_404( - Version, + Version.objects.select_related('dandiset'), dandiset__pk=versions__dandiset__pk, version=versions__version, ) if version.version != 'draft': raise DraftDandisetNotModifiableError + if version.dandiset.unembargo_in_progress: + raise DandisetUnembargoInProgressError serializer = AssetRequestSerializer(data=self.request.data) serializer.is_valid(raise_exception=True) @@ -389,10 +404,12 @@ def update(self, request, versions__dandiset__pk, versions__version, **kwargs): ) def destroy(self, request, versions__dandiset__pk, versions__version, **kwargs): version = get_object_or_404( - Version, + Version.objects.select_related('dandiset'), dandiset__pk=versions__dandiset__pk, version=versions__version, ) + if version.dandiset.unembargo_in_progress: + raise DandisetUnembargoInProgressError # Lock asset for delete with transaction.atomic(): @@ -431,7 +448,10 @@ def list(self, request, *args, **kwargs): asset_queryset = asset_queryset.filter(path__iregex=glob_pattern.replace('\\*', '.*')) # Retrieve just the first N asset IDs, and use them for pagination - page_of_asset_ids = self.paginate_queryset(asset_queryset.values_list('id', flat=True)) + # Use custom pagination class to reduce unnecessary counts of assets + paginator = LazyPagination() + qs = asset_queryset.values_list('id', flat=True) + page_of_asset_ids = paginator.paginate_queryset(qs, request=self.request, view=self) # Not sure when the page is ever None, but this condition is checked for compatibility with # the original implementation: https://github.com/encode/django-rest-framework/blob/f4194c4684420ac86485d9610adf760064db381f/rest_framework/mixins.py#L37-L46 @@ -453,7 +473,7 @@ def list(self, request, *args, **kwargs): # Paginate and return serializer = self.get_serializer(queryset, many=True, metadata=include_metadata) - return self.get_paginated_response(serializer.data) + return paginator.get_paginated_response(serializer.data) @swagger_auto_schema( query_serializer=AssetPathsQueryParameterSerializer, diff --git a/dandiapi/api/views/dandiset.py b/dandiapi/api/views/dandiset.py index 93bcef604..0e6790ee3 100644 --- a/dandiapi/api/views/dandiset.py +++ b/dandiapi/api/views/dandiset.py @@ -4,6 +4,7 @@ from allauth.socialaccount.models import SocialAccount from django.contrib.auth.models import User +from django.db import transaction from django.db.models import Count, Max, OuterRef, Subquery, Sum from django.db.models.functions import Coalesce from django.db.models.query_utils import Q @@ -24,9 +25,13 @@ from dandiapi.api.asset_paths import get_root_paths_many from dandiapi.api.mail import send_ownership_change_emails from dandiapi.api.models import Dandiset, Version +from dandiapi.api.services import audit from dandiapi.api.services.dandiset import create_dandiset, delete_dandiset -from dandiapi.api.services.dandiset.exceptions import UnauthorizedEmbargoAccessError -from dandiapi.api.services.embargo import unembargo_dandiset +from dandiapi.api.services.embargo import kickoff_dandiset_unembargo +from dandiapi.api.services.embargo.exceptions import ( + DandisetUnembargoInProgressError, + UnauthorizedEmbargoAccessError, +) from dandiapi.api.views.common import DANDISET_PK_PARAM from dandiapi.api.views.pagination import DandiPagination from dandiapi.api.views.serializers import ( @@ -346,7 +351,7 @@ def destroy(self, request, dandiset__pk): @method_decorator(permission_required_or_403('owner', (Dandiset, 'pk', 'dandiset__pk'))) def unembargo(self, request, dandiset__pk): dandiset: Dandiset = get_object_or_404(Dandiset, pk=dandiset__pk) - unembargo_dandiset(user=request.user, dandiset=dandiset) + kickoff_dandiset_unembargo(user=request.user, dandiset=dandiset) return Response(None, status=status.HTTP_200_OK) @@ -371,9 +376,12 @@ def unembargo(self, request, dandiset__pk): ) # TODO: move these into a viewset @action(methods=['GET', 'PUT'], detail=True) - def users(self, request, dandiset__pk): + def users(self, request, dandiset__pk): # noqa: C901 dandiset: Dandiset = self.get_object() if request.method == 'PUT': + if dandiset.unembargo_in_progress: + raise DandisetUnembargoInProgressError + # Verify that the user is currently an owner response = get_40x_or_None(request, ['owner'], dandiset, return_403=True) if response: @@ -406,9 +414,18 @@ def users(self, request, dandiset__pk): raise ValidationError(f'User {username} not found') # All owners found - owners = user_owners + [acc.user for acc in socialaccount_owners] - removed_owners, added_owners = dandiset.set_owners(owners) - dandiset.save() + with transaction.atomic(): + owners = user_owners + [acc.user for acc in socialaccount_owners] + removed_owners, added_owners = dandiset.set_owners(owners) + dandiset.save() + + if removed_owners or added_owners: + audit.change_owners( + dandiset=dandiset, + user=request.user, + removed_owners=removed_owners, + added_owners=added_owners, + ) send_ownership_change_emails(dandiset, removed_owners, added_owners) @@ -417,8 +434,13 @@ def users(self, request, dandiset__pk): try: owner_account = SocialAccount.objects.get(user=owner_user) owner_dict = {'username': owner_account.extra_data['login']} - if 'name' in owner_account.extra_data: - owner_dict['name'] = owner_account.extra_data['name'] + owner_dict['name'] = owner_account.extra_data.get('name', None) + owner_dict['email'] = ( + owner_account.extra_data['email'] + # Only logged-in users can see owners' email addresses + if request.user.is_authenticated and 'email' in owner_account.extra_data + else None + ) owners.append(owner_dict) except SocialAccount.DoesNotExist: # Just in case some users aren't using social accounts, have a fallback @@ -426,6 +448,8 @@ def users(self, request, dandiset__pk): { 'username': owner_user.username, 'name': f'{owner_user.first_name} {owner_user.last_name}', + 'email': owner_user.email if request.user.is_authenticated else None, } ) + return Response(owners, status=status.HTTP_200_OK) diff --git a/dandiapi/api/views/dashboard.py b/dandiapi/api/views/dashboard.py index 6a595e086..4f567cbce 100644 --- a/dandiapi/api/views/dashboard.py +++ b/dandiapi/api/views/dashboard.py @@ -1,5 +1,6 @@ from __future__ import annotations +import csv from typing import TYPE_CHECKING from django.conf import settings @@ -7,7 +8,7 @@ from django.contrib.auth.models import User from django.core.exceptions import PermissionDenied, ValidationError from django.db.models import Exists, OuterRef -from django.http import HttpRequest, HttpResponseRedirect +from django.http import HttpRequest, HttpResponseRedirect, StreamingHttpResponse from django.shortcuts import get_object_or_404, render from django.views.decorators.http import require_http_methods from django.views.generic.base import TemplateView @@ -17,6 +18,8 @@ from dandiapi.api.views.users import social_account_to_dict if TYPE_CHECKING: + from collections.abc import Iterator + from allauth.socialaccount.models import SocialAccount @@ -82,6 +85,40 @@ def _users(self): ) +def mailchimp_csv_view(request: HttpRequest) -> StreamingHttpResponse: + """Generate a Mailchimp-compatible CSV file of all active users.""" + # Exclude the django-guardian anonymous user account. + users = User.objects.filter(metadata__status=UserMetadata.Status.APPROVED).exclude( + username='AnonymousUser' + ) + + fieldnames = ['email', 'first_name', 'last_name'] + data = users.values(*fieldnames).iterator() + + def streaming_output() -> Iterator[str]: + """Stream out the header and CSV rows (for consumption by streaming response).""" + + # This class implements a filelike's write() interface to provide a way + # for the CSV writer to "return" the CSV lines as strings. + class Echo: + def write(self, value): + return value + + # Yield back the rows of the CSV file. + writer = csv.DictWriter(Echo(), fieldnames=fieldnames) + yield writer.writeheader() + for row in data: + yield writer.writerow(row) + + return StreamingHttpResponse( + streaming_output(), + content_type='text/csv', + headers={ + 'Content-Disposition': 'attachment; filename="dandi_users_mailchimp.csv"', + }, + ) + + @require_http_methods(['GET', 'POST']) def user_approval_view(request: HttpRequest, username: str): # Redirect user to login if they're not authenticated diff --git a/dandiapi/api/views/pagination.py b/dandiapi/api/views/pagination.py index c48b76f72..d2efaffbe 100644 --- a/dandiapi/api/views/pagination.py +++ b/dandiapi/api/views/pagination.py @@ -1,12 +1,3 @@ -""" -Implement an optimized pagination scheme. - -This module provides a custom pagination implementation, as the existing `PageNumberPagination` -class returns a `count` field for every page returned. This can be very inefficient on large tables, -and in reality, the count is only necessary on the first page of results. This module implements -such a pagination scheme, only returning 'count' on the first page of results. -""" - from __future__ import annotations from collections import OrderedDict @@ -17,6 +8,24 @@ class returns a `count` field for every page returned. This can be very ineffici from rest_framework.response import Response +class DandiPagination(PageNumberPagination): + page_size = 100 + max_page_size = 1000 + page_size_query_param = 'page_size' + + @cached_property + def page_size_query_description(self): + return f'{super().page_size_query_description[:-1]} (maximum {self.max_page_size}).' + + +""" +The below code provides a custom pagination implementation, as the existing `PageNumberPagination` +class returns a `count` field for every page returned. This can be very inefficient on large tables, +and in reality, the count is only necessary on the first page of results. This module implements +such a pagination scheme, only returning 'count' on the first page of results. +""" + + class LazyPage(Page): """ A page class that doesn't call .count() on the queryset it's paging from. @@ -96,7 +105,3 @@ def get_paginated_response(self, data) -> Response: ) return Response(page_dict) - - -# Alias -DandiPagination = LazyPagination diff --git a/dandiapi/api/views/serializers.py b/dandiapi/api/views/serializers.py index 3f96c37b9..fb9ff03a0 100644 --- a/dandiapi/api/views/serializers.py +++ b/dandiapi/api/views/serializers.py @@ -88,6 +88,7 @@ class Meta: 'version', 'name', 'asset_count', + 'active_uploads', 'size', 'status', 'created', diff --git a/dandiapi/api/views/upload.py b/dandiapi/api/views/upload.py index 178d9ea16..ab4c11a1c 100644 --- a/dandiapi/api/views/upload.py +++ b/dandiapi/api/views/upload.py @@ -17,6 +17,7 @@ from dandiapi.api.models import AssetBlob, Dandiset, Upload from dandiapi.api.permissions import IsApproved +from dandiapi.api.services.embargo.exceptions import DandisetUnembargoInProgressError from dandiapi.api.tasks import calculate_sha256 from dandiapi.api.views.serializers import AssetBlobSerializer @@ -135,6 +136,10 @@ def upload_initialize_view(request: Request) -> HttpResponseBase: if response: return response + # Ensure dandiset not in the process of unembargo + if dandiset.unembargo_in_progress: + raise DandisetUnembargoInProgressError + logging.info( 'Starting upload initialization of size %s, ETag %s to dandiset %s', content_size, @@ -235,23 +240,26 @@ def upload_validate_view(request: Request, upload_id: str) -> HttpResponseBase: f'ETag {upload.etag} does not match actual ETag {upload.actual_etag()}.' ) - # Use a transaction here so we can use select_for_update to lock the DB rows to avoid - # a race condition where two clients are uploading the same blob at the same time. - # This also ensures that the minting of the new AssetBlob and deletion of the Upload - # is an atomic operation. with transaction.atomic(): - try: - # Perhaps another upload completed before this one and has already created an AssetBlob. - asset_blob = AssetBlob.objects.select_for_update(no_key=True).get( - etag=upload.etag, size=upload.size - ) - except AssetBlob.DoesNotExist: - asset_blob = upload.to_asset_blob() - asset_blob.save() + # Avoid a race condition where two clients are uploading the same blob at the same time. + asset_blob, created = AssetBlob.objects.get_or_create( + etag=upload.etag, + size=upload.size, + defaults={ + 'embargoed': upload.embargoed, + 'blob_id': upload.upload_id, + 'blob': upload.blob, + }, + ) # Clean up the upload upload.delete() + if not created: + return Response( + 'An identical blob has already been uploaded.', status=status.HTTP_409_CONFLICT + ) + # Start calculating the sha256 in the background calculate_sha256.delay(asset_blob.blob_id) diff --git a/dandiapi/api/views/version.py b/dandiapi/api/views/version.py index 2cfc3335a..51a6542d2 100644 --- a/dandiapi/api/views/version.py +++ b/dandiapi/api/views/version.py @@ -14,6 +14,8 @@ from rest_framework_extensions.mixins import DetailSerializerMixin, NestedViewSetMixin from dandiapi.api.models import Dandiset, Version +from dandiapi.api.services import audit +from dandiapi.api.services.embargo.exceptions import DandisetUnembargoInProgressError from dandiapi.api.services.publish import publish_dandiset from dandiapi.api.tasks import delete_doi_task from dandiapi.api.views.common import DANDISET_PK_PARAM, VERSION_PARAM @@ -94,6 +96,8 @@ def update(self, request, **kwargs): 'Only draft versions can be modified.', status=status.HTTP_405_METHOD_NOT_ALLOWED, ) + if version.dandiset.unembargo_in_progress: + raise DandisetUnembargoInProgressError serializer = VersionMetadataSerializer(data=request.data) serializer.is_valid(raise_exception=True) @@ -117,6 +121,12 @@ def update(self, request, **kwargs): locked_version.status = Version.Status.PENDING locked_version.save() + audit.update_metadata( + dandiset=locked_version.dandiset, + user=request.user, + metadata=locked_version.metadata, + ) + serializer = VersionDetailSerializer(instance=locked_version) return Response(serializer.data, status=status.HTTP_200_OK) diff --git a/dandiapi/conftest.py b/dandiapi/conftest.py index 50a6a2a65..ce8826796 100644 --- a/dandiapi/conftest.py +++ b/dandiapi/conftest.py @@ -50,12 +50,12 @@ # Register zarr file/directory factories -@pytest.fixture() +@pytest.fixture def zarr_file_factory(): return upload_zarr_file -@pytest.fixture() +@pytest.fixture def user(user_factory): """Override the default `user` fixture to use our `UserFactory` so `UserMetadata` works.""" return user_factory() @@ -66,7 +66,7 @@ def asset_factory(request): return request.param -@pytest.fixture() +@pytest.fixture def asset(asset_factory): return asset_factory() @@ -76,12 +76,12 @@ def version(request): return request.param() -@pytest.fixture() +@pytest.fixture def api_client() -> APIClient: return APIClient() -@pytest.fixture() +@pytest.fixture def authenticated_api_client(user) -> APIClient: client = APIClient() client.force_authenticate(user=user) @@ -107,12 +107,12 @@ def minio_storage_factory() -> MinioStorage: return base_minio_storage_factory(settings.DANDI_DANDISETS_BUCKET_NAME) -@pytest.fixture() +@pytest.fixture def s3_storage() -> S3Storage: return s3_storage_factory() -@pytest.fixture() +@pytest.fixture def minio_storage() -> MinioStorage: return minio_storage_factory() diff --git a/dandiapi/search/tests/test_permissions.py b/dandiapi/search/tests/test_permissions.py index 1de98f343..6117b9fdc 100644 --- a/dandiapi/search/tests/test_permissions.py +++ b/dandiapi/search/tests/test_permissions.py @@ -7,7 +7,7 @@ from dandiapi.search.models import AssetSearch -@pytest.mark.django_db() +@pytest.mark.django_db def test_assetsearch_visible_to_permissions(draft_asset_factory, draft_version, user_factory): asset = draft_asset_factory() draft_version.dandiset.embargo_status = Dandiset.EmbargoStatus.EMBARGOED diff --git a/dandiapi/search/tests/test_views.py b/dandiapi/search/tests/test_views.py index 5f777d765..6e8ce0a28 100644 --- a/dandiapi/search/tests/test_views.py +++ b/dandiapi/search/tests/test_views.py @@ -8,13 +8,13 @@ from rest_framework.test import APIClient -@pytest.mark.django_db() +@pytest.mark.django_db def test_genotype_autocomplete_rest(api_client: APIClient) -> None: resp = api_client.get('/api/search/genotypes/') assert resp.status_code == 200 -@pytest.mark.django_db() +@pytest.mark.django_db def test_species_autocomplete_rest(api_client: APIClient) -> None: resp = api_client.get('/api/search/species/') assert resp.status_code == 200 diff --git a/dandiapi/settings.py b/dandiapi/settings.py index f2d1e0ef3..0e87dfc2d 100644 --- a/dandiapi/settings.py +++ b/dandiapi/settings.py @@ -113,6 +113,8 @@ def mutate_configuration(configuration: type[ComposedConfiguration]): # The CloudAMQP connection was dying, using the heartbeat should keep it alive CELERY_BROKER_HEARTBEAT = 20 + # Retry connections in case rabbit isn't immediately running + CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True # Clearing out the stock `SWAGGER_SETTINGS` variable causes a Django login # button to appear in Swagger, along with a spurious "authorize" button that diff --git a/dandiapi/urls.py b/dandiapi/urls.py index 32267b165..55eb92632 100644 --- a/dandiapi/urls.py +++ b/dandiapi/urls.py @@ -18,6 +18,7 @@ authorize_view, blob_read_view, info_view, + mailchimp_csv_view, root_content_view, stats_view, upload_complete_view, @@ -105,6 +106,7 @@ def to_url(self, value): path('admin/', admin.site.urls), path('dashboard/', DashboardView.as_view(), name='dashboard-index'), path('dashboard/user//', user_approval_view, name='user-approval'), + path('dashboard/mailchimp/', mailchimp_csv_view, name='mailchimp-csv'), # this url overrides the authorize url in oauth2_provider.urls to # support our user signup workflow re_path(r'^oauth/authorize/$', authorize_view, name='authorize'), diff --git a/dandiapi/zarr/tests/test_zarr.py b/dandiapi/zarr/tests/test_zarr.py index 93c91b728..5a0a886ff 100644 --- a/dandiapi/zarr/tests/test_zarr.py +++ b/dandiapi/zarr/tests/test_zarr.py @@ -11,7 +11,7 @@ from dandiapi.zarr.tasks import ingest_zarr_archive -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_create(authenticated_api_client, user, dandiset): assign_perm('owner', user, dandiset) name = 'My Zarr File!' @@ -39,7 +39,7 @@ def test_zarr_rest_create(authenticated_api_client, user, dandiset): assert zarr_archive.name == name -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_dandiset_malformed(authenticated_api_client, user, dandiset): assign_perm('owner', user, dandiset) resp = authenticated_api_client.post( @@ -54,7 +54,7 @@ def test_zarr_rest_dandiset_malformed(authenticated_api_client, user, dandiset): assert resp.json() == {'dandiset': ['This value does not match the required pattern.']} -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_create_not_an_owner(authenticated_api_client, zarr_archive): resp = authenticated_api_client.post( '/api/zarr/', @@ -67,7 +67,7 @@ def test_zarr_rest_create_not_an_owner(authenticated_api_client, zarr_archive): assert resp.status_code == 403 -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_create_duplicate(authenticated_api_client, user, zarr_archive): assign_perm('owner', user, zarr_archive.dandiset) resp = authenticated_api_client.post( @@ -82,7 +82,7 @@ def test_zarr_rest_create_duplicate(authenticated_api_client, user, zarr_archive assert resp.json() == ['Zarr already exists'] -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_create_embargoed_dandiset( authenticated_api_client, user, @@ -103,7 +103,7 @@ def test_zarr_rest_create_embargoed_dandiset( assert resp.json() == ['Cannot add zarr to embargoed dandiset'] -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_get(authenticated_api_client, storage, zarr_archive_factory, zarr_file_factory): # Pretend like ZarrArchive was defined with the given storage ZarrArchive.storage = storage @@ -128,7 +128,7 @@ def test_zarr_rest_get(authenticated_api_client, storage, zarr_archive_factory, } -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_list_filter(authenticated_api_client, dandiset_factory, zarr_archive_factory): # Create dandisets and zarrs dandiset_a: Dandiset = dandiset_factory() @@ -171,7 +171,7 @@ def test_zarr_rest_list_filter(authenticated_api_client, dandiset_factory, zarr_ assert results[0]['zarr_id'] == zarr_archive_b_a.zarr_id -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_get_very_big(authenticated_api_client, zarr_archive_factory): ten_quadrillion = 10**16 ten_petabytes = 10**16 @@ -193,7 +193,7 @@ def test_zarr_rest_get_very_big(authenticated_api_client, zarr_archive_factory): } -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_get_empty(authenticated_api_client, zarr_archive: ZarrArchive): resp = authenticated_api_client.get(f'/api/zarr/{zarr_archive.zarr_id}/') assert resp.status_code == 200 @@ -208,7 +208,7 @@ def test_zarr_rest_get_empty(authenticated_api_client, zarr_archive: ZarrArchive } -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_delete_file( authenticated_api_client, user, @@ -257,7 +257,7 @@ def test_zarr_rest_delete_file( assert zarr_archive.size == 0 -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_delete_file_asset_metadata( authenticated_api_client, user, @@ -299,7 +299,7 @@ def test_zarr_rest_delete_file_asset_metadata( assert asset.full_metadata['contentSize'] == 0 -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_delete_file_not_an_owner( authenticated_api_client, storage, zarr_archive: ZarrArchive, zarr_file_factory ): @@ -313,7 +313,7 @@ def test_zarr_rest_delete_file_not_an_owner( assert resp.status_code == 403 -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_delete_multiple_files( authenticated_api_client, user, @@ -344,7 +344,7 @@ def test_zarr_rest_delete_multiple_files( assert zarr_archive.size == 0 -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_delete_missing_file( authenticated_api_client, user, @@ -382,7 +382,7 @@ def test_zarr_rest_delete_missing_file( assert zarr_archive.size == zarr_file.size -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_file_list(api_client, storage, zarr_archive: ZarrArchive, zarr_file_factory): # Pretend like ZarrArchive was defined with the given storage ZarrArchive.storage = storage @@ -438,7 +438,7 @@ def test_zarr_file_list(api_client, storage, zarr_archive: ZarrArchive, zarr_fil ) -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_explore_head(api_client, storage, zarr_archive: ZarrArchive): # Pretend like ZarrArchive was defined with the given storage ZarrArchive.storage = storage diff --git a/dandiapi/zarr/tests/test_zarr_upload.py b/dandiapi/zarr/tests/test_zarr_upload.py index fbb29ddc1..83b0a4697 100644 --- a/dandiapi/zarr/tests/test_zarr_upload.py +++ b/dandiapi/zarr/tests/test_zarr_upload.py @@ -8,7 +8,7 @@ from dandiapi.zarr.models import ZarrArchive, ZarrArchiveStatus -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_upload_start( authenticated_api_client, user, zarr_archive: ZarrArchive, storage, monkeypatch ): @@ -50,7 +50,7 @@ def test_zarr_rest_upload_start( assert resp.json() == [HTTP_URL_RE] -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_upload_start_not_an_owner(authenticated_api_client, zarr_archive: ZarrArchive): resp = authenticated_api_client.post( f'/api/zarr/{zarr_archive.zarr_id}/files/', @@ -60,7 +60,7 @@ def test_zarr_rest_upload_start_not_an_owner(authenticated_api_client, zarr_arch assert resp.status_code == 403 -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_finalize( authenticated_api_client, user, @@ -87,13 +87,13 @@ def test_zarr_rest_finalize( assert zarr_archive.status == ZarrArchiveStatus.COMPLETE -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_finalize_not_an_owner(authenticated_api_client, zarr_archive: ZarrArchive): resp = authenticated_api_client.post(f'/api/zarr/{zarr_archive.zarr_id}/finalize/') assert resp.status_code == 403 -@pytest.mark.django_db() +@pytest.mark.django_db def test_zarr_rest_finalize_already_ingested( authenticated_api_client, user, zarr_archive: ZarrArchive ): diff --git a/dandiapi/zarr/views/__init__.py b/dandiapi/zarr/views/__init__.py index 24d6713da..39510008d 100644 --- a/dandiapi/zarr/views/__init__.py +++ b/dandiapi/zarr/views/__init__.py @@ -15,6 +15,7 @@ from rest_framework.viewsets import ReadOnlyModelViewSet from dandiapi.api.models.dandiset import Dandiset +from dandiapi.api.services import audit from dandiapi.api.storage import get_boto_client from dandiapi.api.views.pagination import DandiPagination from dandiapi.zarr.models import ZarrArchive, ZarrArchiveStatus @@ -138,10 +139,15 @@ def create(self, request): if dandiset.embargo_status != Dandiset.EmbargoStatus.OPEN: raise ValidationError('Cannot add zarr to embargoed dandiset') zarr_archive: ZarrArchive = ZarrArchive(name=name, dandiset=dandiset) - try: - zarr_archive.save() - except IntegrityError as e: - raise ValidationError('Zarr already exists') from e + with transaction.atomic(): + # Use nested transaction block to prevent zarr creation race condition + try: + with transaction.atomic(): + zarr_archive.save() + except IntegrityError as e: + raise ValidationError('Zarr already exists') from e + + audit.create_zarr(dandiset=dandiset, user=request.user, zarr_archive=zarr_archive) serializer = ZarrSerializer(instance=zarr_archive) return Response(serializer.data, status=status.HTTP_200_OK) @@ -175,6 +181,10 @@ def finalize(self, request, zarr_id): zarr_archive.status = ZarrArchiveStatus.UPLOADED zarr_archive.save() + audit.finalize_zarr( + dandiset=zarr_archive.dandiset, user=request.user, zarr_archive=zarr_archive + ) + # Dispatch task ingest_zarr_archive.delay(zarr_id=zarr_archive.zarr_id) return Response(None, status=status.HTTP_204_NO_CONTENT) @@ -280,15 +290,23 @@ def create_files(self, request, zarr_id): serializer = ZarrFileCreationSerializer(data=request.data, many=True) serializer.is_valid(raise_exception=True) + paths = serializer.validated_data # Generate presigned urls logger.info('Beginning upload to zarr archive %s', zarr_archive.zarr_id) - urls = zarr_archive.generate_upload_urls(serializer.validated_data) + urls = zarr_archive.generate_upload_urls(paths) # Set status back to pending, since with these URLs the zarr could have been changed zarr_archive.mark_pending() zarr_archive.save() + audit.upload_zarr_chunks( + dandiset=zarr_archive.dandiset, + user=request.user, + zarr_archive=zarr_archive, + paths=[p['path'] for p in paths], + ) + # Return presigned urls logger.info( 'Presigned %d URLs to upload to zarr archive %s', len(urls), zarr_archive.zarr_id @@ -320,4 +338,11 @@ def delete_files(self, request, zarr_id): paths = [file['path'] for file in serializer.validated_data] zarr_archive.delete_files(paths) + audit.delete_zarr_chunks( + dandiset=zarr_archive.dandiset, + user=request.user, + zarr_archive=zarr_archive, + paths=paths, + ) + return Response(None, status=status.HTTP_204_NO_CONTENT) diff --git a/dev/django.Dockerfile b/dev/django.Dockerfile index 9ac4d9ac2..069a9105d 100644 --- a/dev/django.Dockerfile +++ b/dev/django.Dockerfile @@ -20,7 +20,7 @@ COPY ./setup.py /opt/django-project/setup.py # Copy git folder for setuptools_scm COPY ./.git/ /opt/django-project/.git/ -# Don't install as editable, so that when the directory is mounted over with docker-compose, the +# Don't install as editable, so that when the directory is mounted over with `docker compose`, the # installation still exists (otherwise the dandiapi.egg-info/ directory would be overwritten, and # the installation would no longer exist) RUN pip install /opt/django-project[dev] diff --git a/doc/design/embargo-redesign.md b/doc/design/embargo-redesign.md index 754395afc..d3e783c2b 100644 --- a/doc/design/embargo-redesign.md +++ b/doc/design/embargo-redesign.md @@ -71,9 +71,9 @@ sequenceDiagram end ``` -## Change to Un-Embargo Procedure +## Change to Unembargo Procedure -Once the time comes to *********un-embargo********* those files, all that is required is to remove the `embargoed` tag from all of the objects. This can be achieved by an [S3 Batch Operations Job](https://docs.aws.amazon.com/AmazonS3/latest/userguide/batch-ops-create-job.html), in which the list of files is specified (all files belonging to the dandiset), and the desired action is specified (delete/replace tags). +Once the time comes to *********unembargo********* those files, all that is required is to remove the `embargoed` tag from all of the objects. This can be achieved by an [S3 Batch Operations Job](https://docs.aws.amazon.com/AmazonS3/latest/userguide/batch-ops-create-job.html), in which the list of files is specified (all files belonging to the dandiset), and the desired action is specified (delete/replace tags). The benefit of this approach is that once the files are uploaded, no further movement is required to change the embargo state, eliminating the storage, egress, and time costs associated with unembargoing from a second bucket. Using S3 Batch Operations to perform the untagging also means we can rely on AWS’s own error reporting mechanisms, while retrying any failed operations requires only minimal engineering effort within the Archive codebase. @@ -85,7 +85,7 @@ The benefit of this approach is that once the files are uploaded, no further mov 4. If there are no failures, the Job ID is set to null in the DB model, and the embargo status, metadata, etc. is updated to reflect that the dandiset is now `OPEN`. 5. Otherwise, an exception is raised and attended to by the developers. -A diagram of the un-embargo procedure (pertaining to just the objects) is shown below +A diagram of the unembargo procedure (pertaining to just the objects) is shown below ```mermaid sequenceDiagram @@ -95,8 +95,8 @@ sequenceDiagram participant Worker participant S3 - Client ->> Server: Un-embargo dandiset - Server ->> Worker: Dispatch un-embargo task + Client ->> Server: Unembargo dandiset + Server ->> Worker: Dispatch unembargo task Worker ->> S3: List of all dandiset objects are aggregated into a manifest Worker ->> S3: S3 Batch Operation job created S3 ->> Worker: Job ID is returned diff --git a/setup.py b/setup.py index eeab69fe0..e7db9cc0a 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,8 @@ include_package_data=True, install_requires=[ 'celery', - 'dandischema~=0.10.1', + # Pin dandischema to exact version to make explicit which schema version is being used + 'dandischema==0.10.2', # schema version 0.6.8 'django~=4.1.0', 'django-admin-display', # Require 0.58.0 as it is the first version to support postgres' native @@ -67,7 +68,7 @@ # Production-only 'django-composed-configuration[prod]>=0.23.0', 'django-s3-file-field[s3]>=1.0.0', - 'django-storages[s3]>=1.14.2', + 'django-storages[s3]==1.14.3', 'gunicorn', # Development-only, but required 'django-minio-storage', diff --git a/web/package.json b/web/package.json index 543fd2205..8f1bbee11 100644 --- a/web/package.json +++ b/web/package.json @@ -10,8 +10,8 @@ }, "dependencies": { "@apidevtools/json-schema-ref-parser": "^9.0.7", - "@girder/oauth-client": "^0.8.0", "@koumoul/vjsf": "2.23.3", + "@resonant/oauth-client": "^0.9.0", "@sentry/integrations": "^7.88.0", "@sentry/vue": "^7.88.0", "ajv": "^8.12.0", diff --git a/web/src/components/Meditor/types.ts b/web/src/components/Meditor/types.ts index 5ca93b13f..0910a98c2 100644 --- a/web/src/components/Meditor/types.ts +++ b/web/src/components/Meditor/types.ts @@ -65,7 +65,7 @@ export const isJSONSchema = (schema: JSONSchemaUnionType): schema is JSONSchema7 export const isBasicSchema = (schema: JSONSchemaUnionType): schema is BasicSchema => ( isJSONSchema(schema) - && (isBasicType(schema.type) || schema.type === undefined) + && (isBasicType(schema.type)) ); export const isObjectSchema = (schema: JSONSchemaUnionType): schema is ObjectSchema => ( diff --git a/web/src/rest.ts b/web/src/rest.ts index 4e3775d17..69922b1e3 100644 --- a/web/src/rest.ts +++ b/web/src/rest.ts @@ -1,7 +1,7 @@ import type { AxiosRequestConfig, AxiosResponse } from 'axios'; import axios from 'axios'; import { ref } from 'vue'; -import OAuthClient from '@girder/oauth-client'; +import OAuthClient from '@resonant/oauth-client'; import type { Asset, Dandiset, diff --git a/web/src/types/index.ts b/web/src/types/index.ts index 6be6486c4..7e6caaf69 100644 --- a/web/src/types/index.ts +++ b/web/src/types/index.ts @@ -7,6 +7,7 @@ export interface User { username: string, name: string, admin?: boolean, + email: string, status: 'INCOMPLETE' | 'PENDING' | 'APPROVED' | 'REJECTED', approved: boolean, } @@ -37,6 +38,7 @@ export interface Version { version: string, name: string, asset_count: number, + active_uploads: number size: number, status: 'Pending' | 'Validating' | 'Valid' | 'Invalid' | 'Published', validation_error?: string, diff --git a/web/src/views/DandisetLandingView/ContactDialog.vue b/web/src/views/DandisetLandingView/ContactDialog.vue new file mode 100644 index 000000000..0ab319ae6 --- /dev/null +++ b/web/src/views/DandisetLandingView/ContactDialog.vue @@ -0,0 +1,143 @@ + + + diff --git a/web/src/views/DandisetLandingView/DandisetActions.vue b/web/src/views/DandisetLandingView/DandisetActions.vue index fc9ae4391..02b7647ca 100644 --- a/web/src/views/DandisetLandingView/DandisetActions.vue +++ b/web/src/views/DandisetLandingView/DandisetActions.vue @@ -37,13 +37,15 @@ - +