From eca9fd3401f0b49ab2b41a3ce3da72d127a18dd2 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 6 Feb 2023 14:37:01 -0500
Subject: [PATCH 1/5] Add custom dictionary for custom fixes

---
 .codespell_dict | 1 +
 .codespellrc    | 4 ++++
 2 files changed, 5 insertions(+)
 create mode 100644 .codespell_dict
 create mode 100644 .codespellrc

diff --git a/.codespell_dict b/.codespell_dict
new file mode 100644
index 000000000..adf6d3052
--- /dev/null
+++ b/.codespell_dict
@@ -0,0 +1 @@
+zar->zarr
diff --git a/.codespellrc b/.codespellrc
new file mode 100644
index 000000000..30434316d
--- /dev/null
+++ b/.codespellrc
@@ -0,0 +1,4 @@
+[codespell]
+skip = .git,node_modules,venvs,.tox,yarn.lock
+dictionary = .codespell_dict
+# ignore-words-list =

From 91c9eca19b664d64037b7e07370ebff11220ea8b Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 6 Feb 2023 14:54:33 -0500
Subject: [PATCH 2/5] [DATALAD RUNCMD] Run codespell throughout fixing few new typos

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 dandiapi/api/tests/test_asset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py
index a5ed86d75..7d6fef4ce 100644
--- a/dandiapi/api/tests/test_asset.py
+++ b/dandiapi/api/tests/test_asset.py
@@ -161,7 +161,7 @@ def test_asset_total_size(

     assert Asset.total_size() == asset_blob.size + zarr_archive.size

-    # TODO: add testing for embargoed zar added, whenever embargoed zarrs
+    # TODO: add testing for embargoed zarr added, whenever embargoed zarrs
     # supported, ATM they are not and tested by test_zarr_rest_create_embargoed_dandiset

From adf912e9e6a990ba283df06003f8adb9910cf26d Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 6 Feb 2023 14:56:03 -0500
Subject: [PATCH 3/5] Disable custom vocab since cannot specify multiple in config yet

---
 .codespellrc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.codespellrc b/.codespellrc
index 30434316d..172bd86bd 100644
--- a/.codespellrc
+++ b/.codespellrc
@@ -1,4 +1,6 @@
 [codespell]
 skip = .git,node_modules,venvs,.tox,yarn.lock
-dictionary = .codespell_dict
+# Disabled until https://github.com/codespell-project/codespell/issues/2727
+# got answer/re-solution
+# dictionary = .codespell_dict
 # ignore-words-list =

From 04594a1d85d8c03e016eededaffa99f543e1b93a Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 6 Feb 2023 14:57:09 -0500
Subject: [PATCH 4/5] [DATALAD RUNCMD] Run codespell throughout fixing few new typos

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 dandiapi/api/models/oauth.py                    | 2 +-
 dandiapi/api/tests/test_asset_paths.py          | 4 ++--
 dandiapi/api/tests/test_upload.py               | 2 +-
 dandiapi/api/views/serializers.py               | 2 +-
 dandiapi/zarr/tests/test_ingest_zarr_archive.py | 2 +-
 doc/design/deployment-1.md                      | 4 ++--
 doc/design/embargo-full.md                      | 4 ++--
 doc/design/embargo-mvp.md                       | 6 +++---
 doc/design/publish-1.md                         | 2 +-
 doc/design/zarr-performance-redesign.md         | 6 +++---
 doc/design/zarr-support-3.md                    | 2 +-
 web/src/components/Meditor/state.ts             | 4 ++--
 web/src/rest.ts                                 | 2 +-
 13 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/dandiapi/api/models/oauth.py b/dandiapi/api/models/oauth.py
index 64f63dc5d..e55982cdc 100644
--- a/dandiapi/api/models/oauth.py
+++ b/dandiapi/api/models/oauth.py
@@ -26,7 +26,7 @@ def clean(self):
         try:
             super().clean()
         except ValidationError as e:
-            # don't validate URLs so we can use wilcards too
+            # don't validate URLs so we can use wildcards too
             if 'Enter a valid URL.' not in str(e):
                 raise e
diff --git a/dandiapi/api/tests/test_asset_paths.py b/dandiapi/api/tests/test_asset_paths.py
index bc1e33285..3a5f569a3 100644
--- a/dandiapi/api/tests/test_asset_paths.py
+++ b/dandiapi/api/tests/test_asset_paths.py
@@ -105,7 +105,7 @@ def test_asset_path_add_version_asset_paths(draft_version_factory, asset_factory
     version.assets.add(asset_factory(path='foo/baz/file.txt'))
     version.assets.add(asset_factory(path='top.txt'))

-    # Add verison asset paths
+    # Add version asset paths
     add_version_asset_paths(version)

     # Check paths have expected file count and size
@@ -139,7 +139,7 @@ def test_asset_path_add_version_asset_paths_idempotent(draft_version_factory, as
     version.assets.add(asset_factory(path='foo/baz/file.txt'))
     version.assets.add(asset_factory(path='top.txt'))

-    # Add verison asset paths
+    # Add version asset paths
     add_version_asset_paths(version)
     add_version_asset_paths(version)
diff --git a/dandiapi/api/tests/test_upload.py b/dandiapi/api/tests/test_upload.py
index c5aee59b8..4d667b11c 100644
--- a/dandiapi/api/tests/test_upload.py
+++ b/dandiapi/api/tests/test_upload.py
@@ -212,7 +212,7 @@ def test_upload_initialize_embargo_existing_asset_blob(
     dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED)
     assign_perm('owner', user, dandiset)

-    # Embargoed assets that are already uploaded publically don't need to be private
+    # Embargoed assets that are already uploaded publicly don't need to be private
     resp = api_client.post(
         '/api/uploads/initialize/',
         {
diff --git a/dandiapi/api/views/serializers.py b/dandiapi/api/views/serializers.py
index 555242840..831d2d743 100644
--- a/dandiapi/api/views/serializers.py
+++ b/dandiapi/api/views/serializers.py
@@ -91,7 +91,7 @@ class Meta:


 class DandisetVersionSerializer(serializers.ModelSerializer):
-    """The verison serializer nested within the Dandiset Serializer."""
+    """The version serializer nested within the Dandiset Serializer."""

     class Meta:
         model = Version
diff --git a/dandiapi/zarr/tests/test_ingest_zarr_archive.py b/dandiapi/zarr/tests/test_ingest_zarr_archive.py
index ae437c7c5..d9773a4f1 100644
--- a/dandiapi/zarr/tests/test_ingest_zarr_archive.py
+++ b/dandiapi/zarr/tests/test_ingest_zarr_archive.py
@@ -131,7 +131,7 @@ def test_ingest_zarr_archive_force(zarr_upload_file_factory, zarr_archive_factor
     # Perform initial ingest
     ingest_zarr_archive(str(zarr.zarr_id))

-    # Get inital checksum
+    # Get initial checksum
     zarr.refresh_from_db()
     first_checksum = zarr.checksum
diff --git a/doc/design/deployment-1.md b/doc/design/deployment-1.md
index 71e9ab97f..43ab4b758 100644
--- a/doc/design/deployment-1.md
+++ b/doc/design/deployment-1.md
@@ -15,12 +15,12 @@ This proposed solution is to use a second `release` branch which tracks the rele
 There are two central git branches:
 - **`master`**: the active development branch. PRs should always use `master` as their merge target.
-- **`release`**: the *current* release branch. This will be reset to point to the top of master whenever a release ocurrs.
+- **`release`**: the *current* release branch. This will be reset to point to the top of master whenever a release occurs.

 Staging is deployed from `master`, while production is deployed from `release`.

 ## The `release` branch
-The `release` branch is kept up to date using a GitHub CI workflow. Whenever a release ocurrs, the `release` branch is reset to point to `master` (to avoid merge conflicts). The `release` branch should therefore always be pointed at the latest release tag.
+The `release` branch is kept up to date using a GitHub CI workflow. Whenever a release occurs, the `release` branch is reset to point to `master` (to avoid merge conflicts). The `release` branch should therefore always be pointed at the latest release tag.

 ## Netlify deployment
 The staging and production Netlify sites are now both managed using a single `netlify.toml`. [Deploy contexts](https://docs.netlify.com/configure-builds/file-based-configuration/#deploy-contexts) allow us to differentiate between the production and staging sites. Production uses the default configuration, while staging uses a `branch-deploy` configuration.
diff --git a/doc/design/embargo-full.md b/doc/design/embargo-full.md
index a2ca3d36d..936908621 100644
--- a/doc/design/embargo-full.md
+++ b/doc/design/embargo-full.md
@@ -50,7 +50,7 @@ A link to the dandiset with this secret URL parameter is included somewhere on t
 ### TODO how to download anonymously with the CLI?

 ## Embargo period enforcement
-NIH embargoes (and embargos in general) will have an end date to ensure that the data is not secret forever.
+NIH embargoes (and embargoes in general) will have an end date to ensure that the data is not secret forever.
 We will enforce that an end date be specified for every new embargoed dandiset, and forcibly release embargoed dandisets that expire.

 The MVP collects the NIH award number and stores it in the metadata.
@@ -76,7 +76,7 @@ If this becomes an issue, we could:
   This could be as simple as adding a new django app and a new Heroku dyno, or as complex as a Lambda@Edge+CloudFront service.
 * Dynamically provision IAM users with permission to access prefixes in the embargo bucket and distribute access keys to users.
   This would require the API server to manage IAM directly, which is a lot of complexity to manage.
-* Make the embargo bucket publically readable, but not listable.
+* Make the embargo bucket publicly readable, but not listable.
   If anyone knows the full S3 object key they have the ability to download the data, but they will not have the ability to search for or scan the bucket for new content.
   We would then distribute the zarr_id to anyone who needs to access an embargoed zarr archive, giving them permanent read access to it.
   The downside is that access is not revocable, since we cannot take back the zarr ID from the user or efficiently change the location of the zarr archive.
diff --git a/doc/design/embargo-mvp.md b/doc/design/embargo-mvp.md
index aadeaca60..013fb85ae 100644
--- a/doc/design/embargo-mvp.md
+++ b/doc/design/embargo-mvp.md
@@ -26,7 +26,7 @@ Uploads to an embargoed dandiset will function exactly the same from an API pers

 # Data storage
 Embargoed assets will be stored in a separate S3 bucket.
-This bucket is private and not browseable by the general public.
+This bucket is private and not browsable by the general public.
 Each dandiset stored in the embargoed bucket will be prefixed with a dandiset identifier.
 This will make it easier to manage embargo permissions for a specific embargoed dandiset.
@@ -40,7 +40,7 @@ Assuming dandiset `123456` was embargoed:
 When unembargoing an embargoed dandiset, all asset data for that dandiset is copied to the public bucket.

-When uploading a new asset to an embargoed dandiset, the server will first check if that blob has already been uploaded publically.
+When uploading a new asset to an embargoed dandiset, the server will first check if that blob has already been uploaded publicly.
 If so, the public blob will be used instead of uploading the data again to the embargo bucket.

 # Data download
@@ -62,7 +62,7 @@ A test implementation can be found [here](https://github.com/dandi/dandi-api/com
 ## Models
 The `Dandiset` model will have an `embargo_status` field that is one of `EMBARGOED`, `UNEMBARGOING`, or `OPEN`.
-* `OPEN` means that the Dandiset is publically accessible and publishable.
+* `OPEN` means that the Dandiset is publicly accessible and publishable.
   This is the state all Dandisets currently have.
 * `EMBARGOED` means that the Dandiset is embargoed.
   It is searchable and viewable to owners.
diff --git a/doc/design/publish-1.md b/doc/design/publish-1.md
index eae56c51b..82265df61 100644
--- a/doc/design/publish-1.md
+++ b/doc/design/publish-1.md
@@ -108,7 +108,7 @@ If a Version is not `VALID` or any of the Assets in the Version is not `VALID`,

 1. The dandiset is locked so that no other publishes can happen simultaneously.
 2. A new published Version is created with the required publish metadata information.
-It is initialy empty.
+It is initially empty.
 3. A new DOI is created that points to the new Version.
 This is an API call to an external service, https://datacite.org/.
diff --git a/doc/design/zarr-performance-redesign.md b/doc/design/zarr-performance-redesign.md
index afb0a3bf5..4aa8ddaeb 100644
--- a/doc/design/zarr-performance-redesign.md
+++ b/doc/design/zarr-performance-redesign.md
@@ -53,11 +53,11 @@ The process is as follows.

 (Steps 1 and 2): `dandi-cli` asks the server to create a new Zarr archive, which is put into the `PENDING` state.

-(Steps 3 and 4): For each batch of (maxiumum) 255 Zarr chunk files the client wants to upload, `dandi-cli` asks the server to create an `Upload`, supplying the list of file paths and associated etags, and receiving a list of signed upload URLs.
+(Steps 3 and 4): For each batch of (maximum) 255 Zarr chunk files the client wants to upload, `dandi-cli` asks the server to create an `Upload`, supplying the list of file paths and associated etags, and receiving a list of signed upload URLs.

 (Step 5): `dandi-cli` uses these URLs to upload the files in that batch.

-(Steps 6 and 7): Then, `dandi-cli` asks the server to finalize the batch, and the server does so, matching etags and verifiying that all files were uploaded. *This step is very costly, due to the server's need to contact S3 to verify these conditions.*
+(Steps 6 and 7): Then, `dandi-cli` asks the server to finalize the batch, and the server does so, matching etags and verifying that all files were uploaded. *This step is very costly, due to the server's need to contact S3 to verify these conditions.*

 (Step 8): When all batches are uploaded, `dandi-cli` signals the server to ingest the Zarr archive.

@@ -208,4 +208,4 @@ and gain significant performance for Zarr upload.
 We previously included extra functionality, which involved *including* the locally computed checksum
 when finalizing the zarr archive (step 7), and adding a `MISMATCH` state to the zarr `status` field,
 which would be set if the checksum produced by the asynchronous zarr checksum task didn't match the checksum provided in step 7.
-This addition was later reverted in the interest of simplicity, as well as the fact that it is funtionally equivalent to the current design.
+This addition was later reverted in the interest of simplicity, as well as the fact that it is functionally equivalent to the current design.
diff --git a/doc/design/zarr-support-3.md b/doc/design/zarr-support-3.md
index 3be5741ff..6833c6e93 100644
--- a/doc/design/zarr-support-3.md
+++ b/doc/design/zarr-support-3.md
@@ -152,4 +152,4 @@ This ensures that published dandisets are truly immutable.

 Immutability is enforced by disabled the upload and delete endpoints for the zarr archive.

-The client needs to agressively inform users that publishing a dandiset with a zarr archive will render that zarr archive immutable.
+The client needs to aggressively inform users that publishing a dandiset with a zarr archive will render that zarr archive immutable.
diff --git a/web/src/components/Meditor/state.ts b/web/src/components/Meditor/state.ts
index 16a84a3f3..608688788 100644
--- a/web/src/components/Meditor/state.ts
+++ b/web/src/components/Meditor/state.ts
@@ -1,10 +1,10 @@
 import Vue, { computed, ref } from 'vue';
 import { EditorInterface } from './editor';

-// NOTE: it would be better to use a single ref here instead of seperate state/computed
+// NOTE: it would be better to use a single ref here instead of separate state/computed
 // variables, but doing so introduces a strange bug where editorInterface.basicModel is
 // un-reffed immediately after instantiation. This does not occur when using a computed
-// variable with a seperate state object, so we do that here as a workaround.
+// variable with a separate state object, so we do that here as a workaround.
 const state = {
   editorInterface: null as EditorInterface | null,
 };
diff --git a/web/src/rest.ts b/web/src/rest.ts
index c904c46ad..5bfdf44d1 100644
--- a/web/src/rest.ts
+++ b/web/src/rest.ts
@@ -57,7 +57,7 @@ const dandiRest = new Vue({
         // Fetch user
         this.user = await this.me();
       } catch (e) {
-        // A status of 401 indicates login failed, so the exception should be supressed.
+        // A status of 401 indicates login failed, so the exception should be suppressed.
         if (axios.isAxiosError(e) && e.response?.status === 401) {
           await oauthClient.logout();
         } else {

From 8333f75618b0b893cf210d6ea52ef058f533ce87 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 6 Feb 2023 15:00:43 -0500
Subject: [PATCH 5/5] Add codespell to tox -e lint

We have lint doing codespell also in dandi-cli
---
 tox.ini | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tox.ini b/tox.ini
index 74fd6085e..8f18bd507 100644
--- a/tox.ini
+++ b/tox.ini
@@ -9,8 +9,11 @@ envlist =
 skipsdist = true
 extras =
     lint
+deps =
+    codespell~=2.0
 commands =
     flake8 --config=tox.ini {posargs:.}
+    codespell

 [testenv:type]
 extras =
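Usage sketch (illustrative, not part of the patch series itself): with these patches applied, codespell reads its settings from the [codespell] section of .codespellrc, and PATCH 5 wires it into the existing lint tox environment next to flake8. Assuming a standard tox setup, the checks could be run locally roughly as follows; `codespell -w` is the write-fixes invocation recorded in the DATALAD RUNCMD commit messages of PATCH 2 and PATCH 4.

    # Run flake8 and codespell together via the lint environment extended in tox.ini
    tox -e lint

    # Or invoke codespell directly; it picks up the skip (and, once re-enabled,
    # dictionary) settings from .codespellrc in the repository root
    codespell          # report typos only
    codespell -w       # write fixes in place, as done in the RUNCMD commits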