Merge branch 'master' into feat/area-or-point

linz · Oct 23, 2024 · 63cdc87 · 63cdc87
2 parents 067356c + ea18ebf
commit 63cdc87
Show file tree

Hide file tree

Showing 197 changed files with 59,303 additions and 59 deletions.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -0,0 +1,13 @@
+### Motivation
+
+<!-- TODO: Say why you made your changes. -->
+
+### Modifications
+
+<!-- TODO: Say what changes you made. -->
+
+<!-- TODO: Attach screenshots if you changed the UI. -->
+
+### Verification
+
+<!-- TODO: Say how you tested your changes. -->
diff --git a/.github/workflows/actionlint.dockerfile b/.github/workflows/actionlint.dockerfile
@@ -0,0 +1,4 @@
+# Workaround for https://github.com/dependabot/dependabot-core/issues/8362.
+# Once that is fixed, remove this file and replace the Docker build and run
+# lines in `.github/workflows/*.yml` with a `uses: docker://rhysd/…` line.
+FROM rhysd/actionlint:1.6.26@sha256:2362769b1d75056da70e7af1b12d9e52746f3a123b8f22a4322869e8f2cd45f2
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -1,42 +1,146 @@
 name: Publish
-
-on: [push]
+on:
+  push:
+  workflow_dispatch:
 
 jobs:
   main:
-    name: Publish
+    name: Validate STAC
+    runs-on: ubuntu-latest
+    steps:
+      - uses: linz/action-typescript@9bf69b0f313b3525d3ba3116f26b1aff7eb7a6c0 # v3
+        with:
+          node-version: 20.x
+
+      - name: Create STAC Catalog
+        uses: docker://ghcr.io/linz/argo-tasks:v4
+        with:
+          args: stac-catalog --output stac/catalog.json --template template/catalog.json /github/workspace/stac/
+
+      - name: Validate STAC Catalog
+        uses: docker://ghcr.io/linz/argo-tasks:v4
+        with:
+          args: stac-validate /github/workspace/stac/catalog.json
+
+      - name: Validate STAC Collections
+        run: |
+          # Enable double star operator
+          shopt -s globstar
+          docker run -v "$PWD:$PWD" ghcr.io/linz/argo-tasks:v4 stac-validate "$PWD"/stac/**/collection.json
+
+      - name: Download actionlint
+        run: docker build --tag actionlint - < .github/workflows/actionlint.dockerfile
+
+      - name: Run actionlint to check workflow files
+        run: docker run --volume="${PWD}:/repo" --workdir=/repo actionlint -color
+
+  publish-odr:
+    name: Publish ODR
     runs-on: ubuntu-latest
+    needs: main
+    if: ${{ github.ref == 'refs/heads/master' }}
+
+    environment:
+      name: prod
+
     permissions:
       id-token: write
       contents: read
 
     steps:
-      - uses: linz/action-typescript@v3
+      - uses: linz/action-typescript@9bf69b0f313b3525d3ba3116f26b1aff7eb7a6c0 # v3
+        with:
+          node-version: 20.x
 
-      # FIXME: catalog.json is not pushed to the repository (temporary solution)
-      - name: Create Catalog
-        uses: docker://ghcr.io/linz/argo-tasks:v2
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
         with:
-          args: stac-catalog --output stac/catalog.json --template template/catalog.json /github/workspace/stac/
+          fetch-depth: 0
+
+      - name: Use Node.js 18.x
+        uses: actions/setup-node@8f152de45cc393bb48ce5d89d36b731f54556e65 # v4.0.0
+        with:
+          node-version: '18.x'
 
-      # FIXME: no collections exist so nothing to validate
-      # - name: Validate STAC
-      #   run: |
-      #     # Enable double star operator
-      #     shopt -s globstar
-      #     docker run -v $PWD:$PWD ghcr.io/linz/argo-tasks:v2 stac-validate $PWD/stac/**/collection.json
+      - name: Setup kubectl
+        uses: azure/setup-kubectl@3e0aec4d80787158d308d7b364cb1b702e7feb7f # v4.0.0
+        with:
+          version: 'latest'
 
       - name: AWS Configure
-        if: github.ref == 'refs/heads/master'
-        uses: aws-actions/configure-aws-credentials@v2
+        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4
         with:
           aws-region: ap-southeast-2
           mask-aws-account-id: true
-          role-to-assume: ${{ secrets.AWS_CI_ROLE }}
+          role-to-assume: ${{ secrets.AWS_EKS_CI_ROLE }}
+
+      - name: Login to EKS
+        run: |
+          aws eks update-kubeconfig --name Workflows --region ap-southeast-2
+
+      - name: Check EKS connection
+        run: |
+          kubectl get nodes
+
+      - name: Install Argo
+        run: |
+          curl -sLO https://github.com/argoproj/argo-workflows/releases/download/v3.4.11/argo-linux-amd64.gz
+          gunzip argo-linux-amd64.gz
+          chmod +x argo-linux-amd64
+          ./argo-linux-amd64 version
+
+      - name: Submit Added/Changed Parameter Files
+        id: modified-files
+        if: ${{ !contains(github.event.head_commit.message, '[skip-argo]')}}
+        run: |
+          # AM = Include: Added, Modified
+          mapfile -d '' modified_parameter_files < <(git diff --name-only --diff-filter=AM -z ${{ github.event.before }} ${{ github.event.after }} -- "publish-odr-parameters/*.yaml")
+
+          for file in "${modified_parameter_files[@]}"; do
+            ./argo-linux-amd64 submit --wait --from wftmpl/copy -n argo -f "$file" -p aws_role_config_path="s3://linz-bucket-config/config-write.open-data-registry.json" -p exclude="collection.json$" --generate-name "publish-odr-file-copy-"
+          done
+
+  sync-stac:
+    name: Sync STAC files
+    runs-on: ubuntu-latest
+    concurrency: publish-${{ github.ref }}
+    needs: publish-odr
+    if: ${{ github.ref == 'refs/heads/master' }}
+
+    environment:
+      name: prod
+
+    permissions:
+      id-token: write
+      contents: read
+
+    steps:
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
+        with:
+          ref: master # Ensure only the latest STAC documents are synced into S3
+
+      - name: AWS Configure
+        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4
+        with:
+          aws-region: ap-southeast-2
+          mask-aws-account-id: true
+          role-to-assume: ${{ secrets.AWS_ODR_CI_ROLE }}
+
+      - name: AWS Configure ODR
+        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4
+        with:
+          aws-region: ap-southeast-2
+          mask-aws-account-id: true
+          role-to-assume: ${{ secrets.AWS_ODR_DATA_MANAGER_ROLE }}
+          role-chaining: true
+
+      - name: Create STAC Catalog
+        uses: docker://ghcr.io/linz/argo-tasks:v4
+        with:
+          args: stac-catalog --output stac/catalog.json --template template/catalog.json /github/workspace/stac/
 
       # Sync STAC files only on push to 'master'
       - name: Sync STAC
-        uses: docker://ghcr.io/linz/argo-tasks:v2
-        if: github.ref == 'refs/heads/master'
+        if: ${{ !contains(github.event.head_commit.message, '[skip-sync]')}}
+        uses: docker://ghcr.io/linz/argo-tasks:v4
         with:
-          args: stac-sync /github/workspace/stac/ s3://linz-elevation/
+          args: stac-sync /github/workspace/stac/ s3://nz-elevation/
diff --git a/.prettierignore b/.prettierignore
@@ -0,0 +1 @@
+publish-odr-parameters
diff --git a/README.md b/README.md
@@ -1,7 +1,26 @@
 # New Zealand Elevation
 
-Toitū Te Whenua makes New Zealand’s most up-to-date publicly owned elevation data freely available to use under an open licence.
-You can access this through the [LINZ Data Service](https://data.linz.govt.nz/data/category/elevation/).
+Toitū Te Whenua makes New Zealand's most up-to-date publicly owned elevation data freely available to use under an open licence. You can access this through the [LINZ Data Service](https://data.linz.govt.nz/data/category/elevation/), [LINZ Basemaps](https://basemaps.linz.govt.nz/@-41.8899962,174.0492437,z5?i=elevation) or the [Registry of Open Data on AWS](https://registry.opendata.aws/nz-elevation/).
+
+This repository contains STAC Collection metadata for each elevation dataset, as well as some guidance documentation:
+
+- [Naming](docs/naming.md) covers the s3://nz-elevation bucket naming structure
+- [Usage](docs/usage.md) shows how TIFFs can be interacted with from S3 using GDAL, QGIS, etc
+- [Elevation Compression](docs/tiff-compression) provides commentary and analysis on the compression options we explored.
+
+## AWS Access
+
+Toitū Te Whenua owns and maintains a public bucket which is sponsored and shared via the [Registry of Open Data on AWS](https://registry.opendata.aws/nz-elevation/) `s3://nz-elevation` in `ap-southeast-2`.
+
+Using the [AWS CLI](https://aws.amazon.com/cli/) anyone can access all of the elevation data specified in this repository.
+
+```
+aws s3 ls --no-sign-request s3://nz-elevation/
+```
+
+### Browsing the S3 Bucket
+
+[STAC Browser](https://radiantearth.github.io/stac-browser/#/external/nz-elevation.s3.ap-southeast-2.amazonaws.com/catalog.json) can be used to browse through the contents of the S3 bucket.
 
 ## Related
 

diff --git a/docs/GeoJSON-compression.md b/docs/GeoJSON-compression.md
@@ -0,0 +1,23 @@
+# GeoJSON compression
+
+## Summary
+
+Toitū Te Whenua has decided to store all metadata _uncompressed,_ including GeoJSON.
+
+## Arguments
+
+Pro compression:
+
+- Saves money on storage
+- Saves time and money on transfer of data
+- Metadata files are highly compressible, since they have a lot of text strings and repetition
+
+Contra compression:
+
+- Some tools do not seamlessly decompress files
+  - [AWS CLI issue](https://github.com/aws/aws-cli/issues/6765)
+  - [boto3 issue](https://github.com/boto/botocore/issues/1255)
+- Any files on S3 "[smaller than 128 KB](https://aws.amazon.com/s3/pricing/)" (presumably actually 128 KiB) are treated as being 128 KB for pricing purposes, so there would be no price gain from compressing any files which are smaller than this
+- The extra development time to deal with compressing and decompressing JSON files larger than 128 KB would not offset the savings:
+    - We can get the sizes of JSON files by running `aws s3api list-objects-v2 --bucket=nz-elevation --no-sign-request --query="Contents[?ends_with(Key, 'json')].Size"` and `aws s3api list-objects-v2 --bucket=nz-imagery --no-sign-request --query="Contents[?ends_with(Key, 'json')].Size"`
+    - Summing up the sizes of files larger than 128 KB we get a total of only _33 MB_ at time of writing
diff --git a/docs/img/usage/arcgis_pro_catalog_view.png b/docs/img/usage/arcgis_pro_catalog_view.png
diff --git a/docs/img/usage/arcgis_pro_connection_details.png b/docs/img/usage/arcgis_pro_connection_details.png
diff --git a/docs/img/usage/arcgis_pro_new_cloud_connection.png b/docs/img/usage/arcgis_pro_new_cloud_connection.png
diff --git a/docs/img/usage/cyberduck_bucket_view.png b/docs/img/usage/cyberduck_bucket_view.png
diff --git a/docs/img/usage/cyberduck_open_connection.png b/docs/img/usage/cyberduck_open_connection.png
diff --git a/docs/img/usage/qgis_data-source-manager.png b/docs/img/usage/qgis_data-source-manager.png
diff --git a/docs/img/usage/qgis_visualisation.png b/docs/img/usage/qgis_visualisation.png
diff --git a/docs/naming.md b/docs/naming.md
@@ -0,0 +1,154 @@
+# Elevation Dataset Naming Conventions
+
+Elevation dataset titles and S3 paths are constructed from metadata about each elevation survey so that they will be consistent and human readable. Elevation is stored according to the majority region that each dataset covers.
+
+## Elevation Dataset Titles
+
+The elevation dataset title is constructed from metadata that is entered when an elevation dataset is processed.
+
+```
+<region>[ - <geographic_description>?] LiDAR <gsd>m <geospatial_category> (<start_year>[-<end_year>?])[ - <lifecycle>?]
+```
+
+which can be broken down as:
+
+- the main `<region>` that the dataset covers
+- then if it exists, the `<geographic_description>` is used
+- then "LiDAR"
+- then `<gsd>` (which is always in metres)
+- then `<geospatial_category>`
+- then `<start_year>` (using all four digits to indicate the year)
+- if the elevation dataset was captured over multiple years, include a hyphen and the `<end_year>` (using all four digits to indicate the year)
+- if the elevation dataset has been processed as a QC preview or if it only represents partial capture, include "- Preview" or "- Draft" at the end of the title, from the dataset `<lifecycle>`
+
+## Elevation Dataset S3 Paths
+
+The elevation dataset S3 path is also constructed from similar metadata.
+
+```
+<region>/
+  [<geographic_description>|<region>]_<start_year>[-<end_year>?]/
+    <product>_<gsd>m/
+      <crs>/
+```
+
+which can be broken down as:
+
+- the main `<region>` that the dataset covers
+- then if it exists, the `<geographic_description>` is used, if not, `<region>` is repeated instead (this would be the case where the elevation dataset contains full coverage of the region)
+- then `<start_year>` (using all four digits to indicate the year)
+- if the elevation dataset was captured over multiple years, include a hyphen and the `<end_year>` (using all four digits to indicate the year)
+- then `<product>` as multiple products may be created from the same elevation survey
+- then `<gsd>` (which is always in metres)
+- then `<crs>` as we may store the data in different coordinate reference systems for different purposes
+
+### S3 Path Restrictions
+
+The _path_ is restricted to a limited set of characters with no whitespace: lowercase "a" through "z", numbers "0" through "9", hyphen ("-"), and underscore ("\_"). When generating a [dataset S3 path](#elevation-dataset-s3-paths), the system will pass through these characters unchanged to the path, and will transform many others to allowed characters - see the subsections for details. Any characters not mentioned in this section or subsections will result in an error.
+
+#### Uppercase characters
+
+Uppercase characters are changed to lowercase. For example, "Wellington" is changed to "wellington".
+
+#### Diacritics
+
+Characters with [diacritics](https://www.compart.com/en/unicode/block/U+0300), such as macrons ("ā", "ē", etc), are transliterated into Latin script. For example, a dataset with "Ōmāpere" in the title would have "omapere" in the path.
+
+#### Spaces, commas, and slashes
+
+These characters are replaced with a hyphen. For example, "Tikitapu/Blue Lake" is changed to "tikitapu-blue-lake".
+
+#### Apostrophes
+
+These are _removed,_ so "Hawke's Bay" is changed to "hawkes-bay".
+
+#### Ampersands
+
+These are replaced with "-and-", so "Gore A&P Showgrounds" is changed to "gore-a-and-p-showgrounds".
+
+#### Other characters
+
+"ø" is transliterated to "o", so "Mount Brøgger" is changed to "mount-brogger".
+
+## Title and S3 Path Components
+
+### `crs`
+
+EPSG Code for the coordinate reference system of the elevation data. Generally this is [`2193`](https://epsg.io/2193) as it is the primary projection for most of LINZ's elevation data.
+
+### `geographic_description`
+
+This is free text and at the dataset maintainer's discretion. A specific city or sub-region or event name may be used to help describe the elevation data capture area. The [Gazetteer](https://gazetteer.linz.govt.nz/) is referenced to ensure official names with correct spelling are used. If the region has full coverage, then the geographic description can be empty and the region will be used instead.
+
+### `geospatial_category`
+
+A general categorisation of elevation data held within our archive that includes the following possible values:
+
+- DEM `dem`
+- DSM `dsm`
+
+### `gsd`
+
+The GSD or spatial resolution is the area covered on the ground by a single pixel. This is stored in metadata in metres with no trailing zeros after the decimal point.
+
+### `lifecycle`
+
+If `lifecycle = preview` then ` - Preview` is appended to the end of the elevation dataset title and if `lifecycle = ongoing` then ` - Draft` is appended to the end of the elevation dataset title. For any other lifecycle values, nothing is appended.
+
+### `product`
+
+Elevation data product type, currently either `dem` or `dsm`.
+
+### `region`
+
+Is taken from a list of regions:
+
+- Antarctica `antarctica`
+- Auckland `auckland`
+- Bay of Plenty `bay-of-plenty`
+- Canterbury `canterbury`
+- Northland `northland`
+- Gisborne `gisborne`
+- Global `global`
+- Hawke's Bay `hawkes-bay`
+- Manawatū-Whanganui `manawatu-whanganui`
+- Marlborough `marlborough`
+- Nelson `nelson`
+- New Zealand `new-zealand`
+- Otago `otago`
+- Pacific Islands `pacific-islands`
+- Southland `southland`
+- Taranaki `taranaki`
+- Tasman `tasman`
+- Waikato `waikato`
+- Wellington `wellington`
+- West Coast `west-coast`
+
+### `start_year` and `end_year`
+
+In both cases, the full four digits should be used. If the elevation dataset was entirely captured within one year, then only a `start_year` is provided.
+
+As elevation data can be updated after it is "named" for initial processing, the `end_year` or lack of an `end_year` may be incorrect in the S3 Path. It is best to use this as a rough guideline and then use the STAC Collection for a more precise capture timeframe.
+
+## Examples
+
+1m DSM covering Upper Hutt City in the Wellington region captured in 2021
+
+```
+Title: Wellington - Upper Hutt City LiDAR 1m DSM (2021)
+Path: s3://nz-elevation/wellington/upper-hutt-city_2021/dsm_1m/2193/collection.json
+```
+
+1m DEM covering the Hawke's Bay region captured in 2020-2021 (apostrophe removed in elevation dataset path)
+
+```
+Title: Hawke's Bay LiDAR 1m DEM (2020-2021)
+Path: s3://nz-elevation/hawkes-bay/hawkes-bay_2020-2021/dem_1m/2193/collection.json
+```
+
+1m DEM covering Kaikōura in the Canterbury region captured in 2016 (macron removed in elevation dataset path)
+
+```
+Title: Canterbury - Kaikōura LiDAR 1m DEM (2016)
+Path: s3://nz-elevation/canterbury/kaikoura_2016/dem_1m/2193/collection.json
+```