From c54f56385a1f9a72688d78c7875df23d27557d1a Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 8 Nov 2023 18:23:32 -0500 Subject: [PATCH 01/17] add devtool utilities --- CHANGES.md | 9 ++++ LICENSE | 21 ++++++++ Makefile | 37 ++++++++++++- README.md | 31 ++++++++--- STACpopulator/__init__.py | 2 +- pyproject.toml | 111 +++++++++++++++++++++++++++++++++++++- requirements.txt | 6 --- 7 files changed, 200 insertions(+), 17 deletions(-) create mode 100644 CHANGES.md create mode 100644 LICENSE delete mode 100644 requirements.txt diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..ffb9e64 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,9 @@ +# Changes + +## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) + + + +## [0.0.1](https://github.com/crim-ca/stac-populator/tree/0.0.1) (2023-08-22) + +* Initial release with refactored implementation of `CMIP6_UofT`. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..17ee2a6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2013-2014 Computer Research Institute of Montreal (CRIM) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile index 439f93e..4a21439 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,14 @@ -IMP_DIR = STACpopulator/implementations -STAC_HOST = http://localhost:8880/stac +MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +# Include custom config if it is available +-include Makefile.config +APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..) +APP_NAME := $(shell basename $(APP_ROOT)) +APP_VERSION ?= 0.0.1 + + +IMP_DIR := STACpopulator/implementations +STAC_HOST := http://localhost:8880/stac + testcmip6: python $(IMP_DIR)/CMIP6_UofT/add_CMIP6.py $(STAC_HOST) https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html @@ -18,3 +27,27 @@ del_docker_volume: stophost docker volume rm stac-populator_stac-db resethost: del_docker_volume starthost + + +## -- Versioning targets -------------------------------------------------------------------------------------------- ## + +# Bumpversion 'dry' config +# if 'dry' is specified as target, any bumpversion call using 'BUMP_XARGS' will not apply changes +BUMP_TOOL := bump-my-version +BUMP_XARGS ?= --verbose --allow-dirty +ifeq ($(filter dry, $(MAKECMDGOALS)), dry) + BUMP_XARGS := $(BUMP_XARGS) --dry-run +endif +.PHONY: dry +dry: pyproject.toml ## run 'bump' target without applying changes (dry-run) [make VERSION= bump dry] + @-echo > /dev/null + +.PHONY: bump +bump: ## bump version using VERSION specified as user input [make VERSION= bump] + @-echo "Updating package version ..." 
+ @[ "${VERSION}" ] || ( echo ">> 'VERSION' is not set"; exit 1 ) + @-bash -c '$(CONDA_CMD) $(BUMP_TOOL) $(BUMP_XARGS) --new-version "${VERSION}" patch;' + +.PHONY: version +version: ## display current version + @-echo "$(APP_NAME) version: $(APP_VERSION)" diff --git a/README.md b/README.md index 808926c..6555bb8 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,38 @@ # STAC Catalog Populator +![Latest Version](https://img.shields.io/badge/latest%20version-0.0.1-blue?logo=github) +![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.0.1.svg?logo=github) -This repository contains a framework [STACpopulator](STACpopulator) that can be used to implement concrete populators (see [implementations](implementations)) for populating the STAC catalog on a DACCS node. +This repository contains a framework [STACpopulator](STACpopulator) +that can be used to implement concrete populators (see [implementations](STACpopulator/implementations)) +for populating the STAC Catalog, Collections and Items from various dataset/catalog sources, and pushed using +STAC API on a server node. ## Framework -The framwork is centered around a Python Abstract Base Class: `STACpopulatorBase` that implements all the logic for populating a STAC catalog. This class implements an abstract method called `process_STAC_item` that should be defined in implementations of the class and contain all the logic for constructing the STAC representation for an item in the collection that is to be processed. +The framework is centered around a Python Abstract Base Class: `STACpopulatorBase` that implements all the logic +for populating a STAC catalog. This class provides abstract methods that should be overridden by implementations that +contain all the logic for constructing the STAC representation for an item in the collection that is to be processed. 
## Implementations -Currently, one implementation of `STACpopulatorBase` is provided in [add_CMIP6.py](implementations/add_CMIP6.py). +Provided implementations of `STACpopulatorBase`: + +- [CMIP6_UofT][CMIP6_UofT] + +[CMIP6_UofT]: STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py ## Testing -The provided `docker-compose` file can be used to launch a test STAC server. The `add_CMIP6.py` script can be run as: +The provided [`docker-compose`](docker-compose.yml) configuration file can be used to launch a test STAC server. +For example, the [CMIP6_UofT][CMIP6_UofT] script can be run as: +```shell +python STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py \ + "http://localhost:8880/stac/" \ + "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html" \ + "STACpopulator/implementations/CMIP6_UofT/collection_config.yml" ``` -python implementations/CMIP6-UofT/add_CMIP6.py http://localhost:8880/stac/ https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html implementations/CMIP6-UofT/CMIP6.yml -``` -Note: in the script above, I am currently using a sample THREDDS catalog URL and not one relevant to the global scale CMIP6 data. + +*Note*: +In the script above, a sample THREDDS catalog URL is employed and not one relevant to the global scale CMIP6 data. diff --git a/STACpopulator/__init__.py b/STACpopulator/__init__.py index f217a91..f102a9c 100644 --- a/STACpopulator/__init__.py +++ b/STACpopulator/__init__.py @@ -1 +1 @@ -from .populator_base import STACpopulatorBase +__version__ = "0.0.1" diff --git a/pyproject.toml b/pyproject.toml index dc08b7b..3e50ac0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "STACpopulator" version = "0.0.1" +description = "Utility for populating the STAC Catalog, Collections and Items from various dataset/catalog sources." 
requires-python = ">=3.10" dependencies = [ "colorlog", @@ -13,10 +14,118 @@ dependencies = [ "pystac", "xncml", "pydantic", - "pyessv" + "pyessv", + "requests", + "lxml", +] +urls = [ + "https://github.com/crim-ca/stac-populator" +] +readme = "README.md" +license = { file = "LICENSE" } +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Database :: Database Engines/Servers", + "Topic :: Internet :: WWW/HTTP :: Dynamic Content", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Interface Engine/Protocol Translator", + "Topic :: Utilities", +] +authors = [ + { name = "Francis Charette-Migneault", email = "francis.charette-migneault@crim.ca" }, + { name = "Deepak Chandan", email = "dchandan@cs.toronto.edu" }, + { name = "David Huard", email = "huard.david@ouranos.ca" }, +] +maintainers = [ + { name = "Francis Charette-Migneault", email = "francis.charette-migneault@crim.ca" }, + { name = "Deepak Chandan", email = "dchandan@cs.toronto.edu" }, + { name = "David Huard", email = "huard.david@ouranos.ca" }, +] +keywords = [ + "STAC", + "SpatioTemporal Asset Catalog", + "Data Ingestion", + "THREDDS", + "CMIP6" ] [tool.setuptools] py-modules = ["STACpopulator"] +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-cov", + "coverage", + "bump-my-version", +] + +[tool.pytest.ini_options] +norecursedirs = [ + ".*", + "build", + "dist", + "{arch}", + "*.egg", + "venv", + "requirements*", + "lib", +] +python_files = "test*.py" +addopts = [ + "--cov", + "--cov-report=term", + "--cov-report=html", +] + 
+[tool.coverage.html] +directory = "reports/coverage/html" + +[tool.coverage.xml] +output = "reports/coverage.xml" + +[tool.bumpversion] +current_version = "0.0.1" +commit = true +commit_args = "--no-verify" +tag = true +tag_name = "{new_version}" +allow_dirty = true +parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)(\\.dev\\d+)?" +serialize = [ + "{major}.{minor}.{patch}.dev{distance_to_latest_tag}", + "{major}.{minor}.{patch}" +] +message = "Version updated from {current_version} to {new_version}" + +[[tool.bumpversion.files]] +filename = "STACpopulator/__init__.py" + +[[tool.bumpversion.files]] +filename = "README.md" + +[[tool.bumpversion.files]] +filename = "Makefile" +search = "APP_VERSION ?= {current_version}" +replace = "APP_VERSION ?= {new_version}" + +[[tool.bumpversion.files]] +filename = "CHANGES.md" +search = "## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)" +replace = """ +## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)" + + +## [{new_version}](https://github.com/crim-ca/stac-populator/tree/{new_version}) ({now:%Y-%m-%d}) +""" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index dc03813..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -colorlog -requests -pystac -pyyaml -siphon -lxml From 1550b88e55d18d9ed8f69fe7b0b9d96964437d94 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 8 Nov 2023 18:44:05 -0500 Subject: [PATCH 02/17] update changes --- CHANGES.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ffb9e64..80d4563 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,8 +2,15 @@ ## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) - +* Add `LICENSE` file. +* Add `bump-my-version` with `make version` and `make VERSION=<...> bump` utilities to self-update release versions. +* Add more metadata to `pyproject.toml`. 
+* Adjust `README.md` with updated references and release version indicators. 
+* Add `CHANGES.md` to record version updates. +* Add `dev` dependencies to `pyproject.toml` for testing the package (install with `pip install ".[dev]"`). +* Remove `requirements.txt` in favor of all dependencies combined in `pyproject.toml`. +* Refactor of `CMIP6_UofT` with more robust parsing strategies and STAC Item generation from THREDDS NCML metadata. ## [0.0.1](https://github.com/crim-ca/stac-populator/tree/0.0.1) (2023-08-22) -* Initial release with refactored implementation of `CMIP6_UofT`. +* Initial release with implementation of `CMIP6_UofT`. From 922bb9bd67f69460c8f373a81bb3856d2a4ead52 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 8 Nov 2023 19:13:20 -0500 Subject: [PATCH 03/17] ignore test results --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 7e12211..277a829 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +.coverage +.pytest_cache +reports *.pyc STACpopulator.egg-info/ .vscode/ From ff6f054ad519541f185f995d1b8245044cfaec3a Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 8 Nov 2023 19:33:40 -0500 Subject: [PATCH 04/17] add CI test + fix pyproject URLs --- .github/workflows/tests.yml | 155 ++++++++++++++++++++++++++++++++++++ CHANGES.md | 1 + Makefile | 21 +++-- pyproject.toml | 7 +- 4 files changed, 176 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..beb0729 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,155 @@ +# run test suites + +name: Tests +on: + - pull_request + - push + - release + - workflow_dispatch + +# cancel the current workflow if another commit was pushed on the same PR or reference +# uses the GitHub workflow name to avoid collision with other workflows running on the same PR/reference +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + 
cancel-in-progress: true + +jobs: + # see: https://github.com/fkirc/skip-duplicate-actions + skip_duplicate: + continue-on-error: true + runs-on: ubuntu-latest + outputs: + should_skip: ${{ steps.skip_check.outputs.should_skip && ! contains(github.ref, 'refs/tags') }} + steps: + - id: skip_check + uses: fkirc/skip-duplicate-actions@master + with: + concurrent_skipping: "same_content" + skip_after_successful_duplicate: "true" + do_not_skip: '["pull_request", "workflow_dispatch", "schedule", "release"]' + + # see: https://github.com/actions/setup-python + tests: + needs: skip_duplicate + if: ${{ needs.skip_duplicate.outputs.should_skip != 'true' }} + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.allow-failure }} + env: + # override make command to install directly in active python + CONDA_CMD: "" + + strategy: + matrix: + os: [ubuntu-latest] + python-version: ["3.10", "3.11", "3.12"] + allow-failure: [false] + test-case: [test-unit] +# include: +# # experimental python +# - os: ubuntu-latest +# python-version: "3.13" +# allow-failure: true +# test-case: test-unit-only +# - os: ubuntu-latest +# python-version: "3.13" +# allow-failure: true +# test-case: test-func-only +# # linter tests +# - os: ubuntu-latest +# python-version: "3.10" +# allow-failure: false +# test-case: check-all +# # documentation build +# - os: ubuntu-latest +# python-version: "3.10" +# allow-failure: false +# test-case: docs +# # coverage test +# - os: ubuntu-latest +# python-version: "3.10" +# allow-failure: false +# test-case: test-coverage-only +# # smoke test of Docker image +# - os: ubuntu-latest +# python-version: "3.10" # doesn't matter which one (in docker), but match default of repo +# allow-failure: false +# test-case: test-docker + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: "0" + - name: Setup Python + # skip python setup if running with docker + if: ${{ matrix.test-case != 'test-docker' }} + uses: actions/setup-python@v2 + with: + python-version: "${{ 
matrix.python-version }}" + - name: Parse Python Version + id: python-semver + run: | + echo "::set-output name=major:$(echo ${{ matrix.python-version }} | cut -d '.' -f 1)" + echo "::set-output name=minor:$(echo ${{ matrix.python-version }} | cut -d '.' -f 2)" + - uses: actions/cache@v3 + name: Check Proj Lib Pre-Built in Cache + id: cache-proj + with: + # note: '22' is v8, '21' is v7 + path: /tmp/proj-8.2.1/install + key: ${{ runner.os }}-python${{ matrix.python-version }}-proj + - name: Install Dependencies + # skip python setup if running with docker + if: ${{ matrix.test-case != 'test-docker' }} + # install package and dependencies directly, + # skip sys/conda setup to use active python + run: make install-sys install-pkg install-pip install-raw install-dev version + - name: Display Packages + # skip python setup if running with docker + if: ${{ matrix.test-case != 'test-docker' }} + run: pip freeze + #- name: Setup Environment Variables + # uses: c-py/action-dotenv-to-setenv@v2 + # with: + # env-file: ./ci/weaver.env + - name: Display Environment Variables + run: | + hash -r + env | sort + - name: Run Tests + run: make ${{ matrix.test-case }} + - name: Upload coverage report + uses: codecov/codecov-action@v2 + if: ${{ success() && matrix.test-case == 'test-coverage-only' }} + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ./reports/coverage.xml + fail_ci_if_error: true + verbose: true + +# deploy-docker: +# needs: tests +# if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} +# runs-on: ubuntu-latest +# steps: +# - uses: actions/checkout@v2 +# with: +# fetch-depth: "0" +# - name: Get Tag Version +# id: version +# shell: bash +# run: | +# if [[ "${GITHUB_REF}" == "refs/heads/master" ]]; then +# echo "::set-output name=TAG_VERSION::latest" +# else +# echo "::set-output name=TAG_VERSION::${GITHUB_REF##*/}" +# fi +# - name: Build Docker +# run: | +# make DOCKER_REPO=pavics/weaver APP_VERSION=${{ 
steps.version.outputs.TAG_VERSION }} docker-info docker-build +# - name: Login to DockerHub +# uses: docker/login-action@v1 +# with: +# username: ${{ secrets.DOCKERHUB_USERNAME }} +# password: ${{ secrets.DOCKERHUB_TOKEN }} +# - name: Push to DockerHub +# run: | +# make DOCKER_REPO=pavics/weaver APP_VERSION=${{ steps.version.outputs.TAG_VERSION }} docker-push diff --git a/CHANGES.md b/CHANGES.md index 80d4563..d37c753 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,7 @@ * Adjust `README.md` with updated references and release version indicators. * Add `CHANGES.md` to record version updates. * Add `dev` dependencies to `pyproject.toml` for testing the package (install with `pip install ".[dev]"`). +* Add GitHub CI tests. * Remove `requirements.txt` in favor of all dependencies combined in `pyproject.toml`. * Refactor of `CMIP6_UofT` with more robust parsing strategies and STAC Item generation from THREDDS NCML metadata. diff --git a/Makefile b/Makefile index 4a21439..5e265ef 100644 --- a/Makefile +++ b/Makefile @@ -7,27 +7,38 @@ APP_VERSION ?= 0.0.1 IMP_DIR := STACpopulator/implementations -STAC_HOST := http://localhost:8880/stac +STAC_HOST ?= http://localhost:8880/stac +## -- Testing targets -------------------------------------------------------------------------------------------- ## -testcmip6: +test-cmip6: python $(IMP_DIR)/CMIP6_UofT/add_CMIP6.py $(STAC_HOST) https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html -delcmip6: +del-cmip6: curl --location --request DELETE '$(STAC_HOST)/collections/CMIP6_UofT' @echo "" -starthost: +docker-start: docker compose up +starthost: docker-start -stophost: +docker-stop: docker compose down +stophost: docker-stop del_docker_volume: stophost docker volume rm stac-populator_stac-db resethost: del_docker_volume starthost +install: + pip install "$(APP_ROOT)" + +install-dev: + pip install "$(APP_ROOT)[dev]" + +test-unit: + pytest "$(APP_ROOT)" ## -- Versioning targets 
-------------------------------------------------------------------------------------------- ## diff --git a/pyproject.toml b/pyproject.toml index 3e50ac0..aaf3284 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,9 +18,6 @@ dependencies = [ "requests", "lxml", ] -urls = [ - "https://github.com/crim-ca/stac-populator" -] readme = "README.md" license = { file = "LICENSE" } classifiers = [ @@ -59,6 +56,10 @@ keywords = [ "CMIP6" ] +[project.urls] +Repository = "https://github.com/crim-ca/stac-populator" +Changelog = "https://github.com/crim-ca/stac-populator/blob/master/CHANGES.md" + [tool.setuptools] py-modules = ["STACpopulator"] From 2f6b15c122f31a9b5a6e75738dba14da672de4eb Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 9 Nov 2023 10:06:13 -0500 Subject: [PATCH 05/17] Version updated from 0.0.1 to 0.1.0 --- CHANGES.md | 15 ++++++--------- Makefile | 2 +- README.md | 4 ++-- STACpopulator/__init__.py | 2 +- pyproject.toml | 2 +- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d37c753..142fa60 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,15 +1,12 @@ # Changes -## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) +## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)" + + + +## [0.1.0](https://github.com/crim-ca/stac-populator/tree/0.1.0) (2023-11-09) + -* Add `LICENSE` file. -* Add `bump-my-version` with `make version` and `make VERSION=<...> bump` utilities to self-update release versions. -* Add more metadata to `pyproject.toml`. -* Adjust `README.md` with updated references and release version indicators. -* Add `CHANGES.md` to record version updates. -* Add `dev` dependencies to `pyproject.toml` for testing the package (install with `pip install ".[dev]"`). -* Add GitHub CI tests. -* Remove `requirements.txt` in favor of all dependencies combined in `pyproject.toml`. 
* Refactor of `CMIP6_UofT` with more robust parsing strategies and STAC Item generation from THREDDS NCML metadata. ## [0.0.1](https://github.com/crim-ca/stac-populator/tree/0.0.1) (2023-08-22) diff --git a/Makefile b/Makefile index 5e265ef..bf84203 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -include Makefile.config APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..) APP_NAME := $(shell basename $(APP_ROOT)) -APP_VERSION ?= 0.0.1 +APP_VERSION ?= 0.1.0 IMP_DIR := STACpopulator/implementations diff --git a/README.md b/README.md index 6555bb8..cb56e4c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # STAC Catalog Populator -![Latest Version](https://img.shields.io/badge/latest%20version-0.0.1-blue?logo=github) -![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.0.1.svg?logo=github) +![Latest Version](https://img.shields.io/badge/latest%20version-0.1.0-blue?logo=github) +![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.1.0.svg?logo=github) This repository contains a framework [STACpopulator](STACpopulator) that can be used to implement concrete populators (see [implementations](STACpopulator/implementations)) diff --git a/STACpopulator/__init__.py b/STACpopulator/__init__.py index f102a9c..3dc1f76 100644 --- a/STACpopulator/__init__.py +++ b/STACpopulator/__init__.py @@ -1 +1 @@ -__version__ = "0.0.1" +__version__ = "0.1.0" diff --git a/pyproject.toml b/pyproject.toml index aaf3284..1276403 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ directory = "reports/coverage/html" output = "reports/coverage.xml" [tool.bumpversion] -current_version = "0.0.1" +current_version = "0.1.0" commit = true commit_args = "--no-verify" tag = true From 40c34cdc896fae2c84fa056ea4ac254c41720a4c Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 9 Nov 2023 10:07:20 -0500 Subject: 
[PATCH 06/17] split up changelog of devtools and CMIP6 refactor --- CHANGES.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 142fa60..3c6b83b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,9 +2,16 @@ ## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)" - +* Add `LICENSE` file. +* Add `bump-my-version` with `make version` and `make VERSION=<...> bump` utilities to self-update release versions. +* Add more metadata to `pyproject.toml`. +* Adjust `README.md` with updated references and release version indicators. +* Add `CHANGES.md` to record version updates. +* Add `dev` dependencies to `pyproject.toml` for testing the package (install with `pip install ".[dev]"`). +* Add GitHub CI tests. +* Remove `requirements.txt` in favor of all dependencies combined in `pyproject.toml`. -## [0.1.0](https://github.com/crim-ca/stac-populator/tree/0.1.0) (2023-11-09) +## [0.1.0](https://github.com/crim-ca/stac-populator/tree/0.1.0) (2023-11-08) * Refactor of `CMIP6_UofT` with more robust parsing strategies and STAC Item generation from THREDDS NCML metadata. 
From f163b3f8fc6f9947f7ba35d290a1aad870eb7232 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 9 Nov 2023 10:43:28 -0500 Subject: [PATCH 07/17] remove concurrency skip causing duplicate cancel working against skip-duplicate-actions --- .github/workflows/tests.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index beb0729..efed36f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,11 +7,12 @@ on: - release - workflow_dispatch +# handled by skip-duplicate-actions instead with more options # cancel the current workflow if another commit was pushed on the same PR or reference # uses the GitHub workflow name to avoid collision with other workflows running on the same PR/reference -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true +#concurrency: +# group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} +# cancel-in-progress: true jobs: # see: https://github.com/fkirc/skip-duplicate-actions From 1e2182d439a65384bb53f91f546053e1770e42dc Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 9 Nov 2023 12:10:26 -0500 Subject: [PATCH 08/17] revert skip duplicate, use github concurrency --- .github/workflows/tests.yml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index efed36f..ba6dd27 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,32 +7,32 @@ on: - release - workflow_dispatch -# handled by skip-duplicate-actions instead with more options # cancel the current workflow if another commit was pushed on the same PR or reference # uses the GitHub workflow name to avoid collision with other workflows running on the same PR/reference -#concurrency: -# group: ${{ github.workflow }}-${{ 
github.event.pull_request.number || github.ref }} -# cancel-in-progress: true +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true jobs: # see: https://github.com/fkirc/skip-duplicate-actions - skip_duplicate: - continue-on-error: true - runs-on: ubuntu-latest - outputs: - should_skip: ${{ steps.skip_check.outputs.should_skip && ! contains(github.ref, 'refs/tags') }} - steps: - - id: skip_check - uses: fkirc/skip-duplicate-actions@master - with: - concurrent_skipping: "same_content" - skip_after_successful_duplicate: "true" - do_not_skip: '["pull_request", "workflow_dispatch", "schedule", "release"]' +# skip_duplicate: +# continue-on-error: true +# runs-on: ubuntu-latest +# outputs: +# should_skip: ${{ steps.skip_check.outputs.should_skip && ! contains(github.ref, 'refs/tags') }} +# steps: +# - id: skip_check +# uses: fkirc/skip-duplicate-actions@master +# with: +# concurrent_skipping: "same_content" +# skip_after_successful_duplicate: "true" +# do_not_skip: '["pull_request", "workflow_dispatch", "schedule", "release"]' # see: https://github.com/actions/setup-python tests: - needs: skip_duplicate - if: ${{ needs.skip_duplicate.outputs.should_skip != 'true' }} + # FIXME: https://github.com/fkirc/skip-duplicate-actions/issues/90 + #needs: skip_duplicate + #if: ${{ needs.skip_duplicate.outputs.should_skip != 'true' }} runs-on: ${{ matrix.os }} continue-on-error: ${{ matrix.allow-failure }} env: From b360ceae0b38c4d252ca3146a8e2852a8fe57194 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 9 Nov 2023 12:11:35 -0500 Subject: [PATCH 09/17] update CI install step --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ba6dd27..83363b6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -102,7 +102,7 @@ jobs: if: ${{ matrix.test-case != 'test-docker' 
}} # install package and dependencies directly, # skip sys/conda setup to use active python - run: make install-sys install-pkg install-pip install-raw install-dev version + run: make install-dev version - name: Display Packages # skip python setup if running with docker if: ${{ matrix.test-case != 'test-docker' }} From 4acf96f6997919099807d79e4c2e73a4fdc5ee77 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 9 Nov 2023 16:07:43 -0500 Subject: [PATCH 10/17] ensure the project self-bumps its own pyproject version --- pyproject.toml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1276403..9c1f730 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "STACpopulator" -version = "0.0.1" +version = "0.1.0" description = "Utility for populating the STAC Catalog, Collections and Items from various dataset/catalog sources." requires-python = ">=3.10" dependencies = [ @@ -130,3 +130,10 @@ replace = """ ## [{new_version}](https://github.com/crim-ca/stac-populator/tree/{new_version}) ({now:%Y-%m-%d}) """ + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +# ensure the regex does not match another version by mistake using the package name as guide +regex = true +search = "^name = \"STACpopulator\"\nversion = \"{current_version}\"$" +replace = "name = \"STACpopulator\"\nversion = \"{new_version}\"" From 3277b706b9ef67572c635d777af7aec61e6835a2 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 9 Nov 2023 18:09:56 -0500 Subject: [PATCH 11/17] patch tests --- .github/workflows/tests.yml | 4 +- .gitignore | 1 + CHANGES.md | 4 +- Makefile | 6 ++ STACpopulator/api_requests.py | 10 ++- .../implementations/CMIP6_UofT/add_CMIP6.py | 4 +- STACpopulator/input.py | 20 ++++- STACpopulator/populator_base.py | 19 +++- pyproject.toml | 30 +++++-- ...llection_testdata_xclim_cmip6_catalog.json | 30 +++++++ 
.../stac_item_testdata_xclim_cmip6_ncml.json} | 0 tests/test_standalone_stac_item.py | 88 +++++++++++++++---- 12 files changed, 178 insertions(+), 38 deletions(-) create mode 100644 tests/data/stac_collection_testdata_xclim_cmip6_catalog.json rename tests/{ref.json => data/stac_item_testdata_xclim_cmip6_ncml.json} (100%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 83363b6..8a33819 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -44,7 +44,7 @@ jobs: os: [ubuntu-latest] python-version: ["3.10", "3.11", "3.12"] allow-failure: [false] - test-case: [test-unit] + test-case: [test-cov] # include: # # experimental python # - os: ubuntu-latest @@ -102,7 +102,7 @@ jobs: if: ${{ matrix.test-case != 'test-docker' }} # install package and dependencies directly, # skip sys/conda setup to use active python - run: make install-dev version + run: make setup-pyessv-archive install-dev version - name: Display Packages # skip python setup if running with docker if: ${{ matrix.test-case != 'test-docker' }} diff --git a/.gitignore b/.gitignore index 277a829..c6ac5f8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .coverage .pytest_cache +build reports *.pyc STACpopulator.egg-info/ diff --git a/CHANGES.md b/CHANGES.md index 3c6b83b..3a3eece 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,6 @@ # Changes -## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)" +## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) * Add `LICENSE` file. * Add `bump-my-version` with `make version` and `make VERSION=<...> bump` utilities to self-update release versions. @@ -10,6 +10,8 @@ * Add `dev` dependencies to `pyproject.toml` for testing the package (install with `pip install ".[dev]"`). * Add GitHub CI tests. * Remove `requirements.txt` in favor of all dependencies combined in `pyproject.toml`. +* Add test to validate STAC Collection and Item contain `source` with expected THREDDS format. 
+* Fix broken tests and invalid imports. ## [0.1.0](https://github.com/crim-ca/stac-populator/tree/0.1.0) (2023-11-08) diff --git a/Makefile b/Makefile index bf84203..fff402a 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,9 @@ STAC_HOST ?= http://localhost:8880/stac ## -- Testing targets -------------------------------------------------------------------------------------------- ## +setup-pyessv-archive: + git clone "https://github.com/ES-DOC/pyessv-archive" ~/.esdoc/pyessv-archive + test-cmip6: python $(IMP_DIR)/CMIP6_UofT/add_CMIP6.py $(STAC_HOST) https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html @@ -40,6 +43,9 @@ install-dev: test-unit: pytest "$(APP_ROOT)" +test-cov: + pytest "$(APP_ROOT)" --cov --cov-report=term --cov-report=html + ## -- Versioning targets -------------------------------------------------------------------------------------------- ## # Bumpversion 'dry' config diff --git a/STACpopulator/api_requests.py b/STACpopulator/api_requests.py index 35b0dc2..a67473b 100644 --- a/STACpopulator/api_requests.py +++ b/STACpopulator/api_requests.py @@ -1,6 +1,6 @@ import logging import os -from typing import Any, Optional +from typing import Any, MutableMapping, Optional import requests from colorlog import ColoredFormatter @@ -35,7 +35,7 @@ def stac_collection_exists(stac_host: str, collection_id: str) -> bool: return r.status_code == 200 -def post_stac_collection(stac_host: str, json_data: dict[str, Any], update: Optional[bool] = True) -> None: +def post_stac_collection(stac_host: str, json_data: MutableMapping[str, Any], update: Optional[bool] = True) -> None: """Post/create a collection on the STAC host :param stac_host: address of the STAC host @@ -62,7 +62,11 @@ def post_stac_collection(stac_host: str, json_data: dict[str, Any], update: Opti def post_stac_item( - stac_host: str, collection_id: str, item_name: str, json_data: dict[str, dict], update: Optional[bool] = True + stac_host: str, + 
collection_id: str, + item_name: str, + json_data: MutableMapping[str, dict], + update: Optional[bool] = True, ) -> None: """Post a STAC item to the host server. diff --git a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py index 6d6fedb..f913a3d 100644 --- a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py +++ b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py @@ -10,10 +10,10 @@ from pydantic import AnyHttpUrl, ConfigDict, Field, FieldValidationInfo, field_validator from pystac.extensions.datacube import DatacubeExtension -from STACpopulator import STACpopulatorBase from STACpopulator.implementations.CMIP6_UofT.extensions import DataCubeHelper -from STACpopulator.input import GenericLoader, THREDDSLoader +from STACpopulator.input import GenericLoader, ErrorLoader, THREDDSLoader from STACpopulator.models import GeoJSONPolygon, STACItemProperties +from STACpopulator.populator_base import STACpopulatorBase from STACpopulator.stac_utils import STAC_item_from_metadata, collection2literal LOGGER = logging.getLogger(__name__) diff --git a/STACpopulator/input.py b/STACpopulator/input.py index 25750c0..a5d2774 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -31,12 +31,23 @@ def __iter__(self): A generator that returns an item from the input. The item could be anything depending on the specific concrete implementation of this abstract class. 
""" - pass + raise NotImplementedError @abstractmethod def reset(self): """Reset the internal state of the generator.""" - pass + raise NotImplementedError + + +class ErrorLoader(GenericLoader): + def __init__(self): # noqa + raise NotImplementedError + + def __iter__(self): + raise NotImplementedError + + def reset(self): + raise NotImplementedError class THREDDSLoader(GenericLoader): @@ -84,9 +95,10 @@ def magpie_collection_link(self) -> pystac.Link: url = self.thredds_catalog_URL parts = url.split("/") i = parts.index("catalog") - # service = parts[i - 1] + service = parts[i - 1] path = "/".join(parts[i + 1 : -1]) - return pystac.Link(rel="source", target=url, media_type="text/xml", title=path) + title = f"{service}:{path}" + return pystac.Link(rel="source", target=url, media_type="text/xml", title=title) def reset(self): """Reset the generator.""" diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py index f8ccb1c..f8c8314 100644 --- a/STACpopulator/populator_base.py +++ b/STACpopulator/populator_base.py @@ -1,3 +1,4 @@ +import functools import logging from abc import ABC, abstractmethod from datetime import datetime @@ -41,7 +42,8 @@ def __init__( """ super().__init__() - self._collection_info = load_collection_configuration() + self._collection_info = None + self.load_config() self._ingest_pipeline = data_loader self._stac_host = self.validate_host(stac_host) @@ -52,6 +54,9 @@ def __init__( LOGGER.info(f"Collection {self.collection_name} is assigned id {self._collection_id}") self.create_stac_collection() + def load_config(self): + self._collection_info = load_collection_configuration() + @property def collection_name(self) -> str: return self._collection_info["title"] @@ -90,7 +95,11 @@ def validate_host(self, stac_host: str) -> str: return stac_host - def create_stac_collection(self) -> None: + # FIXME: should provide a way to update after item generation + # STAC collections are supposed to include 'summaries' with + # an 
aggregation of all supported 'properties' by its child items + @functools.cache + def create_stac_collection(self) -> MutableMapping[str, Any]: """ Create a basic STAC collection. @@ -112,8 +121,12 @@ def create_stac_collection(self) -> None: collection = pystac.Collection(**self._collection_info) collection.add_links(self._ingest_pipeline.links) + collection_data = collection.to_dict() + self.publish_stac_collection(collection_data) + return collection_data - post_stac_collection(self.stac_host, collection.to_dict(), self.update) + def publish_stac_collection(self, collection_data: MutableMapping[str, Any]) -> None: + post_stac_collection(self.stac_host, collection_data, self.update) def ingest(self) -> None: LOGGER.info("Data ingestion") diff --git a/pyproject.toml b/pyproject.toml index 9c1f730..ac83d51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,18 @@ requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" +[tool.setuptools.packages.find] +include = ["STACpopulator*"] +exclude = [ + ".deprecated", + ".pytest_cache", + "__pycache__", + "__pycache__.*", + "__pycache__*", + "STACpopulator.__pycache__*", + "tests*", +] + [project] name = "STACpopulator" version = "0.1.0" @@ -60,9 +72,6 @@ keywords = [ Repository = "https://github.com/crim-ca/stac-populator" Changelog = "https://github.com/crim-ca/stac-populator/blob/master/CHANGES.md" -[tool.setuptools] -py-modules = ["STACpopulator"] - [project.optional-dependencies] dev = [ "pytest", @@ -83,10 +92,15 @@ norecursedirs = [ "lib", ] python_files = "test*.py" -addopts = [ - "--cov", - "--cov-report=term", - "--cov-report=html", +# these break debugger breakpoints +# add them manually with 'make test-cov' +#addopts = [ +# "--cov", +# "--cov-report=term", +# "--cov-report=html", +#] +markers = [ + "online" ] [tool.coverage.html] @@ -124,7 +138,7 @@ replace = "APP_VERSION ?= {new_version}" filename = "CHANGES.md" search = "## [Unreleased](https://github.com/crim-ca/stac-populator) 
(latest)" replace = """ -## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)" +## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) diff --git a/tests/data/stac_collection_testdata_xclim_cmip6_catalog.json b/tests/data/stac_collection_testdata_xclim_cmip6_catalog.json new file mode 100644 index 0000000..982e9e5 --- /dev/null +++ b/tests/data/stac_collection_testdata_xclim_cmip6_catalog.json @@ -0,0 +1,30 @@ +{ + "type": "Collection", + "id": "test", + "title": "test", + "stac_version": "1.0.0", + "description": "test", + "summaries": {"needs_summaries_update": ["true"]}, + "extent": { + "spatial": { + "bbox": [ + [-180, -90, 180, 90] + ] + }, + "temporal": { + "interval": [ + ["1850-01-01T00:00:00Z", null] + ] + } + }, + "license": "MIT", + "keywords": ["test"], + "links": [ + { + "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.xml", + "rel": "source", + "title": "thredds:birdhouse/testdata/xclim/cmip6", + "type": "text/xml" + } + ] +} diff --git a/tests/ref.json b/tests/data/stac_item_testdata_xclim_cmip6_ncml.json similarity index 100% rename from tests/ref.json rename to tests/data/stac_item_testdata_xclim_cmip6_ncml.json diff --git a/tests/test_standalone_stac_item.py b/tests/test_standalone_stac_item.py index d7239a8..3163cd5 100644 --- a/tests/test_standalone_stac_item.py +++ b/tests/test_standalone_stac_item.py @@ -1,30 +1,88 @@ import json - +import pytest import requests +import os +import tempfile +from urllib.parse import quote + import xncml -from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import ( - CMIP6ItemProperties, - make_cmip6_item_id, -) +from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import CMIP6ItemProperties, CMIP6populator +from STACpopulator.input import THREDDSLoader from STACpopulator.models import GeoJSONPolygon from STACpopulator.stac_utils import STAC_item_from_metadata +CUR_DIR = os.path.dirname(__file__) + + +def 
quote_none_safe(url): + return quote(url, safe="") -def test_standalone_stac_item(): - url = ( - "https://pavics.ouranos.ca/twitcher/ows/proxy/" - "thredds/ncml/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" - "?catalog=https%3A%2F%2Fpavics.ouranos.ca%2Ftwitcher%2Fows%2Fproxy%2F" - "thredds%2Fcatalog%2Fbirdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fcatalog.html" - "&dataset=birdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fsic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" + +@pytest.mark.online +def test_standalone_stac_item_thredds_ncml(): + thredds_url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds" + thredds_path = "birdhouse/testdata/xclim/cmip6" + thredds_nc = "sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" + thredds_catalog = f"{thredds_url}/catalog/{thredds_path}/catalog.html" + thredds_ds = f"{thredds_path}/{thredds_nc}" + thredds_ncml_url = ( + f"{thredds_url}/ncml/{thredds_path}/{thredds_nc}" + f"?catalog={quote_none_safe(thredds_catalog)}&dataset={quote_none_safe(thredds_ds)}" ) - attrs = xncml.Dataset.from_text(requests.get(url).content).to_cf_dict() - stac_item_id = make_cmip6_item_id(attrs["attributes"]) + # FIXME: avoid hackish workarounds + data = requests.get(thredds_ncml_url).text + attrs = xncml.Dataset.from_text(data).to_cf_dict() + attrs["access_urls"] = { # FIXME: all following should be automatically added, but they are not! 
+ "HTTPServer": f"{thredds_url}/fileServer/{thredds_path}/{thredds_nc}", + "OPENDAP": f"{thredds_url}/dodsC/{thredds_path}/{thredds_nc}", + "WCS": f"{thredds_url}/wcs/{thredds_path}/{thredds_nc}?service=WCS&version=1.0.0&request=GetCapabilities", + "WMS": f"{thredds_url}/wms/{thredds_path}/{thredds_nc}?service=WMS&version=1.3.0&request=GetCapabilities", + "NetcdfSubset": f"{thredds_url}/ncss/{thredds_path}/{thredds_nc}/dataset.html", + } + + stac_item_id = CMIP6populator.make_cmip6_item_id(attrs["attributes"]) stac_item = STAC_item_from_metadata(stac_item_id, attrs, CMIP6ItemProperties, GeoJSONPolygon) - with open("tests/ref.json", "r") as ff: + ref_file = os.path.join(CUR_DIR, "data/stac_item_testdata_xclim_cmip6_ncml.json") + with open(ref_file, mode="r", encoding="utf-8") as ff: reference = json.load(ff) assert stac_item.to_dict() == reference + + +class MockedNoSTACUpload(CMIP6populator): + def load_config(self): + # bypass auto-load config + self._collection_info = { + "id": "test", + "title": "test", + "description": "test", + "keywords": ["test"], + "license": "MIT", + "spatialextent": [-180, -90, 180, 90], + "temporalextent": ['1850-01-01', None] + } + + def validate_host(self, stac_host: str) -> str: + pass # don't care + + def publish_stac_collection(self, *_) -> None: + pass # don't push to STAC API + + +@pytest.mark.online +def test_cmip6_stac_thredds_catalog_parsing(): + url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html" + loader = THREDDSLoader(url) + with tempfile.NamedTemporaryFile(): + populator = MockedNoSTACUpload("https://host-dont-care.com", loader) + + result = populator.create_stac_collection() + + ref_file = os.path.join(CUR_DIR, "data/stac_collection_testdata_xclim_cmip6_catalog.json") + with open(ref_file, mode="r", encoding="utf-8") as ff: + reference = json.load(ff) + + assert result == reference From d22214a61c83698628f689507b2353ec82a68fc0 Mon Sep 17 00:00:00 2001 From: 
Francis Charette-Migneault Date: Thu, 9 Nov 2023 18:14:52 -0500 Subject: [PATCH 12/17] fix coverage reporting wrong location --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 7c03490..19ca1c0 100644 --- a/Makefile +++ b/Makefile @@ -2,11 +2,11 @@ MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) # Include custom config if it is available -include Makefile.config APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..) -APP_NAME := $(shell basename $(APP_ROOT)) +APP_NAME := STACpopulator APP_VERSION ?= 0.1.0 -IMP_DIR := STACpopulator/implementations +IMP_DIR := $(APP_NAME)/implementations STAC_HOST ?= http://localhost:8880/stac # CATALOG = https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html CATALOG = https://daccs.cs.toronto.edu/twitcher/ows/proxy/thredds/catalog/datasets/CMIP6/catalog.html @@ -48,7 +48,7 @@ test-unit: pytest "$(APP_ROOT)" test-cov: - pytest "$(APP_ROOT)" --cov --cov-report=term --cov-report=html + pytest "$(APP_ROOT)" --cov="$(APP_NAME)" --cov-report=term --cov-report=html ## -- Versioning targets -------------------------------------------------------------------------------------------- ## From 1855b73edde345ac8ff0fc65089bb6da6a0d5388 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 10 Nov 2023 10:08:47 -0500 Subject: [PATCH 13/17] update xncml version for python 3.12 support --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ac83d51..efeff4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "pyyaml", "siphon", "pystac", - "xncml", + "xncml>=0.3.1", # python 3.12 support "pydantic", "pyessv", "requests", From 1b0ba5c9c3c7e474a21cc991da4662fd12643807 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 10 Nov 2023 11:59:58 -0500 Subject: [PATCH 14/17] add dockerfile and related 
configs --- .dockerignore | 28 ++++++++++++++++ .github/workflows/release.yml | 3 ++ .gitignore | 24 ++++++++++---- Makefile | 9 ++++-- README.md | 26 ++++++++++++++- docker/Dockerfile | 32 +++++++++++++++++++ .../docker-compose.yml | 0 pyproject.toml | 7 +++- 8 files changed, 118 insertions(+), 11 deletions(-) create mode 100644 .dockerignore create mode 100644 docker/Dockerfile rename docker-compose.yml => docker/docker-compose.yml (100%) diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..cf551b9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,28 @@ +## IDE +.idea/ +.vscode/ + +## SCM +.git* + +## Configurations +.* +*.rc + +## Environment +.conda/ +.env* +*.env +.venv/ +jupyter/ + +## Tests +.coverage +.pytest_cache +reports + +## Caches +**/__pycache__/ +STACpopulator.egg-info/ +build +*.pyc diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 47c1e7b..f937cb3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,6 +15,8 @@ jobs: release: name: release runs-on: ubuntu-latest + # FIXME: until working + ## if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} steps: - name: Checkout uses: actions/checkout@v2 @@ -47,5 +49,6 @@ jobs: uses: docker/build-push-action@v3 with: context: . + file: docker/Dockerfile push: true tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.extract_branch.outputs.branch }} diff --git a/.gitignore b/.gitignore index c6ac5f8..9d5674a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,21 @@ +## IDE +.idea/ +.vscode/ + +## Environment +.conda/ +.env* +*.env +.venv/ +jupyter/ + +## Tests .coverage .pytest_cache -build reports -*.pyc + +## Caches +**/__pycache__/ STACpopulator.egg-info/ -.vscode/ -.venv/ -jupyter/ -.idea -.vscode +build +*.pyc diff --git a/Makefile b/Makefile index 19ca1c0..bd5daeb 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,8 @@ APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..) 
APP_NAME := STACpopulator APP_VERSION ?= 0.1.0 +DOCKER_COMPOSE_FILES := -f "$(APP_ROOT)/docker/docker-compose.yml" +DOCKER_TAG := ghcr.io/crim-ca/stac-populator:$(APP_VERSION) IMP_DIR := $(APP_NAME)/implementations STAC_HOST ?= http://localhost:8880/stac @@ -26,13 +28,16 @@ del-cmip6: @echo "" docker-start: - docker compose up + docker compose $(DOCKER_COMPOSE_FILES) up starthost: docker-start docker-stop: - docker compose down + docker compose $(DOCKER_COMPOSE_FILES) down stophost: docker-stop +docker-build: + docker build "$(APP_ROOT)" -f "$(APP_ROOT)/docker/Dockerfile" -t "$(DOCKER_TAG)" + del_docker_volume: stophost docker volume rm stac-populator_stac-db diff --git a/README.md b/README.md index cb56e4c..4efb3ee 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,33 @@ Provided implementations of `STACpopulatorBase`: [CMIP6_UofT]: STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py +## Installation and Execution + +Either with Python directly (in an environment of your choosing): + +```shell +pip install . +# OR +make install +``` + +With development packages: + +```shell +pip install .[dev] +# OR +make install-dev +``` + +You can also employ the pre-built Docker: + +```shell +docker run -ti ghcr.io/crim-ca/stac-populator:0.1.0 [command] +``` + ## Testing -The provided [`docker-compose`](docker-compose.yml) configuration file can be used to launch a test STAC server. +The provided [`docker-compose`](docker/docker-compose.yml) configuration file can be used to launch a test STAC server. For example, the [CMIP6_UofT][CMIP6_UofT] script can be run as: ```shell diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..07c269c --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,32 @@ +FROM python:3.10-slim +LABEL description.short="STAC Populator" +LABEL description.long="Utility to populate STAC Catalog, Collections and Items from various dataset/catalog sources." 
+LABEL maintainer="Francis Charette-Migneault " +LABEL vendor="CRIM" +LABEL version="0.1.0" + +# setup paths +ENV APP_DIR=/opt/local/src/stac-populator +WORKDIR ${APP_DIR} + +# obtain source files +COPY STACpopulator/ ${APP_DIR}/STACpopulator/ +COPY README.md LICENSE pyproject.toml ${APP_DIR}/ + +# install runtime/package dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + netbase \ + git \ + && mkdir -p /home/stac/.esdoc/ \ + && git clone "https://github.com/ES-DOC/pyessv-archive" /home/stac/.esdoc/pyessv-archive/ \ + && pip install --no-cache-dir ${APP_DIR} \ + && apt-get remove -y \ + git \ + && rm -rf /var/lib/apt/lists/* + +RUN groupadd -r stac && useradd -r -g stac stac +USER stac + +# FIXME: use common CLI +CMD ["bash"] diff --git a/docker-compose.yml b/docker/docker-compose.yml similarity index 100% rename from docker-compose.yml rename to docker/docker-compose.yml diff --git a/pyproject.toml b/pyproject.toml index efeff4e..ca64cba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ exclude = [ [project] name = "STACpopulator" version = "0.1.0" -description = "Utility for populating the STAC Catalog, Collections and Items from various dataset/catalog sources." +description = "Utility to populate STAC Catalog, Collections and Items from various dataset/catalog sources." 
requires-python = ">=3.10" dependencies = [ "colorlog", @@ -134,6 +134,11 @@ filename = "Makefile" search = "APP_VERSION ?= {current_version}" replace = "APP_VERSION ?= {new_version}" +[[tool.bumpversion.files]] +filename = "docker/Dockerfile" +search = "LABEL version=\"{current_version}\"" +replace = "LABEL version=\"{new_version}\"" + [[tool.bumpversion.files]] filename = "CHANGES.md" search = "## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)" From d108ef5af3eba8d60b10ab5de9901d060b060882 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 10 Nov 2023 12:12:45 -0500 Subject: [PATCH 15/17] limit docker build CI release to new versions or latest now that it is confirmed to work --- .github/workflows/release.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f937cb3..ec87644 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,8 +15,7 @@ jobs: release: name: release runs-on: ubuntu-latest - # FIXME: until working - ## if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} + if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} steps: - name: Checkout uses: actions/checkout@v2 From 0d99de281d08182b5c0df94e67f3971113c588e7 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 10 Nov 2023 12:47:36 -0500 Subject: [PATCH 16/17] revert MutableMapping to dict but align caller types with it --- STACpopulator/api_requests.py | 6 +++--- STACpopulator/populator_base.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/STACpopulator/api_requests.py b/STACpopulator/api_requests.py index a67473b..e7a5380 100644 --- a/STACpopulator/api_requests.py +++ b/STACpopulator/api_requests.py @@ -1,6 +1,6 @@ import logging import os -from typing import Any, MutableMapping, Optional +from typing import Any, 
Optional import requests from colorlog import ColoredFormatter @@ -35,7 +35,7 @@ def stac_collection_exists(stac_host: str, collection_id: str) -> bool: return r.status_code == 200 -def post_stac_collection(stac_host: str, json_data: MutableMapping[str, Any], update: Optional[bool] = True) -> None: +def post_stac_collection(stac_host: str, json_data: dict[str, Any], update: Optional[bool] = True) -> None: """Post/create a collection on the STAC host :param stac_host: address of the STAC host @@ -65,7 +65,7 @@ def post_stac_item( stac_host: str, collection_id: str, item_name: str, - json_data: MutableMapping[str, dict], + json_data: dict[str, dict], update: Optional[bool] = True, ) -> None: """Post a STAC item to the host server. diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py index 83eb2b0..55db015 100644 --- a/STACpopulator/populator_base.py +++ b/STACpopulator/populator_base.py @@ -2,7 +2,7 @@ import logging from abc import ABC, abstractmethod from datetime import datetime -from typing import Any, MutableMapping, Optional +from typing import Any, Optional import pystac from colorlog import ColoredFormatter @@ -84,7 +84,7 @@ def item_geometry_model(self): raise NotImplementedError @abstractmethod - def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def create_stac_item(self, item_name: str, item_data: dict[str, Any]) -> dict[str, Any]: raise NotImplementedError def validate_host(self, stac_host: str) -> str: @@ -99,7 +99,7 @@ def validate_host(self, stac_host: str) -> str: # STAC collections are supposed to include 'summaries' with # an aggregation of all supported 'properties' by its child items @functools.cache - def create_stac_collection(self) -> MutableMapping[str, Any]: + def create_stac_collection(self) -> dict[str, Any]: """ Create a basic STAC collection. 
@@ -125,7 +125,7 @@ def create_stac_collection(self) -> MutableMapping[str, Any]: self.publish_stac_collection(collection_data) return collection_data - def publish_stac_collection(self, collection_data: MutableMapping[str, Any]) -> None: + def publish_stac_collection(self, collection_data: dict[str, Any]) -> None: post_stac_collection(self.stac_host, collection_data, self.update) def ingest(self) -> None: From 3cd9e24f49827e99cd306f11c2f989a0d9a6b2da Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 10 Nov 2023 12:48:20 -0500 Subject: [PATCH 17/17] fix wrong request content/bytes to text/str type --- STACpopulator/input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/STACpopulator/input.py b/STACpopulator/input.py index a5d2774..f72bc6e 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -125,7 +125,7 @@ def extract_metadata(self, ds: siphon.catalog.Dataset) -> MutableMapping[str, An url = ds.access_urls["NCML"] r = requests.get(url) # Convert NcML to CF-compliant dictionary - attrs = xncml.Dataset.from_text(r.content).to_cf_dict() + attrs = xncml.Dataset.from_text(r.text).to_cf_dict() attrs["attributes"] = numpy_to_python_datatypes(attrs["attributes"]) attrs["access_urls"] = ds.access_urls return attrs