From f57046fda206a1e52556f381d976c53a16f0925d Mon Sep 17 00:00:00 2001
From: mishaschwartz <4380924+mishaschwartz@users.noreply.github.com>
Date: Fri, 11 Oct 2024 15:50:05 -0400
Subject: [PATCH] make magpie THREDDS permissions more configurable
---
CHANGES.md | 10 ++++-
.../config/magpie/providers.cfg.template | 37 ++++++++++---------
birdhouse/components/thredds/default.env | 5 +++
birdhouse/env.local.example | 35 +++++++++++++++---
4 files changed, 64 insertions(+), 23 deletions(-)
diff --git a/CHANGES.md b/CHANGES.md
index 03c1ca4bd..9334470c6 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -36,8 +36,16 @@
Data* dataset requires a basic configuration in order to properly serve WPS outputs. Making significant changes
to this configuration could have unexpected negative impacts on WPS usage.
+ - In order to allow customization of the Magpie THREDDS configuration in case new file extensions are added, we introduce
+ two additional variables:
+ - `THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES`: additional file prefixes (i.e. regular expression match patterns) that Magpie
+ should treat as metadata (accessible with "browse" permissions).
+ - `THREDDS_MAGPIE_EXTRA_DATA_PREFIXES`: additional file prefixes (i.e. regular expression match patterns) that Magpie
+ should treat as data (accessible with "read" permissions).
+
- The defaults for these new variables are fully backwards compatible. Without changing these variables, the THREDDS
- server should behave exactly the same as before.
+ server should behave exactly the same as before except that .md files and .rst files are now considered metadata
+ files according to the Magpie configuration, meaning that they can now be viewed with "browse" permissions.
[2.5.3](https://github.com/bird-house/birdhouse-deploy/tree/2.5.3) (2024-09-11)
------------------------------------------------------------------------------------------------------------------
diff --git a/birdhouse/components/thredds/config/magpie/providers.cfg.template b/birdhouse/components/thredds/config/magpie/providers.cfg.template
index 3ca5f9d84..499944ccb 100644
--- a/birdhouse/components/thredds/config/magpie/providers.cfg.template
+++ b/birdhouse/components/thredds/config/magpie/providers.cfg.template
@@ -15,21 +15,24 @@ providers:
- ".+\\.ncml" # match longest extension first to avoid tuncating it by match of sorter '.nc'
- ".+\\.nc"
metadata_type:
- prefixes:
- - null # note: special YAML value evaluated as `no-prefix`, use quotes if literal value is needed
- - "\\w+\\.gif" # threddsIcon, folder icon, etc.
- - "\\w+\\.ico" # favicon
- - "\\w+\\.txt" # licence
- - "\\w+\\.css" # tds.css
- - "catalog\\.\\w+" # note: special case for `THREDDS` top-level directory (root) accessed for `BROWSE`
- - catalog
- - ncml
- - uddc
- - iso
+ prefixes: [
+ null, # note: special YAML value evaluated as `no-prefix`, use quotes if literal value is needed
+ "\\w+\\.gif", # threddsIcon, folder icon, etc.
+ "\\w+\\.ico", # favicon
+ "\\w+\\.css", # tds.css
+ "catalog\\.\\w+", # note: special case for `THREDDS` top-level directory (root) accessed for `BROWSE`
+ catalog,
+ ncml,
+ uddc,
+ iso,
+ ${THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES}
+ ]
data_type:
- prefixes:
- - fileServer
- - dodsC
- - wcs
- - wms
- - ncss
+ prefixes: [
+ fileServer,
+ dodsC,
+ wcs,
+ wms,
+ ncss,
+ ${THREDDS_MAGPIE_EXTRA_DATA_PREFIXES}
+ ]
diff --git a/birdhouse/components/thredds/default.env b/birdhouse/components/thredds/default.env
index d3b43e3fa..7ce986d92 100644
--- a/birdhouse/components/thredds/default.env
+++ b/birdhouse/components/thredds/default.env
@@ -17,6 +17,9 @@ export THREDDS_SERVICE_DATA_LOCATION_NAME='Birdhouse'
export THREDDS_DATASET_URL_PATH='datasets'
export THREDDS_SERVICE_DATA_URL_PATH='birdhouse'
+export THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES='".+\\.txt", ".+\\.md", ".+\\.rst"'
+export THREDDS_MAGPIE_EXTRA_DATA_PREFIXES=''
+
export THREDDS_DEFAULT_FILE_FILTERS='
@@ -61,6 +64,8 @@ OPTIONAL_VARS="
\$THREDDS_IMAGE_URI
\$THREDDS_ADDITIONAL_CATALOG
\$THREDDS_SERVICE_DATA_EXTRA_FILE_FILTERS
+ \$THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES
+ \$THREDDS_MAGPIE_EXTRA_DATA_PREFIXES
"
export DELAYED_EVAL="
diff --git a/birdhouse/env.local.example b/birdhouse/env.local.example
index a130c2e8a..172b42dad 100644
--- a/birdhouse/env.local.example
+++ b/birdhouse/env.local.example
@@ -475,6 +475,12 @@ export THREDDS_ADDITIONAL_CATALOG=''
#
#
#'
+# It is possible to define additional compound services in the THREDDS_ADDITIONAL_CATALOG variable as well.
+# This may be useful if you are creating a catalog that only provides a subset of the services defined in the
+# compound service named "all" (see birdhouse/components/thredds/catalog.xml.template).
+# DO NOT define any non-compound service in THREDDS_ADDITIONAL_CATALOG that is not an exact copy of one of the
+# services defined in "all"! In particular, do not change the "base" attribute of any existing service.
+# Doing so may break the way that access permissions are enforced when accessing data through this service.
# Additional file filters to add for the Service Data THREDDS dataset. By default, the Service Data dataset will only
# serve files with the following extensions: .nc .ncml .txt .md .rst .csv
@@ -493,7 +499,7 @@ export THREDDS_ADDITIONAL_CATALOG=''
# THREDDS_ADDITIONAL_CATALOG variable).
# By default, the main dataset will only serve files with the following extensions: .nc .ncml .txt .md .rst .csv and will use
# the THREDDS service named "all" (see components/thredds/catalog.xml.template). However this can be customized if desired.
-# See the example below which would change the configuration to also serve .h5 and .json files instead of .md and .rst files.
+# See the example below which would change the configuration to also serve .h5 and .json files and exclude .md files.
# See the THREDDS documentation for the element for all configuration options.
#export THREDDS_DATASET_DATASETSCAN_BODY='
#
@@ -501,15 +507,34 @@ export THREDDS_ADDITIONAL_CATALOG=''
#
#
#
-#
-#
-#
+# ${THREDDS_DEFAULT_FILE_FILTERS}
#
#
-#
+#
#
#'
+# Files served by THREDDS are considered to either contain data or metadata (or both). The THREDDS Magpie service allows
+# us to handle access permissions differently for metadata vs. data. Magpie lets users with "browse" permissions access
+# metadata but only users with "read" permissions can access data.
+# By accessing files through different THREDDS services (see THREDDS documentation), we can either read the metadata with
+# "browse" permissions or the data itself with "read" permissions. For example, by default a NetCDF file can be accessed
+# using the NCML service to get its metadata or through the NCSS service to access the data itself.
+#
+# If you have a file that you would like to be treated as metadata (Magpie will allow users with "browse" permissions to
+# access it) no matter which THREDDS service is used to access it, add the file pattern to the `THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES`
+# variable. Similarly, if you have a file that you would like to be treated as data no matter which THREDDS service is used
+# to access it, add the file pattern to the `THREDDS_MAGPIE_EXTRA_DATA_PREFIXES` variable.
+#
+# For example, if you want all files with a .h5 extension to be treated as data files in all cases, add '".+\\.h5"' to the
+# `THREDDS_MAGPIE_EXTRA_DATA_PREFIXES` variable. Note that values are Python regular expressions in which backslashes are
+# doubled (escaped). Each expression should be surrounded by double quotes and, if multiple expressions are included, they
+# should be comma delimited.
+#
+# Current defaults are:
+#export THREDDS_MAGPIE_EXTRA_METADATA_PREFIXES='".+\\.txt", ".+\\.md", ".+\\.rst"'
+#export THREDDS_MAGPIE_EXTRA_DATA_PREFIXES=''
+
# Allow using Github as external AuthN/AuthZ provider with Magpie
# To setup Github as login, goto under section [OAuth Apps]
# and create a new Magpie application with configurations: