
Commit

Merge branch 'develop' into 8344-file-upload-default-mime
landreev committed Mar 3, 2022
2 parents 8f59653 + ee7cc4b commit 8698f78
Showing 75 changed files with 2,306 additions and 1,472 deletions.
20 changes: 14 additions & 6 deletions .github/workflows/maven_unit_test.yml
@@ -10,16 +10,24 @@ on:

jobs:
unittest:
name: (JDK ${{ matrix.jdk }} / ${{ matrix.os }}) Unit Tests
name: (${{ matrix.status}} / JDK ${{ matrix.jdk }}) Unit Tests
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest ]
jdk: [ '11' ]
experimental: [false]
status: ["Stable"]
#
# JDK 17 builds disabled due to non-essential fails marking CI jobs as completely failed within
# Github Projects, PR lists etc. This was consensus on Slack #dv-tech. See issue #8094
# (This is a limitation of how Github is currently handling these things.)
#
#include:
# - os: ubuntu-latest
# jdk: '16'
runs-on: ${{ matrix.os }}
# - jdk: '17'
# experimental: true
# status: "Experimental"
continue-on-error: ${{ matrix.experimental }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up JDK ${{ matrix.jdk }}
@@ -34,7 +42,7 @@ jobs:
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2
- name: Build with Maven
run: mvn -DcompilerArgument=-Xlint:unchecked -P all-unit-tests clean test
run: mvn -DcompilerArgument=-Xlint:unchecked -Dtarget.java.version=${{ matrix.jdk }} -P all-unit-tests clean test
- name: Maven Code Coverage
env:
CI_NAME: github
3 changes: 3 additions & 0 deletions .gitignore
@@ -70,3 +70,6 @@ venv
scripts/search/data/binary/trees.png.thumb140
src/main/webapp/resources/images/cc0.png.thumb140
src/main/webapp/resources/images/dataverseproject.png.thumb140

# apache-maven is downloaded by docker-aio
apache-maven*
4 changes: 2 additions & 2 deletions conf/docker-aio/0prep_deps.sh
@@ -9,11 +9,11 @@ if [ ! -e dv/deps/payara-5.2021.5.zip ]; then
wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.5/payara-5.2021.5.zip -O dv/deps/payara-5.2021.5.zip
fi

if [ ! -e dv/deps/solr-8.8.1dv.tgz ]; then
if [ ! -e dv/deps/solr-8.11.1dv.tgz ]; then
echo "solr dependency prep"
# schema changes *should* be the only ones...
cd dv/deps/
wget https://archive.apache.org/dist/lucene/solr/8.8.1/solr-8.8.1.tgz -O solr-8.8.1dv.tgz
wget https://archive.apache.org/dist/lucene/solr/8.11.1/solr-8.11.1.tgz -O solr-8.11.1dv.tgz
cd ../../
fi

4 changes: 2 additions & 2 deletions conf/docker-aio/1prep.sh
@@ -4,8 +4,8 @@
# this was based off the phoenix deployment; and is likely uglier and bulkier than necessary in a perfect world

mkdir -p testdata/doc/sphinx-guides/source/_static/util/
cp ../solr/8.8.1/schema*.xml testdata/
cp ../solr/8.8.1/solrconfig.xml testdata/
cp ../solr/8.11.1/schema*.xml testdata/
cp ../solr/8.11.1/solrconfig.xml testdata/
cp ../jhove/jhove.conf testdata/
cp ../jhove/jhoveConfig.xsd testdata/
cd ../../
8 changes: 4 additions & 4 deletions conf/docker-aio/c8.dockerfile
@@ -23,7 +23,7 @@ COPY testdata/sushi_sample_logs.json /tmp/
COPY disableipv6.conf /etc/sysctl.d/
RUN rm /etc/httpd/conf/*
COPY httpd.conf /etc/httpd/conf
RUN cd /opt ; tar zxf /tmp/dv/deps/solr-8.8.1dv.tgz
RUN cd /opt ; tar zxf /tmp/dv/deps/solr-8.11.1dv.tgz
RUN cd /opt ; unzip /tmp/dv/deps/payara-5.2021.5.zip ; ln -s /opt/payara5 /opt/glassfish4

# this copy of domain.xml is the result of running `asadmin set server.monitoring-service.module-monitoring-levels.jvm=LOW` on a default glassfish installation (aka - enable the glassfish REST monitor endpoint for the jvm)
@@ -34,9 +34,9 @@ RUN sudo -u postgres /usr/pgsql-13/bin/initdb -D /var/lib/pgsql/13/data -E 'UTF-

# copy configuration related files
RUN cp /tmp/dv/pg_hba.conf /var/lib/pgsql/13/data/
RUN cp -r /opt/solr-8.8.1/server/solr/configsets/_default /opt/solr-8.8.1/server/solr/collection1
RUN cp /tmp/dv/schema*.xml /opt/solr-8.8.1/server/solr/collection1/conf/
RUN cp /tmp/dv/solrconfig.xml /opt/solr-8.8.1/server/solr/collection1/conf/solrconfig.xml
RUN cp -r /opt/solr-8.11.1/server/solr/configsets/_default /opt/solr-8.11.1/server/solr/collection1
RUN cp /tmp/dv/schema*.xml /opt/solr-8.11.1/server/solr/collection1/conf/
RUN cp /tmp/dv/solrconfig.xml /opt/solr-8.11.1/server/solr/collection1/conf/solrconfig.xml

# skipping payara user and solr user (run both as root)

2 changes: 1 addition & 1 deletion conf/docker-aio/entrypoint.bash
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
export LANG=en_US.UTF-8
sudo -u postgres /usr/pgsql-13/bin/pg_ctl start -D /var/lib/pgsql/13/data &
cd /opt/solr-8.8.1/
cd /opt/solr-8.11.1/
# TODO: Run Solr as non-root and remove "-force".
bin/solr start -force
bin/solr create_core -c collection1 -d server/solr/collection1/conf -force
File renamed without changes.
File renamed without changes.
@@ -408,7 +408,7 @@
to occupy. Note that when this option is specified, the size
and initialSize parameters are ignored.
-->
<filterCache class="solr.FastLRUCache"
<filterCache class="solr.search.CaffeineCache"
size="512"
initialSize="512"
autowarmCount="0"/>
@@ -421,7 +421,7 @@
maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
to occupy
-->
<queryResultCache class="solr.LRUCache"
<queryResultCache class="solr.search.CaffeineCache"
size="512"
initialSize="512"
autowarmCount="0"/>
@@ -432,14 +432,14 @@
document). Since Lucene internal document ids are transient,
this cache will not be autowarmed.
-->
<documentCache class="solr.LRUCache"
<documentCache class="solr.search.CaffeineCache"
size="512"
initialSize="512"
autowarmCount="0"/>

<!-- custom cache currently used by block join -->
<cache name="perSegFilter"
class="solr.search.LRUCache"
class="solr.search.CaffeineCache"
size="10"
initialSize="0"
autowarmCount="10"
File renamed without changes.
10 changes: 6 additions & 4 deletions conf/vagrant/etc/yum.repos.d/shibboleth.repo
@@ -1,7 +1,9 @@
[security_shibboleth]
name=Shibboleth (CentOS_8)
[shibboleth]
name=Shibboleth (rockylinux8)
# Please report any problems to https://shibboleth.atlassian.net/jira
type=rpm-md
baseurl=http://download.opensuse.org/repositories/security:/shibboleth/CentOS_8/
mirrorlist=https://shibboleth.net/cgi-bin/mirrorlist.cgi/rockylinux8
gpgcheck=1
gpgkey=http://download.opensuse.org/repositories/security:/shibboleth/CentOS_8/repodata/repomd.xml.key
gpgkey=https://shibboleth.net/downloads/service-provider/RPMS/repomd.xml.key
https://shibboleth.net/downloads/service-provider/RPMS/cantor.repomd.xml.key
enabled=1
13 changes: 13 additions & 0 deletions doc/release-notes/47-solr-upgrade.md
@@ -0,0 +1,13 @@
### Solr 8 Support/Upgrade

The Dataverse Software now runs on Solr 8.11.1, the latest available stable release in the Solr 8.x series.

### Solr Upgrade

With this release we upgrade to the latest available stable release in the Solr 8.x branch. We recommend a fresh installation of Solr (the index will be empty) followed by an "index all".

Before you start the "index all", the Dataverse installation will appear to be empty because the search results come from Solr. As indexing progresses, partial results will appear until indexing is complete.
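Once the new Solr is running, the "index all" can be started through the admin API. A minimal sketch, assuming the standard admin endpoint on a local installation (this API is normally restricted to localhost):

    # start an asynchronous full reindex of all collections and datasets
    curl http://localhost:8080/api/admin/index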

See <http://guides.dataverse.org/en/5.10/installation/prerequisites.html#installing-solr> for more information.

Also please refer to <https://guides.dataverse.org/en/5.10/admin/metadatacustomization.html#updating-the-solr-schema> for information on updating your solr schema for any custom metadata blocks in your installation.
5 changes: 5 additions & 0 deletions doc/release-notes/7863-metadata-fields.md
@@ -0,0 +1,5 @@
### Dynamically Request Arbitrary Metadata Fields from Search API

The Search API now allows arbitrary metadata fields to be requested when displaying results from datasets. You can request all fields from metadata blocks or pick and choose certain fields.

The new parameter is called `metadata_fields` and the Search API documentation contains details and examples: https://guides.dataverse.org/en/5.9/api/search.html
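As an illustration (the field names and installation URL below are only examples; see the Search API documentation above for the supported syntax), a query requesting extra citation fields might look like:

    # request author and description fields from the citation block alongside each dataset result
    curl "https://demo.dataverse.org/api/search?q=*&type=dataset&metadata_fields=citation:author&metadata_fields=citation:dsDescription"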
3 changes: 3 additions & 0 deletions doc/release-notes/8244-guestbook.md
@@ -0,0 +1,3 @@
A download button has been added to the page that lists guestbooks. This saves a click but you can still download from the "View Responses" page, as before.

Links to the guides about guestbooks have been added in additional places.
@@ -5,7 +5,7 @@
# chkconfig: 35 92 08
# description: Starts and stops Apache Solr

SOLR_DIR="/usr/local/solr/solr-8.8.1"
SOLR_DIR="/usr/local/solr/solr-8.11.1"
SOLR_COMMAND="bin/solr"
SOLR_ARGS="-m 1g -j jetty.host=127.0.0.1"
SOLR_USER=solr
@@ -5,9 +5,9 @@ After = syslog.target network.target remote-fs.target nss-lookup.target
[Service]
User = solr
Type = forking
WorkingDirectory = /usr/local/solr/solr-8.8.1
ExecStart = /usr/local/solr/solr-8.8.1/bin/solr start -m 1g -j "jetty.host=127.0.0.1"
ExecStop = /usr/local/solr/solr-8.8.1/bin/solr stop
WorkingDirectory = /usr/local/solr/solr-8.11.1
ExecStart = /usr/local/solr/solr-8.11.1/bin/solr start -m 1g -j "jetty.host=127.0.0.1"
ExecStop = /usr/local/solr/solr-8.11.1/bin/solr stop
LimitNOFILE=65000
LimitNPROC=65000
Restart=on-failure
7 changes: 7 additions & 0 deletions doc/sphinx-guides/source/admin/integrations.rst
@@ -11,6 +11,13 @@ Getting Data In

A variety of integrations are oriented toward making it easier for your researchers to deposit data into your Dataverse installation.

GitHub
+++++++

Dataverse integration with GitHub is implemented via a Dataverse Uploader GitHub Action. It is a reusable, composite workflow for uploading a git repository or subdirectory into a dataset on a target Dataverse installation. The action is customizable, allowing users to choose to replace a dataset, add to the dataset, publish it or leave it as a draft version on Dataverse. The action provides some metadata to the dataset, such as the origin GitHub repository, and it preserves the directory tree structure.

For instructions on using the Dataverse Uploader GitHub Action, visit https://github.com/marketplace/actions/dataverse-uploader-action

Dropbox
+++++++

4 changes: 2 additions & 2 deletions doc/sphinx-guides/source/admin/metadatacustomization.rst
@@ -644,7 +644,7 @@ the Solr schema configuration, including any enabled metadata schemas:

``curl "http://localhost:8080/api/admin/index/solr/schema"``

You can use :download:`update-fields.sh <../../../../conf/solr/8.8.1/update-fields.sh>` to easily add these to the
You can use :download:`update-fields.sh <../../../../conf/solr/8.11.1/update-fields.sh>` to easily add these to the
Solr schema you installed for your Dataverse installation.
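For reference, a typical invocation chains the two together (a sketch only; the schema path below assumes the default Solr 8.11.1 install location):

.. code-block:: bash

   # fetch the fields Dataverse knows about and merge them into the installed Solr schema
   curl "http://localhost:8080/api/admin/index/solr/schema" | update-fields.sh /usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml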

The script needs a target XML file containing your Solr schema. (See the :doc:`/installation/prerequisites/` section of
@@ -668,7 +668,7 @@ from some place else than your Dataverse installation).
Please note that reconfigurations of your Solr index might require a re-index. Usually release notes indicate
a necessary re-index, but for your custom metadata you will need to keep track on your own.

Please note also that if you are going to make a pull request updating ``conf/solr/8.8.1/schema.xml`` with fields you have
Please note also that if you are going to make a pull request updating ``conf/solr/8.11.1/schema.xml`` with fields you have
added, you should first load all the custom metadata blocks in ``scripts/api/data/metadatablocks`` (including ones you
don't care about) to create a complete list of fields. (This might change in the future.)

2 changes: 1 addition & 1 deletion doc/sphinx-guides/source/api/metrics.rst
@@ -138,7 +138,7 @@ The following table lists the available metrics endpoints (not including the Mak
/api/info/metrics/datasets/monthly,"date, count","json, csv",collection subtree,"released, choice of all, local or remote (harvested)",y,monthly cumulative timeseries from first date of first entry to now,released means only currently released dataset versions (not unpublished or DEACCESSIONED versions)
/api/info/metrics/datasets/pastDays/{n},count,json,collection subtree,"released, choice of all, local or remote (harvested)",y,aggregate count for past n days,
/api/info/metrics/datasets/bySubject,"subject, count","json, csv",collection subtree,"released, choice of all, local or remote (harvested)",y,total count per subject,
/api/info/metrics/datasets/bySubjecttoMonth/{yyyy-MM},"subject, count","json, csv",collection subtree,"released, choice of all, local or remote (harvested)",y,cumulative count per subject up to month specified,
/api/info/metrics/datasets/bySubject/toMonth/{yyyy-MM},"subject, count","json, csv",collection subtree,"released, choice of all, local or remote (harvested)",y,cumulative count per subject up to month specified,
/api/info/metrics/files,count,json,collection subtree,in released datasets,y,as of now/total,
/api/info/metrics/files/toMonth/{yyyy-MM},count,json,collection subtree,in released datasets,y,cumulative up to month specified,
/api/info/metrics/files/monthly,"date, count","json, csv",collection subtree,in released datasets,y,monthly cumulative timeseries from first date of first entry to now,date is the month when the first version containing the file was released (or created for harvested versions)
48 changes: 41 additions & 7 deletions doc/sphinx-guides/source/api/native-api.rst
@@ -584,9 +584,13 @@ The fully expanded example above (without environment variables) looks like this
You should expect a 200 ("OK") response and JSON output.
.. _download-guestbook-api:
Retrieve Guestbook Responses for a Dataverse Collection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For more about guestbooks, see :ref:`dataset-guestbooks` in the User Guide.
In order to retrieve the Guestbook Responses for a Dataverse collection, you must know either its "alias" (which the GUI calls an "identifier") or its database ID. If the Dataverse collection has more than one guestbook you may provide the id of a single guestbook as an optional parameter. If no guestbook id is provided the results returned will be the same as pressing the "Download All Responses" button on the Manage Dataset Guestbook page. If the guestbook id is provided then only those responses from that guestbook will be included. The FILENAME parameter is optional, and if it is not included, the responses will be displayed in the console.
.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below.
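A minimal sketch (the collection alias, guestbook id, and output filename below are illustrative):

.. code-block:: bash

   # download all responses for guestbook 1 in the collection with alias "root" as CSV
   curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/root/guestbookResponses?guestbookId=1" -o guestbook-responses.csv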
@@ -1516,6 +1520,9 @@ The fully expanded example above (without environment variables) looks like this
Dataset Locks
~~~~~~~~~~~~~
Manage Locks on a Specific Dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To check if a dataset is locked:
.. code-block:: bash
@@ -1547,7 +1554,7 @@ The fully expanded example above (without environment variables) looks like this
curl "https://demo.dataverse.org/api/datasets/24/locks?type=Ingest"
Currently implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload``, ``pidRegister``, and ``EditInProgress``.
Currently implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload``, ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``.
The API will output the list of locks, for example::
@@ -1556,12 +1563,14 @@ The API will output the list of locks, for example::
{
"lockType":"Ingest",
"date":"Fri Aug 17 15:05:51 EDT 2018",
"user":"dataverseAdmin"
"user":"dataverseAdmin",
"dataset":"doi:12.34567/FK2/ABCDEF"
},
{
"lockType":"Workflow",
"date":"Fri Aug 17 15:02:00 EDT 2018",
"user":"dataverseAdmin"
"user":"dataverseAdmin",
"dataset":"doi:12.34567/FK2/ABCDEF"
}
]
}
@@ -1608,20 +1617,43 @@ Or, to delete a lock of the type specified only. Note that this requires “supe
export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
export SERVER_URL=https://demo.dataverse.org
export ID=24
export LOCK_TYPE=pidRegister
export LOCK_TYPE=finalizePublication
curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE $SERVER_URL/api/datasets/$ID/locks?type=$LOCK_TYPE
The fully expanded example above (without environment variables) looks like this:
.. code-block:: bash
curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/24/locks?type=pidRegister
curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE https://demo.dataverse.org/api/datasets/24/locks?type=finalizePublication
If the dataset is not locked (or if there is no lock of the specified type), the API will exit with a warning message.
(Note that the API calls above all support both the database id and persistent identifier notation for referencing the dataset)
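For example, the same lock listing can be requested with the persistent identifier notation (a sketch; the DOI is illustrative):

.. code-block:: bash

   # list locks on a dataset referenced by its DOI rather than its database id
   curl "$SERVER_URL/api/datasets/:persistentId/locks?persistentId=doi:10.5072/FK2/ABCDEF"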
List Locks Across All Datasets
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Note that this API requires “superuser” credentials. You must supply the ``X-Dataverse-key`` header with the api token of an admin user (as in the example below).
The output of this API is formatted identically to the API that lists the locks for a specific dataset, as in one of the examples above.
Use the following API to list ALL the locks on all the datasets in your installation:
``/api/datasets/locks``
The listing can be filtered by specific lock type **and/or** user, using the following *optional* query parameters:
* ``userIdentifier`` - To list the locks owned by a specific user
* ``type`` - To list the locks of the type specified. If the supplied value does not match a known lock type, the API will return an error and a list of valid lock types. As of writing this, the implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload``, ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``.
For example:
.. code-block:: bash
curl -H "X-Dataverse-key: xxx" "http://localhost:8080/api/datasets/locks?type=Ingest&userIdentifier=davis4ever"
.. _dataset-metrics-api:
Dataset Metrics
@@ -3723,6 +3755,8 @@ Note: setting ``:InheritParentRoleAssignments`` will automatically trigger inher
Manage Available Standard License Terms
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For more context about configuring licenses, see :ref:`license-config` in the Installation Guide.
View the list of standard license terms that can be selected for a dataset:
.. code-block:: bash
@@ -3738,7 +3772,7 @@ View the details of the standard license with the database ID specified in ``$ID
curl $SERVER_URL/api/licenses/$ID
Superusers can add a new license by posting a JSON file adapted from this example :download:`add-license.json <../_static/api/add-license.json>`. The ``name`` and ``uri`` of the new license must be unique. :
Superusers can add a new license by posting a JSON file adapted from this example :download:`add-license.json <../_static/api/add-license.json>`. The ``name`` and ``uri`` of the new license must be unique. If you are interested in adding a Creative Commons license, you are encouraged to use the JSON files under :ref:`adding-creative-commons-licenses`:
.. code-block:: bash
@@ -3758,7 +3792,7 @@ Superusers can set which license is the default specified by the license ``$ID``
curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN --data-binary @edit-license.json $SERVER_URL/api/licenses/default/$ID
Superusers can delete a license that is not in useby the license ``$ID``:
Superusers can delete a license that is not in use by the license ``$ID``:
.. code-block:: bash