Merge pull request #9023 from IQSS/develop
merge develop to master for 5.12
pdurbin authored Oct 4, 2022
2 parents c08e3a9 + 9686ab8 commit 71341c0
Showing 263 changed files with 11,894 additions and 2,318 deletions.
4 changes: 2 additions & 2 deletions conf/docker-aio/0prep_deps.sh
@@ -4,9 +4,9 @@ if [ ! -d dv/deps ]; then
fi
wdir=`pwd`

-if [ ! -e dv/deps/payara-5.2021.6.zip ]; then
+if [ ! -e dv/deps/payara-5.2022.3.zip ]; then
echo "payara dependency prep"
-wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.6/payara-5.2021.6.zip -O dv/deps/payara-5.2021.6.zip
+wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip -O dv/deps/payara-5.2022.3.zip
fi

if [ ! -e dv/deps/solr-8.11.1dv.tgz ]; then
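
Since the Payara version now appears in two places in this script, a refactoring sketch (not part of this commit) could hoist it into a variable so future bumps touch one line:

```
# Sketch only, not from this commit: parameterize the Payara version
PAYARA_VERSION=5.2022.3
if [ ! -e dv/deps/payara-${PAYARA_VERSION}.zip ]; then
  echo "payara dependency prep"
  wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/${PAYARA_VERSION}/payara-${PAYARA_VERSION}.zip -O dv/deps/payara-${PAYARA_VERSION}.zip
fi
```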
2 changes: 1 addition & 1 deletion conf/docker-aio/c8.dockerfile
@@ -24,7 +24,7 @@ COPY disableipv6.conf /etc/sysctl.d/
RUN rm /etc/httpd/conf/*
COPY httpd.conf /etc/httpd/conf
RUN cd /opt ; tar zxf /tmp/dv/deps/solr-8.11.1dv.tgz
-RUN cd /opt ; unzip /tmp/dv/deps/payara-5.2021.6.zip ; ln -s /opt/payara5 /opt/glassfish4
+RUN cd /opt ; unzip /tmp/dv/deps/payara-5.2022.3.zip ; ln -s /opt/payara5 /opt/glassfish4

# this copy of domain.xml is the result of running `asadmin set server.monitoring-service.module-monitoring-levels.jvm=LOW` on a default glassfish installation (aka - enable the glassfish REST monitor endpoint for the jvm)
# this dies under Java 11, do we keep it?
6 changes: 6 additions & 0 deletions conf/solr/8.11.1/schema.xml
@@ -261,6 +261,9 @@
<field name="cleaningOperations" type="text_en" multiValued="false" stored="true" indexed="true"/>
<field name="collectionMode" type="text_en" multiValued="true" stored="true" indexed="true"/>
<field name="collectorTraining" type="text_en" multiValued="false" stored="true" indexed="true"/>
<field name="workflowType" type="text_en" multiValued="true" stored="true" indexed="true"/>
<field name="workflowCodeRepository" type="text_en" multiValued="true" stored="true" indexed="true"/>
<field name="workflowDocumentation" type="text_en" multiValued="true" stored="true" indexed="true"/>
<field name="contributor" type="text_en" multiValued="true" stored="true" indexed="true"/>
<field name="contributorName" type="text_en" multiValued="true" stored="true" indexed="true"/>
<field name="contributorType" type="text_en" multiValued="true" stored="true" indexed="true"/>
@@ -498,6 +501,9 @@
<copyField source="cleaningOperations" dest="_text_" maxChars="3000"/>
<copyField source="collectionMode" dest="_text_" maxChars="3000"/>
<copyField source="collectorTraining" dest="_text_" maxChars="3000"/>
<copyField source="workflowType" dest="_text_" maxChars="3000"/>
<copyField source="workflowCodeRepository" dest="_text_" maxChars="3000"/>
<copyField source="workflowDocumentation" dest="_text_" maxChars="3000"/>
<copyField source="contributor" dest="_text_" maxChars="3000"/>
<copyField source="contributorName" dest="_text_" maxChars="3000"/>
<copyField source="contributorType" dest="_text_" maxChars="3000"/>
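
Once the updated schema.xml is deployed and Solr is reloaded, the new fields can be spot-checked through Solr's Schema API. A sketch, assuming Dataverse's default core name `collection1` and Solr's default port 8983:

```
# Should return the field definition if the schema change was picked up
curl "http://localhost:8983/solr/collection1/schema/fields/workflowType"
```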
2 changes: 1 addition & 1 deletion doc/release-notes/5.10-release-notes.md
@@ -140,7 +140,7 @@ or
To find datasets without a CC0 license and with empty terms:

```
-select CONCAT('doi:', dvo.authority, '/', dvo.identifier), v.alias as dataverse_alias, case when versionstate='RELEASED' then concat(dv.versionnumber, '.', dv.minorversionnumber) else versionstate END as version, dv.id as datasetversion_id, t.id as termsofuseandaccess_id, t.termsofuse, t.confidentialitydeclaration, t.specialpermissions, t.restrictions, t.citationrequirements, t.depositorrequirements, t.conditions, t.disclaimer from dvobject dvo, termsofuseandaccess t, datasetversion dv, dataverse v where dv.dataset_id=dvo.id and dv.termsofuseandaccess_id=t.id and dvo.owner_id=v.id and t.license='NONE' and t.termsofuse is null;
+select CONCAT('doi:', dvo.authority, '/', dvo.identifier), v.alias as dataverse_alias, case when versionstate='RELEASED' then concat(dv.versionnumber, '.', dv.minorversionnumber) else versionstate END as version, dv.id as datasetversion_id, t.id as termsofuseandaccess_id, t.termsofuse, t.confidentialitydeclaration, t.specialpermissions, t.restrictions, t.citationrequirements, t.depositorrequirements, t.conditions, t.disclaimer from dvobject dvo, termsofuseandaccess t, datasetversion dv, dataverse v where dv.dataset_id=dvo.id and dv.termsofuseandaccess_id=t.id and dvo.owner_id=v.id and (t.license='NONE' or t.license is null) and t.termsofuse is null;
```

As before, there are a couple options.
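
One way to run the corrected query (a sketch, assuming the installer's default database name `dvndb` and a hypothetical file name):

```
# Save the corrected query above as find_empty_terms.sql, then:
sudo -u postgres psql dvndb -f find_empty_terms.sql
```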
255 changes: 255 additions & 0 deletions doc/release-notes/5.12-release-notes.md

Large diffs are not rendered by default.

@@ -1,5 +1,5 @@
Tool Type Scope Description
Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse.
Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide <https://wholetale.readthedocs.io/en/stable/users_guide/integration.html>`_.
-File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is <https://hypothes.is/>`_ annotations, images, PDF, text, video, tabular data, and spreadsheets - allowing them to be viewed without downloading. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreadsheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/GlobalDataverseCommunityConsortium/dataverse-previewers
+File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is <https://hypothes.is/>`_ annotations, images, PDF, text, video, tabular data, spreadsheets, and GeoJSON - allowing them to be viewed without downloading. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreadsheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers
Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions.
1 change: 1 addition & 0 deletions doc/sphinx-guides/source/_static/api/dataverse-facets.json
@@ -0,0 +1 @@
["authorName", "authorAffiliation"]
1 change: 1 addition & 0 deletions doc/sphinx-guides/source/_static/api/ddi_dataset.xml
@@ -142,6 +142,7 @@
</method>
<dataAccs>
<notes type="DVN:TOA" level="dv">Terms of Access</notes>
<notes type="DVN:TOU" level="dv">Terms of Use</notes>
<setAvail>
<accsPlac>Data Access Place</accsPlac>
<origArch>Original Archive</origArch>
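
To see where the new DVN:TOU note lands in a live export, the dataset metadata export API can be called with the ddi exporter. A sketch, reusing the example DOI that appears elsewhere in these guides:

```
curl "http://localhost:8080/api/datasets/export?exporter=ddi&persistentId=doi:10.5072/FK2/AAA000"
```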
@@ -0,0 +1 @@
["socialscience", "geospatial"]
7 changes: 7 additions & 0 deletions doc/sphinx-guides/source/admin/integrations.rst
@@ -70,6 +70,13 @@ Amnesia is a flexible data anonymization tool that transforms relational and tra

For instructions on depositing or loading data from Dataverse installations to Amnesia, visit https://amnesia.openaire.eu/about-documentation.html

+SampleDB
+++++++++
+
+SampleDB is a web-based electronic lab notebook (ELN) with a focus on flexible metadata. SampleDB can export this flexible, process-specific metadata to a new Dataset in a Dataverse installation using the EngMeta Process Metadata block.
+
+For instructions on using the Dataverse export, you can visit https://scientific-it-systems.iffgit.fz-juelich.de/SampleDB/administrator_guide/dataverse_export.html


Embedding Data on Websites
--------------------------
4 changes: 4 additions & 0 deletions doc/sphinx-guides/source/admin/metadatacustomization.rst
@@ -571,9 +571,13 @@ Configuration involves specifying which fields are to be mapped, whether free-te
These are all defined in the :ref:`:CVocConf <:CVocConf>` setting as a JSON array. Details about the required elements as well as example JSON arrays are available at https://github.com/gdcc/dataverse-external-vocab-support, along with an example metadata block that can be used for testing.
The scripts required can be hosted locally or retrieved dynamically from https://gdcc.github.io/ (similar to how dataverse-previewers work).

+Please note that in addition to the :ref:`:CVocConf` described above, an alternative is the :ref:`:ControlledVocabularyCustomJavaScript` setting.

Tips from the Dataverse Community
---------------------------------

+When creating new metadata blocks, please review the :doc:`/style/text` section of the Style Guide, which includes guidance about naming metadata fields and writing text for metadata tooltips and watermarks.

If there are tips that you feel are omitted from this document, please open an issue at https://github.com/IQSS/dataverse/issues and consider making a pull request to make improvements. You can find this document at https://github.com/IQSS/dataverse/blob/develop/doc/sphinx-guides/source/admin/metadatacustomization.rst

Alternatively, you are welcome to request "edit" access to this "Tips for Dataverse Software metadata blocks from the community" Google doc: https://docs.google.com/document/d/1XpblRw0v0SvV-Bq6njlN96WyHJ7tqG0WWejqBdl7hE0/edit?usp=sharing
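
For reference, database settings such as :CVocConf are applied through the admin settings API. A sketch, assuming the JSON array has been saved to a hypothetical cvoc-conf.json:

```
curl -X PUT --upload-file cvoc-conf.json http://localhost:8080/api/admin/settings/:CVocConf
```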
26 changes: 21 additions & 5 deletions doc/sphinx-guides/source/admin/metadataexport.rst
@@ -11,19 +11,35 @@ Publishing a dataset automatically starts a metadata export job, that will run i

A scheduled timer job that runs nightly will attempt to export any published datasets that for whatever reason haven't been exported yet. This timer is activated automatically on the deployment, or restart, of the application. So, again, no need to start or configure it manually. (See the :doc:`timers` section of this Admin Guide for more information.)

-Batch exports through the API
+.. _batch-exports-through-the-api:
+
+Batch Exports Through the API
-----------------------------

-In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following 2 API calls are provided:
+In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following four API calls are provided:

``curl http://localhost:8080/api/admin/metadata/exportAll``

``curl http://localhost:8080/api/admin/metadata/reExportAll``

-The former will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet.
-The latter will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not.
+``curl http://localhost:8080/api/admin/metadata/clearExportTimestamps``
+
+``curl http://localhost:8080/api/admin/metadata/:persistentId/reExportDataset?persistentId=doi:10.5072/FK2/AAA000``
+
+The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet.
+The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not.

+The first two calls return a status message informing the administrator that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``.

+Instead of running "reExportAll" the same can be accomplished using "clearExportTimestamps" followed by "exportAll".
+The difference is that if an export run fails prematurely due to some problem, the datasets that did not get exported still have their timestamps cleared. A next call to exportAll will skip the datasets already exported and try to export the ones that still need it.
+Calling clearExportTimestamps should return ``{"status":"OK","data":{"message":"cleared: X"}}`` where "X" is the total number of datasets cleared.
+
+The reExportDataset call gives you the opportunity to *force* a re-export of only a specific dataset and (with some script automation) could allow you to export specific batches of datasets. This might be useful when handling export problems or when reExportAll takes too much time and is overkill. Note that :ref:`export-dataset-metadata-api` is a related API.
+
+reExportDataset can be called with either ``persistentId`` (as shown above, with a DOI) or with the database id of a dataset (as shown below, with "42" as the database id).

-These calls return a status message informing the administrator, that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``.
+``curl http://localhost:8080/api/admin/metadata/42/reExportDataset``

Note that creating, modifying, or re-exporting an OAI set will also attempt to export all the unexported datasets found in the set.

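As a sketch of the "script automation" mentioned above (assuming a hypothetical pids.txt with one persistent identifier per line):

```
# Re-export a specific batch of datasets, one reExportDataset call per PID
while read -r pid; do
  curl "http://localhost:8080/api/admin/metadata/:persistentId/reExportDataset?persistentId=$pid"
done < pids.txt
```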
8 changes: 3 additions & 5 deletions doc/sphinx-guides/source/admin/troubleshooting.rst
@@ -96,10 +96,8 @@ Sometimes your Dataverse installation fails to deploy, or Payara fails to restar

We don't know what's causing this issue, but here's a known workaround:

-- Stop Payara;
-
-- Remove the ``generated`` and ``osgi-cache`` directories;
-
+- Stop Payara;
+- Remove the ``generated`` and ``osgi-cache`` directories from the ``domain1`` directory;
- Start Payara

The shell script below performs the steps above.
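
(The script itself falls outside this hunk; a minimal sketch of the three steps, assuming a default Payara 5 install at /usr/local/payara5, could look like the following.)

```
#!/bin/bash
# Sketch only; adjust PAYARA to your installation
PAYARA=/usr/local/payara5
$PAYARA/bin/asadmin stop-domain domain1
rm -rf "$PAYARA/glassfish/domains/domain1/generated" "$PAYARA/glassfish/domains/domain1/osgi-cache"
$PAYARA/bin/asadmin start-domain domain1
```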
@@ -146,7 +144,7 @@ To identify the specific invalid values in the affected datasets, or to check al
Many Files with a File Type of "Unknown", "Application", or "Binary"
--------------------------------------------------------------------

-From the home page of a Dataverse installation you can get a count of files by file type by clicking "Files" and then scrolling down to "File Type". If you see a lot of files that are "Unknown", "Application", or "Binary" you can have the Dataverse installation attempt to redetect the file type by using the :ref:`Redetect File Type <redetect-file-type>` API endpoint.
+From the home page of a Dataverse installation you can get a count of files by file type by clicking "Files" and then scrolling down to "File Type". If you see a lot of files that are "Unknown", "Application", or "Binary" you can have the Dataverse installation attempt to redetect the file type by using the :ref:`Redetect File Type <redetect-file-type>` API endpoint.

.. _actionlogrecord-trimming:

8 changes: 4 additions & 4 deletions doc/sphinx-guides/source/api/apps.rst
@@ -28,9 +28,9 @@ https://github.com/scholarsportal/Dataverse-Data-Curation-Tool
File Previewers
~~~~~~~~~~~~~~~

-File Previewers are tools that display the content of files - including audio, html, Hypothes.is annotations, images, PDF, text, video - allowing them to be viewed without downloading.
+File Previewers are tools that display the content of files - including audio, html, Hypothes.is annotations, images, PDF, text, video, GeoJSON - allowing them to be viewed without downloading.

-https://github.com/GlobalDataverseCommunityConsortium/dataverse-previewers
+https://github.com/gdcc/dataverse-previewers

Python
------
@@ -102,9 +102,9 @@ Please note that there is a Java library for Dataverse Software APIs listed in t
DVUploader
~~~~~~~~~~

-The open-source DVUploader tool is a stand-alone command-line Java application that uses the Dataverse Software API to upload files to a specified Dataset. Files can be specified by name, or the DVUploader can upload all files in a directory or recursively from a directory tree. The DVUploader can also verify that uploaded files match their local sources by comparing the local and remote fixity checksums. Source code, release 1.0.0- jar file, and documentation are available on GitHub. DVUploader's creation was supported by the Texas Digital Library.
+The open-source DVUploader tool is a stand-alone command-line Java application that uses the Dataverse Software API to upload files to a specified Dataset. Files can be specified by name, or the DVUploader can upload all files in a directory or recursively from a directory tree. The DVUploader can also verify that uploaded files match their local sources by comparing the local and remote fixity checksums. Source code, the latest release - jar file, and documentation are available on GitHub. DVUploader's creation was supported by the Texas Digital Library.

-https://github.com/IQSS/dataverse-uploader
+https://github.com/GlobalDataverseCommunityConsortium/dataverse-uploader

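A hedged invocation sketch (the flag names and jar file name below are assumptions recalled from the DVUploader README; verify against the repository above before relying on them):

```
# Assumed flags: -server, -did (dataset DOI), -key (API token); the jar version varies by release
java -jar DVUploader-v1.1.0.jar -server=https://demo.dataverse.org -did=doi:10.5072/FK2/AAA000 -key=$API_TOKEN data.csv
```
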
Dataverse for Android
~~~~~~~~~~~~~~~~~~~~~
9 changes: 8 additions & 1 deletion doc/sphinx-guides/source/api/client-libraries.rst
@@ -1,7 +1,7 @@
Client Libraries
================

-Currently there are client libraries for Python, Javascript, R, and Java that can be used to develop against Dataverse Software APIs. We use the term "client library" on this page but "Dataverse Software SDK" (software development kit) is another way of describing these resources. They are designed to help developers express Dataverse Software concepts more easily in the languages listed below. For support on any of these client libraries, please consult each project's README.
+Currently there are client libraries for Python, Javascript, R, Java, and Julia that can be used to develop against Dataverse Software APIs. We use the term "client library" on this page but "Dataverse Software SDK" (software development kit) is another way of describing these resources. They are designed to help developers express Dataverse Software concepts more easily in the languages listed below. For support on any of these client libraries, please consult each project's README.

Because a Dataverse installation is a SWORD server, additional client libraries exist for Java, Ruby, and PHP per the :doc:`/api/sword` page.

@@ -45,3 +45,10 @@ Ruby
https://github.com/libis/dataverse_api is a Ruby gem for Dataverse Software APIs. It is registered as a library on Rubygems (https://rubygems.org/search?query=dataverse).

The gem is created and maintained by the LIBIS team (https://www.libis.be) at the University of Leuven (https://www.kuleuven.be).
+
+Julia
+-----
+
+https://github.com/gaelforget/Dataverse.jl is the official Julia package for Dataverse Software APIs. It can be found on JuliaHub (https://juliahub.com/ui/Packages/Dataverse/xWAqY/) and leverages pyDataverse to provide an interface to Dataverse's data access API and native API. Dataverse.jl provides a few additional functionalities with documentation (https://gaelforget.github.io/Dataverse.jl/dev/) and a demo notebook (https://gaelforget.github.io/Dataverse.jl/dev/notebook.html).
+
+It was created and is maintained by `Gael Forget <https://github.com/gaelforget>`_.
1 change: 1 addition & 0 deletions doc/sphinx-guides/source/api/index.rst
@@ -21,5 +21,6 @@ API Guide
client-libraries
external-tools
curation-labels
+linkeddatanotification
apps
faq