From 4b96cec067fcd66f3a12be96bda91c5b049f00af Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 11 Sep 2024 15:00:32 -0400 Subject: [PATCH] Remap oai_dc fields dc:type and dc:date (#10737) * Remap oai_dc fields dc:type, dc:date, and dc:rights #8129. The `oai_dc` export and harvesting format has had the following fields remapped: - dc:type was mapped to the field "Kind of Data". Now it is hard-coded to the word "Dataset". - dc:date was mapped to the field "Production Date" when available and otherwise to "Publication Date". Now it is mapped only to the field "Publication Date". - dc:rights was not mapped to anything. Now it is mapped (when available) to terms of use, restrictions, and license. * add tests for export and citation date #8129 * map dc:date to pub date or field for citation date #8129 * back out of any changes to dc:rights #8129 * remove OAI-PMH changes from API changelog (also in release note) #8129 * tweak release note, mention backward incompatibility, reexport #8129 --- doc/release-notes/8129-harvesting.md | 18 ++++ doc/sphinx-guides/source/api/native-api.rst | 2 + .../dublincore/DublinCoreExportUtil.java | 33 +++++-- .../harvard/iq/dataverse/api/DatasetsIT.java | 88 +++++++++++++++++-- .../edu/harvard/iq/dataverse/api/UtilIT.java | 27 ++++++ 5 files changed, 157 insertions(+), 11 deletions(-) create mode 100644 doc/release-notes/8129-harvesting.md diff --git a/doc/release-notes/8129-harvesting.md b/doc/release-notes/8129-harvesting.md new file mode 100644 index 00000000000..63ca8744941 --- /dev/null +++ b/doc/release-notes/8129-harvesting.md @@ -0,0 +1,18 @@ +### Remap oai_dc export and harvesting format fields: dc:type and dc:date + +The `oai_dc` export and harvesting format has had the following fields remapped: + +- dc:type was mapped to the field "Kind of Data". Now it is hard-coded to the word "Dataset". +- dc:date was mapped to the field "Production Date" when available and otherwise to "Publication Date". 
Now it is mapped to the field "Publication Date" or the field used for the citation date, if set (see [Set Citation Date Field Type for a Dataset](https://guides.dataverse.org/en/6.3/api/native-api.html#set-citation-date-field-type-for-a-dataset)). + +In order for these changes to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.3/admin/metadataexport.html#batch-exports-through-the-api) should be run. + +For more information, please see #8129 and #10737. + +### Backward incompatible changes + +See the "Remap oai_dc export" section above. + +### Upgrade instructions + +In order for changes to the `oai_dc` metadata export format to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.3/admin/metadataexport.html#batch-exports-through-the-api) should be run. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index ad1d217b9a1..117aceb141d 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1836,6 +1836,8 @@ The fully expanded example above (without environment variables) looks like this .. note:: You cannot deaccession a dataset more than once. If you call this endpoint twice for the same dataset version, you will get a not found error on the second call, since the dataset you are looking for will no longer be published since it is already deaccessioned. +.. 
_set-citation-date-field: + Set Citation Date Field Type for a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java index 6b7cb844f3e..9a2c3085d2d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java @@ -7,6 +7,8 @@ import com.google.gson.Gson; import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; @@ -176,11 +178,24 @@ private static void createOAIDC(XMLStreamWriter xmlw, DatasetDTO datasetDto, Str writeFullElementList(xmlw, dcFlavor+":"+"language", dto2PrimitiveList(version, DatasetFieldConstant.language)); - String date = dto2Primitive(version, DatasetFieldConstant.productionDate); - if (date == null) { - date = datasetDto.getPublicationDate(); + /** + * dc:date. "I suggest changing the Dataverse / DC Element (oai_dc) + * mapping, so that dc:date is mapped with Publication Date. This is + * also in line with citation recommendations. The publication date is + * the preferred date when citing research data; see, e.g., page 12 in + * The Tromsø Recommendations for Citation of Research Data in + * Linguistics; https://doi.org/10.15497/rda00040 ." -- + * https://github.com/IQSS/dataverse/issues/8129 + * + * However, if the citation date field has been set, use that. 
+ */ + String date = datasetDto.getPublicationDate(); + DatasetFieldType citationDataType = jakarta.enterprise.inject.spi.CDI.current().select(DatasetServiceBean.class).get().findByGlobalId(globalId.asString()).getCitationDateDatasetFieldType(); + if (citationDataType != null) { + date = dto2Primitive(version, citationDataType.getName()); } - writeFullElement(xmlw, dcFlavor+":"+"date", date); + + writeFullElement(xmlw, dcFlavor+":"+"date", date); writeFullElement(xmlw, dcFlavor+":"+"contributor", dto2Primitive(version, DatasetFieldConstant.depositor)); @@ -188,10 +203,16 @@ private static void createOAIDC(XMLStreamWriter xmlw, DatasetDTO datasetDto, Str writeFullElementList(xmlw, dcFlavor+":"+"relation", dto2PrimitiveList(version, DatasetFieldConstant.relatedDatasets)); - writeFullElementList(xmlw, dcFlavor+":"+"type", dto2PrimitiveList(version, DatasetFieldConstant.kindOfData)); + /** + * dc:type. "Dublin Core (see + * https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/type + * ) recommends “to use a controlled vocabulary such as the DCMI Type + * Vocabulary” for dc:type." So we hard-coded it to "Dataset". See + * https://github.com/IQSS/dataverse/issues/8129 + */ + writeFullElement(xmlw, dcFlavor+":"+"type", "Dataset"); writeFullElementList(xmlw, dcFlavor+":"+"source", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); - } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 3ff580268a9..f52aa4fe9bd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -630,8 +630,7 @@ public void testCreatePublishDestroyDataset() { Response exportDatasetAsDublinCore = UtilIT.exportDataset(datasetPersistentId, "oai_dc", apiToken); exportDatasetAsDublinCore.prettyPrint(); exportDatasetAsDublinCore.then().assertThat() - // FIXME: Get this working. 
See https://github.com/rest-assured/rest-assured/wiki/Usage#example-3---complex-parsing-and-validation - // .body("oai_dc:dc.find { it == 'dc:title' }.item", hasItems("Darwin's Finches")) + .body("oai_dc.title", is("Darwin's Finches")) .statusCode(OK.getStatusCode()); Response exportDatasetAsDdi = UtilIT.exportDataset(datasetPersistentId, "ddi", apiToken); @@ -1195,8 +1194,7 @@ public void testExport() { Response exportDatasetAsDublinCore = UtilIT.exportDataset(datasetPersistentId, "oai_dc", apiToken); exportDatasetAsDublinCore.prettyPrint(); exportDatasetAsDublinCore.then().assertThat() - // FIXME: Get this working. See https://github.com/rest-assured/rest-assured/wiki/Usage#example-3---complex-parsing-and-validation - // .body("oai_dc:dc.find { it == 'dc:title' }.item", hasItems("Darwin's Finches")) + .body("oai_dc.title", is("Dataset One")) .statusCode(OK.getStatusCode()); Response exportDatasetAsDdi = UtilIT.exportDataset(datasetPersistentId, "ddi", apiToken); @@ -4103,7 +4101,87 @@ public void getDatasetVersionCitation() { .assertThat().body("data.message", containsString(String.valueOf(persistentId))); } - + @Test + public void testCitationDate() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverse = UtilIT.createRandomDataverse(apiToken); + createDataverse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse); + Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse); + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = 
JsonPath.from(createDataset.getBody().asString()).getString("data.persistentId"); + + Path pathToAddDateOfDepositJson = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "dateOfDeposit.json"); + String dateOfDeposit = """ +{ + "fields": [ + { + "typeName": "dateOfDeposit", + "value": "1999-12-31" + } + ] +} +"""; + java.nio.file.Files.write(pathToAddDateOfDepositJson, dateOfDeposit.getBytes()); + + Response addDateOfDeposit = UtilIT.addDatasetMetadataViaNative(datasetPid, pathToAddDateOfDepositJson.toString(), apiToken); + addDateOfDeposit.prettyPrint(); + addDateOfDeposit.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.metadataBlocks.citation.fields[5].value", equalTo("1999-12-31")); + + Response setCitationDate = UtilIT.setDatasetCitationDateField(datasetPid, "dateOfDeposit", apiToken); + setCitationDate.prettyPrint(); + setCitationDate.then().assertThat().statusCode(OK.getStatusCode()); + + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + Response getCitationAfter = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, true, apiToken); + getCitationAfter.prettyPrint(); + + String doi = datasetPid.substring(4); + + // Note that the year 1999 appears in the citation because we + // set the citation date field to a field that has that year. 
+ String expectedCitation = "Finch, Fiona, 1999, \"Darwin's Finches\", https://doi.org/" + doi + ", Root, V1"; + + getCitationAfter.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", is(expectedCitation)); + + Response exportDatasetAsDublinCore = UtilIT.exportDataset(datasetPid, "oai_dc", apiToken); + exportDatasetAsDublinCore.prettyPrint(); + exportDatasetAsDublinCore.then().assertThat() + .body("oai_dc.type", equalTo("Dataset")) + .body("oai_dc.date", equalTo("1999-12-31")) + .statusCode(OK.getStatusCode()); + + Response clearDateField = UtilIT.clearDatasetCitationDateField(datasetPid, apiToken); + clearDateField.prettyPrint(); + clearDateField.then().assertThat().statusCode(OK.getStatusCode()); + + // Clearing not enough. You have to reexport because the previous date is cached. + Response rexport = UtilIT.reexportDatasetAllFormats(datasetPid); + rexport.prettyPrint(); + rexport.then().assertThat().statusCode(OK.getStatusCode()); + + String todayDate = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd")); + Response exportPostClear = UtilIT.exportDataset(datasetPid, "oai_dc", apiToken); + exportPostClear.prettyPrint(); + exportPostClear.then().assertThat() + .body("oai_dc.type", equalTo("Dataset")) + .body("oai_dc.date", equalTo(todayDate)) + .statusCode(OK.getStatusCode()); + } + @Test public void getVersionFiles() throws IOException, InterruptedException { Response createUser = UtilIT.createRandomUser(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 3dfb1a428d2..4e20e8e4c33 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3735,6 +3735,33 @@ static Response getDatasetVersionCitation(Integer datasetId, String version, boo return response; } + static Response setDatasetCitationDateField(String datasetIdOrPersistentId, String dateField, String apiToken) { + 
String idInPath = datasetIdOrPersistentId; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(datasetIdOrPersistentId)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + datasetIdOrPersistentId; + } + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(dateField) + .put("/api/datasets/" + idInPath + "/citationdate" + optionalQueryParam); + return response; + } + + static Response clearDatasetCitationDateField(String datasetIdOrPersistentId, String apiToken) { + String idInPath = datasetIdOrPersistentId; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(datasetIdOrPersistentId)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + datasetIdOrPersistentId; + } + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .delete("/api/datasets/" + idInPath + "/citationdate" + optionalQueryParam); + return response; + } + static Response getFileCitation(Integer fileId, String datasetVersion, String apiToken) { Boolean includeDeaccessioned = null; return getFileCitation(fileId, datasetVersion, includeDeaccessioned, apiToken);