From 7568cd05c4a82331b5539491a76c547cb00a4e93 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Wed, 18 Sep 2024 22:44:03 -0400
Subject: [PATCH] Some cleanup/streamlining #10734

---
 .../api/imports/ImportServiceBean.java | 42 ++++++++-----------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
index 78bf9af99fa..73d47b18b9a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
@@ -303,9 +303,8 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
 
         Dataset existingDataset = datasetService.findByGlobalId(globalIdString);
-
         try {
-            Dataset harvestedDataset = null;
+            Dataset harvestedDataset;
 
             JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService, datasetTypeService, harvestingClient);
             parser.setLenient(true);
 
@@ -314,7 +313,6 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
 
                 // Creating a new dataset from scratch:
                 harvestedDataset = parser.parseDataset(obj);
-                harvestedDataset.setOwner(owner);
 
                 harvestedDataset.setHarvestedFrom(harvestingClient);
                 harvestedDataset.setHarvestIdentifier(harvestIdentifier);
@@ -322,6 +320,7 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
 
                 harvestedVersion = harvestedDataset.getVersions().get(0);
             } else {
                 // We already have a dataset with this id in the database.
+                // Let's check a few things before we go any further with it:
                 // If this dataset already exists IN ANOTHER COLLECTION
                 // we are just going to skip it!
@@ -335,20 +334,22 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
 
                     throw new ImportException("A LOCAL dataset with the global id " + globalIdString + " already exists in this dataverse; skipping.");
                 }
                 // For harvested datasets, there should always only be one version.
-                // We will replace the current version with the imported version.
-                // @todo or should we just destroy any extra versions quietly?
                 if (existingDataset.getVersions().size() != 1) {
                     throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDataset.getVersions().size() + " versions");
                 }
-
+
+                // We will attempt to import the new version, and replace the
+                // current, already existing version with it.
                 harvestedVersion = parser.parseDatasetVersion(obj.getJsonObject("datasetVersion"));
-                Dataset tempDataset = createTemporaryHarvestedDataset(harvestedVersion);
-                // Temporarily attach the temporary dataset to the parent Collection:
-                // (this will be needed for the purposes of looking up the configured
-                // metadatablocks and such)
-                tempDataset.setOwner(owner);
+
+                // For the purposes of validation, the version needs to be attached
+                // to a non-null dataset. We will create a throwaway temporary
+                // dataset for this:
+                harvestedDataset = createTemporaryHarvestedDataset(harvestedVersion);
             }
+            harvestedDataset.setOwner(owner);
+
             // Either a full new import, or an update of an existing harvested
             // Dataset, perform some cleanup on the new version imported from the
             // parsed metadata:
@@ -359,28 +360,19 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
                 harvestedVersion.setReleaseTime(oaiDateStamp);
             }
 
-            // Check data against required constraints
-            List<ConstraintViolation<DatasetField>> violations = harvestedVersion.validateRequired();
-            if (!violations.isEmpty()) {
-                // ... and fill the missing required values with "NA"s:
-                for (ConstraintViolation<DatasetField> v : violations) {
-                    DatasetField f = v.getRootBean();
-                    f.setSingleValue(DatasetField.NA_VALUE);
-                }
-            }
-
            // is this the right place to call tidyUpFields()?
            // usually it is called within the body of the create/update commands
            // later on.
            DatasetFieldUtil.tidyUpFields(harvestedVersion.getDatasetFields(), true);
 
-            // Check data against validation constraints
-            // Similarly to how we handle missing required values (above), we
-            // replace invalid values with NA when harvesting.
+            // Check data against validation constraints.
+            // Make an attempt to sanitize any invalid fields encountered -
+            // missing required fields or invalid values, by filling the values
+            // with NAs.
             boolean sanitized = validateAndSanitizeVersionMetadata(harvestedVersion, cleanupLog);
 
-            // Note: our sanitizing approach, of replacing invalid values with
+            // Note: this sanitizing approach, of replacing invalid values with
             // "NA" does not work with certain fields. For example, using it to
             // populate a GeoBox coordinate value will result in an invalid
             // field. So we will attempt to re-validate the sanitized version.
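
Note on the consolidated validation step: the last hunk replaces the inline "fill missing required fields with NA" loop with a single call to validateAndSanitizeVersionMetadata(harvestedVersion, cleanupLog), which also covers invalid (not just missing) values. Below is a minimal sketch of what such a helper can look like, reconstructed from the removed inline logic; the signature is assumed from the call site, the cleanupLog type (PrintWriter) and the log message are hypothetical, and this is not the actual Dataverse implementation.

import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetVersion;
import jakarta.validation.ConstraintViolation;
import java.io.PrintWriter;

// Sketch only: approximates the behavior delegated to
// validateAndSanitizeVersionMetadata() by the hunk at -359 above.
private boolean validateAndSanitizeVersionMetadata(DatasetVersion harvestedVersion, PrintWriter cleanupLog) {
    boolean fieldsSanitized = false;

    // Fill missing required values with "NA", as the removed inline block did:
    for (ConstraintViolation<DatasetField> v : harvestedVersion.validateRequired()) {
        DatasetField f = v.getRootBean();
        cleanupLog.println("Missing required value in field "
                + f.getDatasetFieldType().getName() + "; filling with NA");
        f.setSingleValue(DatasetField.NA_VALUE);
        fieldsSanitized = true;
    }

    // Per the new comment in the diff, the same NA treatment would also be
    // applied to fields that fail other validation constraints (omitted here).

    return fieldsSanitized;
}

As the closing comment in the last hunk explains, the caller still re-validates afterwards: "NA" is not a legal value for every field type (a GeoBox coordinate, for example), so sanitizing can itself leave the version invalid.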