Skip to content

Commit

Permalink
quick draft implementation of addressing issue 1. from #10909.
Browse files Browse the repository at this point in the history
  • Loading branch information
landreev committed Oct 21, 2024
1 parent d039a10 commit 6d336c8
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,16 @@ public DatasetDTO processXML( XMLStreamReader xmlr, ForeignMetadataFormatMapping

}

// Helper method for importing harvested Dublin Core xml.
// Helper methods for importing harvested Dublin Core xml.
// Dublin Core is considered a mandatory, built in metadata format mapping.
// It is distributed as required content, in reference_data.sql.
// Note that arbitrary formatting tags are supported for the outer xml
// wrapper. -- L.A. 4.5
/**
 * Parses a harvested OAI_DC (Dublin Core) XML record when the caller has no
 * identifier of its own to supply for the dataset.
 * <p>
 * Convenience overload: delegates to
 * {@link #processOAIDCxml(String, String)} with a {@code null} OAI identifier.
 *
 * @param DcXmlToParse the Dublin Core XML record, as a string
 * @return the {@link DatasetDTO} built by the two-argument overload
 * @throws XMLStreamException propagated from the two-argument overload
 */
public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException {
    // No externally supplied identifier in this variant.
    final String noSuppliedIdentifier = null;
    return processOAIDCxml(DcXmlToParse, noSuppliedIdentifier);
}

public DatasetDTO processOAIDCxml(String DcXmlToParse, String oaiIdentifier) throws XMLStreamException {
// look up DC metadata mapping:

ForeignMetadataFormatMapping dublinCoreMapping = findFormatMappingByName(DCTERMS);
Expand Down Expand Up @@ -185,18 +189,37 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException

datasetDTO.getDatasetVersion().setVersionState(DatasetVersion.VersionState.RELEASED);

// Our DC import handles the contents of the dc:identifier field
// as an "other id". In the context of OAI harvesting, we expect
// the identifier to be a global id, so we need to rearrange that:
// In some cases, the identifier that we want to use for the dataset is
// already supplied to the method explicitly. For example, in some
// harvesting cases we'll want to use the OAI identifier (the identifier
// from the <header> section of the OAI record) for that purpose, without
// expecting to find a valid persistent id in the body of the DC record:

String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion());
logger.fine("Imported identifier: "+identifier);
String globalIdentifier;

String globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO);
logger.fine("Detected global identifier: "+globalIdentifier);
if (oaiIdentifier != null) {
logger.fine("Attempting to use " + oaiIdentifier + " as the persistentId of the imported dataset");

globalIdentifier = reassignIdentifierAsGlobalId(oaiIdentifier, datasetDTO);
} else {
// Our DC import handles the contents of the dc:identifier field
// as an "other id". Unless we are using an externally supplied
// global id, we will be using the first such "other id" that we
// can parse and recognize as the global id for the imported dataset
// (note that this is the default behavior during harvesting),
// so we need to reassign it accordingly:
String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion());
logger.fine("Imported identifier: " + identifier);

globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO);
logger.fine("Detected global identifier: " + globalIdentifier);
}

if (globalIdentifier == null) {
throw new EJBException("Failed to find a global identifier in the OAI_DC XML record.");
String exceptionMsg = oaiIdentifier == null ?
"Failed to find a global identifier in the OAI_DC XML record." :
"Failed to parse the supplied identifier as a valid Persistent Id";
throw new EJBException(exceptionMsg);
}

return datasetDTO;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,13 @@ public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse
}

@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, Date oaiDateStamp, PrintWriter cleanupLog) throws ImportException, IOException {
public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest,
HarvestingClient harvestingClient,
String harvestIdentifier,
String metadataFormat,
File metadataFile,
Date oaiDateStamp,
PrintWriter cleanupLog) throws ImportException, IOException {
if (harvestingClient == null || harvestingClient.getDataverse() == null) {
throw new ImportException("importHarvestedDataset called wiht a null harvestingClient, or an invalid harvestingClient.");
}
Expand Down Expand Up @@ -243,7 +249,10 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
logger.fine("importing DC "+metadataFile.getAbsolutePath());
try {
String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
dsDTO = importGenericService.processOAIDCxml(xmlToParse);
String suggestedIdentifier = harvestingClient.isUseOaiIdentifiersAsPids()
? harvestIdentifier
: null;
dsDTO = importGenericService.processOAIDCxml(xmlToParse, suggestedIdentifier);
} catch (IOException | XMLStreamException e) {
throw new ImportException("Failed to process Dublin Core XML record: "+ e.getClass() + " (" + e.getMessage() + ")");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ public void setArchiveDescription(String archiveDescription) {
this.archiveDescription = archiveDescription;
}

@Column(columnDefinition="TEXT")
private String harvestingSet;

public String getHarvestingSet() {
Expand Down Expand Up @@ -252,8 +253,26 @@ public void setAllowHarvestingMissingCVV(boolean allowHarvestingMissingCVV) {
this.allowHarvestingMissingCVV = allowHarvestingMissingCVV;
}

// TODO: do we need "orphanRemoval=true"? -- L.A. 4.4
// TODO: should it be @OrderBy("startTime")? -- L.A. 4.4
// Wrapper type: stays null until the setter is called or a non-null value is loaded.
private Boolean useListRecords;

/**
 * Reports the "use ListRecords" flag of this harvesting client.
 * <p>
 * The backing field is a nullable {@code Boolean}; an unset flag is reported
 * as {@code FALSE} so that callers which auto-unbox the result (for example
 * in an {@code if} or a conditional expression) cannot hit a
 * {@link NullPointerException}.
 *
 * @return {@code TRUE} only when the flag has been explicitly set to true;
 *         {@code FALSE} otherwise. Never {@code null}.
 */
public Boolean isUseListRecords() {
    return Boolean.TRUE.equals(useListRecords);
}

/**
 * Sets the "use ListRecords" flag of this harvesting client.
 * <p>
 * This is the setter whose name matches the property name used by
 * {@code isUseListRecords()}.
 *
 * @param useListRecords the new value of the flag
 */
public void setUseListRecords(boolean useListRecords) {
    this.useListRecords = useListRecords;
}

/**
 * Legacy spelling of {@link #setUseListRecords(boolean)} (lower-case "r").
 * Kept so that existing callers continue to compile; behaves identically.
 *
 * @param useListRecords the new value of the flag
 */
public void setUseListrecords(boolean useListRecords) {
    setUseListRecords(useListRecords);
}

// Wrapper type: stays null until the setter is called or a non-null value is loaded.
private Boolean useOaiIdAsPid;

/**
 * Reports whether OAI record identifiers should be used as the persistent
 * ids of datasets harvested by this client.
 * <p>
 * The backing field is a nullable {@code Boolean}. Callers use the result
 * directly as a condition, which auto-unboxes it, so an unset flag is
 * reported as {@code FALSE} instead of {@code null} to avoid a
 * {@link NullPointerException}.
 *
 * @return {@code TRUE} only when the flag has been explicitly set to true;
 *         {@code FALSE} otherwise. Never {@code null}.
 */
public Boolean isUseOaiIdentifiersAsPids() {
    return Boolean.TRUE.equals(useOaiIdAsPid);
}

/**
 * Stores the flag reported by {@code isUseOaiIdentifiersAsPids()}.
 *
 * @param useOaiIdAsPid the new value of the flag
 */
public void setUseOaiIdentifiersAsPids(boolean useOaiIdAsPid) {
    // Box explicitly: the backing field is a Boolean wrapper.
    this.useOaiIdAsPid = Boolean.valueOf(useOaiIdAsPid);
}

@OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST})
@OrderBy("id")
private List<ClientHarvestRun> harvestHistory;
Expand Down
3 changes: 3 additions & 0 deletions src/main/resources/db/migration/V6.4.0.1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- New harvesting client feature flags. Each column is added only if it is not
-- already present, and defaults to FALSE.
ALTER TABLE harvestingclient
    ADD COLUMN IF NOT EXISTS useOaiIdAsPid BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS useListRecords BOOLEAN DEFAULT FALSE;

0 comments on commit 6d336c8

Please sign in to comment.