Skip to content

Commit

Permalink
Changed json parser to modify the "storageidentifiers" of remote file…
Browse files Browse the repository at this point in the history
…s harvested in the proprietary json format. #7736
  • Loading branch information
landreev committed Mar 22, 2023
1 parent 63cf379 commit b27d266
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
JsonObject obj = jsonReader.readObject();
//and call parse Json to read it into a dataset
try {
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService);
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService, harvestingClient);
parser.setLenient(true);
Dataset ds = parser.parseDataset(obj);

Expand Down
30 changes: 29 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ public class JsonParser {
MetadataBlockServiceBean blockService;
SettingsServiceBean settingsService;
LicenseServiceBean licenseService;
HarvestingClient harvestingClient = null;

/**
* if lenient, we will accept alternate spellings for controlled vocabulary values
Expand All @@ -85,10 +86,15 @@ public JsonParser(DatasetFieldServiceBean datasetFieldSvc, MetadataBlockServiceB
}

/**
 * Creates a parser that is not tied to any harvesting client.
 *
 * Delegates to the five-argument constructor, passing {@code null} as the
 * harvesting client.
 *
 * @param datasetFieldSvc dataset field service used by this parser
 * @param blockService metadata block service used by this parser
 * @param settingsService settings service used by this parser
 * @param licenseService license service used by this parser
 */
public JsonParser(
        DatasetFieldServiceBean datasetFieldSvc,
        MetadataBlockServiceBean blockService,
        SettingsServiceBean settingsService,
        LicenseServiceBean licenseService) {
    this(datasetFieldSvc, blockService, settingsService, licenseService, null);
}

/**
 * Creates a parser backed by the supplied services and, optionally, a
 * harvesting client.
 *
 * @param datasetFieldSvc dataset field service used by this parser
 * @param blockService metadata block service used by this parser
 * @param settingsService settings service used by this parser
 * @param licenseService license service used by this parser
 * @param harvestingClient the client on whose behalf content is being
 *        parsed; may be {@code null}. When it is non-null, data files that
 *        carry an "id" get a storage identifier built from the client's
 *        archive URL instead of the one found in the json.
 */
public JsonParser(
        DatasetFieldServiceBean datasetFieldSvc,
        MetadataBlockServiceBean blockService,
        SettingsServiceBean settingsService,
        LicenseServiceBean licenseService,
        HarvestingClient harvestingClient) {
    // Plain field initialisation; the assignments are independent of each other.
    this.harvestingClient = harvestingClient;
    this.licenseService = licenseService;
    this.settingsService = settingsService;
    this.blockService = blockService;
    this.datasetFieldSvc = datasetFieldSvc;
}

public JsonParser() {
Expand Down Expand Up @@ -522,7 +528,29 @@ public DataFile parseDataFile(JsonObject datafileJson) {
if (contentType == null) {
contentType = "application/octet-stream";
}
String storageIdentifier = datafileJson.getString("storageIdentifier", " ");
String storageIdentifier = null;
/**
* When harvesting from other Dataverses using this json format, we
* don't want to import their storageidentifiers verbatim. Instead, we
* will modify them to point to the access API location on the remote
* archive side.
*/
if (harvestingClient != null && datafileJson.containsKey("id")) {
String remoteId = datafileJson.getJsonNumber("id").toString();
storageIdentifier = harvestingClient.getArchiveUrl()
+ "/api/access/datafile/"
+ remoteId;
/**
* Note that we have no practical use for these URLs as of now. In the
* past, we performed some tasks on harvested content that involved
* trying to access the files. In any event, it makes more sense to
* store these URLs than the storage identifiers imported as is, which
* are completely meaningless on the local system.
*/
} else {
storageIdentifier = datafileJson.getString("storageIdentifier", null);
}
JsonObject checksum = datafileJson.getJsonObject("checksum");
if (checksum != null) {
// newer style that allows for SHA-1 rather than MD5
Expand Down

0 comments on commit b27d266

Please sign in to comment.