Skip to content

Commit

Permalink
Fixes the bug in the standalone zipper utility where it can't properl…
Browse files Browse the repository at this point in the history
…y handle duplicate file names (IQSS/dataverse.harvard.edu#80)
  • Loading branch information
landreev committed Sep 20, 2020
1 parent efcd24d commit 5c14006
Showing 1 changed file with 31 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ public void processFiles() {
}

Set<String> zippedFolders = new HashSet<>();
Set<String> fileNamesList = new HashSet<>();

for (String [] fileEntry : jobFiles) {
String storageLocation = fileEntry[0];
Expand All @@ -144,22 +145,24 @@ public void processFiles() {

InputStream inputStream = this.directAccessUtil.openDirectAccess(storageLocation);

// (potential?) TODO: String zipEntryName = checkZipEntryName(fileName);
String zipEntryName = checkZipEntryName(fileName, fileNamesList);
// this may not be needed anymore - some extra sanitizing of the file
// name we used to have to do - since all the values in a current Dataverse
// database may already be sanitized enough.
// database may already be sanitized enough.
// (Edit: Yes, we still need this - there are still datasets with multiple
// files with duplicate names; this method takes care of that)
if (inputStream != null && this.zipOutputStream != null) {

ZipEntry entry = new ZipEntry(fileName);
ZipEntry entry = new ZipEntry(zipEntryName);

byte[] bytes = new byte[2 * 8192];
int read = 0;
long readSize = 0L;

try {
// Does this file have a folder name?
if (hasFolder(fileName)) {
addFolderToZipStream(getFolderName(fileName), zippedFolders);
if (hasFolder(zipEntryName)) {
addFolderToZipStream(getFolderName(zipEntryName), zippedFolders);
}

this.zipOutputStream.putNextEntry(entry);
Expand All @@ -168,7 +171,6 @@ public void processFiles() {
this.zipOutputStream.write(bytes, 0, read);
readSize += read;
}
inputStream.close();
this.zipOutputStream.closeEntry();

/*if (fileSize == readSize) {
Expand All @@ -178,6 +180,12 @@ public void processFiles() {
}*/
} catch (IOException ioex) {
System.err.println("Failed to compress "+storageLocation);
} finally {
try {
inputStream.close();
} catch (IOException ioexIgnore) {
System.err.println("Warning: IO exception trying to close input stream - "+storageLocation);
}
}
} else {
System.err.println("Failed to access "+storageLocation);
Expand Down Expand Up @@ -237,4 +245,21 @@ private void addFolderToZipStream(String folderName, Set<String> zippedFolders)
}
}
}

/**
 * Checks for and processes duplicate zip entry names.
 *
 * Returns a name that is not yet present in {@code fileNames} and records
 * the returned name in that set. If {@code originalName} is unused it is
 * returned unchanged; otherwise a numeric suffix ("_1", "_2", ...) is
 * inserted before the file extension, or appended to the end of the name
 * when the file has no extension.
 *
 * @param originalName the requested entry name, optionally prefixed with a
 *                     folder path (assumes '/' is the separator, as in zip
 *                     entry names - TODO confirm against hasFolder())
 * @param fileNames    the entry names handed out so far; updated in place
 * @return a name that is unique within {@code fileNames}
 */
private String checkZipEntryName(String originalName, Set<String> fileNames) {
    String name = originalName;
    int fileSuffix = 1;

    // Only a dot inside the last path component marks an extension. A dot
    // that belongs to a folder name (e.g. "my.folder/README") must be
    // ignored, otherwise the suffix would end up inside the folder name and
    // the duplicate would be written into a different directory.
    int extensionIndex = originalName.lastIndexOf('.');
    if (extensionIndex < originalName.lastIndexOf('/')) {
        extensionIndex = -1;
    }

    while (fileNames.contains(name)) {
        if (extensionIndex != -1) {
            name = originalName.substring(0, extensionIndex) + "_" + fileSuffix++ + originalName.substring(extensionIndex);
        } else {
            name = originalName + "_" + fileSuffix++;
        }
    }
    fileNames.add(name);
    return name;
}
}

0 comments on commit 5c14006

Please sign in to comment.