Merge branch 'develop' into 6919-preview-tools #6919
pdurbin committed Oct 8, 2020
2 parents e469cc4 + 8564ff6 commit 5696f5f
Showing 14 changed files with 169 additions and 67 deletions.
2 changes: 1 addition & 1 deletion doc/release-notes/5.1-release-notes.md
@@ -87,7 +87,7 @@ If this is a new installation, please see our [Installation Guide](http://guides

1. Update Biomedical Metadata Block (if used), Reload Solr, ReExportAll

`wget https://github.com/IQSS/dataverse/releases/download/5.1/biomedical.tsv`
`wget https://github.com/IQSS/dataverse/releases/download/v5.1/biomedical.tsv`
`curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @biomedical.tsv -H "Content-type: text/tab-separated-values"`

- copy schema_dv_mdb_fields.xml and schema_dv_mdb_copies.xml to solr server, for example into /usr/local/solr/solr-7.7.2/server/solr/collection1/conf/ directory
59 changes: 59 additions & 0 deletions doc/release-notes/5.1.1-release-notes.md
@@ -0,0 +1,59 @@
# Dataverse 5.1.1

This minor release adds important scaling improvements for installations running on AWS S3. It is recommended that 5.1.1 be used in production instead of 5.1.

## Release Highlights

### Connection Pool Size Configuration Option, Connection Optimizations

Dataverse 5.1 improved the efficiency of making S3 connections through the use of an HTTP connection pool. This release adds optimizations around closing streams and channels that may otherwise hold S3 HTTP connections open and exhaust the connection pool. In parallel, this release increases the default pool size from 50 to 256 and adds the ability to configure a larger connection pool if needed.
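The change itself is spread across the storage and thumbnail classes in the Java hunks further down in this commit. As a minimal, self-contained sketch of the pattern (class and method names here are illustrative, not the exact Dataverse code): every channel or stream that may be backed by an S3 object is scoped in try-with-resources, so its HTTP connection is returned to the pool as soon as the copy finishes or fails.

```java
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;

public class ChannelCopySketch {
    // Copy "size" bytes from a (possibly S3-backed) channel into a temp file.
    // Both channels are closed automatically, even if transferFrom() throws,
    // so the underlying S3 HTTP connection cannot be leaked.
    static File copyToTempFile(ReadableByteChannel sourceChannel, long size) throws IOException {
        File tempFile = File.createTempFile("tempTabFile", ".tmp");
        try (ReadableByteChannel in = sourceChannel;
             FileChannel out = new FileOutputStream(tempFile).getChannel()) {
            out.transferFrom(in, 0, size);
            return tempFile;
        }
    }
}
```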

## Major Use Cases

Newly-supported use cases in this release include:

- Administrators of installations using S3 will be able to define the connection pool size, allowing better resource scaling for larger installations (Issue #7309, PR #7313)

## Notes for Dataverse Installation Administrators

### 5.1.1 vs. 5.1 for Production Use

As mentioned above, we encourage 5.1.1 instead of 5.1 for production use.

### New JVM Option for Connection Pool Size

Larger installations may want to increase the number of open S3 connections allowed (default is 256). For example, to set the value to 4096:

`./asadmin create-jvm-options "-Ddataverse.files.<id>.connection-pool-size=4096"`
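On the application side, this option is read as an ordinary JVM system property when the S3 client is built. The sketch below follows the `S3AccessIO.getClient` change included in this commit; the wrapper class and method name are illustrative, and region/credentials configuration is omitted.

```java
import com.amazonaws.ClientConfiguration;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;

public class S3ClientSketch {
    // driverId is the <id> part of the dataverse.files.<id>.* options for a given store.
    static AmazonS3 buildClient(String driverId) {
        // Fall back to the new default of 256 connections when the option is not set.
        Integer poolSize = Integer.getInteger("dataverse.files." + driverId + ".connection-pool-size", 256);
        ClientConfiguration cc = new ClientConfiguration();
        cc.setMaxConnections(poolSize);
        return AmazonS3ClientBuilder.standard()
                .withClientConfiguration(cc)
                .build();
    }
}
```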

The JVM Options section of the [Configuration Guide](http://guides.dataverse.org/en/5.1.1/installation/config/) has more information.

## Complete List of Changes

For the complete list of code changes in this release, see the [5.1.1 Milestone](https://github.com/IQSS/dataverse/milestone/91?closed=1) in GitHub.

For help with upgrading, installing, or general questions please post to the [Dataverse Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org.

## Installation

If this is a new installation, please see our [Installation Guide](http://guides.dataverse.org/en/5.1.1/installation/)

## Upgrade Instructions

0. These instructions assume that you've already successfully upgraded to Dataverse 5.1 following the instructions in the [Dataverse 5.1 Release Notes](https://github.com/IQSS/dataverse/releases/tag/v5.1).

1. Undeploy the previous version.

<payara install path>/payara/bin/asadmin list-applications
<payara install path>/payara/bin/asadmin undeploy dataverse

2. Stop payara, remove the generated directory, and start payara again.

- service payara stop
- remove the generated directory: rm -rf <payara install path>/payara/payara/domains/domain1/generated
- service payara start

3. Deploy this version.
<payara install path>/payara/bin/asadmin deploy <path>dataverse-5.1.1.war

4. Restart payara
4 changes: 2 additions & 2 deletions doc/sphinx-guides/source/conf.py
@@ -65,9 +65,9 @@
# built documents.
#
# The short X.Y version.
version = '5.1'
version = '5.1.1'
# The full version, including alpha/beta/rc tags.
release = '5.1'
release = '5.1.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
4 changes: 4 additions & 0 deletions doc/sphinx-guides/source/installation/config.rst
@@ -516,6 +516,9 @@ By default, your store will use the [default] profile in your .aws configuration

``./asadmin create-jvm-options "-Ddataverse.files.<id>.profile=<profilename>"``

Larger installations may want to increase the number of open S3 connections allowed (default is 256). For example:

``./asadmin create-jvm-options "-Ddataverse.files.<id>.connection-pool-size=4096"``

In case you would like to configure Dataverse to use a custom S3 service instead of Amazon S3 services, please
add the options for the custom URL and region as documented below. Please read above if your desired combination has
@@ -541,6 +544,7 @@ dataverse.files.<id>.custom-endpoint-region <?> Only used when
dataverse.files.<id>.path-style-access ``true``/``false`` Use path style buckets instead of subdomains. Optional. ``false``
dataverse.files.<id>.payload-signing ``true``/``false`` Enable payload signing. Optional ``false``
dataverse.files.<id>.chunked-encoding ``true``/``false`` Disable chunked encoding. Optional ``true``
dataverse.files.<id>.connection-pool-size <?> The maximum number of open connections to the S3 server ``256``
=========================================== ================== ========================================================================= =============

Reported Working S3-Compatible Storage
3 changes: 2 additions & 1 deletion doc/sphinx-guides/source/versions.rst
@@ -6,8 +6,9 @@ Dataverse Documentation Versions

This list provides a way to refer to the documentation for previous versions of Dataverse. In order to learn more about the updates delivered from one version to another, visit the `Releases <https://github.com/IQSS/dataverse/releases>`__ page in our GitHub repo.

- 5.1
- 5.1.1

- `5.1 </en/5.1/>`__
- `5.0 </en/5.0/>`__
- `4.20 </en/4.20/>`__
- `4.19 </en/4.19/>`__
2 changes: 1 addition & 1 deletion pom.xml
@@ -7,7 +7,7 @@
-->
<groupId>edu.harvard.iq</groupId>
<artifactId>dataverse</artifactId>
<version>5.1</version>
<version>5.1.1</version>
<packaging>war</packaging>
<name>dataverse</name>
<properties>
@@ -26,6 +26,9 @@
import java.io.IOException;

import java.util.logging.Logger;

import org.apache.commons.io.IOUtils;

import java.util.List;
import java.util.Map;
import java.util.HashMap;
@@ -171,7 +174,9 @@ public static File downloadFromStorageIO(StorageIO<DataFile> storageIO) {
} else {
try {
storageIO.open();
return downloadFromByteChannel(storageIO.getReadChannel(), storageIO.getSize());
try (ReadableByteChannel tabFileChannel = storageIO.getReadChannel()) {
return downloadFromByteChannel(tabFileChannel, storageIO.getSize());
}
} catch (IOException ex) {
logger.warning("caught IOException trying to store tabular file " + storageIO.getDataFile().getStorageIdentifier() + " as a temp file.");
}
@@ -184,12 +189,13 @@ private static File downloadFromByteChannel(ReadableByteChannel tabFileChannel,
logger.fine("opening datafFileIO for the source tabular file...");

File tabFile = File.createTempFile("tempTabFile", ".tmp");
FileChannel tempFileChannel = new FileOutputStream(tabFile).getChannel();
tempFileChannel.transferFrom(tabFileChannel, 0, size);
return tabFile;
try (FileChannel tempFileChannel = new FileOutputStream(tabFile).getChannel();) {
tempFileChannel.transferFrom(tabFileChannel, 0, size);
return tabFile;
}
} catch (IOException ioex) {
logger.warning("caught IOException trying to store tabular file as a temp file.");
}
}
return null;
}

@@ -237,8 +243,10 @@ private static File runFormatConversion (DataFile file, File tabFile, String for
try {
StorageIO<DataFile> storageIO = file.getStorageIO();
long size = storageIO.getAuxObjectSize("orig");
File origFile = downloadFromByteChannel((ReadableByteChannel) storageIO.openAuxChannel("orig"), size);
resultInfo = dfs.directConvert(origFile, origFormat);
try (ReadableByteChannel origChannel = (ReadableByteChannel) storageIO.openAuxChannel("orig")) {
File origFile = downloadFromByteChannel(origChannel, size);
resultInfo = dfs.directConvert(origFile, origFormat);
}
} catch (IOException ex) {
ex.printStackTrace();
return null;
@@ -235,6 +235,7 @@ private static boolean generatePDFThumbnail(StorageIO<DataFile> storageIO, int s
return false;
} finally {
IOUtils.closeQuietly(tempFileChannel);
IOUtils.closeQuietly(pdfFileChannel);
}
sourcePdfFile = tempFile;
}
@@ -272,7 +273,9 @@ private static boolean generateImageThumbnail(StorageIO<DataFile> storageIO, int

try {
storageIO.open();
return generateImageThumbnailFromInputStream(storageIO, size, storageIO.getInputStream());
try(InputStream inputStream = storageIO.getInputStream()) {
return generateImageThumbnailFromInputStream(storageIO, size, inputStream);
}
} catch (IOException ioex) {
logger.warning("caught IOException trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier() + ioex);
return false;
@@ -312,16 +315,18 @@ private static boolean generateWorldMapThumbnail(StorageIO<DataFile> storageIO,
worldMapImageInputStream.close();
return false;
}
return generateImageThumbnailFromInputStream(storageIO, size, worldMapImageInputStream);
} catch (FileNotFoundException fnfe) {
logger.fine("No .img file for this worldmap file yet; giving up. Original Error: " + fnfe);
return false;

} catch (IOException ioex) {
logger.warning("caught IOException trying to open an input stream for worldmap .img file (" + storageIO.getDataFile().getStorageIdentifier() + "). Original Error: " + ioex);
return false;
} finally {
IOUtils.closeQuietly(worldMapImageInputStream);
}

return generateImageThumbnailFromInputStream(storageIO, size, worldMapImageInputStream);

}

/*
@@ -750,15 +755,14 @@ private static void rescaleImage(BufferedImage fullSizeImage, int width, int hei
g2.drawImage(thumbImage, 0, 0, null);
g2.dispose();

try {
ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream);
try (ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream);) {

writer.setOutput(ios);

// finally, save thumbnail image:
writer.write(lowRes);
writer.dispose();

ios.close();
thumbImage.flush();
//fullSizeImage.flush();
lowRes.flush();
43 changes: 25 additions & 18 deletions src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse.dataaccess;

import com.amazonaws.AmazonClientException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.HttpMethod;
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
@@ -88,7 +89,14 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) {
minPartSize = getMinPartSize(driverId);
s3=getClient(driverId);
tm=getTransferManager(driverId);

//Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened.
try {
if (bucketName == null || !s3.doesBucketExistV2(bucketName)) {
throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets.");
}
} catch (SdkClientException sce) {
throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage());
}
} catch (Exception e) {
throw new AmazonClientException(
"Cannot instantiate a S3 client; check your AWS credentials and region",
@@ -124,14 +132,6 @@ public void open(DataAccessOption... options) throws IOException {
throw new IOException("ERROR: s3 not initialised. ");
}

try {
if (bucketName == null || !s3.doesBucketExist(bucketName)) {
throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets.");
}
} catch (SdkClientException sce) {
throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage());
}

DataAccessRequest req = this.getRequest();

if (isWriteAccessRequested(options)) {
@@ -578,18 +578,20 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) thr

//Helper method for supporting saving streams with unknown length to S3
//We save those streams to a file and then upload the file
private File createTempFile(Path path, InputStream inputStream) throws IOException {
private File createTempFile(Path path, InputStream inputStream) throws IOException {

File targetFile = new File(path.toUri()); // File needs a name
try (OutputStream outStream = new FileOutputStream(targetFile);) {

File targetFile = new File(path.toUri()); //File needs a name
OutputStream outStream = new FileOutputStream(targetFile);
byte[] buffer = new byte[8 * 1024];
int bytesRead;
while ((bytesRead = inputStream.read(buffer)) != -1) {
outStream.write(buffer, 0, bytesRead);
}

byte[] buffer = new byte[8 * 1024];
int bytesRead;
while ((bytesRead = inputStream.read(buffer)) != -1) {
outStream.write(buffer, 0, bytesRead);
} finally {
IOUtils.closeQuietly(inputStream);
}
IOUtils.closeQuietly(inputStream);
IOUtils.closeQuietly(outStream);
return targetFile;
}

@@ -1043,6 +1045,11 @@ private static AmazonS3 getClient(String driverId) {
// get a standard client, using the standard way of configuration the credentials, etc.
AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard();

ClientConfiguration cc = new ClientConfiguration();
Integer poolSize = Integer.getInteger("dataverse.files." + driverId + ".connection-pool-size", 256);
cc.setMaxConnections(poolSize);
s3CB.setClientConfiguration(cc);

/**
* Pass in a URL pointing to your S3 compatible storage.
* For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html
@@ -26,6 +26,8 @@
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.logging.Logger;

import org.apache.tika.io.IOUtils;
/**
*
* @author Leonid Andreev
@@ -56,17 +58,18 @@ public static StorageIO<DataFile> retreive(StorageIO<DataFile> storageIO) {

long storedOriginalSize;
InputStreamIO inputStreamIO;

Channel storedOriginalChannel = null;
try {
storageIO.open();
Channel storedOriginalChannel = storageIO.openAuxChannel(SAVED_ORIGINAL_FILENAME_EXTENSION);
storedOriginalChannel = storageIO.openAuxChannel(SAVED_ORIGINAL_FILENAME_EXTENSION);
storedOriginalSize = dataFile.getDataTable().getOriginalFileSize() != null ?
dataFile.getDataTable().getOriginalFileSize() :
storageIO.getAuxObjectSize(SAVED_ORIGINAL_FILENAME_EXTENSION);
inputStreamIO = new InputStreamIO(Channels.newInputStream((ReadableByteChannel) storedOriginalChannel), storedOriginalSize);
logger.fine("Opened stored original file as Aux "+SAVED_ORIGINAL_FILENAME_EXTENSION);
} catch (IOException ioEx) {
// The original file not saved, or could not be opened.
IOUtils.closeQuietly(storedOriginalChannel);
// The original file not saved, or could not be opened.
logger.fine("Failed to open stored original file as Aux "+SAVED_ORIGINAL_FILENAME_EXTENSION+"!");
return null;
}
@@ -273,7 +273,7 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat
try {
tmpFile = FileUtil.inputStreamToFile(inputStream);
} catch (IOException ex) {
logger.severe(ex.getMessage());
logger.severe(ex.getMessage());
}

StorageIO<Dataset> dataAccess = null;
@@ -298,11 +298,13 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat
try {
fullSizeImage = ImageIO.read(tmpFile);
} catch (IOException ex) {
IOUtils.closeQuietly(inputStream);
logger.severe(ex.getMessage());
return null;
}
if (fullSizeImage == null) {
logger.fine("fullSizeImage was null!");
IOUtils.closeQuietly(inputStream);
return null;
}
int width = fullSizeImage.getWidth();
@@ -311,13 +313,15 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat
try {
src = new FileInputStream(tmpFile).getChannel();
} catch (FileNotFoundException ex) {
IOUtils.closeQuietly(inputStream);
logger.severe(ex.getMessage());
return null;
}
FileChannel dest = null;
try {
dest = new FileOutputStream(tmpFile).getChannel();
} catch (FileNotFoundException ex) {
IOUtils.closeQuietly(inputStream);
logger.severe(ex.getMessage());
return null;
}
@@ -329,10 +333,13 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat
}
File tmpFileForResize = null;
try {
//The stream was used around line 274 above, so this creates an empty file (OK since all it is used for is getting a path, but not reusing it here would make it easier to close it above.)
tmpFileForResize = FileUtil.inputStreamToFile(inputStream);
} catch (IOException ex) {
logger.severe(ex.getMessage());
return null;
} finally {
IOUtils.closeQuietly(inputStream);
}
// We'll try to pre-generate the rescaled versions in both the
// DEFAULT_DATASET_LOGO (currently 140) and DEFAULT_CARDIMAGE_SIZE (48)