[SPARK-28120][SS] Rocksdb state storage implementation #24922

Closed · wants to merge 28 commits
112 changes: 112 additions & 0 deletions core/src/main/java/org/apache/spark/io/FileUtility.java
@@ -0,0 +1,112 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.io;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.utils.IOUtils;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class FileUtility {

public static final String ENCODING = "utf-8";

/**
 * Extracts an input tar file into output files and directories.
 *
 * @param inputTarFileLoc the input location of the tar file
 * @param destDirLoc destination for the extracted files
 * @throws IllegalStateException if extraction fails
 */
public static void extractTarFile(String inputTarFileLoc, String destDirLoc)
throws IllegalStateException {
File inputFile = new File(inputTarFileLoc);
if (!inputTarFileLoc.endsWith(".tar")) {
throw new IllegalStateException(String.format(
"Input File[%s] should end with tar extension.", inputTarFileLoc));
}
File destDir = new File(destDirLoc);
if (destDir.exists() && !destDir.delete()) {
throw new IllegalStateException(String.format(
"Couldn't delete the existing destination directory[%s] ", destDirLoc));
} else if (!destDir.mkdir()) {
throw new IllegalStateException(String.format(
"Couldn't create directory %s ", destDirLoc));
}

try (InputStream is = new FileInputStream(inputFile);
TarArchiveInputStream debInputStream = new TarArchiveInputStream(is, ENCODING)) {
TarArchiveEntry entry;
while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
// Note: entry names are used as-is, so archives from untrusted sources
// could contain ".." components that escape destDirLoc; callers should
// only extract archives they created (e.g., via createTarFile below).
final File outputFile = new File(destDirLoc, entry.getName());
if (entry.isDirectory()) {
if (!outputFile.exists() && !outputFile.mkdirs()) {
throw new IllegalStateException(String.format(
"Couldn't create directory %s.", outputFile.getAbsolutePath()));
}
} else {
try (OutputStream outputFileStream = new FileOutputStream(outputFile)) {
IOUtils.copy(debInputStream, outputFileStream);
}
}
}
} catch (IOException e) {
throw new IllegalStateException(String.format(
"extractTarFile failed with exception %s.", e.getMessage()), e);
}
}

/**
 * Creates a tar file from the given source directory.
 *
 * @param source the source directory location
 * @param destFileLoc destination of the created tarball
 * @throws IllegalStateException if archive creation fails
 */
public static void createTarFile(String source, String destFileLoc)
throws IllegalStateException {
File f = new File(destFileLoc);
if (f.exists() && !f.delete()) {
throw new IllegalStateException(String.format(
"Couldn't delete the destination file location[%s]", destFileLoc));
}
File folder = new File(source);
if (!folder.exists()) {
throw new IllegalStateException(String.format(
"Source folder[%s] does not exist", source));
}

try (FileOutputStream fos = new FileOutputStream(destFileLoc);
TarArchiveOutputStream tarOs = new TarArchiveOutputStream(fos, ENCODING)) {
File[] files = folder.listFiles();
if (files == null) {
throw new IllegalStateException(String.format(
"Couldn't list files under source folder[%s]", source));
}
// Note: only regular files directly under the source folder are archived;
// nested directories are not handled by this utility.
for (File file : files) {
TarArchiveEntry tarEntry = new TarArchiveEntry(file.getName());
tarEntry.setSize(file.length());
tarOs.putArchiveEntry(tarEntry);
try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file))) {
IOUtils.copy(bis, tarOs);
tarOs.closeArchiveEntry();
}
}
tarOs.finish();
} catch (IOException e) {
throw new IllegalStateException(String.format(
"createTarFile failed with exception %s.", e.getMessage()), e);
}
}

}
77 changes: 77 additions & 0 deletions core/src/test/java/org/apache/spark/io/FileUtilitySuite.java
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.io;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomUtils;
import org.apache.spark.util.Utils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

/**
* Tests functionality of {@link FileUtility}
*/
public class FileUtilitySuite {

protected File sourceFolder;
protected File destTarLoc;
protected File destFolder;

@Before
public void setUp() throws IOException {
String tmpDir = System.getProperty("java.io.tmpdir");
sourceFolder = Utils.createTempDir(tmpDir, "FileUtilTest-src-" + RandomUtils.nextLong());
destFolder = Utils.createTempDir(tmpDir, "FileUtilTest-dest-" + RandomUtils.nextLong());
destTarLoc = File.createTempFile("dest-tar", ".tar");
}

@After
public void tearDown() {
destTarLoc.delete();
}

@Test
public void testCreationAndExtraction() throws IllegalStateException, IOException {
// Create a temp file in the source folder
Assert.assertEquals(0, sourceFolder.listFiles().length);
File inputFile = File.createTempFile("source-file", ".tmp", sourceFolder);
// Create a byte array of size 1 KB with random bytes
byte[] randomBytes = RandomUtils.nextBytes(1 * 1024);
FileUtils.writeByteArrayToFile(inputFile, randomBytes);

// Create the tarball
destTarLoc.delete();
Assert.assertFalse(destTarLoc.exists());
FileUtility.createTarFile(sourceFolder.toString(), destTarLoc.getAbsolutePath());
Assert.assertTrue(destTarLoc.exists());

// Extract the tarball
Assert.assertEquals(0, destFolder.listFiles().length);
FileUtility.extractTarFile(destTarLoc.getAbsolutePath(), destFolder.getAbsolutePath());

// Verify that the extraction was successful
Assert.assertTrue(destFolder.exists());
Assert.assertEquals(1, destFolder.listFiles().length);
Assert.assertArrayEquals(randomBytes, FileUtils.readFileToByteArray(destFolder.listFiles()[0]));
}

}
1 change: 1 addition & 0 deletions dev/deps/spark-deps-hadoop-2.7
@@ -175,6 +175,7 @@ parquet-jackson-1.10.1.jar
protobuf-java-2.5.0.jar
py4j-0.10.8.1.jar
pyrolite-4.30.jar
rocksdbjni-6.2.2.jar
scala-collection-compat_2.12-2.1.1.jar
scala-compiler-2.12.10.jar
scala-library-2.12.10.jar
1 change: 1 addition & 0 deletions dev/deps/spark-deps-hadoop-3.2
@@ -194,6 +194,7 @@ protobuf-java-2.5.0.jar
py4j-0.10.8.1.jar
pyrolite-4.30.jar
re2j-1.1.jar
rocksdbjni-6.2.2.jar
scala-collection-compat_2.12-2.1.1.jar
scala-compiler-2.12.10.jar
scala-library-2.12.10.jar
1 change: 1 addition & 0 deletions pom.xml
@@ -193,6 +193,7 @@
<jpam.version>1.1</jpam.version>
<selenium.version>2.52.0</selenium.version>
<htmlunit.version>2.22</htmlunit.version>
<rocksdb.version>6.2.2</rocksdb.version>
<!--
Managed up from older version from Avro; sync with jackson-module-paranamer dependency version
-->
5 changes: 5 additions & 0 deletions sql/core/pom.xml
@@ -147,6 +147,11 @@
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.rocksdb</groupId>
<artifactId>rocksdbjni</artifactId>
@skonto (Contributor) commented on Jun 20, 2019:
This dependency has all the files packed for all major OSs. Flink uses a custom build. Digging into this a bit more, I see some additions and modifications as described here. I understand this is Flink-specific, but how about the TTL thing mentioned there? https://issues.apache.org/jira/browse/FLINK-10471 looks interesting. Structured Streaming fetches all state here (in memory) and filters out the timed-out entries; is RocksDB performing well there? Shouldn't we have the same mechanism, or a similar one, so we don't fetch everything and instead delegate this to the state backend (which could run in the background, btw)?
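
For reference, the rocksdbjni artifact ships a TTL-enabled variant, org.rocksdb.TtlDB, which drops expired entries lazily during compaction. A minimal sketch, assuming rocksdbjni on the classpath; the path and TTL value below are illustrative only:

import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.TtlDB;

public class TtlDbSketch {
  public static void main(String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (Options options = new Options().setCreateIfMissing(true);
         // Entries older than the TTL (3600 s here, illustrative) become
         // eligible for deletion during compaction; expiry is lazy, so a
         // read shortly after the deadline may still return the value.
         TtlDB db = TtlDB.open(options, "/tmp/ttl-db-sketch", 3600, false)) {
      db.put("key".getBytes(), "value".getBytes());
    }
  }
}

Because expiry is best-effort, Spark would still need its own bookkeeping for exact timeout semantics, but it would avoid scanning the full state on every trigger.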

@itsvikramagr (Author) replied:

I will take a look at the Flink build and see if I can pick only the relevant packages in the rocksdb dependency.

IMO, abstracting out how the state backend should filter out timed-out state can be treated as a separate problem, so that we don't end up increasing the scope of this PR. Once the abstraction is added, we can file a separate JIRA to implement it for the RocksDB state backend.
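
One possible shape for such an abstraction, where every name below is hypothetical and not part of this PR or of Spark's current StateStore API:

import java.util.function.Predicate;

// Hypothetical sketch: the backend evaluates the timeout predicate
// itself (possibly in a background thread) instead of the caller
// loading every entry into memory and filtering.
public interface ExpiringStateStore {
  /** Removes all entries whose serialized key satisfies the predicate. */
  void removeByCondition(Predicate<byte[]> isTimedOut);
}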

@skonto (Contributor) replied:

OK, makes sense.

@gatorsmile (Member) commented:
RocksDB might not be the best backend. Instead of adding the extra dependency, I think we should just do this as a separate third-party package. The community can always build their own backend based on their needs. Doing so is simple.

Can you submit it to https://spark-packages.org/?

cc @marmbrus @tdas @zsxwing
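
For context, Structured Streaming already loads its state backend reflectively from the spark.sql.streaming.stateStore.providerClass setting, which is what makes the separate-package route workable. A sketch, where the provider class name is hypothetical:

import org.apache.spark.sql.SparkSession;

public class ProviderConfigSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("rocksdb-state-sketch")
        // Hypothetical provider class published as a third-party package;
        // Spark instantiates it reflectively for each stateful operator.
        .config("spark.sql.streaming.stateStore.providerClass",
            "com.example.state.RocksDbStateStoreProvider")
        .getOrCreate();
    spark.stop();
  }
}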

@itsvikramagr (Author) replied:
@gatorsmile - what are the alternatives if RocksDB is not the best backend? Other streaming technologies such as Flink and Kafka Streams use RocksDB as their primary storage engine.

With integration in the Spark codebase, we can change the code in any way later, but if we take the separate-jar route, the kinds of extensions we can make are limited by the current contract. For example, @skonto mentioned one way in which we could abstract the state storage implementation to get the best out of RocksDB. How can we support such improvements if we take the Spark package route?

The current implementation, based on an in-memory hashmap, is not scalable beyond a point. How shall we go about solving that?

<version>${rocksdb.version}</version>
</dependency>
</dependencies>
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>