diff --git a/README.md b/README.md
index 89edb4b60021..7d298971a1f0 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+# Hoodie
Hoodie manages storage of large analytical datasets on [HDFS](http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) and serves them out via two types of tables:
* **Read Optimized Table** - Provides excellent query performance via purely columnar storage (e.g. [Parquet](https://parquet.apache.org/))
diff --git a/docs/configurations.md b/docs/configurations.md
index cf5c2d7b85e7..7042d8237529 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -76,4 +76,7 @@ summary: "Here we list all possible configurations and what they mean"
- [usePrefix](#usePrefix) ()
Standard prefix for all metrics
+ - [S3Configs](s3_hoodie.html) (Hoodie S3 Configs)
+ Configurations required for Hoodie and S3 interoperability.
+
{% include callout.html content="Hoodie is a young project. A lot of pluggable interfaces and configurations to support diverse workloads need to be created. Get involved [here](https://github.com/uber/hoodie)" type="info" %}
diff --git a/docs/s3_filesystem.md b/docs/s3_filesystem.md
new file mode 100644
index 000000000000..adb1cefcb5a4
--- /dev/null
+++ b/docs/s3_filesystem.md
@@ -0,0 +1,61 @@
+---
+title: S3 Filesystem (experimental)
+keywords: sql hive s3 spark presto
+sidebar: mydoc_sidebar
+permalink: s3_hoodie.html
+toc: false
+summary: On this page, we go over how to configure Hoodie with the S3 filesystem.
+---
+Hoodie works with HDFS by default. Hoodie-S3 compatibility is experimental and a work in progress.
+
+## AWS Configs
+
+There are two configurations required for Hoodie-S3 compatibility:
+
+- Adding AWS credentials for Hoodie
+- Adding the required jars to the classpath
+
+### AWS Credentials
+
+Add the required configs to your core-site.xml, from where Hoodie can fetch them. Set `fs.defaultFS` to your S3 bucket URI, and Hoodie should be able to read from and write to the bucket.
+
+```xml
+  <property>
+    <name>fs.defaultFS</name>
+    <value>s3://ysharma</value>
+  </property>
+
+  <property>
+    <name>fs.s3.impl</name>
+    <value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
+  </property>
+
+  <property>
+    <name>fs.s3.awsAccessKeyId</name>
+    <value>AWS_KEY</value>
+  </property>
+
+  <property>
+    <name>fs.s3.awsSecretAccessKey</name>
+    <value>AWS_SECRET</value>
+  </property>
+
+  <property>
+    <name>fs.s3n.awsAccessKeyId</name>
+    <value>AWS_KEY</value>
+  </property>
+
+  <property>
+    <name>fs.s3n.awsSecretAccessKey</name>
+    <value>AWS_SECRET</value>
+  </property>
+```
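+
+With these properties on the classpath, Hadoop's `FileSystem` API resolves `s3://` paths transparently. Below is a minimal sketch to verify the setup; the `S3ConfigCheck` class is illustrative and not part of Hoodie:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class S3ConfigCheck {
+  public static void main(String[] args) throws Exception {
+    // new Configuration() loads core-site.xml from the classpath,
+    // including fs.defaultFS and the credential properties above.
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    System.out.println("Default filesystem: " + fs.getUri());
+    // A successful listing confirms that the s3 scheme and credentials resolve.
+    for (FileStatus status : fs.listStatus(new Path("/"))) {
+      System.out.println(status.getPath());
+    }
+  }
+}
+```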
+
+### AWS Libs
+
+AWS Hadoop libraries to add to the classpath (a Maven dependency sketch follows this list):
+
+ - com.amazonaws:aws-java-sdk:1.10.34
+ - org.apache.hadoop:hadoop-aws:2.7.3
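+
+As a sketch, the same libraries could be declared as Maven dependencies (coordinates exactly as listed above):
+
+```xml
+<dependency>
+  <groupId>com.amazonaws</groupId>
+  <artifactId>aws-java-sdk</artifactId>
+  <version>1.10.34</version>
+</dependency>
+<dependency>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-aws</artifactId>
+  <version>2.7.3</version>
+</dependency>
+```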
+
+
diff --git a/hoodie-client/pom.xml b/hoodie-client/pom.xml
index 617dc9c4bf5f..11e6b85a30e6 100644
--- a/hoodie-client/pom.xml
+++ b/hoodie-client/pom.xml
@@ -118,6 +118,11 @@
     <dependency>
       <groupId>io.dropwizard.metrics</groupId>
       <artifactId>metrics-core</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.beust</groupId>
+      <artifactId>jcommander</artifactId>
+      <version>1.48</version>
+    </dependency>
diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieWrapperFileSystem.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieWrapperFileSystem.java
index 64034b4d24d0..d413fc5c38da 100644
--- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieWrapperFileSystem.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieWrapperFileSystem.java
@@ -49,9 +49,10 @@ public class HoodieWrapperFileSystem extends FileSystem {
public static final String HOODIE_SCHEME_PREFIX = "hoodie-";
static {
- SUPPORT_SCHEMES = new HashSet<>(2);
+ SUPPORT_SCHEMES = new HashSet<>();
SUPPORT_SCHEMES.add("file");
SUPPORT_SCHEMES.add("hdfs");
+ SUPPORT_SCHEMES.add("s3");
}
  private ConcurrentMap<String, SizeAwareFSDataOutputStream> openStreams =
diff --git a/hoodie-client/src/test/java/HoodieClientExample.java b/hoodie-client/src/test/java/HoodieClientExample.java
index eb7e56f707c5..39724f67e488 100644
--- a/hoodie-client/src/test/java/HoodieClientExample.java
+++ b/hoodie-client/src/test/java/HoodieClientExample.java
@@ -14,15 +14,17 @@
* limitations under the License.
*/
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
import com.uber.hoodie.HoodieWriteClient;
-import com.uber.hoodie.common.table.HoodieTableMetaClient;
-import com.uber.hoodie.common.util.FSUtils;
-import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.common.HoodieTestDataGenerator;
import com.uber.hoodie.common.model.HoodieRecord;
+import com.uber.hoodie.common.table.HoodieTableMetaClient;
+import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.config.HoodieIndexConfig;
+import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.index.HoodieIndex;
-
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
@@ -38,12 +40,23 @@
*/
public class HoodieClientExample {
+ @Parameter(names={"--table-path", "-p"}, description = "path for Hoodie sample table")
+ private String inputTablePath = "file:///tmp/hoodie/sample-table";
+
+ @Parameter(names={"--table-name", "-n"}, description = "table name for Hoodie sample table")
+ private String inputTableName = "sample-table";
private static Logger logger = LogManager.getLogger(HoodieClientExample.class);
+
public static void main(String[] args) throws Exception {
- String tablePath = args.length == 1 ? args[0] : "file:///tmp/hoodie/sample-table";
+ HoodieClientExample cli = new HoodieClientExample();
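+    // Populate cli's @Parameter-annotated fields from the command-line arguments.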
+ new JCommander(cli, args);
+ cli.run();
+ }
+
+ public void run() throws Exception {
HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
SparkConf sparkConf = new SparkConf().setAppName("hoodie-client-example");
@@ -54,16 +67,15 @@ public static void main(String[] args) throws Exception {
// generate some records to be loaded in.
HoodieWriteConfig cfg =
- HoodieWriteConfig.newBuilder().withPath(tablePath)
+ HoodieWriteConfig.newBuilder().withPath(inputTablePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
- .forTable("sample-table").withIndexConfig(
+ .forTable(inputTableName).withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.build();
Properties properties = new Properties();
- properties.put(HoodieWriteConfig.TABLE_NAME, "sample-table");
+ properties.put(HoodieWriteConfig.TABLE_NAME, inputTableName);
HoodieTableMetaClient
- .initializePathAsHoodieDataset(FSUtils.getFs(), tablePath,
- properties);
+ .initializePathAsHoodieDataset(FSUtils.getFs(), inputTablePath, properties);
HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
/**
diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java
index 77448d058910..2f554aecb81a 100644
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java
@@ -18,7 +18,6 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
-import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.log.HoodieLogFile;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.exception.HoodieIOException;