diff --git a/README.md b/README.md
index a761fa78..b4217bd8 100755
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Moreover, it provide a standalone component which can be run locally and which s
 For windows users, you need to download a hadoop distribution, to unzip it and to define the system environment variable `HADOOP_HOME`. You can also define the path into files `hadoop-unit-default.properties` (warning: there are a lot...).

+Hadoop Unit requires Java 8, so set up your environment with it.
+
 To build, launch the command:
 ```bash
 mvn package
@@ -62,6 +64,7 @@ The available components are:
 * MONGODB
 * CASSANDRA
 * ELASTICSEARCH
+* NEO4J

 However, for compatibility reason, SolR/SolRCloud and Elasticsearch can not be run into the same JVM. For this purpose, there are 2 standalone packages which are generated (one which is compliant with solr and one which is compliant with elasticsearch).
@@ -541,9 +544,9 @@ public class HdfsBootstrapIntegrationTest {
 * MongoDB
 * Cassandra 3.4
 * ElasticSearch 5.0-alpha4
+* Neo4j 3.0.3

 Built on:
-
 * [hadoop-mini-cluster-0.1.7](https://github.com/sakserv/hadoop-mini-clusters) (aka. HDP 2.4.2)
 * [achilles-embedded-4.2.0](https://github.com/doanduyhai/Achilles)
@@ -553,16 +556,17 @@ Use:
 * edit `hadoop-unit-default.properties` and indicate `HADOOP_HOME` or set your `HADOOP_HOME` environment variable
 * edit `hadoop-unit-default.properties` and indicate `oozie.sharelib.path`

-Todo:
-* male client utils for kafka produce/consume
-* make sample with spark streaming and kafka
-
 Issues:
 * oozie does not work on windows 7 (see http://stackoverflow.com/questions/25790319/getting-access-denied-error-while-running-hadoop-2-3-mapreduce-jobs-in-windows-7)
 * integrate phoenix
 * can only manage one solr collection
 * better docs ;)

+# Note
+
+Neo4j is not integrated into the standalone component because there are too many dependency conflicts:
+* Kafka (2.10_10.0.0.0) uses scala-library-2.10.6.jar whereas neo4j-cypher uses scala-library-2.11.8.jar.
+* Neo4j uses Lucene 5.5.0, which conflicts with Solr and Elasticsearch.
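For reference, here is a minimal sketch (not part of this patch; the class and variable names are illustrative) of how a test can drive the new NEO4J component: start Hadoop Unit, then talk to the embedded Neo4j over Bolt with the neo4j-java-driver, assuming the default `neo4j.port=13533` shipped in `hadoop-unit-default.properties` and mirroring the bundled `Neo4jBootstrapTest`.

```java
import fr.jetoile.hadoopunit.HadoopBootstrap;
import org.neo4j.driver.v1.*;

public class Neo4jUsageSketch {

    public static void main(String[] args) {
        // Start every configured component, including the new NEO4J one.
        HadoopBootstrap.INSTANCE.startAll();

        // The embedded server exposes the Bolt protocol on neo4j.ip:neo4j.port (13533 by default).
        Driver driver = GraphDatabase.driver(
                "bolt://127.0.0.1:13533",
                Config.build().withEncryptionLevel(Config.EncryptionLevel.NONE).toConfig());

        try (Session session = driver.session()) {
            session.run("CREATE (p:Person {name: 'Arthur', title: 'King'})");
            StatementResult result = session.run(
                    "MATCH (p:Person {name: 'Arthur'}) RETURN p.title AS title");
            while (result.hasNext()) {
                Record record = result.next();
                System.out.println(record.get("title").asString()); // prints "King"
            }
        } finally {
            driver.close();
            HadoopBootstrap.INSTANCE.stopAll();
        }
    }
}
```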
# License diff --git a/hadoop-unit-cassandra/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-cassandra/src/test/resources/hadoop-unit-default.properties index 69207546..844fbf58 100644 --- a/hadoop-unit-cassandra/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-cassandra/src/test/resources/hadoop-unit-default.properties @@ -123,4 +123,9 @@ mongo.collection.name=test_collection # Cassandra cassandra.ip=127.0.0.1 cassandra.port=13433 -cassandra.temp.dir=/tmp/embedded_cassandra \ No newline at end of file +cassandra.temp.dir=/tmp/embedded_cassandra + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-commons/src/main/java/fr/jetoile/hadoopunit/Component.java b/hadoop-unit-commons/src/main/java/fr/jetoile/hadoopunit/Component.java index d391307b..e1874fc6 100644 --- a/hadoop-unit-commons/src/main/java/fr/jetoile/hadoopunit/Component.java +++ b/hadoop-unit-commons/src/main/java/fr/jetoile/hadoopunit/Component.java @@ -29,7 +29,8 @@ public enum Component { SOLR("solr"), CASSANDRA("cassandra"), MONGODB("mongodb"), - ELASTICSEARCH("elastic"); + ELASTICSEARCH("elastic"), + NEO4J("neo4j"); private String key; diff --git a/hadoop-unit-commons/src/main/java/fr/jetoile/hadoopunit/HadoopUnitConfig.java b/hadoop-unit-commons/src/main/java/fr/jetoile/hadoopunit/HadoopUnitConfig.java index c3eab56e..e91126e8 100644 --- a/hadoop-unit-commons/src/main/java/fr/jetoile/hadoopunit/HadoopUnitConfig.java +++ b/hadoop-unit-commons/src/main/java/fr/jetoile/hadoopunit/HadoopUnitConfig.java @@ -58,6 +58,11 @@ public class HadoopUnitConfig { public static final String CASSANDRA_PORT_KEY = "cassandra.port"; public static final String CASSANDRA_TEMP_DIR_KEY = "cassandra.temp.dir"; + // Neo4j + public static final String NEO4J_IP_KEY = "neo4j.ip"; + public static final String NEO4J_PORT_KEY = "neo4j.port"; + public static final String NEO4J_TEMP_DIR_KEY = "neo4j.temp.dir"; + // ElasticSearch public static final String ELASTICSEARCH_IP_KEY = "elasticsearch.ip"; public static final String ELASTICSEARCH_HTTP_PORT_KEY = "elasticsearch.http.port"; diff --git a/hadoop-unit-commons/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-commons/src/test/resources/hadoop-unit-default.properties index 7aeceb48..ad8559d8 100644 --- a/hadoop-unit-commons/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-commons/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-elasticsearch/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-elasticsearch/src/test/resources/hadoop-unit-default.properties index 4b5a37b6..c398108f 100644 --- a/hadoop-unit-elasticsearch/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-elasticsearch/src/test/resources/hadoop-unit-default.properties @@ -131,4 +131,9 @@ elasticsearch.http.port=14433 elasticsearch.tcp.port=14533 elasticsearch.temp.dir=/tmp/elasticsearch elasticsearch.index.name=test_index -elasticsearch.cluster.name=elasticsearch \ No newline at end of file +elasticsearch.cluster.name=elasticsearch + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 
+neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-hbase/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-hbase/src/test/resources/hadoop-unit-default.properties index 7aeceb48..ad8559d8 100644 --- a/hadoop-unit-hbase/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-hbase/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-hdfs/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-hdfs/src/test/resources/hadoop-unit-default.properties index 7aeceb48..ad8559d8 100644 --- a/hadoop-unit-hdfs/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-hdfs/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-hive/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-hive/src/test/resources/hadoop-unit-default.properties index 43530f17..4b1a7d34 100644 --- a/hadoop-unit-hive/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-hive/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-kafka/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-kafka/src/test/resources/hadoop-unit-default.properties index 7aeceb48..ad8559d8 100644 --- a/hadoop-unit-kafka/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-kafka/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-mongodb/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-mongodb/src/test/resources/hadoop-unit-default.properties index 4579cb8b..587c0d30 100644 --- a/hadoop-unit-mongodb/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-mongodb/src/test/resources/hadoop-unit-default.properties @@ -118,4 +118,9 @@ oozie.host=localhost mongo.ip=127.0.0.1 mongo.port=13333 mongo.database.name=test_database -mongo.collection.name=test_collection \ No newline at end of file +mongo.collection.name=test_collection + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-neo4j/pom.xml b/hadoop-unit-neo4j/pom.xml new 
file mode 100644 index 00000000..41aac54c --- /dev/null +++ b/hadoop-unit-neo4j/pom.xml @@ -0,0 +1,44 @@ + + + + hadoop-unit + fr.jetoile.hadoop + 1.4-SNAPSHOT + + 4.0.0 + + hadoop-unit-neo4j + + + 1.0.4 + + + + + + fr.jetoile.hadoop + hadoop-unit-commons + + + + org.neo4j + neo4j + + + + org.neo4j + neo4j-bolt + + + + org.neo4j.driver + neo4j-java-driver + ${neo4j-java-driver.version} + test + + + + + \ No newline at end of file diff --git a/hadoop-unit-neo4j/src/main/java/fr/jetoile/hadoopunit/component/Neo4jBootstrap.java b/hadoop-unit-neo4j/src/main/java/fr/jetoile/hadoopunit/component/Neo4jBootstrap.java new file mode 100644 index 00000000..71d36329 --- /dev/null +++ b/hadoop-unit-neo4j/src/main/java/fr/jetoile/hadoopunit/component/Neo4jBootstrap.java @@ -0,0 +1,147 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fr.jetoile.hadoopunit.component; + + +import fr.jetoile.hadoopunit.Component; +import fr.jetoile.hadoopunit.HadoopUnitConfig; +import fr.jetoile.hadoopunit.exception.BootstrapException; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.commons.io.FileUtils; +import org.neo4j.cypher.javacompat.internal.GraphDatabaseCypherService; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.graphdb.factory.GraphDatabaseFactory; +import org.neo4j.graphdb.factory.GraphDatabaseSettings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Paths; + +public class Neo4jBootstrap implements Bootstrap { + final public static String NAME = Component.NEO4J.name(); + + final private Logger LOGGER = LoggerFactory.getLogger(Neo4jBootstrap.class); + + private State state = State.STOPPED; + + private Configuration configuration; + + private int port; + private String ip; + private String tmp; + + private GraphDatabaseService graphDb; + + public Neo4jBootstrap() { + try { + loadConfig(); + } catch (BootstrapException e) { + LOGGER.error("unable to load configuration", e); + } + } + + @Override + public String getName() { + return NAME; + } + + @Override + public String getProperties() { + return "[" + + "ip:" + ip + + ", port:" + port + + "]"; + } + + private void loadConfig() throws BootstrapException { + try { + configuration = new PropertiesConfiguration(HadoopUnitConfig.DEFAULT_PROPS_FILE); + } catch (ConfigurationException e) { + throw new BootstrapException("bad config", e); + } + + port = configuration.getInt(HadoopUnitConfig.NEO4J_PORT_KEY); + ip = configuration.getString(HadoopUnitConfig.NEO4J_IP_KEY); + tmp = configuration.getString(HadoopUnitConfig.NEO4J_TEMP_DIR_KEY); + } + + private void build() { + GraphDatabaseSettings.BoltConnector bolt = GraphDatabaseSettings.boltConnector( "0" ); + + graphDb = new GraphDatabaseFactory() + .newEmbeddedDatabaseBuilder(Paths.get(tmp).toFile()) +// .setConfig(GraphDatabaseSettings.pagecache_memory, 
"512M") +// .setConfig(GraphDatabaseSettings.string_block_size, "60") +// .setConfig(GraphDatabaseSettings.array_block_size, "300") + .setConfig( bolt.enabled, "true" ) + .setConfig( bolt.address, ip + ":" + port ) + .newGraphDatabase(); + } + + @Override + public Bootstrap start() { + if (state == State.STOPPED) { + state = State.STARTING; + LOGGER.info("{} is starting", this.getClass().getName()); + try { + build(); + } catch (Exception e) { + LOGGER.error("unable to add cassandra", e); + } + state = State.STARTED; + LOGGER.info("{} is started", this.getClass().getName()); + } + + return this; + } + + @Override + public Bootstrap stop() { + if (state == State.STARTED) { + state = State.STOPPING; + LOGGER.info("{} is stopping", this.getClass().getName()); + try { + graphDb.shutdown(); + cleanup(); + } catch (Exception e) { + LOGGER.error("unable to stop cassandra", e); + } + state = State.STOPPED; + LOGGER.info("{} is stopped", this.getClass().getName()); + } + return this; + } + + private void cleanup() { + try { + FileUtils.deleteDirectory(Paths.get(configuration.getString(HadoopUnitConfig.NEO4J_TEMP_DIR_KEY)).toFile()); + } catch (IOException e) { + LOGGER.error("unable to delete {}", configuration.getString(HadoopUnitConfig.NEO4J_TEMP_DIR_KEY), e); + } + } + + @Override + public org.apache.hadoop.conf.Configuration getConfiguration() { + throw new UnsupportedOperationException("the method getConfiguration can not be called on Neo4jBootstrap"); + } + + public GraphDatabaseService getNeo4jGraph() { + return this.graphDb; + } + +} diff --git a/hadoop-unit-neo4j/src/main/resources/META-INF/services/fr.jetoile.hadoopunit.component.Bootstrap b/hadoop-unit-neo4j/src/main/resources/META-INF/services/fr.jetoile.hadoopunit.component.Bootstrap new file mode 100644 index 00000000..93b0ff2d --- /dev/null +++ b/hadoop-unit-neo4j/src/main/resources/META-INF/services/fr.jetoile.hadoopunit.component.Bootstrap @@ -0,0 +1 @@ +fr.jetoile.hadoopunit.component.Neo4jBootstrap \ No newline at end of file diff --git a/hadoop-unit-neo4j/src/test/java/fr/jetoile/hadoopunit/component/Neo4jBootstrapTest.java b/hadoop-unit-neo4j/src/test/java/fr/jetoile/hadoopunit/component/Neo4jBootstrapTest.java new file mode 100644 index 00000000..d71e6007 --- /dev/null +++ b/hadoop-unit-neo4j/src/test/java/fr/jetoile/hadoopunit/component/Neo4jBootstrapTest.java @@ -0,0 +1,188 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fr.jetoile.hadoopunit.component; + +import fr.jetoile.hadoopunit.Component; +import fr.jetoile.hadoopunit.HadoopBootstrap; +import fr.jetoile.hadoopunit.HadoopUnitConfig; +import fr.jetoile.hadoopunit.exception.BootstrapException; +import fr.jetoile.hadoopunit.exception.NotFoundServiceException; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.junit.*; +import org.neo4j.driver.v1.*; +import org.neo4j.graphdb.*; +import org.neo4j.graphdb.Transaction; +import org.neo4j.graphdb.traversal.Evaluators; +import org.neo4j.graphdb.traversal.TraversalDescription; +import org.neo4j.graphdb.traversal.Traverser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class Neo4jBootstrapTest { + private static final Logger LOGGER = LoggerFactory.getLogger(Neo4jBootstrapTest.class); + + private enum RelTypes implements RelationshipType { + KNOWS + } + + static private Configuration configuration; + + private GraphDatabaseService graphDb; + + + @BeforeClass + public static void setup() throws BootstrapException { + HadoopBootstrap.INSTANCE.startAll(); + + try { + configuration = new PropertiesConfiguration(HadoopUnitConfig.DEFAULT_PROPS_FILE); + } catch (ConfigurationException e) { + throw new BootstrapException("bad config", e); + } + } + + + @AfterClass + public static void tearDown() throws BootstrapException { + HadoopBootstrap.INSTANCE.stopAll(); + } + + @Before + public void setUp() throws NotFoundServiceException { + Bootstrap neo4j = HadoopBootstrap.INSTANCE.getService(Component.NEO4J); + graphDb = ((Neo4jBootstrap) neo4j).getNeo4jGraph(); + } + + @After + public void teardown() throws NotFoundServiceException { + } + + @Test + public void neo4jShouldStart() { + try (Transaction tx = graphDb.beginTx()) { + Node firstNode = graphDb.createNode(); + firstNode.setProperty("message", "Hello, "); + Node secondNode = graphDb.createNode(); + secondNode.setProperty("message", "World!"); + + Relationship relationship = firstNode.createRelationshipTo(secondNode, RelTypes.KNOWS); + relationship.setProperty("message", "brave Neo4j "); + + tx.success(); + + assertEquals("Hello, brave Neo4j World!", "" + firstNode.getProperty("message") + relationship.getProperty("message") + secondNode.getProperty("message")); + } + } + + @Test + public void traversal_query_should_success() { + int numberOfFriends = 0; + + try (Transaction tx = graphDb.beginTx()) { + + Node neoNode = graphDb.createNode(); + neoNode.setProperty("name", "Hello, "); + Node secondNode = graphDb.createNode(); + secondNode.setProperty("name", "World!"); + Node thirdNode = graphDb.createNode(); + thirdNode.setProperty("name", "World2!"); + Relationship relationship = neoNode.createRelationshipTo(secondNode, RelTypes.KNOWS); + Relationship relationship2 = neoNode.createRelationshipTo(thirdNode, RelTypes.KNOWS); + relationship.setProperty("message", "brave Neo4j "); + relationship2.setProperty("message", "brave Neo4j2 "); + tx.success(); + + Traverser friendsTraverser = getFriends(graphDb, neoNode); + for (Path friendPath : friendsTraverser) { + numberOfFriends++; + } + + assertEquals(2, numberOfFriends); + } + } + + @Test + public void cypher_query_should_success() { + + try (Transaction tx =
graphDb.beginTx()) { + Node myNode = graphDb.createNode(); + myNode.setProperty("name", "my node"); + tx.success(); + } + + List<String> res = new ArrayList<>(); + try (Transaction ignored = graphDb.beginTx(); + Result result = graphDb.execute("match (n {name: 'my node'}) return n, n.name")) { + while (result.hasNext()) { + Map<String, Object> row = result.next(); + for (Map.Entry<String, Object> column : row.entrySet()) { + res.add(column.getKey() + ": " + column.getValue()); + LOGGER.debug(column.getKey() + ": " + column.getValue()); + } + } + } + + assertEquals(2, res.size()); + assertTrue(res.toString().contains("n.name: my node")); + } + + @Test + public void neo4jShouldStartWithRealDriver() { + + Driver driver = GraphDatabase.driver( + "bolt://localhost:13533", + Config.build() + .withEncryptionLevel(Config.EncryptionLevel.NONE) + .toConfig() + ); + + List<Record> results = new ArrayList<>(); + try (Session session = driver.session()) { + session.run("CREATE (person:Person {name: {name}, title:'King'})", Values.parameters("name", "Arthur")); + + StatementResult result = session.run("MATCH (a:Person) WHERE a.name = 'Arthur' RETURN a.name AS name, a.title AS title"); + while (result.hasNext()) { + Record record = result.next(); + results.add(record); + LOGGER.debug(record.get("title").asString() + " " + record.get("name").asString()); + } + } + + assertEquals(1, results.size()); + assertEquals("King", results.get(0).get("title").asString()); + assertEquals("Arthur", results.get(0).get("name").asString()); + } + + private Traverser getFriends(GraphDatabaseService graphDb, final Node person) { + TraversalDescription td = graphDb.traversalDescription() + .breadthFirst() + .relationships(RelTypes.KNOWS, Direction.OUTGOING) + .evaluator(Evaluators.excludeStartPosition()); + return td.traverse(person); + } + + +} + + diff --git a/hadoop-unit-neo4j/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-neo4j/src/test/resources/hadoop-unit-default.properties new file mode 100644 index 00000000..844fbf58 --- /dev/null +++ b/hadoop-unit-neo4j/src/test/resources/hadoop-unit-default.properties @@ -0,0 +1,131 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# + +#HADOOP_HOME=/opt/hadoop + +# Zookeeper +zookeeper.temp.dir=/tmp/embedded_zk +zookeeper.host=127.0.0.1 +zookeeper.port=22010 + +# Hive +hive.scratch.dir=/tmp/hive_scratch_dir +hive.warehouse.dir=/tmp/warehouse_dir + +# Hive Metastore +hive.metastore.hostname=localhost +hive.metastore.port=20102 +hive.metastore.derby.db.dir=metastore_db + +# Hive Server2 +hive.server2.hostname=localhost +hive.server2.port=20103 + +# Hive Test +hive.test.database.name=default +hive.test.table.name=test_table + + +# HDFS +hdfs.namenode.port=20112 +hdfs.namenode.http.port=50070 +hdfs.temp.dir=/tmp/embedded_hdfs +hdfs.num.datanodes=1 +hdfs.enable.permissions=false +hdfs.format=true +hdfs.enable.running.user.as.proxy.user=true + +# HDFS Test +hdfs.test.file=/tmp/testing +hdfs.test.string=TESTING + + +# HBase +hbase.master.port=25111 +hbase.master.info.port=-1 +hbase.num.region.servers=1 +hbase.root.dir=/tmp/embedded_hbase +hbase.znode.parent=/hbase-unsecure +hbase.wal.replication.enabled=false + +# HBase Test +hbase.test.table.name=hbase_test_table +hbase.test.col.family.name=cf1 +hbase.test.col.qualifier.name=cq1 +hbase.test.num.rows.to.put=50 + +# Kafka +kafka.hostname=127.0.0.1 +kafka.port=20111 + +# Kafka Test +kafka.test.topic=testtopic +kafka.test.message.count=10 +kafka.test.broker.id=1 +kafka.test.temp.dir=embedded_kafka + +#SolR + SolRCloud +solr.dir=solr + +#SolR +solr.collection.internal.name=collection1_shard1_replica1 + +#SolRCloud +solr.collection.name=collection1 +solr.cloud.port=8983 + + +# YARN +yarn.num.node.managers=1 +yarn.num.local.dirs=1 +yarn.num.log.dirs=1 +yarn.resource.manager.address=localhost:37001 +yarn.resource.manager.hostname=localhost +yarn.resource.manager.scheduler.address=localhost:37002 +yarn.resource.manager.resource.tracker.address=localhost:37003 +yarn.resource.manager.webapp.address=localhost:37004 +yarn.use.in.jvm.container.executor=false + +# MR +mr.job.history.address=localhost:37005 + +# Oozie +oozie.test.dir=/tmp/embedded_oozie +oozie.home.dir=/tmp/oozie_home +oozie.username=blah +oozie.groupname=testgroup +oozie.hdfs.share.lib.dir=/tmp/share_lib +oozie.share.lib.create=true +oozie.local.share.lib.cache.dir=/tmp/share_lib_cache +oozie.purge.local.share.lib.cache=false +oozie.sharelib.path=/home/khanh/github +oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz +oozie.port=20113 +oozie.host=localhost + +# MongoDB +mongo.ip=127.0.0.1 +mongo.port=13333 +mongo.database.name=test_database +mongo.collection.name=test_collection + +# Cassandra +cassandra.ip=127.0.0.1 +cassandra.port=13433 +cassandra.temp.dir=/tmp/embedded_cassandra + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-oozie/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-oozie/src/test/resources/hadoop-unit-default.properties index 7aeceb48..ad8559d8 100644 --- a/hadoop-unit-oozie/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-oozie/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-solr/src/test/resources/hadoop-unit-default.properties 
b/hadoop-unit-solr/src/test/resources/hadoop-unit-default.properties index 7aeceb48..ad8559d8 100644 --- a/hadoop-unit-solr/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-solr/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-solrcloud/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-solrcloud/src/test/resources/hadoop-unit-default.properties index 7aeceb48..ad8559d8 100644 --- a/hadoop-unit-solrcloud/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-solrcloud/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-standalone/hadoop-unit-standalone-elasticsearch/src/main/conf/hadoop-unit-default.properties b/hadoop-unit-standalone/hadoop-unit-standalone-elasticsearch/src/main/conf/hadoop-unit-default.properties index 8f87213f..f078fec8 100644 --- a/hadoop-unit-standalone/hadoop-unit-standalone-elasticsearch/src/main/conf/hadoop-unit-default.properties +++ b/hadoop-unit-standalone/hadoop-unit-standalone-elasticsearch/src/main/conf/hadoop-unit-default.properties @@ -120,4 +120,9 @@ elasticsearch.http.port=14433 elasticsearch.tcp.port=14533 elasticsearch.temp.dir=/tmp/elasticsearch elasticsearch.index.name=test_index -elasticsearch.cluster.name=elasticsearch \ No newline at end of file +elasticsearch.cluster.name=elasticsearch + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-standalone/hadoop-unit-standalone-elasticsearch/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-standalone/hadoop-unit-standalone-elasticsearch/src/test/resources/hadoop-unit-default.properties index 2dfed130..f513258d 100644 --- a/hadoop-unit-standalone/hadoop-unit-standalone-elasticsearch/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-standalone/hadoop-unit-standalone-elasticsearch/src/test/resources/hadoop-unit-default.properties @@ -117,4 +117,9 @@ elasticsearch.http.port=14433 elasticsearch.tcp.port=14533 elasticsearch.temp.dir=/tmp/elasticsearch elasticsearch.index.name=test_index -elasticsearch.cluster.name=elasticsearch \ No newline at end of file +elasticsearch.cluster.name=elasticsearch + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/main/conf/hadoop-unit-default.properties b/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/main/conf/hadoop-unit-default.properties index 40eb09f7..84d5f4f1 100644 --- a/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/main/conf/hadoop-unit-default.properties +++ b/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/main/conf/hadoop-unit-default.properties @@ -113,3 +113,8 @@ mongo.collection.name=test_collection 
cassandra.ip=127.0.0.1 cassandra.port=13433 cassandra.temp.dir=/tmp/embedded_cassandra + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/test/java/fr/jetoile/hadoopunit/integrationtest/ManualIntegrationBootstrapTest.java b/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/test/java/fr/jetoile/hadoopunit/integrationtest/ManualIntegrationBootstrapTest.java index 51803a98..5ee3d145 100644 --- a/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/test/java/fr/jetoile/hadoopunit/integrationtest/ManualIntegrationBootstrapTest.java +++ b/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/test/java/fr/jetoile/hadoopunit/integrationtest/ManualIntegrationBootstrapTest.java @@ -127,7 +127,6 @@ public void kafkaShouldStart() throws Exception { } - // Consumer KafkaConsumerUtils.INSTANCE.consumeMessagesWithNewApi(configuration.getString(HadoopUnitConfig.KAFKA_TEST_TOPIC_KEY), 10); @@ -220,7 +219,6 @@ public void hiveServer2ShouldStart() throws InterruptedException, ClassNotFoundE } - @Test public void hdfsShouldStart() throws Exception { @@ -246,11 +244,11 @@ public void hdfsShouldStart() throws Exception { hdfsFsHandle.close(); URL url = new URL( - String.format( "http://localhost:%s/webhdfs/v1?op=GETHOMEDIRECTORY&user.name=guest", - configuration.getInt( HadoopUnitConfig.HDFS_NAMENODE_HTTP_PORT_KEY ) ) ); + String.format("http://localhost:%s/webhdfs/v1?op=GETHOMEDIRECTORY&user.name=guest", + configuration.getInt(HadoopUnitConfig.HDFS_NAMENODE_HTTP_PORT_KEY))); URLConnection connection = url.openConnection(); - connection.setRequestProperty( "Accept-Charset", "UTF-8" ); - BufferedReader response = new BufferedReader( new InputStreamReader( connection.getInputStream() ) ); + connection.setRequestProperty("Accept-Charset", "UTF-8"); + BufferedReader response = new BufferedReader(new InputStreamReader(connection.getInputStream())); String line = response.readLine(); response.close(); assertThat("{\"Path\":\"/user/guest\"}").isEqualTo(line); @@ -297,9 +295,9 @@ public void oozieShouldStart() throws Exception { org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration(); conf.set("fs.default.name", "hdfs://127.0.0.1:" + configuration.getInt(HadoopUnitConfig.HDFS_NAMENODE_PORT_KEY)); - URI uri = URI.create ("hdfs://127.0.0.1:" + configuration.getInt(HadoopUnitConfig.HDFS_NAMENODE_PORT_KEY)); + URI uri = URI.create("hdfs://127.0.0.1:" + configuration.getInt(HadoopUnitConfig.HDFS_NAMENODE_PORT_KEY)); - FileSystem hdfsFs = FileSystem.get (uri, conf); + FileSystem hdfsFs = FileSystem.get(uri, conf); OozieClient oozieClient = new OozieClient("http://" + configuration.getString(OozieBootstrap.OOZIE_HOST) + ":" + configuration.getInt(OozieBootstrap.OOZIE_PORT) + "/oozie"); @@ -379,7 +377,7 @@ public void mongodbShouldStart() throws UnknownHostException { assertEquals(1, col.count()); DBCursor cursor = col.find(); - while(cursor.hasNext()) { + while (cursor.hasNext()) { LOGGER.info("MONGODB: Document output: {}", cursor.next()); } cursor.close(); diff --git a/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/test/resources/hadoop-unit-default.properties index 0d364b93..fbd5b724 100644 --- a/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/test/resources/hadoop-unit-default.properties +++ 
b/hadoop-unit-standalone/hadoop-unit-standalone-solr/src/test/resources/hadoop-unit-default.properties @@ -110,3 +110,8 @@ mongo.collection.name=test_collection cassandra.ip=127.0.0.1 cassandra.port=13433 cassandra.temp.dir=/tmp/embedded_cassandra + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/hadoop-unit-standalone/pom.xml b/hadoop-unit-standalone/pom.xml index 0e2afcb9..d0c179e9 100644 --- a/hadoop-unit-standalone/pom.xml +++ b/hadoop-unit-standalone/pom.xml @@ -79,13 +79,6 @@ commons-configuration - - - fr.jetoile.hadoop - hadoop-unit-client-spark - test - - fr.jetoile.hadoop hadoop-unit-client-kafka @@ -116,6 +109,7 @@ ${mongo-java-driver.version} test + \ No newline at end of file diff --git a/hadoop-unit-zookeeper/src/test/resources/hadoop-unit-default.properties b/hadoop-unit-zookeeper/src/test/resources/hadoop-unit-default.properties index 7aeceb48..ad8559d8 100644 --- a/hadoop-unit-zookeeper/src/test/resources/hadoop-unit-default.properties +++ b/hadoop-unit-zookeeper/src/test/resources/hadoop-unit-default.properties @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false oozie.sharelib.path=/home/khanh/github oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz oozie.port=20113 -oozie.host=localhost \ No newline at end of file +oozie.host=localhost + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/pom.xml b/pom.xml index 30be8375..086116c5 100755 --- a/pom.xml +++ b/pom.xml @@ -62,6 +62,7 @@ hadoop-unit-mongodb hadoop-unit-elasticsearch sample + hadoop-unit-neo4j @@ -145,6 +146,8 @@ 3.1.2 4.2.0 2.6.5 + + 3.0.3 @@ -274,6 +277,24 @@ ${hadoop-unit.version} + + fr.jetoile.hadoop + hadoop-unit-neo4j + ${hadoop-unit.version} + + + + org.neo4j + neo4j + ${neo4j.version} + + + + org.neo4j + neo4j-bolt + ${neo4j.version} + + fr.jetoile.hadoop hadoop-unit-mongodb @@ -629,6 +650,10 @@ slf4j-log4j12 org.slf4j + + netty + io.netty + diff --git a/sample/kafka-spark-streaming/src/test/resources/hadoop-unit-default.properties b/sample/kafka-spark-streaming/src/test/resources/hadoop-unit-default.properties index 8f87213f..f078fec8 100644 --- a/sample/kafka-spark-streaming/src/test/resources/hadoop-unit-default.properties +++ b/sample/kafka-spark-streaming/src/test/resources/hadoop-unit-default.properties @@ -120,4 +120,9 @@ elasticsearch.http.port=14433 elasticsearch.tcp.port=14533 elasticsearch.temp.dir=/tmp/elasticsearch elasticsearch.index.name=test_index -elasticsearch.cluster.name=elasticsearch \ No newline at end of file +elasticsearch.cluster.name=elasticsearch + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/sample/kafka-stream/src/test/resources/hadoop-unit-default.properties b/sample/kafka-stream/src/test/resources/hadoop-unit-default.properties index 0d364b93..fbd5b724 100644 --- a/sample/kafka-stream/src/test/resources/hadoop-unit-default.properties +++ b/sample/kafka-stream/src/test/resources/hadoop-unit-default.properties @@ -110,3 +110,8 @@ mongo.collection.name=test_collection cassandra.ip=127.0.0.1 cassandra.port=13433 cassandra.temp.dir=/tmp/embedded_cassandra + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/sample/parquet-spark/src/test/resources/hadoop-unit-default.properties b/sample/parquet-spark/src/test/resources/hadoop-unit-default.properties index 
0d364b93..fbd5b724 100644 --- a/sample/parquet-spark/src/test/resources/hadoop-unit-default.properties +++ b/sample/parquet-spark/src/test/resources/hadoop-unit-default.properties @@ -110,3 +110,8 @@ mongo.collection.name=test_collection cassandra.ip=127.0.0.1 cassandra.port=13433 cassandra.temp.dir=/tmp/embedded_cassandra + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file diff --git a/sample/solr-parquet-spark/src/test/resources/hadoop-unit-default.properties b/sample/solr-parquet-spark/src/test/resources/hadoop-unit-default.properties index 0d364b93..fbd5b724 100644 --- a/sample/solr-parquet-spark/src/test/resources/hadoop-unit-default.properties +++ b/sample/solr-parquet-spark/src/test/resources/hadoop-unit-default.properties @@ -110,3 +110,8 @@ mongo.collection.name=test_collection cassandra.ip=127.0.0.1 cassandra.port=13433 cassandra.temp.dir=/tmp/embedded_cassandra + +# Neo4j +neo4j.ip=127.0.0.1 +neo4j.port=13533 +neo4j.temp.dir=/tmp/embedded_neo4j \ No newline at end of file
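The same three `neo4j.*` keys are appended to every module's `hadoop-unit-default.properties` above. As a closing illustration (not part of the diff; the class name is made up), here is a minimal sketch, mirroring `Neo4jBootstrap.loadConfig()`, of how those keys are resolved through commons-configuration:

```java
import fr.jetoile.hadoopunit.HadoopUnitConfig;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;

public class Neo4jConfigSketch {

    public static void main(String[] args) throws ConfigurationException {
        // hadoop-unit-default.properties is loaded from the classpath.
        PropertiesConfiguration configuration =
                new PropertiesConfiguration(HadoopUnitConfig.DEFAULT_PROPS_FILE);

        // The three keys this change adds to every properties file.
        String ip = configuration.getString(HadoopUnitConfig.NEO4J_IP_KEY);           // 127.0.0.1
        int port = configuration.getInt(HadoopUnitConfig.NEO4J_PORT_KEY);             // 13533
        String tmpDir = configuration.getString(HadoopUnitConfig.NEO4J_TEMP_DIR_KEY); // /tmp/embedded_neo4j

        System.out.println("Neo4j bolt endpoint: " + ip + ":" + port + " (data in " + tmpDir + ")");
    }
}
```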