Skip to content

Commit

Permalink
add neo4j (#36)
Browse files Browse the repository at this point in the history
  • Loading branch information
jetoile authored Jul 17, 2016
1 parent 51f3871 commit a2f72bf
Show file tree
Hide file tree
Showing 31 changed files with 675 additions and 37 deletions.
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Moreover, it provide a standalone component which can be run locally and which s

Windows users need to download a Hadoop distribution, unzip it, and define the system environment variable `HADOOP_HOME`. You can also define the path in the `hadoop-unit-default.properties` files (warning: there are a lot of them...).

Hadoop Unit uses Java 8, so set up your environment accordingly.

To build, launch the command:
```bash
mvn package
Expand Down Expand Up @@ -62,6 +64,7 @@ The available components are:
* MONGODB
* CASSANDRA
* ELASTICSEARCH
* NEO4J

However, for compatibility reasons, SolR/SolRCloud and Elasticsearch cannot be run in the same JVM. For this purpose, 2 standalone packages are generated (one which is compliant with solr and one which is compliant with elasticsearch).

Expand Down Expand Up @@ -541,9 +544,9 @@ public class HdfsBootstrapIntegrationTest {
* MongoDB
* Cassandra 3.4
* ElasticSearch 5.0-alpha4
* Neo4j 3.0.3
Built on:
* [hadoop-mini-cluster-0.1.7](https://github.com/sakserv/hadoop-mini-clusters) (aka. HDP 2.4.2)
* [achilles-embedded-4.2.0](https://github.com/doanduyhai/Achilles)
Expand All @@ -553,16 +556,17 @@ Use:
* edit `hadoop-unit-default.properties` and indicate `HADOOP_HOME` or set your `HADOOP_HOME` environment variable
* edit `hadoop-unit-default.properties` and indicate `oozie.sharelib.path`
Todo:
* make client utils for kafka produce/consume
* make sample with spark streaming and kafka
Issues:
* oozie does not work on windows 7 (see http://stackoverflow.com/questions/25790319/getting-access-denied-error-while-running-hadoop-2-3-mapreduce-jobs-in-windows-7)
* integrate phoenix
* can only manage one solr collection
* better docs ;)
# Note
Neo4j will not be integrated with the standalone component since there are too many dependency conflicts:
* Kafka (2.10_10.0.0.0) uses scala-library-2.10.6.jar but neo4j-cypher uses scala-library-2.11.8.jar.
* Neo4j uses lucene 5.5.0, which conflicts with solr and elasticsearch.
# License
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,9 @@ mongo.collection.name=test_collection
# Cassandra
cassandra.ip=127.0.0.1
cassandra.port=13433
cassandra.temp.dir=/tmp/embedded_cassandra
cassandra.temp.dir=/tmp/embedded_cassandra

# Neo4j
neo4j.ip=127.0.0.1
neo4j.port=13533
neo4j.temp.dir=/tmp/embedded_neo4j
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ public enum Component {
SOLR("solr"),
CASSANDRA("cassandra"),
MONGODB("mongodb"),
ELASTICSEARCH("elastic");
ELASTICSEARCH("elastic"),
NEO4J("neo4j");

private String key;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ public class HadoopUnitConfig {
public static final String CASSANDRA_PORT_KEY = "cassandra.port";
public static final String CASSANDRA_TEMP_DIR_KEY = "cassandra.temp.dir";

// Neo4j
public static final String NEO4J_IP_KEY = "neo4j.ip";
public static final String NEO4J_PORT_KEY = "neo4j.port";
public static final String NEO4J_TEMP_DIR_KEY = "neo4j.temp.dir";

// ElasticSearch
public static final String ELASTICSEARCH_IP_KEY = "elasticsearch.ip";
public static final String ELASTICSEARCH_HTTP_PORT_KEY = "elasticsearch.http.port";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false
oozie.sharelib.path=/home/khanh/github
oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz
oozie.port=20113
oozie.host=localhost
oozie.host=localhost

# Neo4j
neo4j.ip=127.0.0.1
neo4j.port=13533
neo4j.temp.dir=/tmp/embedded_neo4j
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,9 @@ elasticsearch.http.port=14433
elasticsearch.tcp.port=14533
elasticsearch.temp.dir=/tmp/elasticsearch
elasticsearch.index.name=test_index
elasticsearch.cluster.name=elasticsearch
elasticsearch.cluster.name=elasticsearch

# Neo4j
neo4j.ip=127.0.0.1
neo4j.port=13533
neo4j.temp.dir=/tmp/embedded_neo4j
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false
oozie.sharelib.path=/home/khanh/github
oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz
oozie.port=20113
oozie.host=localhost
oozie.host=localhost

# Neo4j
neo4j.ip=127.0.0.1
neo4j.port=13533
neo4j.temp.dir=/tmp/embedded_neo4j
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false
oozie.sharelib.path=/home/khanh/github
oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz
oozie.port=20113
oozie.host=localhost
oozie.host=localhost

# Neo4j
neo4j.ip=127.0.0.1
neo4j.port=13533
neo4j.temp.dir=/tmp/embedded_neo4j
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false
oozie.sharelib.path=/home/khanh/github
oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz
oozie.port=20113
oozie.host=localhost
oozie.host=localhost

# Neo4j
neo4j.ip=127.0.0.1
neo4j.port=13533
neo4j.temp.dir=/tmp/embedded_neo4j
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,9 @@ oozie.purge.local.share.lib.cache=false
oozie.sharelib.path=/home/khanh/github
oozie.sharelib.name=oozie-4.2.0.2.3.2.0-2950-distro.tar.gz
oozie.port=20113
oozie.host=localhost
oozie.host=localhost

# Neo4j
neo4j.ip=127.0.0.1
neo4j.port=13533
neo4j.temp.dir=/tmp/embedded_neo4j
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,9 @@ oozie.host=localhost
mongo.ip=127.0.0.1
mongo.port=13333
mongo.database.name=test_database
mongo.collection.name=test_collection
mongo.collection.name=test_collection

# Neo4j
neo4j.ip=127.0.0.1
neo4j.port=13533
neo4j.temp.dir=/tmp/embedded_neo4j
44 changes: 44 additions & 0 deletions hadoop-unit-neo4j/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module descriptor for the hadoop-unit-neo4j artifact (child of the hadoop-unit parent). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hadoop-unit</artifactId>
<groupId>fr.jetoile.hadoop</groupId>
<version>1.4-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>hadoop-unit-neo4j</artifactId>

<properties>
<!-- Version of the Neo4j Java driver referenced by the test-scoped dependency below. -->
<neo4j-java-driver.version>1.0.4</neo4j-java-driver.version>
</properties>

<dependencies>

<!-- No version declared here: presumably managed by the parent pom's dependencyManagement (TODO confirm). -->
<dependency>
<groupId>fr.jetoile.hadoop</groupId>
<artifactId>hadoop-unit-commons</artifactId>
</dependency>

<!-- No version declared here: presumably managed by the parent pom (TODO confirm). -->
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j</artifactId>
</dependency>

<!-- No version declared here: presumably managed by the parent pom (TODO confirm). -->
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-bolt</artifactId>
</dependency>

<!-- Neo4j Java driver, test scope only. -->
<dependency>
<groupId>org.neo4j.driver</groupId>
<artifactId>neo4j-java-driver</artifactId>
<version>${neo4j-java-driver.version}</version>
<scope>test</scope>
</dependency>
</dependencies>


</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fr.jetoile.hadoopunit.component;


import fr.jetoile.hadoopunit.Component;
import fr.jetoile.hadoopunit.HadoopUnitConfig;
import fr.jetoile.hadoopunit.exception.BootstrapException;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.io.FileUtils;
import org.neo4j.cypher.javacompat.internal.GraphDatabaseCypherService;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.file.Paths;

/**
 * {@link Bootstrap} implementation that runs an embedded Neo4j graph database
 * with a Bolt connector enabled.
 *
 * <p>The listening address ({@code neo4j.ip}, {@code neo4j.port}) and the store
 * directory ({@code neo4j.temp.dir}) are read from
 * {@link HadoopUnitConfig#DEFAULT_PROPS_FILE} when the instance is constructed.
 * The store directory is deleted when the component is stopped.
 */
public class Neo4jBootstrap implements Bootstrap {
    public static final String NAME = Component.NEO4J.name();

    private static final Logger LOGGER = LoggerFactory.getLogger(Neo4jBootstrap.class);

    private State state = State.STOPPED;

    private Configuration configuration;

    private int port;
    private String ip;
    private String tmp;

    private GraphDatabaseService graphDb;

    /**
     * Creates the bootstrap and loads its settings. A configuration failure is
     * logged rather than propagated, so construction itself never throws a
     * {@link BootstrapException}.
     */
    public Neo4jBootstrap() {
        try {
            loadConfig();
        } catch (BootstrapException e) {
            LOGGER.error("unable to load configuration", e);
        }
    }

    /**
     * @return the component name, i.e. {@code Component.NEO4J.name()}
     */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * @return a printable summary of the form {@code [ip:<ip>, port:<port>]}
     */
    @Override
    public String getProperties() {
        return "[" +
                "ip:" + ip +
                ", port:" + port +
                "]";
    }

    /**
     * Reads ip, port and temp directory from the default properties file.
     *
     * @throws BootstrapException if the properties file cannot be loaded
     */
    private void loadConfig() throws BootstrapException {
        try {
            configuration = new PropertiesConfiguration(HadoopUnitConfig.DEFAULT_PROPS_FILE);
        } catch (ConfigurationException e) {
            throw new BootstrapException("bad config", e);
        }

        port = configuration.getInt(HadoopUnitConfig.NEO4J_PORT_KEY);
        ip = configuration.getString(HadoopUnitConfig.NEO4J_IP_KEY);
        tmp = configuration.getString(HadoopUnitConfig.NEO4J_TEMP_DIR_KEY);
    }

    /**
     * Creates the embedded database in the configured temp directory and
     * enables Bolt connector "0" on {@code ip:port}.
     */
    private void build() {
        GraphDatabaseSettings.BoltConnector bolt = GraphDatabaseSettings.boltConnector("0");

        graphDb = new GraphDatabaseFactory()
                .newEmbeddedDatabaseBuilder(Paths.get(tmp).toFile())
                // Optional tuning settings, left disabled:
//                .setConfig(GraphDatabaseSettings.pagecache_memory, "512M")
//                .setConfig(GraphDatabaseSettings.string_block_size, "60")
//                .setConfig(GraphDatabaseSettings.array_block_size, "300")
                .setConfig(bolt.enabled, "true")
                .setConfig(bolt.address, ip + ":" + port)
                .newGraphDatabase();
    }

    /**
     * Starts the embedded database if the component is currently stopped;
     * otherwise does nothing.
     *
     * <p>If the database cannot be created the error is logged and the
     * component goes back to the stopped state, so it never reports itself as
     * started without a usable database.
     *
     * @return this instance
     */
    @Override
    public Bootstrap start() {
        if (state == State.STOPPED) {
            state = State.STARTING;
            LOGGER.info("{} is starting", this.getClass().getName());
            try {
                build();
            } catch (Exception e) {
                LOGGER.error("unable to add neo4j", e);
                // Startup failed: do not pretend to be running.
                state = State.STOPPED;
                return this;
            }
            state = State.STARTED;
            LOGGER.info("{} is started", this.getClass().getName());
        }

        return this;
    }

    /**
     * Shuts the embedded database down and deletes its store directory if the
     * component is currently started; otherwise does nothing. Failures are
     * logged and the component is marked stopped regardless.
     *
     * @return this instance
     */
    @Override
    public Bootstrap stop() {
        if (state == State.STARTED) {
            state = State.STOPPING;
            LOGGER.info("{} is stopping", this.getClass().getName());
            try {
                if (graphDb != null) {
                    graphDb.shutdown();
                }
                cleanup();
            } catch (Exception e) {
                LOGGER.error("unable to stop neo4j", e);
            }
            state = State.STOPPED;
            LOGGER.info("{} is stopped", this.getClass().getName());
        }
        return this;
    }

    /**
     * Deletes the database store directory; an I/O failure is logged only.
     */
    private void cleanup() {
        try {
            FileUtils.deleteDirectory(Paths.get(tmp).toFile());
        } catch (IOException e) {
            LOGGER.error("unable to delete {}", tmp, e);
        }
    }

    /**
     * Not supported by this component.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public org.apache.hadoop.conf.Configuration getConfiguration() {
        throw new UnsupportedOperationException("the method getConfiguration can not be called on Neo4jBootstrap");
    }

    /**
     * @return the embedded database handle, or {@code null} if the database
     *         has not been successfully created yet
     */
    public GraphDatabaseService getNeo4jGraph() {
        return this.graphDb;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fr.jetoile.hadoopunit.component.Neo4jBootstrap
Loading

0 comments on commit a2f72bf

Please sign in to comment.