Skip to content

Commit

Permalink
fixes #9
Browse files Browse the repository at this point in the history
remove the Hadoop jars from the packaged lib directory, and instead run the Slurper via the "hadoop" command so the locally installed Hadoop libs are used
  • Loading branch information
alexholmes committed Feb 16, 2014
1 parent de572fb commit ebe6434
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 161 deletions.
23 changes: 15 additions & 8 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<groupId>com.alexholmes</groupId>
<artifactId>hdfs-slurper</artifactId>
<name>HDFS File Slurper</name>
<version>0.1.6</version>
<version>0.1.7</version>
<packaging>jar</packaging>

<properties>
Expand Down Expand Up @@ -33,7 +33,7 @@
</properties>

<description>
A simple utility to copy files from a local file system into HDFS, and vice-versa
A simple utility to copy files from a local file system into HDFS, and vice-versa.
</description>

<developers>
Expand All @@ -49,8 +49,8 @@

<repository>
<id>hadoop-non-releases</id>
<name>Hadoop non-releases</name>
<url>http://alexholmes.github.com/hadoop-book-mvn-repo/repository/releases/</url>
<name>Twitter Maven Repo</name>
<url>http://maven.twttr.com/</url>
<releases>
<enabled>true</enabled>
</releases>
Expand Down Expand Up @@ -101,14 +101,21 @@

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
<artifactId>hadoop-common</artifactId>
<version>2.2.0</version>
<scope>compile</scope>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.2.0</version>
</dependency>

<dependency>
<groupId>com.hadoop.compression.lzo</groupId>
<groupId>com.hadoop.gplcompression</groupId>
<artifactId>hadoop-lzo</artifactId>
<version>0.4.14</version>
<version>0.4.19</version>
<scope>compile</scope>
</dependency>

Expand Down
5 changes: 4 additions & 1 deletion src/main/assembly/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@
<directory>target/lib</directory>
<outputDirectory>lib</outputDirectory>
<includes>
<include>*.jar</include>
<include>commons*.jar</include>
<include>log4j*.jar</include>
<include>slf4j*.jar</include>
<include>hadoop-lzo*.jar</include>
</includes>
</fileSet>
<fileSet>
Expand Down
9 changes: 2 additions & 7 deletions src/main/config/slurper-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@
##########################################################


# Set the directory for the local Java installation
# The location of the main hadoop script
#
# export JAVA_HOME=/usr/java/latest


# Set the directory for the local Hadoop installation
#
# export HADOOP_HOME=/usr/local/hadoop
export HADOOP_BIN=/usr/local/hadoop/bin/hadoop

2 changes: 1 addition & 1 deletion src/main/java/com/alexholmes/hdfsslurper/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class Config {
private long pollSleepPeriodMillis;
FileSystem srcFs;
FileSystem destFs;
Configuration config = new Configuration();
Configuration config;


public String getDatasource() {
Expand Down
15 changes: 7 additions & 8 deletions src/main/java/com/alexholmes/hdfsslurper/Configurator.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ public enum ConfigNames {

private static Log log = LogFactory.getLog(Configurator.class);

public static Config loadAndVerify(String path) throws IOException, MissingRequiredConfigException, ClassNotFoundException, MutuallyExclusiveConfigsExist, ConfigSettingException, FileSystemMkdirFailed, NoMutuallyExclusiveConfigsExist {
public static Config loadAndVerify(Configuration config, String path) throws IOException, MissingRequiredConfigException, ClassNotFoundException, MutuallyExclusiveConfigsExist, ConfigSettingException, FileSystemMkdirFailed, NoMutuallyExclusiveConfigsExist {
Map<String, String> props = loadProperties(path);

Config c = load(props);
Config c = load(config, props);

// make sure the mutually exclusive config names are flagged
//
Expand All @@ -75,13 +75,12 @@ public static Config loadAndVerify(String path) throws IOException, MissingRequi
return c;
}

public static Config load(Map<String, String> props) throws IOException, MissingRequiredConfigException, ClassNotFoundException {
public static Config load(Configuration config, Map<String, String> props) throws IOException, MissingRequiredConfigException, ClassNotFoundException {
Config c = new Config();

// set the Hadoop config
//
Configuration conf = new Configuration();
c.setConfig(conf);
c.setConfig(config);

// datasource name
//
Expand All @@ -98,17 +97,17 @@ public static Config load(Map<String, String> props) throws IOException, Missing

// setup the file systems
//
c.setSrcFs(c.getSrcDir().getFileSystem(conf));
c.setSrcFs(c.getSrcDir().getFileSystem(config));
if(c.getDestDir() != null) {
c.setDestFs(c.getDestDir().getFileSystem(conf));
c.setDestFs(c.getDestDir().getFileSystem(config));
}

// compression
//
String compressionCodec = getConfigValue(props, ConfigNames.COMPRESSION_CODEC);
if (compressionCodec != null) {
c.setCodec((CompressionCodec)
ReflectionUtils.newInstance(Class.forName(compressionCodec), conf));
ReflectionUtils.newInstance(Class.forName(compressionCodec), config));
}
c.setCreateLzopIndex(isOptionEnabled(props, ConfigNames.CREATE_LZO_INDEX));

Expand Down
66 changes: 49 additions & 17 deletions src/main/java/com/alexholmes/hdfsslurper/Slurper.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.PropertyConfigurator;

import java.io.FileInputStream;
Expand All @@ -31,9 +35,10 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

public class Slurper {
public class Slurper extends Configured implements Tool {
private static Log log = LogFactory.getLog(Slurper.class);
public static final String ARGS_CONFIG_FILE = "config-file";
public static final String ARGS_LOG4J_FILE = "log4j-file";

private Config config;

Expand All @@ -55,10 +60,11 @@ public void configure(String... args) throws ClassNotFoundException, IllegalAcce

Options options = new Options();
options.addOption("o", ARGS_CONFIG_FILE, true, "The configuration file (required). ");
options.addOption("o", ARGS_LOG4J_FILE, true, "The log4j file (required). ");

CommandLine commandLine;
try {
commandLine = new PosixParser().parse(options, args);
commandLine = new PosixParser().parse(options, args, false);
} catch (ParseException e) {
log.error("Could not parse command line args: " + e.getMessage());
System.err.println("Could not parse command line args: " + e.getMessage());
Expand All @@ -72,31 +78,38 @@ public void configure(String... args) throws ClassNotFoundException, IllegalAcce
printUsageAndExit(options, 2);
}

String log4jPath = commandLine.getOptionValue(ARGS_LOG4J_FILE);
if (log4jPath == null) {
System.err.println("Missing required argument " + ARGS_LOG4J_FILE);
printUsageAndExit(options, 3);
}

System.out.println("Conf = " + getConf());

try {
config = Configurator.loadAndVerify(path);
config = Configurator.loadAndVerify(getConf(), path);
} catch (Configurator.MissingRequiredConfigException e) {
printErrorAndExit("Missing required option in config file: " + e.getKey(), 3);
printErrorAndExit("Missing required option in config file: " + e.getKey(), 10);
} catch (Configurator.MutuallyExclusiveConfigsExist e2) {
printErrorAndExit("Mutually exclusive options are both set (only one should be set): " + e2.getKey1() +
", " + e2.getKey2(), 4);
", " + e2.getKey2(), 11);
} catch (Configurator.ConfigSettingException e) {
printErrorAndExit(e.getMessage(), 5);
printErrorAndExit(e.getMessage(), 12);
} catch (Configurator.FileSystemMkdirFailed e3) {
printErrorAndExit(e3.getMessage(), 6);
printErrorAndExit(e3.getMessage(), 13);
} catch (Configurator.NoMutuallyExclusiveConfigsExist e4) {
printErrorAndExit("One of these mutually exclusive options must be set: " + e4.getKey1() +
", " + e4.getKey2(), 7);
", " + e4.getKey2(), 14);
}

setupLog4j(config.getDatasource());
setupLog4j(log4jPath, config.getDatasource());
}

private void setupLog4j(String datasourceName) throws IOException {
String propFile = System.getProperty("slurper.log4j.properties");
private void setupLog4j(String log4jPath, String datasourceName) throws IOException {
Properties p = new Properties();
InputStream is = null;
try {
is = new FileInputStream(propFile);
is = new FileInputStream(log4jPath);
p.load(is);
p.put("log.datasource", datasourceName); // overwrite "log.dir"
PropertyConfigurator.configure(p);
Expand Down Expand Up @@ -151,15 +164,34 @@ public void run() {
programmaticShutdown.set(true);
}

public static void main(String... args) {
/**
* Main entry point.
*
* @param args arguments
* @throws Exception when something goes wrong
*/
public static void main(final String[] args) throws Exception {
Slurper slurper = new Slurper();
int res = ToolRunner.run(new Configuration(), slurper, args);
System.exit(res);
}

/**
* The MapReduce driver - setup and launch the job.
*
* @param args the command-line arguments
* @return the process exit code
* @throws Exception if something goes wrong
*/
public int run(final String[] args) throws Exception {
try {
Slurper slurper = new Slurper();
slurper.configure(args);
slurper.run();
configure(args);
run();
return 0;
} catch (Throwable t) {
log.error("Caught exception in main()", t);
t.printStackTrace();
System.exit(1000);
return 1000;
}
}
}
67 changes: 4 additions & 63 deletions src/main/scripts/base
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,11 @@ BASEDIR=`pwd`

. $BASEDIR/conf/slurper-env.sh

if [ "$JAVA_HOME" == "" ]; then
echo "JAVA_HOME must be set"
exit 1
if [ ! -f "${HADOOP_BIN}" ]; then
echo "HADOOP_BIN must be set and point to the location of the hadoop script"
exit 1;
fi

if [ ! -f $JAVA_HOME/bin/java ]; then
echo "JAVA_HOME must be set to a valid Java installation directory"
exit 2
fi

CDH_HADOOP_HOME=/usr/lib/hadoop

if [ ! -d "${HADOOP_HOME}" ]; then
if [ -d "${CDH_HADOOP_HOME}" ]; then
export HADOOP_HOME=${CDH_HADOOP_HOME}
echo "HADOOP_HOME environment not set, but found ${HADOOP_HOME} in path so using that"
else
echo "HADOOP_HOME must be set and point to the hadoop home directory"
exit 1;
fi
fi

export HADOOP_CONF_DIR=${HADOOP_HOME}/conf

export SLURPER_JAR_DIR=$BASEDIR/lib

function add_to_classpath() {
Expand All @@ -63,49 +44,9 @@ function add_to_classpath() {

add_to_classpath ${SLURPER_JAR_DIR}

function add_to_hadoop_classpath() {
dir=$1
for f in $dir/*.jar; do
HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:$f;
done

export HADOOP_CLASSPATH
}

export HADOOP_LIB_DIR=$HADOOP_HOME
add_to_hadoop_classpath ${HADOOP_LIB_DIR}
export HADOOP_LIB_DIR=$HADOOP_HOME/lib
add_to_hadoop_classpath ${HADOOP_LIB_DIR}

export SLURPER_JAR="${BASEDIR}/dist/lib/*"
export SLURPER_CONFIG_DIR="${BASEDIR}/config"

export CLASSPATH=${SLURPER_CONFIG_DIR}:${SLURPER_JAR}:${HADOOP_CONF_DIR}:${HADOOP_CLASSPATH}:${SLURPER_CLASSPATH}
export HADOOP_CLASSPATH=${SLURPER_CONFIG_DIR}:${SLURPER_CLASSPATH}:${HADOOP_CLASSPATH}

export JAVA=$JAVA_HOME/bin/java
export JAVA_HEAP_MAX=-Xmx512m

# pick up the native Hadoop directory if it exists
# this is to support native compression codecs
#
if [ -d "${HADOOP_HOME}/build/native" -o -d "${HADOOP_HOME}/lib/native" -o -d "${HADOOP_HOME}/sbin" ]; then
JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} -Xmx32m ${HADOOP_JAVA_PLATFORM_OPTS} org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`

if [ -d "$HADOOP_HOME/build/native" ]; then
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_HOME}/build/native/${JAVA_PLATFORM}/lib
else
JAVA_LIBRARY_PATH=${HADOOP_HOME}/build/native/${JAVA_PLATFORM}/lib
fi
fi

if [ -d "${HADOOP_HOME}/lib/native" ]; then
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}
else
JAVA_LIBRARY_PATH=${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}
fi
fi
fi

export JAVA_LIBRARY_PATH
6 changes: 3 additions & 3 deletions src/main/scripts/slurper-inittab.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ BASEDIR=`dirname ${PRG}`
BASEDIR=`cd ${BASEDIR}/..;pwd`
SCRIPT=`basename ${PRG}`

cd $BASEDIR
cd ${BASEDIR}

. $BASEDIR/bin/base
. ${BASEDIR}/bin/base

date=`date +"%Y%m%d-%H%M%S"`
outfile=$BASEDIR/logs/slurper-$date.out

nohup "$JAVA" $JAVA_HEAP_MAX -Dslurper.log4j.properties=${BASEDIR}/conf/daemon/log4j.properties -Djava.library.path=${JAVA_LIBRARY_PATH} -classpath "$CLASSPATH" com.alexholmes.hdfsslurper.Slurper "$@" &> $outfile < /dev/null &
nohup ${HADOOP_BIN} com.alexholmes.hdfsslurper.Slurper "$@" --log4j-file ${BASEDIR}/conf/daemon/log4j.properties &> $outfile < /dev/null &
PID="$!"
trap "kill $PID" SIGTERM
wait
6 changes: 3 additions & 3 deletions src/main/scripts/slurper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ BASEDIR=`dirname ${PRG}`
BASEDIR=`cd ${BASEDIR}/..;pwd`
SCRIPT=`basename ${PRG}`

cd $BASEDIR
cd ${BASEDIR}

. $BASEDIR/bin/base
. ${BASEDIR}/bin/base

"$JAVA" $JAVA_HEAP_MAX -Dslurper.log4j.properties=${BASEDIR}/conf/normal/log4j.properties -Djava.library.path=${JAVA_LIBRARY_PATH} -classpath "$CLASSPATH" com.alexholmes.hdfsslurper.Slurper "$@"
${HADOOP_BIN} com.alexholmes.hdfsslurper.Slurper "$@" --log4j-file ${BASEDIR}/conf/normal/log4j.properties

Loading

0 comments on commit ebe6434

Please sign in to comment.