Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bare Java JNI Bindings optimized for Apache Spark #1798

Merged
merged 34 commits into from
Jun 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
7e71216
initial VW spark featurizer
Mar 9, 2019
70c5315
working version for Spark
eisber Mar 14, 2019
78f76a0
package natives up with Java code. This includes boost program options
eisber Mar 14, 2019
36222d7
removed masking to avoid parameter duplication, fixed duplicate index…
eisber Mar 14, 2019
6231297
API cleanup. preparations for scoring on spark
eisber Mar 15, 2019
88e6a6f
added dynamic port allocation to spanning tree to allow for parallel …
eisber Mar 15, 2019
ee0bcc1
extracting VW command line args from model
eisber Mar 16, 2019
91eba92
final refactor
eisber Mar 16, 2019
dc30782
removed java header generation and checked in the headers just like t…
eisber Mar 16, 2019
23b321b
fix unit tests on build server
eisber Mar 16, 2019
36f666f
only build on linux
eisber Mar 16, 2019
f47d8a7
create missing directory
eisber Mar 16, 2019
7a2f2ac
update java build path
eisber Mar 16, 2019
e63c482
fix package name in cluster wrapper
eisber Mar 16, 2019
bf0c5a6
replaced bzero with memset
eisber Mar 16, 2019
75eeb62
expose native hashing for testing
eisber Mar 16, 2019
27135a6
fixed parameter name
eisber Mar 16, 2019
544a4d9
added javadoc, some more API cleanup
eisber Mar 19, 2019
1ab2345
fix javadoc
eisber Mar 15, 2019
c8c0d7c
Merge branch 'master' into marcozo/spark
eisber Mar 20, 2019
e7e0143
Merge branch 'marcozo/spark' of https://github.com/eisber/vowpal_wabb…
eisber Mar 16, 2019
f67e783
Merge branch 'master' into marcozo/spark
JohnLangford Apr 1, 2019
9a2fb2a
fixed Java hashing
eisber Apr 29, 2019
b0bbd82
Merge branch 'marcozo/spark' of https://github.com/eisber/vowpal_wabb…
eisber Apr 29, 2019
3b1d031
removed debug print out
eisber May 1, 2019
86b00ea
merged bare&classic into single library
May 5, 2019
5e00bff
fixed java build path
eisber May 7, 2019
af179b2
renamed bare to spark
eisber May 7, 2019
cff765d
move the java library to the right path (target/bin)
eisber May 15, 2019
d8d9221
Merge branch 'master' of https://github.com/VowpalWabbit/vowpal_wabbi…
eisber May 15, 2019
01d1ec9
copy the java library to the right path (target/bin)
eisber May 15, 2019
b821517
Merge branch 'master' into marcozo/spark
jackgerrits May 31, 2019
9b31e53
Merge branch 'master' of https://github.com/VowpalWabbit/vowpal_wabbi…
eisber Jun 4, 2019
355fa94
clean up of CMakeFile.txt
eisber Jun 4, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ make test_with_output
cd ..

# Run Java build and test
mvn clean test -f java/pom.xml
mvn verify -f java/pom.xml

# Run python build and tests
cd python
Expand Down
73 changes: 70 additions & 3 deletions java/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ if(JNI_FOUND)
${src_base}/vowpalWabbit_learner_VWScalarLearner.h
${src_base}/vowpalWabbit_learner_VWScalarsLearner.h
${src_base}/vowpalWabbit_VW.h
${src_base}/jni_spark_vw.h
${src_base}/jni_spark_vw_generated.h
${src_base}/vector_io_buf.h
${src_base}/util.h
)

set(vw_jni_sources
Expand All @@ -27,18 +31,28 @@ if(JNI_FOUND)
${src_base}/vowpalWabbit_learner_VWScalarLearner.cc
${src_base}/vowpalWabbit_learner_VWScalarsLearner.cc
${src_base}/vowpalWabbit_VW.cc
${src_base}/jni_spark_vw.cc
${src_base}/vector_io_buf.cc
${src_base}/jni_spark_cluster.cc
${src_base}/util.cc
${CMAKE_CURRENT_SOURCE_DIR}/../vowpalwabbit/spanning_tree.cc
)

add_library(vw_jni SHARED ${vw_jni_headers} ${vw_jni_sources})
target_link_libraries(vw_jni PUBLIC vw)
target_include_directories(vw_jni PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${JNI_INCLUDE_DIRS})
target_include_directories(vw_jni PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${JNI_INCLUDE_DIRS})

# Ensure target directory exists
file(MAKE_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/target/)
add_custom_command(TARGET vw_jni POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:vw_jni> ${CMAKE_CURRENT_SOURCE_DIR}/target/
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:vw_jni> ${CMAKE_CURRENT_SOURCE_DIR}/target/bin/
)

# enable-new-dtags and rpath enables shared object library lookup in the location of libvw_spark_jni.so
target_link_libraries(vw_jni PUBLIC -Wl,--enable-new-dtags -Wl,-rpath,\"\$ORIGIN\" vw)

# Replace version number in POM
configure_file(pom.xml.in ${CMAKE_CURRENT_SOURCE_DIR}/pom.xml @ONLY)

Expand All @@ -54,4 +68,57 @@ if(JNI_FOUND)
LIBRARY DESTINATION ${JAVA_INSTALL_PATH}
)
endif()
endif()

if(NOT WIN32)
# Ensure target directory exists
file(MAKE_DIRECTORY target/classes)
file(MAKE_DIRECTORY target/test-classes)
file(MAKE_DIRECTORY target/bin/natives/linux_64)

# Development
# - uncomment the following section to generate the jni headers
# - it's commented to speed up the build as it's not expected to change frequently

# find_package(Java)
# include(UseJava)

# add_custom_target(javacompile
eisber marked this conversation as resolved.
Show resolved Hide resolved
# COMMAND mvn compile
# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
# COMMENT "Compile classes for javah")

# create_javah(TARGET javaheaders
# CLASSES
# org.vowpalwabbit.spark.VowpalWabbitNative
# org.vowpalwabbit.spark.VowpalWabbitExample
# org.vowpalwabbit.spark.ClusterSpanningTree
# CLASSPATH ${CMAKE_CURRENT_SOURCE_DIR}/target/classes
# OUTPUT_NAME ${CMAKE_CURRENT_SOURCE_DIR}/src/main/c++/jni_spark_vw_generated.h)
# add_dependencies(javaheaders javacompile)

# After vw_jni is linked: bundle the boost/libz shared objects it resolves
# against (as reported by ldd) next to the JNI library under
# target/bin/natives/linux_64, record the path of the vw binary for the Java
# tests, and then run the Maven build.
add_custom_command(TARGET vw_jni POST_BUILD
  # `xargs -I {}` is the supported spelling; GNU xargs documents `-i` as deprecated.
  COMMAND ldd $<TARGET_FILE:vw_jni> | grep -E 'boost|libz' | grep -oP '=> \\K\\S+' | xargs -I {} cp {} target/bin/natives/linux_64
  COMMAND cp $<TARGET_FILE:vw_jni> target/bin/natives/linux_64
  COMMAND echo $<TARGET_FILE:vw-bin> > ${CMAKE_CURRENT_SOURCE_DIR}/target/test-classes/vw-bin.txt
  COMMAND mvn verify
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
  COMMENT "Copying shared library dependencies to output directory")

# Replace version number in POM
configure_file(pom.xml.in ${CMAKE_CURRENT_SOURCE_DIR}/pom.xml @ONLY)

# Pick the directory the JNI library is installed into.
# /Library/Java/Extensions is the macOS (Darwin) JNI extension directory;
# every other non-Windows platform installs into /usr/lib.
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
  set(JAVA_INSTALL_PATH /Library/Java/Extensions)
else()
  set(JAVA_INSTALL_PATH /usr/lib)
endif()

if(VW_INSTALL)
install(TARGETS vw_jni
RUNTIME DESTINATION ${JAVA_INSTALL_PATH}
LIBRARY DESTINATION ${JAVA_INSTALL_PATH}
)
endif()

endif()
endif()
13 changes: 13 additions & 0 deletions java/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,16 @@ It should also be noted that Vowpal Wabbit makes all attempts at compatibility b
| ---------- | ---------------------------------------- |
| 8.4.1 | 10bd09ab06f59291e04ad7805e88fd3e693b7159 |
| 8.1.0 | 9e5831a72d5b0a124c845dcaec75879f498b355f |

# Spark Layer
To improve performance when hosting VW in Spark, an additional optimized layer can be found in org.vowpalwabbit.spark.*. The actual VW/Spark integration will be available through [MMLSpark](https://github.com/Azure/mmlspark).

## Features

1. Native dependencies are included in the JAR file.
2. Features are expected to be already hashed.
3. Multi-pass support.

## Limitations

1. Only simple label is supported for now (e.g. classification/regression).
24 changes: 23 additions & 1 deletion java/pom.xml.in
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@
<name>John Langford</name>
<email>jl@hunch.net</email>
</developer>
<developer>
<name>Markus Cozowicz</name>
<email>marcozo@microsoft.com</email>
</developer>
</developers>

<properties>
Expand Down Expand Up @@ -79,6 +83,11 @@
</dependencies>

<build>
<resources>
<resource>
<directory>target/bin</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${project.build.directory}</directory>
Expand Down Expand Up @@ -152,6 +161,19 @@
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
<version>2.19.1</version>
<executions>
<execution>
<goals>
<goal>integration-test</goal>
<goal>verify</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
Expand All @@ -175,7 +197,7 @@
<!--<forkMode>once</forkMode>-->
<forkCount>1</forkCount>
<reuseForks>false</reuseForks>
<argLine>-Djava.library.path=${project.build.directory}</argLine>
<argLine>-Djava.library.path=${project.build.directory}/bin</argLine>

</configuration>
</plugin>
Expand Down
39 changes: 7 additions & 32 deletions java/src/main/c++/jni_base_learner.cc
Original file line number Diff line number Diff line change
@@ -1,37 +1,11 @@
#include "../../../../vowpalwabbit/vw.h"
#include "../../../../vowpalwabbit/vw_exception.h"
#include "vw.h"
#include "vw_exception.h"

#include "jni_base_learner.h"

void throw_java_exception(JNIEnv *env, const char* name, const char* msg)
{ jclass jc = env->FindClass(name);
if (jc)
env->ThrowNew(jc, msg);
}

void rethrow_cpp_exception_as_java_exception(JNIEnv *env)
{ try
{ throw;
}
catch(const std::bad_alloc& e)
{ throw_java_exception(env, "java/lang/OutOfMemoryError", e.what());
}
catch(const VW::vw_unrecognised_option_exception& e)
{ throw_java_exception(env, "java/lang/IllegalArgumentException", e.what());
}
catch(const std::exception& e)
{ throw_java_exception(env, "java/lang/Exception", e.what());
}

catch (...)
{ throw_java_exception(env, "java/lang/Error", "Unidentified exception => "
"rethrow_cpp_exception_as_java_exception "
"may require some completion...");
}
}

example* read_example(JNIEnv *env, jstring example_string, vw* vwInstance)
{ const char* utf_string = env->GetStringUTFChars(example_string, NULL);
example* read_example(JNIEnv* env, jstring example_string, vw* vwInstance)
{
const char* utf_string = env->GetStringUTFChars(example_string, NULL);
example* ex = read_example(utf_string, vwInstance);

env->ReleaseStringUTFChars(example_string, utf_string);
Expand All @@ -41,5 +15,6 @@ example* read_example(JNIEnv *env, jstring example_string, vw* vwInstance)
}

example* read_example(const char* example_string, vw* vwInstance)
{ return VW::read_example(*vwInstance, example_string);
{
return VW::read_example(*vwInstance, example_string);
}
64 changes: 27 additions & 37 deletions java/src/main/c++/jni_base_learner.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@

#include <jni.h>
#include <functional>
#include "util.h"

void throw_java_exception(JNIEnv *env, const char* name, const char* msg);
void rethrow_cpp_exception_as_java_exception(JNIEnv *env);

example* read_example(JNIEnv *env, jstring example_string, vw* vwInstance);
example* read_example(JNIEnv* env, jstring example_string, vw* vwInstance);
example* read_example(const char* example_string, vw* vwInstance);

// It would appear that after reading posts like
Expand All @@ -16,17 +14,13 @@ example* read_example(const char* example_string, vw* vwInstance);
// http://stackoverflow.com/questions/3203305/write-a-function-that-accepts-a-lambda-expression-as-argument
// it is more efficient to use another type parameter instead of std::function<T(example*)>
// but more difficult to read.
template<typename T, typename F>
T base_predict(
JNIEnv *env,
example* ex,
bool learn,
vw* vwInstance,
const F& predictor,
const bool predict)
{ T result = 0;
template <typename T, typename F>
T base_predict(JNIEnv* env, example* ex, bool learn, vw* vwInstance, const F& predictor, const bool predict)
{
T result = 0;
try
{ if (learn)
{
if (learn)
vwInstance->learn(*ex);
else
vwInstance->predict(*ex);
Expand All @@ -37,36 +31,30 @@ T base_predict(
vwInstance->finish_example(*ex);
}
catch (...)
{ rethrow_cpp_exception_as_java_exception(env);
{
rethrow_cpp_exception_as_java_exception(env);
}
return result;
}

template<typename T, typename F>
T base_predict(
JNIEnv *env,
jstring example_string,
jboolean learn,
jlong vwPtr,
const F& predictor)
{ vw* vwInstance = (vw*)vwPtr;
template <typename T, typename F>
T base_predict(JNIEnv* env, jstring example_string, jboolean learn, jlong vwPtr, const F& predictor)
{
vw* vwInstance = (vw*)vwPtr;
example* ex = read_example(env, example_string, vwInstance);
return base_predict<T>(env, ex, learn, vwInstance, predictor, true);
}

template<typename T, typename F>
T base_predict(
JNIEnv *env,
jobjectArray example_strings,
jboolean learn,
jlong vwPtr,
const F& predictor)
{ vw* vwInstance = (vw*)vwPtr;
template <typename T, typename F>
T base_predict(JNIEnv* env, jobjectArray example_strings, jboolean learn, jlong vwPtr, const F& predictor)
{
vw* vwInstance = (vw*)vwPtr;
int example_count = env->GetArrayLength(example_strings);
multi_ex ex_coll; // When doing multiline prediction the final result is stored in the FIRST example parsed.
multi_ex ex_coll; // When doing multiline prediction the final result is stored in the FIRST example parsed.
example* first_example = NULL;
for (int i=0; i<example_count; i++)
{ jstring example_string = (jstring) (env->GetObjectArrayElement(example_strings, i));
for (int i = 0; i < example_count; i++)
{
jstring example_string = (jstring)(env->GetObjectArrayElement(example_strings, i));
example* ex = read_example(env, example_string, vwInstance);
ex_coll.push_back(ex);
if (i == 0)
Expand All @@ -75,18 +63,20 @@ T base_predict(
env->DeleteLocalRef(example_strings);

try
{ if (learn)
{
if (learn)
vwInstance->learn(ex_coll);
else
vwInstance->predict(ex_coll);
}
catch (...)
{ rethrow_cpp_exception_as_java_exception(env);
{
rethrow_cpp_exception_as_java_exception(env);
}

vwInstance->finish_example(ex_coll);

return predictor(first_example, env);
}

#endif // VW_BASE_LEARNER_H
#endif // VW_BASE_LEARNER_H
Loading