Skip to content

Commit

Permalink
Fair learning pull request (#1235)
Browse files Browse the repository at this point in the history
* Add prerequisites: zlib-devel (or zlib1g-dev) (#1226)

* Added zlib-devel (or zlib1g-dev) to Prerequisites

* Update README.md

* do not use deprecated sklearn module (#1223)

* Enable the C wrapper to extract model to memory and initialize model from memory (#1189)

* extract model to memory and initialize model from memory

* add get_confidence C wrapper function

* JNI concurrent interface extensions (#1215)

* Replaced NativeUtils with Java's own library loader; added MulticlassMultilabel interface to JNI; generated OS-dependent lib files; modified pom to include the generated lib file in the Java library path; removed unused variable in the multiclass learner.

* Since an instance of vw model is not thread safe for multiple predictions in parallel, we introduce a concurrent learner (for multilabel and multiline multiclass learners) that works with a pool of learners to get high throughput for predictions in an online setting.

* Learner pool now creates vw instances of the same model using seed_vw_model method instead of initialize. Former method reuses shared variables from the seed learner instance and hence has much less memory footprint in comparison to latter which allocates new memory for all new instances created.

* Fixed space alignment issue and changed getLearner api to return Optional to stop any unwanted NPEs.

* Removed TODO as we now use seed_vw_model to instantiate multiple learner instances of the same model.

* remove java8 components

* added method to return Future in abstract concurrent predictor; added method to create concurrent predictor using thread pool and predictor pool of same size in factory;

* tweaks
  • Loading branch information
beygel authored and JohnLangford committed May 3, 2017
1 parent c9e3b2c commit cafafd8
Show file tree
Hide file tree
Showing 29 changed files with 1,033 additions and 281 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ These prerequisites are usually pre-installed on many platforms. However, you ma
manager (*yum*, *apt*, *MacPorts*, *brew*, ...) to install missing software.

- [Boost](http://www.boost.org) library, with the `Boost::Program_Options` library option enabled.
- The zlib compression library + headers. In linux distros: package `zlib-devel` (Red Hat/CentOS), or `zlib1g-dev` (Ubuntu/Debian)
- lsb-release (RedHat/CentOS: redhat-lsb-core, Debian: lsb-release, Ubuntu: you're all set, OSX: not required)
- GNU *autotools*: *autoconf*, *automake*, *libtool*, *autoheader*, et al. This is not a strict prerequisite. On many systems (notably Ubuntu with `libboost-program-options-dev` installed), the provided `Makefile` works fine.
- (optional) [git](http://git-scm.com) if you want to check out the latest version of *vowpal wabbit*,
Expand Down
13 changes: 9 additions & 4 deletions java/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,22 @@ endif
VWLIBS := -L../vowpalwabbit -l vw
STDLIBS = $(BOOST_LIBRARY) $(LIBS)
JAVA_INCLUDE = -I $(JAVA_HOME)/include
LIB_NAME = "vw_jni"

ifeq ($(UNAME), Linux)
OS_DEPENDENT_LIB_TARGET = "lib$(LIB_NAME).so"
JAVA_INCLUDE += -I $(JAVA_HOME)/include/linux
endif
ifeq ($(UNAME), FreeBSD)
OS_DEPENDENT_LIB_TARGET = "lib$(LIB_NAME).so"
JAVA_INCLUDE += -I $(JAVA_HOME)/include/linux
endif
ifeq "CYGWIN" "$(findstring CYGWIN,$(UNAME))"
OS_DEPENDENT_LIB_TARGET = "$(LIB_NAME).dll"
JAVA_INCLUDE += -I $(JAVA_HOME)/include/linux
endif
ifeq ($(UNAME), Darwin)
OS_DEPENDENT_LIB_TARGET = "lib$(LIB_NAME).dylib"
JAVA_INCLUDE += -I $(JAVA_HOME)/include/darwin
endif

Expand All @@ -29,14 +34,14 @@ all:
test:
cd ..; $(MAKE) test

things: pom_version target/vw_jni.lib
things: pom_version target/$(OS_DEPENDENT_LIB_TARGET)

pom_version: pom.xml
ver=$$(grep AC_INIT ../configure.ac | cut -d '[' -f 3 | cut -d ']' -f 1) && \
newVer=$$(perl -e "@a=split('\.', '$$ver'); \$$a[2]++; print(join('.', @a))") && \
perl -pi -e "s/(\s*)<version>.*-SNAPSHOT/\1<version>$$newVer-SNAPSHOT/" pom.xml

target/vw_jni.lib: $(jni_OBJS) ../vowpalwabbit/main.o ../vowpalwabbit/libvw.a ../vowpalwabbit/liballreduce.a
target/$(OS_DEPENDENT_LIB_TARGET): $(jni_OBJS) ../vowpalwabbit/main.o ../vowpalwabbit/libvw.a ../vowpalwabbit/liballreduce.a
mkdir -p target;
$(CXX) -shared $(FLAGS) -o $@ $^ $(VWLIBS) $(STDLIBS) $(JAVA_INCLUDE)

Expand All @@ -50,5 +55,5 @@ target/vw_jni.lib: $(jni_OBJS) ../vowpalwabbit/main.o ../vowpalwabbit/libvw.a ..

.PHONY: clean
clean:
rm -f target/vw_jni.lib
rm -f $(jni_SRCS:.cc=.o)
rm -f target/$(OS_DEPENDENT_LIB_TARGET)
rm -f $(jni_OBJS)
14 changes: 11 additions & 3 deletions java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>com.github.johnlangford</groupId>
<artifactId>vw-jni</artifactId>
<version>8.2.2-SNAPSHOT</version>
<version>8.3.3-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Vowpal Wabbit JNI Layer</name>
<description>
Expand Down Expand Up @@ -71,11 +71,19 @@
<testResource>
<directory>${project.build.directory}</directory>
<includes>
<include>vw_jni.*lib</include>
<include>*vw_jni.*</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<forkMode>once</forkMode>
<argLine>-Djava.library.path=./target</argLine>
</configuration>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.6.1</version>
Expand Down Expand Up @@ -173,7 +181,7 @@
<resource>
<directory>${project.build.directory}</directory>
<includes>
<include>vw_jni.*lib</include>
<include>*vw_jni.*</include>
</includes>
</resource>
</resources>
Expand Down
12 changes: 12 additions & 0 deletions java/src/main/c++/vowpalWabbit_learner_VWLearners.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ JNIEXPORT jlong JNICALL Java_vowpalWabbit_learner_VWLearners_initialize(JNIEnv *
return vwPtr;
}

// Creates a new vw instance seeded from an existing one and returns it to
// Java as an opaque handle.
//
// vwPtr is the handle of the seed instance; it is passed to
// VW::seed_vw_model together with an empty extra-arguments string.
// Returns the new instance's address as a jlong, or 0 when a C++ exception
// was caught and handed to rethrow_cpp_exception_as_java_exception.
JNIEXPORT jlong JNICALL Java_vowpalWabbit_learner_VWLearners_seedVWModel(JNIEnv *env, jclass obj, jlong vwPtr)
{ try
  { vw* seedInstance = reinterpret_cast<vw*>(vwPtr);
    vw* clonedInstance = VW::seed_vw_model(seedInstance, "");
    return reinterpret_cast<jlong>(clonedInstance);
  }
  catch(...)
  { // C++ exceptions must not escape into the JVM; convert them instead.
    rethrow_cpp_exception_as_java_exception(env);
  }
  // Only reached after an exception was converted above.
  return 0;
}

JNIEXPORT void JNICALL Java_vowpalWabbit_learner_VWLearners_closeInstance(JNIEnv *env, jclass obj, jlong vwPtr)
{ try
{ VW::finish(*((vw*)vwPtr));
Expand Down
8 changes: 8 additions & 0 deletions java/src/main/c++/vowpalWabbit_learner_VWLearners.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

81 changes: 80 additions & 1 deletion java/src/main/c++/vowpalWabbit_learner_VWMulticlassLearner.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include <string>
#include <vector>
#include "vowpalWabbit_learner_VWMulticlassLearner.h"
#include "../../../../vowpalwabbit/vw.h"
#include "jni_base_learner.h"

jint multiclass_predictor(example* vec, JNIEnv *env) { return vec->pred.multiclass; }
// Predictor callback handed to base_predict: reads the multiclass
// prediction stored on the example. The JNIEnv argument is not used here.
jint multiclass_predictor(example* vec, JNIEnv *env)
{ const jint predicted_label = vec->pred.multiclass;
  return predicted_label;
}

JNIEXPORT jint JNICALL Java_vowpalWabbit_learner_VWMulticlassLearner_predict(JNIEnv *env, jobject obj, jstring example_string, jboolean learn, jlong vwPtr)
{ return base_predict<jint>(env, example_string, learn, vwPtr, multiclass_predictor);
Expand All @@ -11,3 +12,81 @@ JNIEXPORT jint JNICALL Java_vowpalWabbit_learner_VWMulticlassLearner_predict(JNI
// JNI entry point for a multiline example: forwards the array of example
// strings to the jobjectArray overload of base_predict and returns the
// value produced through multiclass_predictor.
JNIEXPORT jint JNICALL Java_vowpalWabbit_learner_VWMulticlassLearner_predictMultiline(JNIEnv *env, jobject obj, jobjectArray example_strings, jboolean learn, jlong vwPtr)
{ const jint prediction = base_predict<jint>(env, example_strings, learn, vwPtr, multiclass_predictor);
  return prediction;
}

/*
 * Private multiline prediction utility.
 *
 * Reads every string in example_strings into a vw example, runs it through
 * base_predict (learning when `learn` is true), and stores the resulting
 * example pointer in ex_array[i]. Afterwards one extra example built from
 * the string "\0" is pushed through to close out the multiline block.
 *
 * env             - JNI environment of the calling thread
 * obj             - Java receiver object (unused here)
 * example_strings - Java String[] holding one example per element
 * learn           - whether base_predict should learn from each example
 * vwPtr           - handle of the vw instance, as passed from Java
 * ex_array        - caller-owned output buffer with room for at least
 *                   GetArrayLength(example_strings) pointers
 *
 * NOTE(review): the trailing `false` passed to base_predict presumably
 * suppresses computing a return value - confirm in jni_base_learner.h.
 */
void _predict_for_multilines(JNIEnv *env, jobject obj, jobjectArray example_strings, jboolean learn, jlong vwPtr, example** ex_array)
{ vw* vwInstance = (vw*)vwPtr;
  const int example_count = env->GetArrayLength(example_strings);

  // first pass to process all examples without giving final predictions
  for (int i = 0; i < example_count; i++)
  { jstring example_string = (jstring) (env->GetObjectArrayElement(example_strings, i));
    example* ex = read_example(env, example_string, vwInstance);

    // Each GetObjectArrayElement call creates a new local reference. Release
    // it as soon as the string has been parsed so large batches cannot
    // exhaust the JNI local reference table.
    // NOTE(review): assumes read_example does not retain the jstring - confirm.
    if (example_string != nullptr)
      env->DeleteLocalRef(example_string);

    base_predict<jint>(env, ex, learn, vwInstance, multiclass_predictor, false);
    ex_array[i] = ex;
  }

  // The caller's example_strings reference is deliberately left alone: it is
  // an argument of the native method and the JVM frees it on return, so a
  // helper should not invalidate it behind the caller's back.

  // close out examples
  example* terminator = read_example("\0", vwInstance);
  base_predict<jint>(env, terminator, learn, vwInstance, multiclass_predictor, false);
}

// JNI entry point: runs every line of a multiline example through the model
// and returns one multiclass prediction per line.
//
// example_strings - Java String[] with one example per element
// learn           - forwarded to _predict_for_multilines
// vwPtr           - handle of the vw instance
//
// Returns a new Java int[] of the same length as example_strings, or null
// (with the JVM's exception left pending) if the result array could not be
// allocated.
JNIEXPORT jintArray JNICALL Java_vowpalWabbit_learner_VWMulticlassLearner_predictForAllLines(JNIEnv *env, jobject obj, jobjectArray example_strings, jboolean learn, jlong vwPtr)
{ const int example_count = env->GetArrayLength(example_strings);

  // std::vector owns the scratch buffers, so they are released on every
  // exit path (including the early return below) without manual delete[].
  std::vector<example*> ex_array(example_count);

  // annotate examples for inputs
  _predict_for_multilines(env, obj, example_strings, learn, vwPtr, ex_array.data());

  // second pass to collect all predictions as jint
  std::vector<jint> pred_c_array(example_count);
  for (int i = 0; i < example_count; i++)
    pred_c_array[i] = ex_array[i]->pred.multiclass;

  // NewIntArray returns null when the array cannot be constructed; writing
  // into a null array is not allowed, so bail out and let Java see the error.
  jintArray pred_j_array = env->NewIntArray(example_count);
  if (pred_j_array == nullptr)
    return nullptr;

  env->SetIntArrayRegion(pred_j_array, 0, example_count, pred_c_array.data());
  return pred_j_array;
}

// JNI entry point: runs every line of a multiline example through the model
// and returns one label string per line.
//
// When the model carries a label dictionary (sd->ldict), each prediction is
// mapped to its named label; otherwise the numeric prediction index is
// rendered with std::to_string.
//
// example_strings - Java String[] with one example per element
// learn           - forwarded to _predict_for_multilines
// vwPtr           - handle of the vw instance
//
// Returns a new Java String[] of the same length as example_strings, or null
// (with the JVM's exception left pending) if a JNI allocation or class
// lookup failed.
JNIEXPORT jobjectArray JNICALL Java_vowpalWabbit_learner_VWMulticlassLearner_predictNamedLabelsForAllLines(JNIEnv *env, jobject obj, jobjectArray example_strings, jboolean learn, jlong vwPtr)
{ vw* vwInstance = (vw*)vwPtr;
  const int example_count = env->GetArrayLength(example_strings);

  // std::vector releases the scratch buffer on every exit path.
  std::vector<example*> ex_array(example_count);

  // annotate examples for inputs
  _predict_for_multilines(env, obj, example_strings, learn, vwPtr, ex_array.data());

  // FindClass / NewObjectArray return null on failure with an exception
  // pending; further JNI calls on the null result are not allowed.
  jclass string_class = env->FindClass("java/lang/String");
  if (string_class == nullptr)
    return nullptr;
  jobjectArray pred_j_str_array = env->NewObjectArray(example_count, string_class, NULL);
  env->DeleteLocalRef(string_class);
  if (pred_j_str_array == nullptr)
    return nullptr;

  // second pass to collect all pretty string predictions from annotated examples
  for (int i = 0; i < example_count; i++)
  { jstring pretty_pred_str = NULL;
    if (vwInstance->sd->ldict)
    { // if named labels were provided, use the named labels from the model
      substring ss = vwInstance->sd->ldict->get(ex_array[i]->pred.multiclass);
      pretty_pred_str = env->NewStringUTF(std::string(ss.begin, ss.end-ss.begin).c_str());
    }
    else
    { // else use the string value of the multiclass prediction index as an output
      pretty_pred_str = env->NewStringUTF(std::to_string(ex_array[i]->pred.multiclass).c_str());
    }

    // NewStringUTF returns null when the string cannot be constructed.
    if (pretty_pred_str == nullptr)
      return nullptr;

    env->SetObjectArrayElement(pred_j_str_array, i, pretty_pred_str);

    // The array now holds its own reference; drop the local one so a long
    // batch does not accumulate one local reference per line.
    env->DeleteLocalRef(pretty_pred_str);
  }

  return pred_j_str_array;
}
22 changes: 19 additions & 3 deletions java/src/main/c++/vowpalWabbit_learner_VWMulticlassLearner.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit cafafd8

Please sign in to comment.