Skip to content

Commit

Permalink
[SPARK-13715][MLLIB] Remove last usages of jblas in tests
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Remove the last usages of jblas, which are in the tests.

## How was this patch tested?

Jenkins tests -- the same tests that this patch modifies.

Author: Sean Owen <sowen@cloudera.com>

Closes apache#11560 from srowen/SPARK-13715.
  • Loading branch information
srowen committed Mar 8, 2016
1 parent ca1a7b9 commit 54040f8
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 121 deletions.
1 change: 0 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,6 @@ The text of each license is also included at licenses/LICENSE-[project].txt.

(BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
(BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
(BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
(BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org)
Expand Down
2 changes: 1 addition & 1 deletion docs/mllib-data-types.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ MLlib supports local vectors and matrices stored on a single machine,
as well as distributed matrices backed by one or more RDDs.
Local vectors and local matrices are simple data models
that serve as public interfaces. The underlying linear algebra operations are provided by
[Breeze](http://www.scalanlp.org/) and [jblas](http://jblas.org/).
[Breeze](http://www.scalanlp.org/).
A training example used in supervised learning is called a "labeled point" in MLlib.

## Local vector
Expand Down
6 changes: 0 additions & 6 deletions mllib/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,6 @@
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.jblas</groupId>
<artifactId>jblas</artifactId>
<version>${jblas.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalanlp</groupId>
<artifactId>breeze_${scala.binary.version}</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import scala.Tuple2;
import scala.Tuple3;

import org.jblas.DoubleMatrix;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
Expand All @@ -48,14 +47,14 @@ public void tearDown() {
sc = null;
}

void validatePrediction(
private void validatePrediction(
MatrixFactorizationModel model,
int users,
int products,
DoubleMatrix trueRatings,
double[] trueRatings,
double matchThreshold,
boolean implicitPrefs,
DoubleMatrix truePrefs) {
double[] truePrefs) {
List<Tuple2<Integer, Integer>> localUsersProducts = new ArrayList<>(users * products);
for (int u=0; u < users; ++u) {
for (int p=0; p < products; ++p) {
Expand All @@ -68,7 +67,7 @@ void validatePrediction(
if (!implicitPrefs) {
for (Rating r: predictedRatings) {
double prediction = r.rating();
double correct = trueRatings.get(r.user(), r.product());
double correct = trueRatings[r.product() * users + r.user()];
Assert.assertTrue(String.format("Prediction=%2.4f not below match threshold of %2.2f",
prediction, matchThreshold), Math.abs(prediction - correct) < matchThreshold);
}
Expand All @@ -79,9 +78,9 @@ void validatePrediction(
double denom = 0.0;
for (Rating r: predictedRatings) {
double prediction = r.rating();
double truePref = truePrefs.get(r.user(), r.product());
double truePref = truePrefs[r.product() * users + r.user()];
double confidence = 1.0 +
/* alpha = */ 1.0 * Math.abs(trueRatings.get(r.user(), r.product()));
/* alpha = 1.0 * ... */ Math.abs(trueRatings[r.product() * users + r.user()]);
double err = confidence * (truePref - prediction) * (truePref - prediction);
sqErr += err;
denom += confidence;
Expand All @@ -98,8 +97,8 @@ public void runALSUsingStaticMethods() {
int iterations = 15;
int users = 50;
int products = 100;
Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
users, products, features, 0.7, false, false);
Tuple3<List<Rating>, double[], double[]> testData =
ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);

JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = ALS.train(data.rdd(), features, iterations);
Expand All @@ -112,8 +111,8 @@ public void runALSUsingConstructor() {
int iterations = 15;
int users = 100;
int products = 200;
Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
users, products, features, 0.7, false, false);
Tuple3<List<Rating>, double[], double[]> testData =
ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false);

JavaRDD<Rating> data = sc.parallelize(testData._1());

Expand All @@ -129,8 +128,8 @@ public void runImplicitALSUsingStaticMethods() {
int iterations = 15;
int users = 80;
int products = 160;
Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
users, products, features, 0.7, true, false);
Tuple3<List<Rating>, double[], double[]> testData =
ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);

JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = ALS.trainImplicit(data.rdd(), features, iterations);
Expand All @@ -143,8 +142,8 @@ public void runImplicitALSUsingConstructor() {
int iterations = 15;
int users = 100;
int products = 200;
Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
users, products, features, 0.7, true, false);
Tuple3<List<Rating>, double[], double[]> testData =
ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);

JavaRDD<Rating> data = sc.parallelize(testData._1());

Expand All @@ -161,8 +160,8 @@ public void runImplicitALSWithNegativeWeight() {
int iterations = 15;
int users = 80;
int products = 160;
Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
users, products, features, 0.7, true, true);
Tuple3<List<Rating>, double[], double[]> testData =
ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, true);

JavaRDD<Rating> data = sc.parallelize(testData._1());
MatrixFactorizationModel model = new ALS().setRank(features)
Expand All @@ -179,9 +178,9 @@ public void runRecommend() {
int iterations = 10;
int users = 200;
int products = 50;
Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList(
users, products, features, 0.7, true, false);
JavaRDD<Rating> data = sc.parallelize(testData._1());
List<Rating> testData = ALSSuite.generateRatingsAsJava(
users, products, features, 0.7, true, false)._1();
JavaRDD<Rating> data = sc.parallelize(testData);
MatrixFactorizationModel model = new ALS().setRank(features)
.setIterations(iterations)
.setImplicitPrefs(true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@

import java.io.Serializable;
import java.util.List;
import java.util.Random;

import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import org.jblas.DoubleMatrix;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.util.LinearDataGenerator;
Expand All @@ -45,7 +44,8 @@ public void tearDown() {
sc = null;
}

double predictionError(List<LabeledPoint> validationData, RidgeRegressionModel model) {
private static double predictionError(List<LabeledPoint> validationData,
RidgeRegressionModel model) {
double errorSum = 0;
for (LabeledPoint point: validationData) {
Double prediction = model.predict(point.features());
Expand All @@ -54,11 +54,14 @@ public void tearDown() {
return errorSum / validationData.size();
}

List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
org.jblas.util.Random.seed(42);
private static List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) {
// Pick weights as random values distributed uniformly in [-0.5, 0.5]
DoubleMatrix w = DoubleMatrix.rand(numFeatures, 1).subi(0.5);
return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, numPoints, 42, std);
Random random = new Random(42);
double[] w = new double[numFeatures];
for (int i = 0; i < w.length; i++) {
w[i] = random.nextDouble() - 0.5;
}
return LinearDataGenerator.generateLinearInputAsList(0.0, w, numPoints, 42, std);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.mllib.classification
import scala.collection.JavaConverters._
import scala.util.Random

import org.jblas.DoubleMatrix
import breeze.linalg.{DenseVector => BDV}

import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.mllib.linalg.Vectors
Expand All @@ -45,12 +45,11 @@ object SVMSuite {
nPoints: Int,
seed: Int): Seq[LabeledPoint] = {
val rnd = new Random(seed)
val weightsMat = new DoubleMatrix(1, weights.length, weights: _*)
val weightsMat = new BDV(weights)
val x = Array.fill[Array[Double]](nPoints)(
Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
val y = x.map { xi =>
val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
intercept + 0.01 * rnd.nextGaussian()
val yD = new BDV(xi).dot(weightsMat) + intercept + 0.01 * rnd.nextGaussian()
if (yD < 0) 0.0 else 1.0
}
y.zip(x).map(p => LabeledPoint(p._1, Vectors.dense(p._2)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,22 @@ package org.apache.spark.mllib.optimization

import scala.util.Random

import org.jblas.{DoubleMatrix, SimpleBlas}
import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}

import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.TestingUtils._

class NNLSSuite extends SparkFunSuite {
/** Generate an NNLS problem whose optimal solution is the all-ones vector. */
def genOnesData(n: Int, rand: Random): (DoubleMatrix, DoubleMatrix) = {
val A = new DoubleMatrix(n, n, Array.fill(n*n)(rand.nextDouble()): _*)
val b = A.mmul(DoubleMatrix.ones(n, 1))

val ata = A.transpose.mmul(A)
val atb = A.transpose.mmul(b)

(ata, atb)
def genOnesData(n: Int, rand: Random): (BDM[Double], BDV[Double]) = {
val A = new BDM(n, n, Array.fill(n*n)(rand.nextDouble()))
val b = A * new BDV(Array.fill(n)(1.0))
(A.t * A, A.t * b)
}

/** Compute the objective value */
def computeObjectiveValue(ata: DoubleMatrix, atb: DoubleMatrix, x: DoubleMatrix): Double = {
val res = (x.transpose().mmul(ata).mmul(x)).mul(0.5).sub(atb.dot(x))
res.get(0)
}
def computeObjectiveValue(ata: BDM[Double], atb: BDV[Double], x: BDV[Double]): Double =
(x.t * ata * x) / 2.0 - atb.dot(x)

test("NNLS: exact solution cases") {
val n = 20
Expand All @@ -54,33 +48,34 @@ class NNLSSuite extends SparkFunSuite {

for (k <- 0 until 100) {
val (ata, atb) = genOnesData(n, rand)
val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
assert(x.length === n)
val answer = DoubleMatrix.ones(n, 1)
SimpleBlas.axpy(-1.0, answer, x)
val solved = (x.norm2 < 1e-2) && (x.normmax < 1e-3)
if (solved) numSolved = numSolved + 1
val answer = new BDV(Array.fill(n)(1.0))
val solved =
(breeze.linalg.norm(x - answer) < 0.01) && // L2 norm
((x - answer).toArray.map(_.abs).max < 0.001) // inf norm
if (solved) {
numSolved += 1
}
}

assert(numSolved > 50)
}

test("NNLS: nonnegativity constraint active") {
val n = 5
// scalastyle:off
val ata = new DoubleMatrix(Array(
Array( 4.377, -3.531, -1.306, -0.139, 3.418),
Array(-3.531, 4.344, 0.934, 0.305, -2.140),
Array(-1.306, 0.934, 2.644, -0.203, -0.170),
Array(-0.139, 0.305, -0.203, 5.883, 1.428),
Array( 3.418, -2.140, -0.170, 1.428, 4.684)))
// scalastyle:on
val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636))
val ata = Array(
4.377, -3.531, -1.306, -0.139, 3.418,
-3.531, 4.344, 0.934, 0.305, -2.140,
-1.306, 0.934, 2.644, -0.203, -0.170,
-0.139, 0.305, -0.203, 5.883, 1.428,
3.418, -2.140, -0.170, 1.428, 4.684)
val atb = Array(-1.632, 2.115, 1.094, -1.025, -0.636)

val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628)

val ws = NNLS.createWorkspace(n)
val x = NNLS.solve(ata.data, atb.data, ws)
val x = NNLS.solve(ata, atb, ws)
for (i <- 0 until n) {
assert(x(i) ~== goodx(i) absTol 1E-3)
assert(x(i) >= 0)
Expand All @@ -89,23 +84,21 @@ class NNLSSuite extends SparkFunSuite {

test("NNLS: objective value test") {
val n = 5
val ata = new DoubleMatrix(5, 5
, 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283
, 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884
, -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049
, 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819
, -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814
)
val atb = new DoubleMatrix(5, 1,
-31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017)
val ata = new BDM(5, 5, Array(
517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283,
242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884,
-153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049,
130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819,
-798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814))
val atb = new BDV(Array(-31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017))

/** reference solution obtained from matlab function quadprog */
val refx = new DoubleMatrix(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
val refx = new BDV(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
val refObj = computeObjectiveValue(ata, atb, refx)


val ws = NNLS.createWorkspace(n)
val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
val obj = computeObjectiveValue(ata, atb, x)

assert(obj < refObj + 1E-5)
Expand Down
Loading

0 comments on commit 54040f8

Please sign in to comment.