diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index 2a2c373242201..16cd55c8a45f0 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -68,6 +68,13 @@
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
@@ -81,6 +88,34 @@
+ <profile>
+ <id>jvm-vectorized</id>
+ <properties>
+ <extra.source.dir>src/jvm-vectorized/java</extra.source.dir>
+ </properties>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>add-vectorized-sources</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>add-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+ <source>${extra.source.dir}</source>
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
target/scala-${scala.binary.version}/classes
diff --git a/mllib-local/src/jvm-vectorized/java/org/apache/spark/ml/linalg/VectorizedBLAS.java b/mllib-local/src/jvm-vectorized/java/org/apache/spark/ml/linalg/VectorizedBLAS.java
new file mode 100644
index 0000000000000..7db1bb9111a00
--- /dev/null
+++ b/mllib-local/src/jvm-vectorized/java/org/apache/spark/ml/linalg/VectorizedBLAS.java
@@ -0,0 +1,483 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg;
+
+import com.github.fommil.netlib.F2jBLAS;
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.FloatVector;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorSpecies;
+
+public class VectorizedBLAS extends F2jBLAS {
+
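+ // SPECIES_MAX selects the widest vector shape the running hardware supports,
+ // e.g. with 256-bit vectors a FloatVector holds 8 lanes and a DoubleVector 4.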
+ private static final VectorSpecies<Float> FMAX = FloatVector.SPECIES_MAX;
+ private static final VectorSpecies<Double> DMAX = DoubleVector.SPECIES_MAX;
+
+ // y += alpha * x
+ @Override
+ public void daxpy(int n, double alpha, double[] x, int incx, double[] y, int incy) {
+ if (n >= 0
+ && x != null && x.length >= n && incx == 1
+ && y != null && y.length >= n && incy == 1) {
+ if (alpha != 0.) {
+ DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha);
+ int i = 0;
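+ // Process full vector-width chunks first (loopBound(n) rounds n down to a
+ // multiple of the lane count); the scalar loop below finishes the remainder.
+ // The same main-loop/tail pattern recurs throughout this class.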
+ for (; i < DMAX.loopBound(n); i += DMAX.length()) {
+ DoubleVector vx = DoubleVector.fromArray(DMAX, x, i);
+ DoubleVector vy = DoubleVector.fromArray(DMAX, y, i);
+ vx.fma(valpha, vy).intoArray(y, i);
+ }
+ for (; i < n; i += 1) {
+ y[i] += alpha * x[i];
+ }
+ }
+ } else {
+ super.daxpy(n, alpha, x, incx, y, incy);
+ }
+ }
+
+ // sum(x * y)
+ @Override
+ public float sdot(int n, float[] x, int incx, float[] y, int incy) {
+ if (n >= 0
+ && x != null && x.length >= n && incx == 1
+ && y != null && y.length >= n && incy == 1) {
+ float sum = 0.0f;
+ int i = 0;
+ FloatVector vsum = FloatVector.zero(FMAX);
+ for (; i < FMAX.loopBound(n); i += FMAX.length()) {
+ FloatVector vx = FloatVector.fromArray(FMAX, x, i);
+ FloatVector vy = FloatVector.fromArray(FMAX, y, i);
+ vsum = vx.fma(vy, vsum);
+ }
+ sum += vsum.reduceLanes(VectorOperators.ADD);
+ for (; i < n; i += 1) {
+ sum += x[i] * y[i];
+ }
+ return sum;
+ } else {
+ return super.sdot(n, x, incx, y, incy);
+ }
+ }
+
+ // sum(x * y)
+ @Override
+ public double ddot(int n, double[] x, int incx, double[] y, int incy) {
+ if (n >= 0
+ && x != null && x.length >= n && incx == 1
+ && y != null && y.length >= n && incy == 1) {
+ double sum = 0.;
+ int i = 0;
+ DoubleVector vsum = DoubleVector.zero(DMAX);
+ for (; i < DMAX.loopBound(n); i += DMAX.length()) {
+ DoubleVector vx = DoubleVector.fromArray(DMAX, x, i);
+ DoubleVector vy = DoubleVector.fromArray(DMAX, y, i);
+ vsum = vx.fma(vy, vsum);
+ }
+ sum += vsum.reduceLanes(VectorOperators.ADD);
+ for (; i < n; i += 1) {
+ sum += x[i] * y[i];
+ }
+ return sum;
+ } else {
+ return super.ddot(n, x, incx, y, incy);
+ }
+ }
+
+ @Override
+ public void dscal(int n, double alpha, double[] x, int incx) {
+ dscal(n, alpha, x, 0, incx);
+ }
+
+ // x = alpha * x
+ @Override
+ public void dscal(int n, double alpha, double[] x, int offsetx, int incx) {
+ if (n >= 0 && x != null && x.length >= offsetx + n && incx == 1) {
+ if (alpha != 1.) {
+ DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha);
+ int i = 0;
+ for (; i < DMAX.loopBound(n); i += DMAX.length()) {
+ DoubleVector vx = DoubleVector.fromArray(DMAX, x, offsetx + i);
+ vx.mul(valpha).intoArray(x, offsetx + i);
+ }
+ for (; i < n; i += 1) {
+ x[offsetx + i] *= alpha;
+ }
+ }
+ } else {
+ super.dscal(n, alpha, x, offsetx, incx);
+ }
+ }
+
+ @Override
+ public void sscal(int n, float alpha, float[] x, int incx) {
+ sscal(n, alpha, x, 0, incx);
+ }
+
+ // x = alpha * x
+ @Override
+ public void sscal(int n, float alpha, float[] x, int offsetx, int incx) {
+ if (n >= 0 && x != null && x.length >= offsetx + n && incx == 1) {
+ if (alpha != 1.) {
+ FloatVector valpha = FloatVector.broadcast(FMAX, alpha);
+ int i = 0;
+ for (; i < FMAX.loopBound(n); i += FMAX.length()) {
+ FloatVector vx = FloatVector.fromArray(FMAX, x, offsetx + i);
+ vx.mul(valpha).intoArray(x, offsetx + i);
+ }
+ for (; i < n; i += 1) {
+ x[offsetx + i] *= alpha;
+ }
+ }
+ } else {
+ super.sscal(n, alpha, x, offsetx, incx);
+ }
+ }
+
+ // y = alpha * a * x + beta * y
+ @Override
+ public void dspmv(String uplo, int n, double alpha, double[] a,
+ double[] x, int incx, double beta, double[] y, int incy) {
+ if ("U".equals(uplo)
+ && n >= 0
+ && a != null && a.length >= n * (n + 1) / 2
+ && x != null && x.length >= n && incx == 1
+ && y != null && y.length >= n && incy == 1) {
+ // y = beta * y
+ dscal(n, beta, y, 1);
+ // y += alpha * A * x
+ if (alpha != 0.) {
+ DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha);
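+ // A is packed column-major upper-triangular: element (i, j) with i <= j is
+ // stored at a[i + j * (j + 1) / 2]. Symmetry lets each packed column feed
+ // both a dot product into y[row] and a scaled update of y[0..row].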
+ for (int row = 0; row < n; row += 1) {
+ int col = 0;
+ DoubleVector vyrowsum = DoubleVector.zero(DMAX);
+ DoubleVector valphaxrow = DoubleVector.broadcast(DMAX, alpha * x[row]);
+ for (; col < DMAX.loopBound(row); col += DMAX.length()) {
+ DoubleVector vx = DoubleVector.fromArray(DMAX, x, col);
+ DoubleVector vy = DoubleVector.fromArray(DMAX, y, col);
+ DoubleVector va = DoubleVector.fromArray(DMAX, a, col + row * (row + 1) / 2);
+ vyrowsum = valpha.mul(vx).fma(va, vyrowsum);
+ valphaxrow.fma(va, vy).intoArray(y, col);
+ }
+ y[row] += vyrowsum.reduceLanes(VectorOperators.ADD);
+ for (; col < row; col += 1) {
+ y[row] += alpha * x[col] * a[col + row * (row + 1) / 2];
+ y[col] += alpha * x[row] * a[col + row * (row + 1) / 2];
+ }
+ y[row] += alpha * x[col] * a[col + row * (row + 1) / 2];
+ }
+ }
+ } else {
+ super.dspmv(uplo, n, alpha, a, x, incx, beta, y, incy);
+ }
+ }
+
+ // a += alpha * x * x.t
+ @Override
+ public void dspr(String uplo, int n, double alpha, double[] x, int incx, double[] a) {
+ if ("U".equals(uplo)
+ && n >= 0
+ && x != null && x.length >= n && incx == 1
+ && a != null && a.length >= n * (n + 1) / 2) {
+ if (alpha != 0.) {
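+ // Update the packed upper triangle one column at a time:
+ // a[i + j * (j + 1) / 2] += alpha * x[j] * x[i] for all i <= j.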
+ for (int row = 0; row < n; row += 1) {
+ int col = 0;
+ DoubleVector valphaxrow = DoubleVector.broadcast(DMAX, alpha * x[row]);
+ for (; col < DMAX.loopBound(row + 1); col += DMAX.length()) {
+ DoubleVector vx = DoubleVector.fromArray(DMAX, x, col);
+ DoubleVector va = DoubleVector.fromArray(DMAX, a, col + row * (row + 1) / 2);
+ vx.fma(valphaxrow, va).intoArray(a, col + row * (row + 1) / 2);
+ }
+ for (; col < row + 1; col += 1) {
+ a[col + row * (row + 1) / 2] += alpha * x[row] * x[col];
+ }
+ }
+ }
+ } else {
+ super.dspr(uplo, n, alpha, x, incx, a);
+ }
+ }
+
+ // a += alpha * x * x.t
+ @Override
+ public void dsyr(String uplo, int n, double alpha, double[] x, int incx, double[] a, int lda) {
+ if ("U".equals(uplo)
+ && n >= 0
+ && x != null && x.length >= n && incx == 1
+ && a != null && a.length >= n * n && lda == n) {
+ if (alpha != 0.) {
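+ // Dense column-major A with lda == n: entry (i, j), i <= j, lives at
+ // a[i + j * n]; only the upper triangle is updated.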
+ for (int row = 0; row < n; row += 1) {
+ int col = 0;
+ DoubleVector valphaxrow = DoubleVector.broadcast(DMAX, alpha * x[row]);
+ for (; col < DMAX.loopBound(row + 1); col += DMAX.length()) {
+ DoubleVector vx = DoubleVector.fromArray(DMAX, x, col);
+ DoubleVector va = DoubleVector.fromArray(DMAX, a, col + row * n);
+ vx.fma(valphaxrow, va).intoArray(a, col + row * n);
+ }
+ for (; col < row + 1; col += 1) {
+ a[col + row * n] += alpha * x[row] * x[col];
+ }
+ }
+ }
+ } else {
+ super.dsyr(uplo, n, alpha, x, incx, a, lda);
+ }
+ }
+
+ @Override
+ public void dgemv(String trans, int m, int n,
+ double alpha, double[] a, int lda, double[] x, int incx,
+ double beta, double[] y, int incy) {
+ dgemv(trans, m, n, alpha, a, 0, lda, x, 0, incx, beta, y, 0, incy);
+ }
+
+ // y = alpha * A * x + beta * y
+ @Override
+ public void dgemv(String trans, int m, int n,
+ double alpha, double[] a, int offseta, int lda, double[] x, int offsetx, int incx,
+ double beta, double[] y, int offsety, int incy) {
+ if ("N".equals(trans)
+ && m >= 0 && n >= 0
+ && a != null && a.length >= offseta + m * n && lda == m
+ && x != null && x.length >= offsetx + n && incx == 1
+ && y != null && y.length >= offsety + m && incy == 1) {
+ // y = beta * y
+ dscal(m, beta, y, offsety, 1);
+ // y += alpha * A * x
+ if (alpha != 0.) {
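+ // Column-major A: accumulate y += alpha * x[col] * A[:, col], one
+ // vectorized axpy per column, so all memory access is unit stride.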
+ DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha);
+ for (int col = 0; col < n; col += 1) {
+ int row = 0;
+ for (; row < DMAX.loopBound(m); row += DMAX.length()) {
+ DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + row + col * m);
+ DoubleVector vy = DoubleVector.fromArray(DMAX, y, offsety + row);
+ valpha.mul(x[offsetx + col]).fma(va, vy)
+ .intoArray(y, offsety + row);
+ }
+ for (; row < m; row += 1) {
+ y[offsety + row] += alpha * x[offsetx + col] * a[offseta + row + col * m];
+ }
+ }
+ }
+ } else if ("T".equals(trans)
+ && m >= 0 && n >= 0
+ && a != null && a.length >= offseta + m * n && lda == m
+ && x != null && x.length >= offsetx + m && incx == 1
+ && y != null && y.length >= offsety + n && incy == 1) {
+ if (alpha != 0. || beta != 1.) {
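+ // For the transposed case each output is an independent dot product:
+ // y[col] = alpha * (A[:, col] . x) + beta * y[col].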
+ for (int col = 0; col < n; col += 1) {
+ double sum = 0.;
+ int row = 0;
+ DoubleVector vsum = DoubleVector.zero(DMAX);
+ for (; row < DMAX.loopBound(m); row += DMAX.length()) {
+ DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + row + col * m);
+ DoubleVector vx = DoubleVector.fromArray(DMAX, x, offsetx + row);
+ vsum = va.fma(vx, vsum);
+ }
+ sum += vsum.reduceLanes(VectorOperators.ADD);
+ for (; row < m; row += 1) {
+ sum += x[offsetx + row] * a[offseta + row + col * m];
+ }
+ y[offsety + col] = alpha * sum + beta * y[offsety + col];
+ }
+ }
+ } else {
+ super.dgemv(trans, m, n, alpha, a, offseta, lda, x, offsetx, incx, beta, y, offsety, incy);
+ }
+ }
+
+ @Override
+ public void sgemv(String trans, int m, int n,
+ float alpha, float[] a, int lda, float[] x, int incx,
+ float beta, float[] y, int incy) {
+ sgemv(trans, m, n, alpha, a, 0, lda, x, 0, incx, beta, y, 0, incy);
+ }
+
+ // y = alpha * A * x + beta * y
+ @Override
+ public void sgemv(String trans, int m, int n,
+ float alpha, float[] a, int offseta, int lda, float[] x, int offsetx, int incx,
+ float beta, float[] y, int offsety, int incy) {
+ if ("N".equals(trans)
+ && m >= 0 && n >= 0
+ && a != null && a.length >= offseta + m * n && lda == m
+ && x != null && x.length >= offsetx + n && incx == 1
+ && y != null && y.length >= offsety + m && incy == 1) {
+ // y = beta * y
+ sscal(m, beta, y, offsety, 1);
+ // y += alpha * A * x
+ if (alpha != 0.f) {
+ FloatVector valpha = FloatVector.broadcast(FMAX, alpha);
+ for (int col = 0; col < n; col += 1) {
+ int row = 0;
+ for (; row < FMAX.loopBound(m); row += FMAX.length()) {
+ FloatVector va = FloatVector.fromArray(FMAX, a, offseta + row + col * m);
+ FloatVector vy = FloatVector.fromArray(FMAX, y, offsety + row);
+ valpha.mul(x[offsetx + col]).fma(va, vy)
+ .intoArray(y, offsety + row);
+ }
+ for (; row < m; row += 1) {
+ y[offsety + row] += alpha * x[offsetx + col] * a[offseta + row + col * m];
+ }
+ }
+ }
+ } else if ("T".equals(trans)
+ && m >= 0 && n >= 0
+ && a != null && a.length >= offseta + m * n && lda == m
+ && x != null && x.length >= offsetx + m && incx == 1
+ && y != null && y.length >= offsety + n && incy == 1) {
+ if (alpha != 0.f || beta != 1.f) {
+ for (int col = 0; col < n; col += 1) {
+ float sum = 0.f;
+ int row = 0;
+ FloatVector vsum = FloatVector.zero(FMAX);
+ for (; row < FMAX.loopBound(m); row += FMAX.length()) {
+ FloatVector va = FloatVector.fromArray(FMAX, a, offseta + row + col * m);
+ FloatVector vx = FloatVector.fromArray(FMAX, x, offsetx + row);
+ vsum = va.fma(vx, vsum);
+ }
+ sum += vsum.reduceLanes(VectorOperators.ADD);
+ for (; row < m; row += 1) {
+ sum += x[offsetx + row] * a[offseta + row + col * m];
+ }
+ y[offsety + col] = alpha * sum + beta * y[offsety + col];
+ }
+ }
+ } else {
+ super.sgemv(trans, m, n, alpha, a, offseta, lda, x, offsetx, incx, beta, y, offsety, incy);
+ }
+ }
+
+ @Override
+ public void dgemm(String transa, String transb, int m, int n, int k,
+ double alpha, double[] a, int lda, double[] b, int ldb,
+ double beta, double[] c, int ldc) {
+ dgemm(transa, transb, m, n, k, alpha, a, 0, lda, b, 0, ldb, beta, c, 0, ldc);
+ }
+
+ // c = alpha * a * b + beta * c
+ @Override
+ public void dgemm(String transa, String transb, int m, int n, int k,
+ double alpha, double[] a, int offseta, int lda, double[] b, int offsetb, int ldb,
+ double beta, double[] c, int offsetc, int ldc) {
+ if ("N".equals(transa) && "N".equals(transb)
+ && m >= 0 && n >= 0 && k >= 0
+ && a != null && a.length >= offseta + m * k && lda == m
+ && b != null && b.length >= offsetb + k * n && ldb == k
+ && c != null && c.length >= offsetc + m * n && ldc == m) {
+ // C = beta * C
+ dscal(m * n, beta, c, offsetc, 1);
+ // C += alpha * A * B
+ if (alpha != 0.) {
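+ // Column-oriented update: C[:, col] += alpha * B(i, col) * A[:, i], one
+ // vectorized axpy per (col, i) pair, keeping all accesses unit stride.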
+ DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha);
+ for (int col = 0; col < n; col += 1) {
+ for (int i = 0; i < k; i += 1) {
+ int row = 0;
+ for (; row < DMAX.loopBound(m); row += DMAX.length()) {
+ DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + i * m + row);
+ DoubleVector vc = DoubleVector.fromArray(DMAX, c, offsetc + col * m + row);
+ valpha.mul(b[offsetb + col * k + i]).fma(va, vc)
+ .intoArray(c, offsetc + col * m + row);
+ }
+ for (; row < m; row += 1) {
+ c[offsetc + col * m + row] += alpha * a[offseta + i * m + row] * b[offsetb + col * k + i];
+ }
+ }
+ }
+ }
+ } else if ("N".equals(transa) && "T".equals(transb)
+ && m >= 0 && n >= 0 && k >= 0
+ && a != null && a.length >= offseta + m * k && lda == m
+ && b != null && b.length >= offsetb + k * n && ldb == n
+ && c != null && c.length >= offsetc + m * n && ldc == m) {
+ // C = beta * C
+ dscal(m * n, beta, c, offsetc, 1);
+ // C += alpha * A * B
+ if (alpha != 0.) {
+ DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha);
+ for (int i = 0; i < k; i += 1) {
+ for (int col = 0; col < n; col += 1) {
+ int row = 0;
+ for (; row < DMAX.loopBound(m); row += DMAX.length()) {
+ DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + i * m + row);
+ DoubleVector vc = DoubleVector.fromArray(DMAX, c, offsetc + col * m + row);
+ valpha.mul(b[offsetb + col + i * n]).fma(va, vc)
+ .intoArray(c, offsetc + col * m + row);
+ }
+ for (; row < m; row += 1) {
+ c[offsetc + col * m + row] += alpha * a[offseta + i * m + row] * b[offsetb + col + i * n];
+ }
+ }
+ }
+ }
+ } else if ("T".equals(transa) && "N".equals(transb)
+ && m >= 0 && n >= 0 && k >= 0
+ && a != null && a.length >= offseta + m * k && lda == k
+ && b != null && b.length >= offsetb + k * n && ldb == k
+ && c != null && c.length >= offsetc + m * n && ldc == m) {
+ if (alpha != 0. || beta != 1.) {
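+ // With A transposed, C(row, col) = alpha * (A[:, row] . B[:, col]) + beta * C(row, col);
+ // both operands are traversed with unit stride, so each entry is one vector dot product.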
+ for (int col = 0; col < n; col += 1) {
+ for (int row = 0; row < m; row += 1) {
+ double sum = 0.;
+ int i = 0;
+ DoubleVector vsum = DoubleVector.zero(DMAX);
+ for (; i < DMAX.loopBound(k); i += DMAX.length()) {
+ DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + i + row * k);
+ DoubleVector vb = DoubleVector.fromArray(DMAX, b, offsetb + col * k + i);
+ vsum = va.fma(vb, vsum);
+ }
+ sum += vsum.reduceLanes(VectorOperators.ADD);
+ for (; i < k; i += 1) {
+ sum += a[offseta + i + row * k] * b[offsetb + col * k + i];
+ }
+ if (beta != 0.) {
+ c[offsetc + col * m + row] = alpha * sum + beta * c[offsetc + col * m + row];
+ } else {
+ c[offsetc + col * m + row] = alpha * sum;
+ }
+ }
+ }
+ }
+ } else if ("T".equals(transa) && "T".equals(transb)
+ && m >= 0 && n >= 0 && k >= 0
+ && a != null && a.length >= offseta + m * k && lda == k
+ && b != null && b.length >= offsetb + k * n && ldb == n
+ && c != null && c.length >= offsetc + m * n && ldc == m) {
+ if (alpha != 0. || beta != 1.) {
+ // FIXME: do block by block
+ for (int col = 0; col < n; col += 1) {
+ for (int row = 0; row < m; row += 1) {
+ double sum = 0.;
+ for (int i = 0; i < k; i += 1) {
+ sum += a[offseta + i + row * k] * b[offsetb + col + i * n];
+ }
+ if (beta != 0.) {
+ c[offsetc + col * m + row] = alpha * sum + beta * c[offsetc + col * m + row];
+ } else {
+ c[offsetc + col * m + row] = alpha * sum;
+ }
+ }
+ }
+ }
+ } else {
+ super.dgemm(transa, transb, m, n, k,
+ alpha, a, offseta, lda, b, offsetb, ldb,
+ beta, c, offsetc, ldc);
+ }
+ }
+}
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
index b6c1b011f004c..518c71129a970 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
@@ -18,28 +18,51 @@
package org.apache.spark.ml.linalg
import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
-import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}
/**
* BLAS routines for MLlib's vectors and matrices.
*/
private[spark] object BLAS extends Serializable {
- @transient private var _f2jBLAS: NetlibBLAS = _
+ @transient private var _javaBLAS: NetlibBLAS = _
@transient private var _nativeBLAS: NetlibBLAS = _
private val nativeL1Threshold: Int = 256
- // For level-1 function dspmv, use f2jBLAS for better performance.
- private[ml] def f2jBLAS: NetlibBLAS = {
- if (_f2jBLAS == null) {
- _f2jBLAS = new F2jBLAS
+ // For the level-2 routine dspmv, use javaBLAS for better performance.
+ private[ml] def javaBLAS: NetlibBLAS = {
+ if (_javaBLAS == null) {
+ _javaBLAS =
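+ // VectorizedBLAS is compiled only under the jvm-vectorized profile on
+ // JDK 16+, so it is loaded reflectively with a fallback to F2jBLAS.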
+ try {
+ // scalastyle:off classforname
+ Class.forName("org.apache.spark.ml.linalg.VectorizedBLAS", true,
+ Option(Thread.currentThread().getContextClassLoader)
+ .getOrElse(getClass.getClassLoader))
+ .newInstance()
+ .asInstanceOf[NetlibBLAS]
+ // scalastyle:on classforname
+ } catch {
+ case _: Throwable => new F2jBLAS
+ }
+ }
+ _javaBLAS
+ }
+
+ // For level-3 routines, we use the native BLAS.
+ private[ml] def nativeBLAS: NetlibBLAS = {
+ if (_nativeBLAS == null) {
+ _nativeBLAS =
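+ // netlib-java's getInstance returns F2jBLAS when no native library is
+ // installed; prefer javaBLAS (possibly vectorized) over that fallback.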
+ if (NetlibBLAS.getInstance.isInstanceOf[F2jBLAS]) {
+ javaBLAS
+ } else {
+ NetlibBLAS.getInstance
+ }
}
- _f2jBLAS
+ _nativeBLAS
}
private[ml] def getBLAS(vectorSize: Int): NetlibBLAS = {
if (vectorSize < nativeL1Threshold) {
- f2jBLAS
+ javaBLAS
} else {
nativeBLAS
}
@@ -235,14 +258,6 @@ private[spark] object BLAS extends Serializable {
}
}
- // For level-3 routines, we use the native BLAS.
- private[ml] def nativeBLAS: NetlibBLAS = {
- if (_nativeBLAS == null) {
- _nativeBLAS = NativeBLAS
- }
- _nativeBLAS
- }
-
/**
* Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
*
@@ -267,7 +282,7 @@ private[spark] object BLAS extends Serializable {
x: DenseVector,
beta: Double,
y: DenseVector): Unit = {
- f2jBLAS.dspmv("U", n, alpha, A.values, x.values, 1, beta, y.values, 1)
+ javaBLAS.dspmv("U", n, alpha, A.values, x.values, 1, beta, y.values, 1)
}
/**
@@ -279,7 +294,7 @@ private[spark] object BLAS extends Serializable {
val n = v.size
v match {
case DenseVector(values) =>
- NativeBLAS.dspr("U", n, alpha, values, 1, U)
+ nativeBLAS.dspr("U", n, alpha, values, 1, U)
case SparseVector(size, indices, values) =>
val nnz = indices.length
var colStartIdx = 0
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
index 1254ed747aeb4..3272a50508a3d 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
@@ -22,7 +22,6 @@ import java.util.{Arrays, Random}
import scala.collection.mutable.{ArrayBuffer, ArrayBuilder => MArrayBuilder, HashSet => MHashSet}
import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM}
-import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.apache.spark.annotation.Since
@@ -457,7 +456,7 @@ class DenseMatrix @Since("2.0.0") (
if (isTransposed) {
Iterator.tabulate(numCols) { j =>
val col = new Array[Double](numRows)
- blas.dcopy(numRows, values, j, numCols, col, 0, 1)
+ BLAS.nativeBLAS.dcopy(numRows, values, j, numCols, col, 0, 1)
new DenseVector(col)
}
} else {
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASBenchmark.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASBenchmark.scala
new file mode 100644
index 0000000000000..1dcfcf9ebb034
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASBenchmark.scala
@@ -0,0 +1,502 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
+
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+
+/**
+ * Benchmark for BLAS implementations.
+ * To run this benchmark:
+ * {{{
+ * 1. without sbt: bin/spark-submit --class <this class> <spark mllib test jar>
+ * 2. build/sbt "mllib-local/test:runMain <this class>"
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "mllib-local/test:runMain <this class>"
+ * Results will be written to "benchmarks/BLASBenchmark-results.txt".
+ * }}}
+ */
+object BLASBenchmark extends BenchmarkBase {
+
+ override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+
+ val iters = 1e2.toInt
+ val rnd = new scala.util.Random(0)
+
+ val f2jBLAS = new F2jBLAS
+ val nativeBLAS = NetlibBLAS.getInstance
+ val vectorBLAS =
+ try {
+ // scalastyle:off classforname
+ Class.forName("org.apache.spark.ml.linalg.VectorizedBLAS", true,
+ Option(Thread.currentThread().getContextClassLoader)
+ .getOrElse(getClass.getClassLoader))
+ .newInstance()
+ .asInstanceOf[NetlibBLAS]
+ // scalastyle:on classforname
+ } catch {
+ case _: Throwable => new F2jBLAS
+ }
+
+ // scalastyle:off println
+ println("nativeBLAS = " + nativeBLAS.getClass.getName)
+ println("f2jBLAS = " + f2jBLAS.getClass.getName)
+ println("vectorBLAS = " + vectorBLAS.getClass.getName)
+ // scalastyle:on println
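+
+ // When neither a native nor a vectorized implementation is available, the
+ // corresponding instances above are plain F2jBLAS, so the guards below skip
+ // those duplicate benchmark cases.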
+
+ runBenchmark("daxpy") {
+ val n = 1e7.toInt
+ val alpha = rnd.nextDouble
+ val x = Array.fill(n) { rnd.nextDouble }
+ val y = Array.fill(n) { rnd.nextDouble }
+
+ val benchmark = new Benchmark("daxpy", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.daxpy(n, alpha, x, 1, y, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.daxpy(n, alpha, x, 1, y, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.daxpy(n, alpha, x, 1, y, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("ddot") {
+ val n = 1e7.toInt
+ val x = Array.fill(n) { rnd.nextDouble }
+ val y = Array.fill(n) { rnd.nextDouble }
+
+ val benchmark = new Benchmark("ddot", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.ddot(n, x, 1, y, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.ddot(n, x, 1, y, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.ddot(n, x, 1, y, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("sdot") {
+ val n = 1e7.toInt
+ val x = Array.fill(n) { rnd.nextFloat }
+ val y = Array.fill(n) { rnd.nextFloat }
+
+ val benchmark = new Benchmark("sdot", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.sdot(n, x, 1, y, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.sdot(n, x, 1, y, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.sdot(n, x, 1, y, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dscal") {
+ val n = 1e7.toInt
+ val alpha = rnd.nextDouble
+ val x = Array.fill(n) { rnd.nextDouble }
+
+ val benchmark = new Benchmark("dscal", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dscal(n, alpha, x, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dscal(n, alpha, x, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dscal(n, alpha, x, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("sscal") {
+ val n = 1e7.toInt
+ val alpha = rnd.nextFloat
+ val x = Array.fill(n) { rnd.nextFloat }
+
+ val benchmark = new Benchmark("sscal", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.sscal(n, alpha, x, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.sscal(n, alpha, x, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.sscal(n, alpha, x, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dspmv[U]") {
+ val n = 1e4.toInt
+ val alpha = rnd.nextDouble
+ val a = Array.fill(n * (n + 1) / 2) { rnd.nextDouble }
+ val x = Array.fill(n) { rnd.nextDouble }
+ val beta = rnd.nextDouble
+ val y = Array.fill(n) { rnd.nextDouble }
+
+ val benchmark = new Benchmark("dspmv[U]", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dspmv("U", n, alpha, a, x, 1, beta, y, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dspmv("U", n, alpha, a, x, 1, beta, y, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dspmv("U", n, alpha, a, x, 1, beta, y, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dspr[U]") {
+ val n = 1e4.toInt
+ val alpha = rnd.nextDouble
+ val x = Array.fill(n) { rnd.nextDouble }
+ val a = Array.fill(n * (n + 1) / 2) { rnd.nextDouble }
+
+ val benchmark = new Benchmark("dspr[U]", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dspr("U", n, alpha, x, 1, a)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dspr("U", n, alpha, x, 1, a)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dspr("U", n, alpha, x, 1, a)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dsyr[U]") {
+ val n = 1e4.toInt
+ val alpha = rnd.nextDouble
+ val x = Array.fill(n) { rnd.nextDouble }
+ val a = Array.fill(n * n) { rnd.nextDouble }
+
+ val benchmark = new Benchmark("dsyr[U]", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dsyr("U", n, alpha, x, 1, a, n)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dsyr("U", n, alpha, x, 1, a, n)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dsyr("U", n, alpha, x, 1, a, n)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dgemv[N]") {
+ val m = 1e4.toInt
+ val n = 1e3.toInt
+ val alpha = rnd.nextDouble
+ val a = Array.fill(m * n) { rnd.nextDouble }
+ val lda = m
+ val x = Array.fill(n) { rnd.nextDouble }
+ val beta = rnd.nextDouble
+ val y = Array.fill(m) { rnd.nextDouble }
+
+ val benchmark = new Benchmark("dgemv[N]", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dgemv[T]") {
+ val m = 1e4.toInt
+ val n = 1e3.toInt
+ val alpha = rnd.nextDouble
+ val a = Array.fill(m * n) { rnd.nextDouble }
+ val lda = m
+ val x = Array.fill(m) { rnd.nextDouble }
+ val beta = rnd.nextDouble
+ val y = Array.fill(n) { rnd.nextDouble }
+
+ val benchmark = new Benchmark("dgemv[T]", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("sgemv[N]") {
+ val m = 1e4.toInt
+ val n = 1e3.toInt
+ val alpha = rnd.nextFloat
+ val a = Array.fill(m * n) { rnd.nextFloat }
+ val lda = m
+ val x = Array.fill(n) { rnd.nextFloat }
+ val beta = rnd.nextFloat
+ val y = Array.fill(m) { rnd.nextFloat }
+
+ val benchmark = new Benchmark("sgemv[N]", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.sgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.sgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.sgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("sgemv[T]") {
+ val m = 1e4.toInt
+ val n = 1e3.toInt
+ val alpha = rnd.nextFloat
+ val a = Array.fill(m * n) { rnd.nextFloat }
+ val lda = m
+ val x = Array.fill(m) { rnd.nextFloat }
+ val beta = rnd.nextFloat
+ val y = Array.fill(n) { rnd.nextFloat }
+
+ val benchmark = new Benchmark("sgemv[T]", n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.sgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.sgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.sgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dgemm[N,N]") {
+ val m = 1e3.toInt
+ val n = 1e2.toInt
+ val k = 1e3.toInt
+ val alpha = rnd.nextDouble
+ val a = Array.fill(m * k) { rnd.nextDouble }
+ val lda = m
+ val b = Array.fill(k * n) { rnd.nextDouble }
+ val ldb = k
+ val beta = rnd.nextDouble
+ val c = Array.fill(m * n) { rnd.nextDouble }
+ val ldc = m
+
+ val benchmark = new Benchmark("dgemm[N,N]", m * n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dgemm("N", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dgemm("N", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dgemm("N", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dgemm[N,T]") {
+ val m = 1e3.toInt
+ val n = 1e2.toInt
+ val k = 1e3.toInt
+ val alpha = rnd.nextDouble
+ val a = Array.fill(m * k) { rnd.nextDouble }
+ val lda = m
+ val b = Array.fill(k * n) { rnd.nextDouble }
+ val ldb = n
+ val beta = rnd.nextDouble
+ val c = Array.fill(m * n) { rnd.nextDouble }
+ val ldc = m
+
+ val benchmark = new Benchmark("dgemm[N,T]", m * n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dgemm("N", "T", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dgemm("N", "T", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dgemm("N", "T", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+ }
+
+ benchmark.run()
+ }
+
+ runBenchmark("dgemm[T,N]") {
+ val m = 1e3.toInt
+ val n = 1e2.toInt
+ val k = 1e3.toInt
+ val alpha = rnd.nextDouble
+ val a = Array.fill(m * k) { rnd.nextDouble }
+ val lda = k
+ val b = Array.fill(k * n) { rnd.nextDouble }
+ val ldb = k
+ val beta = rnd.nextDouble
+ val c = Array.fill(m * n) { rnd.nextDouble }
+ val ldc = m
+
+ val benchmark = new Benchmark("dgemm[T,N]", m * n, iters, output = output)
+
+ benchmark.addCase("f2j") { _ =>
+ f2jBLAS.dgemm("T", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+
+ if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("native") { _ =>
+ nativeBLAS.dgemm("T", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+ }
+
+ if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+ benchmark.addCase("vector") { _ =>
+ vectorBLAS.dgemm("T", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
+ }
+ }
+
+ benchmark.run()
+ }
+ }
+}
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala
index 781f3da313d82..ce177666f34e6 100644
--- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala
@@ -24,7 +24,7 @@ import org.apache.spark.ml.util.TestingUtils._
class BLASSuite extends SparkMLFunSuite {
test("nativeL1Threshold") {
- assert(getBLAS(128) == BLAS.f2jBLAS)
+ assert(getBLAS(128) == BLAS.javaBLAS)
assert(getBLAS(256) == BLAS.nativeBLAS)
assert(getBLAS(512) == BLAS.nativeBLAS)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala
index 6bbe7e1cb2134..ce9093a6238ee 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala
@@ -18,7 +18,8 @@
package org.apache.spark.ml.ann
import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
-import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}
+
+import org.apache.spark.ml.linalg.BLAS
/**
* In-place DGEMM and DGEMV for Breeze
@@ -41,7 +42,7 @@ private[ann] object BreezeUtil {
require(A.cols == B.rows, "A & B Dimension mismatch!")
require(A.rows == C.rows, "A & C Dimension mismatch!")
require(B.cols == C.cols, "A & C Dimension mismatch!")
- NativeBLAS.dgemm(transposeString(A), transposeString(B), C.rows, C.cols, A.cols,
+ BLAS.nativeBLAS.dgemm(transposeString(A), transposeString(B), C.rows, C.cols, A.cols,
alpha, A.data, A.offset, A.majorStride, B.data, B.offset, B.majorStride,
beta, C.data, C.offset, C.rows)
}
@@ -57,7 +58,7 @@ private[ann] object BreezeUtil {
def dgemv(alpha: Double, A: BDM[Double], x: BDV[Double], beta: Double, y: BDV[Double]): Unit = {
require(A.cols == x.length, "A & x Dimension mismatch!")
require(A.rows == y.length, "A & y Dimension mismatch!")
- NativeBLAS.dgemv(transposeString(A), A.rows, A.cols,
+ BLAS.nativeBLAS.dgemv(transposeString(A), A.rows, A.cols,
alpha, A.data, A.offset, A.majorStride, x.data, x.offset, x.stride,
beta, y.data, y.offset, y.stride)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index ca43ef863003a..453e28609a0c2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -17,14 +17,13 @@
package org.apache.spark.ml.classification
-import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.json4s.{DefaultFormats, JObject}
import org.json4s.JsonDSL._
import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.feature.Instance
-import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
+import org.apache.spark.ml.linalg.{BLAS, DenseVector, SparseVector, Vector, Vectors}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
import org.apache.spark.ml.tree._
@@ -371,7 +370,7 @@ class GBTClassificationModel private[ml](
/** Raw prediction for the positive class. */
private def margin(features: Vector): Double = {
val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction)
- blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1)
+ BLAS.nativeBLAS.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1)
}
/** (private[ml]) Convert to a model in the old API */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 0bc5b9a9ca09f..57fb46b451689 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -980,7 +980,7 @@ class LogisticRegression @Since("1.2.0") (
if (fitWithMean) {
if (multinomial) {
val adapt = Array.ofDim[Double](numClasses)
- BLAS.f2jBLAS.dgemv("N", numClasses, numFeatures, 1.0,
+ BLAS.javaBLAS.dgemv("N", numClasses, numFeatures, 1.0,
initialSolution, numClasses, scaledMean, 1, 0.0, adapt, 1)
BLAS.getBLAS(numFeatures).daxpy(numClasses, 1.0, adapt, 0, 1,
initialSolution, numClasses * numFeatures, 1)
@@ -1016,7 +1016,7 @@ class LogisticRegression @Since("1.2.0") (
if (fitWithMean && solution != null) {
if (multinomial) {
val adapt = Array.ofDim[Double](numClasses)
- BLAS.f2jBLAS.dgemv("N", numClasses, numFeatures, 1.0,
+ BLAS.javaBLAS.dgemv("N", numClasses, numFeatures, 1.0,
solution, numClasses, scaledMean, 1, 0.0, adapt, 1)
BLAS.getBLAS(numFeatures).daxpy(numClasses, -1.0, adapt, 0, 1,
solution, numClasses * numFeatures, 1)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
index 475fc5b7f8ccf..16d711197fdde 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
@@ -183,8 +183,8 @@ class BucketedRandomProjectionLSH(override val uid: String)
var i = 0
while (i < localNumHashTables) {
val offset = i * inputDim
- val norm = BLAS.f2jBLAS.dnrm2(inputDim, values, offset, 1)
- if (norm != 0) BLAS.f2jBLAS.dscal(inputDim, 1.0 / norm, values, offset, 1)
+ val norm = BLAS.javaBLAS.dnrm2(inputDim, values, offset, 1)
+ if (norm != 0) BLAS.javaBLAS.dscal(inputDim, 1.0 / norm, values, offset, 1)
i += 1
}
val randMatrix = new DenseMatrix(localNumHashTables, inputDim, values, true)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 9a24d3b0d09db..a0ddf7129c9b5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -26,7 +26,6 @@ import scala.reflect.ClassTag
import scala.util.{Sorting, Try}
import scala.util.hashing.byteswap64
-import com.github.fommil.netlib.BLAS.{getInstance => blas}
import com.google.common.collect.{Ordering => GuavaOrdering}
import org.apache.hadoop.fs.Path
import org.json4s.DefaultFormats
@@ -483,7 +482,7 @@ class ALSModel private[ml] (
Iterator.range(0, m).flatMap { i =>
// scores = i-th vec in srcMat * dstMat
- BLAS.f2jBLAS.sgemv("T", rank, n, 1.0F, dstMat, 0, rank,
+ BLAS.javaBLAS.sgemv("T", rank, n, 1.0F, dstMat, 0, rank,
srcMat, i * rank, 1, 0.0F, scores, 0, 1)
val srcId = srcIds(i)
@@ -895,9 +894,9 @@ object ALS extends DefaultParamsReadable[ALS] with Logging {
require(c >= 0.0)
require(a.length == k)
copyToDouble(a)
- blas.dspr(upper, k, c, da, 1, ata)
+ BLAS.nativeBLAS.dspr(upper, k, c, da, 1, ata)
if (b != 0.0) {
- blas.daxpy(k, b, da, 1, atb, 1)
+ BLAS.nativeBLAS.daxpy(k, b, da, 1, atb, 1)
}
this
}
@@ -905,8 +904,8 @@ object ALS extends DefaultParamsReadable[ALS] with Logging {
/** Merges another normal equation object. */
def merge(other: NormalEquation): NormalEquation = {
require(other.k == k)
- blas.daxpy(ata.length, 1.0, other.ata, 1, ata, 1)
- blas.daxpy(atb.length, 1.0, other.atb, 1, atb, 1)
+ BLAS.nativeBLAS.daxpy(ata.length, 1.0, other.ata, 1, ata, 1)
+ BLAS.nativeBLAS.daxpy(atb.length, 1.0, other.atb, 1, atb, 1)
this
}
@@ -1280,8 +1279,8 @@ object ALS extends DefaultParamsReadable[ALS] with Logging {
val random = new XORShiftRandom(byteswap64(seed ^ srcBlockId))
val factors = Array.fill(inBlock.srcIds.length) {
val factor = Array.fill(rank)(random.nextGaussian().toFloat)
- val nrm = blas.snrm2(rank, factor, 1)
- blas.sscal(rank, 1.0f / nrm, factor, 1)
+ val nrm = BLAS.nativeBLAS.snrm2(rank, factor, 1)
+ BLAS.nativeBLAS.sscal(rank, 1.0f / nrm, factor, 1)
factor
}
(srcBlockId, factors)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index 78d5ddaa2758b..fd8af71d43568 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -17,13 +17,12 @@
package org.apache.spark.ml.regression
-import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.json4s.{DefaultFormats, JObject}
import org.json4s.JsonDSL._
import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
-import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.ml.linalg.{BLAS, Vector}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree._
import org.apache.spark.ml.tree.impl.GradientBoostedTrees
@@ -299,7 +298,7 @@ class GBTRegressionModel private[ml](
// TODO: When we add a generic Boosting class, handle transform there? SPARK-7129
// Classifies by thresholding sum of weighted tree predictions
val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction)
- blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1)
+ BLAS.nativeBLAS.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1)
}
@Since("1.4.0")
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
index 37e695ef88b14..c86bc0d9a36f3 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
@@ -17,12 +17,10 @@
package org.apache.spark.ml.classification
-import com.github.fommil.netlib.BLAS
-
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.ml.classification.LinearSVCSuite.generateSVMInput
import org.apache.spark.ml.feature.{Instance, LabeledPoint}
-import org.apache.spark.ml.linalg.{Vector, Vectors}
+import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
import org.apache.spark.ml.tree._
@@ -170,8 +168,6 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest {
val numFeatures = trainingDataset.select(featuresCol).first().getAs[Vector](0).size
assert(gbtModel.numFeatures === numFeatures)
- val blas = BLAS.getInstance()
-
val validationDataset = validationData.toDF(labelCol, featuresCol)
testTransformer[(Double, Vector)](validationDataset, gbtModel,
"rawPrediction", "features", "probability", "prediction") {
@@ -179,7 +175,7 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest {
assert(raw.size === 2)
// check that raw prediction is tree predictions dot tree weights
val treePredictions = gbtModel.trees.map(_.rootNode.predictImpl(features).prediction)
- val prediction = blas.ddot(gbtModel.getNumTrees, treePredictions, 1,
+ val prediction = BLAS.nativeBLAS.ddot(gbtModel.getNumTrees, treePredictions, 1,
gbtModel.treeWeights, 1)
assert(raw ~== Vectors.dense(-prediction, prediction) relTol eps)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index 28275eb06cf0d..cebd8cac057f1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -24,14 +24,13 @@ import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, WrappedArray}
-import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter
import org.scalatest.BeforeAndAfterEach
import org.apache.spark._
import org.apache.spark.internal.Logging
-import org.apache.spark.ml.linalg.Vectors
+import org.apache.spark.ml.linalg.{BLAS, Vectors}
import org.apache.spark.ml.recommendation.ALS._
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils}
import org.apache.spark.ml.util.TestingUtils._
@@ -296,7 +295,7 @@ class ALSSuite extends MLTest with DefaultReadWriteTest with Logging {
for ((userId, userFactor) <- userFactors; (itemId, itemFactor) <- itemFactors) {
val x = random.nextDouble()
if (x < totalFraction) {
- val rating = blas.sdot(rank, userFactor, 1, itemFactor, 1)
+ val rating = BLAS.nativeBLAS.sdot(rank, userFactor, 1, itemFactor, 1)
if (x < trainingFraction) {
val noise = noiseStd * random.nextGaussian()
training += Rating(userId, itemId, rating + noise.toFloat)
@@ -1194,7 +1193,7 @@ object ALSSuite extends Logging {
val training = ArrayBuffer.empty[Rating[Int]]
val test = ArrayBuffer.empty[Rating[Int]]
for ((userId, userFactor) <- userFactors; (itemId, itemFactor) <- itemFactors) {
- val rating = blas.sdot(rank, userFactor, 1, itemFactor, 1)
+ val rating = BLAS.nativeBLAS.sdot(rank, userFactor, 1, itemFactor, 1)
val threshold = if (rating > 0) positiveFraction else negativeFraction
val observed = random.nextDouble() < threshold
if (observed) {
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 7bfb0a4aa1f3b..97f5e2f4ff683 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -291,6 +291,12 @@ object SparkBuild extends PomBuild {
(SbtCompile / publishLocal) := publishTask((SbtCompile / publishLocalConfiguration)).value,
publishLocal := Seq((MavenCompile / publishLocal), (SbtCompile / publishLocal)).dependOn.value,
+ javaOptions ++= {
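+ // The Vector API ships in the jdk.incubator.vector incubator module on
+ // JDK 16+ and must be added explicitly for VectorizedBLAS to be usable
+ // in tests and benchmarks.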
+ val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3)
+ val major = versionParts(0).toInt
+ if (major >= 16) Seq("--add-modules=jdk.incubator.vector") else Seq.empty
+ },
+
(Compile / doc / javacOptions) ++= {
val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3)
var major = versionParts(0).toInt