diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index 2a2c373242201..16cd55c8a45f0 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -68,6 +68,13 @@
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
@@ -81,6 +88,34 @@
+  <profiles>
+    <profile>
+      <id>jvm-vectorized</id>
+      <properties>
+        <extra.source.dir>src/jvm-vectorized/java</extra.source.dir>
+      </properties>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-vectorized-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <source>${extra.source.dir}</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
diff --git a/mllib-local/src/jvm-vectorized/java/org/apache/spark/ml/linalg/VectorizedBLAS.java b/mllib-local/src/jvm-vectorized/java/org/apache/spark/ml/linalg/VectorizedBLAS.java
new file mode 100644
index 0000000000000..7db1bb9111a00
--- /dev/null
+++ b/mllib-local/src/jvm-vectorized/java/org/apache/spark/ml/linalg/VectorizedBLAS.java
@@ -0,0 +1,483 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg;
+
+import com.github.fommil.netlib.F2jBLAS;
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.FloatVector;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorSpecies;
+
+public class VectorizedBLAS extends F2jBLAS {
+
+  private static final VectorSpecies<Float> FMAX = FloatVector.SPECIES_MAX;
+  private static final VectorSpecies<Double> DMAX = DoubleVector.SPECIES_MAX;
+
+  // y += alpha * x
+  @Override
+  public void daxpy(int n, double alpha, double[] x, int incx, double[] y, int incy) {
+    if (n >= 0
+        && x != null && x.length >= n && incx == 1
+        && y != null && y.length >= n && incy == 1) {
+      if (alpha != 0.)
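+      // Pattern used throughout this class: a vectorized main loop over
+      // DMAX.loopBound(n) elements (the largest multiple of the species
+      // length that is <= n), followed by a scalar loop for the leftover tail.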
{ + DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha); + int i = 0; + for (; i < DMAX.loopBound(n); i += DMAX.length()) { + DoubleVector vx = DoubleVector.fromArray(DMAX, x, i); + DoubleVector vy = DoubleVector.fromArray(DMAX, y, i); + vx.fma(valpha, vy).intoArray(y, i); + } + for (; i < n; i += 1) { + y[i] += alpha * x[i]; + } + } + } else { + super.daxpy(n, alpha, x, incx, y, incy); + } + } + + // sum(x * y) + @Override + public float sdot(int n, float[] x, int incx, float[] y, int incy) { + if (n >= 0 + && x != null && x.length >= n && incx == 1 + && y != null && y.length >= n && incy == 1) { + float sum = 0.0f; + int i = 0; + FloatVector vsum = FloatVector.zero(FMAX); + for (; i < FMAX.loopBound(n); i += FMAX.length()) { + FloatVector vx = FloatVector.fromArray(FMAX, x, i); + FloatVector vy = FloatVector.fromArray(FMAX, y, i); + vsum = vx.fma(vy, vsum); + } + sum += vsum.reduceLanes(VectorOperators.ADD); + for (; i < n; i += 1) { + sum += x[i] * y[i]; + } + return sum; + } else { + return super.sdot(n, x, incx, y, incy); + } + } + + // sum(x * y) + @Override + public double ddot(int n, double[] x, int incx, double[] y, int incy) { + if (n >= 0 + && x != null && x.length >= n && incx == 1 + && y != null && y.length >= n && incy == 1) { + double sum = 0.; + int i = 0; + DoubleVector vsum = DoubleVector.zero(DMAX); + for (; i < DMAX.loopBound(n); i += DMAX.length()) { + DoubleVector vx = DoubleVector.fromArray(DMAX, x, i); + DoubleVector vy = DoubleVector.fromArray(DMAX, y, i); + vsum = vx.fma(vy, vsum); + } + sum += vsum.reduceLanes(VectorOperators.ADD); + for (; i < n; i += 1) { + sum += x[i] * y[i]; + } + return sum; + } else { + return super.ddot(n, x, incx, y, incy); + } + } + + @Override + public void dscal(int n, double alpha, double[] x, int incx) { + dscal(n, alpha, x, 0, incx); + } + + // x = alpha * x + @Override + public void dscal(int n, double alpha, double[] x, int offsetx, int incx) { + if (n >= 0 && x != null && x.length >= offsetx + n && incx == 1) { + if (alpha != 1.) { + DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha); + int i = 0; + for (; i < DMAX.loopBound(n); i += DMAX.length()) { + DoubleVector vx = DoubleVector.fromArray(DMAX, x, offsetx + i); + vx.mul(valpha).intoArray(x, offsetx + i); + } + for (; i < n; i += 1) { + x[offsetx + i] *= alpha; + } + } + } else { + super.dscal(n, alpha, x, offsetx, incx); + } + } + + @Override + public void sscal(int n, float alpha, float[] x, int incx) { + sscal(n, alpha, x, 0, incx); + } + + // x = alpha * x + @Override + public void sscal(int n, float alpha, float[] x, int offsetx, int incx) { + if (n >= 0 && x != null && x.length >= offsetx + n && incx == 1) { + if (alpha != 1.) { + FloatVector valpha = FloatVector.broadcast(FMAX, alpha); + int i = 0; + for (; i < FMAX.loopBound(n); i += FMAX.length()) { + FloatVector vx = FloatVector.fromArray(FMAX, x, offsetx + i); + vx.mul(valpha).intoArray(x, offsetx + i); + } + for (; i < n; i += 1) { + x[offsetx + i] *= alpha; + } + } + } else { + super.sscal(n, alpha, x, offsetx, incx); + } + } + + // y = alpha * a * x + beta * y + @Override + public void dspmv(String uplo, int n, double alpha, double[] a, + double[] x, int incx, double beta, double[] y, int incy) { + if ("U".equals(uplo) + && n >= 0 + && a != null && a.length >= n * (n + 1) / 2 + && x != null && x.length >= n && incx == 1 + && y != null && y.length >= n && incy == 1) { + // y = beta * y + dscal(n, beta, y, 1); + // y += alpha * A * x + if (alpha != 0.) 
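+      // a holds the upper triangle in packed column-major order: element (i, j)
+      // with i <= j is stored at a[i + j * (j + 1) / 2]. Each iteration below
+      // applies one packed column to y, covering both the dot-product term for
+      // y[row] and the symmetric scatter into y[0..row-1].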
{ + DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha); + for (int row = 0; row < n; row += 1) { + int col = 0; + DoubleVector vyrowsum = DoubleVector.zero(DMAX); + DoubleVector valphaxrow = DoubleVector.broadcast(DMAX, alpha * x[row]); + for (; col < DMAX.loopBound(row); col += DMAX.length()) { + DoubleVector vx = DoubleVector.fromArray(DMAX, x, col); + DoubleVector vy = DoubleVector.fromArray(DMAX, y, col); + DoubleVector va = DoubleVector.fromArray(DMAX, a, col + row * (row + 1) / 2); + vyrowsum = valpha.mul(vx).fma(va, vyrowsum); + valphaxrow.fma(va, vy).intoArray(y, col); + } + y[row] += vyrowsum.reduceLanes(VectorOperators.ADD); + for (; col < row; col += 1) { + y[row] += alpha * x[col] * a[col + row * (row + 1) / 2]; + y[col] += alpha * x[row] * a[col + row * (row + 1) / 2]; + } + y[row] += alpha * x[col] * a[col + row * (row + 1) / 2]; + } + } + } else { + super.dspmv(uplo, n, alpha, a, x, incx, beta, y, incy); + } + } + + // a += alpha * x * x.t + @Override + public void dspr(String uplo, int n, double alpha, double[] x, int incx, double[] a) { + if ("U".equals(uplo) + && n >= 0 + && x != null && x.length >= n && incx == 1 + && a != null && a.length >= n * (n + 1) / 2) { + if (alpha != 0.) { + for (int row = 0; row < n; row += 1) { + int col = 0; + DoubleVector valphaxrow = DoubleVector.broadcast(DMAX, alpha * x[row]); + for (; col < DMAX.loopBound(row + 1); col += DMAX.length()) { + DoubleVector vx = DoubleVector.fromArray(DMAX, x, col); + DoubleVector va = DoubleVector.fromArray(DMAX, a, col + row * (row + 1) / 2); + vx.fma(valphaxrow, va).intoArray(a, col + row * (row + 1) / 2); + } + for (; col < row + 1; col += 1) { + a[col + row * (row + 1) / 2] += alpha * x[row] * x[col]; + } + } + } + } else { + super.dspr(uplo, n, alpha, x, incx, a); + } + } + + // a += alpha * x * x.t + @Override + public void dsyr(String uplo, int n, double alpha, double[] x, int incx, double[] a, int lda) { + if ("U".equals(uplo) + && n >= 0 + && x != null && x.length >= n && incx == 1 + && a != null && a.length >= n * n && lda == n) { + if (alpha != 0.) { + for (int row = 0; row < n; row += 1) { + int col = 0; + DoubleVector valphaxrow = DoubleVector.broadcast(DMAX, alpha * x[row]); + for (; col < DMAX.loopBound(row + 1); col += DMAX.length()) { + DoubleVector vx = DoubleVector.fromArray(DMAX, x, col); + DoubleVector va = DoubleVector.fromArray(DMAX, a, col + row * n); + vx.fma(valphaxrow, va).intoArray(a, col + row * n); + } + for (; col < row + 1; col += 1) { + a[col + row * n] += alpha * x[row] * x[col]; + } + } + } + } else { + super.dsyr(uplo, n, alpha, x, incx, a, lda); + } + } + + @Override + public void dgemv(String trans, int m, int n, + double alpha, double[] a, int lda, double[] x, int incx, + double beta, double[] y, int incy) { + dgemv(trans, m, n, alpha, a, 0, lda, x, 0, incx, beta, y, 0, incy); + } + + // y = alpha * A * x + beta * y + @Override + public void dgemv(String trans, int m, int n, + double alpha, double[] a, int offseta, int lda, double[] x, int offsetx, int incx, + double beta, double[] y, int offsety, int incy) { + if ("N".equals(trans) + && m >= 0 && n >= 0 + && a != null && a.length >= offseta + m * n && lda == m + && x != null && x.length >= offsetx + n && incx == 1 + && y != null && y.length >= offsety + m && incy == 1) { + // y = beta * y + dscal(m, beta, y, offsety, 1); + // y += alpha * A * x + if (alpha != 0.) 
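+      // Non-transposed case: y accumulates alpha * x[col] * A[:, col] one column
+      // at a time; columns of A are contiguous, so each update is a vector FMA.
+      // The "T" branch below instead computes one dot product per column of A.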
{ + DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha); + for (int col = 0; col < n; col += 1) { + int row = 0; + for (; row < DMAX.loopBound(m); row += DMAX.length()) { + DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + row + col * m); + DoubleVector vy = DoubleVector.fromArray(DMAX, y, offsety + row); + valpha.mul(x[offsetx + col]).fma(va, vy) + .intoArray(y, offsety + row); + } + for (; row < m; row += 1) { + y[offsety + row] += alpha * x[offsetx + col] * a[offseta + row + col * m]; + } + } + } + } else if ("T".equals(trans) + && m >= 0 && n >= 0 + && a != null && a.length >= offseta + m * n && lda == m + && x != null && x.length >= offsetx + m && incx == 1 + && y != null && y.length >= offsety + n && incy == 1) { + if (alpha != 0. || beta != 1.) { + for (int col = 0; col < n; col += 1) { + double sum = 0.; + int row = 0; + DoubleVector vsum = DoubleVector.zero(DMAX); + for (; row < DMAX.loopBound(m); row += DMAX.length()) { + DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + row + col * m); + DoubleVector vx = DoubleVector.fromArray(DMAX, x, offsetx + row); + vsum = va.fma(vx, vsum); + } + sum += vsum.reduceLanes(VectorOperators.ADD); + for (; row < m; row += 1) { + sum += x[offsetx + row] * a[offseta + row + col * m]; + } + y[offsety + col] = alpha * sum + beta * y[offsety + col]; + } + } + } else { + super.dgemv(trans, m, n, alpha, a, offseta, lda, x, offsetx, incx, beta, y, offsety, incy); + } + } + + @Override + public void sgemv(String trans, int m, int n, + float alpha, float[] a, int lda, float[] x, int incx, + float beta, float[] y, int incy) { + sgemv(trans, m, n, alpha, a, 0, lda, x, 0, incx, beta, y, 0, incy); + } + + // y = alpha * A * x + beta * y + @Override + public void sgemv(String trans, int m, int n, + float alpha, float[] a, int offseta, int lda, float[] x, int offsetx, int incx, + float beta, float[] y, int offsety, int incy) { + if ("N".equals(trans) + && m >= 0 && n >= 0 + && a != null && a.length >= offseta + m * n && lda == m + && x != null && x.length >= offsetx + n && incx == 1 + && y != null && y.length >= offsety + m && incy == 1) { + // y = beta * y + sscal(m, beta, y, offsety, 1); + // y += alpha * A * x + if (alpha != 0.f) { + FloatVector valpha = FloatVector.broadcast(FMAX, alpha); + for (int col = 0; col < n; col += 1) { + int row = 0; + for (; row < FMAX.loopBound(m); row += FMAX.length()) { + FloatVector va = FloatVector.fromArray(FMAX, a, offseta + row + col * m); + FloatVector vy = FloatVector.fromArray(FMAX, y, offsety + row); + valpha.mul(x[offsetx + col]).fma(va, vy) + .intoArray(y, offsety + row); + } + for (; row < m; row += 1) { + y[offsety + row] += alpha * x[offsetx + col] * a[offseta + row + col * m]; + } + } + } + } else if ("T".equals(trans) + && m >= 0 && n >= 0 + && a != null && a.length >= offseta + m * n && lda == m + && x != null && x.length >= offsetx + m && incx == 1 + && y != null && y.length >= offsety + n && incy == 1) { + if (alpha != 0. || beta != 1.) 
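+      // Transposed case: y[col] = alpha * dot(A[:, col], x) + beta * y[col],
+      // using a vectorized dot product since each column of A is contiguous.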
{ + for (int col = 0; col < n; col += 1) { + float sum = 0.f; + int row = 0; + FloatVector vsum = FloatVector.zero(FMAX); + for (; row < FMAX.loopBound(m); row += FMAX.length()) { + FloatVector va = FloatVector.fromArray(FMAX, a, offseta + row + col * m); + FloatVector vx = FloatVector.fromArray(FMAX, x, offsetx + row); + vsum = va.fma(vx, vsum); + } + sum += vsum.reduceLanes(VectorOperators.ADD); + for (; row < m; row += 1) { + sum += x[offsetx + row] * a[offseta + row + col * m]; + } + y[offsety + col] = alpha * sum + beta * y[offsety + col]; + } + } + } else { + super.sgemv(trans, m, n, alpha, a, offseta, lda, x, offsetx, incx, beta, y, offsety, incy); + } + } + + @Override + public void dgemm(String transa, String transb, int m, int n, int k, + double alpha, double[] a, int lda, double[] b, int ldb, + double beta, double[] c, int ldc) { + dgemm(transa, transb, m, n, k, alpha, a, 0, lda, b, 0, ldb, beta, c, 0, ldc); + } + + // c = alpha * a * b + beta * c + @Override + public void dgemm(String transa, String transb, int m, int n, int k, + double alpha, double[] a, int offseta, int lda, double[] b, int offsetb, int ldb, + double beta, double[] c, int offsetc, int ldc) { + if ("N".equals(transa) && "N".equals(transb) + && m >= 0 && n >= 0 && k >= 0 + && a != null && a.length >= offseta + m * k && lda == m + && b != null && b.length >= offsetb + k * n && ldb == k + && c != null && c.length >= offsetc + m * n && ldc == m) { + // C = beta * C + dscal(m * n, beta, c, offsetc, 1); + // C += alpha * A * B + if (alpha != 0.) { + DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha); + for (int col = 0; col < n; col += 1) { + for (int i = 0; i < k; i += 1) { + int row = 0; + for (; row < DMAX.loopBound(m); row += DMAX.length()) { + DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + i * m + row); + DoubleVector vc = DoubleVector.fromArray(DMAX, c, offsetc + col * m + row); + valpha.mul(b[offsetb + col * k + i]).fma(va, vc) + .intoArray(c, offsetc + col * m + row); + } + for (; row < m; row += 1) { + c[offsetc + col * m + row] += alpha * a[offseta + i * m + row] * b[offsetb + col * k + i]; + } + } + } + } + } else if ("N".equals(transa) && "T".equals(transb) + && m >= 0 && n >= 0 && k >= 0 + && a != null && a.length >= offseta + m * k && lda == m + && b != null && b.length >= offsetb + k * n && ldb == n + && c != null && c.length >= offsetc + m * n && ldc == m) { + // C = beta * C + dscal(m * n, beta, c, offsetc, 1); + // C += alpha * A * B + if (alpha != 0.) { + DoubleVector valpha = DoubleVector.broadcast(DMAX, alpha); + for (int i = 0; i < k; i += 1) { + for (int col = 0; col < n; col += 1) { + int row = 0; + for (; row < DMAX.loopBound(m); row += DMAX.length()) { + DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + i * m + row); + DoubleVector vc = DoubleVector.fromArray(DMAX, c, offsetc + col * m + row); + valpha.mul(b[offsetb + col + i * n]).fma(va, vc) + .intoArray(c, offsetc + col * m + row); + } + for (; row < m; row += 1) { + c[offsetc + col * m + row] += alpha * a[offseta + i * m + row] * b[offsetb + col + i * n]; + } + } + } + } + } else if ("T".equals(transa) && "N".equals(transb) + && m >= 0 && n >= 0 && k >= 0 + && a != null && a.length >= offseta + m * k && lda == k + && b != null && b.length >= offsetb + k * n && ldb == k + && c != null && c.length >= offsetc + m * n && ldc == m) { + if (alpha != 0. || beta != 1.) 
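+      // A^T * B: c[row, col] is a length-k dot product between column `row` of A
+      // (A is k x m here, so that column is contiguous) and column `col` of B.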
{ + for (int col = 0; col < n; col += 1) { + for (int row = 0; row < m; row += 1) { + double sum = 0.; + int i = 0; + DoubleVector vsum = DoubleVector.zero(DMAX); + for (; i < DMAX.loopBound(k); i += DMAX.length()) { + DoubleVector va = DoubleVector.fromArray(DMAX, a, offseta + i + row * k); + DoubleVector vb = DoubleVector.fromArray(DMAX, b, offsetb + col * k + i); + vsum = va.fma(vb, vsum); + } + sum += vsum.reduceLanes(VectorOperators.ADD); + for (; i < k; i += 1) { + sum += a[offseta + i + row * k] * b[offsetb + col * k + i]; + } + if (beta != 0.) { + c[offsetc + col * m + row] = alpha * sum + beta * c[offsetc + col * m + row]; + } else { + c[offsetc + col * m + row] = alpha * sum; + } + } + } + } + } else if ("T".equals(transa) && "T".equals(transb) + && m >= 0 && n >= 0 && k >= 0 + && a != null && a.length >= offseta + m * k && lda == k + && b != null && b.length >= offsetb + k * n && ldb == n + && c != null && c.length >= offsetc + m * n && ldc == m) { + if (alpha != 0. || beta != 1.) { + // FIXME: do block by block + for (int col = 0; col < n; col += 1) { + for (int row = 0; row < m; row += 1) { + double sum = 0.; + for (int i = 0; i < k; i += 1) { + sum += a[offseta + i + row * k] * b[offsetb + col + i * n]; + } + if (beta != 0.) { + c[offsetc + col * m + row] = alpha * sum + beta * c[offsetc + col * m + row]; + } else { + c[offsetc + col * m + row] = alpha * sum; + } + } + } + } + } else { + super.dgemm(transa, transb, m, n, k, + alpha, a, offseta, lda, b, offsetb, ldb, + beta, c, offsetc, ldc); + } + } +} diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala index b6c1b011f004c..518c71129a970 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala @@ -18,28 +18,51 @@ package org.apache.spark.ml.linalg import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS} -import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS} /** * BLAS routines for MLlib's vectors and matrices. */ private[spark] object BLAS extends Serializable { - @transient private var _f2jBLAS: NetlibBLAS = _ + @transient private var _javaBLAS: NetlibBLAS = _ @transient private var _nativeBLAS: NetlibBLAS = _ private val nativeL1Threshold: Int = 256 - // For level-1 function dspmv, use f2jBLAS for better performance. - private[ml] def f2jBLAS: NetlibBLAS = { - if (_f2jBLAS == null) { - _f2jBLAS = new F2jBLAS + // For level-1 function dspmv, use javaBLAS for better performance. + private[ml] def javaBLAS: NetlibBLAS = { + if (_javaBLAS == null) { + _javaBLAS = + try { + // scalastyle:off classforname + Class.forName("org.apache.spark.ml.linalg.VectorizedBLAS", true, + Option(Thread.currentThread().getContextClassLoader) + .getOrElse(getClass.getClassLoader)) + .newInstance() + .asInstanceOf[NetlibBLAS] + // scalastyle:on classforname + } catch { + case _: Throwable => new F2jBLAS + } + } + _javaBLAS + } + + // For level-3 routines, we use the native BLAS. 
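+  // If netlib-java could not bind a native library, getInstance returns F2jBLAS;
+  // in that case fall back to javaBLAS, which may itself be VectorizedBLAS.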
+ private[ml] def nativeBLAS: NetlibBLAS = { + if (_nativeBLAS == null) { + _nativeBLAS = + if (NetlibBLAS.getInstance.isInstanceOf[F2jBLAS]) { + javaBLAS + } else { + NetlibBLAS.getInstance + } } - _f2jBLAS + _nativeBLAS } private[ml] def getBLAS(vectorSize: Int): NetlibBLAS = { if (vectorSize < nativeL1Threshold) { - f2jBLAS + javaBLAS } else { nativeBLAS } @@ -235,14 +258,6 @@ private[spark] object BLAS extends Serializable { } } - // For level-3 routines, we use the native BLAS. - private[ml] def nativeBLAS: NetlibBLAS = { - if (_nativeBLAS == null) { - _nativeBLAS = NativeBLAS - } - _nativeBLAS - } - /** * Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR. * @@ -267,7 +282,7 @@ private[spark] object BLAS extends Serializable { x: DenseVector, beta: Double, y: DenseVector): Unit = { - f2jBLAS.dspmv("U", n, alpha, A.values, x.values, 1, beta, y.values, 1) + javaBLAS.dspmv("U", n, alpha, A.values, x.values, 1, beta, y.values, 1) } /** @@ -279,7 +294,7 @@ private[spark] object BLAS extends Serializable { val n = v.size v match { case DenseVector(values) => - NativeBLAS.dspr("U", n, alpha, values, 1, U) + nativeBLAS.dspr("U", n, alpha, values, 1, U) case SparseVector(size, indices, values) => val nnz = indices.length var colStartIdx = 0 diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala index 1254ed747aeb4..3272a50508a3d 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala @@ -22,7 +22,6 @@ import java.util.{Arrays, Random} import scala.collection.mutable.{ArrayBuffer, ArrayBuilder => MArrayBuilder, HashSet => MHashSet} import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM} -import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.annotation.Since @@ -457,7 +456,7 @@ class DenseMatrix @Since("2.0.0") ( if (isTransposed) { Iterator.tabulate(numCols) { j => val col = new Array[Double](numRows) - blas.dcopy(numRows, values, j, numCols, col, 0, 1) + BLAS.nativeBLAS.dcopy(numRows, values, j, numCols, col, 0, 1) new DenseVector(col) } } else { diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASBenchmark.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASBenchmark.scala new file mode 100644 index 0000000000000..1dcfcf9ebb034 --- /dev/null +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASBenchmark.scala @@ -0,0 +1,502 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.ml.linalg
+
+import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
+
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+
+/**
+ * Benchmark for comparing BLAS implementations (f2j, native, and Vector API).
+ * To run this benchmark:
+ * {{{
+ * 1. without sbt: bin/spark-submit --class <this class> <spark mllib test jar>
+ * 2. build/sbt "mllib-local/test:runMain <this class>"
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "mllib-local/test:runMain <this class>"
+ *    Results will be written to "benchmarks/BLASBenchmark-results.txt".
+ * }}}
+ */
+object BLASBenchmark extends BenchmarkBase {
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+
+    val iters = 1e2.toInt
+    val rnd = new scala.util.Random(0)
+
+    val f2jBLAS = new F2jBLAS
+    val nativeBLAS = NetlibBLAS.getInstance
+    val vectorBLAS =
+      try {
+        // scalastyle:off classforname
+        Class.forName("org.apache.spark.ml.linalg.VectorizedBLAS", true,
+          Option(Thread.currentThread().getContextClassLoader)
+            .getOrElse(getClass.getClassLoader))
+          .newInstance()
+          .asInstanceOf[NetlibBLAS]
+        // scalastyle:on classforname
+      } catch {
+        case _: Throwable => new F2jBLAS
+      }
+
+    // scalastyle:off println
+    println("nativeBLAS = " + nativeBLAS.getClass.getName)
+    println("f2jBLAS = " + f2jBLAS.getClass.getName)
+    println("vectorBLAS = " + vectorBLAS.getClass.getName)
+    // scalastyle:on println
+
+    runBenchmark("daxpy") {
+      val n = 1e7.toInt
+      val alpha = rnd.nextDouble
+      val x = Array.fill(n) { rnd.nextDouble }
+      val y = Array.fill(n) { rnd.nextDouble }
+
+      val benchmark = new Benchmark("daxpy", n, iters, output = output)
+
+      benchmark.addCase("f2j") { _ =>
+        f2jBLAS.daxpy(n, alpha, x, 1, y, 1)
+      }
+
+      if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+        benchmark.addCase("native") { _ =>
+          nativeBLAS.daxpy(n, alpha, x, 1, y, 1)
+        }
+      }
+
+      if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+        benchmark.addCase("vector") { _ =>
+          vectorBLAS.daxpy(n, alpha, x, 1, y, 1)
+        }
+      }
+
+      benchmark.run()
+    }
+
+    runBenchmark("ddot") {
+      val n = 1e7.toInt
+      val x = Array.fill(n) { rnd.nextDouble }
+      val y = Array.fill(n) { rnd.nextDouble }
+
+      val benchmark = new Benchmark("ddot", n, iters, output = output)
+
+      benchmark.addCase("f2j") { _ =>
+        f2jBLAS.ddot(n, x, 1, y, 1)
+      }
+
+      if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+        benchmark.addCase("native") { _ =>
+          nativeBLAS.ddot(n, x, 1, y, 1)
+        }
+      }
+
+      if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+        benchmark.addCase("vector") { _ =>
+          vectorBLAS.ddot(n, x, 1, y, 1)
+        }
+      }
+
+      benchmark.run()
+    }
+
+    runBenchmark("sdot") {
+      val n = 1e7.toInt
+      val x = Array.fill(n) { rnd.nextFloat }
+      val y = Array.fill(n) { rnd.nextFloat }
+
+      val benchmark = new Benchmark("sdot", n, iters, output = output)
+
+      benchmark.addCase("f2j") { _ =>
+        f2jBLAS.sdot(n, x, 1, y, 1)
+      }
+
+      if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+        benchmark.addCase("native") { _ =>
+          nativeBLAS.sdot(n, x, 1, y, 1)
+        }
+      }
+
+      if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
+        benchmark.addCase("vector") { _ =>
+          vectorBLAS.sdot(n, x, 1, y, 1)
+        }
+      }
+
+      benchmark.run()
+    }
+
+    runBenchmark("dscal") {
+      val n = 1e7.toInt
+      val alpha = rnd.nextDouble
+      val x = Array.fill(n) { rnd.nextDouble }
+
+      val benchmark = new Benchmark("dscal", n, iters, output = output)
+
+      benchmark.addCase("f2j") { _ =>
+        f2jBLAS.dscal(n, alpha, x, 1)
+      }
+
+      if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) {
+        benchmark.addCase("native") { _ =>
+          nativeBLAS.dscal(n, alpha, x, 1)
+        }
+      }
+
+      if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) {
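+        // Benchmark the Vector API case only when VectorizedBLAS actually
+        // loaded (vectorBLAS falls back to F2jBLAS otherwise).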
benchmark.addCase("vector") { _ => + vectorBLAS.dscal(n, alpha, x, 1) + } + } + + benchmark.run() + } + + runBenchmark("sscal") { + val n = 1e7.toInt + val alpha = rnd.nextFloat + val x = Array.fill(n) { rnd.nextFloat } + + val benchmark = new Benchmark("sscal", n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.sscal(n, alpha, x, 1) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.sscal(n, alpha, x, 1) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.sscal(n, alpha, x, 1) + } + } + + benchmark.run() + } + + runBenchmark("dspmv[U]") { + val n = 1e4.toInt + val alpha = rnd.nextDouble + val a = Array.fill(n * (n + 1) / 2) { rnd.nextDouble } + val x = Array.fill(n) { rnd.nextDouble } + val beta = rnd.nextDouble + val y = Array.fill(n) { rnd.nextDouble } + + val benchmark = new Benchmark("dspmv[U]", n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.dspmv("U", n, alpha, a, x, 1, beta, y, 1) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.dspmv("U", n, alpha, a, x, 1, beta, y, 1) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.dspmv("U", n, alpha, a, x, 1, beta, y, 1) + } + } + + benchmark.run() + } + + runBenchmark("dspr[U]") { + val n = 1e4.toInt + val alpha = rnd.nextDouble + val x = Array.fill(n) { rnd.nextDouble } + val a = Array.fill(n * (n + 1) / 2) { rnd.nextDouble } + + val benchmark = new Benchmark("dspr[U]", n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.dspr("U", n, alpha, x, 1, a) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.dspr("U", n, alpha, x, 1, a) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.dspr("U", n, alpha, x, 1, a) + } + } + + benchmark.run() + } + + runBenchmark("dsyr[U]") { + val n = 1e4.toInt + val alpha = rnd.nextDouble + val x = Array.fill(n) { rnd.nextDouble } + val a = Array.fill(n * n) { rnd.nextDouble } + + val benchmark = new Benchmark("dsyr[U]", n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.dsyr("U", n, alpha, x, 1, a, n) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.dsyr("U", n, alpha, x, 1, a, n) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.dsyr("U", n, alpha, x, 1, a, n) + } + } + + benchmark.run() + } + + runBenchmark("dgemv[N]") { + val m = 1e4.toInt + val n = 1e3.toInt + val alpha = rnd.nextDouble + val a = Array.fill(m * n) { rnd.nextDouble } + val lda = m + val x = Array.fill(n) { rnd.nextDouble } + val beta = rnd.nextDouble + val y = Array.fill(m) { rnd.nextDouble } + + val benchmark = new Benchmark("dgemv[N]", n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.dgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.dgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.dgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + } + + benchmark.run() + } + + runBenchmark("dgemv[T]") { + val m = 1e4.toInt + 
val n = 1e3.toInt + val alpha = rnd.nextDouble + val a = Array.fill(m * n) { rnd.nextDouble } + val lda = m + val x = Array.fill(m) { rnd.nextDouble } + val beta = rnd.nextDouble + val y = Array.fill(n) { rnd.nextDouble } + + val benchmark = new Benchmark("dgemv[T]", n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.dgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.dgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.dgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + } + + benchmark.run() + } + + runBenchmark("sgemv[N]") { + val m = 1e4.toInt + val n = 1e3.toInt + val alpha = rnd.nextFloat + val a = Array.fill(m * n) { rnd.nextFloat } + val lda = m + val x = Array.fill(n) { rnd.nextFloat } + val beta = rnd.nextFloat + val y = Array.fill(m) { rnd.nextFloat } + + val benchmark = new Benchmark("sgemv[N]", n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.sgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.sgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.sgemv("N", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + } + + benchmark.run() + } + + runBenchmark("sgemv[T]") { + val m = 1e4.toInt + val n = 1e3.toInt + val alpha = rnd.nextFloat + val a = Array.fill(m * n) { rnd.nextFloat } + val lda = m + val x = Array.fill(m) { rnd.nextFloat } + val beta = rnd.nextFloat + val y = Array.fill(n) { rnd.nextFloat } + + val benchmark = new Benchmark("sgemv[T]", n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.sgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.sgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.sgemv("T", m, n, alpha, a, lda, x, 1, beta, y, 1) + } + } + + benchmark.run() + } + + runBenchmark("dgemm[N,N]") { + val m = 1e3.toInt + val n = 1e2.toInt + val k = 1e3.toInt + val alpha = rnd.nextDouble + val a = Array.fill(m * k) { rnd.nextDouble } + val lda = m + val b = Array.fill(k * n) { rnd.nextDouble } + val ldb = k + val beta = rnd.nextDouble + val c = Array.fill(m * n) { rnd.nextDouble } + var ldc = m + + val benchmark = new Benchmark("dgemm[N,N]", m*n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.dgemm("N", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.dgemm("N", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.dgemm("N", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + } + + benchmark.run() + } + + runBenchmark("dgemm[N,T]") { + val m = 1e3.toInt + val n = 1e2.toInt + val k = 1e3.toInt + val alpha = rnd.nextDouble + val a = Array.fill(m * k) { rnd.nextDouble } + val lda = m + val b = Array.fill(k * n) { rnd.nextDouble } + val ldb = n + val beta = rnd.nextDouble + val c = Array.fill(m * n) { 
rnd.nextDouble } + var ldc = m + + val benchmark = new Benchmark("dgemm[N,T]", m*n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.dgemm("N", "T", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.dgemm("N", "T", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.dgemm("N", "T", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + } + + benchmark.run() + } + + runBenchmark("dgemm[T,N]") { + val m = 1e3.toInt + val n = 1e2.toInt + val k = 1e3.toInt + val alpha = rnd.nextDouble + val a = Array.fill(m * k) { rnd.nextDouble } + val lda = k + val b = Array.fill(k * n) { rnd.nextDouble } + val ldb = k + val beta = rnd.nextDouble + val c = Array.fill(m * n) { rnd.nextDouble } + var ldc = m + + val benchmark = new Benchmark("dgemm[T,N]", m*n, iters, output = output) + + benchmark.addCase("f2j") { _ => + f2jBLAS.dgemm("T", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + + if (!nativeBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("native") { _ => + nativeBLAS.dgemm("T", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + } + + if (!vectorBLAS.getClass.equals(classOf[F2jBLAS])) { + benchmark.addCase("vector") { _ => + vectorBLAS.dgemm("T", "N", m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) + } + } + + benchmark.run() + } + } +} diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala index 781f3da313d82..ce177666f34e6 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.ml.util.TestingUtils._ class BLASSuite extends SparkMLFunSuite { test("nativeL1Threshold") { - assert(getBLAS(128) == BLAS.f2jBLAS) + assert(getBLAS(128) == BLAS.javaBLAS) assert(getBLAS(256) == BLAS.nativeBLAS) assert(getBLAS(512) == BLAS.nativeBLAS) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala index 6bbe7e1cb2134..ce9093a6238ee 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala @@ -18,7 +18,8 @@ package org.apache.spark.ml.ann import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} -import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS} + +import org.apache.spark.ml.linalg.BLAS /** * In-place DGEMM and DGEMV for Breeze @@ -41,7 +42,7 @@ private[ann] object BreezeUtil { require(A.cols == B.rows, "A & B Dimension mismatch!") require(A.rows == C.rows, "A & C Dimension mismatch!") require(B.cols == C.cols, "A & C Dimension mismatch!") - NativeBLAS.dgemm(transposeString(A), transposeString(B), C.rows, C.cols, A.cols, + BLAS.nativeBLAS.dgemm(transposeString(A), transposeString(B), C.rows, C.cols, A.cols, alpha, A.data, A.offset, A.majorStride, B.data, B.offset, B.majorStride, beta, C.data, C.offset, C.rows) } @@ -57,7 +58,7 @@ private[ann] object BreezeUtil { def dgemv(alpha: Double, A: BDM[Double], x: BDV[Double], beta: Double, y: BDV[Double]): Unit = { require(A.cols == x.length, "A & x Dimension mismatch!") require(A.rows == y.length, "A & y Dimension mismatch!") - NativeBLAS.dgemv(transposeString(A), A.rows, A.cols, + 
BLAS.nativeBLAS.dgemv(transposeString(A), A.rows, A.cols, alpha, A.data, A.offset, A.majorStride, x.data, x.offset, x.stride, beta, y.data, y.offset, y.stride) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index ca43ef863003a..453e28609a0c2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -17,14 +17,13 @@ package org.apache.spark.ml.classification -import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.feature.Instance -import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} +import org.apache.spark.ml.linalg.{BLAS, DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.regression.DecisionTreeRegressionModel import org.apache.spark.ml.tree._ @@ -371,7 +370,7 @@ class GBTClassificationModel private[ml]( /** Raw prediction for the positive class. */ private def margin(features: Vector): Double = { val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction) - blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1) + BLAS.nativeBLAS.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1) } /** (private[ml]) Convert to a model in the old API */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 0bc5b9a9ca09f..57fb46b451689 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -980,7 +980,7 @@ class LogisticRegression @Since("1.2.0") ( if (fitWithMean) { if (multinomial) { val adapt = Array.ofDim[Double](numClasses) - BLAS.f2jBLAS.dgemv("N", numClasses, numFeatures, 1.0, + BLAS.javaBLAS.dgemv("N", numClasses, numFeatures, 1.0, initialSolution, numClasses, scaledMean, 1, 0.0, adapt, 1) BLAS.getBLAS(numFeatures).daxpy(numClasses, 1.0, adapt, 0, 1, initialSolution, numClasses * numFeatures, 1) @@ -1016,7 +1016,7 @@ class LogisticRegression @Since("1.2.0") ( if (fitWithMean && solution != null) { if (multinomial) { val adapt = Array.ofDim[Double](numClasses) - BLAS.f2jBLAS.dgemv("N", numClasses, numFeatures, 1.0, + BLAS.javaBLAS.dgemv("N", numClasses, numFeatures, 1.0, solution, numClasses, scaledMean, 1, 0.0, adapt, 1) BLAS.getBLAS(numFeatures).daxpy(numClasses, -1.0, adapt, 0, 1, solution, numClasses * numFeatures, 1) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala index 475fc5b7f8ccf..16d711197fdde 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala @@ -183,8 +183,8 @@ class BucketedRandomProjectionLSH(override val uid: String) var i = 0 while (i < localNumHashTables) { val offset = i * inputDim - val norm = BLAS.f2jBLAS.dnrm2(inputDim, values, offset, 1) - if (norm != 0) BLAS.f2jBLAS.dscal(inputDim, 1.0 / norm, values, offset, 1) + val norm = 
BLAS.javaBLAS.dnrm2(inputDim, values, offset, 1) + if (norm != 0) BLAS.javaBLAS.dscal(inputDim, 1.0 / norm, values, offset, 1) i += 1 } val randMatrix = new DenseMatrix(localNumHashTables, inputDim, values, true) diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 9a24d3b0d09db..a0ddf7129c9b5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -26,7 +26,6 @@ import scala.reflect.ClassTag import scala.util.{Sorting, Try} import scala.util.hashing.byteswap64 -import com.github.fommil.netlib.BLAS.{getInstance => blas} import com.google.common.collect.{Ordering => GuavaOrdering} import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats @@ -483,7 +482,7 @@ class ALSModel private[ml] ( Iterator.range(0, m).flatMap { i => // scores = i-th vec in srcMat * dstMat - BLAS.f2jBLAS.sgemv("T", rank, n, 1.0F, dstMat, 0, rank, + BLAS.javaBLAS.sgemv("T", rank, n, 1.0F, dstMat, 0, rank, srcMat, i * rank, 1, 0.0F, scores, 0, 1) val srcId = srcIds(i) @@ -895,9 +894,9 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { require(c >= 0.0) require(a.length == k) copyToDouble(a) - blas.dspr(upper, k, c, da, 1, ata) + BLAS.nativeBLAS.dspr(upper, k, c, da, 1, ata) if (b != 0.0) { - blas.daxpy(k, b, da, 1, atb, 1) + BLAS.nativeBLAS.daxpy(k, b, da, 1, atb, 1) } this } @@ -905,8 +904,8 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { /** Merges another normal equation object. */ def merge(other: NormalEquation): NormalEquation = { require(other.k == k) - blas.daxpy(ata.length, 1.0, other.ata, 1, ata, 1) - blas.daxpy(atb.length, 1.0, other.atb, 1, atb, 1) + BLAS.nativeBLAS.daxpy(ata.length, 1.0, other.ata, 1, ata, 1) + BLAS.nativeBLAS.daxpy(atb.length, 1.0, other.atb, 1, atb, 1) this } @@ -1280,8 +1279,8 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { val random = new XORShiftRandom(byteswap64(seed ^ srcBlockId)) val factors = Array.fill(inBlock.srcIds.length) { val factor = Array.fill(rank)(random.nextGaussian().toFloat) - val nrm = blas.snrm2(rank, factor, 1) - blas.sscal(rank, 1.0f / nrm, factor, 1) + val nrm = BLAS.nativeBLAS.snrm2(rank, factor, 1) + BLAS.nativeBLAS.sscal(rank, 1.0f / nrm, factor, 1) factor } (srcBlockId, factors) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 78d5ddaa2758b..fd8af71d43568 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -17,13 +17,12 @@ package org.apache.spark.ml.regression -import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging -import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.linalg.{BLAS, Vector} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree._ import org.apache.spark.ml.tree.impl.GradientBoostedTrees @@ -299,7 +298,7 @@ class GBTRegressionModel private[ml]( // TODO: When we add a generic Boosting class, handle transform there? 
SPARK-7129 // Classifies by thresholding sum of weighted tree predictions val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction) - blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1) + BLAS.nativeBLAS.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1) } @Since("1.4.0") diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index 37e695ef88b14..c86bc0d9a36f3 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -17,12 +17,10 @@ package org.apache.spark.ml.classification -import com.github.fommil.netlib.BLAS - import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.ml.classification.LinearSVCSuite.generateSVMInput import org.apache.spark.ml.feature.{Instance, LabeledPoint} -import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.regression.DecisionTreeRegressionModel import org.apache.spark.ml.tree._ @@ -170,8 +168,6 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { val numFeatures = trainingDataset.select(featuresCol).first().getAs[Vector](0).size assert(gbtModel.numFeatures === numFeatures) - val blas = BLAS.getInstance() - val validationDataset = validationData.toDF(labelCol, featuresCol) testTransformer[(Double, Vector)](validationDataset, gbtModel, "rawPrediction", "features", "probability", "prediction") { @@ -179,7 +175,7 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { assert(raw.size === 2) // check that raw prediction is tree predictions dot tree weights val treePredictions = gbtModel.trees.map(_.rootNode.predictImpl(features).prediction) - val prediction = blas.ddot(gbtModel.getNumTrees, treePredictions, 1, + val prediction = BLAS.nativeBLAS.ddot(gbtModel.getNumTrees, treePredictions, 1, gbtModel.treeWeights, 1) assert(raw ~== Vectors.dense(-prediction, prediction) relTol eps) diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index 28275eb06cf0d..cebd8cac057f1 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -24,14 +24,13 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, WrappedArray} -import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.TrueFileFilter import org.scalatest.BeforeAndAfterEach import org.apache.spark._ import org.apache.spark.internal.Logging -import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.linalg.{BLAS, Vectors} import org.apache.spark.ml.recommendation.ALS._ import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ @@ -296,7 +295,7 @@ class ALSSuite extends MLTest with DefaultReadWriteTest with Logging { for ((userId, userFactor) <- userFactors; (itemId, itemFactor) <- itemFactors) { val x = random.nextDouble() if (x < totalFraction) { - val rating = blas.sdot(rank, userFactor, 1, itemFactor, 1) + val rating = 
BLAS.nativeBLAS.sdot(rank, userFactor, 1, itemFactor, 1) if (x < trainingFraction) { val noise = noiseStd * random.nextGaussian() training += Rating(userId, itemId, rating + noise.toFloat) @@ -1194,7 +1193,7 @@ object ALSSuite extends Logging { val training = ArrayBuffer.empty[Rating[Int]] val test = ArrayBuffer.empty[Rating[Int]] for ((userId, userFactor) <- userFactors; (itemId, itemFactor) <- itemFactors) { - val rating = blas.sdot(rank, userFactor, 1, itemFactor, 1) + val rating = BLAS.nativeBLAS.sdot(rank, userFactor, 1, itemFactor, 1) val threshold = if (rating > 0) positiveFraction else negativeFraction val observed = random.nextDouble() < threshold if (observed) { diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 7bfb0a4aa1f3b..97f5e2f4ff683 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -291,6 +291,12 @@ object SparkBuild extends PomBuild { (SbtCompile / publishLocal) := publishTask((SbtCompile / publishLocalConfiguration)).value, publishLocal := Seq((MavenCompile / publishLocal), (SbtCompile / publishLocal)).dependOn.value, + javaOptions ++= { + val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) + var major = versionParts(0).toInt + if (major >= 16) Seq("--add-modules=jdk.incubator.vector") else Seq.empty + }, + (Compile / doc / javacOptions) ++= { val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) var major = versionParts(0).toInt