[SPARK-33882][ML] Add a vectorized BLAS implementation #30810

Closed. Wants to merge 20 commits; showing changes from 16 commits.
35 changes: 35 additions & 0 deletions mllib-local/pom.xml
@@ -68,6 +68,13 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>
<profile>
@@ -81,6 +88,34 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>vectorized</id>
<properties>
<extra.source.dir>src/vectorized/java</extra.source.dir>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-vectorized-sources</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>${extra.source.dir}</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
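The `vectorized` profile above only adds `src/vectorized/java` to the compile path when explicitly enabled. As a sketch, a build that compiles the vectorized sources might look like this (the `-Pvectorized` flag comes from the profile id in the diff; the module selection and the JDK requirement are assumptions, since the Vector API sources themselves are not shown in this hunk):

```shell
# Hypothetical: build mllib-local with the vectorized BLAS sources included.
# Assumes a JDK that ships the incubating Vector API (JDK 16+).
./build/mvn -Pvectorized -pl mllib-local -am -DskipTests package
```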
51 changes: 33 additions & 18 deletions mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
@@ -18,28 +18,51 @@
package org.apache.spark.ml.linalg

import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}

/**
* BLAS routines for MLlib's vectors and matrices.
*/
private[spark] object BLAS extends Serializable {

@transient private var _f2jBLAS: NetlibBLAS = _
@transient private var _javaBLAS: NetlibBLAS = _
@transient private var _nativeBLAS: NetlibBLAS = _
private val nativeL1Threshold: Int = 256
Contributor: According to the performance tests, I think we can increase nativeL1Threshold to 512?

Author: I would go even as far as using nativeBLAS exclusively for level-3 operations, and never for level-1 and level-2. The cost of copying the data from managed memory to native memory (necessary to pass the array to native code) is too great relative to the small speed-up of native for the level-1 and level-2 routines.

Contributor: netlib-java does not copy memory when using the native backend; it uses memory pinning (which has its own problems). Please provide benchmarks to show any degradation.

Contributor: On the "small speed-up of native for the level-1 and level-2 routines": I think you need to do some more analysis on this. Native can be 10x faster than the JVM for reasonably sized matrices. However, as shown in https://github.com/fommil/matrix-toolkits-java, the EJML and commons-math projects are faster for matrices of 10x10 or smaller. If you want to heavily optimise for those use cases, then swap to using EJML, which is heavily optimised for that use case (not just "something on the JVM").

// For level-1 function dspmv, use f2jBLAS for better performance.
private[ml] def f2jBLAS: NetlibBLAS = {
if (_f2jBLAS == null) {
_f2jBLAS = new F2jBLAS
// For level-1 function dspmv, use javaBLAS for better performance.
private[ml] def javaBLAS: NetlibBLAS = {
if (_javaBLAS == null) {
_javaBLAS =
try {
// scalastyle:off classforname
Class.forName("org.apache.spark.ml.linalg.VectorizedBLAS", true,
Option(Thread.currentThread().getContextClassLoader)
.getOrElse(getClass.getClassLoader))
.newInstance()
.asInstanceOf[NetlibBLAS]
// scalastyle:on classforname
} catch {
case _: Throwable => new F2jBLAS
}
}
_javaBLAS
}
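The `javaBLAS` accessor above loads `VectorizedBLAS` reflectively so that the class may be absent at runtime (it only exists when the `vectorized` profile is built), falling back to `F2jBLAS` otherwise. A minimal, self-contained sketch of that load-or-fallback pattern (the class `OptionalImpl` and its method name are hypothetical, not part of this PR):

```java
import java.util.function.Supplier;

public class OptionalImpl {
    // Hypothetical helper mirroring the pattern in javaBLAS: try to
    // instantiate an optional implementation by name, and fall back to a
    // default when the class is missing or fails to load.
    public static <T> T loadOrFallback(String className, Class<T> iface, Supplier<T> fallback) {
        try {
            ClassLoader cl = Thread.currentThread().getContextClassLoader();
            if (cl == null) {
                cl = OptionalImpl.class.getClassLoader();
            }
            return iface.cast(Class.forName(className, true, cl)
                    .getDeclaredConstructor().newInstance());
        } catch (Throwable t) {
            return fallback.get();
        }
    }
}
```

Catching `Throwable` (as the diff does) covers both `ClassNotFoundException` and linkage errors, e.g. when the class was compiled against a Vector API that the running JDK does not provide.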

// For level-3 routines, we use the native BLAS.
private[ml] def nativeBLAS: NetlibBLAS = {
if (_nativeBLAS == null) {
_nativeBLAS =
if (NetlibBLAS.getInstance.isInstanceOf[F2jBLAS]) {
javaBLAS
} else {
NetlibBLAS.getInstance
}
}
_f2jBLAS
_nativeBLAS
}

private[ml] def getBLAS(vectorSize: Int): NetlibBLAS = {
if (vectorSize < nativeL1Threshold) {
f2jBLAS
javaBLAS
} else {
nativeBLAS
}
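`getBLAS` routes level-1 calls by vector size: below `nativeL1Threshold` (256 in this revision) the JVM implementation is preferred because per-call native overhead dominates on small arrays. A toy sketch of the dispatch (labels stand in for the actual `NetlibBLAS` instances; `BlasDispatch` is hypothetical):

```java
public class BlasDispatch {
    // Threshold taken from this revision of the diff; the review thread
    // above discusses raising it to 512.
    static final int NATIVE_L1_THRESHOLD = 256;

    // Returns a label standing in for the NetlibBLAS instance that the
    // real getBLAS(vectorSize) would return.
    static String getBLAS(int vectorSize) {
        return vectorSize < NATIVE_L1_THRESHOLD ? "javaBLAS" : "nativeBLAS";
    }
}
```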
@@ -235,14 +258,6 @@ private[spark] object BLAS extends Serializable {
}
}

// For level-3 routines, we use the native BLAS.
private[ml] def nativeBLAS: NetlibBLAS = {
if (_nativeBLAS == null) {
_nativeBLAS = NativeBLAS
}
_nativeBLAS
}

/**
* Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
*
@@ -267,7 +282,7 @@
x: DenseVector,
beta: Double,
y: DenseVector): Unit = {
f2jBLAS.dspmv("U", n, alpha, A.values, x.values, 1, beta, y.values, 1)
javaBLAS.dspmv("U", n, alpha, A.values, x.values, 1, beta, y.values, 1)
}
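`dspmv` computes `y := alpha * A * x + beta * y` for a symmetric matrix `A` stored packed by its upper triangle. A plain-Java sketch of the semantics, for illustration only (not the netlib code path):

```java
public class PackedSymmetric {
    // Sketch of dspmv("U", n, alpha, a, x, 1, beta, y, 1):
    // y := alpha * A * x + beta * y, where the n x n symmetric matrix A is
    // packed column-major by its upper triangle:
    // a[i + j*(j+1)/2] = A(i, j) for i <= j.
    static void dspmv(int n, double alpha, double[] a, double[] x, double beta, double[] y) {
        for (int i = 0; i < n; i++) {
            y[i] *= beta;
        }
        for (int j = 0; j < n; j++) {
            for (int i = 0; i <= j; i++) {
                double aij = a[i + j * (j + 1) / 2];
                y[i] += alpha * aij * x[j];
                if (i != j) {
                    y[j] += alpha * aij * x[i]; // symmetric counterpart A(j, i)
                }
            }
        }
    }
}
```

For example, with `A = [[1, 2], [2, 3]]` packed as `{1, 2, 3}`, `x = {1, 1}`, `alpha = 1`, and `beta = 0`, the result is `y = {3, 5}`.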

/**
@@ -279,7 +294,7 @@
val n = v.size
v match {
case DenseVector(values) =>
NativeBLAS.dspr("U", n, alpha, values, 1, U)
nativeBLAS.dspr("U", n, alpha, values, 1, U)
case SparseVector(size, indices, values) =>
val nnz = indices.length
var colStartIdx = 0
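The dense branch above calls `dspr`, the symmetric rank-1 update `U := alpha * x * x^T + U`, on the same packed-upper layout. A plain-Java sketch for illustration:

```java
public class PackedRank1 {
    // Sketch of dspr("U", n, alpha, x, 1, u):
    // U := alpha * x * x^T + U, with U packed column-major by its upper
    // triangle: u[i + j*(j+1)/2] = U(i, j) for i <= j.
    static void dspr(int n, double alpha, double[] x, double[] u) {
        for (int j = 0; j < n; j++) {
            for (int i = 0; i <= j; i++) {
                u[i + j * (j + 1) / 2] += alpha * x[i] * x[j];
            }
        }
    }
}
```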
@@ -22,7 +22,6 @@ import java.util.{Arrays, Random}
import scala.collection.mutable.{ArrayBuffer, ArrayBuilder => MArrayBuilder, HashSet => MHashSet}

import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM}
import com.github.fommil.netlib.BLAS.{getInstance => blas}

import org.apache.spark.annotation.Since

@@ -457,7 +456,7 @@ class DenseMatrix @Since("2.0.0") (
if (isTransposed) {
Iterator.tabulate(numCols) { j =>
val col = new Array[Double](numRows)
blas.dcopy(numRows, values, j, numCols, col, 0, 1)
BLAS.nativeBLAS.dcopy(numRows, values, j, numCols, col, 0, 1)
new DenseVector(col)
}
} else {
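The changed line above gathers column `j` of a row-major (`isTransposed`) matrix with a strided `dcopy`: source offset `j`, source stride `numCols`, unit destination stride. A plain-Java sketch of that strided copy:

```java
public class StridedCopy {
    // Sketch of dcopy(n, src, srcOff, srcInc, dst, dstOff, dstInc):
    // copies n elements with independent strides on each side.
    static void dcopy(int n, double[] src, int srcOff, int srcInc,
                      double[] dst, int dstOff, int dstInc) {
        for (int k = 0; k < n; k++) {
            dst[dstOff + k * dstInc] = src[srcOff + k * srcInc];
        }
    }
}
```

For a 2x3 matrix stored row-major as `{1, 2, 3, 4, 5, 6}`, column 1 is gathered with `dcopy(2, values, 1, 3, col, 0, 1)`, yielding `{2, 5}`.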