Skip to content

Commit

Permalink
respond to review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
hhbyyh committed Apr 27, 2015
1 parent 08a45da commit 0e2e006
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 21 deletions.
33 changes: 15 additions & 18 deletions mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ import org.apache.spark.util.Utils
* - "token": instance of a term appearing in a document
* - "topic": multinomial distribution over words representing some concept
*
* References:
* - Original LDA paper (journal version):
* Blei, Ng, and Jordan. "Latent Dirichlet Allocation." JMLR, 2003.
*
* @see [[http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation Latent Dirichlet allocation
* (Wikipedia)]]
*/
Expand All @@ -47,12 +51,11 @@ class LDA private (
private var docConcentration: Double,
private var topicConcentration: Double,
private var seed: Long,
private var checkpointInterval: Int) extends Logging {
private var checkpointInterval: Int,
private var ldaOptimizer: LDAOptimizer) extends Logging {

def this() = this(k = 10, maxIterations = 20, docConcentration = -1, topicConcentration = -1,
seed = Utils.random.nextLong(), checkpointInterval = 10)

private var ldaOptimizer: LDAOptimizer = getDefaultOptimizer("EM")
seed = Utils.random.nextLong(), checkpointInterval = 10, ldaOptimizer = new EMLDAOptimizer)

/**
* Number of topics to infer. I.e., the number of soft cluster centers.
Expand Down Expand Up @@ -208,7 +211,7 @@ class LDA private (


/** LDAOptimizer used to perform the actual calculation */
// NOTE(review): the two definitions below look like the before/after pair of this diff
// (with and without the empty parameter list). Both simply return the current ldaOptimizer;
// confirm which single form the file keeps.
def getOptimizer(): LDAOptimizer = ldaOptimizer
def getOptimizer: LDAOptimizer = ldaOptimizer

/**
* LDAOptimizer used to perform the actual calculation (default = EMLDAOptimizer)
Expand All @@ -220,24 +223,18 @@ class LDA private (

/**
 * Set the LDAOptimizer used to perform the actual calculation by algorithm name.
 * Currently only "em" is supported; the name is matched case-insensitively.
 *
 * @param optimizerName name of the optimization algorithm
 * @return this LDA instance, so that setter calls can be chained
 * @throws IllegalArgumentException if the name is not a supported algorithm
 */
def setOptimizer(optimizerName: String): this.type = {
  // Lower-case with a fixed locale so that the match below does not depend on the
  // JVM's default locale.
  this.ldaOptimizer =
    optimizerName.toLowerCase(java.util.Locale.ROOT) match {
      case "em" => new EMLDAOptimizer
      case other =>
        throw new IllegalArgumentException(s"Only em is supported but got $other.")
    }
  this
}

/**
 * Resolve an optimizer name to a new LDAOptimizer instance.
 *
 * The match is case-sensitive: only the exact name "EM" is accepted.
 *
 * @param optimizerName name of the optimization algorithm
 * @return a new EMLDAOptimizer when optimizerName is "EM"
 * @throws UnsupportedOperationException for any other name
 */
private def getDefaultOptimizer(optimizerName: String): LDAOptimizer = {
  optimizerName match {
    case "EM" => new EMLDAOptimizer()
    case other =>
      throw new UnsupportedOperationException(s"Only EM is supported but got $other.")
  }
}

/**
* Learn an LDA model using the given dataset.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package org.apache.spark.mllib.clustering
import java.util.Random

import breeze.linalg.{DenseVector => BDV, normalize}

import org.apache.spark.annotation.Experimental
import org.apache.spark.graphx._
import org.apache.spark.graphx.impl.GraphImpl
Expand All @@ -30,13 +31,20 @@ import org.apache.spark.rdd.RDD
/**
* :: Experimental ::
*
* An LDAOptimizer contains an algorithm for LDA and performs the actual computation. It
* stores the internal data structure (Graph or Matrix) and any other parameters for the algorithm.
* The interface is isolated to improve the extensibility of LDA.
* An LDAOptimizer specifies which optimization/learning/inference algorithm to use, and it can
* hold optimizer-specific parameters for users to set.
*/
@Experimental
trait LDAOptimizer{

/*
DEVELOPERS NOTE:
An LDAOptimizer contains an algorithm for LDA and performs the actual computation. It
stores the internal data structure (Graph or Matrix) and other parameters for the algorithm.
The interface is isolated to improve the extensibility of LDA.
*/

/**
* Initializer for the optimizer. LDA passes the common parameters to the optimizer so that
* the internal structure can be initialized properly.
Expand Down Expand Up @@ -75,6 +83,7 @@ trait LDAOptimizer{
class EMLDAOptimizer extends LDAOptimizer{

import LDA._

/**
* The following fields are initialized only through the initialState method.
*/
Expand Down

0 comments on commit 0e2e006

Please sign in to comment.