Skip to content

Commit

Permalink
fix a bug in filtering finished runs
Browse files Browse the repository at this point in the history
  • Loading branch information
mengxr committed Mar 12, 2014
1 parent 42512f2 commit 6f5cdde
Showing 1 changed file with 11 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,11 @@ class KMeans private (
val counts = Array.fill(runs, k)(0L)

points.foreach { point =>
-activeRuns.foreach { r =>
-val (bestCenter, cost) = KMeans.findClosest(centers(r), point)
-costAccums(r) += cost
-sums(r)(bestCenter) += point.vector
-counts(r)(bestCenter) += 1
+(0 until runs).foreach { i =>
+val (bestCenter, cost) = KMeans.findClosest(activeCenters(i), point)
+costAccums(i) += cost
+sums(i)(bestCenter) += point.vector
+counts(i)(bestCenter) += 1
}
}

Expand All @@ -210,7 +210,7 @@ class KMeans private (
}.reduceByKey(mergeContribs).collectAsMap()

// Update the cluster centers and costs for each active run
-for ((run, i) <- activeRuns.view.zipWithIndex) {
+for ((run, i) <- activeRuns.zipWithIndex) {
var changed = false
var j = 0
while (j < k) {
Expand Down Expand Up @@ -242,10 +242,13 @@ class KMeans private (
if (iteration == maxIterations) {
logInfo(s"KMeans reached the max number of iterations: $maxIterations.")
} else {
-logInfo(s"Kmeans converged in $iteration iterations.")
+logInfo(s"KMeans converged in $iteration iterations.")
}

-val bestRun = costs.zipWithIndex.min._2
+val (minCost, bestRun) = costs.zipWithIndex.min
+
+logInfo(s"The cost for the best run is $minCost.")
+
new KMeansModel(centers(bestRun).map { v =>
v.vector.toArray
})
Expand Down

0 comments on commit 6f5cdde

Please sign in to comment.